def run_task_in_region(task='test', region_name=default_region_name(), parallel=False, output=False):
    '''
    Runs a task from fabfile.py on all instances in a given region.

    :param string task: name of a task defined in fabfile.py
    :param string region_name: region from which instances are picked
    :param bool parallel: indicates whether task should be dispatched in parallel
    :param bool output: indicates whether output of task is needed
    :return: output of check_output if output is set, otherwise the return code of call;
             None if running the command raised an exception
    '''

    print(f'running task {task} in {region_name}')

    ip_list = instances_ip_in_region(region_name)
    targets = ['ubuntu@' + ip for ip in ip_list]
    if parallel:
        # GNU parallel substitutes every host listed after ':::' for '{}'
        cmd = 'parallel fab -i key_pairs/aleph.pem -H' + ' {} ' + task + ' ::: ' + ' '.join(targets)
    else:
        # a single fab invocation receives all hosts as a comma separated list
        cmd = f'fab -i key_pairs/aleph.pem -H {",".join(targets)} {task}'

    runner = check_output if output else call
    try:
        return runner(cmd.split())
    except Exception as e:
        # Still best-effort (falls through and returns None), but report what
        # actually failed instead of dropping the exception on the floor.
        print(f'paramiko troubles: {e!r}')
def instances_ip_in_region(region_name=default_region_name()):
    '''
    Collects the public ip address of every instance returned by all_instances_in_region.

    :param string region_name: region from which instances are picked
    :return: list with the public_ip_address attribute of each instance
    '''

    return [machine.public_ip_address for machine in all_instances_in_region(region_name)]
def wait_in_region(target_state, region_name=default_region_name()):
    '''
    Waits until all machines in a given region reach a given state.

    :param string target_state: one of 'running', 'terminated', 'open 22' or 'ssh ready';
                                for any other value nothing is waited for
    :param string region_name: region whose instances (as returned by all_instances_in_region) are watched
    '''

    print('waiting in', region_name)

    instances = all_instances_in_region(region_name)

    if target_state == 'running':
        for instance in instances:
            instance.wait_until_running()

    elif target_state == 'terminated':
        for instance in instances:
            instance.wait_until_terminated()

    elif target_state == 'open 22':
        for instance in instances:
            cmd = f'fab -i key_pairs/aleph.pem -H ubuntu@{instance.public_ip_address} test'
            # call returns the exit status, so retry until fab exits with 0
            while call(cmd.split(), stderr=DEVNULL):
                pass

    elif target_state == 'ssh ready':
        ids = [instance.id for instance in instances]
        # With no instances the status list would stay empty and the loop below
        # would never terminate, so there is nothing to wait for in that case.
        if ids:
            # one client is enough for all polling rounds
            client = boto3.client('ec2', region_name)
            while True:
                statuses = client.describe_instance_status(InstanceIds=ids)['InstanceStatuses']
                # NOTE(review): only the reported statuses are checked; presumably instances
                # that are not running yet are missing from the response - confirm whether
                # len(statuses) should also be compared with len(ids).
                if statuses and all(
                        status['InstanceStatus']['Status'] == 'ok' and status['SystemStatus']['Status'] == 'ok'
                        for status in statuses):
                    break
                # progress indicator, flushed so that dots show up immediately
                print('.', end='', flush=True)
                sleep(5)
        print()
def instances_state_in_region(region_name=default_region_name()):
    '''
    Returns states of all instances in a given region.

    :param string region_name: region from which instances are picked
    :return: list with the state name of every instance that is running, pending, shutting-down or terminated
    '''

    print(region_name, 'collecting instances states')

    tracked_states = ['running', 'pending', 'shutting-down', 'terminated']
    matching = all_instances_in_region(region_name, tracked_states)
    return [machine.state['Name'] for machine in matching]
def installation_finished_in_region(region_name=default_region_name()):
    '''
    Checks if installation has finished on all instances in a given region.

    The last line of setup.log is fetched from every instance; installation counts
    as finished when each of these lines starts with b'done'.

    :param string region_name: region from which instances are picked
    :return: True if every instance reported 'done', False otherwise
    '''

    last_lines = run_cmd_in_region("tail -1 setup.log", region_name, output=True)

    # a line shorter than 4 bytes can never equal b'done' after slicing
    if any(line[:4] != b'done' for line in last_lines):
        return False

    print(f'installation in {region_name} finished')
    return True
def all_instances_in_region(region_name=default_region_name(), states=('running', 'pending')):
    '''
    Returns all instances in a given region whose state is one of the given states.

    :param string region_name: region from which instances are picked
    :param states: collection of state names to keep; defaults to running and pending
                   (an immutable tuple, so the default cannot be altered between calls)
    :return: list of ec2 instance objects whose state name is in states
    '''

    ec2 = boto3.resource('ec2', region_name)

    print(region_name, 'collecting instances')

    return [instance for instance in ec2.instances.all() if instance.state['Name'] in states]
def latency_in_region(region_name=default_region_name()):
    '''
    Calculates latency in a given region.

    Runs nping (10 probes to port 22) against every instance ip via GNU parallel and
    averages the three numbers found in the summary line of every report over all instances.

    :param string region_name: region from which instances are picked
    :return: dict with keys 'max', 'min', 'avg' and values formatted as '<number>ms'
    '''

    print('finding latency', region_name)

    ip_list = instances_ip_in_region(region_name)
    assert ip_list, 'there are no instances running!'

    probes = 10
    command = f'parallel nping -q -c {probes} -p 22 ::: ' + ' '.join(ip_list)
    report = check_output(command.split()).decode().split('\n')

    # the output is consumed in chunks of 5 lines (presumably one chunk per ip),
    # the third line of a chunk holds '|' separated fields
    n_chunks = len(report) // 5
    per_instance = []
    for chunk in range(n_chunks):
        fields = report[5 * chunk + 2].split('|')
        # third token of every field with its last two characters cut off
        # (presumably the 'ms' suffix)
        raw_values = [field.split()[2][:-2] for field in fields]
        per_instance.append([float(raw.strip()) for raw in raw_values])

    # column-wise mean over instances
    averaged = np.mean(per_instance, 0)
    labels = ['max', 'min', 'avg']
    return {label: f'{round(value, 2)}ms' for label, value in zip(labels, averaged)}
def run_cmd_in_region(cmd='tail -f proof-of-concept/experiments/aleph.log', region_name=default_region_name(), output=False):
    '''
    Runs a shell command cmd on all instances in a given region, one instance after another.

    :param string cmd: a shell command that is run on instances
    :param string region_name: region from which instances are picked
    :param bool output: indicates whether output of cmd is needed
    :return: list with one entry per instance: the output of cmd if output is set, its return code otherwise
    '''

    print(f'running command {cmd} in {region_name}')

    # check_output captures what cmd printed, call only reports the exit status
    execute = check_output if output else call

    return [
        execute(f'ssh -o "StrictHostKeyChecking no" -q -i key_pairs/aleph.pem ubuntu@{ip} -t "{cmd}"', shell=True)
        for ip in instances_ip_in_region(region_name)
    ]
def launch_new_instances_in_region(n_processes=1, region_name=default_region_name(), instance_type='t2.micro'):
    '''
    Launches n_processes instances in a given region.

    :param int n_processes: number of instances to create (used both as MinCount and MaxCount)
    :param string region_name: region in which instances are launched
    :param string instance_type: type of the launched instances
    :return: whatever create_instances of the ec2 resource returns
    '''

    print('launching instances in', region_name)

    # both the key pair and the security group go by the name 'aleph'
    key_name = 'aleph'
    init_key_pair(region_name, key_name)

    group_name = 'aleph'
    group_id = security_group_id_by_region(region_name, group_name)

    ami_id = image_id_in_region(region_name)

    volume_mapping = {
        'DeviceName': '/dev/xvda',
        'Ebs': {
            'DeleteOnTermination': True,
            'VolumeSize': 8,
            'VolumeType': 'gp2'
        },
    }

    request = {
        'ImageId': ami_id,
        'MinCount': n_processes,
        'MaxCount': n_processes,
        'InstanceType': instance_type,
        'BlockDeviceMappings': [volume_mapping],
        'KeyName': key_name,
        'Monitoring': {'Enabled': False},
        'SecurityGroupIds': [group_id],
    }

    return boto3.resource('ec2', region_name).create_instances(**request)
def terminate_instances_in_region(region_name=default_region_name()):
    '''
    Terminates every instance returned by all_instances_in_region for a given region.

    :param string region_name: region whose instances are terminated
    '''

    print(region_name, 'terminating instances')

    to_terminate = all_instances_in_region(region_name)
    for machine in to_terminate:
        machine.terminate()