def keypair_setup():
    """Create the AWS keypair named KEYPAIR_NAME if necessary and save the
    private key locally.

    Returns:
      The boto3 keypair object (existing KeyPairInfo, or the newly created
      KeyPair whose .key_material was written to the local .pem file).

    Raises:
      AssertionError: if the keypair exists on AWS but the local .pem file is
        missing/empty, or if a stale local .pem exists when creating anew.
    """
    existing_keypairs = u.get_keypair_dict()
    keypair = existing_keypairs.get(KEYPAIR_NAME, None)
    keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
    if keypair:
        print("Reusing keypair " + KEYPAIR_NAME)
        # check that local pem file exists and is readable
        assert os.path.exists(keypair_fn), (
            "Keypair %s exists on AWS but local file %s is missing" %
            (KEYPAIR_NAME, keypair_fn))
        with open(keypair_fn) as f:
            keypair_contents = f.read()
        assert len(keypair_contents) > 0
        # todo: check that fingerprint matches keypair.key_fingerprint
        return keypair

    print("Creating keypair " + KEYPAIR_NAME)
    # Check for a stale local .pem BEFORE creating the keypair on AWS.
    # (The original created the keypair first, which leaked an orphaned AWS
    # keypair whenever this assert fired.)
    assert not os.path.exists(keypair_fn), (
        "previous keypair exists, delete it with 'sudo rm %s'" % (keypair_fn,))
    ec2 = u.create_ec2_resource()
    keypair = ec2.create_key_pair(KeyName=KEYPAIR_NAME)
    with open(keypair_fn, 'w') as f:
        f.write(keypair.key_material)
    # private key must be readable only by the owner, or ssh refuses to use it
    os.chmod(keypair_fn, 0o400)
    return keypair
def keypair_setup():
    """Creates keypair if necessary, saves private key locally, returns contents of private key file."""
    os.system('mkdir -p ' + u.PRIVATE_KEY_LOCATION)
    keypair_fn = u.get_keypair_fn()
    keypair = u.get_keypair_dict().get(KEYPAIR_NAME, None)

    if keypair is None:
        # No keypair registered on AWS yet: create one and save the .pem.
        print("Creating keypair " + KEYPAIR_NAME)
        ec2 = u.create_ec2_resource()
        # A leftover local .pem would not match the new keypair, so bail out.
        assert not os.path.exists(keypair_fn), (
            "previous keypair exists, delete it with 'sudo rm %s' and also "
            "delete corresponding keypair through console" % (keypair_fn))
        keypair = ec2.create_key_pair(KeyName=KEYPAIR_NAME)
        with open(keypair_fn, 'w') as pem_file:
            pem_file.write(keypair.key_material)
        os.system('chmod 400 ' + keypair_fn)
        return keypair

    # Keypair already registered on AWS: the matching local .pem must exist.
    print("Reusing keypair " + KEYPAIR_NAME)
    assert os.path.exists(keypair_fn), (
        "Keypair %s exists, but corresponding .pem file %s is not found, "
        "delete keypair %s through console and run again to recreate "
        "keypair/.pem together" % (KEYPAIR_NAME, keypair_fn, KEYPAIR_NAME))
    assert len(open(keypair_fn).read()) > 0
    # todo: check that fingerprint matches keypair.key_fingerprint
    return keypair
def main():
    """Ssh into the most recently launched running instance whose name, id,
    or IP address contains args.fragment.

    Retries once with the alternate EC2 username if the first ssh fails.
    """
    fragment = args.fragment

    # TODO: prevent CTRL+c/CTRL+d from killing session
    if not args.skip_tmux:
        print("Launching into TMUX session, use CTRL+b d to exit")

    region = os.environ['AWS_DEFAULT_REGION']
    client = boto3.client('ec2', region_name=region)
    ec2 = boto3.resource('ec2', region_name=region)
    client.describe_instances()  # fail fast if credentials/region are bad
    username = os.environ.get("USERNAME", "ubuntu")
    print("Using username '%s'" % (username,))

    instance_list = []
    for instance in ec2.instances.all():
        if instance.state['Name'] != 'running':
            continue
        name = u.get_name(instance.tags)
        # str() guards: public/private IPs can be None for instances without
        # one, and "fragment in None" raises TypeError
        if (fragment in name or fragment in str(instance.public_ip_address)
                or fragment in str(instance.id)
                or fragment in str(instance.private_ip_address)):
            instance_list.append((toseconds(instance.launch_time), instance))

    from tzlocal import get_localzone  # $ pip install tzlocal

    # most recently launched instance first
    sorted_instance_list = reversed(sorted(instance_list, key=itemgetter(0)))
    cmd = ''
    print("Using region ", region)
    for (ts, instance) in sorted_instance_list:
        localtime = instance.launch_time.astimezone(get_localzone())
        assert instance.key_name == u.get_keypair_name(), \
            "Got key %s, expected %s" % (instance.key_name,
                                         u.get_keypair_name())
        keypair_fn = u.get_keypair_fn(instance.key_name)
        print("Found %s in %s launched at %s with key %s" %
              (u.get_name(instance.tags), region, localtime,
               instance.key_name))
        cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
        break

    if not cmd:
        print("no instance id contains fragment '%s'" % (fragment,))
        return

    print(cmd)
    result = os.system(cmd)
    # if the default username failed, retry with the other common EC2 username
    if username == 'ubuntu':
        username = '******'
    elif username == 'ec2-user':
        username = '******'
    if result != 0:
        print("ssh failed with code %d, trying username %s" % (result,
                                                               username))
        cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
        os.system(cmd)
def __init__(self, instance, job, task_id, install_script=None,
             user_data='', linux_type=None, skip_efs_mount=False):
    """Wrap an EC2 instance as a task belonging to *job*.

    Args:
      instance: boto3 EC2 instance backing this task.
      job: parent job object (provides .name and ._run.name).
      task_id: index of this task within the job.
      install_script: optional setup script run during initialize().
      user_data: optional EC2 user-data script; must start with '#!/bin/bash'.
      linux_type: 'ubuntu' or 'amazon'; selects the ssh username.
      skip_efs_mount: if True, skip mounting EFS during initialization.
    """
    self.initialize_called = False
    self.instance = instance
    self.job = job
    self.id = task_id
    if user_data:
        # user-data is executed by EC2 as a shell script; anything else is a bug
        assert user_data.startswith('#!/bin/bash')
    self.install_script = install_script
    self.user_data = user_data
    self.linux_type = linux_type
    self._run_counter = 0
    self.cached_ip = None
    self.cached_public_ip = None
    self.skip_efs_mount = skip_efs_mount
    self.name = u.format_task_name(task_id, job.name)
    # TODO, make below actually mean stuff (also, run_command_available)
    self.initialized = False

    # scratch is client-local space for temporary files
    # TODO: this job.name already contains name of run, the directory
    # below uses run name twice
    self.scratch = "{}/{}.{}.{}.{}/scratch".format(TASKDIR_PREFIX,
                                                   job._run.name, job.name,
                                                   self.id,
                                                   0)  # u.now_micros())
    self.remote_scratch = '/tmp/tmux'
    self._ossystem('rm -Rf ' + self.scratch)  # TODO: don't delete this?
    self._ossystem('mkdir -p ' + self.scratch)
    # todo: create taskdir

    self.connect_instructions = "waiting for initialize()"
    self.keypair_fn = u.get_keypair_fn()

    # username to use to ssh into instances
    # ec2-user or ubuntu
    if linux_type == 'ubuntu':
        self.username = '******'
    elif linux_type == 'amazon':
        self.username = '******'
    else:
        # bug fix: the original message had an unfilled %s placeholder, so
        # the offending value was never shown
        assert False, ("Unknown linux type '%s', expected 'ubuntu' or "
                       "'amazon'." % linux_type)
    self.taskdir = '/home/' + self.username
def main():
    """Ssh into the most recently launched instance matching args.fragment.

    Uses u.get_instances() for the filtering/sorting; retries once with the
    alternate EC2 username if the first ssh fails.
    """
    fragment = args.fragment

    # TODO: prevent CTRL+c/CTRL+d from killing session
    if not args.skip_tmux:
        print("Launching into TMUX session, use CTRL+b d to exit")

    region = u.get_region()
    client = u.create_ec2_client()
    ec2 = u.create_ec2_resource()
    client.describe_instances()  # fail fast if credentials/region are bad
    username = os.environ.get("USERNAME", "ubuntu")
    print("Using username '%s'" % (username, ))

    # NOTE: removed dead code -- the original also scanned ec2.instances.all()
    # into an instance_list that was never read; u.get_instances() below
    # performs the filtering.
    from tzlocal import get_localzone  # $ pip install tzlocal

    filtered_instance_list = u.get_instances(fragment)
    if not filtered_instance_list:
        print("no instance id contains fragment '%s'" % (fragment, ))
        return

    # connect to most recent instance
    print(filtered_instance_list)
    instance = filtered_instance_list[0]
    print("Connecting to ", u.get_name(instance), " launched ",
          instance.launch_time.astimezone(get_localzone()))

    keypair_fn = u.get_keypair_fn()
    cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
    print(cmd)
    result = os.system(cmd)
    # if the default username failed, retry with the other common EC2 username
    if username == 'ubuntu':
        username = '******'
    elif username == 'ec2-user':
        username = '******'
    if result != 0:
        print("ssh failed with code %d, trying username %s" % (result,
                                                               username))
        cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
        os.system(cmd)
def delete_keypair():
    """Remove the keypair named KEYPAIR_NAME from AWS (best effort) and
    delete the corresponding local .pem file if it exists."""
    keypair = u.get_keypair_dict().get(KEYPAIR_NAME, '')
    if keypair:
        try:
            sys.stdout.write("Deleting keypair %s (%s) ... " %
                             (keypair.key_name, KEYPAIR_NAME))
            sys.stdout.write(response_type(keypair.delete()) + '\n')
        except Exception as e:
            # best effort: log the failure and still clean up the local file
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    pem_path = u.get_keypair_fn()
    if os.path.exists(pem_path):
        print("Deleting local keypair file %s" % (pem_path, ))
        os.system('rm -f ' + pem_path)
def __init__(self, instance, job, task_id, install_script=None,
             linux_type=None, user_data='', skip_efs_mount=False):
    """Wrap an EC2 instance as a task belonging to *job*.

    Args:
      instance: boto3 EC2 instance backing this task.
      job: parent job object (provides .name and ._run.name).
      task_id: index of this task within the job.
      install_script: optional setup script run during initialize().
      linux_type: 'ubuntu' or 'amazon'; selects the ssh username.
      user_data: optional EC2 user-data script (unused here; kept for
        signature compatibility with callers).
      skip_efs_mount: if True, skip mounting EFS during initialization.
    """
    self.initialize_called = False
    self.instance = instance
    self.job = job
    self.id = task_id
    self.install_script = install_script
    self._run_counter = 0
    self.cached_ip = None
    self.cached_public_ip = None
    self.skip_efs_mount = skip_efs_mount
    self.initialized = False

    # scratch is client-local space for temporary files
    self.scratch = "{}/{}.{}.{}.{}/scratch".format(TASKDIR_PREFIX,
                                                   job._run.name, job.name,
                                                   self.id,
                                                   0)  # u.now_micros())
    self.remote_scratch = '/tmp/tmux'
    self._ossystem('rm -Rf ' + self.scratch)  # TODO: don't delete this?
    self._ossystem('mkdir -p ' + self.scratch)
    # todo: create taskdir

    self.connect_instructions = "waiting for initialize()"
    self.keypair_fn = u.get_keypair_fn(u.get_keypair_name())

    # username to use to ssh into instances
    # ec2-user or ubuntu
    if linux_type == 'ubuntu':
        self.username = '******'
    elif linux_type == 'amazon':
        self.username = '******'
    else:
        # bug fix: the original message had an unfilled %s placeholder, so
        # the offending value was never shown
        assert False, ("Unknown linux type '%s', expected 'ubuntu' or "
                       "'amazon'." % linux_type)
    self.taskdir = '/home/' + self.username
def main():
    """Remove initialization marker files from every running instance whose
    name, id, or IP contains the fragment given as argv[1] (empty fragment
    matches all running instances), forcing re-initialization."""
    fragment = ''
    if len(sys.argv) > 1:
        fragment = sys.argv[1]

    # NOTE: removed a dead nested get_name() helper -- the loop below uses
    # u.get_name() and the local one was never called.
    region = u.get_region()
    client = boto3.client('ec2', region_name=region)
    ec2 = boto3.resource('ec2', region_name=region)
    client.describe_instances()  # fail fast if credentials/region are bad
    username = os.environ.get("EC2_USER", "ubuntu")
    print("Using username '%s'" % (username, ))

    for instance in ec2.instances.all():
        if instance.state['Name'] != 'running':
            continue
        name = u.get_name(instance.tags)
        # str() guards: public/private IPs can be None for instances without
        # one, and "fragment in None" raises TypeError
        if (fragment in name or fragment in str(instance.public_ip_address)
                or fragment in str(instance.id)
                or fragment in str(instance.private_ip_address)):
            print("Uninitializing %s %s %s" %
                  (name, instance.public_ip_address,
                   instance.private_ip_address))
            key_file = u.get_keypair_fn(instance.key_name)
            ssh_client = u.SshClient(hostname=instance.public_ip_address,
                                     ssh_key=key_file,
                                     username=username)
            ssh_client.run('rm /tmp/is_initialized || echo "failed 1"')
            ssh_client.run('rm /tmp/nv_setup_complete || echo "failed 2"')
            ssh_client.run('rm *.sh')  # remove install scripts
def command(self, instance, pem_location=''):
    """Build and return the rsync argv that copies self.source to self.dest
    on *instance* over ssh.

    Honors self.excludes (rsync --exclude patterns), self.modify_window
    (tolerant timestamp comparison) and self.copy_links (-L).
    pem_location is accepted for signature compatibility but unused.
    """
    # todo, rename no_strict_checking to ssh_command
    ssh_command = "ssh -i %s -o StrictHostKeyChecking=no" % (
        u.get_keypair_fn(), )
    argv = ['rsync', '-arvce', ssh_command]
    for pattern in self.excludes:
        argv.extend(['--exclude', pattern])
    if self.modify_window:
        # tolerate clock skew between client and instance
        argv.extend(['--update', '--modify-window=600'])
    if self.copy_links:
        argv.append('-L')
    remote_target = (u.get_username(instance) + "@" +
                     instance.public_ip_address + ':' + self.dest)
    argv.extend(['-rv', self.source, remote_target])
    print("Running ")
    print(argv)
    return argv
def main():
    """Delete the AWS resources (EFS, VPC and its subresources, keypair)
    associated with DEFAULT_NAME in the current region.

    Each deletion is best-effort: failures are logged via u.loge and the
    cleanup continues. Refuses to run when DEFAULT_NAME == 'nexus'.
    """
    # TODO: also bring down all the instances and wait for them to come down
    region = os.environ['AWS_DEFAULT_REGION']
    if DEFAULT_NAME == 'nexus':
        print("Nexus resources are protected, don't delete them")
        sys.exit()

    print("Deleting %s resources in region %s" % (DEFAULT_NAME, region,))
    existing_vpcs = u.get_vpc_dict()
    client = u.create_ec2_client()
    ec2 = u.create_ec2_resource()

    def response_type(response):
        return 'ok' if u.is_good_response(response) else 'failed'

    # delete EFS
    efss = u.get_efs_dict()
    efs_id = efss.get(DEFAULT_NAME, '')
    efs_client = u.create_efs_client()
    if efs_id:
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, DEFAULT_NAME))
            response = efs_client.describe_mount_targets(FileSystemId=efs_id)
            assert u.is_good_response(response)
            for mount_response in response['MountTargets']:
                target_id = mount_response['MountTargetId']
                sys.stdout.write('Deleting mount target %s ... ' %
                                 (target_id, ))
                sys.stdout.flush()
                response = efs_client.delete_mount_target(
                    MountTargetId=target_id)
                print(response_type(response))
            sys.stdout.write('Deleting EFS %s (%s)... ' %
                             (efs_id, DEFAULT_NAME))
            sys.stdout.flush()
            u.delete_efs_id(efs_id)
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    if VPC_NAME in existing_vpcs:
        vpc = ec2.Vpc(existing_vpcs[VPC_NAME].id)
        print("Deleting VPC %s (%s) subresources:" % (VPC_NAME, vpc.id))

        for subnet in vpc.subnets.all():
            try:
                sys.stdout.write("Deleting subnet %s ... " % (subnet.id))
                sys.stdout.write(response_type(subnet.delete()) + '\n')
            except Exception as e:
                sys.stdout.write('failed\n')
                u.loge(str(e) + '\n')

        for gateway in vpc.internet_gateways.all():
            sys.stdout.write("Deleting gateway %s ... " % (gateway.id))
            # todo: if instances are using VPC, this fails with
            # botocore.exceptions.ClientError: An error occurred
            # (DependencyViolation) when calling the DetachInternetGateway
            # operation: Network vpc-... has some mapped public address(es).
            # Please unmap those public address(es) before detaching the
            # gateway.
            sys.stdout.write('detached ... ' if u.is_good_response(
                gateway.detach_from_vpc(VpcId=vpc.id)) else ' detach_failed ')
            sys.stdout.write('deleted ' if u.is_good_response(
                gateway.delete()) else ' delete_failed ')
            sys.stdout.write('\n')

        def desc(route_table):
            return "%s (%s)" % (route_table.id, u.get_name(route_table.tags))

        for route_table in vpc.route_tables.all():
            sys.stdout.write("Deleting route table %s ... " %
                             (desc(route_table)))
            try:
                sys.stdout.write(response_type(route_table.delete()) + '\n')
            except Exception as e:
                sys.stdout.write('failed\n')
                u.loge(str(e) + '\n')

        def desc(security_group):
            return "%s (%s, %s)" % (security_group.id,
                                    u.get_name(security_group.tags),
                                    security_group.group_name)

        # TODO: this tries to remove default security group, maybe not
        # remove it?
        for security_group in vpc.security_groups.all():
            sys.stdout.write('Deleting security group %s ... ' %
                             (desc(security_group)))
            try:
                sys.stdout.write(response_type(security_group.delete()) +
                                 '\n')
            except Exception as e:
                sys.stdout.write('failed\n')
                u.loge(str(e) + '\n')

        sys.stdout.write("Deleting VPC %s ... " % (vpc.id))
        sys.stdout.write(response_type(vpc.delete()) + '\n')

    # delete keypair
    keypairs = u.get_keypair_dict()
    keypair = keypairs.get(DEFAULT_NAME, '')
    if keypair:
        try:
            sys.stdout.write("Deleting keypair %s (%s) ... " %
                             (keypair.key_name, DEFAULT_NAME))
            sys.stdout.write(response_type(keypair.delete()) + '\n')
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
    if os.path.exists(keypair_fn):
        print("Deleting local keypair file %s" % (keypair_fn, ))
        os.system('rm -f ' + keypair_fn)