def delete_efs():
    """Delete the EFS file system named ``EFS_NAME`` and its mount targets.

    The file system id is looked up by name in ``u.get_efs_dict()``. When no
    entry with that name exists nothing is deleted. Otherwise every mount
    target reported by ``describe_mount_targets`` is deleted first, and then
    the file system itself is removed through ``u.delete_efs_id``.

    Any exception raised during deletion is caught: ``failed`` is written to
    stdout and the error text is passed to ``u.loge``. Nothing is re-raised.
    """
    efss = u.get_efs_dict()
    efs_id = efss.get(EFS_NAME, '')
    efs_client = u.create_efs_client()
    if not efs_id:
        return

    try:
        # delete mount targets first
        print("About to delete %s (%s)" % (efs_id, EFS_NAME))
        response = efs_client.describe_mount_targets(FileSystemId=efs_id)
        if not u.is_good_response(response):
            # Raised inside the try so it is reported like any other failure,
            # but with a message that says what went wrong.
            raise RuntimeError(
                "describe_mount_targets failed for %s" % (efs_id, ))

        for mount_response in response['MountTargets']:
            # Only the mount target id is needed; no subnet lookup is made so
            # that an unrelated EC2 error cannot abort the EFS deletion.
            mount_target_id = mount_response['MountTargetId']
            sys.stdout.write(
                'Deleting mount target %s ... ' % (mount_target_id, ))
            sys.stdout.flush()
            delete_response = efs_client.delete_mount_target(
                MountTargetId=mount_target_id)
            print(response_type(delete_response))

        sys.stdout.write('Deleting EFS %s (%s)... ' % (efs_id, EFS_NAME))
        sys.stdout.flush()
        u.delete_efs_id(efs_id)
    except Exception as e:
        sys.stdout.write('failed\n')
        u.loge(str(e) + '\n')
def mount_efs(self):
    """Mount the EFS file system registered under ``u.RESOURCE_NAME`` at /efs.

    Creates /efs, makes it world-writable and then issues the NFS mount
    against the regional EFS DNS name. Errors from the mount command are
    ignored.
    """
    region = u.get_region()
    efs_id = u.get_efs_dict()[u.RESOURCE_NAME]
    efs_host = "{efs_id}.efs.{region}.amazonaws.com".format(
        efs_id=efs_id, region=region)

    for setup_cmd in ('sudo mkdir -p /efs', 'sudo chmod 777 /efs'):
        self.run(setup_cmd)

    mount_cmd = (
        "sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 %s:/ /efs"
        % (efs_host, ))
    # error on remount
    self.run(mount_cmd, ignore_errors=True)
def create_resources():
    """Create the network, keypair and EFS resources named ``DEFAULT_NAME``.

    Steps, in order:
      1. ``network_setup()`` provides the VPC and security group.
      2. ``keypair_setup()`` is called for its side effect.
      3. The EFS file system is looked up by name in ``u.get_efs_dict()`` and
         created with ``u.create_efs`` when missing.
      4. A mount target is created for every subnet of the VPC, retrying up
         to ``MAX_FAILURES`` times per subnet.

    Mount target failures are printed, not raised; after the last failed
    attempt for a subnet ``Giving up.`` is printed and the next subnet is
    processed.
    """
    region = u.get_region()
    print("Creating %s resources in region %s" % (
        DEFAULT_NAME,
        region,
    ))

    vpc, security_group = network_setup()
    keypair_setup()  # saves private key locally to keypair_fn

    # create EFS
    efss = u.get_efs_dict()
    efs_id = efss.get(DEFAULT_NAME, '')
    if not efs_id:
        print("Creating EFS " + DEFAULT_NAME)
        efs_id = u.create_efs(DEFAULT_NAME)
    else:
        print("Reusing EFS " + DEFAULT_NAME)

    efs_client = u.create_efs_client()

    # create mount target for each subnet in the VPC

    # added retries because efs is not immediately available
    MAX_FAILURES = 10
    RETRY_INTERVAL_SEC = 1
    for subnet in vpc.subnets.all():
        for _ in range(MAX_FAILURES):
            try:
                sys.stdout.write("Creating efs mount target for %s ... " %
                                 (subnet.availability_zone, ))
                sys.stdout.flush()
                response = efs_client.create_mount_target(
                    FileSystemId=efs_id,
                    SubnetId=subnet.id,
                    SecurityGroups=[security_group.id])
                if u.is_good_response(response):
                    print("success")
                    break
                # A bad response without an exception is treated like any
                # other failure: report it and wait before the next attempt.
                failure = "a bad response"
            except Exception as e:
                if 'already exists' in str(e):
                    # ignore "already exists" errors
                    print('already exists')
                    break
                failure = str(e)

            # Takes couple of seconds for EFS to come online, with
            # errors like this:
            # Creating efs mount target for us-east-1f ... Failed with An error occurred (IncorrectFileSystemLifeCycleState) when calling the CreateMountTarget operation: None, retrying in 1 sec
            print("Got %s, retrying in %s sec" % (failure,
                                                  RETRY_INTERVAL_SEC))
            time.sleep(RETRY_INTERVAL_SEC)
        else:
            # for-else: every attempt for this subnet failed
            print("Giving up.")
def _mount_efs(self):
    """Mount the EFS file system for the current resource name at /efs.

    The mount point is created and made world-writable before the NFS mount
    is attempted; a failing mount command is ignored.
    """
    self.log("Mounting EFS")

    region = u.get_region()
    efs_id = u.get_efs_dict()[u.get_resource_name()]
    efs_endpoint = "{efs_id}.efs.{region}.amazonaws.com".format(
        region=region, efs_id=efs_id)

    # prepare the mount point
    self.run('sudo mkdir -p /efs')
    self.run('sudo chmod 777 /efs')

    nfs_mount = (
        "sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 %s:/ /efs"
        % (efs_endpoint, ))
    # ignore error on remount
    self.run(nfs_mount, ignore_errors=True)
def _mount_efs(self):
    """Mount the EFS file system at /efs and make sure it is world-writable.

    After the mount attempt (whose errors are ignored) ``chmod 777 /efs`` is
    run and re-run every ``TIMEOUT_SEC`` seconds until ``ls -ld /efs``
    reports ``drwxrwxrwx``. The loop has no upper bound.
    """
    self.log("Mounting EFS")

    region = u.get_region()
    efs_id = u.get_efs_dict()[u.get_resource_name()]
    efs_endpoint = "{efs_id}.efs.{region}.amazonaws.com".format(
        region=region, efs_id=efs_id)

    self.run('sudo mkdir -p /efs')

    nfs_mount = (
        "sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 %s:/ /efs"
        % (efs_endpoint, ))
    # ignore error on remount (efs already mounted)
    self.run(nfs_mount, ignore_errors=True)

    # make sure chmod is successful, hack to fix occasional permission errors
    chmod_cmd = 'sudo chmod 777 /efs'
    self.run(chmod_cmd)
    while True:
        listing = self.run_and_capture_output('ls -ld /efs')
        if 'drwxrwxrwx' in listing:
            break
        print(f"chmod 777 /efs didn't take, retrying in {TIMEOUT_SEC}")
        time.sleep(TIMEOUT_SEC)
        self.run(chmod_cmd)
def main():
    """Command-line entry point: list EFS file systems or delete one.

    Reads ``sys.argv``:
      * no argument, or ``list``: call ``list_efss()``.
      * ``delete <name-or-id>``: if the argument is a key of
        ``u.get_efs_dict()`` the mapped id is deleted, otherwise the argument
        itself is used as the file system id.

    Exits through ``sys.exit`` with an error message when ``delete`` is given
    without a target or when the mode is not recognised.
    """
    mode = sys.argv[1] if len(sys.argv) >= 2 else 'list'

    if mode == 'list':
        list_efss()
        return

    if mode != 'delete':
        sys.exit("Unknown mode %r, expected 'list' or 'delete'" % (mode, ))

    # Check for the target up front instead of failing with IndexError.
    if len(sys.argv) < 3:
        sys.exit("Usage: %s delete <efs-name-or-id>" % (sys.argv[0], ))

    name_or_id = sys.argv[2]
    efs_dict = u.get_efs_dict()
    if name_or_id in efs_dict:
        efs_id = efs_dict[name_or_id]
        label = name_or_id
    else:
        # Not a known name: treat the argument as a raw file system id.
        efs_id = name_or_id
        label = ''

    sys.stdout.write('Deleting EFS %s (%s)... ' % (efs_id, label))
    sys.stdout.flush()
    u.delete_efs_id(efs_id)
    print("success")
def main():
    """Launch a single-task server job and mount the shared EFS on it.

    Configuration comes from the module-level ``args`` and from the
    ``AWS_DEFAULT_REGION`` environment variable. The AMI is ``args.ami`` when
    given, otherwise the per-region default for ``args.linux_type``. The
    availability zone is ``args.zone`` when given, otherwise the first entry
    of ``availability_mapping`` for the instance type's two-letter prefix.

    Raises:
        AssertionError: if the region is not set, the requested zone is not
            in that region, or ``args.linux_type`` is unknown.
    """
    assert 'AWS_DEFAULT_REGION' in os.environ, "Must specify default region"
    region = os.environ.get("AWS_DEFAULT_REGION")
    # Validate the zone only when one was requested; an empty zone is filled
    # in from availability_mapping further down.
    if args.zone:
        assert args.zone.startswith(
            region), "Availability zone must be in default region."
    os.system('mkdir -p /tmp/tmux')

    if args.linux_type == 'ubuntu':
        install_script = INSTALL_SCRIPT_UBUNTU
        ami_dict = ami_dict_ubuntu
    elif args.linux_type == 'amazon':
        install_script = INSTALL_SCRIPT_AMAZON
        ami_dict = ami_dict_amazon
    else:
        assert False, "Unknown linux type " + args.linux_type

    ami = args.ami if args.ami else ami_dict[region]

    # #  vpc = u.get_vpc_dict()[u.RESOURCE_NAME]
    # # pick AZ to use for instance based on available subnets
    # subnets = list(vpc.subnets.all())
    # if not subnets:
    #   print("<no subnets>, failing")
    #   sys.exit()
    # subnets = list(vpc.subnets.all())
    # subnet_dict = {}
    # for subnet in subnets:
    #   zone = subnet.availability_zone
    #   assert zone not in subnet_dict, "More than one subnet in %s, why?" %(zone,)
    #   subnet_dict[zone] = subnet

    if not args.zone:
        machine_class = args.instance_type[:2]
        zone = availability_mapping[region][machine_class][0]
        print("Chose %s based on availability mapping for %s" %
              (zone, machine_class))
    else:
        zone = args.zone

    # subnet = subnet_dict[zone]
    # print("Available zones: %s" %(', '.join(sorted(subnet_dict.keys()))))
    # print("Using %-16s %-16s"%(subnet.id, subnet.availability_zone))

    print("Launching %s in %s" % (args.name, zone))
    # NOTE(review): these two values are not passed on; the lookups are kept
    # because they presumably fail early when the resources are missing.
    security_group = u.get_security_group_dict()[u.RESOURCE_NAME]
    keypair = u.get_keypair_dict()[u.RESOURCE_NAME]

    job = aws.server_job(args.name,
                         ami=ami,
                         num_tasks=1,
                         instance_type=args.instance_type,
                         install_script=install_script,
                         availability_zone=zone,
                         linux_type=args.linux_type)

    job.wait_until_ready()
    task = job.tasks[0]

    # this needs DNS to be enabled on VPC
    # alternative way is to provide direct IP from efs_tool.py
    efs_id = u.get_efs_dict()[u.RESOURCE_NAME]
    dns = "{efs_id}.efs.{region}.amazonaws.com".format(efs_id=efs_id,
                                                       region=region)

    # try mounting EFS several times
    mount_attempts = 3
    for _ in range(mount_attempts):
        try:
            task.run(
                "sudo mount -t nfs -o nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2 %s:/ /efs && sudo chmod 777 /efs"
                % (dns, ))
            print("EFS Mount succeeded")
            break
        except Exception as e:
            print("Got error %s, retrying in 10 seconds" % (str(e)))
            time.sleep(10)
    else:
        # for-else: no attempt succeeded; say so instead of continuing
        # silently.
        print("EFS mount failed after %d attempts" % (mount_attempts, ))

    # connect instructions
    print("To connect:")
    print(task.connect_instructions)
def main():
    """Delete the AWS resources named ``DEFAULT_NAME`` in the default region.

    Deletes, in order: the EFS file system (mount targets first), the VPC
    named ``VPC_NAME`` with its subnets, internet gateways, route tables and
    security groups, the keypair, and finally the local keypair file.

    Each deletion step is best-effort: a failure is printed as ``failed``,
    logged through ``u.loge`` and the remaining steps still run. When
    ``DEFAULT_NAME`` is ``nexus`` nothing is deleted and the process exits.

    Raises:
        KeyError: if ``AWS_DEFAULT_REGION`` is not set in the environment.
    """
    # TODO: also bring down all the instances and wait for them to come down
    region = os.environ['AWS_DEFAULT_REGION']
    if DEFAULT_NAME == 'nexus':
        print("Nexus resources are protected, don't delete them")
        sys.exit()

    print("Deleting %s resources in region %s" % (
        DEFAULT_NAME,
        region,
    ))
    existing_vpcs = u.get_vpc_dict()
    ec2 = u.create_ec2_resource()

    def response_type(response):
        return 'ok' if u.is_good_response(response) else 'failed'

    def report_failure(error):
        # Shared reporting for every best-effort deletion step below.
        sys.stdout.write('failed\n')
        u.loge(str(error) + '\n')

    # delete EFS
    efss = u.get_efs_dict()
    efs_id = efss.get(DEFAULT_NAME, '')
    efs_client = u.create_efs_client()
    if efs_id:
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, DEFAULT_NAME))
            response = efs_client.describe_mount_targets(FileSystemId=efs_id)
            if not u.is_good_response(response):
                raise RuntimeError(
                    "describe_mount_targets failed for %s" % (efs_id, ))
            for mount_response in response['MountTargets']:
                # Only the mount target id is needed; no subnet lookup is
                # made so an unrelated EC2 error cannot abort the deletion.
                mount_target_id = mount_response['MountTargetId']
                sys.stdout.write(
                    'Deleting mount target %s ... ' % (mount_target_id, ))
                sys.stdout.flush()
                delete_response = efs_client.delete_mount_target(
                    MountTargetId=mount_target_id)
                print(response_type(delete_response))

            sys.stdout.write(
                'Deleting EFS %s (%s)... ' % (efs_id, DEFAULT_NAME))
            sys.stdout.flush()
            u.delete_efs_id(efs_id)
        except Exception as e:
            report_failure(e)

    if VPC_NAME in existing_vpcs:
        vpc = ec2.Vpc(existing_vpcs[VPC_NAME].id)
        print("Deleting VPC %s (%s) subresources:" % (VPC_NAME, vpc.id))

        for subnet in vpc.subnets.all():
            try:
                sys.stdout.write("Deleting subnet %s ... " % (subnet.id))
                sys.stdout.write(response_type(subnet.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        for gateway in vpc.internet_gateways.all():
            sys.stdout.write("Deleting gateway %s ... " % (gateway.id))
            # todo: if instances are using VPC, this fails with
            # botocore.exceptions.ClientError: An error occurred (DependencyViolation) when calling the DetachInternetGateway operation: Network vpc-ca4abab3 has some mapped public address(es). Please unmap those public address(es) before detaching the gateway.
            # Guarded like the other loops so that such a failure does not
            # abort the remaining cleanup.
            try:
                sys.stdout.write('detached ... ' if u.is_good_response(
                    gateway.detach_from_vpc(
                        VpcId=vpc.id)) else ' detach_failed ')
                sys.stdout.write('deleted ' if u.is_good_response(
                    gateway.delete()) else ' delete_failed ')
                sys.stdout.write('\n')
            except Exception as e:
                report_failure(e)

        def describe_route_table(route_table):
            return "%s (%s)" % (route_table.id, u.get_name(route_table.tags))

        for route_table in vpc.route_tables.all():
            sys.stdout.write("Deleting route table %s ... " %
                             (describe_route_table(route_table)))
            try:
                sys.stdout.write(response_type(route_table.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        def describe_security_group(security_group):
            return "%s (%s, %s)" % (security_group.id,
                                    u.get_name(security_group.tags),
                                    security_group.group_name)

        # TODO: this tries to remove default security group, maybe not remove it?
        for security_group in vpc.security_groups.all():
            sys.stdout.write('Deleting security group %s ... ' %
                             (describe_security_group(security_group)))
            try:
                sys.stdout.write(
                    response_type(security_group.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        sys.stdout.write("Deleting VPC %s ... " % (vpc.id))
        try:
            sys.stdout.write(response_type(vpc.delete()) + '\n')
        except Exception as e:
            # A VPC with leftover dependencies cannot be deleted; keep going
            # so the keypair cleanup below still runs.
            report_failure(e)

    # delete keypair
    keypairs = u.get_keypair_dict()
    keypair = keypairs.get(DEFAULT_NAME, '')
    if keypair:
        try:
            sys.stdout.write("Deleting keypair %s (%s) ... " %
                             (keypair.key_name, DEFAULT_NAME))
            sys.stdout.write(response_type(keypair.delete()) + '\n')
        except Exception as e:
            report_failure(e)

    keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
    if os.path.exists(keypair_fn):
        print("Deleting local keypair file %s" % (keypair_fn, ))
        # os.remove instead of a shell "rm -f": no shell parsing of the path.
        try:
            os.remove(keypair_fn)
        except OSError as e:
            u.loge(str(e) + '\n')