def delete_network():
    """Delete the VPC named ``VPC_NAME`` together with its subresources.

    Subnets, internet gateways, route tables and security groups of the VPC
    are deleted first, then the VPC itself.  Every deletion is best-effort:
    progress is written to stdout, an exception is reported through
    ``u.loge`` and the remaining resources are still attempted.  Nothing is
    done when no VPC with that name exists.

    NOTE(review): ``ec2`` is not bound inside this function; it is presumably
    a module-level boto3 EC2 resource -- confirm.
    """
    existing_vpcs = u.get_vpc_dict()
    if VPC_NAME not in existing_vpcs:
        return

    vpc = ec2.Vpc(existing_vpcs[VPC_NAME].id)
    print("Deleting VPC %s (%s) subresources:" % (VPC_NAME, vpc.id))

    for subnet in vpc.subnets.all():
        try:
            sys.stdout.write("Deleting subnet %s ... " % (subnet.id))
            sys.stdout.write(response_type(subnet.delete()) + '\n')
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    for gateway in vpc.internet_gateways.all():
        sys.stdout.write("Deleting gateway %s ... " % (gateway.id))
        # If instances are still using the VPC, detaching fails with
        # botocore.exceptions.ClientError (DependencyViolation): "Network
        # vpc-... has some mapped public address(es). Please unmap those
        # public address(es) before detaching the gateway."
        # Guard it like the other steps so the rest of the cleanup still runs.
        try:
            detached = u.is_good_response(
                gateway.detach_from_vpc(VpcId=vpc.id))
            sys.stdout.write('detached ... ' if detached
                             else ' detach_failed ')
            deleted = u.is_good_response(gateway.delete())
            sys.stdout.write('deleted ' if deleted else ' delete_failed ')
            sys.stdout.write('\n')
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    def describe_route_table(route_table):
        # "<id> (<name tag>)" label used in progress messages
        return "%s (%s)" % (route_table.id, u.get_name(route_table.tags))

    for route_table in vpc.route_tables.all():
        sys.stdout.write("Deleting route table %s ... " %
                         (describe_route_table(route_table)))
        try:
            sys.stdout.write(response_type(route_table.delete()) + '\n')
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    def describe_security_group(security_group):
        # "<id> (<name tag>, <group name>)" label used in progress messages
        return "%s (%s, %s)" % (security_group.id,
                                u.get_name(security_group.tags),
                                security_group.group_name)

    # TODO: this tries to remove default security group, maybe not remove it?
    for security_group in vpc.security_groups.all():
        sys.stdout.write('Deleting security group %s ... ' %
                         (describe_security_group(security_group)))
        try:
            sys.stdout.write(response_type(security_group.delete()) + '\n')
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    sys.stdout.write("Deleting VPC %s ... " % (vpc.id))
    try:
        sys.stdout.write(response_type(vpc.delete()) + '\n')
    except Exception as e:
        sys.stdout.write('failed\n')
        u.loge(str(e) + '\n')
def list_efss():
    """Print every EFS file system and its mount targets per region.

    Iterates over the hard-coded regions ``us-west-2`` and ``us-east-1``.
    For each file system it prints the id and the name derived from its tags
    (via ``u.get_name``), followed by one line per mount target with the
    availability zone, IP address, mount target id and lifecycle state.

    Raises:
        AssertionError: if ``u.is_good_response`` rejects the
            ``describe_file_systems`` or ``describe_tags`` response.
    """
    for region in ['us-west-2', 'us-east-1']:
        print()
        print('=' * 80)
        print(region)
        print('=' * 80)

        efs_client = boto3.client('efs', region_name=region)
        # The EC2 resource only depends on the region, so build it once per
        # region instead of once per file system.
        ec2 = boto3.resource('ec2', region_name=region)

        response = efs_client.describe_file_systems()
        assert u.is_good_response(response)

        for efs_response in response['FileSystems']:
            # Example entry:
            # {'CreationTime': datetime.datetime(2017, 12, 19, 10, 3, 44, tzinfo=tzlocal()),
            #  'CreationToken': '1513706624330134',
            #  'Encrypted': False,
            #  'FileSystemId': 'fs-0f95ab46',
            #  'LifeCycleState': 'available',
            #  'Name': 'nexus01',
            #  'NumberOfMountTargets': 0,
            #  'OwnerId': '316880547378',
            #  'PerformanceMode': 'generalPurpose',
            #  'SizeInBytes': {'Value': 6144}},
            efs_id = efs_response['FileSystemId']
            tags_response = efs_client.describe_tags(FileSystemId=efs_id)
            assert u.is_good_response(tags_response)
            key = u.get_name(tags_response.get('Tags', ''))
            print("%-16s %-16s" % (efs_id, key))
            print('-' * 40)

            # list mount points
            mount_targets = efs_client.describe_mount_targets(
                FileSystemId=efs_id)['MountTargets']
            if not mount_targets:
                print("<no mount targets>")
            else:
                for mount_response in mount_targets:
                    # the zone is not part of the mount target record used
                    # here, so it is looked up through the subnet
                    subnet = ec2.Subnet(mount_response['SubnetId'])
                    zone = subnet.availability_zone
                    state = mount_response['LifeCycleState']
                    mount_target_id = mount_response['MountTargetId']
                    ip = mount_response['IpAddress']
                    print('%-16s %-16s %-16s %-16s' % (
                        zone,
                        ip,
                        mount_target_id,
                        state,
                    ))
            print()
def delete_efs():
    """Delete the EFS file system registered under ``EFS_NAME``.

    The file system id is looked up in ``u.get_efs_dict()``; when the name is
    not present nothing is deleted.  Mount targets are deleted first, then
    the file system itself through ``u.delete_efs_id``.  Any exception during
    the process is reported as ``failed`` on stdout and logged with
    ``u.loge`` instead of propagating.
    """
    efss = u.get_efs_dict()
    efs_id = efss.get(EFS_NAME, '')
    efs_client = u.create_efs_client()
    if not efs_id:
        return

    try:
        # delete mount targets first
        print("About to delete %s (%s)" % (efs_id, EFS_NAME))
        response = efs_client.describe_mount_targets(FileSystemId=efs_id)
        assert u.is_good_response(response)
        for mount_response in response['MountTargets']:
            # Only the mount target id is needed; the previous subnet/zone
            # lookup was unused and cost an extra EC2 call per target.
            mount_target_id = mount_response['MountTargetId']
            sys.stdout.write('Deleting mount target %s ... ' %
                             (mount_target_id, ))
            sys.stdout.flush()
            delete_response = efs_client.delete_mount_target(
                MountTargetId=mount_target_id)
            print(response_type(delete_response))

        sys.stdout.write('Deleting EFS %s (%s)... ' % (efs_id, EFS_NAME))
        sys.stdout.flush()
        u.delete_efs_id(efs_id)
    except Exception as e:
        sys.stdout.write('failed\n')
        u.loge(str(e) + '\n')
def grow_ebs_for_task(task_fragment, target_size_gb):
    """Grow the EBS volume of the instance whose name contains a fragment.

    Instances are scanned in order of launch time (oldest first) and the
    first one whose name (``u.get_name(instance.tags)``) contains
    ``task_fragment`` is selected.  Its single attached volume is resized to
    ``target_size_gb`` with ``modify_volume``.

    Args:
        task_fragment: substring to look for in the instance name.
        target_size_gb: new volume size, passed as ``Size`` to
            ``modify_volume``.

    Raises:
        ValueError: if no instance name contains ``task_fragment``.
            Previously the last scanned instance was resized instead.
        AssertionError: if the instance does not have exactly one volume or
            the ``modify_volume`` response is not good.
    """
    ec2 = u.create_ec2_resource()
    client = u.create_ec2_client()

    launches = [(u.seconds_from_datetime(i.launch_time), i)
                for i in ec2.instances.all()]
    for seconds, instance in sorted(launches, key=itemgetter(0)):
        task_name = u.get_name(instance.tags)
        if task_fragment in task_name:
            hours_ago = (time.time() - seconds) / 3600
            hours_ago += 8  # adjust for time being in UTC
            print("Found instance %s launched %.1f hours ago" %
                  (task_name, hours_ago))
            break
    else:
        # Without this guard the loop variable would still point at the last
        # instance (or be unbound), and an unrelated volume would be grown.
        raise ValueError("No instance found with name containing %r" %
                         (task_fragment, ))

    print(instance.id)

    volumes = list(instance.volumes.all())
    assert len(volumes) == 1, "Must have 1 volume"

    print("Growing %s to %s" % (volumes[0].id, target_size_gb))
    response = client.modify_volume(
        VolumeId=volumes[0].id,
        Size=target_size_gb,
    )
    assert u.is_good_response(response)
def create_resources():
    """Create the network, keypair and EFS resources named ``DEFAULT_NAME``.

    Sets up the VPC and security group through ``network_setup``, the
    keypair through ``keypair_setup``, creates the EFS file system unless one
    is already registered under ``DEFAULT_NAME``, and then creates an EFS
    mount target in every subnet of the VPC.  Mount target creation is
    attempted up to 10 times per subnet because a new file system is not
    immediately available.
    """
    print("Creating %s resources in region %s" % (
        DEFAULT_NAME,
        u.get_region(),
    ))

    vpc, security_group = network_setup()
    keypair_setup()  # saves private key locally to keypair_fn

    # create EFS, or pick up the existing one
    efs_id = u.get_efs_dict().get(DEFAULT_NAME, '')
    if efs_id:
        print("Reusing EFS " + DEFAULT_NAME)
    else:
        print("Creating EFS " + DEFAULT_NAME)
        efs_id = u.create_efs(DEFAULT_NAME)

    efs_client = u.create_efs_client()

    # One mount target per subnet of the VPC, with retries.
    max_failures = 10
    retry_interval_sec = 1
    for subnet in vpc.subnets.all():
        finished = False
        attempt = 0
        while attempt < max_failures and not finished:
            attempt += 1
            try:
                sys.stdout.write("Creating efs mount target for %s ... " %
                                 (subnet.availability_zone, ))
                sys.stdout.flush()
                reply = efs_client.create_mount_target(
                    FileSystemId=efs_id,
                    SubnetId=subnet.id,
                    SecurityGroups=[security_group.id])
                if u.is_good_response(reply):
                    print("success")
                    finished = True
            except Exception as err:
                message = str(err)
                if 'already exists' in message:
                    # an existing mount target is as good as a new one
                    print('already exists')
                    finished = True
                else:
                    # Takes couple of seconds for EFS to come online, with
                    # errors like this:
                    # Creating efs mount target for us-east-1f ... Failed
                    # with An error occurred
                    # (IncorrectFileSystemLifeCycleState) when calling the
                    # CreateMountTarget operation: None, retrying in 1 sec
                    print("Got %s, retrying in %s sec" %
                          (message, retry_interval_sec))
                    time.sleep(retry_interval_sec)
        if not finished:
            print("Giving up.")
def main():
    """Command-line entry point: list VPCs or delete one VPC.

    The mode is the first command-line argument and defaults to ``list``.
    ``list`` calls ``list_vpcs()``.  ``delete`` requires exactly one more
    argument, a VPC name or id, plus ``AWS_DEFAULT_REGION`` in the
    environment; every VPC matching that name or id has its subnets,
    gateways, security groups and route tables deleted before the VPC itself.
    Any other mode does nothing.
    """
    mode = sys.argv[1] if len(sys.argv) >= 2 else 'list'

    if mode == 'list':
        list_vpcs()
        return
    if mode != 'delete':
        return

    assert len(sys.argv) == 3
    assert 'AWS_DEFAULT_REGION' in os.environ

    client = u.create_ec2_client()
    ec2 = u.create_ec2_resource()
    described = client.describe_vpcs()

    for vpc_response in described['Vpcs']:
        vpc_name = _get_name(vpc_response.get('Tags', []))
        vpc = ec2.Vpc(vpc_response['VpcId'])
        if not (vpc_name == sys.argv[2] or vpc.id == sys.argv[2]):
            continue

        print("Deleting VPC name=%s, id=%s" % (vpc_name, vpc.id))

        for subnet in vpc.subnets.all():
            print("Deleting subnet %s" % (subnet.id))
            assert u.is_good_response(subnet.delete())

        for gateway in vpc.internet_gateways.all():
            print("Deleting gateway %s" % (gateway.id))
            detach_reply = gateway.detach_from_vpc(VpcId=vpc.id)
            assert u.is_good_response(detach_reply)
            assert u.is_good_response(gateway.delete())

        # Security group and route table failures are reported, not fatal.
        for security_group in vpc.security_groups.all():
            try:
                assert u.is_good_response(security_group.delete())
            except Exception as err:
                print("Failed with " + str(err))

        for route_table in vpc.route_tables.all():
            print("Deleting route table %s" % (route_table.id))
            try:
                assert u.is_good_response(route_table.delete())
            except Exception as err:
                print("Failed with " + str(err))

        if u.is_good_response(client.delete_vpc(VpcId=vpc.id)):
            print("Succeeded deleting VPC ", vpc.id)
def network_setup():
    """Create or reuse the VPC, gateway, subnets and security group.

    If a VPC named ``VPC_NAME`` already exists it is reused after checking
    that it has one subnet per available zone; otherwise a VPC with CIDR
    ``192.168.0.0/16`` is created with DNS hostnames and DNS support enabled.
    When the VPC has no gateway named ``DEFAULT_NAME``, an internet gateway,
    a route table with a ``0.0.0.0/0`` route through it, and one /20 subnet
    per zone are created.  A security group named ``SECURITY_GROUP_NAME`` is
    reused if present; otherwise it is created and opened for public ICMP,
    for the ports in ``PUBLIC_TCP_RANGES`` / ``PUBLIC_UDP_RANGES`` and for
    all ICMP/TCP/UDP traffic between members of the group.

    Returns:
        Tuple ``(vpc, security_group)``.

    Raises:
        AssertionError: if an AWS response is not good or one of the
            post-condition checks (subnet count, gateway attachment, default
            route, SSH port enabled, ingress authorization) fails.
    """
    # from https://gist.github.com/nguyendv/8cfd92fc8ed32ebb78e366f44c2daea6
    ec2 = u.create_ec2_resource()
    existing_vpcs = u.get_vpc_dict()
    zones = u.get_available_zones()

    if VPC_NAME in existing_vpcs:
        print("Reusing VPC " + VPC_NAME)
        vpc = existing_vpcs[VPC_NAME]
        subnets = list(vpc.subnets.all())
        assert len(subnets) == len(
            zones
        ), "Has %s subnets, but %s zones, something went wrong during resource creation, try delete_resources.py/create_resources.py" % (
            len(subnets), len(zones))
    else:
        print("Creating VPC " + VPC_NAME)
        vpc = ec2.create_vpc(CidrBlock='192.168.0.0/16')

        # enable DNS on the VPC
        response = vpc.modify_attribute(EnableDnsHostnames={"Value": True})
        assert u.is_good_response(response)
        response = vpc.modify_attribute(EnableDnsSupport={"Value": True})
        assert u.is_good_response(response)

        vpc.create_tags(Tags=u.make_name(VPC_NAME))
        vpc.wait_until_available()

    gateways = u.get_gateway_dict(vpc)
    if DEFAULT_NAME in gateways:
        print("Reusing gateways " + DEFAULT_NAME)
    else:
        print("Creating gateway " + DEFAULT_NAME)
        ig = ec2.create_internet_gateway()
        ig.attach_to_vpc(VpcId=vpc.id)
        ig.create_tags(Tags=u.make_name(DEFAULT_NAME))

        # check that attachment succeeded
        # TODO: sometimes get
        # AssertionError: vpc vpc-33d0804b is in state None
        attach_state = u.get1(ig.attachments, State=-1, VpcId=vpc.id)
        assert attach_state == 'available', "vpc %s is in state %s" % (
            vpc.id, attach_state)

        route_table = vpc.create_route_table()
        route_table.create_tags(Tags=u.make_name(ROUTE_TABLE_NAME))

        dest_cidr = '0.0.0.0/0'
        route_table.create_route(DestinationCidrBlock=dest_cidr,
                                 GatewayId=ig.id)
        # check success; entries look like
        # ec2.Route(route_table_id='rtb-a8b438cf',
        #           destination_cidr_block='0.0.0.0/0')
        if not any(route.destination_cidr_block == dest_cidr
                   for route in route_table.routes):
            # sometimes get
            # AssertionError: Route for 0.0.0.0/0 not found in
            # [ec2.Route(route_table_id='rtb-cd9153b0',
            #            destination_cidr_block='192.168.0.0/16')]
            # TODO: add a wait/retry?
            raise AssertionError("Route for %s not found in %s" %
                                 (dest_cidr, route_table.routes))

        assert len(zones) <= 16  # for cidr/20 to fit into cidr/16
        for index, zone in enumerate(zones):
            # consecutive /20 blocks: 192.168.0.0, 192.168.16.0, ...
            cidr_block = '192.168.%d.0/20' % (index * 16, )
            print("Creating subnet %s in zone %s" % (cidr_block, zone))
            subnet = vpc.create_subnet(CidrBlock=cidr_block,
                                       AvailabilityZone=zone)
            subnet.create_tags(Tags=[{
                'Key': 'Name',
                'Value': f'{VPC_NAME}-subnet'
            }, {
                'Key': 'Region',
                'Value': zone
            }])
            u.wait_until_available(subnet)
            route_table.associate_with_subnet(SubnetId=subnet.id)

    # Creates security group if necessary
    existing_security_groups = u.get_security_group_dict()
    if SECURITY_GROUP_NAME in existing_security_groups:
        print("Reusing security group " + SECURITY_GROUP_NAME)
        security_group = existing_security_groups[SECURITY_GROUP_NAME]
    else:
        # NOTE(review): ingress rules are only added to a newly created
        # group; a reused group is assumed to be configured already.
        print("Creating security group " + SECURITY_GROUP_NAME)
        security_group = ec2.create_security_group(
            GroupName=SECURITY_GROUP_NAME,
            Description=SECURITY_GROUP_NAME,
            VpcId=vpc.id)
        security_group.create_tags(Tags=[{
            "Key": "Name",
            "Value": SECURITY_GROUP_NAME
        }])

        # allow ICMP access for public ping
        security_group.authorize_ingress(CidrIp='0.0.0.0/0',
                                         IpProtocol='icmp',
                                         FromPort=-1,
                                         ToPort=-1)

        # open public ports
        # always include SSH port which is required for basic functionality
        assert 22 in PUBLIC_TCP_RANGES, "Must enable SSH access"
        for protocol, port_ranges in (("tcp", PUBLIC_TCP_RANGES),
                                      ("udp", PUBLIC_UDP_RANGES)):
            for port in port_ranges:
                # an entry is either a single port or a (from, to) pair
                if u.is_list_or_tuple(port):
                    assert len(port) == 2
                    from_port, to_port = port
                else:
                    from_port, to_port = port, port

                response = security_group.authorize_ingress(
                    IpProtocol=protocol,
                    CidrIp="0.0.0.0/0",
                    FromPort=from_port,
                    ToPort=to_port)
                assert u.is_good_response(response)

        # allow ingress within security group
        # Authorizing ingress doesn't work with names in a non-default VPC,
        # so must use more complicated syntax
        # https://github.com/boto/boto3/issues/158
        for protocol, from_port, to_port in (('icmp', -1, -1),
                                             ('tcp', 0, 65535),
                                             ('udp', 0, 65535)):
            rule = {
                'FromPort': from_port,
                'IpProtocol': protocol,
                'IpRanges': [],
                'PrefixListIds': [],
                'ToPort': to_port,
                'UserIdGroupPairs': [{
                    'GroupId': security_group.id
                }]
            }
            try:
                security_group.authorize_ingress(IpPermissions=[rule])
            except Exception as e:
                # Not every exception carries a botocore-style ``response``;
                # read the error code defensively so the original error is
                # reported instead of an AttributeError.
                error = (getattr(e, 'response', None) or {}).get('Error', {})
                if error.get('Code') == 'InvalidPermission.Duplicate':
                    print("Warning, got " + str(e))
                else:
                    raise AssertionError(
                        "Failed while authorizing ingress with " +
                        str(e)) from e

    return vpc, security_group
def response_type(response):
    """Return ``'ok'`` if ``u.is_good_response(response)`` holds, else ``'failed'``."""
    if u.is_good_response(response):
        return 'ok'
    return 'failed'
def main():
    """Delete the EFS, VPC and keypair resources named after ``DEFAULT_NAME``.

    Reads the region from ``AWS_DEFAULT_REGION`` and refuses to run when
    ``DEFAULT_NAME`` is ``'nexus'``.  It then deletes, in order: the EFS file
    system (mount targets first), the VPC ``VPC_NAME`` with its subnets,
    gateways, route tables and security groups, the EC2 keypair, and the
    local keypair file.  Each step is best-effort: failures are printed and
    logged through ``u.loge`` and the following steps still run.

    Raises:
        KeyError: if ``AWS_DEFAULT_REGION`` is not set.
        SystemExit: if ``DEFAULT_NAME`` is ``'nexus'``.
    """
    # TODO: also bring down all the instances and wait for them to come down
    region = os.environ['AWS_DEFAULT_REGION']
    if DEFAULT_NAME == 'nexus':
        print("Nexus resources are protected, don't delete them")
        sys.exit()

    print("Deleting %s resources in region %s" % (
        DEFAULT_NAME,
        region,
    ))
    existing_vpcs = u.get_vpc_dict()
    ec2 = u.create_ec2_resource()

    def response_type(response):
        return 'ok' if u.is_good_response(response) else 'failed'

    def report_failure(e):
        # shared "failed" output used by every best-effort step below
        sys.stdout.write('failed\n')
        u.loge(str(e) + '\n')

    # delete EFS
    efss = u.get_efs_dict()
    efs_id = efss.get(DEFAULT_NAME, '')
    efs_client = u.create_efs_client()
    if efs_id:
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, DEFAULT_NAME))
            response = efs_client.describe_mount_targets(FileSystemId=efs_id)
            assert u.is_good_response(response)
            for mount_response in response['MountTargets']:
                # only the id is needed to delete a mount target
                mount_target_id = mount_response['MountTargetId']
                sys.stdout.write('Deleting mount target %s ... ' %
                                 (mount_target_id, ))
                sys.stdout.flush()
                delete_response = efs_client.delete_mount_target(
                    MountTargetId=mount_target_id)
                print(response_type(delete_response))

            sys.stdout.write('Deleting EFS %s (%s)... ' %
                             (efs_id, DEFAULT_NAME))
            sys.stdout.flush()
            u.delete_efs_id(efs_id)
        except Exception as e:
            report_failure(e)

    if VPC_NAME in existing_vpcs:
        vpc = ec2.Vpc(existing_vpcs[VPC_NAME].id)
        print("Deleting VPC %s (%s) subresources:" % (VPC_NAME, vpc.id))

        for subnet in vpc.subnets.all():
            try:
                sys.stdout.write("Deleting subnet %s ... " % (subnet.id))
                sys.stdout.write(response_type(subnet.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        for gateway in vpc.internet_gateways.all():
            sys.stdout.write("Deleting gateway %s ... " % (gateway.id))
            # If instances are still using the VPC, detaching fails with
            # botocore.exceptions.ClientError (DependencyViolation): "Network
            # vpc-... has some mapped public address(es). Please unmap those
            # public address(es) before detaching the gateway."
            # Guard it so the remaining cleanup (including the keypair)
            # still runs.
            try:
                detached = u.is_good_response(
                    gateway.detach_from_vpc(VpcId=vpc.id))
                sys.stdout.write('detached ... ' if detached
                                 else ' detach_failed ')
                deleted = u.is_good_response(gateway.delete())
                sys.stdout.write('deleted ' if deleted
                                 else ' delete_failed ')
                sys.stdout.write('\n')
            except Exception as e:
                report_failure(e)

        def describe_route_table(route_table):
            return "%s (%s)" % (route_table.id,
                                u.get_name(route_table.tags))

        for route_table in vpc.route_tables.all():
            sys.stdout.write("Deleting route table %s ... " %
                             (describe_route_table(route_table)))
            try:
                sys.stdout.write(response_type(route_table.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        def describe_security_group(security_group):
            return "%s (%s, %s)" % (security_group.id,
                                    u.get_name(security_group.tags),
                                    security_group.group_name)

        # TODO: this tries to remove default security group, maybe not remove it?
        for security_group in vpc.security_groups.all():
            sys.stdout.write('Deleting security group %s ... ' %
                             (describe_security_group(security_group)))
            try:
                sys.stdout.write(
                    response_type(security_group.delete()) + '\n')
            except Exception as e:
                report_failure(e)

        sys.stdout.write("Deleting VPC %s ... " % (vpc.id))
        try:
            sys.stdout.write(response_type(vpc.delete()) + '\n')
        except Exception as e:
            # e.g. leftover dependencies; keep going so the keypair is
            # still cleaned up
            report_failure(e)

    # delete keypair
    keypairs = u.get_keypair_dict()
    keypair = keypairs.get(DEFAULT_NAME, '')
    if keypair:
        try:
            sys.stdout.write("Deleting keypair %s (%s) ... " %
                             (keypair.key_name, DEFAULT_NAME))
            sys.stdout.write(response_type(keypair.delete()) + '\n')
        except Exception as e:
            report_failure(e)

    keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
    if os.path.exists(keypair_fn):
        print("Deleting local keypair file %s" % (keypair_fn, ))
        # os.remove avoids building a shell command from the path
        try:
            os.remove(keypair_fn)
        except OSError as e:
            u.loge(str(e) + '\n')