def grow_ebs_for_task(task_fragment, target_size_gb):
    """Grow the EBS volume of the first instance whose name contains a fragment.

    Instances are visited in ascending order of
    ``u.seconds_from_datetime(instance.launch_time)`` and the first one whose
    name (``u.get_name(instance.tags)``) contains ``task_fragment`` is chosen.

    Args:
        task_fragment: substring to look for in the instance name.
        target_size_gb: new volume size, passed as ``Size`` to ``modify_volume``.

    Raises:
        ValueError: if no instance name contains ``task_fragment``.
        AssertionError: if the instance does not have exactly one volume, or
            ``u.is_good_response`` rejects the ``modify_volume`` response.
    """
    ec2 = u.create_ec2_resource()
    client = u.create_ec2_client()

    # todo: don't crash on missing/duplicate names
    by_launch_time = sorted(
        ((u.seconds_from_datetime(i.launch_time), i)
         for i in ec2.instances.all()),
        key=itemgetter(0))

    for seconds, instance in by_launch_time:
        task_name = u.get_name(instance.tags)
        if task_fragment not in task_name:
            continue
        hours_ago = (time.time() - seconds) / 3600
        # NOTE(review): hard-coded 8 hour offset, only affects the log line below
        hours_ago += 8  # adjust for time being in UTC
        print("Found instance %s launched %.1f hours ago" %
              (task_name, hours_ago))
        break
    else:
        # Without this guard the loop variable would silently refer to the
        # last instance iterated and the wrong volume would be resized.
        raise ValueError("No instance found with name containing '%s'" %
                         (task_fragment, ))

    print(instance.id)
    volumes = list(instance.volumes.all())
    assert len(volumes) == 1, "Must have 1 volume"
    print("Growing %s to %s" % (volumes[0].id, target_size_gb))
    response = client.modify_volume(
        VolumeId=volumes[0].id,
        Size=target_size_gb,
    )
    assert u.is_good_response(response)
def main():
    """Terminate (or stop) every EC2 instance whose name contains ``fragment``.

    Reads the module-level ``fragment``, ``args`` and ``USER_KEY_NAME``.
    Candidates are filtered by ``args.skip_tensorboard``, ``args.skip_stopped``
    and ``args.limit_to_key``; already terminated instances are ignored.
    Unless ``args.yes`` is set the user is asked to confirm. With ``args.soft``
    the instances are stopped instead of terminated.
    """
    ec2 = u.create_ec2_resource()  # ec2 resource
    ec2_client = u.create_ec2_client()  # ec2 client
    instances = list(ec2.instances.all())  # todo: use filter?
    region = u.get_region()

    instances_to_kill = []
    for i in instances:
        name = u.get_name(i.tags)
        state = i.state['Name']
        if fragment not in name:
            continue
        if args.skip_tensorboard and '.tb.' in name:
            continue
        if args.skip_stopped and state == 'stopped':
            continue
        # key_name is None for instances launched without a key pair
        if args.limit_to_key and USER_KEY_NAME not in (i.key_name or ''):
            continue
        if state == 'terminated':
            continue
        instances_to_kill.append(i)
        print(u.get_name(i), i.instance_type, i.key_name,
              state if state == 'stopped' else '')

    # print extra info if couldn't find anything to kill
    if not instances_to_kill:
        valid_names = sorted(
            {"%s,%s" % (u.get_name(i), u.get_state(i)) for i in instances})
        from pprint import pprint as pp
        print("Current instances:")
        pp(valid_names)
        print("No running instances found for: Name '%s', key '%s'" %
              (fragment, USER_KEY_NAME))
        if args.skip_tensorboard:
            print("skipping tensorboard")
        return

    action = 'soft terminate' if args.soft else 'terminate'
    if args.yes:
        answer = 'y'
    else:
        answer = input("%d instances found, %s in %s? (y/N) " %
                       (len(instances_to_kill), action, region))

    # An empty answer counts as "no".
    if answer.lower() != 'y':
        print("Didn't get y, doing nothing")
        return

    instance_ids = [i.id for i in instances_to_kill]
    if args.delay:
        print(f"Sleeping for {args.delay} seconds")
        time.sleep(args.delay)

    # print() does not interpolate logging-style arguments, so format explicitly.
    if args.soft:
        response = ec2_client.stop_instances(InstanceIds=instance_ids)
        print("soft terminating, got response: %s" % (response, ))
    else:
        response = ec2_client.terminate_instances(InstanceIds=instance_ids)
        print("terminating, got response: %s" % (response, ))
def list_spot_requests():
    """Print id, instance type, key name and state of each spot instance request.

    Fields that are missing from a request's launch specification are printed
    as ``None`` instead of raising ``KeyError``.
    """
    client = u.create_ec2_client()
    response = client.describe_spot_instance_requests()
    for request in response['SpotInstanceRequests']:
        # KeyName is absent when the request was made without a key pair.
        launch_spec = request.get('LaunchSpecification', {})
        print(request['SpotInstanceRequestId'],
              launch_spec.get('InstanceType'), launch_spec.get('KeyName'),
              request['State'])
def main():
    """SSH into the first instance returned by ``u.get_instances(fragment)``.

    The fragment comes from the module-level ``args.fragment``. The login name
    is taken from the ``USERNAME`` environment variable (default ``ubuntu``).
    If the first ssh attempt exits non-zero, one more attempt is made with the
    alternative default login (``ubuntu`` <-> ``ec2-user``).
    """
    fragment = args.fragment

    # TODO: prevent CTRL+c/CTRL+d from killing session
    if not args.skip_tmux:
        print("Launching into TMUX session, use CTRL+b d to exit")

    username = os.environ.get("USERNAME", "ubuntu")
    print("Using username '%s'" % (username, ))

    from tzlocal import get_localzone  # $ pip install tzlocal
    filtered_instance_list = u.get_instances(fragment)
    if not filtered_instance_list:
        print("no instance id contains fragment '%s'" % (fragment, ))
        return

    # connect to most recent instance
    # NOTE(review): assumes u.get_instances returns newest first - confirm
    print(filtered_instance_list)
    instance = filtered_instance_list[0]
    print("Connecting to ", u.get_name(instance), " launched ",
          instance.launch_time.astimezone(get_localzone()))

    keypair_fn = u.get_keypair_fn()
    cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
    print(cmd)
    result = os.system(cmd)
    if result == 0:
        return

    # NOTE(review): both fallback branches previously assigned the placeholder
    # '******' (apparently redaction residue), so the retry could never succeed.
    # Restored as a swap between the two logins the branches test for - confirm.
    fallback_logins = {'ubuntu': 'ec2-user', 'ec2-user': 'ubuntu'}
    username = fallback_logins.get(username, username)
    print("ssh failed with code %d, trying username %s" % (result, username))
    cmd = make_cmd(keypair_fn, username, instance.public_ip_address)
    os.system(cmd)
def main():
    """List VPCs, or delete one VPC together with its sub-resources.

    Usage (from ``sys.argv``):
        (no argument) or ``list``  -> calls ``list_vpcs()``
        ``delete <name-or-id>``    -> deletes every VPC whose name or id equals
                                      the argument

    For a matching VPC the subnets, internet gateways, security groups and
    route tables are deleted first, then the VPC itself. Security group and
    route table failures are printed and skipped; subnet and gateway failures
    propagate.

    Raises:
        AssertionError: on a wrong argument count, a missing
            ``AWS_DEFAULT_REGION``, or a bad subnet/gateway response.
    """
    mode = sys.argv[1] if len(sys.argv) >= 2 else 'list'

    if mode == 'list':
        list_vpcs()
    elif mode == 'delete':
        assert len(sys.argv) == 3
        assert 'AWS_DEFAULT_REGION' in os.environ
        target = sys.argv[2]

        client = u.create_ec2_client()
        ec2 = u.create_ec2_resource()
        response = client.describe_vpcs()
        for vpc_response in response['Vpcs']:
            vpc_name = _get_name(vpc_response.get('Tags', []))
            vpc = ec2.Vpc(vpc_response['VpcId'])
            if vpc_name != target and vpc.id != target:
                continue

            print("Deleting VPC name=%s, id=%s" % (vpc_name, vpc.id))

            # The AWS calls are made outside the assert statements on purpose:
            # "python -O" strips asserts, which would otherwise skip the
            # deletions themselves.
            for subnet in vpc.subnets.all():
                print("Deleting subnet %s" % (subnet.id))
                result = subnet.delete()
                assert u.is_good_response(result)

            for gateway in vpc.internet_gateways.all():
                print("Deleting gateway %s" % (gateway.id))
                result = gateway.detach_from_vpc(VpcId=vpc.id)
                assert u.is_good_response(result)
                result = gateway.delete()
                assert u.is_good_response(result)

            for security_group in vpc.security_groups.all():
                print("Deleting security group %s" % (security_group.id))
                try:
                    result = security_group.delete()
                    assert u.is_good_response(result)
                except Exception as e:
                    print("Failed with " + str(e))

            for route_table in vpc.route_tables.all():
                print("Deleting route table %s" % (route_table.id))
                try:
                    result = route_table.delete()
                    assert u.is_good_response(result)
                except Exception as e:
                    print("Failed with " + str(e))

            if u.is_good_response(client.delete_vpc(VpcId=vpc.id)):
                print("Succeeded deleting VPC ", vpc.id)
def cancel_spot_requests():
    """Cancel every spot instance request that is not already cancelled or closed.

    Each request is printed before its cancel call. Fields missing from the
    launch specification are printed as ``None`` so that a request made
    without a key pair cannot abort the loop before the rest are cancelled.
    """
    client = u.create_ec2_client()
    response = client.describe_spot_instance_requests()
    for request in response['SpotInstanceRequests']:
        state = request['State']
        if state in ('cancelled', 'closed'):
            continue

        request_id = request['SpotInstanceRequestId']
        launch_spec = request.get('LaunchSpecification', {})
        print('cancelling', request_id, launch_spec.get('InstanceType'),
              launch_spec.get('KeyName'), state)
        client.cancel_spot_instance_requests(
            SpotInstanceRequestIds=[request_id])
)
    print_response(inspect.getframeinfo(inspect.currentframe())[2], route)


def describe_route_tables(ec2_client):
    """Call ``ec2_client.describe_route_tables()`` and hand the response to ``print_response``.

    The label passed to ``print_response`` is the name of this function,
    obtained from the current frame. Nothing is returned.
    """
    # https://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.Client.describe_route_tables
    response = ec2_client.describe_route_tables()
    # Index 2 of the frame info is the name of the executing function.
    print_response(inspect.getframeinfo(inspect.currentframe())[2], response)


if __name__ == '__main__':
    # Ids of the resources created below, keyed by a descriptive name.
    aws = {}
    # Set the profile here when switching between profiles
    session = boto3.Session(profile_name='my-profile')
    # Create the client and the resource to use
    client = create_ec2_client(session)
    resource = create_ec2_resource(session)
    # Create the VPC and check it
    aws['vpc_id'] = create_vpc(client)
    add_vpc_name_tag(resource, aws['vpc_id'])
    describe_vpc(client)
    # Create the subnet
    # Check the availability zones
    zones = describe_availability_zones(client)
    # Use the first availability zone as the zone for the subnet
    first_zone = zones['AvailabilityZones'][0]['ZoneName']
    print_response('first availability zone', first_zone)
    subnet = create_vpc_subnet(resource, aws['vpc_id'], first_zone, '192.168.1.0/24')
    aws['public_subnet_id'] = subnet.subnet_id
default='all',
    help=("which resources to delete, all/network/keypair/efs"))
parser.add_argument('--force-delete-efs',
                    action='store_true',
                    help="force deleting main EFS")
args = parser.parse_args()

# Every resource name below except the keypair comes from the same
# u.get_resource_name() call.
EFS_NAME = u.get_resource_name()
VPC_NAME = u.get_resource_name()
SECURITY_GROUP_NAME = u.get_resource_name()
ROUTE_TABLE_NAME = u.get_resource_name()
KEYPAIR_NAME = u.get_keypair_name()
# NOTE(review): EFS_NAME is assigned a second time here with the same call
EFS_NAME = u.get_resource_name()

client = u.create_ec2_client()
ec2 = u.create_ec2_resource()


def response_type(response):
    """Return 'ok' if ``u.is_good_response(response)`` is truthy, else 'failed'."""
    return 'ok' if u.is_good_response(response) else 'failed'


def delete_efs():
    efss = u.get_efs_dict()
    # Empty string when no EFS is registered under EFS_NAME.
    efs_id = efss.get(EFS_NAME, '')
    efs_client = u.create_efs_client()
    if efs_id:
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, EFS_NAME))
def main():
    """Delete the EFS, VPC (with its sub-resources), keypair and local key file.

    Reads the module-level ``DEFAULT_NAME``, ``VPC_NAME`` and ``KEYPAIR_NAME``
    and the ``AWS_DEFAULT_REGION`` environment variable. Exits without
    deleting anything when ``DEFAULT_NAME`` is ``'nexus'``.

    Deletion is best-effort: a failing step is reported as ``failed``, logged
    through ``u.loge`` and the remaining steps still run. The exception is the
    EFS section, where a failure skips the rest of that section, since the
    mount targets are deleted before the file system.

    Raises:
        KeyError: if ``AWS_DEFAULT_REGION`` is not set.
    """
    # TODO: also bring down all the instances and wait for them to come down
    region = os.environ['AWS_DEFAULT_REGION']
    if DEFAULT_NAME == 'nexus':
        print("Nexus resources are protected, don't delete them")
        sys.exit()

    print("Deleting %s resources in region %s" % (DEFAULT_NAME, region))
    existing_vpcs = u.get_vpc_dict()
    ec2 = u.create_ec2_resource()

    def response_type(response):
        return 'ok' if u.is_good_response(response) else 'failed'

    def attempt(message, action):
        """Write message, run action() and report ok/failed without raising."""
        sys.stdout.write(message)
        sys.stdout.flush()
        try:
            sys.stdout.write(response_type(action()) + '\n')
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    # delete EFS
    efs_id = u.get_efs_dict().get(DEFAULT_NAME, '')
    if efs_id:
        efs_client = u.create_efs_client()
        try:
            # delete mount targets first
            print("About to delete %s (%s)" % (efs_id, DEFAULT_NAME))
            response = efs_client.describe_mount_targets(FileSystemId=efs_id)
            assert u.is_good_response(response)
            for mount_response in response['MountTargets']:
                mount_target_id = mount_response['MountTargetId']
                sys.stdout.write('Deleting mount target %s ... ' %
                                 (mount_target_id, ))
                sys.stdout.flush()
                print(response_type(
                    efs_client.delete_mount_target(
                        MountTargetId=mount_target_id)))

            sys.stdout.write('Deleting EFS %s (%s)... ' %
                             (efs_id, DEFAULT_NAME))
            sys.stdout.flush()
            u.delete_efs_id(efs_id)
        except Exception as e:
            sys.stdout.write('failed\n')
            u.loge(str(e) + '\n')

    if VPC_NAME in existing_vpcs:
        vpc = ec2.Vpc(existing_vpcs[VPC_NAME].id)
        print("Deleting VPC %s (%s) subresources:" % (VPC_NAME, vpc.id))

        for subnet in vpc.subnets.all():
            attempt("Deleting subnet %s ... " % (subnet.id), subnet.delete)

        for gateway in vpc.internet_gateways.all():
            sys.stdout.write("Deleting gateway %s ... " % (gateway.id))
            # todo: if instances are using VPC, this fails with
            # botocore.exceptions.ClientError: An error occurred (DependencyViolation) when calling the DetachInternetGateway operation: Network vpc-ca4abab3 has some mapped public address(es). Please unmap those public address(es) before detaching the gateway.
            # Guarded so that such a failure does not abort the rest of the cleanup.
            try:
                detached = u.is_good_response(
                    gateway.detach_from_vpc(VpcId=vpc.id))
                sys.stdout.write(
                    'detached ... ' if detached else ' detach_failed ')
                deleted = u.is_good_response(gateway.delete())
                sys.stdout.write('deleted ' if deleted else ' delete_failed ')
                sys.stdout.write('\n')
            except Exception as e:
                sys.stdout.write('failed\n')
                u.loge(str(e) + '\n')

        for route_table in vpc.route_tables.all():
            label = "%s (%s)" % (route_table.id, u.get_name(route_table.tags))
            attempt("Deleting route table %s ... " % (label),
                    route_table.delete)

        # TODO: this tries to remove default security group, maybe not remove it?
        for security_group in vpc.security_groups.all():
            label = "%s (%s, %s)" % (security_group.id,
                                     u.get_name(security_group.tags),
                                     security_group.group_name)
            attempt('Deleting security group %s ... ' % (label),
                    security_group.delete)

        attempt("Deleting VPC %s ... " % (vpc.id), vpc.delete)

    # delete keypair
    keypair = u.get_keypair_dict().get(DEFAULT_NAME, '')
    if keypair:
        attempt("Deleting keypair %s (%s) ... " %
                (keypair.key_name, DEFAULT_NAME), keypair.delete)

    keypair_fn = u.get_keypair_fn(KEYPAIR_NAME)
    if os.path.exists(keypair_fn):
        print("Deleting local keypair file %s" % (keypair_fn, ))
        # os.remove instead of a shell "rm -f" string: the path is never
        # interpreted by a shell.
        try:
            os.remove(keypair_fn)
        except OSError as e:
            u.loge(str(e) + '\n')