def aws_ec2_discovery(self, region=settings['DEFAULT_REGION']): ec2_instances_info = {} ec2_instances = [] result = {} self.logger.debug("Discovering ec2 instances") try: aws_ec2 = aws.Ec2(logger=self.logger, region=region) ec2 = boto.ec2.connect_to_region(region) for instance in aws_ec2.get_instance_obj(): ec2_instances_info['{#INSTANCE_ID}'] = instance.id ec2_instances_info['{#INSTANCE_NAME}'] = instance.tags['Name'] ec2_instances_info['{#INSTANCE_TYPE}'] = instance.instance_type ec2_instances_info['{#INSTANCE_AVAILABILITY_ZONE}'] = instance.placement ec2_instances_info['{#INSTANCE_STATE}'] = instance.state try: ec2_instances_info['{#INSTANCE_SYSTEM_STATUS}'] = ec2.get_all_instance_status(instance_ids=[instance.id])[0].system_status.status ec2_instances_info['{#INSTANCE_SYSTEM_STATUS}'] = ec2.get_all_instance_status(instance_ids=[instance.id])[0].instance_status.status except Exception, e: pass ec2_instances_info['{#INSTANCE_PUBLIC_DNS}'] = instance.public_dns_name ec2_instances_info['{#INSTANCE_PUBLIC_IP}'] = instance.ip_address ec2_instances_info['{#INSTANCE_KEY_NAME}'] = instance.key_name ec2_instances_info['{#INSTANCE_MONITORING}'] = str(instance.monitored) ec2_instances_info['{#INSTANCE_LAUNCH_TIME}'] = instance.launch_time ec2_instances_info['{#INSTANCE_SECURITY_GROUPS}'] = instance.groups[0].name ec2_instances.append(ec2_instances_info.copy()) result['data'] = ec2_instances
def launch(key_name=None, region='us-west-2', image_id='ami-5189a661',
           instance_type='t2.micro', security_groups='launch-wizard-1',
           user_data=None, initial_check=False):
    """Start one EC2 instance and block until it is no longer 'pending'.

    :param key_name: name of the key pair passed to ``run_instances``.
    :param region: region name handed to ``boto.ec2.connect_to_region``.
    :param image_id: AMI to launch.
    :param instance_type: EC2 instance type.
    :param security_groups: a security group name or a list of names; a
        non-list value is wrapped in a one-element list.
    :param user_data: user data passed through to ``run_instances``.
    :param initial_check: when true, keep polling the instance status until
        the system status or the instance status stops reading
        "Status:initializing".
    :return: the first instance of the reservation.
    """
    groups = security_groups if isinstance(security_groups, list) else [security_groups]

    conn = boto.ec2.connect_to_region(region)
    reservation = conn.run_instances(image_id,
                                     key_name=key_name,
                                     instance_type=instance_type,
                                     security_groups=groups,
                                     user_data=user_data)
    instance = reservation.instances[0]

    # Poll every ten seconds until the instance has left the pending state.
    while True:
        if instance.state != u'pending':
            break
        time.sleep(10)
        instance.update()

    if not initial_check:
        return instance

    # Wait for the status checks first
    initializing = "Status:initializing"
    while True:
        current = conn.get_all_instance_status(instance_ids=[instance.id])[0]
        if (str(current.system_status) != initializing
                or str(current.instance_status) != initializing):
            break
        time.sleep(10)
    return instance
def create_box():
    """Launch NODE_COUNT instances and wait until all are running and reachable.

    New instances are identified by comparing the instance ids visible
    before and after the ``run_instances`` call. Each new instance is tagged
    with ``billingProject=aei``.

    :return: list of the newly launched instance objects.
    :raises AssertionError: if a new instance has no public DNS name once
        the waits have finished.
    """
    known_ids = set(i.id for i in ec2.get_only_instances())
    ec2.run_instances(AMI_ID,
                      key_name=KEYNAME,
                      min_count=NODE_COUNT,
                      max_count=NODE_COUNT,
                      security_groups=[SECURITY_GROUP],
                      instance_type=os.environ.get('EC2_INSTANCE_TYPE', 'm3.medium'))
    new_instances = [i for i in ec2.get_only_instances() if i.id not in known_ids]

    for new_instance in new_instances:
        print("new instance:", new_instance.id)
        ec2.create_tags([new_instance.id], {"billingProject": "aei"})

    # Wait for every instance to be running. Each instance's own state is
    # checked; the previous code re-read a stale loop variable and so only
    # ever looked at the last instance.
    while not all(inst.state == u'running' for inst in new_instances):
        time.sleep(3)
        for inst in new_instances:
            inst.update()

    # Wait for the reachability check of every instance to pass. An id only
    # leaves the waiting set once its status says 'passed', so a missing or
    # partial status list cannot end the wait early.
    waiting_ids = set(inst.id for inst in new_instances)
    while waiting_ids:
        statuses = ec2.get_all_instance_status(instance_ids=list(waiting_ids))
        for inst_status in statuses:
            if inst_status.system_status.details.get('reachability') == 'passed':
                waiting_ids.discard(inst_status.id)
        if waiting_ids:
            time.sleep(3)

    time.sleep(1)
    for new_instance in new_instances:
        assert new_instance.public_dns_name
        print("public dns name:", new_instance.public_dns_name)
    return new_instances
def run(self):
    """Rebuild the module-level EC2 inventory for every account and region.

    Reads ``globalconf`` and ``awsconf``; rebinds the globals ``BASE_URL``
    (when an endpoint is configured), ``last`` (copy of the previous
    ``info``) and ``info`` (fresh dict keyed by instance id). It also fills
    ``lookup`` with a short-hostname -> instance-id mapping and, when a
    proxy is configured, exports it through ``http_proxy``/``https_proxy``.

    Regions that boto does not recognise are skipped with a warning;
    ``connect_to_region`` returns None for them, which previously caused an
    AttributeError on the next call.
    """
    #def ec2_status():
    global conn, globalconf, awsconf, BASE_URL, info, last

    if 'endpoint' in globalconf:
        BASE_URL = '%s/alerta/app/v1' % globalconf['endpoint']
    url = '%s/alerts?%s' % (BASE_URL, awsconf.get('filter', 'tags=cloud:AWS/EC2'))

    if 'proxy' in globalconf:
        os.environ['http_proxy'] = globalconf['proxy']['http']
        os.environ['https_proxy'] = globalconf['proxy']['https']

    last = info.copy()
    info = dict()

    for account, keys in awsconf['accounts'].iteritems():
        access_key = keys.get('aws_access_key_id', '')
        secret_key = keys.get('aws_secret_access_key', '')
        # NOTE(review): the last four characters of the secret key end up in
        # the debug log - confirm this is acceptable.
        logging.debug('AWS Account=%s, AwsAccessKey=%s, AwsSecretKey=************************************%s',
                      account, access_key, secret_key[-4:])

        for region in awsconf['regions']:
            try:
                ec2 = boto.ec2.connect_to_region(region,
                                                 aws_access_key_id=access_key,
                                                 aws_secret_access_key=secret_key)
            except boto.exception.EC2ResponseError as e:
                logging.warning('EC2 API call connect_to_region(region=%s) failed: %s', region, e)
                continue
            if ec2 is None:
                # connect_to_region() returns None for an unknown region name.
                logging.warning('EC2 region %s is not recognised by boto, skipping', region)
                continue

            logging.info('Get all instances for account %s in %s', account, region)
            try:
                reservations = ec2.get_all_instances()
            except boto.exception.EC2ResponseError as e:
                logging.warning('EC2 API call get_all_instances() failed: %s', e)
                continue

            # Only instances that carry at least one tag are recorded.
            instances = [i for r in reservations for i in r.instances if i.tags]
            for i in instances:
                role = i.tags.get('Role', 'unknown')
                info[i.id] = {
                    'state': i.state,
                    'stage': i.tags.get('Stage', 'unknown'),
                    'role': role,
                    'tags': ['os:Linux', 'role:%s' % role, 'datacentre:%s' % region,
                             'virtual:xen', 'cloud:AWS/EC2', 'account:%s' % account,
                             # FIXME - replace match on cluster with match on role
                             'cluster:%s_%s' % (role, region)],
                }

                # FIXME - this is a hack until all EC2 instances are keyed off instance id
                logging.debug('%s -> %s', i.private_dns_name, i.id)
                lookup[i.private_dns_name.split('.')[0]] = i.id

            logging.info('Get system and instance status for account %s in %s', account, region)
            try:
                status = ec2.get_all_instance_status()
            except boto.exception.EC2ResponseError as e:
                logging.warning('EC2 API call get_all_instance_status() failed: %s', e)
                continue
def launch(key_name=None, region='us-west-2', image_id='ami-5189a661',
           instance_type='t2.micro', security_groups='launch-wizard-1',
           user_data=None, initial_check=False):
    """Run a single EC2 instance and wait for it to leave the pending state.

    ``security_groups`` may be one name or a list of names. With
    ``initial_check`` set, the call also waits while both the system status
    and the instance status still read "Status:initializing".

    Returns the launched instance (first instance of the reservation).
    """
    def _both_initializing(report):
        # True while neither status check has moved past initialization.
        marker = "Status:initializing"
        return str(report.system_status) == marker and str(report.instance_status) == marker

    if not isinstance(security_groups, list):
        security_groups = [security_groups]

    ec2 = boto.ec2.connect_to_region(region)
    booking = ec2.run_instances(
        image_id,
        key_name=key_name,
        instance_type=instance_type,
        security_groups=security_groups,
        user_data=user_data,
    )
    node = booking.instances[0]

    while node.state == u'pending':
        time.sleep(10)
        node.update()

    if initial_check:
        # Wait for the status checks first
        report = ec2.get_all_instance_status(instance_ids=[node.id])[0]
        while _both_initializing(report):
            time.sleep(10)
            report = ec2.get_all_instance_status(instance_ids=[node.id])[0]

    return node
def ec2_status_check(self):
    """Rebuild ``self.info`` from the instances of every account and region.

    ``self.last`` receives a copy of the previous ``self.info``. For each
    entry of ``self.fog`` and each region in ``CONF.ec2_regions``, tagged
    instances are recorded by instance id (state, stage, role, tags), and
    ``self.lookup`` maps the short private hostname to the instance id.

    The process exits with status 1 if an account lacks an access key or a
    secret key. Regions that boto does not recognise are skipped with a
    warning; ``connect_to_region`` returns None for them, which previously
    caused an AttributeError on the next call.
    """
    self.last = self.info.copy()
    self.info = {}

    for account, credential in self.fog.iteritems():
        # Drop the first character of the account key (presumably a leading
        # ':' like the credential keys below - confirm against the config).
        account = account[1:]
        aws_access_key = credential.get(':aws_access_key_id', None)
        aws_secret_key = credential.get(':aws_secret_access_key', None)
        if not aws_access_key or not aws_secret_key:
            LOG.error('Invalid FOG credentials for %s, either access key or secret key missing' % account)
            sys.exit(1)

        for region in CONF.ec2_regions:
            try:
                ec2 = boto.ec2.connect_to_region(
                    region,
                    aws_access_key_id=aws_access_key,
                    aws_secret_access_key=aws_secret_key
                )
            except boto.exception.EC2ResponseError as e:
                LOG.warning('EC2 API call connect_to_region(region=%s) failed: %s', region, e)
                continue
            if ec2 is None:
                # connect_to_region() returns None for an unknown region name.
                LOG.warning('EC2 region %s is not recognised by boto, skipping', region)
                continue

            LOG.info('Get all instances for account %s in %s', account, region)
            try:
                reservations = ec2.get_all_instances()
            except boto.exception.EC2ResponseError as e:
                LOG.warning('EC2 API call get_all_instances() failed: %s', e)
                continue

            # Only instances that carry at least one tag are recorded.
            instances = [i for r in reservations for i in r.instances if i.tags]
            for i in instances:
                role = i.tags.get('Role', 'unknown')
                self.info[i.id] = {
                    'state': i.state,
                    'stage': i.tags.get('Stage', 'unknown'),
                    'role': role,
                    'tags': ['os:Linux', 'role:%s' % role, 'datacentre:%s' % region,
                             'virtual:xen', 'cloud:AWS/EC2', 'account:%s' % account,
                             # FIXME - replace match on cluster with match on role
                             'cluster:%s_%s' % (role, region)],
                }

                # FIXME - this is a hack until all EC2 instances are keyed off instance id
                LOG.debug('%s -> %s', i.private_dns_name, i.id)
                self.lookup[i.private_dns_name.split('.')[0]] = i.id

            LOG.info('Get system and instance status for account %s in %s', account, region)
            try:
                status = ec2.get_all_instance_status()
            except boto.exception.EC2ResponseError as e:
                LOG.warning('EC2 API call get_all_instance_status() failed: %s', e)
                continue
def awsFilterImpairedNodes(nodes, ec2):
    """Drop nodes whose EC2 instance status check reports 'impaired'.

    Filtering only happens when the TOIL_AWS_NODE_DEBUG environment variable
    is truthy according to ``less_strict_bool``; otherwise ``nodes`` is
    returned unchanged.

    :param nodes: instance objects exposing an ``id`` attribute.
    :param ec2: connection providing ``get_all_instance_status``.
    :return: ``nodes`` itself when debugging is off or ``nodes`` is empty,
        otherwise a new list without the impaired nodes. Nodes for which no
        status is returned are kept.
    """
    # if TOIL_AWS_NODE_DEBUG is set don't terminate nodes with
    # failing status checks so they can be debugged
    nodeDebug = less_strict_bool(os.environ.get('TOIL_AWS_NODE_DEBUG'))
    if not nodeDebug or not nodes:
        # An empty id list would make the status call return every instance.
        return nodes

    nodeIDs = [node.id for node in nodes]
    statuses = ec2.get_all_instance_status(instance_ids=nodeIDs)

    # boto wraps the check result in a Status object whose ``status``
    # attribute holds the string; comparing the object itself to 'impaired'
    # never matched. Plain strings are still accepted.
    statusMap = {}
    for status in statuses:
        instanceStatus = status.instance_status
        statusMap[status.id] = getattr(instanceStatus, 'status', instanceStatus)

    healthyNodes = [node for node in nodes if statusMap.get(node.id, None) != 'impaired']
    impairedNodes = [node.id for node in nodes if statusMap.get(node.id, None) == 'impaired']
    if impairedNodes:
        # Only warn when there is something to report.
        logger.warn('TOIL_AWS_NODE_DEBUG is set and nodes %s have failed EC2 status checks so '
                    'will not be terminated.', ' '.join(impairedNodes))
    return healthyNodes
def create_box():
    """Launch one m3.2xlarge instance and wait until it is running and reachable.

    The new instance is found by comparing the instance ids visible before
    and after the ``run_instances`` call. Its id and, once ready, its public
    DNS name are printed.

    Returns the new instance; raises AssertionError if it has no public DNS
    name after the waits.
    """
    def _reachability(instance_id):
        # Current value of the system reachability check for one instance.
        report = ec2.get_all_instance_status(instance_ids=[instance_id])[0]
        return report.system_status.details['reachability']

    preexisting = set(inst.id for inst in ec2.get_only_instances())
    group_name = os.environ.get('SECURITY_GROUP', 'memex-explorer-prod')
    ec2.run_instances(AMI_ID,
                      key_name=KEYNAME,
                      security_groups=[group_name],
                      instance_type='m3.2xlarge')

    launched = [inst for inst in ec2.get_only_instances() if inst.id not in preexisting]
    box = launched[0]
    print(box.id)

    while True:
        if box.state == u'running':
            break
        time.sleep(3)
        box.update()

    while True:
        if _reachability(box.id) == 'passed':
            break
        time.sleep(3)

    time.sleep(1)
    assert box.public_dns_name
    print(box.public_dns_name)
    return box
def wait_for_servers_up():
    """Block until exactly DEMO_MINIONS + 1 instances pass both status checks.

    Every 30 seconds the instance statuses are fetched, and the number of
    instances whose system status and instance status are both 'ok' is
    printed. Returns None once that number equals DEMO_MINIONS + 1.
    """
    while True:
        time.sleep(30)
        reports = ec2.get_all_instance_status()
        completed = sum(
            1 for report in reports
            if report.system_status.status == 'ok' and report.instance_status.status == 'ok'
        )
        print(completed)
        if completed == DEMO_MINIONS + 1:
            break
    print("All instances online")
    return
def get_status(instance_id):
    """Return the system status of one instance, or None if it is unavailable.

    Connects to ``settings.aws_region`` with the credentials from
    ``settings``. None is returned when the connection is falsy, when the
    status query does not yield exactly one entry, or when any exception is
    raised (the exception is logged).
    """
    conn = boto.ec2.connect_to_region(
        settings.aws_region,
        aws_access_key_id=settings.aws_access,
        aws_secret_access_key=settings.aws_secret,
    )
    if not conn:
        logging.error('Cannot connect to region %s' % settings.aws_region)
        return None

    try:
        found = conn.get_all_instance_status(instance_ids=[instance_id])
        if len(found) != 1:
            return None
        logging.info('current %s status: %s' % (instance_id, found[0].system_status))
        return found[0].system_status
    except Exception as e:
        logging.exception('Could not get status for %s (%s)' % (instance_id, e))
        return None
def load_hosts_list():
    """Append every instance that passes both status checks to ``hosts``.

    An instance qualifies when its system status and its instance status
    are both 'ok'. Returns None; the result is the mutation of ``hosts``.
    """
    # first collect the ids of all the running instances; a set gives a
    # constant-time membership test instead of a nested scan.
    running = set(
        status.id for status in ec2.get_all_instance_status()
        if status.system_status.status == 'ok' and status.instance_status.status == 'ok'
    )

    # now walk all the instances and copy out the ones that are running
    for instance in ec2.get_only_instances():
        if instance.id in running:
            hosts.append(instance)
    return
def autoscaling_peers():
    """Return the healthy instances sharing this instance's autoscaling group.

    The current instance and its region are read from the instance identity
    document. Peers whose instance status is reported as anything other
    than 'ok' are dropped; peers with no status entry are kept.

    :return: list of instances for the remaining peers, ``[]`` when there
        are none, or None when this instance belongs to no autoscaling
        group.
    """
    metadata = boto.utils.get_instance_identity()['document']
    region = metadata['region']
    own_id = metadata['instanceId']

    autoscaling = boto.ec2.autoscale.connect_to_region(region)
    ec2 = boto.ec2.connect_to_region(region)

    for group in autoscaling.get_all_groups():
        member_ids = [member.instance_id for member in group.instances]
        if own_id not in member_ids:
            continue

        # Build the peer list without mutating group.instances mid-iteration.
        peer_ids = [member_id for member_id in member_ids if member_id != own_id]
        if not peer_ids:
            # An empty id list would make the status call return every
            # instance, so stop here.
            return []

        unhealthy = set(
            status.id for status in ec2.get_all_instance_status(peer_ids)
            if status.instance_status.status != 'ok'
        )
        healthy_ids = [peer_id for peer_id in peer_ids if peer_id not in unhealthy]
        if not healthy_ids:
            return []
        return ec2.get_only_instances(healthy_ids)

    return None
import boto.ec2 import sys import collections regions = boto.ec2.regions() names = [region.name for region in regions] try: if len(sys.argv) > 1: region = regions[names.index(sys.argv[1])] else: region = regions[names.index('us-east-1')] except ValueError: sys.stderr.write("Sorry, the region '%s' does not exist.\n" % sys.argv[1]) sys.exit(1) # proper return value for a script run as a command ec2 = region.connect() stats = ec2.get_all_instance_status(filters={"system-status.reachability": "failed"}) if len(stats) > 0: print "The following instances show 'failed' for the ec2 reachability check: " for stat in stats: reservation = ec2.get_all_instances(filters={'instance-id': stat.id}) dead_instance = reservation[0].instances[0] print dead_instance.tags.get('Name'), stat.id, stat.zone, stat.state_name if isinstance(stat.events, collections.Iterable): print "\tmost recent events: ", [(event.code, event.description) for event in stat.events]
#!/usr/bin/env python
# Prompt for AWS credentials, print the instance statuses and reservations of
# eu-west-2, then start the first instance of every reservation.
import boto.ec2

# Call raw_input() to read the values. The previous chained assignment stored
# the prompt text itself as the credential and rebound the name raw_input.
aws_access_key_id = raw_input("Please enter the aws_access_key_id: ")
aws_secret_access_key = raw_input("Please enter the aws_secret_access_key: ")

auth = {
    "aws_access_key_id": aws_access_key_id,
    "aws_secret_access_key": aws_secret_access_key,
}

ec2 = boto.ec2.connect_to_region("eu-west-2", **auth)
if ec2 is None:
    # connect_to_region() returns None when boto does not know the region.
    raise SystemExit("Region eu-west-2 is not known to this boto installation")

print(ec2.get_all_instance_status())
print(ec2.get_all_instances())

for reservation in ec2.get_all_instances():
    print(reservation)
    print(reservation.instances[0])
    ec2.start_instances(reservation.instances[0].id)
for _ in xrange(EC2_RUN_TIMEOUT): res = ec2.get_all_instances(instance_ids=[instance_id]) if res[0].instances[0].state == "running": status_delta = time.time() - status_start run_summary.append(("EC2 Launch", status_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(status_delta / 60, status_delta % 60) break else: time.sleep(1) else: raise Exception("Timeout waiting for running status: {} ".format(instance_id)) print "{:<40}".format("Waiting for system status:"), system_start = time.time() for _ in xrange(EC2_STATUS_TIMEOUT): status = ec2.get_all_instance_status(inst.id) if status[0].system_status.status == u"ok": system_delta = time.time() - system_start run_summary.append(("EC2 Status Checks", system_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(system_delta / 60, system_delta % 60) break else: time.sleep(1) else: raise Exception("Timeout waiting for status checks: {} ".format(instance_id)) user_start = time.time() print print "{:<40}".format("Waiting for user-data, polling sqs for Ansible events:") (ansible_delta, task_report) = poll_sqs_ansible()
def launch_and_configure(ec2_args): """ Creates an sqs queue, launches an ec2 instance, configures it and creates an AMI. Polls SQS for updates """ print "{:<40}".format( "Creating SQS queue and launching instance for {}:".format(run_id)) print for k, v in ec2_args.iteritems(): if k != 'user_data': print " {:<25}{}".format(k, v) print global sqs_queue global instance_id sqs_queue = sqs.create_queue(run_id) sqs_queue.set_message_class(RawMessage) res = ec2.run_instances(**ec2_args) inst = res.instances[0] instance_id = inst.id print "{:<40}".format( "Waiting for instance {} to reach running status:".format(instance_id)), status_start = time.time() for _ in xrange(EC2_RUN_TIMEOUT): res = ec2.get_all_instances(instance_ids=[instance_id]) if res[0].instances[0].state == 'running': status_delta = time.time() - status_start run_summary.append(('EC2 Launch', status_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format( status_delta / 60, status_delta % 60) break else: time.sleep(1) else: raise Exception("Timeout waiting for running status: {} ".format( instance_id)) print "{:<40}".format("Waiting for system status:"), system_start = time.time() for _ in xrange(EC2_STATUS_TIMEOUT): status = ec2.get_all_instance_status(inst.id) if status[0].system_status.status == u'ok': system_delta = time.time() - system_start run_summary.append(('EC2 Status Checks', system_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format( system_delta / 60, system_delta % 60) break else: time.sleep(1) else: raise Exception("Timeout waiting for status checks: {} ".format( instance_id)) print print "{:<40}".format( "Waiting for user-data, polling sqs for Ansible events:") (ansible_delta, task_report) = poll_sqs_ansible() run_summary.append(('Ansible run', ansible_delta)) print print "{} longest Ansible tasks (seconds):".format(NUM_TASKS) for task in sorted( task_report, reverse=True, key=lambda k: k['DELTA'])[:NUM_TASKS]: print "{:0>3.0f} {}".format(task['DELTA'], task['TASK']) print " - 
{}".format(task['INVOCATION']) print print "{:<40}".format("Creating AMI:"), ami_start = time.time() ami = create_ami(instance_id, run_id, run_id) ami_delta = time.time() - ami_start print "[ OK ] {:0>2.0f}:{:0>2.0f}".format( ami_delta / 60, ami_delta % 60) run_summary.append(('AMI Build', ami_delta)) total_time = time.time() - start_time all_stages = sum(run[1] for run in run_summary) if total_time - all_stages > 0: run_summary.append(('Other', total_time - all_stages)) run_summary.append(('Total', total_time)) return run_summary, ami
status_delta = time.time() - status_start run_summary.append(('EC2 Launch', status_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format( status_delta / 60, status_delta % 60) break else: time.sleep(1) else: raise Exception("Timeout waiting for running status: {} ".format( instance_id)) print "{:<40}".format("Waiting for system status:"), system_start = time.time() for _ in xrange(EC2_STATUS_TIMEOUT): status = ec2.get_all_instance_status(inst.id) if status[0].system_status.status == u'ok': system_delta = time.time() - system_start run_summary.append(('EC2 Status Checks', system_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format( system_delta / 60, system_delta % 60) break else: time.sleep(1) else: raise Exception("Timeout waiting for status checks: {} ".format( instance_id)) user_start = time.time() print
# emits a warning if any pending events are scheduled
import sys
import boto.ec2


def list_regions():
    """returns a list of ec2 region names, leaving out the disabled ones"""
    disabled = ('cn-north-1', 'us-gov-west-1')
    enabled = []
    for candidate in boto.ec2.regions():
        if candidate.name in disabled:
            continue
        enabled.append(candidate.name)
    return enabled


# One detail line per scheduled event whose description lacks 'Completed'.
details = []
for region in list_regions():
    ec2 = boto.ec2.connect_to_region(region)
    for status in ec2.get_all_instance_status():
        if not status.events:
            continue
        for event in status.events:
            if 'Completed' in event.description:
                continue
            details.append("%s: %s %s" % (region, event.description, status.id))

pending_events = len(details)

# Exit status 1 signals pending events, 0 signals none.
if pending_events:
    print('EVENT WARNING - %d pending events\n%s' % (pending_events, "\n".join(details)))
    sys.exit(1)
else:
    print('EVENT OK - %d pending events' % pending_events)
    sys.exit(0)
def launch_and_configure(ec2_args): """ Creates an sqs queue, launches an ec2 instance, configures it and creates an AMI. Polls SQS for updates """ print "{:<40}".format( "Creating SQS queue and launching instance for {}:".format(run_id)) print for k, v in ec2_args.iteritems(): if k != 'user_data': print " {:<25}{}".format(k, v) print global sqs_queue global instance_id sqs_queue = sqs.create_queue(run_id) sqs_queue.set_message_class(RawMessage) res = ec2.run_instances(**ec2_args) inst = res.instances[0] instance_id = inst.id print "{:<40}".format( "Waiting for instance {} to reach running status:".format( instance_id)), status_start = time.time() for _ in xrange(EC2_RUN_TIMEOUT): res = ec2.get_all_instances(instance_ids=[instance_id]) if res[0].instances[0].state == 'running': status_delta = time.time() - status_start run_summary.append(('EC2 Launch', status_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(status_delta / 60, status_delta % 60) break else: time.sleep(1) else: raise Exception( "Timeout waiting for running status: {} ".format(instance_id)) print "{:<40}".format("Waiting for system status:"), system_start = time.time() for _ in xrange(EC2_STATUS_TIMEOUT): status = ec2.get_all_instance_status(inst.id) if status[0].system_status.status == u'ok': system_delta = time.time() - system_start run_summary.append(('EC2 Status Checks', system_delta)) print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(system_delta / 60, system_delta % 60) break else: time.sleep(1) else: raise Exception( "Timeout waiting for status checks: {} ".format(instance_id)) print print "{:<40}".format( "Waiting for user-data, polling sqs for Ansible events:") (ansible_delta, task_report) = poll_sqs_ansible() run_summary.append(('Ansible run', ansible_delta)) print print "{} longest Ansible tasks (seconds):".format(NUM_TASKS) for task in sorted(task_report, reverse=True, key=lambda k: k['DELTA'])[:NUM_TASKS]: print "{:0>3.0f} {}".format(task['DELTA'], task['TASK']) print " - 
{}".format(task['INVOCATION']) print print "{:<40}".format("Creating AMI:"), ami_start = time.time() ami = create_ami(instance_id, run_id, run_id) ami_delta = time.time() - ami_start print "[ OK ] {:0>2.0f}:{:0>2.0f}".format(ami_delta / 60, ami_delta % 60) run_summary.append(('AMI Build', ami_delta)) total_time = time.time() - start_time all_stages = sum(run[1] for run in run_summary) if total_time - all_stages > 0: run_summary.append(('Other', total_time - all_stages)) run_summary.append(('Total', total_time)) return run_summary, ami
def ec2_status_check(self):
    """Refresh ``self.info`` with the tagged instances of all accounts/regions.

    The previous ``self.info`` is copied into ``self.last`` first. Each
    tagged instance is stored under its instance id with its state, stage,
    role and a tag list. ``self.lookup`` maps the first label of the
    private DNS name to the instance id.

    Exits the process with status 1 when an account has no access key or no
    secret key. ``connect_to_region`` returns None for a region name boto
    does not know; such regions are now skipped with a warning instead of
    failing with AttributeError on the next call.
    """
    self.last = self.info.copy()
    self.info = {}

    for account, credential in self.fog.iteritems():
        # Strip the first character of the key (presumably a leading ':'
        # as on the credential keys below - confirm against the config).
        account = account[1:]
        aws_access_key = credential.get(':aws_access_key_id', None)
        aws_secret_key = credential.get(':aws_secret_access_key', None)
        if not aws_access_key or not aws_secret_key:
            LOG.error(
                'Invalid FOG credentials for %s, either access key or secret key missing'
                % account)
            sys.exit(1)

        for region in CONF.ec2_regions:
            try:
                ec2 = boto.ec2.connect_to_region(
                    region,
                    aws_access_key_id=aws_access_key,
                    aws_secret_access_key=aws_secret_key)
            except boto.exception.EC2ResponseError as e:
                LOG.warning(
                    'EC2 API call connect_to_region(region=%s) failed: %s',
                    region, e)
                continue
            if ec2 is None:
                # An unknown region name yields None rather than an exception.
                LOG.warning(
                    'EC2 region %s is not recognised by boto, skipping',
                    region)
                continue

            LOG.info('Get all instances for account %s in %s', account,
                     region)
            try:
                reservations = ec2.get_all_instances()
            except boto.exception.EC2ResponseError as e:
                LOG.warning('EC2 API call get_all_instances() failed: %s', e)
                continue

            # Untagged instances are left out of the inventory.
            instances = [
                i for r in reservations for i in r.instances if i.tags
            ]
            for i in instances:
                entry = dict()
                entry['state'] = i.state
                entry['stage'] = i.tags.get('Stage', 'unknown')
                entry['role'] = i.tags.get('Role', 'unknown')
                entry['tags'] = [
                    'os:Linux', 'role:%s' % entry['role'],
                    'datacentre:%s' % region, 'virtual:xen',
                    'cloud:AWS/EC2', 'account:%s' % account
                ]
                # FIXME - replace match on cluster with match on role
                entry['tags'].append(
                    'cluster:%s_%s' % (entry['role'], region))
                self.info[i.id] = entry

                # FIXME - this is a hack until all EC2 instances are keyed off instance id
                LOG.debug('%s -> %s', i.private_dns_name, i.id)
                self.lookup[i.private_dns_name.split('.')[0]] = i.id

            LOG.info('Get system and instance status for account %s in %s',
                     account, region)
            try:
                status = ec2.get_all_instance_status()
            except boto.exception.EC2ResponseError as e:
                LOG.warning(
                    'EC2 API call get_all_instance_status() failed: %s', e)
                continue