def main(args):
    """Delete launch configurations that no autoscaling group references.

    For every EC2 region not listed in ``args.excluded_regions`` this fetches
    all launch configurations and all autoscaling groups.  A launch
    configuration whose name matches no group's ``launch_config_name`` is
    reported as abandoned and, unless this is a dry run, handed to
    ``kill_with_fire``.

    Args:
        args: parsed options; ``dry_run``, ``verbose`` and
            ``excluded_regions`` are read.

    Any exception raised while processing a region is passed to
    ``handle_exception`` and the process then exits with status 1.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    for region in [r.name for r in boto.ec2.regions()
                   if r.name not in args.excluded_regions]:
        try:
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            all_launch_configs = as_conn.get_all_launch_configurations()
            as_groups = as_conn.get_all_groups()
            # Collect the referenced names once instead of rescanning every
            # group for every launch configuration.
            names_in_use = set(g.launch_config_name for g in as_groups)
            for launch_config in all_launch_configs:
                if launch_config.name in names_in_use:
                    continue
                print_verbose(
                    this_file, 'info',
                    "Launch config %s looks to be abandoned."
                    % launch_config.name)
                if not dry_run:
                    print_verbose(this_file, 'info', "DESTROY!")
                    kill_with_fire(launch_config)
            print_verbose(this_file, 'info', 'Done with pass on %s' % region)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
def main(args):
    """Remove launch configurations that are not attached to any ASG.

    Each EC2 region whose name is not in ``args.excluded_regions`` is
    processed in turn: launch configurations and autoscaling groups are
    listed, and every launch configuration whose name is not the
    ``launch_config_name`` of some group is logged as abandoned.  Outside of
    a dry run the abandoned configuration is passed to ``kill_with_fire``.

    Args:
        args: parsed options; ``dry_run``, ``verbose`` and
            ``excluded_regions`` are read.

    On any exception ``handle_exception`` is called and the process exits
    with status 1.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    script_name = os.path.basename(__file__)
    region_names = [
        r.name for r in boto.ec2.regions()
        if r.name not in args.excluded_regions
    ]
    for region in region_names:
        try:
            print_verbose(script_name, 'info',
                          'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            all_launch_configs = as_conn.get_all_launch_configurations()
            as_groups = as_conn.get_all_groups()
            # One pass over the groups yields every referenced name; the
            # per-config check is then a constant-time set lookup rather
            # than a fresh scan of all groups.
            referenced = set()
            for group in as_groups:
                referenced.add(group.launch_config_name)
            for launch_config in all_launch_configs:
                if launch_config.name not in referenced:
                    print_verbose(
                        script_name, 'info',
                        "Launch config %s looks to be abandoned."
                        % launch_config.name)
                    if not dry_run:
                        print_verbose(script_name, 'info', "DESTROY!")
                        kill_with_fire(launch_config)
            print_verbose(script_name, 'info',
                          'Done with pass on %s' % region)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
def main(args):
    """Tag SSR autoscaling groups with per-AZ health derived from spot prices.

    For each region not in ``args.excluded_regions`` the groups returned by
    ``get_ssr_groups`` are examined.  For a group with current spot prices,
    every price is compared with the group's bid: a zone whose price exceeds
    the bid is recorded as 1, any other zone as 0, keyed by the last
    character of the availability zone name.  The resulting tags are written
    with ``update_tags`` unless this is a dry run.

    Args:
        args: parsed options; ``dry_run``, ``verbose`` and
            ``excluded_regions`` are read.

    Returns:
        1 when an exception other than ``EC2ResponseError`` is raised;
        otherwise ``None`` after all regions were processed.
    """
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    region_names = [
        r.name for r in boto.ec2.regions()
        if r.name not in args.excluded_regions
    ]
    for region in region_names:
        try:
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_ssr_groups(as_conn)
            health_tags = []
            for as_group in as_groups:
                bid = get_bid(as_group)
                current_prices = get_current_spot_prices(as_group)
                health_dict = {}
                if not current_prices:
                    continue
                print_verbose(this_file, 'info',
                              "Updating health for %s" % as_group.name)
                for price in current_prices:
                    # * 1.1: #NOTE: potential feature to require a price
                    # buffer here?
                    zone_letter = price.availability_zone[-1]
                    health_dict[zone_letter] = 1 if price.price > bid else 0
                # NOTE(review): the tag is only queued for groups that have
                # current prices - confirm against the intended nesting.
                health_tags.append(
                    update_az_health_list_tag(as_group, health_dict))
            if health_tags and not dry_run:
                update_tags(as_conn, health_tags)
                print_verbose(this_file, 'info', "All tags updated!")
            print_verbose(this_file, 'info', 'Done with pass on %s' % region)
        except EC2ResponseError as e:
            # EC2 API errors are reported and the next region is tried.
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            return 1
    print_verbose(this_file, 'info', "All regions complete")
def main(args):
    """Update the AZ health tags of SSR groups from current spot prices.

    Regions listed in ``args.excluded_regions`` are skipped.  In every other
    region each group from ``get_ssr_groups`` that has current spot prices
    gets a health mapping: the last character of each price's availability
    zone maps to 1 when the price is above the group's bid and to 0
    otherwise.  Tags built by ``update_az_health_list_tag`` are applied via
    ``update_tags`` unless this is a dry run.

    Args:
        args: parsed options; ``dry_run``, ``verbose`` and
            ``excluded_regions`` are read.

    Returns:
        1 if an exception other than ``EC2ResponseError`` occurs, else
        ``None``.
    """
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    script = os.path.basename(__file__)

    for region in [r.name for r in boto.ec2.regions()
                   if r.name not in args.excluded_regions]:
        try:
            print_verbose(script, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_ssr_groups(as_conn)
            health_tags = []
            for as_group in as_groups:
                bid = get_bid(as_group)
                current_prices = get_current_spot_prices(as_group)
                health_dict = {}
                if current_prices:
                    print_verbose(script, 'info',
                                  "Updating health for %s" % as_group.name)
                    for price in current_prices:
                        # * 1.1: #NOTE: potential feature to require a
                        # price buffer here?
                        over_bid = price.price > bid
                        if over_bid:
                            status = 1
                        else:
                            status = 0
                        health_dict[price.availability_zone[-1]] = status
                    # NOTE(review): queued only when prices were returned;
                    # confirm this matches the intended nesting.
                    tag = update_az_health_list_tag(as_group, health_dict)
                    health_tags.append(tag)
            if health_tags and not dry_run:
                update_tags(as_conn, health_tags)
                print_verbose(script, 'info', "All tags updated!")
            print_verbose(script, 'info', 'Done with pass on %s' % region)
        except EC2ResponseError as e:
            # Reported, then the loop moves on to the next region.
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            return 1
    print_verbose(script, 'info', "All regions complete")
def main(args):
    """Reconcile each SSR group's zones and bid with the current AZ health.

    For every region not in ``args.excluded_regions`` and every group from
    ``get_ssr_groups``:

    * If the ``demand_expiration`` value of the ``ssr_config`` tag is set
      and has passed, either move back to the original bid (when enough
      healthy zones exist) and terminate the instances that have no spot
      request id, or push the expiration further into the future.
    * Otherwise, if the group's zones differ from the healthy zones, either
      switch to the healthy zones, raise the bid to the best available one,
      or fall back to on-demand and record a new expiration.
    * Otherwise nothing is done for the group.

    Args:
        args: parsed options; ``dry_run``, ``verbose``,
            ``excluded_regions``, ``min_health_threshold`` and
            ``demand_expiration`` are read.

    Returns:
        1 when an exception other than ``EC2ResponseError`` is raised;
        otherwise ``None`` after all regions were processed.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    for region in [r.name for r in boto.ec2.regions()
                   if r.name not in args.excluded_regions]:
        try:
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_ssr_groups(as_conn)
            elb_conn = boto.ec2.elb.connect_to_region(as_conn.region.name)
            minutes_multiplier = 60
            for as_group in as_groups:
                as_group = reload_as_group(as_group)
                print_verbose(this_file, 'info',
                              "Checking %s" % as_group.name)
                if as_group.load_balancers:
                    maximize_elb_azs(elb_conn, as_group, dry_run)
                demand_expiration = get_tag_dict_value(
                    as_group, 'ssr_config')['demand_expiration']
                healthy_zones = get_healthy_zones(
                    as_group, args.min_health_threshold)
                if demand_expiration is not False:
                    if demand_expiration < int(time.time()):
                        if len(healthy_zones) >= get_min_azs(as_group):
                            print_verbose(
                                this_file, 'info',
                                'Woot! We can move back to spots at original bid price.')
                            modify_as_group_azs(
                                as_group, healthy_zones, dry_run)
                            modify_price(
                                as_group,
                                get_tag_dict_value(
                                    as_group, 'ssr_config')['original_bid'],
                                dry_run)
                            set_tag_dict_value(
                                as_group, 'ssr_config',
                                'demand_expiration', False)
                            # kill all demand instances that were created
                            ec2_conn = boto.ec2.connect_to_region(
                                as_group.connection.region.name)
                            # get_all_instances() yields reservations, and a
                            # reservation can hold several instances, so
                            # index every instance by id rather than looking
                            # only at the first one of each reservation.
                            instances_by_id = {}
                            for reservation in ec2_conn.get_all_instances():
                                for ec2_instance in reservation.instances:
                                    instances_by_id[ec2_instance.id] = \
                                        ec2_instance
                            print_verbose(
                                this_file, 'info',
                                "Looking at %s instances for potential termination"
                                % str(len(as_group.instances)))
                            for instance in as_group.instances:
                                ec2_instance = instances_by_id.get(
                                    instance.instance_id)
                                if ec2_instance is None:
                                    # Unknown to EC2: skip it instead of
                                    # aborting the whole run.
                                    print_verbose(
                                        this_file, 'info',
                                        "Instance %s not found in EC2; skipping."
                                        % instance.instance_id)
                                    continue
                                # No spot request id means the instance was
                                # launched on-demand.
                                if not ec2_instance.spot_instance_request_id \
                                        and not dry_run:
                                    terminate_instance(instance)
                        else:
                            print_verbose(
                                this_file, 'info',
                                'Extending the life of demand instances as we cant fulfill with spots still')
                            set_tag_dict_value(
                                as_group, 'ssr_config', 'demand_expiration',
                                int(time.time()) +
                                (args.demand_expiration * minutes_multiplier))
                elif sorted(as_group.availability_zones) != \
                        sorted(healthy_zones):
                    as_group = reload_as_group(as_group)
                    print_verbose(
                        this_file, 'info',
                        "Healthy zones and zones in use dont match")
                    if len(healthy_zones) >= get_min_azs(as_group):
                        print_verbose(this_file, 'info',
                                      'Modifying zones accordingly.')
                        modify_as_group_azs(as_group, healthy_zones, dry_run)
                    else:
                        print_verbose(
                            this_file, 'info',
                            "Bid will need to be modified as we can't meet AZ minimum of %s"
                            % str(get_min_azs(as_group)))
                        best_bid = find_best_bid_price(as_group)
                        print_verbose(
                            this_file, 'info',
                            "Best possible bid given AZ minimum is %s"
                            % str(best_bid))
                        if best_bid:
                            modify_price(as_group, best_bid, dry_run)
                        else:
                            print_verbose(this_file, 'info',
                                          "Moving to ondemand.")
                            modify_price(
                                as_group, None, dry_run,
                                minutes_multiplier, args.demand_expiration)
                            set_tag_dict_value(
                                as_group, 'ssr_config', 'demand_expiration',
                                int(time.time()) +
                                (args.demand_expiration * minutes_multiplier))
                        # NOTE(review): assumed to apply after either price
                        # change (new bid or on-demand) - confirm.
                        modify_as_group_azs(
                            as_group, get_usable_zones(as_group), dry_run)
                else:
                    print_verbose(
                        this_file, 'info',
                        'No further actions to take on this ASG.')
            print_verbose(this_file, 'info', 'Done with pass on %s' % region)
        except EC2ResponseError as e:
            # EC2 API errors are reported and the next region is tried.
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            return 1
    print_verbose(this_file, 'info', "All regions complete")
def main(args):
    """Create, reset or verify the SSR tags on candidate autoscaling groups.

    Candidates in each non-excluded region are the groups whose launch
    configuration has a spot price plus the groups whose ``ssr_config`` tag
    has ``enabled`` set to ``True``.  For each candidate:

    * with ``args.reset_tags`` set, or with a missing/unparsable
      ``ssr_config`` tag, both tags are initialised;
    * with ``enabled`` falsy nothing is done;
    * with ``enabled`` truthy the config keys and the stored launch config
      name are verified; on mismatch the tags are deleted when the launch
      config no longer has a spot price, otherwise ``ssr_config`` is
      re-initialised.  The ``AZ_status`` tag is then checked against the
      region's zone letters.

    Args:
        args: parsed options; ``dry_run``, ``verbose``,
            ``excluded_regions``, ``reset_tags`` and ``min_healthy_AZs``
            are read.

    Returns:
        1 when an exception other than ``EC2ResponseError`` /
        ``BotoServerError`` is raised; otherwise ``None``.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    region_names = [r.name for r in boto.ec2.regions()
                    if r.name not in args.excluded_regions]
    for region in region_names:
        try:
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            all_groups = as_conn.get_all_groups()
            spot_lcs = [lc for lc in as_conn.get_all_launch_configurations()
                        if lc.spot_price]
            spot_lc_names = [lc.name for lc in spot_lcs]
            # these need to be pulled from the same all_groups list or
            # duplicate objects will be seen as distinct.
            spot_lc_groups = [g for g in all_groups
                              if g.launch_config_name in spot_lc_names]
            previously_ssr_managed_groups = [
                g for g in all_groups
                if get_tag_dict_value(g, 'ssr_config') and
                get_tag_dict_value(g, 'ssr_config')['enabled'] is True]
            all_groups = list(
                set(spot_lc_groups + previously_ssr_managed_groups))
            for as_group in all_groups:
                print_verbose(this_file, 'info',
                              "Evaluating %s" % as_group.name)
                # this latter condition can happen when tag value (a dict)
                # can't be interpreted by ast.literal_eval()
                if (args.reset_tags or
                        not [t for t in as_group.tags
                             if t.key == 'ssr_config'] or
                        not get_tag_dict_value(as_group, 'ssr_config')):
                    print_verbose(
                        this_file, 'info',
                        'Tags not found or reset tags option flagged. Adding all tags anew now.')
                    init_ssr_config_tag(as_group, args.min_healthy_AZs)
                    init_az_status_tag(as_group)
                    continue

                if [t for t in as_group.tags
                        if t.key == 'ssr_config' and
                        not get_tag_dict_value(
                            as_group, 'ssr_config')['enabled']]:
                    print_verbose(this_file, 'info',
                                  'ssr_config DISABLED. Doing nothing.')
                    continue

                if not [t for t in as_group.tags
                        if t.key == 'ssr_config' and
                        get_tag_dict_value(
                            as_group, 'ssr_config')['enabled']]:
                    raise Exception(
                        "ssr_enabled tag found for %s but isn't a valid value."
                        % (as_group.name,))

                print_verbose(
                    this_file, 'info',
                    'ssr management enabled. Verifying all config values in place.')
                config_keys = ['enabled', 'original_bid', 'LC_name',
                               'min_AZs', 'demand_expiration']
                config_ok = (
                    verify_tag_dict_keys(as_group, 'ssr_config',
                                         config_keys) and
                    get_tag_dict_value(as_group, 'ssr_config')['LC_name'] ==
                    as_group.launch_config_name[-155:])
                if not config_ok:
                    # this would indicate a change to the LC outside of ssr
                    # scope. In that case, we need to disable ssr via tag
                    # deletion.
                    if not get_launch_config(as_group).spot_price:
                        del_ssr_tags(as_group)
                        continue
                    init_ssr_config_tag(as_group, args.min_healthy_AZs)
                zones = [z.name[-1] for z in ec2_conn.get_all_zones()]
                if not verify_tag_dict_keys(as_group, 'AZ_status', zones):
                    init_az_status_tag(as_group)
            print_verbose(this_file, 'info', 'Done with pass on %s' % region)
        except EC2ResponseError as e:
            handle_exception(e)
        except BotoServerError as e:
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            return 1
    print_verbose(this_file, 'info', "All regions complete")
def _find_on_demand_candidates(ec2_conn, as_group):
    """Return the group's instances that EC2 reports without a spot request.

    ``get_all_instances()`` returns reservations, each of which may contain
    several instances, so all of them are indexed by id.  Group instances
    that EC2 does not list are left out of the result.
    """
    instances_by_id = {}
    for reservation in ec2_conn.get_all_instances():
        for ec2_instance in reservation.instances:
            instances_by_id[ec2_instance.id] = ec2_instance
    candidates = []
    for instance in as_group.instances:
        ec2_instance = instances_by_id.get(instance.instance_id)
        if ec2_instance is None:
            continue
        if not ec2_instance.spot_instance_request_id:
            candidates.append(instance)
    return candidates


def main(args):
    """Bring SSR groups' zones and bid price in line with AZ health.

    Every group from ``get_ssr_groups`` in each non-excluded region is
    reloaded and then handled by one of three paths:

    * ``demand_expiration`` (from the ``ssr_config`` tag) is set: once it
      has passed, the group either returns to its original bid on the
      healthy zones - terminating instances without a spot request id - or
      has its expiration extended when too few zones are healthy.
    * the zones in use differ from the healthy zones: the zones are
      adjusted, or the bid is raised to the best available one, or the
      group moves to on-demand with a fresh expiration.
    * otherwise no action is taken.

    Args:
        args: parsed options; ``dry_run``, ``verbose``,
            ``excluded_regions``, ``min_health_threshold`` and
            ``demand_expiration`` are read.

    Returns:
        1 when an exception other than ``EC2ResponseError`` is raised;
        otherwise ``None``.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    region_names = [
        r.name for r in boto.ec2.regions()
        if r.name not in args.excluded_regions
    ]
    for region in region_names:
        try:
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_ssr_groups(as_conn)
            elb_conn = boto.ec2.elb.connect_to_region(as_conn.region.name)
            minutes_multiplier = 60
            for as_group in as_groups:
                as_group = reload_as_group(as_group)
                print_verbose(this_file, 'info',
                              "Checking %s" % as_group.name)
                if as_group.load_balancers:
                    maximize_elb_azs(elb_conn, as_group, dry_run)
                demand_expiration = get_tag_dict_value(
                    as_group, 'ssr_config')['demand_expiration']
                healthy_zones = get_healthy_zones(as_group,
                                                  args.min_health_threshold)
                if demand_expiration is not False:
                    if demand_expiration < int(time.time()):
                        if len(healthy_zones) >= get_min_azs(as_group):
                            print_verbose(
                                this_file, 'info',
                                'Woot! We can move back to spots at original bid price.'
                            )
                            modify_as_group_azs(as_group, healthy_zones,
                                                dry_run)
                            modify_price(
                                as_group,
                                get_tag_dict_value(
                                    as_group, 'ssr_config')['original_bid'],
                                dry_run)
                            set_tag_dict_value(as_group, 'ssr_config',
                                               'demand_expiration', False)
                            # kill all demand instances that were created
                            ec2_conn = boto.ec2.connect_to_region(
                                as_group.connection.region.name)
                            # Looking only at the first instance of each
                            # reservation misses instances launched
                            # together, so the helper indexes all of them.
                            on_demand = _find_on_demand_candidates(
                                ec2_conn, as_group)
                            print_verbose(
                                this_file, 'info',
                                "Looking at %s instances for potential termination"
                                % str(len(as_group.instances)))
                            if not dry_run:
                                for instance in on_demand:
                                    terminate_instance(instance)
                        else:
                            print_verbose(
                                this_file, 'info',
                                'Extending the life of demand instances as we cant fulfill with spots still'
                            )
                            set_tag_dict_value(
                                as_group, 'ssr_config', 'demand_expiration',
                                int(time.time()) +
                                (args.demand_expiration * minutes_multiplier))
                elif sorted(
                        as_group.availability_zones) != sorted(healthy_zones):
                    as_group = reload_as_group(as_group)
                    print_verbose(this_file, 'info',
                                  "Healthy zones and zones in use dont match")
                    if len(healthy_zones) >= get_min_azs(as_group):
                        print_verbose(this_file, 'info',
                                      'Modifying zones accordingly.')
                        modify_as_group_azs(as_group, healthy_zones, dry_run)
                    else:
                        print_verbose(
                            this_file, 'info',
                            "Bid will need to be modified as we can't meet AZ minimum of %s"
                            % str(get_min_azs(as_group)))
                        best_bid = find_best_bid_price(as_group)
                        print_verbose(
                            this_file, 'info',
                            "Best possible bid given AZ minimum is %s" %
                            str(best_bid))
                        if best_bid:
                            modify_price(as_group, best_bid, dry_run)
                        else:
                            print_verbose(this_file, 'info',
                                          "Moving to ondemand.")
                            modify_price(as_group, None, dry_run,
                                         minutes_multiplier,
                                         args.demand_expiration)
                            set_tag_dict_value(
                                as_group, 'ssr_config', 'demand_expiration',
                                int(time.time()) +
                                (args.demand_expiration * minutes_multiplier))
                        # NOTE(review): assumed to run after either price
                        # change (new bid or on-demand) - confirm.
                        modify_as_group_azs(as_group,
                                            get_usable_zones(as_group),
                                            dry_run)
                else:
                    print_verbose(this_file, 'info',
                                  'No further actions to take on this ASG.')
            print_verbose(this_file, 'info', 'Done with pass on %s' % region)
        except EC2ResponseError as e:
            # EC2 API errors are reported and the next region is tried.
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            return 1
    print_verbose(this_file, 'info', "All regions complete")
def main(args):
    """Cancel stale unfulfilled spot requests and tag the offending AZ.

    In each non-excluded region, spot instance requests with one of the
    listed "bad" status codes are fetched.  Requests whose launch
    specification has a security group containing ``ElasticMapReduce`` are
    skipped, as are requests created less than ``args.minutes`` minutes
    ago.  For the rest, the single spot launch configuration matching the
    request's price, instance type, instance profile name and image id is
    located, its autoscaling groups get a health tag marking the zone
    letter as 1, and - unless ``args.dry_run`` is set - the request is
    cancelled and the accumulated tags are written.

    Args:
        args: parsed options; ``dry_run``, ``verbose``,
            ``excluded_regions`` and ``minutes`` are read.

    Raises:
        Exception: inside the region pass when zero or several launch
            configurations match a request; like any other non-EC2 error
            it is passed to ``handle_exception`` and the process exits
            with status 1.
    """
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    region_names = [
        r.name for r in boto.ec2.regions()
        if r.name not in args.excluded_regions
    ]
    for region in region_names:
        try:
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_all_as_groups(as_conn)
            all_spot_lcs = get_spot_lcs(as_conn)
            bad_statuses = json.loads('''{"status-code": [
                "capacity-not-available",
                "capacity-oversubscribed",
                "price-too-low",
                "not-scheduled-yet",
                "launch-group-constraint",
                "az-group-constraint",
                "placement-group-constraint",
                "constraint-not-fulfillable"
            ]}''')
            # A flat list of the matching requests.
            pending_requests = list(
                ec2_conn.get_all_spot_instance_requests(
                    filters=bad_statuses))
            oldest_time = datetime.utcnow() - timedelta(minutes=args.minutes)
            health_tags = []
            for request in pending_requests:
                spec = request.launch_specification
                if any('ElasticMapReduce' in sec_group.name
                       for sec_group in spec.groups):
                    print_verbose(
                        this_file, 'info',
                        "This request belongs to the ElasticMapReduce group and will not be SSR managed."
                    )
                    continue

                created = datetime.strptime(request.create_time,
                                            "%Y-%m-%dT%H:%M:%S.000Z")
                if not oldest_time > created:
                    print_verbose(
                        this_file, 'info',
                        "Request %s not older than %s minutes. Continuing..."
                        % (request.id, str(args.minutes)))
                    continue

                print_verbose(
                    this_file, 'info',
                    "Bad request found. Identifying LC and associated ASGs to tag AZ health."
                )
                launch_configs = [
                    lc for lc in all_spot_lcs
                    if request.price == lc.spot_price and
                    spec.instance_type == lc.instance_type and
                    spec.instance_profile['name'] ==
                    lc.instance_profile_name and
                    spec.image_id == lc.image_id
                ]
                # This could be made hella specific if we want to go that
                # route
                if len(launch_configs) != 1:
                    raise Exception(
                        "Only one launch config should be found. You may need to run remove_old_launch_configs.py to clear this: %s"
                        % launch_configs)

                launch_config = launch_configs[0]
                offending_as_groups = [
                    g for g in as_groups
                    if g.launch_config_name == launch_config.name
                ]
                # First character following the region name in launch_group.
                bad_az = request.launch_group.split(
                    request.region.name)[1][0]
                health_dict = {bad_az: 1}
                for as_group in offending_as_groups:
                    print_verbose(
                        this_file, 'info',
                        "The following AZ will be tagged as an offender: %s."
                        % str(as_group))
                    health_tags.append(
                        update_az_health_list_tag(as_group, health_dict))
                print_verbose(this_file, 'info',
                              "Killing spot request %s." % str(request.id))
                if args.dry_run:
                    print_verbose(this_file, 'info', "PSYCH! Dry run.")
                else:
                    request.cancel()
                    update_tags(as_conn, health_tags)
            print_verbose(this_file, 'info',
                          "Region %s pass complete." % region)
        except EC2ResponseError as e:
            # EC2 API errors are reported and the next region is tried.
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
    print_verbose(this_file, 'info', "All regions complete")
def main(args):
    """Kill long-pending spot requests and mark their AZ as unhealthy.

    Per non-excluded region: fetch spot requests filtered by a fixed list
    of failing status codes, ignore those tied to an ``ElasticMapReduce``
    security group and those younger than ``args.minutes`` minutes, then
    for each remaining request find the one spot launch configuration that
    matches it (price, instance type, instance profile name, image id),
    queue a health tag for every autoscaling group using that launch
    configuration, and - unless ``args.dry_run`` is set - cancel the
    request and write the queued tags.

    Args:
        args: parsed options; ``dry_run``, ``verbose``,
            ``excluded_regions`` and ``minutes`` are read.

    A request that matches zero or several launch configurations raises an
    ``Exception``; that and every other non-``EC2ResponseError`` error is
    passed to ``handle_exception`` and the process exits with status 1.
    """
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)

    def log(message):
        # Every message from this function is logged at 'info' level.
        print_verbose(this_file, 'info', message)

    for region in [r.name for r in boto.ec2.regions()
                   if r.name not in args.excluded_regions]:
        try:
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_all_as_groups(as_conn)
            all_spot_lcs = get_spot_lcs(as_conn)
            bad_statuses = json.loads('''{"status-code": [
                "capacity-not-available",
                "capacity-oversubscribed",
                "price-too-low",
                "not-scheduled-yet",
                "launch-group-constraint",
                "az-group-constraint",
                "placement-group-constraint",
                "constraint-not-fulfillable"
            ]}''')
            pending_requests = []
            pending_requests.extend(
                ec2_conn.get_all_spot_instance_requests(
                    filters=bad_statuses))
            oldest_time = datetime.utcnow() - timedelta(minutes=args.minutes)
            health_tags = []
            for request in pending_requests:
                if any('ElasticMapReduce' in sec_group.name
                       for sec_group in request.launch_specification.groups):
                    log("This request belongs to the ElasticMapReduce group and will not be SSR managed.")
                    continue
                request_created = datetime.strptime(
                    request.create_time, "%Y-%m-%dT%H:%M:%S.000Z")
                if oldest_time > request_created:
                    log("Bad request found. Identifying LC and associated ASGs to tag AZ health.")
                    launch_configs = []
                    for lc in all_spot_lcs:
                        if (request.price == lc.spot_price and
                                request.launch_specification.instance_type ==
                                lc.instance_type and
                                request.launch_specification
                                .instance_profile['name'] ==
                                lc.instance_profile_name and
                                request.launch_specification.image_id ==
                                lc.image_id):
                            launch_configs.append(lc)
                    # This could be made hella specific if we want to go
                    # that route
                    if len(launch_configs) != 1:
                        raise Exception(
                            "Only one launch config should be found. You may need to run remove_old_launch_configs.py to clear this: %s"
                            % launch_configs)
                    launch_config = launch_configs[0]
                    offending_as_groups = [
                        g for g in as_groups
                        if g.launch_config_name == launch_config.name]
                    # First character following the region name in
                    # launch_group.
                    after_region = request.launch_group.split(
                        request.region.name)[1]
                    bad_az = after_region[0]
                    health_dict = {bad_az: 1}
                    for as_group in offending_as_groups:
                        log("The following AZ will be tagged as an offender: %s."
                            % str(as_group))
                        health_tags.append(
                            update_az_health_list_tag(as_group, health_dict))
                    log("Killing spot request %s." % str(request.id))
                    if not args.dry_run:
                        request.cancel()
                        update_tags(as_conn, health_tags)
                    else:
                        log("PSYCH! Dry run.")
                else:
                    log("Request %s not older than %s minutes. Continuing..."
                        % (request.id, str(args.minutes)))
            log("Region %s pass complete." % region)
        except EC2ResponseError as e:
            # EC2 API errors are reported and the next region is tried.
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
    log("All regions complete")
def main(args):
    """Initialise, reset or validate SSR tags on autoscaling groups.

    For every region not in ``args.excluded_regions`` the candidate groups
    are those using a launch configuration with a spot price, together
    with those whose ``ssr_config`` tag has ``enabled`` set to ``True``.
    Each candidate is then handled as follows:

    * ``args.reset_tags`` set, no ``ssr_config`` tag, or a tag value that
      cannot be read: both the ``ssr_config`` and AZ status tags are
      initialised;
    * ``enabled`` falsy: nothing is done;
    * ``enabled`` truthy: the config keys and stored launch config name
      are checked.  On mismatch the SSR tags are deleted if the launch
      config has no spot price, else ``ssr_config`` is re-initialised.
      Finally the ``AZ_status`` tag keys are checked against the region's
      zone letters and re-initialised when they do not verify.

    Args:
        args: parsed options; ``dry_run``, ``verbose``,
            ``excluded_regions``, ``reset_tags`` and ``min_healthy_AZs``
            are read.

    Returns:
        1 when an exception other than ``EC2ResponseError`` /
        ``BotoServerError`` is raised; otherwise ``None``.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    script = os.path.basename(__file__)

    def log(message):
        # Every message from this function is logged at 'info' level.
        print_verbose(script, 'info', message)

    for region in [
            r.name for r in boto.ec2.regions()
            if r.name not in args.excluded_regions
    ]:
        try:
            log('Starting pass on %s' % region)
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            all_groups = as_conn.get_all_groups()
            spot_lcs = [
                e for e in as_conn.get_all_launch_configurations()
                if e.spot_price
            ]
            # these need to be pulled from the same all_groups list or
            # duplicate objects will be seen as distinct.
            spot_lc_groups = [
                g for g in all_groups
                if g.launch_config_name in [s.name for s in spot_lcs]
            ]
            previously_ssr_managed_groups = [
                g for g in all_groups
                if get_tag_dict_value(g, 'ssr_config') and
                get_tag_dict_value(g, 'ssr_config')['enabled'] is True
            ]
            all_groups = list(
                set(spot_lc_groups + previously_ssr_managed_groups))
            for as_group in all_groups:
                log("Evaluating %s" % as_group.name)
                # this latter condition can happen when tag value (a dict)
                # can't be interpreted by ast.literal_eval()
                if args.reset_tags or not [
                        t for t in as_group.tags if t.key == 'ssr_config'
                ] or not get_tag_dict_value(as_group, 'ssr_config'):
                    log('Tags not found or reset tags option flagged. Adding all tags anew now.')
                    init_ssr_config_tag(as_group, args.min_healthy_AZs)
                    init_az_status_tag(as_group)
                elif [
                        t for t in as_group.tags
                        if t.key == 'ssr_config' and not get_tag_dict_value(
                            as_group, 'ssr_config')['enabled']
                ]:
                    log('ssr_config DISABLED. Doing nothing.')
                elif [
                        t for t in as_group.tags
                        if t.key == 'ssr_config' and get_tag_dict_value(
                            as_group, 'ssr_config')['enabled']
                ]:
                    log('ssr management enabled. Verifying all config values in place.')
                    config_keys = [
                        'enabled', 'original_bid', 'LC_name', 'min_AZs',
                        'demand_expiration'
                    ]
                    keys_present = verify_tag_dict_keys(
                        as_group, 'ssr_config', config_keys)
                    if not keys_present or \
                            get_tag_dict_value(
                                as_group, 'ssr_config')['LC_name'] != \
                            as_group.launch_config_name[-155:]:
                        # this would indicate a change to the LC outside of
                        # ssr scope. In that case, we need to disable ssr
                        # via tag deletion.
                        if get_launch_config(as_group).spot_price:
                            init_ssr_config_tag(as_group,
                                                args.min_healthy_AZs)
                        else:
                            del_ssr_tags(as_group)
                            continue
                    zones = [z.name[-1] for z in ec2_conn.get_all_zones()]
                    if not verify_tag_dict_keys(as_group, 'AZ_status',
                                                zones):
                        init_az_status_tag(as_group)
                else:
                    raise Exception(
                        "ssr_enabled tag found for %s but isn't a valid value."
                        % (as_group.name, ))
            log('Done with pass on %s' % region)
        except EC2ResponseError as e:
            handle_exception(e)
        except BotoServerError as e:
            handle_exception(e)
        except Exception as e:
            handle_exception(e)
            return 1
    log("All regions complete")