def main(args):
    """Remove launch configurations that no autoscaling group points at.

    For every EC2 region not named in ``args.excluded_regions`` this lists the
    launch configurations and autoscaling groups, reports each launch
    configuration whose name is not referenced by any group, and hands it to
    ``kill_with_fire`` unless this is a dry run.

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    global verbose
    global dry_run
    verbose, dry_run = dry_run_necessaries(args.dry_run, args.verbose)
    regions = [
        info.name for info in boto.ec2.regions()
        if info.name not in args.excluded_regions
    ]
    for region in regions:
        try:
            this_file = os.path.basename(__file__)
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            all_launch_configs = as_conn.get_all_launch_configurations()
            as_groups = as_conn.get_all_groups()

            # Names of every launch configuration some group still uses.
            referenced = set(group.launch_config_name for group in as_groups)

            for launch_config in all_launch_configs:
                if launch_config.name in referenced:
                    continue
                print_verbose(
                    this_file, 'info',
                    "Launch config %s looks to be abandoned." % launch_config.name)
                if dry_run:
                    continue
                print_verbose(this_file, 'info', "DESTROY!")
                kill_with_fire(launch_config)

            print_verbose(this_file, 'info', 'Done with pass on %s' % region)

        except Exception as e:
            handle_exception(e)
            sys.exit(1)
Esempio n. 2
0
def main(args):
    """Find and delete launch configurations that are no longer in use.

    Each region returned by ``boto.ec2.regions()`` is visited unless its name
    is in ``args.excluded_regions``. A launch configuration counts as
    abandoned when no autoscaling group in the region has a matching
    ``launch_config_name``; abandoned ones are logged and, outside of a dry
    run, passed to ``kill_with_fire``.

    On any exception ``handle_exception`` is called and the process exits
    with status 1.
    """
    global verbose
    global dry_run
    verbose, dry_run = dry_run_necessaries(args.dry_run, args.verbose)

    region_names = []
    for region_info in boto.ec2.regions():
        if region_info.name not in args.excluded_regions:
            region_names.append(region_info.name)

    for region in region_names:
        try:
            script = os.path.basename(__file__)
            print_verbose(script, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            all_launch_configs = as_conn.get_all_launch_configurations()
            as_groups = as_conn.get_all_groups()

            for launch_config in all_launch_configs:
                still_used = any(
                    group.launch_config_name == launch_config.name
                    for group in as_groups)
                if still_used:
                    continue

                message = ("Launch config %s looks to be abandoned." %
                           launch_config.name)
                print_verbose(script, 'info', message)
                if not dry_run:
                    print_verbose(script, 'info', "DESTROY!")
                    kill_with_fire(launch_config)

            print_verbose(script, 'info', 'Done with pass on %s' % region)

        except Exception as e:
            handle_exception(e)
            sys.exit(1)
def get_ondemand_price(launch_config, timeout=30):
    """Return the on-demand USD price for a launch configuration's instance type.

    The price list is downloaded from the URL produced by ``get_price_url``.
    The payload is JSONP, so everything up to ``callback(`` and the trailing
    two characters are stripped before it is decoded with ``demjson``. The
    record for the launch configuration's region and instance type is then
    looked up.

    Args:
        launch_config: launch configuration exposing
            ``connection.region.name``, ``image_id`` and ``instance_type``.
        timeout: seconds to wait for the pricing server before giving up.

    Returns:
        float: the ``USD`` price from the first ``valueColumns`` entry of the
        matching size record.

    Any failure (network error, timeout, HTTP error status, unknown region or
    instance type, unparsable price) is passed to ``handle_exception`` and
    the process exits with status 1.
    """
    try:
        region = launch_config.connection.region.name
        ec2_conn = boto.ec2.connect_to_region(region)
        # The returned image is not used; the call is kept so that an error
        # raised by it still aborts the lookup.
        ec2_conn.get_image(launch_config.image_id)

        url = get_price_url(launch_config)
        # Without a timeout an unresponsive server would block forever.
        resp = requests.get(url, timeout=timeout)
        # Fail with the HTTP status instead of a confusing parse error.
        resp.raise_for_status()
        # need to remove comments and callback syntax before parsing the broken
        # json
        json_str = str(resp.text.split('callback(')[1])[:-2]
        prices_dict = demjson.decode(json_str)['config']['regions']

        regional = [r for r in prices_dict if r['region'] == region]
        if not regional:
            raise LookupError(
                "No on demand prices found for region %s" % region)

        instance_type = launch_config.instance_type
        size_entry = None
        for family in regional[0]['instanceTypes']:
            for size in family['sizes']:
                if size['size'] == instance_type:
                    size_entry = size
                    break
            if size_entry is not None:
                break
        if size_entry is None:
            raise LookupError(
                "No on demand price found for %s in %s" % (instance_type, region))

        price = float(size_entry['valueColumns'][0]['prices']['USD'])
        print_verbose(os.path.basename(__file__), 'info', "On demand price for %s in %s is %s" % (
            instance_type, region, price))
        return price

    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 4
0
def find_best_bid_price(as_group):
    """Work out the lowest bid that still covers the group's minimum AZ count.

    The current spot prices are sorted ascending and the price at position
    ``get_min_azs(as_group) - 1`` is taken as the candidate bid.

    Returns:
        ``False`` when the rounded candidate reaches the rounded maximum bid,
        or when the group's rounded current bid reaches the rounded on-demand
        price; otherwise the rounded candidate bid.

    A mismatch between the number of prices and the number of usable zones,
    or any other exception, goes to ``handle_exception`` and the process
    exits with status 1.
    """
    try:
        this_file = os.path.basename(__file__)
        prices = get_current_spot_prices(as_group)
        print_verbose(this_file, 'info', prices)

        if len(prices) != len(get_usable_zones(as_group)):
            details = (str(prices), str(get_usable_zones(as_group)))
            raise Exception(
                "Different number of AZs found than expected. Prices = %s\nAZs = %s"
                % details)

        cheapest_first = sorted(prices, key=lambda price: price.price)
        position = int(get_min_azs(as_group)) - 1
        best_bid = cheapest_first[position].price
        print_verbose(this_file, 'info', 'best_bid=', best_bid)

        max_bid = get_max_bid(as_group)
        print_verbose(this_file, 'info', 'max_bid=', max_bid)

        # since ondemand instances are faster to spin up and more
        # available, if demand and max_bid are equal, ondemand should win
        # out.
        if get_rounded_price(best_bid) >= get_rounded_price(max_bid):
            return False

        current_bid = get_rounded_price(get_bid(as_group))
        ondemand = get_rounded_price(
            get_ondemand_price(get_launch_config(as_group)))
        if current_bid >= ondemand:
            return False

        return get_rounded_price(best_bid)
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 5
0
def get_ondemand_price(launch_config, timeout=30):
    """Fetch the on-demand USD price matching a launch configuration.

    Downloads the JSONP price list from the URL given by ``get_price_url``,
    strips the ``callback(`` wrapper, decodes it with ``demjson`` and picks
    the record for the launch configuration's region and instance type.

    Args:
        launch_config: launch configuration exposing
            ``connection.region.name``, ``image_id`` and ``instance_type``.
        timeout: seconds to wait for the pricing server before giving up.

    Returns:
        float: the ``USD`` price from the first ``valueColumns`` entry of the
        matching size record.

    Any failure (network error, timeout, HTTP error status, unknown region or
    instance type, unparsable price) is passed to ``handle_exception`` and
    the process exits with status 1.
    """
    try:
        region = launch_config.connection.region.name
        ec2_conn = boto.ec2.connect_to_region(region)
        # The image itself is not used; the call is kept so that an error
        # raised by it still aborts the lookup.
        ec2_conn.get_image(launch_config.image_id)

        url = get_price_url(launch_config)
        # A request without a timeout can block indefinitely.
        resp = requests.get(url, timeout=timeout)
        # Surface HTTP failures directly rather than as a parse error below.
        resp.raise_for_status()
        # need to remove comments and callback syntax before parsing the broken
        # json
        json_str = str(resp.text.split('callback(')[1])[:-2]
        prices_dict = demjson.decode(json_str)['config']['regions']

        regional_prices_json = next(
            (r['instanceTypes'] for r in prices_dict if r['region'] == region),
            None)
        if regional_prices_json is None:
            raise LookupError(
                "No on demand prices found for region %s" % region)

        wanted = launch_config.instance_type
        size_entry = next(
            (size for family in regional_prices_json
             for size in family['sizes'] if size['size'] == wanted),
            None)
        if size_entry is None:
            raise LookupError(
                "No on demand price found for %s in %s" % (wanted, region))

        price = float(size_entry['valueColumns'][0]['prices']['USD'])
        print_verbose(
            os.path.basename(__file__), 'info',
            "On demand price for %s in %s is %s" % (wanted, region, price))
        return price

    except Exception as e:
        handle_exception(e)
        sys.exit(1)
def modify_price(as_group, new_bid, dry_run, minutes_multiplier=None, demand_expiration=None):
    """Swap an autoscaling group's launch configuration for a copy with a new bid.

    A new launch configuration is built from the group's current one with
    ``spot_price`` set to ``new_bid`` and a name ending in ``ssr`` plus ten
    random characters. Every autoscaling group that uses the old launch
    configuration is pointed at the new one, and the old one is deleted.

    Args:
        as_group: autoscaling group whose launch configuration is replaced.
        new_bid: new spot price; a falsy value means no spot price, in which
            case the ``demand_expiration`` tag value is also set and the
            group's zones are reset to its usable zones.
        dry_run: when true nothing is changed in AWS: the new launch
            configuration is not created, groups are not updated and the old
            launch configuration is not deleted.
        minutes_multiplier: factor applied to ``demand_expiration``; must be
            given when ``new_bid`` is falsy.
        demand_expiration: amount added (times ``minutes_multiplier``) to the
            current time for the ``demand_expiration`` tag value; must be
            given when ``new_bid`` is falsy.

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    try:
        this_file = os.path.basename(__file__)
        as_group = reload_as_group(as_group)
        as_conn = boto.ec2.autoscale.connect_to_region(
            as_group.connection.region.name)
        old_launch_config = get_launch_config(as_group)
        random_suffix = ''.join(
            random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
        # The last 13 characters of the old name are replaced by
        # 'ssr' + 10 random characters (also 13 characters).
        new_launch_config_name = old_launch_config.name[:-13] + 'ssr' + random_suffix

        launch_config = LaunchConfiguration(
            image_id=old_launch_config.image_id,
            key_name=old_launch_config.key_name,
            security_groups=old_launch_config.security_groups,
            user_data=old_launch_config.user_data,
            instance_type=old_launch_config.instance_type,
            kernel_id=old_launch_config.kernel_id,
            ramdisk_id=old_launch_config.ramdisk_id,
            block_device_mappings=old_launch_config.block_device_mappings,
            instance_monitoring=old_launch_config.instance_monitoring.enabled,
            instance_profile_name=old_launch_config.instance_profile_name,
            ebs_optimized=old_launch_config.ebs_optimized,
            associate_public_ip_address=old_launch_config.associate_public_ip_address,
            volume_type=old_launch_config.volume_type,
            delete_on_termination=old_launch_config.delete_on_termination,
            iops=old_launch_config.iops,
            use_block_device_types=old_launch_config.use_block_device_types,
            spot_price=new_bid,  # new values
            name=new_launch_config_name,
        )

        # A dry run must not create resources in AWS.
        if not dry_run:
            as_conn.create_launch_configuration(launch_config)
            print_verbose(this_file, 'info', "Created LC %s with price %s." % (
                launch_config.name, new_bid))

        affected_groups = [
            a for a in as_group.connection.get_all_groups()
            if old_launch_config.name == a.launch_config_name]
        for group in affected_groups:
            group.launch_config_name = launch_config.name
            if dry_run:
                continue
            print_verbose(this_file, 'info',
                          "Applying new LC to ASG %s" % group.name)
            group.update()
            set_tag_dict_value(
                group, 'ssr_config', 'LC_name', launch_config.name[-155:])
            if not new_bid:
                set_tag_dict_value(group, 'ssr_config', 'demand_expiration', int(
                    time.time()) + (demand_expiration * minutes_multiplier))
                modify_as_group_azs(group, get_usable_zones(group), dry_run)

        print_verbose(this_file, 'info',
                      "Autoscaling group launch configuration update complete.")

        # In a dry run the groups still reference the old launch
        # configuration, so it has to stay.
        if not dry_run:
            print_verbose(this_file, 'info',
                          "Deleting old launch_config: %s" % old_launch_config)
            old_launch_config.delete()  # XXX is this actually working?

    except Exception as e:
        handle_exception(e)
        sys.exit(1)
def terminate_instance(instance):
    """Terminate an autoscaling instance without shrinking the group.

    Calls ``terminate_instance`` on the instance's connection with
    ``decrement_capacity=False`` and then sleeps 30 seconds. When a
    ``BotoServerError`` is raised it is handed to ``throttle_response`` and
    the termination is attempted again.

    Retries run in a loop rather than by recursion, so a long run of server
    errors cannot exhaust the interpreter's recursion limit.

    Any other exception is passed to ``handle_exception`` and the process
    exits with status 1.
    """
    while True:
        try:
            instance.connection.terminate_instance(
                instance.instance_id, decrement_capacity=False)
            time.sleep(30)
            return None
        except BotoServerError as e:
            throttle_response(e)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
Esempio n. 8
0
def terminate_instance(instance):
    """Terminate ``instance`` while keeping the group's desired capacity.

    The termination is requested with ``decrement_capacity=False`` and
    followed by a 30 second sleep. A ``BotoServerError`` is passed to
    ``throttle_response`` and the request is then repeated.

    The retry is iterative: the earlier recursive form added a stack frame
    per failed attempt and could end in a ``RecursionError`` that bypassed
    ``handle_exception``.

    Any other exception is passed to ``handle_exception`` and the process
    exits with status 1.
    """
    terminated = False
    while not terminated:
        try:
            instance.connection.terminate_instance(instance.instance_id,
                                                   decrement_capacity=False)
            time.sleep(30)
            terminated = True
        except BotoServerError as e:
            throttle_response(e)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
    return None
Esempio n. 9
0
def get_max_bid(as_group):
    """Return the ceiling for bids on this autoscaling group.

    Compares the rounded on-demand price with the rounded ``original_bid``
    stored in the group's ``ssr_config`` tag and returns whichever is larger
    (the original bid on a tie). The returned value is the unrounded one.

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    try:
        demand_price = get_ondemand_price(get_launch_config(as_group))
        ssr_config = get_tag_dict_value(as_group, 'ssr_config')
        original_bid = ssr_config['original_bid']
        demand_is_lower_or_equal = (
            get_rounded_price(demand_price) <= get_rounded_price(original_bid))
        return original_bid if demand_is_lower_or_equal else demand_price
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
def get_max_bid(as_group):
    """Pick the higher of the on-demand price and the group's original bid.

    The two values are compared after ``get_rounded_price``; when the rounded
    on-demand price does not exceed the rounded original bid, the original
    bid (from the ``ssr_config`` tag) is returned, otherwise the on-demand
    price. The value returned is not rounded.

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    try:
        demand_price = get_ondemand_price(get_launch_config(as_group))
        original_bid = get_tag_dict_value(as_group, 'ssr_config')['original_bid']
        rounded_demand = get_rounded_price(demand_price)
        rounded_original = get_rounded_price(original_bid)
        if rounded_demand > rounded_original:
            return demand_price
        return original_bid
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 11
0
def init_az_status_tag(as_group):
    """Build a fresh ``AZ_status`` tag for an autoscaling group.

    Every zone of the group's region gets an entry keyed by the last
    character of the zone name, with ``use`` set to whether the zone is among
    the group's potential zones and ``health`` reset to ``[0, 0, 0]``.

    Returns:
        Whatever ``create_tag`` returns for the ``AZ_status`` tag.

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    try:
        potential_zones = get_potential_azs(as_group)
        ec2_conn = boto.ec2.connect_to_region(as_group.connection.region.name)
        zone_dict = dict(
            (zone.name[-1],
             {"use": zone.name in potential_zones, "health": [0, 0, 0]})
            for zone in ec2_conn.get_all_zones())
        return create_tag(as_group, "AZ_status", zone_dict)
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 12
0
def init_ssr_config_tag(as_group, min_healthy_azs):
    """Create the ``ssr_config`` tag with its initial values.

    The tag records that management is enabled, the group's current bid as
    ``original_bid``, ``min_healthy_azs`` as ``min_AZs``, the tail of the
    launch configuration name and ``demand_expiration`` set to ``False``.

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    try:
        current_bid = get_bid(as_group)
        # LC name size can be up to 255 chars (also tag value max length).
        # Final chars should be unique so we cut this short
        lc_name_tail = as_group.launch_config_name[-155:]
        config_dict = {
            'enabled': True,
            'original_bid': current_bid,
            'min_AZs': min_healthy_azs,
            'LC_name': lc_name_tail,
            'demand_expiration': False,
        }
        create_tag(as_group, 'ssr_config', config_dict)
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 13
0
def init_az_status_tag(as_group):
    """Create the initial ``AZ_status`` tag for ``as_group``.

    All zones of the group's region are listed. Each is stored under the
    final character of its name with a zeroed ``health`` list and a ``use``
    flag that is true only for zones returned by ``get_potential_azs``.

    Returns:
        The result of ``create_tag`` for the ``AZ_status`` tag.

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    try:
        potential_zones = get_potential_azs(as_group)
        ec2_conn = boto.ec2.connect_to_region(as_group.connection.region.name)
        zone_dict = {}
        for zone in ec2_conn.get_all_zones():
            usable = zone.name in potential_zones
            zone_dict[zone.name[-1]] = {"use": usable, "health": [0, 0, 0]}
        return create_tag(as_group, "AZ_status", zone_dict)
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 14
0
def init_ssr_config_tag(as_group, min_healthy_azs):
    """Write a new ``ssr_config`` tag onto an autoscaling group.

    Stored keys: ``enabled`` (``True``), ``original_bid`` (from ``get_bid``),
    ``min_AZs`` (``min_healthy_azs``), ``LC_name`` (last 155 characters of the
    launch configuration name) and ``demand_expiration`` (``False``).

    Any exception is passed to ``handle_exception`` and the process exits
    with status 1.
    """
    try:
        config_dict = {'enabled': True}
        config_dict['original_bid'] = get_bid(as_group)
        config_dict['min_AZs'] = min_healthy_azs
        # LC name size can be up to 255 chars (also tag value max length).
        # Final chars should be unique so we cut this short
        config_dict['LC_name'] = as_group.launch_config_name[-155:]
        config_dict['demand_expiration'] = False
        create_tag(as_group, 'ssr_config', config_dict)
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
def modify_as_group_azs(as_group, healthy_zones, dry_run):
    """Set an autoscaling group's availability zones to ``healthy_zones``.

    The group is reloaded, its ``availability_zones`` attribute is replaced
    and, unless ``dry_run`` is true, the change is pushed with ``update()``.

    A ``BotoServerError`` whose ``error_code`` is ``'Throttling'`` causes a
    one second pause followed by another attempt. Any other server error is
    treated as fatal: retrying it immediately and without limit, as the
    earlier recursive form did, could never succeed for a permanent error.

    Fatal errors are passed to ``handle_exception`` and the process exits
    with status 1.
    """
    this_file = os.path.basename(__file__)
    while True:
        try:
            as_group = reload_as_group(as_group)
            as_group.availability_zones = healthy_zones
            print_verbose(
                this_file, 'info', "Updating with AZs %s" % healthy_zones)
            if not dry_run:
                as_group.update()
            return None

        except BotoServerError as e:
            if e.error_code != 'Throttling':
                handle_exception(e)
                sys.exit(1)
            print_verbose(this_file, 'info', 'Pausing for aws throttling...')
            time.sleep(1)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
Esempio n. 16
0
def modify_as_group_azs(as_group, healthy_zones, dry_run):
    """Point an autoscaling group at the given availability zones.

    Reloads the group, assigns ``healthy_zones`` to its
    ``availability_zones`` and calls ``update()`` unless ``dry_run`` is true.

    Only throttling (``error_code == 'Throttling'``) is retried, after a one
    second pause and in a loop. Other ``BotoServerError`` values used to be
    retried recursively with no pause and no limit; they are now fatal.

    Fatal errors are passed to ``handle_exception`` and the process exits
    with status 1.
    """
    script = os.path.basename(__file__)
    done = False
    while not done:
        try:
            as_group = reload_as_group(as_group)
            as_group.availability_zones = healthy_zones
            print_verbose(script, 'info',
                          "Updating with AZs %s" % healthy_zones)
            if not dry_run:
                as_group.update()
            done = True

        except BotoServerError as e:
            if e.error_code == 'Throttling':
                print_verbose(script, 'info',
                              'Pausing for aws throttling...')
                time.sleep(1)
            else:
                handle_exception(e)
                sys.exit(1)
        except Exception as e:
            handle_exception(e)
            sys.exit(1)
    return None
Esempio n. 17
0
def main(args):
    """Record per-zone spot price health for every SSR-managed group.

    For each region not in ``args.excluded_regions`` and each group returned
    by ``get_ssr_groups``, the current spot prices are compared with the
    group's bid: a zone whose price is above the bid is marked ``1``,
    otherwise ``0``, keyed by the last character of the zone name. The
    resulting tags are written with ``update_tags`` unless this is a dry run.

    Returns:
        ``1`` when an unexpected exception occurs; otherwise ``None``. An
        ``EC2ResponseError`` is reported and the next region is processed.
    """
    verbose, dry_run = dry_run_necessaries(args.dry_run, args.verbose)
    region_names = [
        info.name for info in boto.ec2.regions()
        if info.name not in args.excluded_regions
    ]
    for region in region_names:
        try:
            this_file = os.path.basename(__file__)
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            health_tags = []
            for as_group in get_ssr_groups(as_conn):
                bid = get_bid(as_group)
                current_prices = get_current_spot_prices(as_group)
                if not current_prices:
                    continue
                print_verbose(this_file, 'info',
                              "Updating health for %s" % as_group.name)
                health_dict = {}
                for price in current_prices:
                    # * 1.1: #NOTE: potential feature to require a price buffer here?
                    zone_letter = price.availability_zone[-1]
                    health_dict[zone_letter] = 1 if price.price > bid else 0
                health_tags.append(
                    update_az_health_list_tag(as_group, health_dict))

            if health_tags and not dry_run:
                update_tags(as_conn, health_tags)
                print_verbose(this_file, 'info', "All tags updated!")

            print_verbose(this_file, 'info', 'Done with pass on %s' % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            return 1

    print_verbose(os.path.basename(__file__), 'info', "All regions complete")
def maximize_elb_azs(elb_conn, as_group, dry_run):
    """Make each load balancer of a group cover exactly the group's usable zones.

    For every load balancer named in ``as_group.load_balancers`` the zones
    are compared with ``get_usable_zones(as_group)``. On a mismatch (and
    outside a dry run) zones the group cannot use are disabled on the load
    balancer and the usable zones are enabled.

    When the update is rejected with a ``ValidationError`` saying that two
    zones are constrained and cannot be used together, the first zone named
    in the message is marked disabled on the group through
    ``mark_asg_az_disabled`` and ``update_tags``.

    Other errors that carry an ``error_code`` are logged and ignored, so the
    remaining load balancers are still processed. Exceptions without an
    ``error_code`` are re-raised to the outer handler, so the real error is
    reported rather than an ``AttributeError``.

    Args:
        elb_conn: ELB connection used to look up the load balancers.
        as_group: autoscaling group whose load balancers are adjusted.
        dry_run: when true, mismatches are only logged.

    Any exception reaching the outer handler is passed to
    ``handle_exception`` and the process exits with status 1.
    """
    try:
        this_file = os.path.basename(__file__)
        for elb_name in as_group.load_balancers:
            elb = elb_conn.get_all_load_balancers(elb_name)[0]
            # Look the usable zones up once per load balancer rather than
            # for every comparison below.
            usable_zones = get_usable_zones(as_group)
            if sorted(elb.availability_zones) == sorted(usable_zones):
                continue

            print_verbose(
                this_file, 'info', "AZs for ELB don't include all potential AZs. Removing unusable zones and adding the rest now.")
            if dry_run:
                continue

            in_lb_but_not_asg = list(
                set(elb.availability_zones) - set(usable_zones))
            in_asg_but_not_lb = list(
                set(usable_zones) - set(elb.availability_zones))
            if not in_lb_but_not_asg and not in_asg_but_not_lb:
                continue

            try:
                if in_lb_but_not_asg:
                    elb.disable_zones(in_lb_but_not_asg)
                elb.enable_zones(usable_zones)
            except Exception as e:
                if not hasattr(e, 'error_code'):
                    # Not a service error: report it unchanged.
                    raise
                message = getattr(e, 'message', None) or ''
                is_zone_conflict = (
                    e.error_code == 'ValidationError' and
                    'is constrained and cannot be used together with' in message)
                if not is_zone_conflict:
                    print_verbose(
                        this_file, 'info', 'Ignoring error while updating ELB zones: %s' % e)
                    continue

                print_verbose(
                    this_file, 'info', 'Conflict found between two AZs. Removing one of them from use.')
                pattern = re.compile(
                    r'\b\w{2}-\w{4,}-\d\w and \w{2}-\w{4,}-\d\w\b')
                match = pattern.search(message)
                if match:
                    # First zone of the "<zone> and <zone>" pair; keep only
                    # the letter after the digit of its last name segment.
                    bad_az = match.group().split()[0].split('-')[-1][1]
                    print_verbose(
                        this_file, 'info', 'Removing %s from potential AZs as it confilcts with another AZ.' % bad_az)
                    # smarter here would be to figure out
                    # which AZ is a better choice
                    new_tag = mark_asg_az_disabled(as_group, bad_az)
                    update_tags(as_group.connection, [new_tag])

    except Exception as e:
        handle_exception(e)
        sys.exit(1)
def main(args):
    """Update the AZ health tags of all SSR-managed autoscaling groups.

    Regions listed in ``args.excluded_regions`` are skipped. In every other
    region each group from ``get_ssr_groups`` has its current spot prices
    checked against its bid; a zone priced above the bid gets health ``1``,
    any other zone ``0``. The tags produced by ``update_az_health_list_tag``
    are applied with ``update_tags`` unless this is a dry run.

    Returns:
        ``1`` when an unexpected exception occurs; otherwise ``None``. An
        ``EC2ResponseError`` is reported and the next region is processed.
    """
    verbose, dry_run = dry_run_necessaries(args.dry_run, args.verbose)

    selected_regions = []
    for region_info in boto.ec2.regions():
        if region_info.name not in args.excluded_regions:
            selected_regions.append(region_info.name)

    for region in selected_regions:
        try:
            script = os.path.basename(__file__)
            print_verbose(script, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_ssr_groups(as_conn)
            health_tags = []
            for as_group in as_groups:
                bid = get_bid(as_group)
                current_prices = get_current_spot_prices(as_group)
                if current_prices:
                    print_verbose(
                        script, 'info', "Updating health for %s" % as_group.name)
                    # * 1.1: #NOTE: potential feature to require a price buffer here?
                    health_dict = dict(
                        (price.availability_zone[-1],
                         1 if price.price > bid else 0)
                        for price in current_prices)
                    new_tag = update_az_health_list_tag(as_group, health_dict)
                    health_tags.append(new_tag)

            if health_tags and not dry_run:
                update_tags(as_conn, health_tags)
                print_verbose(script, 'info', "All tags updated!")

            print_verbose(script, 'info', 'Done with pass on %s' % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            return 1

    print_verbose(os.path.basename(__file__), 'info', "All regions complete")
def find_best_bid_price(as_group):
    """Return the bid needed to keep the minimum number of zones, if affordable.

    Spot prices are ordered from cheapest to most expensive; the price in
    slot ``get_min_azs(as_group) - 1`` becomes the candidate bid.

    Returns:
        The rounded candidate bid, or ``False`` when the rounded candidate is
        at or above the rounded maximum bid, or when the group's rounded
        current bid is at or above the rounded on-demand price.

    A mismatch between the number of prices and the number of usable zones,
    or any other exception, goes to ``handle_exception`` and the process
    exits with status 1.
    """
    try:
        script = os.path.basename(__file__)
        prices = get_current_spot_prices(as_group)
        print_verbose(script, 'info', prices)

        price_count = len(prices)
        zone_count = len(get_usable_zones(as_group))
        if price_count != zone_count:
            raise Exception("Different number of AZs found than expected. Prices = %s\nAZs = %s" % (
                str(prices), str(get_usable_zones(as_group))))

        ordered = sorted(prices, key=lambda price: price.price)
        best_bid = ordered[int(get_min_azs(as_group)) - 1].price
        print_verbose(script, 'info', 'best_bid=', best_bid)

        max_bid = get_max_bid(as_group)
        print_verbose(script, 'info', 'max_bid=', max_bid)

        # since ondemand instances are faster to spin up and more
        # available, if demand and max_bid are equal, ondemand should win
        # out.
        too_expensive = get_rounded_price(best_bid) >= get_rounded_price(max_bid)
        if not too_expensive:
            bid_now = get_rounded_price(get_bid(as_group))
            demand_now = get_rounded_price(
                get_ondemand_price(get_launch_config(as_group)))
            too_expensive = bid_now >= demand_now

        if too_expensive:
            return False
        return get_rounded_price(best_bid)
    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 21
0
def main(args):
    """Make sure spot-backed and SSR-managed groups carry valid SSR tags.

    For every region not listed in ``args.excluded_regions`` this collects
    the autoscaling groups that either use a launch configuration with a
    spot price or already have an ``ssr_config`` tag whose ``enabled`` value
    is ``True``. Each group then has its ``ssr_config`` and ``AZ_status``
    tags created, verified, re-initialised or removed.

    ``EC2ResponseError`` and ``BotoServerError`` are reported through
    ``handle_exception`` and processing moves on to the next region.

    Returns:
        ``1`` when any other exception occurs; otherwise ``None``.
    """
    global verbose
    global dry_run
    # NOTE(review): dry_run is assigned here but not read anywhere in this
    # function - confirm whether the tag helpers consult the global.
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    for region in [
            r.name for r in boto.ec2.regions()
            if r.name not in args.excluded_regions
    ]:
        try:
            print_verbose(os.path.basename(__file__), 'info',
                          'Starting pass on %s' % region)
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)

            all_groups = as_conn.get_all_groups()
            # Launch configurations that have a spot price set.
            spot_lcs = [
                e for e in as_conn.get_all_launch_configurations()
                if e.spot_price
            ]
            # these need to be pulled from the same all_groups list or
            # duplicate objects will be seen as distinct.
            spot_lc_groups = [
                g for g in all_groups
                if g.launch_config_name in [s.name for s in spot_lcs]
            ]
            previously_ssr_managed_groups = [
                g for g in all_groups if get_tag_dict_value(g, 'ssr_config')
                and get_tag_dict_value(g, 'ssr_config')['enabled'] is True
            ]

            # Union of both selections with duplicates removed.
            all_groups = list(
                set(spot_lc_groups + previously_ssr_managed_groups))
            for as_group in all_groups:
                print_verbose(os.path.basename(__file__), 'info',
                              "Evaluating %s" % as_group.name)

                # this latter condition can happen when tag value (a dict)
                # can't be interpreted by ast.literal_eval()
                if args.reset_tags or not [
                        t for t in as_group.tags if t.key == 'ssr_config'
                ] or not get_tag_dict_value(as_group, 'ssr_config'):
                    print_verbose(
                        os.path.basename(__file__), 'info',
                        'Tags not found or reset tags option flagged. Adding all tags anew now.'
                    )
                    init_ssr_config_tag(as_group, args.min_healthy_AZs)
                    init_az_status_tag(as_group)

                # ssr_config tag present with a falsy 'enabled' value.
                elif [
                        t for t in as_group.tags
                        if t.key == 'ssr_config' and not get_tag_dict_value(
                            as_group, 'ssr_config')['enabled']
                ]:
                    print_verbose(os.path.basename(__file__), 'info',
                                  'ssr_config DISABLED. Doing nothing.')

                # ssr_config tag present with a truthy 'enabled' value.
                elif [
                        t for t in as_group.tags
                        if t.key == 'ssr_config' and get_tag_dict_value(
                            as_group, 'ssr_config')['enabled']
                ]:
                    print_verbose(
                        os.path.basename(__file__), 'info',
                        'ssr management enabled. Verifying all config values in place.'
                    )
                    config_keys = [
                        'enabled', 'original_bid', 'LC_name', 'min_AZs',
                        'demand_expiration'
                    ]

                    if not verify_tag_dict_keys(as_group, 'ssr_config', config_keys) or \
                            not get_tag_dict_value(as_group, 'ssr_config')['LC_name'] == as_group.launch_config_name[-155:]:
                        # this would indicate a change to the LC outside of ssr
                        # scope. In that case, we need to disable ssr via tag
                        # deletion.
                        if not get_launch_config(as_group).spot_price:
                            del_ssr_tags(as_group)
                            continue
                        else:
                            init_ssr_config_tag(as_group, args.min_healthy_AZs)

                    # AZ_status is expected to have one key per zone of the
                    # region, named by the zone's last character.
                    zones = [z.name[-1] for z in ec2_conn.get_all_zones()]
                    if not verify_tag_dict_keys(as_group, 'AZ_status', zones):
                        init_az_status_tag(as_group)

                else:
                    raise Exception(
                        "ssr_enabled tag found for %s but isn't a valid value."
                        % (as_group.name, ))

            print_verbose(os.path.basename(__file__), 'info',
                          'Done with pass on %s' % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except BotoServerError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            return 1

    print_verbose(os.path.basename(__file__), 'info', "All regions complete")
def main(args):
    """Adjust zones, bids and on-demand fallback for every SSR-managed group.

    For each region not listed in ``args.excluded_regions`` and each group
    returned by ``get_ssr_groups``:

    * load balancer zones are aligned through ``maximize_elb_azs``;
    * when the group's ``demand_expiration`` tag value is set and in the
      past, the group goes back to its original bid if enough zones are
      healthy, otherwise the expiration is pushed out;
    * when no expiration is set and the zones in use differ from the healthy
      zones, either the zones are changed, the bid is changed, or the group
      is moved to on-demand.

    ``EC2ResponseError`` is reported through ``handle_exception`` and the
    next region is processed.

    Returns:
        ``1`` when any other exception occurs; otherwise ``None``.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    for region in [r.name for r in boto.ec2.regions() if r.name not in args.excluded_regions]:
        try:
            print_verbose(
                os.path.basename(__file__), 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_ssr_groups(as_conn)
            elb_conn = boto.ec2.elb.connect_to_region(as_conn.region.name)
            # Multiplied with args.demand_expiration and added to time.time()
            # below; presumably converts minutes to seconds - TODO confirm.
            minutes_multiplier = 60
            for as_group in as_groups:
                as_group = reload_as_group(as_group)
                print_verbose(
                    os.path.basename(__file__), 'info', "Checking %s" % as_group.name)
                if as_group.load_balancers:
                    maximize_elb_azs(elb_conn, as_group, dry_run)
                demand_expiration = get_tag_dict_value(
                    as_group, 'ssr_config')['demand_expiration']
                healthy_zones = get_healthy_zones(
                    as_group, args.min_health_threshold)
                # An expiration other than False means one was recorded
                # earlier (see the "Moving to ondemand" branch below).
                if demand_expiration is not False:
                    if demand_expiration < int(time.time()):
                        if len(healthy_zones) >= get_min_azs(as_group):
                            print_verbose(os.path.basename(
                                __file__), 'info', 'Woot! We can move back to spots at original bid price.')
                            modify_as_group_azs(
                                as_group, healthy_zones, dry_run)
                            modify_price(
                                as_group, get_tag_dict_value(as_group, 'ssr_config')['original_bid'], dry_run)
                            # NOTE(review): this tag write is not guarded by
                            # dry_run here - confirm set_tag_dict_value
                            # handles dry runs itself.
                            set_tag_dict_value(
                                as_group, 'ssr_config', 'demand_expiration', False)
                            # kill all demand instances that were created
                            ec2_conn = boto.ec2.connect_to_region(
                                as_group.connection.region.name)
                            all_ec2_instances = ec2_conn.get_all_instances()
                            print_verbose(os.path.basename(
                                __file__), 'info', "Looking at %s instances for potential termination" % str(len(as_group.instances)))
                            for instance in as_group.instances:
                                # Only the first instance of each entry in
                                # all_ec2_instances is compared; an instance
                                # with no spot request id is terminated.
                                if not [i for i in all_ec2_instances if
                                        i.instances[0].id == instance.instance_id][0].instances[0].spot_instance_request_id and \
                                        not dry_run:
                                    terminate_instance(instance)
                        else:
                            print_verbose(os.path.basename(
                                __file__), 'info', 'Extending the life of demand instances as we cant fulfill with spots still')
                            set_tag_dict_value(as_group, 'ssr_config', 'demand_expiration', int(
                                time.time()) + (args.demand_expiration * minutes_multiplier))

                elif sorted(as_group.availability_zones) != sorted(healthy_zones):
                    as_group = reload_as_group(as_group)
                    print_verbose(
                        os.path.basename(__file__), 'info', "Healthy zones and zones in use dont match")
                    if len(healthy_zones) >= get_min_azs(as_group):
                        print_verbose(
                            os.path.basename(__file__), 'info', 'Modifying zones accordingly.')
                        modify_as_group_azs(as_group, healthy_zones, dry_run)

                    else:
                        print_verbose(os.path.basename(
                            __file__), 'info', "Bid will need to be modified as we can't meet AZ minimum of %s" % str(get_min_azs(as_group)))
                        # find_best_bid_price returns False when no
                        # acceptable spot bid exists.
                        best_bid = find_best_bid_price(as_group)
                        print_verbose(os.path.basename(
                            __file__), 'info', "Best possible bid given AZ minimum is %s" % str(best_bid))
                        if best_bid:
                            modify_price(as_group, best_bid, dry_run)
                        else:
                            print_verbose(
                                os.path.basename(__file__), 'info', "Moving to ondemand.")
                            # A bid of None produces a launch configuration
                            # without a spot price.
                            modify_price(
                                as_group, None, dry_run, minutes_multiplier, args.demand_expiration)
                            set_tag_dict_value(as_group, 'ssr_config', 'demand_expiration', int(
                                time.time()) + (args.demand_expiration * minutes_multiplier))
                            modify_as_group_azs(
                                as_group, get_usable_zones(as_group), dry_run)
                else:
                    print_verbose(
                        os.path.basename(__file__), 'info', 'No further actions to take on this ASG.')
            print_verbose(
                os.path.basename(__file__), 'info', 'Done with pass on %s' % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            return 1

    print_verbose(os.path.basename(__file__), 'info', "All regions complete")
def main(args):
    """Cancel stale, unfulfilled spot requests and record the AZ they targeted.

    For every EC2 region whose name is not in ``args.excluded_regions`` this
    fetches the spot instance requests whose status code is one of the codes
    listed below, skips requests that use an ElasticMapReduce security group,
    and for each remaining request created more than ``args.minutes`` minutes
    ago:

    * finds the single spot launch configuration whose price, instance type,
      instance profile name and image id equal the request's (anything other
      than exactly one match raises),
    * builds an AZ health tag through ``update_az_health_list_tag`` for every
      autoscaling group that uses that launch configuration,
    * cancels the request and pushes the tags accumulated so far for the
      region, unless ``args.dry_run`` is set.

    ``EC2ResponseError`` is handed to ``handle_exception`` and the next region
    is processed. Any other exception is handed to ``handle_exception`` and
    then ends the process with exit status 1.
    """
    # The returned pair is not read again in this function; the call itself
    # is kept as in the original.
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    region_names = [
        r.name for r in boto.ec2.regions()
        if r.name not in args.excluded_regions
    ]
    for region in region_names:
        try:
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_all_as_groups(as_conn)
            all_spot_lcs = get_spot_lcs(as_conn)
            bad_statuses = json.loads('''{"status-code": [
                    "capacity-not-available",
                    "capacity-oversubscribed",
                    "price-too-low",
                    "not-scheduled-yet",
                    "launch-group-constraint",
                    "az-group-constraint",
                    "placement-group-constraint",
                    "constraint-not-fulfillable"
                    ]}''')
            pending_requests = list(
                ec2_conn.get_all_spot_instance_requests(filters=bad_statuses))
            oldest_time = datetime.utcnow() - timedelta(minutes=args.minutes)
            # Shared by every request of this region; grows as requests are
            # processed.
            health_tags = []
            for request in pending_requests:
                spec = request.launch_specification

                if any('ElasticMapReduce' in group.name
                       for group in spec.groups):
                    print_verbose(
                        this_file, 'info', "This request belongs to the ElasticMapReduce group and will not be SSR managed.")
                    continue

                created_at = datetime.strptime(
                    request.create_time, "%Y-%m-%dT%H:%M:%S.000Z")
                if not oldest_time > created_at:
                    print_verbose(this_file, 'info', "Request %s not older than %s minutes. Continuing..." % (
                        request.id, str(args.minutes)))
                    continue

                print_verbose(
                    this_file, 'info', "Bad request found. Identifying LC and associated ASGs to tag AZ health.")

                def same_launch_settings(lc):
                    # Comparison order matters: the instance profile is only
                    # dereferenced once price and instance type already match.
                    return (request.price == lc.spot_price and
                            spec.instance_type == lc.instance_type and
                            spec.instance_profile['name'] == lc.instance_profile_name and
                            spec.image_id == lc.image_id)

                # This could be made hella specific if we want to go that route
                launch_configs = [
                    lc for lc in all_spot_lcs if same_launch_settings(lc)]
                if len(launch_configs) != 1:
                    raise Exception(
                        "Only one launch config should be found. You may need to run remove_old_launch_configs.py to clear this: %s" % launch_configs)
                launch_config = launch_configs[0]

                offending_as_groups = [
                    g for g in as_groups
                    if g.launch_config_name == launch_config.name]
                # First character following the region name inside the launch
                # group -- presumably the AZ letter; verify against how launch
                # groups are named.
                az_suffix = request.launch_group.split(request.region.name)[1]
                bad_az = az_suffix[0]
                health_dict = {bad_az: 1}
                for as_group in offending_as_groups:
                    print_verbose(
                        this_file, 'info', "The following AZ will be tagged as an offender: %s." % str(as_group))
                    new_tag = update_az_health_list_tag(as_group, health_dict)
                    health_tags.append(new_tag)

                print_verbose(
                    this_file, 'info', "Killing spot request %s." % str(request.id))
                if args.dry_run:
                    print_verbose(this_file, 'info', "PSYCH! Dry run.")
                else:
                    request.cancel()
                    update_tags(as_conn, health_tags)

            print_verbose(
                this_file, 'info', "Region %s pass complete." % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            sys.exit(1)

    print_verbose(this_file, 'info', "All regions complete")
Esempio n. 24
0
def main(args):
    """Sweep each region for spot requests stuck in an unfulfillable status.

    Regions named in ``args.excluded_regions`` are skipped. In every other
    region the spot requests matching the status codes below are examined:

    * requests with a security group whose name contains
      ``ElasticMapReduce`` are left alone;
    * requests created less than ``args.minutes`` minutes ago are only
      logged;
    * older requests are matched to exactly one spot launch configuration
      (same price, instance type, instance profile name and image id; any
      other number of matches raises). Every autoscaling group on that
      launch configuration gets a health tag from
      ``update_az_health_list_tag``. The request is then cancelled and the
      region's collected tags are sent with ``update_tags``, unless
      ``args.dry_run`` is set.

    An ``EC2ResponseError`` is reported through ``handle_exception`` and the
    sweep moves on to the next region; any other exception is reported the
    same way and then exits the process with status 1.
    """
    # Result is not used further here; the call is preserved as written.
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    create_time_format = "%Y-%m-%dT%H:%M:%S.000Z"

    for ec2_region in boto.ec2.regions():
        if ec2_region.name in args.excluded_regions:
            continue
        region = ec2_region.name
        try:
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_all_as_groups(as_conn)
            all_spot_lcs = get_spot_lcs(as_conn)
            bad_statuses = json.loads('''{"status-code": [
                    "capacity-not-available",
                    "capacity-oversubscribed",
                    "price-too-low",
                    "not-scheduled-yet",
                    "launch-group-constraint",
                    "az-group-constraint",
                    "placement-group-constraint",
                    "constraint-not-fulfillable"
                    ]}''')
            stuck_requests = [
                req for req in
                ec2_conn.get_all_spot_instance_requests(filters=bad_statuses)
            ]
            cutoff = datetime.utcnow() - timedelta(minutes=args.minutes)
            # One list per region; it keeps growing across requests.
            health_tags = []

            for request in stuck_requests:
                spec = request.launch_specification

                uses_emr_group = False
                for sec_group in spec.groups:
                    if 'ElasticMapReduce' in sec_group.name:
                        uses_emr_group = True
                        break
                if uses_emr_group:
                    print_verbose(
                        this_file, 'info',
                        "This request belongs to the ElasticMapReduce group and will not be SSR managed."
                    )
                    continue

                created = datetime.strptime(request.create_time,
                                            create_time_format)
                if cutoff > created:
                    print_verbose(
                        this_file, 'info',
                        "Bad request found. Identifying LC and associated ASGs to tag AZ health."
                    )
                    # This could be made hella specific if we want to go that route
                    launch_configs = []
                    for lc in all_spot_lcs:
                        if (request.price == lc.spot_price
                                and spec.instance_type == lc.instance_type
                                and spec.instance_profile['name'] ==
                                lc.instance_profile_name
                                and spec.image_id == lc.image_id):
                            launch_configs.append(lc)

                    if len(launch_configs) != 1:
                        raise Exception(
                            "Only one launch config should be found. You may need to run remove_old_launch_configs.py to clear this: %s"
                            % launch_configs)
                    launch_config = launch_configs[0]

                    offending_as_groups = [
                        group for group in as_groups
                        if group.launch_config_name == launch_config.name
                    ]
                    # Character right after the region name in the launch
                    # group; presumably the AZ letter -- TODO confirm.
                    after_region = request.launch_group.split(
                        request.region.name)[1]
                    health_dict = {after_region[0]: 1}

                    for as_group in offending_as_groups:
                        print_verbose(
                            this_file, 'info',
                            "The following AZ will be tagged as an offender: %s."
                            % str(as_group))
                        health_tags.append(
                            update_az_health_list_tag(as_group, health_dict))

                    print_verbose(this_file, 'info',
                                  "Killing spot request %s." % str(request.id))
                    if args.dry_run:
                        print_verbose(this_file, 'info', "PSYCH! Dry run.")
                    else:
                        request.cancel()
                        update_tags(as_conn, health_tags)
                else:
                    print_verbose(
                        this_file, 'info',
                        "Request %s not older than %s minutes. Continuing..." %
                        (request.id, str(args.minutes)))

            print_verbose(this_file, 'info',
                          "Region %s pass complete." % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            sys.exit(1)

    print_verbose(this_file, 'info', "All regions complete")
Esempio n. 25
0
def modify_price(as_group,
                 new_bid,
                 dry_run,
                 minutes_multiplier=None,
                 demand_expiration=None):
    """Replace the launch configuration behind ``as_group`` with a re-priced copy.

    A new ``LaunchConfiguration`` is built from the group's current one with
    ``spot_price=new_bid``. Its name is the old name minus its last 13
    characters, plus ``'ssr'`` and 10 random uppercase/digit characters.
    Every autoscaling group that references the old launch configuration is
    switched to the new one and its ``ssr_config`` ``LC_name`` entry is set
    to the last 155 characters of the new name. Finally the old launch
    configuration is deleted.

    Args:
        as_group: Autoscaling group whose launch configuration is replaced;
            it is reloaded via ``reload_as_group`` first.
        new_bid: Spot price for the new launch configuration. A falsy value
            means no bid; in that case each updated group also gets a
            ``demand_expiration`` entry and has its AZs reset to
            ``get_usable_zones``.
        dry_run: When true, nothing is created, updated or deleted; the
            intended actions are only logged.
        minutes_multiplier: Factor applied to ``demand_expiration``; required
            when ``new_bid`` is falsy.
        demand_expiration: Lifetime added (after multiplication) to the
            current epoch time; required when ``new_bid`` is falsy.

    Any exception is passed to ``handle_exception`` and the process then
    exits with status 1.
    """
    try:
        this_file = os.path.basename(__file__)

        # Fail before touching AWS instead of half-way through the update:
        # the expiry arithmetic below cannot work with None operands.
        if not new_bid and (minutes_multiplier is None
                            or demand_expiration is None):
            raise ValueError(
                "minutes_multiplier and demand_expiration are required "
                "when no bid is given")

        as_group = reload_as_group(as_group)
        as_conn = boto.ec2.autoscale.connect_to_region(
            as_group.connection.region.name)
        old_launch_config = get_launch_config(as_group)
        new_launch_config_name = old_launch_config.name[:-13] + 'ssr' + ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(10))

        launch_config = LaunchConfiguration(
            image_id=old_launch_config.image_id,
            key_name=old_launch_config.key_name,
            security_groups=old_launch_config.security_groups,
            user_data=old_launch_config.user_data,
            instance_type=old_launch_config.instance_type,
            kernel_id=old_launch_config.kernel_id,
            ramdisk_id=old_launch_config.ramdisk_id,
            block_device_mappings=old_launch_config.block_device_mappings,
            instance_monitoring=old_launch_config.instance_monitoring.enabled,
            instance_profile_name=old_launch_config.instance_profile_name,
            ebs_optimized=old_launch_config.ebs_optimized,
            associate_public_ip_address=old_launch_config.
            associate_public_ip_address,
            volume_type=old_launch_config.volume_type,
            delete_on_termination=old_launch_config.delete_on_termination,
            iops=old_launch_config.iops,
            use_block_device_types=old_launch_config.use_block_device_types,
            spot_price=new_bid,  # new values
            name=new_launch_config_name,
        )

        if dry_run:
            print_verbose(
                this_file, 'info',
                "Dry run: would create LC %s with price %s." %
                (launch_config.name, new_bid))
        else:
            as_conn.create_launch_configuration(launch_config)
            print_verbose(
                this_file, 'info',
                "Created LC %s with price %s." % (launch_config.name, new_bid))

        # Every group sharing the old LC has to move, not just the one that
        # was passed in, otherwise the old LC could not be deleted.
        groups_on_old_lc = [
            a for a in as_group.connection.get_all_groups()
            if old_launch_config.name == a.launch_config_name
        ]
        for group in groups_on_old_lc:
            if dry_run:
                print_verbose(
                    this_file, 'info',
                    "Dry run: would apply new LC to ASG %s" % group.name)
                continue
            group.launch_config_name = launch_config.name
            print_verbose(this_file, 'info',
                          "Applying new LC to ASG %s" % group.name)
            group.update()
            set_tag_dict_value(group, 'ssr_config', 'LC_name',
                               launch_config.name[-155:])
            if not new_bid:
                set_tag_dict_value(
                    group, 'ssr_config', 'demand_expiration',
                    int(time.time()) +
                    (demand_expiration * minutes_multiplier))
                modify_as_group_azs(group, get_usable_zones(group), dry_run)

        if dry_run:
            # The groups still point at the old LC, so it must stay.
            print_verbose(
                this_file, 'info',
                "Dry run: keeping old launch_config: %s" % old_launch_config)
            return

        print_verbose(
            this_file, 'info',
            "Autoscaling group launch configuration update complete.")
        print_verbose(this_file, 'info',
                      "Deleting old launch_config: %s" % old_launch_config)
        old_launch_config.delete()  # XXX is this actually working?

    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 26
0
def main(args):
    """Reconcile every SSR-managed autoscaling group with current AZ health.

    For each region not in ``args.excluded_regions`` and each group returned
    by ``get_ssr_groups``:

    * groups with load balancers are first passed to ``maximize_elb_azs``;
    * if the group's ``ssr_config`` ``demand_expiration`` is set (anything
      but ``False``) and already in the past, the group either moves back to
      spot at its ``original_bid`` (enough healthy zones; on-demand instances
      of the group are then terminated) or has its expiry pushed out by
      ``args.demand_expiration * 60`` seconds;
    * otherwise, if the zones in use differ from the healthy zones, the
      group's zones are changed, or, when the AZ minimum cannot be met, the
      bid is changed to ``find_best_bid_price`` or the group is moved to
      on-demand.

    ``EC2ResponseError`` is reported through ``handle_exception`` and the
    next region is processed.

    Returns:
        ``1`` as soon as any other exception has been reported; ``None``
        when all regions were processed.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    for region in [
            r.name for r in boto.ec2.regions()
            if r.name not in args.excluded_regions
    ]:
        try:
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)
            as_groups = get_ssr_groups(as_conn)
            elb_conn = boto.ec2.elb.connect_to_region(as_conn.region.name)
            # args.demand_expiration is multiplied by this before being added
            # to epoch seconds.
            minutes_multiplier = 60
            for as_group in as_groups:
                as_group = reload_as_group(as_group)
                print_verbose(this_file, 'info',
                              "Checking %s" % as_group.name)
                if as_group.load_balancers:
                    maximize_elb_azs(elb_conn, as_group, dry_run)
                demand_expiration = get_tag_dict_value(
                    as_group, 'ssr_config')['demand_expiration']
                healthy_zones = get_healthy_zones(as_group,
                                                  args.min_health_threshold)
                if demand_expiration is not False:
                    # Group is on on-demand instances; act only once the
                    # recorded expiry has passed.
                    if demand_expiration < int(time.time()):
                        if len(healthy_zones) >= get_min_azs(as_group):
                            print_verbose(
                                this_file, 'info',
                                'Woot! We can move back to spots at original bid price.'
                            )
                            modify_as_group_azs(as_group, healthy_zones,
                                                dry_run)
                            modify_price(
                                as_group,
                                get_tag_dict_value(
                                    as_group, 'ssr_config')['original_bid'],
                                dry_run)
                            set_tag_dict_value(as_group, 'ssr_config',
                                               'demand_expiration', False)
                            # kill all demand instances that were created
                            ec2_conn = boto.ec2.connect_to_region(
                                as_group.connection.region.name)
                            # get_all_instances() yields reservations and a
                            # reservation can hold several instances, so index
                            # every instance rather than only the first one.
                            ec2_instances = dict(
                                (ec2_instance.id, ec2_instance)
                                for reservation in ec2_conn.get_all_instances()
                                for ec2_instance in reservation.instances)
                            print_verbose(
                                this_file, 'info',
                                "Looking at %s instances for potential termination"
                                % str(len(as_group.instances)))
                            for instance in as_group.instances:
                                ec2_instance = ec2_instances.get(
                                    instance.instance_id)
                                if ec2_instance is None:
                                    # Listed on the ASG but not returned by
                                    # EC2; nothing to inspect or terminate.
                                    print_verbose(
                                        this_file, 'info',
                                        "Instance %s not found in EC2. Skipping."
                                        % instance.instance_id)
                                    continue
                                # No spot request id means an on-demand
                                # instance.
                                if not ec2_instance.spot_instance_request_id \
                                        and not dry_run:
                                    terminate_instance(instance)
                        else:
                            print_verbose(
                                this_file, 'info',
                                'Extending the life of demand instances as we cant fulfill with spots still'
                            )
                            set_tag_dict_value(
                                as_group, 'ssr_config', 'demand_expiration',
                                int(time.time()) +
                                (args.demand_expiration * minutes_multiplier))

                elif sorted(
                        as_group.availability_zones) != sorted(healthy_zones):
                    as_group = reload_as_group(as_group)
                    print_verbose(this_file, 'info',
                                  "Healthy zones and zones in use dont match")
                    if len(healthy_zones) >= get_min_azs(as_group):
                        print_verbose(this_file, 'info',
                                      'Modifying zones accordingly.')
                        modify_as_group_azs(as_group, healthy_zones, dry_run)

                    else:
                        print_verbose(
                            this_file, 'info',
                            "Bid will need to be modified as we can't meet AZ minimum of %s"
                            % str(get_min_azs(as_group)))
                        best_bid = find_best_bid_price(as_group)
                        print_verbose(
                            this_file, 'info',
                            "Best possible bid given AZ minimum is %s" %
                            str(best_bid))
                        if best_bid:
                            modify_price(as_group, best_bid, dry_run)
                        else:
                            print_verbose(this_file, 'info',
                                          "Moving to ondemand.")
                            modify_price(as_group, None, dry_run,
                                         minutes_multiplier,
                                         args.demand_expiration)
                            set_tag_dict_value(
                                as_group, 'ssr_config', 'demand_expiration',
                                int(time.time()) +
                                (args.demand_expiration * minutes_multiplier))
                            modify_as_group_azs(as_group,
                                                get_usable_zones(as_group),
                                                dry_run)
                else:
                    print_verbose(this_file, 'info',
                                  'No further actions to take on this ASG.')
            print_verbose(this_file, 'info', 'Done with pass on %s' % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            return 1

    print_verbose(this_file, 'info', "All regions complete")
Esempio n. 27
0
def maximize_elb_azs(elb_conn, as_group, dry_run):
    """Align each ELB attached to ``as_group`` with the group's usable AZs.

    For every load balancer named in ``as_group.load_balancers`` the ELB's
    zones are compared with ``get_usable_zones(as_group)``. On a mismatch the
    zones the ELB has but the group cannot use are disabled and the usable
    zones are enabled.

    If AWS rejects the change with a ``ValidationError`` saying two zones
    cannot be used together, the AZ letter of the first zone named in the
    message is marked disabled on the group through ``mark_asg_az_disabled``
    and ``update_tags``. Any other AWS error for one ELB is logged and the
    remaining ELBs are still processed.

    Args:
        elb_conn: ELB connection used to look the load balancers up.
        as_group: Autoscaling group whose load balancers are adjusted.
        dry_run: When true the mismatch is only logged.

    Any exception that is not handled per ELB is passed to
    ``handle_exception`` and the process exits with status 1.
    """
    try:
        this_file = os.path.basename(__file__)
        conflict_text = 'is constrained and cannot be used together with'
        # Matches e.g. "xx-yyyy-1a and xx-yyyy-1b" inside the AWS message.
        conflict_pattern = re.compile(
            r'\b\w{2}-\w{4,}-\d\w and \w{2}-\w{4,}-\d\w\b')

        for elb_name in as_group.load_balancers:
            elb = elb_conn.get_all_load_balancers(elb_name)[0]
            # Looked up once per ELB: an earlier iteration may have disabled
            # an AZ on the group, so it must not be cached across ELBs.
            usable_zones = get_usable_zones(as_group)
            if sorted(elb.availability_zones) == sorted(usable_zones):
                continue

            print_verbose(
                this_file, 'info',
                "AZs for ELB don't include all potential AZs. Removing unusable zones and adding the rest now."
            )
            if dry_run:
                continue

            in_lb_but_not_asg = list(
                set(elb.availability_zones) - set(usable_zones))
            in_asg_but_not_lb = list(
                set(usable_zones) - set(elb.availability_zones))
            if not (in_lb_but_not_asg or in_asg_but_not_lb):
                continue

            try:
                if in_lb_but_not_asg:
                    elb.disable_zones(in_lb_but_not_asg)
                elb.enable_zones(usable_zones)
            except Exception as e:
                if not hasattr(e, 'error_code'):
                    # Not an AWS API error: let the outer handler report the
                    # real exception instead of an AttributeError.
                    raise
                message = getattr(e, 'message', None) or str(e)
                if e.error_code != 'ValidationError' or \
                        conflict_text not in message:
                    # Previously dropped without a trace; keep going but say
                    # what happened.
                    print_verbose(
                        this_file, 'info',
                        'Unable to update AZs for ELB %s: %s' % (elb_name, e))
                    continue

                print_verbose(
                    this_file, 'info',
                    'Conflict found between two AZs. Removing one of them from use.'
                )
                match = conflict_pattern.search(message)
                if match:
                    # "xx-yyyy-1a and ..." -> "xx-yyyy-1a" -> "1a" -> "a"
                    bad_az = match.group().split()[0].split('-')[-1][1]
                    print_verbose(
                        this_file, 'info',
                        'Removing %s from potential AZs as it confilcts with another AZ.'
                        % bad_az)
                    # smarter here would be to figure out
                    # which AZ is a better choice
                    new_tag = mark_asg_az_disabled(as_group, bad_az)
                    update_tags(as_group.connection, [new_tag])

    except Exception as e:
        handle_exception(e)
        sys.exit(1)
Esempio n. 28
0
def main(args):
    """Create or repair the SSR tags on spot-backed autoscaling groups.

    In every region not listed in ``args.excluded_regions`` the groups that
    either use a launch configuration with a spot price or already carry an
    ``ssr_config`` tag with ``enabled`` set to ``True`` are evaluated:

    * with ``args.reset_tags``, without an ``ssr_config`` tag, or with a tag
      whose value cannot be read, both tags are initialised afresh;
    * with ``enabled`` falsy nothing is done;
    * otherwise the ``ssr_config`` keys and the recorded ``LC_name`` are
      checked. On a mismatch the SSR tags are deleted when the current
      launch configuration has no spot price, else ``ssr_config`` is
      re-initialised. Then the ``AZ_status`` tag is checked against the
      region's zone letters and re-initialised if keys are missing.

    ``EC2ResponseError`` and ``BotoServerError`` are reported through
    ``handle_exception`` and the next region is processed.

    Returns:
        ``1`` as soon as any other exception has been reported; ``None``
        when all regions were processed.
    """
    global verbose
    global dry_run
    (verbose, dry_run) = dry_run_necessaries(args.dry_run, args.verbose)
    this_file = os.path.basename(__file__)
    config_keys = [
        'enabled', 'original_bid', 'LC_name', 'min_AZs', 'demand_expiration']

    def ssr_enabled(group):
        # Read the tag once per group rather than once per sub-expression.
        # Assumes get_tag_dict_value is a pure read of the group's tags.
        config = get_tag_dict_value(group, 'ssr_config')
        return bool(config) and config['enabled'] is True

    for region in [r.name for r in boto.ec2.regions() if r.name not in args.excluded_regions]:
        try:
            print_verbose(this_file, 'info', 'Starting pass on %s' % region)
            ec2_conn = boto.ec2.connect_to_region(region)
            as_conn = boto.ec2.autoscale.connect_to_region(region)

            all_groups = as_conn.get_all_groups()
            # Built once as a set so the per-group membership test below does
            # not rebuild a list of names for every group.
            spot_lc_names = set(
                lc.name for lc in as_conn.get_all_launch_configurations()
                if lc.spot_price)
            # these need to be pulled from the same all_groups list or
            # duplicate objects will be seen as distinct.
            spot_lc_groups = [
                g for g in all_groups if g.launch_config_name in spot_lc_names]
            previously_ssr_managed_groups = [
                g for g in all_groups if ssr_enabled(g)]

            all_groups = list(
                set(spot_lc_groups + previously_ssr_managed_groups))
            # Zone letters of this region; fetched on first use only, so a
            # region without enabled groups costs no extra API call.
            zones = None
            for as_group in all_groups:
                print_verbose(
                    this_file, 'info', "Evaluating %s" % as_group.name)

                ssr_config = None
                if not args.reset_tags and any(
                        t.key == 'ssr_config' for t in as_group.tags):
                    # May still come back falsy when the tag value (a dict)
                    # can't be interpreted by ast.literal_eval()
                    ssr_config = get_tag_dict_value(as_group, 'ssr_config')

                if not ssr_config:
                    print_verbose(
                        this_file, 'info', 'Tags not found or reset tags option flagged. Adding all tags anew now.')
                    init_ssr_config_tag(as_group, args.min_healthy_AZs)
                    init_az_status_tag(as_group)
                    continue

                if not ssr_config['enabled']:
                    print_verbose(
                        this_file, 'info', 'ssr_config DISABLED. Doing nothing.')
                    continue

                print_verbose(
                    this_file, 'info', 'ssr management enabled. Verifying all config values in place.')

                if not verify_tag_dict_keys(as_group, 'ssr_config', config_keys) or \
                        ssr_config['LC_name'] != as_group.launch_config_name[-155:]:
                    # this would indicate a change to the LC outside of ssr
                    # scope. In that case, we need to disable ssr via tag
                    # deletion.
                    if not get_launch_config(as_group).spot_price:
                        del_ssr_tags(as_group)
                        continue
                    init_ssr_config_tag(as_group, args.min_healthy_AZs)

                if zones is None:
                    zones = [z.name[-1] for z in ec2_conn.get_all_zones()]
                if not verify_tag_dict_keys(as_group, 'AZ_status', zones):
                    init_az_status_tag(as_group)

            print_verbose(this_file, 'info', 'Done with pass on %s' % region)

        except EC2ResponseError as e:
            handle_exception(e)

        except BotoServerError as e:
            handle_exception(e)

        except Exception as e:
            handle_exception(e)
            return 1

    print_verbose(this_file, 'info', "All regions complete")