Beispiel #1
0
def cancel_spot_requests(ec2client, spot_requests, num_to_cancel):
    """Cancel the last ``num_to_cancel`` entries of ``spot_requests``.

    Args:
        ec2client: Object exposing ``cancel_spot_instance_requests``; it is
            called with a ``SpotInstanceRequestIds`` keyword argument.
        spot_requests: Sequence whose trailing items are passed on as
            ``SpotInstanceRequestIds``.
        num_to_cancel: How many trailing items to cancel. Zero or a negative
            number cancels nothing.

    Returns:
        None.

    Any exception raised while cancelling is handed to ``print_verbose`` and
    then reported through ``basics.handle_error``.
    """
    try:
        # ``seq[-0:]`` is the whole sequence and a negative count would slice
        # from the front, so only a positive count may reach the slice.
        if num_to_cancel > 0:
            request_ids = spot_requests[-num_to_cancel:]
        else:
            request_ids = []

        # Nothing selected: skip the API call entirely.
        if request_ids:
            ec2client.cancel_spot_instance_requests(
                SpotInstanceRequestIds=request_ids)
    except Exception as e:
        print_verbose(e)
        basics.handle_error('Termination of spot requests failed')
Beispiel #2
0
def get_params():
    """Parse the command-line arguments that follow the program name.

    Recognised switches are ``-v`` / ``--verbose``, ``-c <value>`` /
    ``--config=<value>`` and ``--help``.

    Returns:
        The ``(opts, args)`` pair produced by ``getopt.getopt``.

    A ``getopt.GetoptError`` is forwarded to ``basics.handle_error``.
    """
    short_flags = "vc:"
    long_flags = ["verbose", "help", "config="]
    cli_args = sys.argv[1:]

    try:
        opts, args = getopt.getopt(cli_args, short_flags, long_flags)
    except getopt.GetoptError as err:
        basics.handle_error(err)

    return opts, args
Beispiel #3
0
def fetch_and_parse_json(request):
    """Fetch ``request`` with ``urllib2.urlopen`` and decode the body as JSON.

    Args:
        request: Value handed straight to ``urllib2.urlopen``.

    Returns:
        The object produced by ``json.loads`` for the response body.

    Any exception raised while fetching or parsing is handed to
    ``print_verbose`` and then reported through ``basics.handle_error`` with
    the message 'Failed JSON fetch and parse.'.
    """
    try:
        print_verbose('Fetching %s' % request)
        response = urllib2.urlopen(request)
        try:
            json_data = response.read()
        finally:
            # Close explicitly so the connection is released right away
            # instead of whenever the response object gets collected.
            response.close()
        parsed_data = json.loads(json_data)
    except Exception as e:
        print_verbose(e)
        basics.handle_error('Failed JSON fetch and parse.')

    return parsed_data
Beispiel #4
0
def request_spot_instances(ec2client, num_to_boot, instance_type, max_bid):
    """Submit a spot instance request for ``num_to_boot`` instances.

    The launch specification is whatever ``import_launch_config()`` returns,
    and the spot price is ``str(max_bid)``.

    Args:
        ec2client: Object exposing ``request_spot_instances``.
        num_to_boot: Passed on as ``InstanceCount``.
        instance_type: Accepted but not read by this function.
        max_bid: Bid price; converted with ``str`` before being sent.

    Any exception raised by the request is handed to ``print_verbose`` and
    then reported through ``basics.handle_error``.
    """
    launch_spec = import_launch_config()

    try:
        spot_request = dict(
            SpotPrice=str(max_bid),
            InstanceCount=num_to_boot,
            LaunchSpecification=launch_spec)
        ec2client.request_spot_instances(**spot_request)
    except Exception as err:
        print_verbose(err)
        basics.handle_error('Requesting spot instances failed')
Beispiel #5
0
def fetch_current_mesos_master(mesos_zkurl):
    """Resolve the Mesos master for a ZooKeeper url.

    Runs ``mesos-resolve <mesos_zkurl>`` through ``basics.run_command`` and
    returns whatever that call returns.

    Args:
        mesos_zkurl: ZooKeeper url interpolated into the command line.

    Any exception raised while running the command is handed to
    ``print_verbose`` and then reported through ``basics.handle_error``.
    """
    print_verbose('Resolving ZooKeeper url for the working Mesos Master')

    failure_message = (
        'Could not resolve the ZooKeeper url for the leading master node.')

    try:
        resolve_command = 'mesos-resolve %s' % mesos_zkurl
        mesos_master = basics.run_command(resolve_command)
    except Exception as err:
        print_verbose(err)
        basics.handle_error(failure_message)

    return mesos_master
Beispiel #6
0
def import_launch_config():
    """Load the launch configuration from ``launch_config.yml``.

    The path is relative, so the file is looked up from the current working
    directory.

    Returns:
        The object parsed from the YAML file.

    A missing file is reported through ``basics.handle_error`` with
    'No configuration file found'; any failure while opening or parsing the
    file is handed to ``print_verbose`` and then reported through
    ``basics.handle_error``.
    """
    config_path = 'launch_config.yml'
    if not basics.check_file_exists(config_path):
        print_verbose('Attempted to find config file: %s' % config_path)
        basics.handle_error('No configuration file found')

    try:
        with open(config_path, 'r') as configfile:
            # NOTE(review): yaml.load can construct arbitrary Python objects.
            # Consider yaml.safe_load if this file could ever come from an
            # untrusted source.
            config = yaml.load(configfile)
    # Catch Exception rather than StandardError: yaml.YAMLError derives
    # directly from Exception, so a malformed file would otherwise bypass
    # this handler. This also matches the other error handlers in the file.
    except Exception as e:
        print_verbose(e)
        basics.handle_error(
            'Error when attempting to read the configuration file')

    return config
Beispiel #7
0
def _same_spot_price(request_price, max_bid):
    """Return True when both prices are numerically equal.

    Falls back to plain equality when either value cannot be converted to a
    float.
    """
    try:
        return float(request_price) == float(max_bid)
    except (TypeError, ValueError):
        return request_price == max_bid


def purge_old_spot_requests(ec2client, cur_spot_requests, timeout, max_bid):
    """Cancel open spot requests that have been pending for too long.

    A request is cancelled only when all of the following hold:
      * its ``State`` is ``'open'``;
      * more than ``timeout`` seconds have passed since its ``CreateTime``;
      * its ``SpotPrice`` differs from ``max_bid``.

    Args:
        ec2client: Object exposing ``cancel_spot_instance_requests``.
        cur_spot_requests: Iterable of mappings with the keys ``State``,
            ``CreateTime`` (a datetime), ``SpotPrice`` and
            ``SpotInstanceRequestId``.
        timeout: Maximum age in seconds before an open request is purged.
        max_bid: Bid ceiling; requests already bidding this price are kept.

    Any exception raised while cancelling is handed to ``print_verbose`` and
    then reported through ``basics.handle_error``.
    """
    import calendar
    import time

    # Work in whole epoch seconds. strftime('%s') is a platform-specific
    # extension that ignores tzinfo, so it is avoided here.
    now_epoch = int(time.time())

    for request in cur_spot_requests:
        if request[u'State'] != 'open':
            continue

        # utctimetuple() normalises an aware datetime to UTC before the
        # conversion to epoch seconds.
        created_epoch = calendar.timegm(request[u'CreateTime'].utctimetuple())
        if now_epoch - created_epoch <= timeout:
            continue

        # The price may arrive as a string while the bid is a number, so the
        # comparison has to be numeric; a plain == would never match.
        if _same_spot_price(request[u'SpotPrice'], max_bid):
            continue

        try:
            ec2client.cancel_spot_instance_requests(
                SpotInstanceRequestIds=[request[u'SpotInstanceRequestId']])
        except Exception as e:
            print_verbose(e)
            basics.handle_error(
                'Some error ocurred. Could not cancel old spot instance request.'
            )
Beispiel #8
0
def terminate_spot_instances(ec2client, spot_instances, raw_spot_info,
                             num_to_terminate, partial_hour_limit):
    """Terminate old spot instances that are far enough into their current hour.

    Instances are examined oldest first. An instance is terminated only when
    the seconds elapsed in its current (partial) hour of lifetime exceed
    ``partial_hour_limit``; otherwise it is left running.

    At most ``num_to_terminate`` instances are examined. An instance that is
    skipped because it has not reached the limit still counts toward that
    number.
    NOTE(review): confirm whether skipped instances are meant to count.

    Args:
        ec2client: Object exposing ``terminate_instances``.
        spot_instances: Iterable of objects with ``instance_id`` and
            ``launch_time`` (a datetime) attributes.
        raw_spot_info: Accepted but not read by this function.
        num_to_terminate: Upper bound on the number of instances examined.
        partial_hour_limit: Threshold, in seconds, within the current hour.

    Any exception raised along the way is handed to ``print_verbose`` and
    then reported through ``basics.handle_error``.
    """
    import calendar
    import time

    try:
        # Whole epoch seconds; strftime('%s') is a platform-specific
        # extension that ignores tzinfo, so it is avoided here.
        now_epoch = int(time.time())

        launch_epochs = {}
        for instance in spot_instances:
            launch_epochs[instance.instance_id] = calendar.timegm(
                instance.launch_time.utctimetuple())

        examined = 0

        # Oldest first, ordered numerically by launch time.
        for instance_id in sorted(launch_epochs, key=launch_epochs.get):
            if examined == num_to_terminate:
                break

            lifetime_seconds = now_epoch - launch_epochs[instance_id]

            # Seconds used in the current partial hour. Clamp at zero so a
            # launch time slightly in the future (clock skew) does not wrap
            # around to a large remainder.
            part_seconds = max(lifetime_seconds, 0) % 3600

            if part_seconds > partial_hour_limit:
                print_verbose('Terminating %s...' % instance_id)
                ec2client.terminate_instances(InstanceIds=[instance_id])
            else:
                print_verbose(
                    '%s has not reached the set partial hour limit. %.0f minutes has passed.'
                    % (instance_id, part_seconds // 60))

            examined += 1

    except Exception as e:
        print_verbose(e)
        basics.handle_error('Termination of spot instances failed')
Beispiel #9
0
def main():
    """Run the scaling control loop.

    Startup: parse the command line, load the configuration and open a boto3
    session plus an EC2 resource and client. Failures in the session setup
    are handed to ``print_verbose`` and reported through
    ``basics.handle_error``.

    Each loop iteration then:
      1. resolves the current Mesos master and reads its metrics snapshot;
      2. collects the current spot instances and spot requests;
      3. fetches the current bid price;
      4. asks ``get_scaling_decision`` how many slaves to add or remove;
      5. purges timed-out spot requests;
      6. requests new spot instances, or cancels open requests and then
         terminates instances, to reach the desired count;
      7. sleeps until the next run.

    The loop never returns on its own.
    """
    # Get parameters and process them
    opts, args = get_params()
    set_options(opts)

    # Import the configuration and set some session settings
    config = import_config()

    try:
        session = boto3.session.Session(
            aws_access_key_id=config['aws_access_key_id'],
            aws_secret_access_key=config['aws_secret_access_key'],
            region_name=config['default_region'])
    except Exception as e:
        print_verbose(e)
        basics.handle_error(
            'Could not establish a session towards AWS API, check config')

    # Start EC2 resource and client session
    try:
        ec2resource = session.resource('ec2')
        ec2client = session.client('ec2')
    except Exception as e:
        print_verbose(e)
        basics.handle_error('Could not establish a session towards EC2.')

    # Main execution
    while True:
        start_time = time.time()
        # Parenthesised so the line parses both as a Python 2 print statement
        # and as a Python 3 call; either way it emits one blank line.
        print('')

        #################################
        ### Collect hybrid cloud metrics
        #################################

        ## Fetch current Mesos master
        mesos_master = fetch_current_mesos_master(config['mesos_zkurl'])

        ## Create a Marathon url
        # NOTE(review): the [:-5] slice presumably strips a ':5050'-style
        # port suffix -- confirm against the mesos-resolve output.
        # marathon_url is not used further down in this function.
        marathon_url = '%s:%i' % (mesos_master[:-5], config['marathon_port'])
        print_verbose('   Current Mesos master %s' % mesos_master[:-5])

        ## Collect Mesos metrics
        mesos_data = fetch_and_parse_json('http://%s/metrics/snapshot' %
                                          mesos_master)

        ## Collect the usage values of the resources
        resources_in_use = {
            'cpus': float(mesos_data[u'master/cpus_used']),
            'mem': float(mesos_data[u'master/mem_used']),
            'disk': float(mesos_data[u'master/disk_used'])
        }
        current_percent_in_use = {
            'cpus': float(mesos_data[u'master/cpus_percent']),
            'mem': float(mesos_data[u'master/mem_percent']),
            'disk': float(mesos_data[u'master/disk_percent'])
        }

        ### Collect EC2 metrics
        cur_slaves, cur_slaves_raw = get_current_spot_slaves(ec2resource)
        cur_spot_requests = get_current_spot_requests(ec2client, 'all')
        cur_open_spot_requests = get_current_spot_requests(
            ec2client, [u'open'])
        num_active_pending_slaves = len(cur_slaves) + len(
            cur_open_spot_requests)

        #################################
        ### Calculate the bidding price
        #################################

        # Fetch the current price
        bid = fetch_current_price(ec2client, config['availability_zone'],
                                  config['instance_type'],
                                  config['maximum_bid_limit'])

        ########################################################
        ### Make a decision of whether or not to cloud burst
        ########################################################
        slaves_to_adjust = get_scaling_decision(resources_in_use,
                                                current_percent_in_use,
                                                len(cur_slaves),
                                                len(cur_open_spot_requests),
                                                config)

        desired_slaves = len(cur_open_spot_requests) + len(
            cur_slaves) + slaves_to_adjust

        print_verbose('   |----------------------------')
        print_verbose('   | Number of:         | Count ')
        print_verbose('   |----------------------------')
        print_verbose('   | Desired instances  |   %i  ' % desired_slaves)
        print_verbose('   | Pending requests   |   %i  ' %
                      len(cur_open_spot_requests))
        print_verbose('   | Active instances   |   %i  ' % len(cur_slaves))
        print_verbose('   |----------------------------')

        ############################
        ### Execute the decision
        ############################

        # Remove spot requests that exceeded the timeout and do not already
        # bid at the maximum limit
        purge_old_spot_requests(ec2client, cur_spot_requests,
                                config['spot_request_timeout'],
                                config['maximum_bid_limit'])

        if desired_slaves == num_active_pending_slaves:
            ## The number of pending and active instances are ok
            print_verbose('The number of pending and active slaves are ok')

        elif desired_slaves > num_active_pending_slaves:
            ## Request new spot instances
            print_verbose(
                'Not enough pending or active slave nodes. Requesting new ones'
            )
            # Uses the same 'instance_type' key as the price lookup above.
            request_spot_instances(ec2client,
                                   desired_slaves - num_active_pending_slaves,
                                   config['instance_type'], bid)

        else:
            ## Terminate excessive pending spot requests first
            if len(cur_open_spot_requests) != 0:
                excessive_slaves = num_active_pending_slaves - desired_slaves

                # Never try to cancel more requests than are actually open.
                num_to_cancel = min(excessive_slaves,
                                    len(cur_open_spot_requests))
                print_verbose(
                    'Excessive spot requests. Attempting to cancel %i' %
                    num_to_cancel)
                cancel_spot_requests(ec2client, cur_open_spot_requests,
                                     num_to_cancel)
                num_active_pending_slaves = (num_active_pending_slaves -
                                             num_to_cancel)

            ## Terminate excessive spot instances if cancelling requests
            ## was not enough
            if num_active_pending_slaves > desired_slaves:
                excessive_slaves = num_active_pending_slaves - desired_slaves
                print_verbose(
                    'Excessive spot instances. Attempting to terminate %i' %
                    excessive_slaves)

                terminate_spot_instances(ec2client, cur_slaves, cur_slaves_raw,
                                         excessive_slaves,
                                         config['partial_hour_limit'])

        ### Sleep and repeat
        try:
            sleep(start_time, config['execution_interval'])
        except KeyError as e:
            basics.handle_error('%s has not been set in the config.' % e)