def cancel_spot_requests(ec2client, spot_requests, num_to_cancel):
    """Cancel the last ``num_to_cancel`` entries of ``spot_requests``.

    Args:
        ec2client: EC2 client object exposing
            ``cancel_spot_instance_requests``.
        spot_requests: Sequence of spot instance request ids; the selected
            tail of it is passed unchanged as ``SpotInstanceRequestIds``.
        num_to_cancel: How many requests to cancel, counted from the end of
            ``spot_requests``. Zero or a negative value cancels nothing.

    Any exception raised while slicing or calling the API is reported via
    ``print_verbose`` and ``basics.handle_error``.
    """
    # Guard first: ``spot_requests[-0:]`` is the *whole* list, so without
    # this check a request to cancel nothing would cancel everything.
    if num_to_cancel <= 0:
        return
    try:
        ec2client.cancel_spot_instance_requests(
            SpotInstanceRequestIds=spot_requests[-num_to_cancel:])
    except Exception as e:
        print_verbose(e)
        basics.handle_error('Termination of spot requests failed')
def get_params():
    """Parse the command line (everything after the program name).

    Recognised options are ``-v``/``--verbose``, ``--help`` and
    ``-c <value>``/``--config=<value>``.

    Returns:
        The ``(opts, args)`` pair produced by ``getopt.getopt``: a list of
        ``(option, value)`` tuples and the list of leftover arguments.

    A ``getopt.GetoptError`` is forwarded to ``basics.handle_error``.
    NOTE(review): if ``handle_error`` returns instead of aborting, the final
    ``return`` fails because nothing was parsed - confirm it exits.
    """
    short_flags = "vc:"
    long_flags = ["verbose", "help", "config="]
    cli_arguments = sys.argv[1:]
    try:
        opts, args = getopt.getopt(cli_arguments, short_flags, long_flags)
    except getopt.GetoptError as parse_error:
        basics.handle_error(parse_error)
    return opts, args
def fetch_and_parse_json(request):
    """Fetch ``request`` with ``urllib2.urlopen`` and decode the body as JSON.

    Args:
        request: Whatever ``urllib2.urlopen`` accepts; it is also
            interpolated into the verbose log line.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Any exception during logging, fetching or parsing is printed with
    ``print_verbose`` and reported through ``basics.handle_error``.
    NOTE(review): if ``handle_error`` returns, the final ``return`` fails
    because no payload was bound - presumably it aborts; confirm.
    """
    try:
        print_verbose('Fetching %s' % request)
        payload = json.loads(urllib2.urlopen(request).read())
    except Exception as err:
        print_verbose(err)
        basics.handle_error('Failed JSON fetch and parse.')
    return payload
def request_spot_instances(ec2client, num_to_boot, instance_type, max_bid):
    """Submit a spot request for ``num_to_boot`` instances.

    Args:
        ec2client: EC2 client object exposing ``request_spot_instances``.
        num_to_boot: Sent as ``InstanceCount``.
        instance_type: Accepted but not used by this function; the launch
            specification comes entirely from ``import_launch_config()``.
        max_bid: Converted with ``str()`` and sent as ``SpotPrice``.

    Any exception raised while building or sending the request is printed
    with ``print_verbose`` and reported through ``basics.handle_error``.
    """
    launch_specification = import_launch_config()
    try:
        spot_request = {
            'SpotPrice': str(max_bid),
            'InstanceCount': num_to_boot,
            'LaunchSpecification': launch_specification,
        }
        ec2client.request_spot_instances(**spot_request)
    except Exception as err:
        print_verbose(err)
        basics.handle_error('Requesting spot instances failed')
def fetch_current_mesos_master(mesos_zkurl):
    """Resolve ``mesos_zkurl`` to the current Mesos master.

    Runs ``mesos-resolve <mesos_zkurl>`` through ``basics.run_command`` and
    returns whatever that helper returns.

    Any exception is printed with ``print_verbose`` and reported through
    ``basics.handle_error``.
    NOTE(review): if ``handle_error`` returns, the final ``return`` fails
    because no result was bound - presumably it aborts; confirm.
    """
    print_verbose('Resolving ZooKeeper url for the working Mesos Master')
    try:
        resolved_master = basics.run_command('mesos-resolve %s' % mesos_zkurl)
    except Exception as err:
        print_verbose(err)
        basics.handle_error(
            'Could not resolve the ZooKeeper url for the leading master node.')
    return resolved_master
def import_launch_config():
    """Load ``launch_config.yml`` (relative to the working directory).

    Returns:
        The object produced by ``yaml.load`` on the file contents.

    A missing file is reported through ``basics.handle_error``; so is any
    ``StandardError`` raised while opening or parsing it.
    NOTE(review): both error paths fall through if ``handle_error`` returns -
    presumably it aborts; confirm.
    """
    config_path = 'launch_config.yml'
    config_present = basics.check_file_exists(config_path)
    if not config_present:
        print_verbose('Attempted to find config file: %s' % config_path)
        basics.handle_error('No configuration file found')
    try:
        with open(config_path, 'r') as stream:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; consider yaml.safe_load if this file
            # can ever come from an untrusted source.
            launch_spec = yaml.load(stream)
    except StandardError as err:
        print_verbose(err)
        basics.handle_error(
            'Error when attempting to read the configuration file')
    return launch_spec
def _spot_request_age_seconds(created):
    """Return the seconds elapsed between the datetime ``created`` and now."""
    if created.tzinfo is None:
        # A naive timestamp is treated as UTC, matching how the previous
        # implementation compared it against ``utcnow()``.
        now = datetime.datetime.utcnow()
    else:
        now = datetime.datetime.now(created.tzinfo)
    return (now - created).total_seconds()


def _spot_price_equals(price, max_bid):
    """Return True when ``price`` and ``max_bid`` denote the same amount."""
    try:
        # The request's price may arrive as a string such as '0.050000'
        # while the bid limit is numeric, so compare the numeric values.
        return abs(float(price) - float(max_bid)) < 1e-9
    except (TypeError, ValueError):
        return price == max_bid


def purge_old_spot_requests(ec2client, cur_spot_requests, timeout, max_bid):
    """Cancel open spot requests that are older than ``timeout`` seconds.

    A request is cancelled when all of the following hold:
      * its ``State`` is ``'open'``;
      * more than ``timeout`` seconds have passed since its ``CreateTime``;
      * its ``SpotPrice`` differs from ``max_bid`` (requests already bidding
        at the limit are left alone).

    Args:
        ec2client: EC2 client object exposing
            ``cancel_spot_instance_requests``.
        cur_spot_requests: Iterable of request mappings with the keys
            ``State``, ``CreateTime`` (a datetime), ``SpotPrice`` and
            ``SpotInstanceRequestId``.
        timeout: Maximum age in seconds before an open request is purged.
        max_bid: The maximum bid; compared numerically with ``SpotPrice``.

    A failing cancel call is printed with ``print_verbose`` and reported
    through ``basics.handle_error``.
    """
    for request in cur_spot_requests:
        if request[u'State'] != 'open':
            continue
        # Age is computed with timedelta arithmetic; strftime('%s') is a
        # platform extension and ignores the timestamp's time zone.
        if _spot_request_age_seconds(request[u'CreateTime']) <= timeout:
            continue
        if _spot_price_equals(request[u'SpotPrice'], max_bid):
            continue
        try:
            ec2client.cancel_spot_instance_requests(
                SpotInstanceRequestIds=[request[u'SpotInstanceRequestId']])
        except Exception as e:
            print_verbose(e)
            basics.handle_error(
                'Some error ocurred. Could not cancel old spot instance request.'
            )
def _instance_uptime_seconds(launch_time):
    """Return the seconds elapsed between ``launch_time`` and now."""
    if launch_time.tzinfo is None:
        # A naive timestamp is treated as UTC, matching how the previous
        # implementation compared it against ``utcnow()``.
        now = datetime.datetime.utcnow()
    else:
        now = datetime.datetime.now(launch_time.tzinfo)
    return (now - launch_time).total_seconds()


def terminate_spot_instances(ec2client, spot_instances, raw_spot_info,
                             num_to_terminate, partial_hour_limit):
    """Terminate up to ``num_to_terminate`` instances, oldest first.

    An instance is terminated only when the time it has spent in its current
    (partial) hour of uptime exceeds ``partial_hour_limit`` seconds; other
    instances are skipped with a verbose message and do not count towards
    ``num_to_terminate``.

    Args:
        ec2client: EC2 client object exposing ``terminate_instances``.
        spot_instances: Iterable of objects with ``instance_id`` and
            ``launch_time`` (a datetime) attributes.
        raw_spot_info: Unused; kept for interface compatibility.
        num_to_terminate: Maximum number of instances to terminate.
        partial_hour_limit: Threshold in seconds within the current hour of
            uptime. Values of 3600 or more can never be exceeded.

    Any exception is printed with ``print_verbose`` and reported through
    ``basics.handle_error``.
    """
    try:
        uptime_by_id = {}
        for instance in spot_instances:
            uptime_by_id[instance.instance_id] = _instance_uptime_seconds(
                instance.launch_time)

        terminated = 0
        # Longest uptime first, i.e. the earliest-launched instance first.
        for instance_id in sorted(uptime_by_id, key=uptime_by_id.get,
                                  reverse=True):
            if terminated >= num_to_terminate:
                break
            # Seconds into the current hour of uptime. The earlier formula
            # subtracted ``hours * partial_hour_limit`` rather than whole
            # hours, which gave wrong results after the first hour.
            part_seconds = uptime_by_id[instance_id] % 3600
            if part_seconds > partial_hour_limit:
                print_verbose('Terminating %s...' % instance_id)
                ec2client.terminate_instances(InstanceIds=[instance_id])
                # Count only real terminations so skipped instances do not
                # use up the termination budget.
                terminated += 1
            else:
                print_verbose(
                    '%s has not reached the set partial hour limit. %.0f minutes has passed.'
                    % (instance_id, part_seconds / 60.0))
    except Exception as e:
        print_verbose(e)
        basics.handle_error('Termination of spot instances failed')
def main():
    """Run the cloud-bursting control loop; never returns normally.

    Start-up: parse the command line, apply the options, load the
    configuration and open a boto3 session plus EC2 resource and client.

    Each loop iteration then:
      1. resolves the current Mesos master and reads its metrics snapshot;
      2. collects the current spot slaves and spot requests from EC2;
      3. fetches the current bid price;
      4. asks ``get_scaling_decision`` how many slaves to add or remove;
      5. purges timed-out spot requests;
      6. requests new spot instances, or cancels open requests and then
         terminates instances, to reach the desired count;
      7. sleeps until the next execution interval.

    Failures are reported through ``basics.handle_error``.
    """
    # Get parameters and process them
    opts, args = get_params()
    set_options(opts)

    # Import the configuration and set some session settings
    config = import_config()
    try:
        session = boto3.session.Session(
            aws_access_key_id=config['aws_access_key_id'],
            aws_secret_access_key=config['aws_secret_access_key'],
            region_name=config['default_region'])
    except Exception as e:
        print_verbose(e)
        basics.handle_error(
            'Could not establish a session towards AWS API, check config')

    # Start EC2 resource and client session
    try:
        ec2resource = session.resource('ec2')
        ec2client = session.client('ec2')
    except Exception as e:
        print_verbose(e)
        basics.handle_error('Could not establish a session towards EC2.')

    # Main execution
    while True:
        start_time = time.time()
        print('')

        #################################
        ### Collect hybrid cloud metrics
        #################################
        ## Fetch current Mesos master
        mesos_master = fetch_current_mesos_master(config['mesos_zkurl'])

        ## Create a Marathon url (not used further in this function).
        # NOTE(review): [:-5] drops the last five characters, presumably a
        # ':5050' port suffix - confirm against mesos-resolve output.
        marathon_url = '%s:%i' % (mesos_master[:-5], config['marathon_port'])
        print_verbose(' Current Mesos master %s' % mesos_master[:-5])

        ## Collect Mesos metrics
        mesos_data = fetch_and_parse_json('http://%s/metrics/snapshot' %
                                          mesos_master)

        ## Collect the usage values of the resources
        resources_in_use = {
            'cpus': float(mesos_data[u'master/cpus_used']),
            'mem': float(mesos_data[u'master/mem_used']),
            'disk': float(mesos_data[u'master/disk_used'])
        }
        current_percent_in_use = {
            'cpus': float(mesos_data[u'master/cpus_percent']),
            'mem': float(mesos_data[u'master/mem_percent']),
            'disk': float(mesos_data[u'master/disk_percent'])
        }

        ### Collect EC2 metrics
        cur_slaves, cur_slaves_raw = get_current_spot_slaves(ec2resource)
        cur_spot_requests = get_current_spot_requests(ec2client, 'all')
        cur_open_spot_requests = get_current_spot_requests(
            ec2client, [u'open'])
        num_active_pending_slaves = len(cur_slaves) + len(
            cur_open_spot_requests)

        #################################
        ### Calculate the bidding price
        #################################
        # Fetch the current price
        bid = fetch_current_price(ec2client, config['availability_zone'],
                                  config['instance_type'],
                                  config['maximum_bid_limit'])

        ########################################################
        ### Make a decision of whether or not to cloud burst
        ########################################################
        slaves_to_adjust = get_scaling_decision(resources_in_use,
                                                current_percent_in_use,
                                                len(cur_slaves),
                                                len(cur_open_spot_requests),
                                                config)
        desired_slaves = len(cur_open_spot_requests) + len(
            cur_slaves) + slaves_to_adjust

        print_verbose(' |----------------------------')
        print_verbose(' | Number of: | Count ')
        print_verbose(' |----------------------------')
        print_verbose(' | Desired instances | %i ' % desired_slaves)
        print_verbose(' | Pending requests | %i ' %
                      len(cur_open_spot_requests))
        print_verbose(' | Active instances | %i ' % len(cur_slaves))
        print_verbose(' |----------------------------')

        ############################
        ### Execute the decision
        ############################
        # Remove spot requests that exceeded the timeout and that do not
        # bid at the maximum limit
        purge_old_spot_requests(ec2client, cur_spot_requests,
                                config['spot_request_timeout'],
                                config['maximum_bid_limit'])

        if desired_slaves == num_active_pending_slaves:
            ## The number of pending and active instances are ok
            print_verbose('The number of pending and active slaves are ok')
        elif desired_slaves > num_active_pending_slaves:
            ## Request new spot instances
            print_verbose(
                'Not enough pending or active slave nodes. Requesting new ones'
            )
            # Uses the same 'instance_type' key as the price lookup above;
            # the earlier 'instance_typed' spelling raised KeyError here.
            request_spot_instances(ec2client,
                                   desired_slaves - num_active_pending_slaves,
                                   config['instance_type'], bid)
        else:
            ## Terminate excessive pending spot requests first, since
            ## cancelling a request is cheaper than killing an instance.
            open_count = len(cur_open_spot_requests)
            if open_count > 0:
                to_cancel = min(num_active_pending_slaves - desired_slaves,
                                open_count)
                print_verbose(
                    'Excessive spot requests. Attempting to cancel %i' %
                    to_cancel)
                cancel_spot_requests(ec2client, cur_open_spot_requests,
                                     to_cancel)
                num_active_pending_slaves = (num_active_pending_slaves -
                                             to_cancel)

            ## Terminate excessive spot instances
            if num_active_pending_slaves > desired_slaves:
                excessive_slaves = num_active_pending_slaves - desired_slaves
                print_verbose(
                    'Excessive spot instances. Attempting to terminate %i' %
                    excessive_slaves)
                terminate_spot_instances(ec2client, cur_slaves,
                                         cur_slaves_raw, excessive_slaves,
                                         config['partial_hour_limit'])

        ### Sleep and repeat
        try:
            sleep(start_time, config['execution_interval'])
        except KeyError as e:
            basics.handle_error('%s has not been set in the config.' % e)