def __is_queue_head_running(self, params):
    """
    Private method that is used for checking whether the queue head is running.

    Queue head has a different configuration of machine type and should be
    used for celery configuration.

    Args
        params    A dictionary of parameters. "key_prefix" is read from it
                  and passed to the agent's describe_instances call.

    Return
        A boolean value of whether the queue head is running or not. Any
        exception raised while checking is logged and reported as False.
    """
    logging.info("Trying to check if queue head is running...")
    try:
        # NOTE(review): a missing "key_prefix" raises KeyError here (reported
        # as False by the handler below) although the lookup further down
        # tolerates a missing key -- confirm which behaviour is intended.
        all_vms = self.agent.describe_instances(params, prefix=params["key_prefix"])
        if all_vms is None:
            logging.info("No vms were found!")
            return False

        queue_head_tag = AgentConfig.get_queue_head_key_tag(agent_type=self.agent_type)
        key_prefix = AgentConfig.get_agent_key_prefix(
            agent_type=self.agent_type, key_prefix=params.get("key_prefix", "")
        )

        # Just need one running vm with the QUEUEHEAD_KEY_TAG in the name of the keypair
        for vm in all_vms:
            if vm is None or vm["state"] != "running":
                continue
            key_name = vm["key_name"]
            if key_name is None:
                # A VM without a key pair name cannot be the queue head. Skip
                # it instead of letting the AttributeError abort the whole scan
                # and hide a queue head listed after it.
                continue
            if key_name.endswith(queue_head_tag) and key_name.startswith(key_prefix):
                logging.info("Found queue head: {0}".format(vm))
                return True
        return False
    except Exception as e:
        logging.error("Error in testing whether queue_head is running! {0}".format(e))
        return False
def __launch_ec2_queue_head(self, parameters):
    """
    Launch a single EC2 instance that acts as the queue head.

    The passed dictionary is modified in place: "queue_head", "instance_type",
    "num_vms", "shutdown" and "keyname" are overwritten, and "head_node" is
    removed once the agent has prepared the instance.

    Args
        parameters    A dictionary of parameters; must contain "keyname" and
                      "head_node" (with an "instance_type" entry).

    Return
        The number of VMs requested, which is always 1.

    Raises
        Exception if the agent fails while preparing the instance.
    """
    logging.info('About to start an EC2 queue head...')
    parameters["queue_head"] = True
    original_keyname = parameters["keyname"]

    # The queue head runs on the instance type named by the head_node entry.
    head_node_spec = parameters['head_node']
    parameters["instance_type"] = head_node_spec["instance_type"]
    parameters.update(num_vms=1, shutdown="terminate")

    # Tag queue head key so that it can be differentiated if necessary
    tagged_keyname = AgentConfig.get_queue_head_keyname(
        agent_type=self.agent_type, keyname=original_keyname)
    parameters["keyname"] = tagged_keyname
    logging.info('New queue head keyname: {0}'.format(tagged_keyname))

    self.agent.configure_instance_security(parameters)
    try:
        self.agent.prepare_instances(parameters)
    except Exception as e:
        message = 'Errors in running instances in agent: ' + str(e)
        raise Exception(message)

    # Only reached when the launch request did not raise.
    del parameters['head_node']
    return 1
def deregister_instances(self, parameters, terminate=True):
    """
    Terminate every EC2 instance whose key pair name starts with the agent
    key prefix.

    The instances are not taken from an explicit list of IDs: all instances
    returned by the connection are scanned and the ones whose key name begins
    with the prefix from AgentConfig.get_agent_key_prefix are terminated.
    (Also see documentation for the BaseAgent class)

    Args:
        parameters  A dictionary of parameters. Its optional 'key_prefix'
                    entry is used to build the key prefix, and the whole
                    dictionary is handed to open_connection.
        terminate   A Boolean flag for terminating instances. It is accepted
                    for interface compatibility but not consulted here:
                    matching instances are always terminated.
    """
    key_prefix = AgentConfig.get_agent_key_prefix(agent_type=self.AGENT_NAME,
                                                  key_prefix=parameters.get('key_prefix', ''))
    conn = self.open_connection(parameters)

    # Instances launched without a key pair have key_name None; skip them.
    instance_ids = [
        instance.id
        for reservation in conn.get_all_instances()
        for instance in reservation.instances
        if instance.key_name is not None and instance.key_name.startswith(key_prefix)
    ]

    if not instance_ids:
        # NOTE(review): presumably a boto EC2 connection, where an empty id
        # list produces a terminate request without any instance id -- confirm.
        # Either way there is nothing to terminate, so skip the remote call.
        logging.info('No instances found with key prefix {0}'.format(key_prefix))
        return

    for instance in conn.terminate_instances(instance_ids):
        logging.info('Instance {0} was terminated'.format(instance.id))
def __launch_ec2_queue_head(self, parameters):
    """
    Start one EC2 instance to serve as the queue head.

    Works directly on the given dictionary: it flags the request as a queue
    head launch, copies the head node's instance type, forces a single VM
    with "terminate" shutdown behaviour, swaps in the queue-head keyname and,
    after the agent call returns, drops the "head_node" entry.

    Args
        parameters    A dictionary of parameters; "keyname" and "head_node"
                      (holding "instance_type") are required.

    Return
        1, the number of VMs requested by this call.

    Raises
        Exception when the agent's prepare_instances call fails.
    """
    queue_head_count = 1

    logging.info("About to start an EC2 queue head...")
    parameters["queue_head"] = True
    plain_keyname = parameters["keyname"]

    # Instance type for the queue head comes from the head_node entry.
    parameters["instance_type"] = parameters["head_node"]["instance_type"]
    parameters["num_vms"] = queue_head_count
    parameters["shutdown"] = "terminate"

    # Tag queue head key so that it can be differentiated if necessary
    parameters["keyname"] = AgentConfig.get_queue_head_keyname(
        agent_type=self.agent_type,
        keyname=plain_keyname,
    )
    logging.info("New queue head keyname: {0}".format(parameters["keyname"]))

    agent = self.agent
    agent.configure_instance_security(parameters)
    try:
        agent.prepare_instances(parameters)
    except Exception as e:
        raise Exception("Errors in running instances in agent: " + str(e))

    # Reached only if the launch request went through without raising.
    parameters.pop("head_node")
    return queue_head_count
def __is_queue_head_running(self, params):
    '''
    Private method that is used for checking whether the queue head is running.

    Queue head has a different configuration of machine type and should be
    used for celery configuration.

    Args
        params    A dictionary of parameters. 'key_prefix' is read from it
                  and passed to the agent's describe_instances call.

    Return
        A boolean value of whether the queue head is running or not. Any
        exception raised while checking is logged and reported as False.
    '''
    logging.info('Trying to check if queue head is running...')
    try:
        # NOTE(review): a missing 'key_prefix' raises KeyError here (reported
        # as False by the handler below) although the lookup further down
        # tolerates a missing key -- confirm which behaviour is intended.
        all_vms = self.agent.describe_instances(
            params, prefix=params['key_prefix'])
        if all_vms is None:
            logging.info('No vms were found!')
            return False

        queue_head_tag = AgentConfig.get_queue_head_key_tag(
            agent_type=self.agent_type)
        key_prefix = AgentConfig.get_agent_key_prefix(
            agent_type=self.agent_type,
            key_prefix=params.get('key_prefix', ''))

        # Just need one running vm with the QUEUEHEAD_KEY_TAG in the name of the keypair
        for vm in all_vms:
            if vm is None or vm['state'] != 'running':
                continue
            key_name = vm['key_name']
            if key_name is None:
                # No key pair name means this cannot be the queue head. Skip
                # the entry rather than raising AttributeError, which would end
                # the scan early and report False for the whole check.
                continue
            if key_name.endswith(queue_head_tag) and key_name.startswith(key_prefix):
                logging.info('Found queue head: {0}'.format(vm))
                return True
        return False
    except Exception as e:
        logging.error(
            'Error in testing whether queue_head is running! {0}'.format(e))
        return False
def stop_ec2_vms(self, params, blocking=False):
    '''
    This method would terminate all the EC2 instances associated with the account
    that have a keyname prefixed with stochss (all instances created by the backend service)
    params must contain credentials key/value

    Args
        params      A dictionary of parameters; forwarded unchanged to
                    InfrastructureManager.deregister_instances.
        blocking    Passed to the InfrastructureManager constructor.

    Return
        True when the deregister request completed without raising,
        False when it raised (the error is logged).
    '''
    key_prefix = AgentConfig.get_agent_key_prefix(agent_type=AgentTypes.EC2,
                                                  key_prefix=params.get('key_prefix', ''))
    try:
        logging.debug("Stopping compute nodes with key_prefix: {0}".format(key_prefix))
        manager = InfrastructureManager(blocking=blocking)
        manager.deregister_instances(parameters=params, terminate=True)
        ret = True
    except Exception as e:
        logging.error("Terminate machine failed with error : %s", str(e))
        ret = False
    # Hand the outcome back; it used to be computed and then discarded.
    return ret
def __prepare_queue_head(self, parameters):
    """
    Make sure a queue head is available, then launch the requested worker VMs.

    If no queue head is running, one is launched first from the 'head_node'
    entry. Afterwards the parameters are switched back to normal-worker
    settings and one prepare_instances request is issued for every entry of
    the optional "vms" list.

    Args
        parameters    A dictionary of parameters. It is modified in place
                      ("keyname", "queue_head", "shutdown", "instance_type"
                      and "num_vms" are rewritten).

    Return
        A tuple (num_vms, parameters) where num_vms is the total number of
        VMs requested, or (None, None) when no queue head is running and
        'head_node' is missing from parameters.

    Raises
        Exception if the agent fails while preparing instances.
    """
    logging.debug(
        "__prepare_queue_head(): parameters = {0}".format(parameters))

    num_vms = 0
    if not self.__is_queue_head_running(parameters):
        logging.info(
            "Queue head is not running, so create a new queue head...")
        if 'head_node' not in parameters:
            logging.error("Head node is needed to run StochSS!")
            return None, None
        num_vms = self.__launch_ec2_queue_head(parameters)
    else:
        logging.info("Found queue head running...")

    # Queue head is already running, downgrading to normal worker.
    # Equality (not truthiness) is kept so only a True-equal flag triggers it.
    if parameters.get("queue_head") == True:
        parameters["keyname"] = parameters["keyname"].replace(
            AgentConfig.get_queue_head_key_tag(agent_type=self.agent_type), '')
        logging.info(
            'After downgrading from queue head to normal worker: keyname = {0}'
            .format(parameters["keyname"]))
        parameters["queue_head"] = False
        self.agent.configure_instance_security(parameters)

    # set shutdown behavior to "terminate"
    parameters["shutdown"] = "terminate"

    # "vms" is optional: a request may ask for a head node only. Treating a
    # missing or empty list as "no workers" avoids a KeyError after the
    # queue head has already been launched.
    for vm in parameters.get("vms") or []:
        parameters["instance_type"] = vm["instance_type"]
        parameters["num_vms"] = vm["num_vms"]
        num_vms += vm["num_vms"]
        try:
            self.agent.prepare_instances(parameters)
        except Exception as e:
            raise Exception(
                'Errors in running instances in agent: {0}'.format(str(e)))

    return num_vms, parameters
def __prepare_queue_head(self, parameters):
    """
    Ensure a queue head exists and then request the worker VMs.

    When no queue head is found running, one is launched from the "head_node"
    entry. The parameters are then reset to normal-worker settings and the
    agent is asked to prepare instances once per entry of the optional "vms"
    list.

    Args
        parameters    A dictionary of parameters. It is modified in place
                      ("keyname", "queue_head", "shutdown", "instance_type"
                      and "num_vms" are rewritten).

    Return
        A tuple (num_vms, parameters) where num_vms is the total number of
        VMs requested, or (None, None) when no queue head is running and
        "head_node" is missing from parameters.

    Raises
        Exception if the agent fails while preparing instances.
    """
    logging.debug("__prepare_queue_head(): parameters = {0}".format(parameters))

    num_vms = 0
    if not self.__is_queue_head_running(parameters):
        logging.info("Queue head is not running, so create a new queue head...")
        if "head_node" not in parameters:
            logging.error("Head node is needed to run StochSS!")
            return None, None
        num_vms = self.__launch_ec2_queue_head(parameters)
    else:
        logging.info("Found queue head running...")

    # Queue head is already running, downgrading to normal worker.
    # Equality (not truthiness) is kept so only a True-equal flag triggers it.
    if parameters.get("queue_head") == True:
        queue_head_tag = AgentConfig.get_queue_head_key_tag(agent_type=self.agent_type)
        parameters["keyname"] = parameters["keyname"].replace(queue_head_tag, "")
        logging.info(
            "After downgrading from queue head to normal worker: keyname = {0}".format(parameters["keyname"])
        )
        parameters["queue_head"] = False
        self.agent.configure_instance_security(parameters)

    # set shutdown behavior to "terminate"
    parameters["shutdown"] = "terminate"

    # A head-node-only request carries no "vms" entry; treat a missing or
    # empty list as "no workers" rather than failing with KeyError once the
    # queue head is already up.
    worker_groups = parameters.get("vms") or []
    for vm in worker_groups:
        parameters["instance_type"] = vm["instance_type"]
        parameters["num_vms"] = vm["num_vms"]
        num_vms += vm["num_vms"]
        try:
            self.agent.prepare_instances(parameters)
        except Exception as e:
            raise Exception("Errors in running instances in agent: {0}".format(str(e)))

    return num_vms, parameters
def start_ec2_vms(self, params, blocking=False):
    '''
    This method instantiates EC2 vm instances.

    Args
        params      A dictionary of parameters. "keyname", 'user_id',
                    'infrastructure', 'reservation_id' and 'credentials'
                    (with 'EC2_ACCESS_KEY' and 'EC2_SECRET_KEY') are read;
                    'key_prefix', 'vms' and 'head_node' are optional.
                    'keyname' may be rewritten with the key prefix and the
                    created db ids are stored under VMStateModel.IDS.
        blocking    Passed to the InfrastructureManager constructor.

    Return
        (True, None) on success, or (False, <error message>) if any step
        raised an exception.
    '''
    logging.debug("start_ec2_vms : inside method with params : \n%s", pprint.pformat(params))
    try:
        # make sure that any keynames we use are prefixed with stochss so that
        # we can do a terminate all based on keyname prefix
        key_prefix = AgentConfig.get_agent_key_prefix(agent_type=AgentTypes.EC2,
                                                      key_prefix=params.get('key_prefix', ''))
        requested_keyname = params["keyname"]
        if not requested_keyname.startswith(key_prefix):
            params['keyname'] = key_prefix + requested_keyname

        # NOTE: We are forcing blocking mode within the InfrastructureManager class
        # for the launching of VMs because of how GAE joins on all threads before
        # returning a response from a request.
        manager = InfrastructureManager(blocking=blocking)

        # 1. change the status of 'failed' in the previous launch in db to 'terminated'
        # NOTE: We need to make sure that the RabbitMQ server is running if any compute
        # nodes are running as we are using the AMQP broker option for Celery.
        VMStateModel.terminate_not_active(params)

        # 2. get user_id, infra, ec2 credentials
        user_id = self.__get_required_parameter(parameter_key='user_id', params=params)
        infrastructure = self.__get_required_parameter(parameter_key='infrastructure', params=params)
        reservation_id = self.__get_required_parameter(parameter_key='reservation_id', params=params)
        logging.debug('ec2: reservation_id = {0}'.format(reservation_id))

        if 'credentials' not in params:
            raise Exception('VMStateModel ERROR: No credentials are provided.')
        credentials = params['credentials']
        if 'EC2_ACCESS_KEY' not in credentials or 'EC2_SECRET_KEY' not in credentials:
            raise Exception('VMStateModel ERROR: Cannot get access key or secret.')
        ec2_access_key = credentials['EC2_ACCESS_KEY']
        ec2_secret_key = credentials['EC2_SECRET_KEY']
        if ec2_access_key is None or ec2_secret_key is None:
            raise Exception('VMStateModel ERROR: ec2 credentials are not valid.')

        # 3. create exact number of entities in db for this launch, and set
        # the status to 'creating'
        total_vms = 0
        if 'vms' in params:
            for vm_group in params['vms']:
                logging.debug('vm: {0}, num: {1}'.format(vm_group['instance_type'], vm_group['num_vms']))
                total_vms += vm_group['num_vms']
        if 'head_node' in params:
            # the head node counts as one extra VM
            total_vms += 1
        logging.debug('num = {0}'.format(total_vms))

        ids = self.__create_vm_state_model_entries(ec2_access_key=ec2_access_key,
                                                   ec2_secret_key=ec2_secret_key,
                                                   infrastructure=infrastructure,
                                                   num_vms=total_vms,
                                                   user_id=user_id,
                                                   reservation_id=reservation_id)

        # 4. Prepare Instances
        params[VMStateModel.IDS] = ids
        result = manager.prepare_instances(params)

        # 5. create the stochss table if it does not exist yet
        self.__create_dynamodb_stochss_table(ec2_access_key=ec2_access_key,
                                             ec2_secret_key=ec2_secret_key)

        logging.debug("start_ec2_vms : exiting method with result : %s", str(result))
        return True, None

    except Exception as e:
        logging.exception("start_ec2_vms : exiting method with error : {0}".format(str(e)))
        return False, 'Errors occur in starting machines:' + str(e)