def prepare_machines(self):
    """
    Prepare every machine in self.machines, starting with the queue head.

    The queue head entry is moved to the front of self.machines (the list is
    reordered in place). For each machine the prepare script is copied over
    and then run; a non-zero status from either step aborts the operation.
    Once every machine succeeded, the celery config is updated with the
    queue head's public ip and celery is configured.

    Returns:
        True when all steps succeeded, False when anything inside the
        preparation/configuration phase raised (the error is printed and
        logged).

    Note:
        Looking up the queue head and reordering self.machines happen before
        the try block, so errors raised there propagate to the caller.
    """
    logging.info(
        "prepare_machines: inside method with machine_info : \n%s",
        pprint.pformat(self.machines))

    queue_head = self.__get_queue_head_machine_info()

    # push queue head to be the first node to be prepared
    self.machines.remove(queue_head)
    self.machines.insert(0, queue_head)

    logging.info("queue head = \n{0}".format(pprint.pformat(queue_head)))

    try:
        logging.info("Preparing environment on remote machines...")
        for machine in self.machines:
            logging.info(
                "For machine {ip}".format(ip=machine['public_ip']))

            # Both remote steps report success as status 0.
            success = self.__copy_prepare_script_to_vm(machine)
            if success != 0:
                raise Exception(
                    "Remote copy command failed on {ip}!".format(
                        ip=machine['public_ip']))

            success = self.__run_prepare_script_on_vm(machine)
            if success != 0:
                raise Exception("Remote command failed on {ip}!".format(
                    ip=machine['public_ip']))

        helper.update_celery_config_with_queue_head_ip(
            queue_head_ip=queue_head["public_ip"],
            agent_type=self.AGENT_TYPE)
        logging.info(
            "Updated celery config with queue head ip: {0}".format(
                queue_head["public_ip"]))

        self.__configure_celery(queue_head)

        return True

    # "as" form works on Python 2.6+ and 3.x; the old comma form is 2.x only.
    except Exception as e:
        traceback.print_exc()
        logging.error(
            "prepare_machines : exiting method with error : {0}".format(
                str(e)))
        return False
def prepare_machines(self):
    """
    Prepare every machine in self.machines, starting with the queue head.

    The queue head entry is moved to the front of self.machines (the list is
    reordered in place). For each machine the prepare script is copied over
    and then run; a non-zero status from either step aborts the operation.
    Once every machine succeeded, the celery config is updated with the
    queue head's public ip and celery is configured.

    Returns:
        True when all steps succeeded, False when anything inside the
        preparation/configuration phase raised (the error is printed and
        logged).

    Note:
        Looking up the queue head and reordering self.machines happen before
        the try block, so errors raised there propagate to the caller.
    """
    logging.info("prepare_machines: inside method with machine_info : \n%s", pprint.pformat(self.machines))

    queue_head = self.__get_queue_head_machine_info()

    # push queue head to be the first node to be prepared
    self.machines.remove(queue_head)
    self.machines.insert(0, queue_head)

    logging.info("queue head = \n{0}".format(pprint.pformat(queue_head)))

    try:
        logging.info("Preparing environment on remote machines...")
        for machine in self.machines:
            logging.info("For machine {ip}".format(ip=machine['public_ip']))

            # Both remote steps report success as status 0.
            success = self.__copy_prepare_script_to_vm(machine)
            if success != 0:
                raise Exception("Remote copy command failed on {ip}!".format(ip=machine['public_ip']))

            success = self.__run_prepare_script_on_vm(machine)
            if success != 0:
                raise Exception("Remote command failed on {ip}!".format(ip=machine['public_ip']))

        helper.update_celery_config_with_queue_head_ip(queue_head_ip=queue_head["public_ip"],
                                                       agent_type=self.AGENT_TYPE)
        logging.info("Updated celery config with queue head ip: {0}".format(queue_head["public_ip"]))

        self.__configure_celery(queue_head)

        return True

    # "as" form works on Python 2.6+ and 3.x; the old comma form is 2.x only.
    except Exception as e:
        traceback.print_exc()
        logging.error("prepare_machines : exiting method with error : {0}".format(str(e)))
        return False
def prepare_vms(self, parameters):
    """
    Public method for preparing a set of VMs

    Runs the pipeline: launch instances (queue head first), poll their
    status, verify ssh connectivity, then configure celery. After a queue
    head has been configured the method calls itself again so that the
    remaining vms can be handled.

    Every exception raised inside the pipeline is logged and swallowed; the
    method always returns None.

    Args:
        parameters      A dictionary of parameters

    NOTE(review): the retry-by-recursion below has no visible attempt limit;
    termination depends on the private helpers eventually reporting a
    running queue head or nothing left to launch.
    """
    logging.debug("prepare_vms(): nparameters = {0}".format(parameters))

    if not parameters["vms"] and 'head_node' not in parameters:
        logging.error(
            "No vms are waiting to be prepared or head_node is not specified!"
        )
        return

    try:
        # ##################################################
        # step 1: run instance based on queue head or not #
        # ##################################################
        num_vms, parameters = self.__prepare_queue_head(parameters)
        if num_vms is None and parameters is None:
            return

        # ########################################################################
        # step 2: poll the status of instances, if not running, terminate them #
        # ########################################################################
        public_ips, _private_ips, instance_ids = self.__poll_instances_status(
            num_vms, parameters)
        if public_ips is None:
            if not self.__is_queue_head_running(parameters):
                # if last time of spawning queue head failed, spawn another queue head again
                self.prepare_vms(parameters)
            # Nothing usable came out of this attempt: stop here instead of
            # falling through to the ssh check with public_ips == None.
            return

        ############################################################
        # step 3: set alarm for the nodes, if it is NOT queue head #
        ############################################################
        # Currently disabled (shutdown alarms via agent.make_sleepy).

        ########################################################
        # step 4: verify whether nodes are connectable via ssh #
        ########################################################
        connected_public_ips, connected_instance_ids = self.__verify_ec2_instances_via_ssh(
            parameters=parameters,
            public_ips=public_ips,
            instance_ids=instance_ids)

        if not connected_public_ips:
            if not self.__is_queue_head_running(parameters):
                # if last time of spawning queue head failed, spawn another queue head again
                self.prepare_vms(parameters)
            # No reachable node from this attempt: do not configure celery
            # with an empty ip list.
            return

        #########################################
        # step 5: configure celery on each node #
        #########################################
        if "queue_head" in parameters and parameters["queue_head"] == True:
            queue_head_ip = connected_public_ips[0]
            logging.info('queue_head_ip: {0}'.format(queue_head_ip))
            # celery configuration needs to be updated with the queue head ip
            helper.update_celery_config_with_queue_head_ip(
                queue_head_ip=queue_head_ip, agent_type=self.agent_type)

        # copy celery configure to nodes.
        self.__configure_celery(parameters, connected_public_ips,
                                connected_instance_ids)

        #####################################################################
        # step 6: if current node is queue head, may need to spawn the rest #
        #####################################################################
        # Re-read the flag here rather than caching it before step 5, in
        # case the configure step touches parameters.
        if "queue_head" in parameters and parameters["queue_head"] == True:
            self.prepare_vms(parameters)
        # else all vms requested are finished spawning. Done!

    except Exception as e:
        logging.exception(e)
def prepare_vms(self, parameters):
    """
    Prepare the Flex Cloud machines listed in parameters.

    Progress and failures are reported through the user data record
    (flex_cloud_status / flex_cloud_info_msg, followed by put()). The steps
    are: validate the machine list, record a VMStateModel entry per machine
    based on a port check (22 and 443), wait for ssh on the queue head,
    prepare the queue head, wait for ssh on every non-queue-head machine,
    hand the workers to the agent, configure celery and finally synchronize
    the VM states.

    Args:
        parameters      A dictionary of parameters; this method reads
                        'user_id', 'credentials', 'reservation_id' and the
                        keys named by PARAM_FLEX_QUEUE_HEAD and
                        PARAM_FLEX_CLOUD_MACHINE_INFO.

    Returns:
        None. The outcome is only visible through the user data record.
    """
    logging.debug('prepare_vms(): parameters={0}'.format(parameters))

    queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]
    user_data = self.__get_user_data(parameters['user_id'])

    def report(status, message):
        # Persist the user-visible deployment status in one place.
        user_data.flex_cloud_status = status
        user_data.flex_cloud_info_msg = message
        user_data.put()

    if self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters \
            or not parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]:
        logging.error('Error: No {0} param!'.format(
            self.PARAM_FLEX_CLOUD_MACHINE_INFO))
        # Report Error
        report(False, 'Invalid Parameters')
        return

    flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

    # Set the user message to "configuring..."
    report(True,
           'Flex Cloud configured. Waiting for workers to become available...')

    # Initialize the VMstateModel db
    all_accessible = True
    for machine in flex_cloud_machine_info:
        if self.agent.check_network_ports(machine['ip'], [22, 443]):
            state = VMStateModel.STATE_ACCESSIBLE
        else:
            state = VMStateModel.STATE_INACCESSIBLE
            all_accessible = False
        vm_state = VMStateModel(
            state=state,
            infra=self.agent_type,
            ins_type=FlexConfig.INSTANCE_TYPE,
            pri_ip=machine['ip'],
            pub_ip=machine['ip'],
            username=machine['username'],
            keyfile=machine['keyfile'],
            ins_id=self.agent.get_flex_instance_id(machine['ip']),
            user_id=parameters['user_id'],
            res_id=self.reservation_id)
        vm_state.put()

    if not all_accessible:
        # Report Failure
        report(False, 'Error: not all workers are accessible')
        return

    # A missing queue head is handled on its own: the ssh failure message
    # below needs queue_head_machine['ip'], which would raise on None.
    if queue_head_machine is None:
        logging.error(
            'Found no viable ssh-able/running queue head machine!')
        # Report Failure
        report(False, 'Error: No queue head machine specified')
        return

    if not helper.wait_for_ssh_connection(
            queue_head_machine['keyfile'],
            queue_head_machine['ip'],
            username=queue_head_machine['username']):
        logging.error(
            'Found no viable ssh-able/running queue head machine!')
        # Report Failure
        report(False,
               'Error: Can not connect {0} (queue head) via SSH'.format(
                   queue_head_machine['ip']))
        return

    if not self.__prepare_queue_head(queue_head_machine, parameters):
        logging.error('Error: could not prepare queue head!')
        # Report Failure
        report(False, 'Error preparing the queue head')
        return

    flex_cloud_workers = []
    for machine in flex_cloud_machine_info:
        if machine[self.PARAM_IS_QUEUE_HEAD] != True:
            if helper.wait_for_ssh_connection(
                    machine['keyfile'],
                    machine['ip'],
                    username=machine['username']):
                flex_cloud_workers.append(machine)
            else:
                # Report Failure
                report(False,
                       'Error: Can not connect to {0} via SSH'.format(
                           machine['ip']))
                return

    if flex_cloud_workers:
        logging.debug(
            'Preparing workers: {0}'.format(flex_cloud_workers))
        params = {
            'infrastructure': AgentTypes.FLEX,
            self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
            'credentials': parameters['credentials'],
            'user_id': parameters['user_id'],
            self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
            'reservation_id': parameters['reservation_id']
        }
        self.agent.prepare_instances(params)

    helper.update_celery_config_with_queue_head_ip(
        queue_head_ip=queue_head_machine['ip'],
        agent_type=self.agent_type)

    self.__configure_celery(params=parameters)

    # Report Success
    logging.debug('Flex Cloud Deployed')
    report(True, 'Flex Cloud Deployed')

    # Force the update of the instance status
    VMStateModel.synchronize(agent=self.agent, parameters=parameters)
def prepare_vms(self, parameters):
    """
    Public method for preparing a set of VMs

    Runs the pipeline: launch instances (queue head first), poll their
    status, verify ssh connectivity, then configure celery. After a queue
    head has been configured the method calls itself again so that the
    remaining vms can be handled.

    Every exception raised inside the pipeline is logged and swallowed; the
    method always returns None.

    Args:
        parameters      A dictionary of parameters

    NOTE(review): the retry-by-recursion below has no visible attempt limit;
    termination depends on the private helpers eventually reporting a
    running queue head or nothing left to launch.
    """
    logging.debug("prepare_vms(): nparameters = {0}".format(parameters))

    if not parameters["vms"] and "head_node" not in parameters:
        logging.error("No vms are waiting to be prepared or head_node is not specified!")
        return

    try:
        # ##################################################
        # step 1: run instance based on queue head or not #
        # ##################################################
        num_vms, parameters = self.__prepare_queue_head(parameters)
        if num_vms is None and parameters is None:
            return

        # ########################################################################
        # step 2: poll the status of instances, if not running, terminate them #
        # ########################################################################
        public_ips, _private_ips, instance_ids = self.__poll_instances_status(num_vms, parameters)
        if public_ips is None:
            if not self.__is_queue_head_running(parameters):
                # if last time of spawning queue head failed, spawn another queue head again
                self.prepare_vms(parameters)
            # Nothing usable came out of this attempt: stop here instead of
            # falling through to the ssh check with public_ips == None.
            return

        ############################################################
        # step 3: set alarm for the nodes, if it is NOT queue head #
        ############################################################
        # Currently disabled (shutdown alarms via agent.make_sleepy).

        ########################################################
        # step 4: verify whether nodes are connectable via ssh #
        ########################################################
        connected_public_ips, connected_instance_ids = self.__verify_ec2_instances_via_ssh(
            parameters=parameters, public_ips=public_ips, instance_ids=instance_ids
        )

        if not connected_public_ips:
            if not self.__is_queue_head_running(parameters):
                # if last time of spawning queue head failed, spawn another queue head again
                self.prepare_vms(parameters)
            # No reachable node from this attempt: do not configure celery
            # with an empty ip list.
            return

        #########################################
        # step 5: configure celery on each node #
        #########################################
        if "queue_head" in parameters and parameters["queue_head"] == True:
            queue_head_ip = connected_public_ips[0]
            logging.info("queue_head_ip: {0}".format(queue_head_ip))
            # celery configuration needs to be updated with the queue head ip
            helper.update_celery_config_with_queue_head_ip(queue_head_ip=queue_head_ip, agent_type=self.agent_type)

        # copy celery configure to nodes.
        self.__configure_celery(parameters, connected_public_ips, connected_instance_ids)

        #####################################################################
        # step 6: if current node is queue head, may need to spawn the rest #
        #####################################################################
        # Re-read the flag here rather than caching it before step 5, in
        # case the configure step touches parameters.
        if "queue_head" in parameters and parameters["queue_head"] == True:
            self.prepare_vms(parameters)
        # else all vms requested are finished spawning. Done!

    except Exception as e:
        logging.exception(e)
def prepare_vms(self, parameters):
    """
    Prepare the Flex Cloud machines listed in parameters.

    Progress and failures are reported through the user data record
    (flex_cloud_status / flex_cloud_info_msg, followed by put()). The steps
    are: validate the machine list, record a VMStateModel entry per machine
    based on a port check (22 and 443), wait for ssh on the queue head,
    prepare the queue head, wait for ssh on every non-queue-head machine,
    hand the workers to the agent, configure celery and finally synchronize
    the VM states.

    Args:
        parameters      A dictionary of parameters; this method reads
                        "user_id", "credentials", "reservation_id" and the
                        keys named by PARAM_FLEX_QUEUE_HEAD and
                        PARAM_FLEX_CLOUD_MACHINE_INFO.

    Returns:
        None. The outcome is only visible through the user data record.
    """
    logging.debug("prepare_vms(): parameters={0}".format(parameters))

    queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]
    user_data = self.__get_user_data(parameters["user_id"])

    def report(status, message):
        # Persist the user-visible deployment status in one place.
        user_data.flex_cloud_status = status
        user_data.flex_cloud_info_msg = message
        user_data.put()

    if (
        self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters
        or not parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]
    ):
        logging.error("Error: No {0} param!".format(self.PARAM_FLEX_CLOUD_MACHINE_INFO))
        # Report Error
        report(False, "Invalid Parameters")
        return

    flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

    # Set the user message to "configuring..."
    report(True, "Flex Cloud configured. Waiting for workers to become available...")

    # Initialize the VMstateModel db
    all_accessible = True
    for machine in flex_cloud_machine_info:
        if self.agent.check_network_ports(machine["ip"], [22, 443]):
            state = VMStateModel.STATE_ACCESSIBLE
        else:
            state = VMStateModel.STATE_INACCESSIBLE
            all_accessible = False
        vm_state = VMStateModel(
            state=state,
            infra=self.agent_type,
            ins_type=FlexConfig.INSTANCE_TYPE,
            pri_ip=machine["ip"],
            pub_ip=machine["ip"],
            username=machine["username"],
            keyfile=machine["keyfile"],
            ins_id=self.agent.get_flex_instance_id(machine["ip"]),
            user_id=parameters["user_id"],
            res_id=self.reservation_id,
        )
        vm_state.put()

    if not all_accessible:
        # Report Failure
        report(False, "Error: not all workers are accessible")
        return

    # A missing queue head is handled on its own: the ssh failure message
    # below needs queue_head_machine["ip"], which would raise on None.
    if queue_head_machine is None:
        logging.error("Found no viable ssh-able/running queue head machine!")
        # Report Failure
        report(False, "Error: No queue head machine specified")
        return

    if not helper.wait_for_ssh_connection(
        queue_head_machine["keyfile"], queue_head_machine["ip"], username=queue_head_machine["username"]
    ):
        logging.error("Found no viable ssh-able/running queue head machine!")
        # Report Failure
        report(
            False,
            "Error: Can not connect {0} (queue head) via SSH".format(queue_head_machine["ip"]),
        )
        return

    if not self.__prepare_queue_head(queue_head_machine, parameters):
        logging.error("Error: could not prepare queue head!")
        # Report Failure
        report(False, "Error preparing the queue head")
        return

    flex_cloud_workers = []
    for machine in flex_cloud_machine_info:
        if machine[self.PARAM_IS_QUEUE_HEAD] != True:
            if helper.wait_for_ssh_connection(machine["keyfile"], machine["ip"], username=machine["username"]):
                flex_cloud_workers.append(machine)
            else:
                # Report Failure
                report(False, "Error: Can not connect to {0} via SSH".format(machine["ip"]))
                return

    if flex_cloud_workers:
        logging.debug("Preparing workers: {0}".format(flex_cloud_workers))
        params = {
            "infrastructure": AgentTypes.FLEX,
            self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
            "credentials": parameters["credentials"],
            "user_id": parameters["user_id"],
            self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
            "reservation_id": parameters["reservation_id"],
        }
        self.agent.prepare_instances(params)

    helper.update_celery_config_with_queue_head_ip(
        queue_head_ip=queue_head_machine["ip"], agent_type=self.agent_type
    )

    self.__configure_celery(params=parameters)

    # Report Success
    logging.debug("Flex Cloud Deployed")
    report(True, "Flex Cloud Deployed")

    # Force the update of the instance status
    VMStateModel.synchronize(agent=self.agent, parameters=parameters)