def __deregister_flex_vm(self, ip, username, keyfile, parameters, queue_head_ip, force=False):
    """Run the deregistration script on a Flex VM over ssh and mark it terminated.

    The VMStateModel entry for this IP is always moved to STATE_TERMINATED,
    even when the remote command cannot be executed or fails.
    """
    try:
        ssh_reachable = self.check_network_ports(ip, [22])
        if not ssh_reachable:
            logging.debug('Flex VM is not accessible via SSH, can not execute deregister command')
        else:
            # Build the full ssh invocation for the remote cleanup script.
            remote_cmd = self.get_remote_command_string(
                ip=ip,
                username=username,
                keyfile=keyfile,
                command="sudo ~/stochss/release-tools/flex-cloud/deregister_flex_vm.sh")
            logging.debug('deregister_command =\n{}'.format(remote_cmd))
            os.system(remote_cmd)
    except Exception as e:
        logging.exception('Failed to deregister Flex VM: {0}'.format(e))
    finally:
        # Record the terminal state regardless of whether the remote command ran.
        VMStateModel.set_state(params=parameters,
                               ins_ids=[self.get_flex_instance_id(public_ip=ip)],
                               state=VMStateModel.STATE_TERMINATED,
                               description='VM Deregistered.')
def _run(self):
    """Execute one DB synchronization pass, rescheduling when more are needed."""
    logging.debug(
        'SynchronizeDB._run() thread_id={0} agent_type={1} parameters={2}'.format(
            self.thread_id, self.agent_type, self.parameters))
    self.is_start = False
    # Refresh VM state from the cloud agent first.
    VMStateModel.synchronize(agent=self.agent, parameters=self.parameters)
    # Re-arm the timer only while the DB reports that polling should continue.
    should_continue = self.update_vm_state_db()
    if should_continue:
        self._start()
def _run(self):
    """Perform a single synchronization cycle.

    Clears the is_start flag, synchronizes VMStateModel with the cloud
    agent, and re-arms the timer (via _start) when update_vm_state_db
    indicates another pass is required.
    """
    logging.debug(
        "SynchronizeDB._run() thread_id={0} agent_type={1} parameters={2}".format(
            self.thread_id, self.agent_type, self.parameters
        )
    )
    self.is_start = False
    VMStateModel.synchronize(agent=self.agent, parameters=self.parameters)
    if self.update_vm_state_db():
        self._start()
def synchronize_db(self, params, force=False):
    """Schedule a VMStateModel synchronization task if enough time has passed.

    Reads the last synchronization timestamp from the VMStateSyn table and,
    unless a sync happened within the last SynchronizeDB.PAUSE seconds,
    enqueues a 'start_db_syn' task on the backend queue.

    Args:
        params: dictionary of infrastructure parameters; must contain
            PARAM_INFRASTRUCTURE.
        force: when True, also synchronize VMStateModel immediately,
            regardless of the time gate.

    Raises:
        Exception: if the last synchronization time cannot be determined.
    """
    logging.debug('synchronize_db(force={0}) param={1}'.format(force, params))
    # Fix: removed the unused local 'set_gap_large' (assigned, never read).
    last_time = None
    try:
        e = db.GqlQuery("SELECT * FROM VMStateSyn").get()
        if e:
            last_time = e.last_syn
        else:
            # No record yet: pretend the last sync was a day ago so one runs now.
            last_time = datetime.datetime.now() - datetime.timedelta(1)
    except Exception as e:
        logging.error('Error: have errors in opening db_syn file. {0}'.format(e))
        return

    if last_time is None:
        raise Exception('Error: cannot read last synchronization information of db!')

    # Unnecessary 'else' after raise removed; the success path reads straight down.
    now = datetime.datetime.now()
    delta = now - last_time
    gap = delta.total_seconds()
    logging.info('Time now: {0}'.format(now))
    logging.info('Time last synchronization: {0}'.format(last_time))
    logging.info('Time in between: {0}'.format(gap))

    infrastructure = params[self.PARAM_INFRASTRUCTURE]
    agent = self.agent_factory.create_agent(infrastructure)

    if force:
        VMStateModel.synchronize(agent=agent, parameters=params)

    # Too soon since the last sync: skip scheduling another task.
    if gap < backend_handler.SynchronizeDB.PAUSE + 1:
        logging.info('Less than {0} seconds to synchronize db.'.format(
            backend_handler.SynchronizeDB.PAUSE))
        return

    logging.info('Start synchronize db every {0} seconds.'.format(
        backend_handler.SynchronizeDB.PAUSE))
    from_fields = {
        'op': 'start_db_syn',
        'agent': pickle.dumps(agent),
        'parameters': pickle.dumps(params),
    }
    logging.info('\n\nAdding db syn task for agent = {}'.format(agent.AGENT_NAME))
    taskqueue.add(url=InfrastructureManager.BACKEND_QUEUE_URL,
                  params=from_fields,
                  method='GET')
def __verify_ec2_instances_via_ssh(self, instance_ids, parameters, public_ips):
    """Verify ssh reachability of newly launched EC2 VMs.

    VMs that do not answer ssh within the helper's timeout are deregistered
    from the agent and their VMStateModel entries are marked FAILED.

    Args:
        instance_ids: list of EC2 instance ids, parallel to public_ips.
        parameters: dictionary of launch parameters; must contain 'keyname'.
        public_ips: list of public IPs, parallel to instance_ids.

    Returns:
        Tuple (connected_public_ips, connected_instance_ids) covering only
        the VMs that answered ssh.

    Raises:
        Exception: if the ssh keyfile is missing, or terminating the
            unreachable instances fails.
    """
    keyfile = os.path.join(os.path.dirname(__file__), '..',
                           '{0}.key'.format(parameters['keyname']))
    logging.info('keyfile = {0}'.format(keyfile))
    if not os.path.exists(keyfile):
        raise Exception("ssh keyfile file not found: {0}".format(keyfile))

    connected_public_ips = []
    connected_instance_ids = []
    for (pub_ip, ins_id) in zip(public_ips, instance_ids):
        logging.info('connecting to {0}...'.format(pub_ip))
        # Fix: test truthiness directly instead of comparing '== True'.
        if helper.wait_for_ssh_connection(key_file=keyfile, ip=pub_ip):
            logging.info('{0} is successfully added'.format(pub_ip))
            connected_public_ips.append(pub_ip)
            connected_instance_ids.append(ins_id)

    # if there are some vms not able to be connected via ssh,
    # just shut them down explicitly
    if len(public_ips) != len(connected_public_ips):
        logging.info(
            'Time out on ssh to {0} instances. They will be terminated.'.format(
                len(public_ips) - len(connected_public_ips)))
        try:
            terminate_ins_ids = [ins_id for ins_id in instance_ids
                                 if ins_id not in connected_instance_ids]
            self.agent.deregister_some_instances(parameters, terminate_ins_ids)
            # update db with failed vms
            VMStateModel.set_state(parameters, terminate_ins_ids,
                                   VMStateModel.STATE_FAILED,
                                   VMStateModel.DESCRI_TIMEOUT_ON_SSH)
        except Exception:
            # Fix: the original bare 'except:' discarded the underlying
            # error entirely; keep its traceback in the log before raising
            # the summary exception callers expect.
            logging.exception('Error while terminating unreachable instances.')
            raise Exception(
                "Errors in terminating instances that cannot be connected via ssh."
            )

    # Fix: removed the dead 'public_ips = None' / 'instance_ids = None'
    # assignments; rebinding locals just before return has no effect.
    return connected_public_ips, connected_instance_ids
def synchronize_db(self, params, force=False):
    """Schedule a VMStateModel synchronization task if enough time has passed.

    Reads the last synchronization timestamp from the VMStateSyn table and,
    unless a sync already happened within SynchronizeDB.PAUSE seconds,
    enqueues a 'start_db_syn' task on the backend queue.

    Args:
        params: dictionary of infrastructure parameters; must contain
            PARAM_INFRASTRUCTURE.
        force: when True, also synchronize VMStateModel immediately.

    Raises:
        Exception: if the last synchronization time cannot be determined.
    """
    logging.debug('synchronize_db(force={0}) param={1}'.format(force, params))
    # Fix: dropped the unused local 'set_gap_large' (assigned, never read).
    last_time = None
    try:
        e = db.GqlQuery("SELECT * FROM VMStateSyn").get()
        if e:
            last_time = e.last_syn
        else:
            # First run: back-date by one day so a sync is triggered now.
            last_time = datetime.datetime.now() - datetime.timedelta(1)
    except Exception as e:
        logging.error('Error: have errors in opening db_syn file. {0}'.format(e))
        return

    if last_time is None:
        raise Exception('Error: cannot read last synchronization information of db!')

    # Unnecessary 'else' after raise flattened out.
    now = datetime.datetime.now()
    gap = (now - last_time).total_seconds()
    logging.info('Time now: {0}'.format(now))
    logging.info('Time last synchronization: {0}'.format(last_time))
    logging.info('Time in between: {0}'.format(gap))

    infrastructure = params[self.PARAM_INFRASTRUCTURE]
    agent = self.agent_factory.create_agent(infrastructure)

    if force:
        VMStateModel.synchronize(agent=agent, parameters=params)

    if gap < backend_handler.SynchronizeDB.PAUSE + 1:
        # A sync ran recently; do not enqueue another task yet.
        logging.info('Less than {0} seconds to synchronize db.'.format(
            backend_handler.SynchronizeDB.PAUSE))
        return

    logging.info('Start synchronize db every {0} seconds.'.format(
        backend_handler.SynchronizeDB.PAUSE))
    from_fields = {
        'op': 'start_db_syn',
        'agent': pickle.dumps(agent),
        'parameters': pickle.dumps(params),
    }
    logging.info('\n\nAdding db syn task for agent = {}'.format(agent.AGENT_NAME))
    taskqueue.add(url=InfrastructureManager.BACKEND_QUEUE_URL,
                  params=from_fields,
                  method='GET')
def deregister_flex_cloud(self, user_id):
    """Tear down the user's Flex Cloud and update user_data bookkeeping."""
    logging.debug('deregister_flex_cloud')
    service = backendservices(self.user_data)  #infrastructure=AgentTypes.FLEX)
    credentials = self.user_data.getCredentials()
    params = {
        'infrastructure': AgentTypes.FLEX,
        'flex_cloud_machine_info': self.user_data.get_flex_cloud_machine_info(),
        'flex_queue_head': self.user_data.get_flex_queue_head_machine(),
        'key_prefix': '',  # no prefix
        'keyname': '',
        'email': [user_id],
        'credentials': credentials,
        'user_id': user_id,
        'reservation_id': self.user_data.reservation_id
    }

    # Surface progress to the UI before the blocking deregister call.
    self.user_data.flex_cloud_status = True
    self.user_data.flex_cloud_info_msg = 'Stopping Flex Cloud'
    self.user_data.put()

    succeeded = service.deregister_flex_cloud(parameters=params, blocking=True)

    if succeeded == True:
        logging.debug('deregister_flex_cloud succeeded!')
        # Invalidate every cached flex-cloud setting for this user.
        self.user_data.valid_flex_cloud_info = False
        self.user_data.is_flex_cloud_info_set = False
        self.user_data.reservation_id = None
        self.user_data.flex_db_password = None
        self.user_data.flex_cloud_info_msg = 'Flex Cloud Stopped'
    else:
        logging.error('deregister_flex_cloud failed!')
        self.user_data.flex_cloud_info_msg = 'Error when deregistering Flex Cloud'
    # Both branches leave the status flag set and persist the record.
    self.user_data.flex_cloud_status = True
    self.user_data.put()

    logging.debug("Cleaning up old flex-cloud entries in the DB")
    VMStateModel.cleanup_flex_old_flex_entries(user_id)

    self.redirect('/flexCloudCredentials')
def __verify_ec2_instances_via_ssh(self, instance_ids, parameters, public_ips):
    """Verify ssh reachability of newly launched EC2 VMs.

    VMs that do not answer ssh within the helper's timeout are deregistered
    and their VMStateModel entries are marked FAILED.

    Args:
        instance_ids: list of EC2 instance ids, parallel to public_ips.
        parameters: dictionary of launch parameters; must contain 'keyname'.
        public_ips: list of public IPs, parallel to instance_ids.

    Returns:
        Tuple (connected_public_ips, connected_instance_ids) for the VMs
        that answered ssh.

    Raises:
        Exception: if the ssh keyfile is missing, or terminating the
            unreachable instances fails.
    """
    keyfile = os.path.join(os.path.dirname(__file__), "..", "{0}.key".format(parameters["keyname"]))
    logging.info("keyfile = {0}".format(keyfile))
    if not os.path.exists(keyfile):
        raise Exception("ssh keyfile file not found: {0}".format(keyfile))

    connected_public_ips = []
    connected_instance_ids = []
    for (pub_ip, ins_id) in zip(public_ips, instance_ids):
        logging.info("connecting to {0}...".format(pub_ip))
        # Fix: test truthiness directly instead of comparing '== True'.
        if helper.wait_for_ssh_connection(key_file=keyfile, ip=pub_ip):
            logging.info("{0} is successfully added".format(pub_ip))
            connected_public_ips.append(pub_ip)
            connected_instance_ids.append(ins_id)

    # if there are some vms not able to be connected via ssh,
    # just shut them down explicitly
    if len(public_ips) != len(connected_public_ips):
        logging.info(
            "Time out on ssh to {0} instances. They will be terminated.".format(
                len(public_ips) - len(connected_public_ips)
            )
        )
        try:
            terminate_ins_ids = [ins_id for ins_id in instance_ids
                                 if ins_id not in connected_instance_ids]
            self.agent.deregister_some_instances(parameters, terminate_ins_ids)
            # update db with failed vms
            VMStateModel.set_state(
                params=None if False else parameters, ins_ids=terminate_ins_ids,
                state=VMStateModel.STATE_FAILED,
                description=VMStateModel.DESCRI_TIMEOUT_ON_SSH
            ) if False else VMStateModel.set_state(
                parameters, terminate_ins_ids, VMStateModel.STATE_FAILED, VMStateModel.DESCRI_TIMEOUT_ON_SSH
            )
        except Exception:
            # Fix: the original bare 'except:' discarded the underlying
            # error; log its traceback before raising the summary error.
            logging.exception("Error while terminating unreachable instances.")
            raise Exception("Errors in terminating instances that cannot be connected via ssh.")

    # Fix: removed the dead 'public_ips = None' / 'instance_ids = None'
    # assignments; they had no effect before the return.
    return connected_public_ips, connected_instance_ids
def __configure_celery(self, params): ''' Private method used for uploading the current celery configuration to each instance that is running and ssh connectable. Args parameters A dictionary of parameters ''' # Update celery config file...it should have the correct IP # of the Queue head node, which should already be running. # Pass it line by line so theres no weird formatting errors from # trying to echo a multi-line file directly on the command line logging.debug('__configure_celery() params={0}'.format(params)) flex_cloud_machine_info = params[self.PARAM_FLEX_CLOUD_MACHINE_INFO] instance_types = [] for machine in flex_cloud_machine_info: vm = VMStateModel.get_by_ip( machine['ip'], reservation_id=params['reservation_id']) commands = [] my_ins_type = 'Unknown' commands.append('source ~/.bashrc') if vm is None: logging.error('VMStateModel.get_by_ip({0}) in None'.format( machine['ip'])) continue else: my_ins_type = vm.ins_type commands.append('export INSTANCE_TYPE={0}'.format(vm.ins_type)) if vm.ins_type not in instance_types: instance_types.append(vm.ins_type) ip = machine['ip'] keyfile = machine['keyfile'] username = machine['username'] success = helper.start_celery_on_vm(instance_type=my_ins_type, ip=ip, key_file=keyfile, username=username, agent_type=self.agent_type, worker_name=ip.replace( '.', '_'), prepend_commands=commands) if success == 0: # update db with successful running vms logging.info("celery started on host ip: {0}".format(ip)) else: raise Exception("Fail to start celery on {0}".format(ip)) # get all intstance types and configure the celeryconfig.py locally logging.info('For local celery setup, instance_types = {0}'.format( instance_types)) helper.config_celery_queues(agent_type=self.agent_type, instance_types=instance_types)
def __deregister_flex_vm(self, ip, username, keyfile, parameters, queue_head_ip, force=False):
    """Run the flex-cloud deregistration script on a VM and mark it terminated.

    Args:
        ip: public IP of the Flex VM.
        username: ssh username for the VM.
        keyfile: path to the ssh private key for the VM.
        parameters: dictionary passed through to VMStateModel.set_state.
        queue_head_ip: not referenced in this body — presumably kept for a
            uniform caller signature; confirm against callers.
        force: not referenced in this body either.
    """
    try:
        if self.check_network_ports(ip, [22]):
            deregister_command = self.get_remote_command_string(ip=ip, username=username, keyfile=keyfile,
                                                                command="sudo ~/stochss/release-tools/flex-cloud/deregister_flex_vm.sh")
            logging.debug('deregister_command =\n{}'.format(deregister_command))
            os.system(deregister_command)
        else:
            # Port 22 closed: nothing we can run remotely.
            logging.debug('Flex VM is not accessible via SSH, can not execute deregister command')
    except Exception as e:
        logging.exception('Failed to deregister Flex VM: {0}'.format(e))
        # logging.error(sys.exc_info())
    finally:
        # Whatever happened above, record the VM as terminated in the DB.
        VMStateModel.set_state(params=parameters,
                               ins_ids=[self.get_flex_instance_id(public_ip=ip)],
                               state=VMStateModel.STATE_TERMINATED,
                               description='VM Deregistered.')
def update_flex_cloud_machine_info_from_db(self, service):
    """Refresh the stored flex-cloud machine list from VMStateModel.

    For each configured machine, copies state/description from the
    VMStateModel row matching its IP and the current reservation_id;
    machines with no (or stale-reservation) rows become STATE_UNKNOWN.
    Also recomputes valid_flex_cloud_info (True iff the queue head is
    RUNNING) and persists the updated user_data record.

    Args:
        service: backend service object forwarded to __get_all_vms.
    """
    logging.debug('update_flex_cloud_machine_info_from_db')
    if self.is_flex_cloud_info_set:
        flex_cloud_machine_info = self.get_flex_cloud_machine_info()

        if flex_cloud_machine_info is None or len(flex_cloud_machine_info) == 0:
            return

        # Querying all VMs also drives the DB synchronization machinery.
        all_vms = self.__get_all_vms(AgentTypes.FLEX, service)
        #logging.debug('flex: all_vms =\n{0}'.format(pprint.pformat(all_vms)))
        logging.debug('flex: all_vms =\n{0}'.format(all_vms))

        all_vms_map = {vm['pub_ip']: vm for vm in all_vms}
        #logging.debug('flex: all_vms_map =\n{0}'.format(pprint.pformat(all_vms_map)))
        logging.debug('flex: all_vms_map =\n{0}'.format(all_vms_map))

        for machine in flex_cloud_machine_info:
            vms = VMStateModel.get_by_ip(machine['ip'], reservation_id=self.reservation_id)
            if vms is None:
                logging.debug('machine={0} vms=NONE'.format(machine))
            else:
                logging.debug('machine={0} vms={1} {2}'.format(machine, vms.pub_ip, vms.state))

            if vms and vms.res_id == self.reservation_id:
                # Row belongs to the current reservation: trust its state.
                machine['state'] = vms.state
                machine['description'] = vms.description
            else:
                if vms:
                    # Row exists but for an older reservation.
                    logging.error('From VMStateModel, reservation_id = {0} != user_data.reservation_id'.format(
                        vms.res_id
                    ))
                machine['state'] = VMStateModel.STATE_UNKNOWN
                machine['description'] = VMStateModel.STATE_UNKNOWN

        # Normalize key_file_id to int for downstream consumers.
        for machine in flex_cloud_machine_info:
            machine['key_file_id'] = int(machine['key_file_id'])

        logging.debug('After updating from VMStateModel, flex_cloud_machine_info =\n{0}'.format(
            pprint.pformat(flex_cloud_machine_info)))

        # Update Flex Cloud Status: valid only if the queue head is running.
        valid_flex_cloud_info = False
        for machine in flex_cloud_machine_info:
            if machine['queue_head'] and machine['state'] == VMStateModel.STATE_RUNNING:
                valid_flex_cloud_info = True

        self.valid_flex_cloud_info = valid_flex_cloud_info
        self.set_flex_cloud_machine_info(flex_cloud_machine_info)
        self.put()

        logging.debug('valid_flex_cloud_info = {0}'.format(self.valid_flex_cloud_info))
    else:
        # for clearing out db syn requests
        all_vms = self.__get_all_vms(AgentTypes.FLEX, service)
        logging.debug('flex: all_vms =\n{0}'.format(pprint.pformat(all_vms)))
def __create_vm_state_model_entries(self, infrastructure, num_vms, ec2_secret_key, ec2_access_key, user_id, reservation_id):
    """Insert num_vms VMStateModel rows in STATE_CREATING and return their ids."""
    logging.debug('__create_vm_state_model_entries')
    logging.debug('num_vms = {0} user_id = {1} reservation_id = {2}'.format(num_vms, user_id, reservation_id))

    entry_ids = []
    created = 0
    while created < num_vms:
        # One placeholder row per VM to be launched; state advances later.
        entry = VMStateModel(state=VMStateModel.STATE_CREATING,
                             infra=infrastructure,
                             ec2_access_key=ec2_access_key,
                             ec2_secret_key=ec2_secret_key,
                             user_id=user_id,
                             res_id=reservation_id)
        entry.put()
        entry_ids.append(entry.key().id())
        created += 1

    logging.debug('__create_vm_state_model_entries: ids = {0}'.format(entry_ids))
    return entry_ids
def deregister_flex_cloud(self, user_id):
    """Stop the user's Flex Cloud, update user_data flags, and redirect.

    Args:
        user_id: id of the user whose flex cloud is being torn down.
    """
    logging.debug('deregister_flex_cloud')
    service = backendservices(self.user_data) #infrastructure=AgentTypes.FLEX)
    credentials = self.user_data.getCredentials()
    params = {
        'infrastructure': AgentTypes.FLEX,
        'flex_cloud_machine_info': self.user_data.get_flex_cloud_machine_info(),
        'flex_queue_head': self.user_data.get_flex_queue_head_machine(),
        'key_prefix': '',  # no prefix
        'keyname': '',
        'email': [user_id],
        'credentials': credentials,
        'user_id': user_id,
        'reservation_id': self.user_data.reservation_id
    }

    # Show progress in the UI before the blocking deregister call.
    self.user_data.flex_cloud_status = True
    self.user_data.flex_cloud_info_msg = 'Stopping Flex Cloud'
    self.user_data.put()

    result = service.deregister_flex_cloud(parameters=params, blocking=True)

    if result == True:
        logging.debug('deregister_flex_cloud succeeded!')
        # Clear all cached flex-cloud settings for this user.
        self.user_data.valid_flex_cloud_info = False
        self.user_data.is_flex_cloud_info_set = False
        self.user_data.reservation_id = None
        self.user_data.flex_db_password = None
        self.user_data.flex_cloud_status = True
        self.user_data.flex_cloud_info_msg = 'Flex Cloud Stopped'
        self.user_data.put()
    else:
        logging.error('deregister_flex_cloud failed!')
        self.user_data.flex_cloud_status = True
        self.user_data.flex_cloud_info_msg = 'Error when deregistering Flex Cloud'
        self.user_data.put()

    logging.debug("Cleaning up old flex-cloud entries in the DB")
    VMStateModel.cleanup_flex_old_flex_entries(user_id)

    self.redirect('/flexCloudCredentials')
def __configure_celery(self, params): """ Private method used for uploading the current celery configuration to each instance that is running and ssh connectable. Args parameters A dictionary of parameters """ # Update celery config file...it should have the correct IP # of the Queue head node, which should already be running. # Pass it line by line so theres no weird formatting errors from # trying to echo a multi-line file directly on the command line logging.debug("__configure_celery() params={0}".format(params)) flex_cloud_machine_info = params[self.PARAM_FLEX_CLOUD_MACHINE_INFO] instance_types = [] for machine in flex_cloud_machine_info: vm = VMStateModel.get_by_ip(machine["ip"], reservation_id=params["reservation_id"]) commands = [] my_ins_type = "Unknown" commands.append("source ~/.bashrc") if vm is None: logging.error("VMStateModel.get_by_ip({0}) in None".format(machine["ip"])) continue else: my_ins_type = vm.ins_type commands.append("export INSTANCE_TYPE={0}".format(vm.ins_type)) if vm.ins_type not in instance_types: instance_types.append(vm.ins_type) ip = machine["ip"] keyfile = machine["keyfile"] username = machine["username"] success = helper.start_celery_on_vm( instance_type=my_ins_type, ip=ip, key_file=keyfile, username=username, agent_type=self.agent_type, worker_name=ip.replace(".", "_"), prepend_commands=commands, ) if success == 0: # update db with successful running vms logging.info("celery started on host ip: {0}".format(ip)) else: raise Exception("Fail to start celery on {0}".format(ip)) # get all intstance types and configure the celeryconfig.py locally logging.info("For local celery setup, instance_types = {0}".format(instance_types)) helper.config_celery_queues(agent_type=self.agent_type, instance_types=instance_types)
def describe_machines_from_db(self, infrastructure, force=False):
    """Synchronize VMStateModel for an infrastructure and return all its VMs."""
    parameters = {
        "infrastructure": infrastructure,
        "credentials": self.get_credentials(),
        "key_prefix": self.user_data.user_id,
        "user_id": self.user_data.user_id,
    }
    # Flex clouds additionally need the machine list and reservation id.
    if infrastructure == AgentTypes.FLEX:
        parameters['flex_cloud_machine_info'] = self.user_data.get_flex_cloud_machine_info()
        parameters['reservation_id'] = self.user_data.reservation_id

    manager = InfrastructureManager()
    manager.synchronize_db(parameters, force=force)

    return VMStateModel.get_all(parameters)
def __configure_celery(self, params, public_ips, instance_ids):
    """
    Private method used for uploading the current celery configuration
    to each instance that is running and ssh connectable.

    Args
        params          A dictionary of parameters; must contain 'keyname'
                        and 'credentials'.
        public_ips      A list of public ips that are going to be configed
        instance_ids    A list of instance_ids that are used for terminating
                        instances and updating the database if configuration
                        fails for some reason

    Raises:
        Exception: if the ssh key file is missing, or if celery cannot be
            started on a machine (that machine is deregistered and marked
            FAILED first).
    """
    # Update celery config file...it should have the correct IP
    # of the Queue head node, which should already be running.
    # Pass it line by line so theres no weird formatting errors from
    # trying to echo a multi-line file directly on the command line
    key_file = os.path.join(os.path.dirname(__file__), "..", "{0}.key".format(params["keyname"]))
    logging.debug("key_file = {0}".format(key_file))
    if not os.path.exists(key_file):
        raise Exception("ssh key_file file not found: {0}".format(key_file))

    credentials = params["credentials"]
    # Commands shared by every VM; each VM gets its own copy below.
    base_commands = [
        "source /home/ubuntu/.bashrc",
        "export AWS_ACCESS_KEY_ID={0}".format(str(credentials["EC2_ACCESS_KEY"])),
        "export AWS_SECRET_ACCESS_KEY={0}".format(str(credentials["EC2_SECRET_KEY"])),
    ]

    for ip, ins_id in zip(public_ips, instance_ids):
        # helper.wait_for_ssh_connection(key_file, ip)
        ins_type = VMStateModel.get_instance_type(params, ins_id)
        # Fix: build a fresh command list per VM. Previously the
        # INSTANCE_TYPE export was appended to one shared list, so every
        # later VM received all earlier VMs' exports and only got the
        # right value because the last shell export wins.
        commands = base_commands + ["export INSTANCE_TYPE={0}".format(ins_type)]
        success = helper.start_celery_on_vm(
            instance_type=ins_type,
            ip=ip,
            key_file=key_file,
            agent_type=self.agent_type,
            worker_name=ip.replace(".", "_"),
            prepend_commands=commands,
        )
        if success == 0:
            # update db with successful running vms
            logging.info("celery started! \n")
            logging.info("host ip: {0}".format(ip))
            VMStateModel.set_state(params, [ins_id], VMStateModel.STATE_RUNNING, VMStateModel.DESCRI_SUCCESS)
        else:
            # Could not start celery: terminate the VM and record the failure.
            self.agent.deregister_some_instances(params, [ins_id])
            VMStateModel.set_state(
                params, [ins_id], VMStateModel.STATE_FAILED, VMStateModel.DESCRI_FAIL_TO_COFIGURE_CELERY
            )
            raise Exception("Failure to start celery on {0}".format(ip))

    # get all intstance types and configure the celeryconfig.py locally
    instance_types = VMStateModel.get_running_instance_types(params)
    helper.config_celery_queues(agent_type=self.agent_type, instance_types=instance_types)
def start_ec2_vms(self, params, blocking=False):
    '''
    This method instantiates EC2 vm instances.

    Args:
        params: dictionary describing the launch; must contain 'keyname',
            'user_id', 'infrastructure', 'reservation_id' and 'credentials'
            (with EC2_ACCESS_KEY / EC2_SECRET_KEY); may contain 'vms' and
            'head_node'.
        blocking: forwarded to InfrastructureManager.

    Returns:
        (True, None) on success, or (False, error_message) on failure.
    '''
    logging.debug("start_ec2_vms : inside method with params : \n%s", pprint.pformat(params))
    try:
        # make sure that any keynames we use are prefixed with stochss so that
        # we can do a terminate all based on keyname prefix
        key_prefix = AgentConfig.get_agent_key_prefix(agent_type=AgentTypes.EC2,
                                                      key_prefix=params.get('key_prefix', ''))
        key_name = params["keyname"]
        if not key_name.startswith(key_prefix):
            params['keyname'] = key_prefix + key_name

        # NOTE: We are forcing blocking mode within the InfrastructureManager class
        # for the launching of VMs because of how GAE joins on all threads before
        # returning a response from a request.
        i = InfrastructureManager(blocking=blocking)
        res = {}

        # 1. change the status of 'failed' in the previous launch in db to 'terminated'
        # NOTE: We need to make sure that the RabbitMQ server is running if any compute
        # nodes are running as we are using the AMQP broker option for Celery.
        ins_ids = VMStateModel.terminate_not_active(params)

        # 2. get user_id, infra, ec2 credentials
        user_id = self.__get_required_parameter(parameter_key='user_id', params=params)
        infrastructure = self.__get_required_parameter(parameter_key='infrastructure', params=params)
        reservation_id = self.__get_required_parameter(parameter_key='reservation_id', params=params)
        logging.debug('ec2: reservation_id = {0}'.format(reservation_id))

        if 'credentials' in params:
            if 'EC2_ACCESS_KEY' in params['credentials'] and 'EC2_SECRET_KEY' in params['credentials']:
                ec2_access_key = params['credentials']['EC2_ACCESS_KEY']
                ec2_secret_key = params['credentials']['EC2_SECRET_KEY']
            else:
                raise Exception('VMStateModel ERROR: Cannot get access key or secret.')
        else:
            raise Exception('VMStateModel ERROR: No credentials are provided.')

        if ec2_access_key is None or ec2_secret_key is None:
            raise Exception('VMStateModel ERROR: ec2 credentials are not valid.')

        # 3. create exact number of entities in db for this launch, and set the status to 'creating'
        num_vms = 0
        if 'vms' in params:
            for vm in params['vms']:
                logging.debug('vm: {0}, num: {1}'.format(vm['instance_type'], vm['num_vms']))
                num_vms += vm['num_vms']
        # The optional head node counts as one extra VM.
        if 'head_node' in params:
            num_vms += 1
        logging.debug('num = {0}'.format(num_vms))

        ids = self.__create_vm_state_model_entries(ec2_access_key=ec2_access_key,
                                                   ec2_secret_key=ec2_secret_key,
                                                   infrastructure=infrastructure,
                                                   num_vms=num_vms,
                                                   user_id=user_id,
                                                   reservation_id=reservation_id)

        # 4. Prepare Instances
        params[VMStateModel.IDS] = ids
        res = i.prepare_instances(params)

        # 5, check and create stochss table exists if it does not exist
        self.__create_dynamodb_stochss_table(ec2_access_key=ec2_access_key,
                                             ec2_secret_key=ec2_secret_key)

        logging.debug("start_ec2_vms : exiting method with result : %s", str(res))
        return True, None
    except Exception as e:
        logging.exception("start_ec2_vms : exiting method with error : {0}".format(str(e)))
        return False, 'Errors occur in starting machines:' + str(e)
''' key_prefix = AgentConfig.get_agent_key_prefix(agent_type=AgentTypes.EC2, key_prefix=params.get('key_prefix', '')) try: logging.debug("Stopping compute nodes with key_prefix: {0}".format(key_prefix)) i = InfrastructureManager(blocking=blocking) res = i.deregister_instances(parameters=params, terminate=True) ret = True except Exception, e: logging.error("Terminate machine failed with error : %s", str(e)) ret = False finally: # update db VMStateModel.terminate_all(params) return ret # def describeMachines(self, params): # ''' # This method gets the status of all the instances # ''' # # add calls to the infrastructure manager for getting details of machines # logging.debug("describeMachines() params =\n%s", pprint.pformat(params)) # # key_prefix = AgentConfig.get_agent_key_prefix(agent_type=self.infrastructure, # key_prefix=params.get('key_prefix', '')) # logging.debug('key_prefix = {0}'.format(key_prefix)) # # params["key_prefix"] = key_prefix
def prepare_instances(self, parameters, count=None, security_configured=True):
    """
    prepares the specified number of Flex instances using the parameters
    provided. This method is blocking in that it waits until the
    requested VMs are properly booted up. However if the requested
    VMs cannot be procured within 1800 seconds, this method will treat
    it as an error and return. (Also see documentation for the BaseAgent
    class)

    Args:
      parameters          A dictionary of parameters. This must contain 'keyname',
                          'group', 'image_id' and 'instance_type' parameters.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.
    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    logging.debug('flex_agent.prepare_instances() parameters={0}'.format(parameters))
    try:
        flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]
        logging.debug('flex_cloud_machine_info =\n{}'.format(pprint.pformat(flex_cloud_machine_info)))

        queue_head = parameters[self.PARAM_FLEX_QUEUE_HEAD]
        logging.debug('queue_head = {}'.format(queue_head))
        queue_head_keyfile = queue_head['keyfile']
        remote_queue_head_keyfile = os.path.join(FlexConfig.QUEUE_HEAD_KEY_DIR,
                                                 os.path.basename(queue_head_keyfile))

        for machine in flex_cloud_machine_info:
            ip = machine['ip']
            keyfile = machine['keyfile']
            os.chmod(keyfile, int('600', 8))  # ssh requires a private keyfile
            username = machine['username']
            is_queue_head = machine[self.PARAM_QUEUE_HEAD]
            # Fix: renamed from 'id' to avoid shadowing the builtin.
            instance_id = self.get_flex_instance_id(public_ip=ip)

            if not os.path.exists(keyfile):
                logging.error('Keyfile: {0} does not exist!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_INVALID_KEYFILE)
                continue

            logging.debug("[{0}] [{1}] [{2}] [is_queue_head:{3}]".format(ip, keyfile, username, is_queue_head))

            # Copy the queue head's private key onto the worker so the
            # worker can reach the queue head later.
            scp_command = \
                'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                    keyfile=keyfile,
                    source=queue_head_keyfile,
                    target="{username}@{ip}:{remote_queue_head_keyfile}".format(
                        username=username,
                        ip=ip,
                        remote_queue_head_keyfile=remote_queue_head_keyfile))
            logging.debug('scp command for queue head keyfile =\n{}'.format(scp_command))
            res = os.system(scp_command)
            if res != 0:
                # Fix: the original message called .format() with no
                # placeholder, silently dropping the keyfile detail.
                logging.error('scp for queue head keyfile {0} failed!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                continue

            # Build the per-machine init script uploaded and run below.
            script_lines = []
            script_lines.append("#!/bin/bash")
            script_lines.append("echo export STOCHKIT_HOME={0} >> ~/.bashrc".format("~/stochss/StochKit/"))
            script_lines.append("echo export STOCHKIT_ODE={0} >> ~/.bashrc".format("~/stochss/ode/"))
            script_lines.append("echo export R_LIBS={0} >> ~/.bashrc".format("~/stochss/stochoptim/library"))
            # Fix: dropped a no-op .format("~/stochss/stochoptim/library")
            # left over from copy/paste (the string has no placeholder).
            script_lines.append("echo export C_FORCE_ROOT=1 >> ~/.bashrc")
            script_lines.append("chmod 600 {remote_queue_head_keyfile}".format(
                remote_queue_head_keyfile=remote_queue_head_keyfile))

            if is_queue_head:
                logging.debug('Adding extra commands for configuring queue head...')
                script_lines.append("sudo rabbitmqctl add_user stochss ucsb")
                script_lines.append('sudo rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"')
                reset_mysql_script = '~/stochss/release-tools/flex-cloud/reset_mysql_pwd.sh'
                script_lines.append("sudo {reset_mysql_script} root {flex_db_password}".format(
                    reset_mysql_script=reset_mysql_script,
                    flex_db_password=parameters[self.PARAM_FLEX_DB_PASSWORD]))

            bash_script = '\n'.join(script_lines)
            logging.debug("\n\n\nbash_script =\n{0}\n\n\n".format(bash_script))

            bash_script_filename = os.path.join(AgentConfig.TMP_DIRNAME, 'stochss_init.sh')
            with open(bash_script_filename, 'w') as bash_script_file:
                bash_script_file.write(bash_script)

            scp_command = 'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                keyfile=keyfile,
                source=bash_script_filename,
                target="{username}@{ip}:~/stochss_init.sh".format(username=username, ip=ip))
            logging.debug('scp command =\n{}'.format(scp_command))
            res = os.system(scp_command)
            os.remove(bash_script_filename)
            if res != 0:
                # Fix: same no-op .format() — include the keyfile in the message.
                logging.error('scp with keyfile {0} failed!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                continue

            # Run the uploaded init script on the remote machine.
            commands = ['chmod +x ~/stochss_init.sh', '~/stochss_init.sh']
            command = ';'.join(commands)
            remote_command_string = self.get_remote_command_string(ip=ip, username=username,
                                                                   keyfile=keyfile, command=command)
            logging.debug('remote_command_string =\n{}'.format(remote_command_string))
            res = os.system(remote_command_string)
            if res != 0:
                # Fix: same no-op .format() — include the keyfile in the message.
                logging.error('remote command with keyfile {0} failed!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[instance_id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                continue
    except Exception as e:
        logging.exception(e)
        raise
def prepare_instances(self, parameters, count=None, security_configured=True):
    """
    prepares the specified number of Flex instances using the parameters
    provided. This method is blocking in that it waits until the
    requested VMs are properly booted up. However if the requested
    VMs cannot be procured within 1800 seconds, this method will treat
    it as an error and return. (Also see documentation for the BaseAgent
    class)

    Args:
      parameters          A dictionary of parameters. This must contain 'keyname',
                          'group', 'image_id' and 'instance_type' parameters.
      security_configured Uses this boolean value as an heuristic to
                          detect brand new AppScale deployments.
    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    """
    logging.debug('flex_agent.prepare_instances() parameters={0}'.format(parameters))
    try:
        flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]
        logging.debug('flex_cloud_machine_info =\n{}'.format(pprint.pformat(flex_cloud_machine_info)))

        queue_head = parameters[self.PARAM_FLEX_QUEUE_HEAD]
        logging.debug('queue_head = {}'.format(queue_head))
        queue_head_keyfile = queue_head['keyfile']
        remote_queue_head_keyfile = os.path.join(FlexConfig.QUEUE_HEAD_KEY_DIR,
                                                 os.path.basename(queue_head_keyfile))

        for machine in flex_cloud_machine_info:
            ip = machine['ip']
            keyfile = machine['keyfile']
            os.chmod(keyfile, int('600', 8))  # ssh requires a private keyfile
            username = machine['username']
            is_queue_head = machine[self.PARAM_QUEUE_HEAD]
            # NOTE(review): 'id' shadows the builtin; consider renaming.
            id = self.get_flex_instance_id(public_ip=ip)

            if not os.path.exists(keyfile):
                logging.error('Keyfile: {0} does not exist!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_INVALID_KEYFILE)
                continue

            logging.debug("[{0}] [{1}] [{2}] [is_queue_head:{3}]".format(ip, keyfile, username, is_queue_head))

            # Copy the queue head's private key onto the worker so the
            # worker can reach the queue head later.
            scp_command = \
                'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                    keyfile=keyfile,
                    source=queue_head_keyfile,
                    target="{username}@{ip}:{remote_queue_head_keyfile}".format(
                        username=username,
                        ip=ip,
                        remote_queue_head_keyfile=remote_queue_head_keyfile
                    )
                )
            logging.debug('scp command for queue head keyfile =\n{}'.format(scp_command))
            res = os.system(scp_command)
            if res != 0:
                # NOTE(review): .format(keyfile) is a no-op here — the
                # message has no placeholder, so the keyfile is dropped.
                logging.error('scp for queue head keyfile failed!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                continue

            # Build the per-machine init script uploaded and run below.
            script_lines = []
            script_lines.append("#!/bin/bash")
            script_lines.append("echo export STOCHKIT_HOME={0} >> ~/.bashrc".format("~/stochss/StochKit/"))
            script_lines.append("echo export STOCHKIT_ODE={0} >> ~/.bashrc".format("~/stochss/ode/"))
            script_lines.append("echo export R_LIBS={0} >> ~/.bashrc".format("~/stochss/stochoptim/library"))
            # NOTE(review): the .format(...) below is a no-op (no placeholder),
            # likely copy/paste residue from the R_LIBS line above.
            script_lines.append("echo export C_FORCE_ROOT=1 >> ~/.bashrc".format("~/stochss/stochoptim/library"))
            script_lines.append("chmod 600 {remote_queue_head_keyfile}".format(
                remote_queue_head_keyfile=remote_queue_head_keyfile))

            if is_queue_head:
                logging.debug('Adding extra commands for configuring queue head...')
                script_lines.append("sudo rabbitmqctl add_user stochss ucsb")
                script_lines.append('sudo rabbitmqctl set_permissions -p / stochss ".*" ".*" ".*"')
                reset_mysql_script = '~/stochss/release-tools/flex-cloud/reset_mysql_pwd.sh'
                script_lines.append("sudo {reset_mysql_script} root {flex_db_password}".format(
                    reset_mysql_script=reset_mysql_script,
                    flex_db_password=parameters[self.PARAM_FLEX_DB_PASSWORD]))

            bash_script = '\n'.join(script_lines)
            logging.debug("\n\n\nbash_script =\n{0}\n\n\n".format(bash_script))

            bash_script_filename = os.path.join(AgentConfig.TMP_DIRNAME, 'stochss_init.sh')
            with open(bash_script_filename, 'w') as bash_script_file:
                bash_script_file.write(bash_script)

            scp_command = 'scp -o \'UserKnownHostsFile=/dev/null\' -o \'StrictHostKeyChecking no\' -i {keyfile} {source} {target}'.format(
                keyfile=keyfile,
                source=bash_script_filename,
                target="{username}@{ip}:~/stochss_init.sh".format(username=username, ip=ip))
            logging.debug('scp command =\n{}'.format(scp_command))
            res = os.system(scp_command)
            os.remove(bash_script_filename)
            if res != 0:
                # NOTE(review): no-op .format(keyfile) — keyfile never logged.
                logging.error('scp failed!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                continue

            # Run the uploaded init script on the remote machine.
            commands = ['chmod +x ~/stochss_init.sh', '~/stochss_init.sh']
            command = ';'.join(commands)
            remote_command_string = self.get_remote_command_string(ip=ip, username=username,
                                                                   keyfile=keyfile, command=command)
            logging.debug('remote_command_string =\n{}'.format(remote_command_string))
            res = os.system(remote_command_string)
            if res != 0:
                # NOTE(review): no-op .format(keyfile) — keyfile never logged.
                logging.error('remote command failed!'.format(keyfile))
                VMStateModel.set_state(params=parameters, ins_ids=[id],
                                       state=VMStateModel.STATE_FAILED,
                                       description=VMStateModel.DESCRI_FAIL_TO_PREPARE)
                continue
    except Exception as e:
        logging.exception(e)
        raise
def prepare_vms(self, parameters):
    """
    Prepare a Flex Cloud deployment.

    Validates the machine-info parameter, records one VMStateModel entry per
    machine, verifies SSH reachability of the queue head and every worker,
    configures them, and finally forces a state synchronization. Progress and
    failures are reported to the user through the UserData entity
    (flex_cloud_status / flex_cloud_info_msg).

    Args:
        parameters: dict containing PARAM_FLEX_CLOUD_MACHINE_INFO,
            PARAM_FLEX_QUEUE_HEAD, 'user_id', 'credentials' and
            'reservation_id'.

    Returns:
        None. All outcomes are communicated via UserData and VMStateModel.
    """
    logging.debug('prepare_vms(): parameters={0}'.format(parameters))

    queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]
    user_data = self.__get_user_data(parameters['user_id'])

    # Validate the machine-info parameter before touching anything else.
    if self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters \
            or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] is None \
            or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] == []:
        logging.error('Error: No {0} param!'.format(
            self.PARAM_FLEX_CLOUD_MACHINE_INFO))
        # Report Error
        user_data.flex_cloud_status = False
        user_data.flex_cloud_info_msg = 'Invalid Parameters'
        user_data.put()
        return

    flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

    # Set the user message to "configuring..."
    user_data.flex_cloud_status = True
    user_data.flex_cloud_info_msg = 'Flex Cloud configured. Waiting for workers to become available...'
    user_data.put()

    # Initialize the VMStateModel db. Every machine must expose SSH (22)
    # and HTTPS (443) to count as accessible.
    all_accessible = True
    for machine in flex_cloud_machine_info:
        if self.agent.check_network_ports(machine['ip'], [22, 443]):
            state = VMStateModel.STATE_ACCESSIBLE
        else:
            state = VMStateModel.STATE_INACCESSIBLE
            all_accessible = False

        vm_state = VMStateModel(
            state=state,
            infra=self.agent_type,
            ins_type=FlexConfig.INSTANCE_TYPE,
            pri_ip=machine['ip'],
            pub_ip=machine['ip'],
            username=machine['username'],
            keyfile=machine['keyfile'],
            ins_id=self.agent.get_flex_instance_id(machine['ip']),
            user_id=parameters['user_id'],
            res_id=self.reservation_id)
        vm_state.put()

    if not all_accessible:
        # Report Failure
        user_data.flex_cloud_status = False
        user_data.flex_cloud_info_msg = 'Error: not all workers are accessible'
        user_data.put()
        return

    # The queue head must be reachable over SSH before any configuration.
    if queue_head_machine is None or not helper.wait_for_ssh_connection(
            queue_head_machine['keyfile'],
            queue_head_machine['ip'],
            username=queue_head_machine['username']):
        logging.error('Found no viable ssh-able/running queue head machine!')
        # Report Failure.
        # BUGFIX: the original always formatted queue_head_machine['ip'] here,
        # which raised TypeError when queue_head_machine was None instead of
        # reporting the failure to the user.
        user_data.flex_cloud_status = False
        if queue_head_machine is None:
            user_data.flex_cloud_info_msg = 'Error: no queue head machine specified'
        else:
            user_data.flex_cloud_info_msg = \
                'Error: Can not connect {0} (queue head) via SSH'.format(
                    queue_head_machine['ip'])
        user_data.put()
        return

    if not self.__prepare_queue_head(queue_head_machine, parameters):
        logging.error('Error: could not prepare queue head!')
        # Report Failure
        user_data.flex_cloud_status = False
        user_data.flex_cloud_info_msg = 'Error preparing the queue head'
        user_data.put()
        return

    # Collect SSH-reachable workers (every machine that is not the queue head).
    flex_cloud_workers = []
    for machine in parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]:
        if machine[self.PARAM_IS_QUEUE_HEAD]:
            continue
        if helper.wait_for_ssh_connection(machine['keyfile'],
                                          machine['ip'],
                                          username=machine['username']):
            flex_cloud_workers.append(machine)
        else:
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = \
                'Error: Can not connect to {0} via SSH'.format(machine['ip'])
            user_data.put()
            return

    if len(flex_cloud_workers) > 0:
        logging.debug('Preparing workers: {0}'.format(flex_cloud_workers))
        params = {
            'infrastructure': AgentTypes.FLEX,
            self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
            'credentials': parameters['credentials'],
            'user_id': parameters['user_id'],
            self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
            'reservation_id': parameters['reservation_id']
        }
        self.agent.prepare_instances(params)

    helper.update_celery_config_with_queue_head_ip(
        queue_head_ip=queue_head_machine['ip'], agent_type=self.agent_type)
    self.__configure_celery(params=parameters)

    # Report Success
    logging.debug('Flex Cloud Deployed')
    user_data.flex_cloud_status = True
    user_data.flex_cloud_info_msg = 'Flex Cloud Deployed'
    user_data.put()

    # Force the update of the instance status
    VMStateModel.synchronize(agent=self.agent, parameters=parameters)
    return
def __poll_instances_status(self, num_vms, parameters):
    """
    Poll the infrastructure until the requested number of instances is
    running, updating VMStateModel along the way; on timeout, terminate the
    instances that never came up.

    Args:
        num_vms: number of virtual machines expected to reach the running
            state (1 when launching the queue head).
        parameters: a dictionary of infrastructure parameters.

    Returns:
        A tuple (public_ips, private_ips, instance_ids); each element is a
        list describing the instances observed on the final poll.
    """
    logging.info('Start polling task for infrastructure = {0}'.format(
        parameters['infrastructure']))

    ins_ids = self.agent.describe_instances_launched(parameters)
    logging.info("ins_ids = {0}".format(ins_ids))

    # update db with new instance ids and 'pending'
    VMStateModel.update_ins_ids(parameters, ins_ids, self.reservation_id,
                                from_state=VMStateModel.STATE_CREATING,
                                to_state=VMStateModel.STATE_PENDING)

    # Results from the most recent poll; returned even on timeout.
    public_ips = None
    private_ips = None
    instance_ids = None
    keyfiles = None

    for x in xrange(EC2BackendWorker.POLL_COUNT):
        # get the ips and ids of this keyname
        public_ips, private_ips, instance_ids, instance_types, keyfiles = \
            self.agent.describe_instances_running(parameters)

        logging.info("public_ips = {0}".format(public_ips))
        logging.debug("private_ips = {0}".format(private_ips))
        logging.info("instance_ids = {0}".format(instance_ids))
        logging.info("instance_types = {0}".format(instance_types))
        logging.info("keyfiles = {0}".format(keyfiles))

        # if we get the requested number of vms (the requested number will be
        # 1 if this is queue head), update reservation information and stop
        # polling
        if num_vms == len(public_ips):
            # update db with new public ips and private ips
            VMStateModel.update_ips(parameters, instance_ids, public_ips,
                                    private_ips, instance_types, keyfiles)
            break

        if x < EC2BackendWorker.POLL_COUNT - 1:
            time.sleep(EC2BackendWorker.POLL_WAIT_TIME)
            # BUGFIX: log the actual wait time instead of a hard-coded
            # "5 seconds", which went stale whenever POLL_WAIT_TIME changed.
            logging.info('Polling task: sleep {0} seconds...'.format(
                EC2BackendWorker.POLL_WAIT_TIME))
        else:
            # Final attempt: persist whatever is running, then terminate the
            # instances that never came up.
            VMStateModel.update_ips(parameters, instance_ids, public_ips,
                                    private_ips, instance_types, keyfiles)
            logging.info('Polling timeout. About to terminate some instances:')

            terminate_ins_ids = []
            for ins_id in ins_ids:
                if ins_id not in instance_ids:
                    logging.info('instance {0} to be terminated'.format(ins_id))
                    terminate_ins_ids.append(ins_id)

            # terminate timeout instances
            self.agent.deregister_some_instances(parameters, terminate_ins_ids)

            # update db with failure information
            VMStateModel.set_state(parameters, terminate_ins_ids,
                                   VMStateModel.STATE_FAILED,
                                   VMStateModel.DESCRI_FAIL_TO_RUN)

    return public_ips, private_ips, instance_ids
def __configure_celery(self, params, public_ips, instance_ids):
    '''
    Private method used for uploading the current celery configuration to
    each instance that is running and ssh-connectable, and starting a celery
    worker there.

    Args
        params         A dictionary of parameters; must contain 'keyname'
                       and 'credentials' (EC2 access/secret keys)
        public_ips     A list of public ips that are going to be configed
        instance_ids   A list of instance_ids, parallel to public_ips, used
                       for terminating instances and updating the database
                       if configuration fails for some reason

    Raises
        Exception      If the ssh key file is missing, or celery fails to
                       start on any instance (that instance is deregistered
                       and marked failed first).
    '''
    # Update celery config file...it should have the correct IP
    # of the Queue head node, which should already be running.
    # Pass it line by line so theres no weird formatting errors from
    # trying to echo a multi-line file directly on the command line
    key_file = os.path.join(os.path.dirname(__file__), '..',
                            '{0}.key'.format(params['keyname']))
    logging.debug("key_file = {0}".format(key_file))

    if not os.path.exists(key_file):
        raise Exception("ssh key_file file not found: {0}".format(key_file))

    credentials = params['credentials']

    # Commands shared by every instance; a per-instance copy is extended
    # below with that instance's INSTANCE_TYPE export.
    base_commands = []
    base_commands.append('source /home/ubuntu/.bashrc')
    base_commands.append('export AWS_ACCESS_KEY_ID={0}'.format(
        str(credentials['EC2_ACCESS_KEY'])))
    base_commands.append('export AWS_SECRET_ACCESS_KEY={0}'.format(
        str(credentials['EC2_SECRET_KEY'])))

    for ip, ins_id in zip(public_ips, instance_ids):
        ins_type = VMStateModel.get_instance_type(params, ins_id)

        # BUGFIX: build a fresh command list per instance. The original
        # appended the INSTANCE_TYPE export to the shared list, so every
        # later VM's prepend_commands accumulated the exports of all
        # previous VMs.
        commands = base_commands + [
            'export INSTANCE_TYPE={0}'.format(ins_type)]

        success = helper.start_celery_on_vm(
            instance_type=ins_type,
            ip=ip,
            key_file=key_file,
            agent_type=self.agent_type,
            worker_name=ip.replace('.', '_'),
            prepend_commands=commands)

        if success == 0:
            # update db with successful running vms
            logging.info("celery started!")
            logging.info("host ip: {0}".format(ip))
            VMStateModel.set_state(params, [ins_id],
                                   VMStateModel.STATE_RUNNING,
                                   VMStateModel.DESCRI_SUCCESS)
        else:
            # Celery failed: tear the instance down and record the failure
            # before aborting the whole configuration.
            self.agent.deregister_some_instances(params, [ins_id])
            VMStateModel.set_state(params, [ins_id],
                                   VMStateModel.STATE_FAILED,
                                   VMStateModel.DESCRI_FAIL_TO_COFIGURE_CELERY)
            raise Exception("Failure to start celery on {0}".format(ip))

    # get all intstance types and configure the celeryconfig.py locally
    instance_types = VMStateModel.get_running_instance_types(params)
    helper.config_celery_queues(agent_type=self.agent_type,
                                instance_types=instance_types)
def prepare_vms(self, parameters):
    """Configure a Flex Cloud deployment end to end.

    Validates the machine-info parameter, records one VMStateModel row per
    machine, checks SSH reachability of the queue head and all workers,
    prepares them, and publishes every outcome to the user's UserData
    entity (flex_cloud_status / flex_cloud_info_msg).
    """
    logging.debug("prepare_vms(): parameters={0}".format(parameters))

    queue_head = parameters[self.PARAM_FLEX_QUEUE_HEAD]
    user_data = self.__get_user_data(parameters["user_id"])

    def report(status, message):
        # Persist a status flag + message pair for the UI.
        user_data.flex_cloud_status = status
        user_data.flex_cloud_info_msg = message
        user_data.put()

    info_key = self.PARAM_FLEX_CLOUD_MACHINE_INFO
    if (info_key not in parameters
            or parameters[info_key] is None
            or parameters[info_key] == []):
        logging.error("Error: No {0} param!".format(info_key))
        report(False, "Invalid Parameters")
        return

    machines = parameters[info_key]

    # Tell the user configuration has started while workers come up.
    report(True, "Flex Cloud configured. Waiting for workers to become available...")

    # Seed the VMStateModel db; ports 22 (SSH) and 443 must be open.
    every_machine_reachable = True
    for machine in machines:
        if self.agent.check_network_ports(machine["ip"], [22, 443]):
            machine_state = VMStateModel.STATE_ACCESSIBLE
        else:
            machine_state = VMStateModel.STATE_INACCESSIBLE
            every_machine_reachable = False

        VMStateModel(
            state=machine_state,
            infra=self.agent_type,
            ins_type=FlexConfig.INSTANCE_TYPE,
            pri_ip=machine["ip"],
            pub_ip=machine["ip"],
            username=machine["username"],
            keyfile=machine["keyfile"],
            ins_id=self.agent.get_flex_instance_id(machine["ip"]),
            user_id=parameters["user_id"],
            res_id=self.reservation_id,
        ).put()

    if not every_machine_reachable:
        report(False, "Error: not all workers are accessible")
        return

    queue_head_reachable = queue_head is not None and helper.wait_for_ssh_connection(
        queue_head["keyfile"], queue_head["ip"], username=queue_head["username"]
    )
    if not queue_head_reachable:
        logging.error("Found no viable ssh-able/running queue head machine!")
        report(False, "Error: Can not connect {0} (queue head) via SSH".format(queue_head["ip"]))
        return

    if not self.__prepare_queue_head(queue_head, parameters):
        logging.error("Error: could not prepare queue head!")
        report(False, "Error preparing the queue head")
        return

    # Gather every non-queue-head machine that answers over SSH.
    workers = []
    for machine in parameters[info_key]:
        if machine[self.PARAM_IS_QUEUE_HEAD] != True:
            if helper.wait_for_ssh_connection(machine["keyfile"], machine["ip"],
                                              username=machine["username"]):
                workers.append(machine)
            else:
                report(False, "Error: Can not connect to {0} via SSH".format(machine["ip"]))
                return

    if len(workers) > 0:
        logging.debug("Preparing workers: {0}".format(workers))
        worker_params = {
            "infrastructure": AgentTypes.FLEX,
            info_key: workers,
            "credentials": parameters["credentials"],
            "user_id": parameters["user_id"],
            self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
            "reservation_id": parameters["reservation_id"],
        }
        self.agent.prepare_instances(worker_params)

    helper.update_celery_config_with_queue_head_ip(
        queue_head_ip=queue_head["ip"], agent_type=self.agent_type
    )
    self.__configure_celery(params=parameters)

    logging.debug("Flex Cloud Deployed")
    report(True, "Flex Cloud Deployed")

    # Refresh instance status immediately instead of waiting for the sync job.
    VMStateModel.synchronize(agent=self.agent, parameters=parameters)
    return
def __poll_instances_status(self, num_vms, parameters):
    '''
    Private method that polls the state of instances that have already been
    spawned, once per POLL_WAIT_TIME seconds for up to POLL_COUNT attempts,
    recording progress in VMStateModel; on timeout, instances that never
    came up are terminated and marked failed.

    Args
        num_vms      Number of virtual machines expected to reach the
                     running state (1 when launching the queue head)
        parameters   A dictionary of parameters

    Return
        A tuple of (public ips, private ips, instance ids). Each of the
        three is a list describing the instances seen on the final poll.
    '''
    logging.info('Start polling task for infrastructure = {0}'.format(
        parameters['infrastructure']))

    ins_ids = self.agent.describe_instances_launched(parameters)
    logging.info("ins_ids = {0}".format(ins_ids))

    # update db with new instance ids and 'pending'
    VMStateModel.update_ins_ids(parameters, ins_ids, self.reservation_id,
                                from_state=VMStateModel.STATE_CREATING,
                                to_state=VMStateModel.STATE_PENDING)

    # Results of the most recent poll; returned even when we time out.
    public_ips = None
    private_ips = None
    instance_ids = None
    keyfiles = None

    for x in xrange(EC2BackendWorker.POLL_COUNT):
        # get the ips and ids of this keyname
        public_ips, private_ips, instance_ids, instance_types, keyfiles = self.agent.describe_instances_running(
            parameters)

        logging.info("public_ips = {0}".format(public_ips))
        logging.debug("private_ips = {0}".format(private_ips))
        logging.info("instance_ids = {0}".format(instance_ids))
        logging.info("instance_types = {0}".format(instance_types))
        logging.info("keyfiles = {0}".format(keyfiles))

        # if we get the requested number of vms (the requested number will be 1 if this is queue head),
        # update reservation information and send a message to the backend server
        if num_vms == len(public_ips):
            # update db with new public ips and private ips
            VMStateModel.update_ips(parameters, instance_ids, public_ips,
                                    private_ips, instance_types, keyfiles)
            break
        else:
            if x < EC2BackendWorker.POLL_COUNT - 1:
                # Not the last attempt yet: wait and poll again.
                # NOTE(review): the message hard-codes "5 seconds" but the
                # actual wait is POLL_WAIT_TIME — confirm they agree.
                time.sleep(EC2BackendWorker.POLL_WAIT_TIME)
                logging.info('Polling task: sleep 5 seconds...')
            else:
                # Final attempt: persist whatever is running, then terminate
                # the launched instances that never appeared as running.
                VMStateModel.update_ips(parameters, instance_ids, public_ips,
                                        private_ips, instance_types, keyfiles)
                logging.info(
                    'Polling timeout. About to terminate some instances:')
                terminate_ins_ids = []
                for ins_id in ins_ids:
                    if ins_id not in instance_ids:
                        logging.info(
                            'instance {0} to be terminated'.format(ins_id))
                        terminate_ins_ids.append(ins_id)
                # terminate timeout instances
                self.agent.deregister_some_instances(
                    parameters, terminate_ins_ids)
                # update db with failure information
                VMStateModel.set_state(parameters, terminate_ins_ids,
                                       VMStateModel.STATE_FAILED,
                                       VMStateModel.DESCRI_FAIL_TO_RUN)

    return public_ips, private_ips, instance_ids