def on_get_quote_response(self, agent, url, response):
    """Handle the HTTP response to a get-quote request and advance the agent.

    Args:
        agent: dict describing the cloud agent being verified; this method
            reads ``agent['agent_id']`` and ``agent['provide_V']``.
        url: not used by this method.
        response: HTTP response object; ``response.error`` and
            ``response.body`` are read.

    Raises:
        Exception: if ``agent`` is None.

    Behaviour:
        * connection-level failure -> ``GET_QUOTE_RETRY``
        * any other response error  -> ``FAILED`` (logged as critical)
        * valid quote               -> ``PROVIDE_V`` when ``agent['provide_V']``
                                       is truthy, otherwise ``GET_QUOTE``
        * invalid quote             -> ``INVALID_QUOTE`` followed by
                                       ``cloud_verifier_common.notifyError``
    """
    if agent is None:
        raise Exception("agent deleted while being processed")

    states = cloud_verifier_common.CloudAgent_Operational_State
    error = response.error

    if error:
        # Tornado's HTTP *client* reports requests that never produced a real
        # HTTP response (timeouts, closed connections) with the pseudo status
        # 599, exposed as ``.code`` on tornado.httpclient.HTTPError.
        # tornado.web.HTTPError is the unrelated server-side exception and has
        # ``status_code`` instead of ``code``, so testing against that class
        # either never matched or raised AttributeError.  Checking the
        # attribute directly covers the client-side error types.
        is_connection_error = (isinstance(error, IOError)
                               or getattr(error, 'code', None) == 599)
        if is_connection_error:
            # this is a connection error, retry get quote
            self.process_agent(agent, states.GET_QUOTE_RETRY)
        else:
            # catastrophic error, do not continue
            logger.critical(
                "Unexpected Get Quote response error for cloud agent %s, Error: %s",
                agent['agent_id'], error)
            self.process_agent(agent, states.FAILED)
        return

    try:
        json_response = json.loads(response.body)

        # validate the cloud agent response
        if cloud_verifier_common.process_quote_response(agent, json_response['results']):
            if agent['provide_V']:
                self.process_agent(agent, states.PROVIDE_V)
            else:
                self.process_agent(agent, states.GET_QUOTE)
        else:
            self.process_agent(agent, states.INVALID_QUOTE)
            cloud_verifier_common.notifyError(agent)
    except Exception as e:
        # NOTE(review): on this path the failure is only logged; no further
        # process_agent() call is made for the agent from here.
        logger.exception(e)
def process_agent(self, agent, new_operational_state):
    """Advance one agent through the verifier's state machine.

    The state carried on the agent dict (``agent['operational_state']``) and
    ``new_operational_state`` together select what happens:

    * stored record is TERMINATED: cancel any pending timeout and remove the
      agent from the database.
    * stored record is TENANT_FAILED: cancel any pending timeout and stop.
    * new state is FAILED or INVALID_QUOTE: record it on the agent, cancel any
      pending timeout, persist the agent and stop.
    * START -> GET_QUOTE: call ``invoke_get_quote(agent, True)``.
    * GET_QUOTE -> PROVIDE_V: call ``invoke_provide_v(agent)``.
    * PROVIDE_V or GET_QUOTE -> GET_QUOTE: call ``invoke_get_quote(agent,
      False)``, immediately when ``quote_interval`` is 0, otherwise through an
      IOLoop timer that is stored in ``agent['pending_event']``.
    * GET_QUOTE -> GET_QUOTE_RETRY and PROVIDE_V -> PROVIDE_V_RETRY: re-issue
      the request after ``retry_interval`` seconds until ``num_retries``
      reaches ``max_retries``, then move the agent to FAILED.

    Any exception raised on the way, including the one for an unhandled
    transition, is logged here and not propagated.

    Args:
        agent: dict describing the agent; mutated in place.
        new_operational_state: the state the caller wants to move to.
    """
    try:
        current_state = agent['operational_state']
        db_record = self.db.get_agent(agent['agent_id'])
        states = cloud_verifier_common.CloudAgent_Operational_State

        def cancel_pending_event():
            # Drop the timer recorded on the agent, if there is one.
            if agent['pending_event'] is not None:
                tornado.ioloop.IOLoop.current().remove_timeout(agent['pending_event'])

        persisted_state = db_record['operational_state']

        # The user terminated this agent.
        if persisted_state == states.TERMINATED:
            logger.warning("agent %s terminated by user." % agent['agent_id'])
            cancel_pending_event()
            self.db.remove_agent(agent['agent_id'])
            return

        # The user told us to stop polling because the tenant quote check failed.
        if persisted_state == states.TENANT_FAILED:
            logger.warning("agent %s has failed tenant quote. stopping polling" % agent['agent_id'])
            cancel_pending_event()
            return

        # Failure during processing: log it and drop the agent on the floor.
        # The administration application (tenant) can GET the status and act
        # accordingly (delete/retry/etc).
        if new_operational_state in (states.FAILED, states.INVALID_QUOTE):
            agent['operational_state'] = new_operational_state
            cancel_pending_event()
            self.db.overwrite_agent(agent['agent_id'], agent)
            logger.warning("agent %s failed, stopping polling" % agent['agent_id'])
            return

        # Propagate all state.
        self.db.overwrite_agent(agent['agent_id'], agent)

        transition = (current_state, new_operational_state)

        # New agent: get a quote.
        if transition == (states.START, states.GET_QUOTE):
            agent['num_retries'] = 0
            self.invoke_get_quote(agent, True)
            return

        if transition == (states.GET_QUOTE, states.PROVIDE_V):
            agent['num_retries'] = 0
            self.invoke_provide_v(agent)
            return

        if transition in ((states.PROVIDE_V, states.GET_QUOTE),
                          (states.GET_QUOTE, states.GET_QUOTE)):
            agent['num_retries'] = 0
            quote_interval = config.getfloat('cloud_verifier', 'quote_interval')
            if quote_interval == 0:
                self.invoke_get_quote(agent, False)
            else:
                # Set up a callback to check again after the interval.
                next_quote = functools.partial(self.invoke_get_quote, agent, False)
                timer = tornado.ioloop.IOLoop.current().call_later(quote_interval, next_quote)
                agent['pending_event'] = timer
            return

        max_retries = config.getint('cloud_verifier', 'max_retries')
        retry_interval = config.getfloat('cloud_verifier', 'retry_interval')

        if transition == (states.GET_QUOTE, states.GET_QUOTE_RETRY):
            if agent['num_retries'] < max_retries:
                retry_quote = functools.partial(self.invoke_get_quote, agent, True)
                agent['num_retries'] += 1
                logger.info(
                    "connection to %s refused after %d/%d tries, trying again in %f seconds"
                    % (agent['ip'], agent['num_retries'], max_retries, retry_interval))
                tornado.ioloop.IOLoop.current().call_later(retry_interval, retry_quote)
                return

            logger.warning(
                "agent %s was not reachable for quote in %d tries, setting state to FAILED"
                % (agent['agent_id'], max_retries))
            if agent['first_verified']:
                # Only notify on previously good agents.
                cloud_verifier_common.notifyError(agent, 'comm_error')
            else:
                logger.debug("Communication error for new agent. no notification will be sent")
            self.process_agent(agent, states.FAILED)
            return

        if transition == (states.PROVIDE_V, states.PROVIDE_V_RETRY):
            if agent['num_retries'] < max_retries:
                retry_provide_v = functools.partial(self.invoke_provide_v, agent)
                agent['num_retries'] += 1
                logger.info(
                    "connection to %s refused after %d/%d tries, trying again in %f seconds"
                    % (agent['ip'], agent['num_retries'], max_retries, retry_interval))
                tornado.ioloop.IOLoop.current().call_later(retry_interval, retry_provide_v)
                return

            logger.warning(
                "agent %s was not reachable to provide v in %d tries, setting state to FAILED"
                % (agent['agent_id'], max_retries))
            cloud_verifier_common.notifyError(agent, 'comm_error')
            self.process_agent(agent, states.FAILED)
            return

        raise Exception("nothing should ever fall out of this!")

    except Exception as e:
        logger.error("Polling thread error: %s" % e)
        logger.exception(e)