Ejemplo n.º 1
0
    def on_get_quote_response(self, agent, url, response):
        """Handle the reply to a get-quote request and request the next state.

        Args:
            agent: dict describing the cloud agent; this method reads
                ``agent['agent_id']`` and ``agent['provide_V']``.
            url: URL the request was sent to (not used here).
            response: HTTP client response; ``response.error`` and
                ``response.body`` are read.

        Raises:
            Exception: if ``agent`` is None.

        Transport-level failures lead to ``GET_QUOTE_RETRY``; any other
        response error leads to ``FAILED``.  A successful response is parsed
        as JSON and validated; the agent then moves to ``PROVIDE_V``,
        ``GET_QUOTE`` or ``INVALID_QUOTE``.
        """
        if agent is None:
            raise Exception("agent deleted while being processed")

        states = cloud_verifier_common.CloudAgent_Operational_State

        if response.error:
            error = response.error
            # This is a connection error, retry get quote.  The Tornado HTTP
            # client reports "no HTTP response received" (e.g. a timeout) as
            # an error whose ``code`` is 599.  The check is duck-typed on
            # ``code`` because tornado.web.HTTPError (the server-side
            # exception) exposes ``status_code`` rather than ``code``, so
            # testing against that class could never match a client error.
            is_connection_error = (
                isinstance(error, IOError)
                or getattr(error, 'code', None) == 599)

            if is_connection_error:
                self.process_agent(agent, states.GET_QUOTE_RETRY)
            else:
                # catastrophic error, do not continue
                message = "Unexpected Get Quote response error for cloud agent " + agent['agent_id'] + ", Error: " + str(error)
                logger.critical(message)
                self.process_agent(agent, states.FAILED)
            return

        try:
            json_response = json.loads(response.body)

            # validate the cloud agent response
            if cloud_verifier_common.process_quote_response(agent, json_response['results']):
                if agent['provide_V']:
                    self.process_agent(agent, states.PROVIDE_V)
                else:
                    self.process_agent(agent, states.GET_QUOTE)
            else:
                self.process_agent(agent, states.INVALID_QUOTE)
                cloud_verifier_common.notifyError(agent)

        except Exception as e:
            # Anything raised while parsing or validating (malformed JSON,
            # missing 'results', ...) is only logged; no state transition is
            # requested from this handler in that case.
            logger.exception(e)
    def process_agent(self, agent, new_operational_state):
        """Drive one step of the agent polling state machine.

        Args:
            agent: in-memory agent dict; keys read here include
                'operational_state', 'agent_id', 'pending_event',
                'num_retries', 'first_verified' and 'ip'.
            new_operational_state: the state the caller wants to move to.

        The stored copy of the agent is consulted first: a TERMINATED agent is
        removed from the database and a TENANT_FAILED agent simply stops being
        polled.  FAILED / INVALID_QUOTE requests are persisted and end
        polling.  Every other request is handled as a (current, requested)
        transition that either invokes the next operation immediately or
        schedules it on the IOLoop.

        Any exception raised inside (including the "nothing should ever fall
        out of this!" fall-through) is caught and logged, not propagated.
        """
        try:
            current_state = agent['operational_state']
            agent_id = agent['agent_id']
            stored_agent = self.db.get_agent(agent_id)
            stored_state = stored_agent['operational_state']
            states = cloud_verifier_common.CloudAgent_Operational_State

            def cancel_pending_event():
                # Drop the scheduled IOLoop callback for this agent, if any.
                handle = agent['pending_event']
                if handle is not None:
                    tornado.ioloop.IOLoop.current().remove_timeout(handle)

            # the user terminated this agent: cancel polling and forget it
            if stored_state == states.TERMINATED:
                logger.warning("agent %s terminated by user." % agent_id)
                cancel_pending_event()
                self.db.remove_agent(agent_id)
                return

            # the user told us to stop polling because the tenant quote check failed
            if stored_state == states.TENANT_FAILED:
                logger.warning(
                    "agent %s has failed tenant quote.  stopping polling" % agent_id)
                cancel_pending_event()
                return

            # Failure during processing: record it and stop.  The tenant can
            # GET the status and decide what to do (delete/retry/etc).
            if new_operational_state in (states.FAILED, states.INVALID_QUOTE):
                agent['operational_state'] = new_operational_state
                cancel_pending_event()
                self.db.overwrite_agent(agent_id, agent)
                logger.warning("agent %s failed, stopping polling" % agent_id)
                return

            # propagate all state
            self.db.overwrite_agent(agent_id, agent)

            transition = (current_state, new_operational_state)

            # brand new agent: fetch its first quote
            if transition == (states.START, states.GET_QUOTE):
                agent['num_retries'] = 0
                self.invoke_get_quote(agent, True)
                return

            # quote accepted and V still has to be delivered
            if transition == (states.GET_QUOTE, states.PROVIDE_V):
                agent['num_retries'] = 0
                self.invoke_provide_v(agent)
                return

            # steady state: ask for the next quote, now or after the interval
            if transition in ((states.PROVIDE_V, states.GET_QUOTE),
                              (states.GET_QUOTE, states.GET_QUOTE)):
                agent['num_retries'] = 0
                quote_interval = config.getfloat('cloud_verifier', 'quote_interval')
                if quote_interval == 0:
                    self.invoke_get_quote(agent, False)
                    return
                # set up a call back to check again
                next_poll = functools.partial(self.invoke_get_quote, agent, False)
                agent['pending_event'] = tornado.ioloop.IOLoop.current().call_later(
                    quote_interval, next_poll)
                return

            max_retries = config.getint('cloud_verifier', 'max_retries')
            retry_delay = config.getfloat('cloud_verifier', 'retry_interval')

            def schedule_retry(callback):
                # Count the attempt, log it, and run callback after retry_delay.
                agent['num_retries'] += 1
                logger.info(
                    "connection to %s refused after %d/%d tries, trying again in %f seconds"
                    % (agent['ip'], agent['num_retries'], max_retries, retry_delay))
                tornado.ioloop.IOLoop.current().call_later(retry_delay, callback)

            if transition == (states.GET_QUOTE, states.GET_QUOTE_RETRY):
                if agent['num_retries'] < max_retries:
                    schedule_retry(
                        functools.partial(self.invoke_get_quote, agent, True))
                    return
                logger.warning(
                    "agent %s was not reachable for quote in %d tries, setting state to FAILED"
                    % (agent_id, max_retries))
                if agent['first_verified']:
                    # only notify on previously good agents
                    cloud_verifier_common.notifyError(agent, 'comm_error')
                else:
                    logger.debug(
                        "Communication error for new agent.  no notification will be sent")
                self.process_agent(agent, states.FAILED)
                return

            if transition == (states.PROVIDE_V, states.PROVIDE_V_RETRY):
                if agent['num_retries'] < max_retries:
                    schedule_retry(
                        functools.partial(self.invoke_provide_v, agent))
                    return
                logger.warning(
                    "agent %s was not reachable to provide v in %d tries, setting state to FAILED"
                    % (agent_id, max_retries))
                cloud_verifier_common.notifyError(agent, 'comm_error')
                self.process_agent(agent, states.FAILED)
                return

            # No known transition matched; raised here so the handler below logs it.
            raise Exception("nothing should ever fall out of this!")

        except Exception as e:
            logger.error("Polling thread error: %s" % e)
            logger.exception(e)