Example #1
0
class Broker(object):
    """Class that route jobs to agents.

    Options:

    - **frontend**: the ZMQ socket to receive jobs.
    - **backend**: the ZMQ socket to communicate with agents.
    - **heartbeat**: the ZMQ socket to receive heartbeat requests.
    - **register** : the ZMQ socket to register agents.
    - **receiver**: the ZMQ socket that receives data from agents.
    - **publisher**: the ZMQ socket to publish agents data
    """
    def __init__(self, frontend=DEFAULT_FRONTEND, backend=DEFAULT_BACKEND,
                 heartbeat=None, register=DEFAULT_REG,
                 io_threads=DEFAULT_IOTHREADS,
                 agent_timeout=DEFAULT_TIMEOUT_MOVF,
                 receiver=DEFAULT_BROKER_RECEIVER, publisher=DEFAULT_PUBLISHER,
                 db='python', dboptions=None):
        """Bind the broker sockets and attach them to a new IO loop.

        Raises DuplicateBrokerError when verify_broker() returns a pid for
        the *frontend* endpoint. The heartbeat endpoint is optional: when
        *heartbeat* is None no Heartbeat object is created and ``self.pong``
        is None.
        """
        # refuse to start when verify_broker() reports a pid for this
        # frontend endpoint
        logger.debug('Verifying if there is a running broker')
        running_pid = verify_broker(frontend)
        if running_pid is not None:
            logger.debug('Ooops, we have a running broker on that socket')
            raise DuplicateBrokerError(running_pid)

        with_heartbeat = heartbeat is not None

        self.endpoints = {'frontend': frontend,
                          'backend': backend,
                          'register': register,
                          'receiver': receiver,
                          'publisher': publisher}
        if with_heartbeat:
            self.endpoints['heartbeat'] = heartbeat

        logger.debug('Initializing the broker.')

        # ipc endpoints are handed to register_ipc_file()
        for address in self.endpoints.values():
            if not address.startswith('ipc'):
                continue
            register_ipc_file(address)

        self.context = ctx = zmq.Context(io_threads=io_threads)

        # sockets: two ROUTERs (clients / agents), two PULLs (registration /
        # agent data) and one PUB
        self._frontend = front = ctx.socket(zmq.ROUTER)
        front.identity = 'broker-' + frontend
        front.bind(frontend)

        self._backend = back = ctx.socket(zmq.ROUTER)
        back.bind(backend)

        self._registration = registration = ctx.socket(zmq.PULL)
        registration.bind(register)

        self._receiver = data_in = ctx.socket(zmq.PULL)
        data_in.bind(receiver)

        self._publisher = ctx.socket(zmq.PUB)
        self._publisher.bind(publisher)

        # streams: every receiving socket gets its own callback on the loop
        self.loop = loop = ioloop.IOLoop()

        self._frontstream = zmqstream.ZMQStream(front, loop)
        self._frontstream.on_recv(self._handle_recv_front)

        self._backstream = zmqstream.ZMQStream(back, loop)
        self._backstream.on_recv(self._handle_recv_back)

        self._regstream = zmqstream.ZMQStream(registration, loop)
        self._regstream.on_recv(self._handle_reg)

        self._rcvstream = zmqstream.ZMQStream(data_in, loop)
        self._rcvstream.on_recv(self._handle_recv)

        # heartbeat (optional)
        if with_heartbeat:
            self.pong = Heartbeat(heartbeat, io_loop=loop,
                                  ctx=ctx,
                                  onregister=self._deregister)
        else:
            self.pong = None

        # status
        self.started = False
        self.poll_timeout = None

        # controller
        self.ctrl = BrokerController(self, loop, db=db,
                                     dboptions=dboptions,
                                     agent_timeout=agent_timeout)

    def _handle_recv(self, msg):
        """Republish data received on the receiver socket and store it.

        The first frame of *msg* is sent unchanged on the publisher socket,
        then decoded as JSON and handed to the controller together with the
        stringified ``agent_id`` found in it.
        """
        payload = msg[0]

        # forward the raw frame to subscribers before decoding it
        self._publisher.send(payload)

        # keep a local copy through the controller
        record = json.loads(payload)
        sender = str(record.get('agent_id'))
        self.ctrl.save_data(sender, record)

    def _deregister(self):
        """Ask the controller to unregister every agent.

        Passed to Heartbeat as its ``onregister`` callback in ``__init__``.
        """
        logger.debug('Unregistering all agents')
        controller = self.ctrl
        controller.unregister_agents()

    def _handle_reg(self, msg):
        """Handle a message received on the registration socket.

        ``msg[0]`` is the action ('REGISTER' or 'UNREGISTER') and ``msg[1]``
        is passed to the matching controller method. Any other action is
        ignored.
        """
        action = msg[0]
        if action == 'REGISTER':
            handler = self.ctrl.register_agent
        elif action == 'UNREGISTER':
            handler = self.ctrl.unregister_agent
        else:
            # unknown action: nothing to do
            return
        handler(msg[1])

    def _send_json(self, target, data):
        """Serialize *data* as JSON and send it on the frontend stream.

        :param target: list of frames placed before the JSON payload in the
                       multipart message.
        :param data: object to serialize with ``json.dumps``.
        :raises TypeError: re-raised from ``json.dumps`` (logged first) when
                           *data* holds a type JSON cannot represent.
        :raises ValueError: re-raised from ``json.dumps`` (logged first),
                            e.g. on a circular reference.
        """
        # Only the serialization is guarded: json.dumps signals an
        # unserializable object with TypeError and a circular reference with
        # ValueError. A failure in send_multipart is not a "dump" problem and
        # is left to propagate without that misleading log line.
        try:
            payload = json.dumps(data)
        except (TypeError, ValueError):
            logger.error('Could not dump %s' % str(data))
            raise

        self._frontstream.send_multipart(target + [payload])

    def _handle_recv_front(self, msg, tentative=0):
        """Handle a request received on the frontend socket (front => back).

        The JSON request is decoded from ``msg[2]`` and its ``command`` key
        selects the action. Every frame but the last (``msg[:-1]``) is used
        as the envelope for replies sent through ``_send_json``.

        PING, LISTRUNS, STOPRUN, GET_DATA, GET_COUNTS, GET_METADATA, LIST and
        RUN are handled here. Any other command must carry an ``agent_id``
        and the message is forwarded to that agent through the controller.

        :param msg: multipart message, as a list of frames.
        :param tentative: when equal to 3, commands other than the first six
                          listed above are answered with a ``No agent``
                          error instead of being processed.
        :raises NotImplementedError: for a command not handled here whose
                                     request has no ``agent_id``.
        """
        # NOTE(review): the payload is read from msg[2] while the reply
        # envelope is msg[:-1]; this presumably assumes 3-frame messages --
        # confirm against the client side.
        data = json.loads(msg[2])
        target = msg[:-1]
        cmd = data['command']

        # commands answered from the broker/controller state only
        if cmd == 'PING':
            # health check: reply with our pid, endpoints and known agents
            res = {'result': {'pid': os.getpid(),
                              'endpoints': self.endpoints,
                              'agents': self.ctrl.agents}}
            self._send_json(target, res)
            return
        elif cmd == 'LISTRUNS':
            logger.debug('Asked for LISTRUNS')
            res = {'result': self.ctrl.list_runs()}
            logger.debug('Got %s' % str(res))
            self._send_json(target, res)
            return
        elif cmd == 'STOPRUN':
            run_id = data['run_id']
            stopped_agents = self.ctrl.stop_run(run_id, msg)

            # we give back the list of agents we stopped
            res = {'result': stopped_agents}
            self._send_json(target, res)
            return
        elif cmd == 'GET_DATA':
            # we send back the data we have in the db
            # XXX stream ?
            db_data = self.ctrl.get_data(data['run_id'],
                                         data_type=data.get('data_type'),
                                         groupby=data.get('groupby', False))
            self._send_json(target, {'result': db_data})
            return
        elif cmd == 'GET_COUNTS':
            counts = self.ctrl.get_counts(data['run_id'])
            self._send_json(target, {'result': counts})
            return
        elif cmd == 'GET_METADATA':
            metadata = self.ctrl.get_metadata(data['run_id'])
            self._send_json(target, {'result': metadata})
            return

        # other commands below this point are for agents
        if tentative == 3:
            logger.debug('No agents')
            self._send_json(target, {'error': 'No agent'})
            return

        # `data` and `cmd` decoded above are still valid here: none of the
        # branches that fall through to this point modify them, so the
        # request does not need to be parsed a second time.
        if cmd == 'LIST':
            # we return a list of agent ids and their status
            self._send_json(target, {'result': self.ctrl.agents})
            return
        elif cmd == 'RUN':
            # create a unique id for this run
            run_id = str(uuid4())

            # get some agents
            try:
                agents = self.ctrl.reserve_agents(data['agents'], run_id)
            except NotEnoughWorkersError:
                self._send_json(target, {'error': 'Not enough agents'})
                return

            # send to every agent with the run_id and the receiver endpoint
            data['run_id'] = run_id
            data['args']['zmq_receiver'] = self.endpoints['receiver']

            # the payload for the agents is serialized here, i.e. before
            # 'started' is added below: the timestamp only ends up in the
            # saved metadata, not in the message sent to the agents
            msg[2] = json.dumps(data)

            # notice when the test was started
            data['args']['started'] = time.time()

            # save the tests metadata in the db
            self.ctrl.save_metadata(run_id, data['args'])
            self.ctrl.flush_db()

            for agent_id in agents:
                self.ctrl.send_to_agent(agent_id, msg)

            # tell the client which agents were selected.
            res = {'result': {'agents': agents, 'run_id': run_id}}
            self._send_json(target, res)
            return

        # anything else is forwarded untouched to the agent named in the
        # request
        if 'agent_id' not in data:
            raise NotImplementedError('DEAD CODE?')

        agent_id = str(data['agent_id'])
        self.ctrl.send_to_agent(agent_id, msg)

    def _handle_recv_back(self, msg):
        """Handle a message received on the backend socket (back => front).

        The first frame is the agent id and is stripped; the last frame is a
        JSON document holding either an ``error`` or a ``result`` entry.
        ``_STATUS`` results update the controller and, when the controller
        returns a run id, a ``run-finished`` notification is published. They
        are not forwarded. Everything else is sent unchanged on the frontend
        stream; send failures are logged and swallowed.

        :param msg: multipart message, as a list of frames.
        """
        # let's remove the agent id
        agent_id = msg[0]
        msg = msg[1:]

        # grabbing the data to update the agents statuses if needed
        data = json.loads(msg[-1])
        if 'error' in data:
            result = data['error']
            # the error payload may be a plain value rather than a mapping;
            # log whatever we have instead of failing on .get()
            if isinstance(result, dict):
                logger.error(result.get('exception'))
            else:
                logger.error(result)
        else:
            result = data['result']

        # only a mapping can be a _STATUS report; any other payload is
        # pass-through and must not break the handler
        if isinstance(result, dict) and result.get('command') == '_STATUS':
            statuses = result['status'].values()
            run_id = self.ctrl.update_status(agent_id, statuses)
            if run_id is not None:
                # if the tests are finished, publish this on the pubsub.
                self._publisher.send(json.dumps({'data_type': 'run-finished',
                                                 'run_id': run_id}))

            return

        # other things are pass-through
        try:
            self._frontstream.send_multipart(msg)
        except Exception as e:
            # we don't want to die on error. we just log it
            logger.error('Could not send to front')
            logger.error(msg)
            exc = traceback.format_tb(sys.exc_info()[2])
            exc.insert(0, str(e))
            logger.error('\n'.join(exc))