Example #1
0
 def setUp(self):
     """Build a controller on a scratch db directory and stub pid checks."""
     # Scratch directory handed to the controller through its db options.
     self.dbdir = tempfile.mkdtemp()
     event_loop = ioloop.IOLoop()
     self.broker = FakeBroker()
     self.ctrl = BrokerController(
         self.broker,
         event_loop,
         dboptions={'directory': self.dbdir},
     )

     # Keep a handle on the real function, then report every pid as alive.
     self.old_exists = psutil.pid_exists
     psutil.pid_exists = lambda pid: True
Example #2
0
    def __init__(
        self,
        frontend=DEFAULT_FRONTEND,
        backend=DEFAULT_BACKEND,
        heartbeat=None,
        register=DEFAULT_REG,
        io_threads=DEFAULT_IOTHREADS,
        agent_timeout=DEFAULT_AGENT_TIMEOUT,
        receiver=DEFAULT_BROKER_RECEIVER,
        publisher=DEFAULT_PUBLISHER,
        db="python",
        dboptions=None,
        web_root=None,
    ):
        """Bind the broker sockets and wire them to a fresh I/O loop.

        - **frontend**: endpoint the frontend ROUTER socket binds to.
        - **backend**: endpoint the backend ROUTER socket binds to.
        - **heartbeat**: optional endpoint; when given, it is recorded in
          ``self.endpoints`` and a ``Heartbeat`` is created for it.
        - **register**: endpoint the registration PULL socket binds to.
        - **io_threads**: passed to ``zmq.Context``.
        - **agent_timeout**, **db**, **dboptions**: forwarded to
          ``BrokerController``.
        - **receiver**: endpoint the receiver PULL socket binds to.
        - **publisher**: endpoint the PUB socket binds to.
        - **web_root**: stored as ``self.web_root``.

        Raises ``DuplicateBrokerError`` when ``verify_broker(frontend)``
        returns a pid.
        """
        # before doing anything, we verify if a broker is already up and
        # running
        logger.debug("Verifying if there is a running broker")
        pid = verify_broker(frontend)
        if pid is not None:  # oops. can't do this !
            logger.debug("Ooops, we have a running broker on that socket")
            raise DuplicateBrokerError(pid)

        # every endpoint this broker binds, keyed by role
        self.endpoints = {
            "frontend": frontend,
            "backend": backend,
            "register": register,
            "receiver": receiver,
            "publisher": publisher,
        }

        if heartbeat is not None:
            self.endpoints["heartbeat"] = heartbeat

        logger.debug("Initializing the broker.")

        # ipc endpoints are handed to register_ipc_file before binding
        for endpoint in self.endpoints.values():
            if endpoint.startswith("ipc"):
                register_ipc_file(endpoint)

        self.context = zmq.Context(io_threads=io_threads)

        # setting up the sockets
        self._frontend = self.context.socket(zmq.ROUTER)
        self._frontend.identity = "broker-" + frontend
        self._frontend.bind(frontend)
        self._backend = self.context.socket(zmq.ROUTER)
        # the backend socket identity is this process' pid, as a string
        self.pid = str(os.getpid())
        self._backend.identity = self.pid
        self._backend.bind(backend)
        self._registration = self.context.socket(zmq.PULL)
        self._registration.bind(register)
        self._receiver = self.context.socket(zmq.PULL)
        self._receiver.bind(receiver)
        self._publisher = self.context.socket(zmq.PUB)
        self._publisher.bind(publisher)

        # setting up the streams: each receiving socket gets a ZMQStream on
        # the shared loop with its own handler (the publisher has none)
        self.loop = ioloop.IOLoop()
        self._frontstream = zmqstream.ZMQStream(self._frontend, self.loop)
        self._frontstream.on_recv(self._handle_recv_front)
        self._backstream = zmqstream.ZMQStream(self._backend, self.loop)
        self._backstream.on_recv(self._handle_recv_back)
        self._regstream = zmqstream.ZMQStream(self._registration, self.loop)
        self._regstream.on_recv(self._handle_reg)
        self._rcvstream = zmqstream.ZMQStream(self._receiver, self.loop)
        self._rcvstream.on_recv(self._handle_recv)

        # heartbeat: only created when an endpoint was supplied
        if heartbeat is not None:
            self.pong = Heartbeat(heartbeat, io_loop=self.loop, ctx=self.context, onregister=self._deregister)
        else:
            self.pong = None

        # status
        self.started = False
        self.poll_timeout = None

        # controller
        self.ctrl = BrokerController(self, self.loop, db=db, dboptions=dboptions, agent_timeout=agent_timeout)

        self.web_root = web_root
Example #3
0
class Broker(object):
    """Class that routes jobs to agents.

    Options:

    - **frontend**: the ZMQ socket to receive jobs.
    - **backend**: the ZMQ socket to communicate with agents.
    - **heartbeat**: the ZMQ socket to receive heartbeat requests.
    - **register** : the ZMQ socket to register agents.
    - **receiver**: the ZMQ socket that receives data from agents.
    - **publisher**: the ZMQ socket to publish agents data
    - **io_threads**: passed to ``zmq.Context``.
    - **agent_timeout**, **db**, **dboptions**: forwarded to the
      ``BrokerController``.
    - **web_root**: stored as ``self.web_root``.
    """

    def __init__(
        self,
        frontend=DEFAULT_FRONTEND,
        backend=DEFAULT_BACKEND,
        heartbeat=None,
        register=DEFAULT_REG,
        io_threads=DEFAULT_IOTHREADS,
        agent_timeout=DEFAULT_AGENT_TIMEOUT,
        receiver=DEFAULT_BROKER_RECEIVER,
        publisher=DEFAULT_PUBLISHER,
        db="python",
        dboptions=None,
        web_root=None,
    ):
        """Bind the broker sockets and wire them to a fresh I/O loop.

        Raises ``DuplicateBrokerError`` when ``verify_broker(frontend)``
        returns a pid.
        """
        # before doing anything, we verify if a broker is already up and
        # running
        logger.debug("Verifying if there is a running broker")
        pid = verify_broker(frontend)
        if pid is not None:  # oops. can't do this !
            logger.debug("Ooops, we have a running broker on that socket")
            raise DuplicateBrokerError(pid)

        self.endpoints = {
            "frontend": frontend,
            "backend": backend,
            "register": register,
            "receiver": receiver,
            "publisher": publisher,
        }

        if heartbeat is not None:
            self.endpoints["heartbeat"] = heartbeat

        logger.debug("Initializing the broker.")

        # ipc endpoints are handed to register_ipc_file before binding
        for endpoint in self.endpoints.values():
            if endpoint.startswith("ipc"):
                register_ipc_file(endpoint)

        self.context = zmq.Context(io_threads=io_threads)

        # setting up the sockets
        self._frontend = self.context.socket(zmq.ROUTER)
        self._frontend.identity = "broker-" + frontend
        self._frontend.bind(frontend)
        self._backend = self.context.socket(zmq.ROUTER)
        # the backend socket identity is this process' pid, as a string
        self.pid = str(os.getpid())
        self._backend.identity = self.pid
        self._backend.bind(backend)
        self._registration = self.context.socket(zmq.PULL)
        self._registration.bind(register)
        self._receiver = self.context.socket(zmq.PULL)
        self._receiver.bind(receiver)
        self._publisher = self.context.socket(zmq.PUB)
        self._publisher.bind(publisher)

        # setting up the streams
        self.loop = ioloop.IOLoop()
        self._frontstream = zmqstream.ZMQStream(self._frontend, self.loop)
        self._frontstream.on_recv(self._handle_recv_front)
        self._backstream = zmqstream.ZMQStream(self._backend, self.loop)
        self._backstream.on_recv(self._handle_recv_back)
        self._regstream = zmqstream.ZMQStream(self._registration, self.loop)
        self._regstream.on_recv(self._handle_reg)
        self._rcvstream = zmqstream.ZMQStream(self._receiver, self.loop)
        self._rcvstream.on_recv(self._handle_recv)

        # heartbeat: only created when an endpoint was supplied
        if heartbeat is not None:
            self.pong = Heartbeat(heartbeat, io_loop=self.loop, ctx=self.context, onregister=self._deregister)
        else:
            self.pong = None

        # status
        self.started = False
        self.poll_timeout = None

        # controller
        self.ctrl = BrokerController(self, self.loop, db=db, dboptions=dboptions, agent_timeout=agent_timeout)

        self.web_root = web_root

    def _handle_recv(self, msg):
        """Publish agent data, mark the sending agent alive and store it.

        ``msg[0]`` is forwarded untouched to the publisher socket and then
        decoded as JSON.
        """
        # publishing all the data received from agents
        self._publisher.send(msg[0])

        data = json.loads(msg[0])
        agent_id = str(data.get("agent_id"))
        hostname = data.get("hostname", "?")

        # telling the controller that the agent is alive
        self.ctrl.register_agent({"pid": agent_id, "hostname": hostname})

        # saving the data locally
        self.ctrl.save_data(agent_id, data)

    def _deregister(self):
        """Heartbeat ``onregister`` callback: unregister every agent."""
        self.ctrl.unregister_agents("asked by the heartbeat.")

    def _handle_reg(self, msg):
        """Handle a REGISTER / UNREGISTER message from the registration socket.

        Messages whose first frame is neither of those are ignored.
        """
        if msg[0] == "REGISTER":
            self.ctrl.register_agent(json.loads(msg[1]))
        elif msg[0] == "UNREGISTER":
            self.ctrl.unregister_agent(msg[1], "asked via UNREGISTER")

    def send_json(self, target, data):
        """Send *data*, JSON-encoded, to *target* through the front stream.

        The message is ``[target, "", json]``. A ``ValueError`` is logged
        and re-raised.
        """
        assert isinstance(target, basestring), target
        msg = [target, "", json.dumps(data)]
        try:
            self._frontstream.send_multipart(msg)
        except ValueError:
            logger.error("Could not dump %s" % str(data))
            raise

    def _handle_recv_front(self, msg, tentative=0):
        """front => back

        All commands starting with CTRL_ are sent to the controller.

        The first frame is the reply target and the last frame the JSON
        payload. Invalid JSON and controller failures are reported back to
        the target as ``{"error": ...}``. *tentative* is not used by the
        code visible here.
        """
        target = msg[0]

        try:
            data = json.loads(msg[-1])
        except ValueError:
            exc = "Invalid JSON received."
            logger.exception(exc)
            self.send_json(target, {"error": exc})
            return

        cmd = data["command"]

        # a command handled by the controller
        if cmd.startswith("CTRL_"):
            cmd = cmd[len("CTRL_") :]
            logger.debug("calling %s" % cmd)
            try:
                res = self.ctrl.run_command(cmd, msg, data)
            except Exception as e:
                logger.debug("Failed")
                # the error sent back is the exception text followed by
                # the formatted traceback entries
                exc = traceback.format_tb(sys.exc_info()[2])
                exc.insert(0, str(e))
                self.send_json(target, {"error": exc})
            else:
                # sending back a synchronous result if needed.
                if res is not None:
                    logger.debug("sync success %s" % str(res))
                    self.send_json(target, res)
                else:
                    logger.debug("async success")

        # misc commands
        elif cmd == "PING":
            res = {"result": {"pid": os.getpid(), "endpoints": self.endpoints, "agents": self.ctrl.agents}}
            self.send_json(target, res)
Example #4
0
class Broker(object):
    """Class that routes jobs to agents.

    Options:

    - **frontend**: the ZMQ socket to receive jobs.
    - **backend**: the ZMQ socket to communicate with agents.
    - **heartbeat**: the ZMQ socket to receive heartbeat requests.
    - **register** : the ZMQ socket to register agents.
    - **receiver**: the ZMQ socket that receives data from agents.
    - **publisher**: the ZMQ socket to publish agents data
    - **io_threads**: passed to ``zmq.Context``.
    - **agent_timeout**, **db**, **dboptions**: forwarded to the
      ``BrokerController``.
    """
    def __init__(self, frontend=DEFAULT_FRONTEND, backend=DEFAULT_BACKEND,
                 heartbeat=None, register=DEFAULT_REG,
                 io_threads=DEFAULT_IOTHREADS,
                 agent_timeout=DEFAULT_TIMEOUT_MOVF,
                 receiver=DEFAULT_BROKER_RECEIVER, publisher=DEFAULT_PUBLISHER,
                 db='python', dboptions=None):
        """Bind the broker sockets and wire them to a fresh I/O loop.

        Raises ``DuplicateBrokerError`` when ``verify_broker(frontend)``
        returns a pid.
        """
        # before doing anything, we verify if a broker is already up and
        # running
        logger.debug('Verifying if there is a running broker')
        pid = verify_broker(frontend)
        if pid is not None:    # oops. can't do this !
            logger.debug('Ooops, we have a running broker on that socket')
            raise DuplicateBrokerError(pid)

        self.endpoints = {'frontend': frontend,
                          'backend': backend,
                          'register': register,
                          'receiver': receiver,
                          'publisher': publisher}

        if heartbeat is not None:
            self.endpoints['heartbeat'] = heartbeat

        logger.debug('Initializing the broker.')

        # ipc endpoints are handed to register_ipc_file before binding
        for endpoint in self.endpoints.values():
            if endpoint.startswith('ipc'):
                register_ipc_file(endpoint)

        self.context = zmq.Context(io_threads=io_threads)

        # setting up the sockets
        self._frontend = self.context.socket(zmq.ROUTER)
        self._frontend.identity = 'broker-' + frontend
        self._frontend.bind(frontend)
        self._backend = self.context.socket(zmq.ROUTER)
        self._backend.bind(backend)
        self._registration = self.context.socket(zmq.PULL)
        self._registration.bind(register)
        self._receiver = self.context.socket(zmq.PULL)
        self._receiver.bind(receiver)
        self._publisher = self.context.socket(zmq.PUB)
        self._publisher.bind(publisher)

        # setting up the streams
        self.loop = ioloop.IOLoop()
        self._frontstream = zmqstream.ZMQStream(self._frontend, self.loop)
        self._frontstream.on_recv(self._handle_recv_front)
        self._backstream = zmqstream.ZMQStream(self._backend, self.loop)
        self._backstream.on_recv(self._handle_recv_back)
        self._regstream = zmqstream.ZMQStream(self._registration, self.loop)
        self._regstream.on_recv(self._handle_reg)
        self._rcvstream = zmqstream.ZMQStream(self._receiver, self.loop)
        self._rcvstream.on_recv(self._handle_recv)

        # heartbeat: only created when an endpoint was supplied
        if heartbeat is not None:
            self.pong = Heartbeat(heartbeat, io_loop=self.loop,
                                  ctx=self.context,
                                  onregister=self._deregister)
        else:
            self.pong = None

        # status
        self.started = False
        self.poll_timeout = None

        # controller
        self.ctrl = BrokerController(self, self.loop, db=db,
                                     dboptions=dboptions,
                                     agent_timeout=agent_timeout)

    def _handle_recv(self, msg):
        """Publish the data received from an agent, then store it.

        ``msg[0]`` is forwarded untouched to the publisher socket and then
        decoded as JSON.
        """
        # publishing all the data received from agents
        self._publisher.send(msg[0])

        # saving the data locally
        data = json.loads(msg[0])
        agent_id = str(data.get('agent_id'))
        self.ctrl.save_data(agent_id, data)

    def _deregister(self):
        """Heartbeat ``onregister`` callback: unregister every agent."""
        self.ctrl.unregister_agents()

    def _handle_reg(self, msg):
        """Handle a REGISTER / UNREGISTER message from the registration socket.

        Messages whose first frame is neither of those are ignored.
        """
        if msg[0] == 'REGISTER':
            self.ctrl.register_agent(msg[1])
        elif msg[0] == 'UNREGISTER':
            self.ctrl.unregister_agent(msg[1])

    def send_json(self, target, data):
        """Send *data*, JSON-encoded, after the *target* frames.

        *target* is a list of frames; the JSON payload is appended to it.
        A ``ValueError`` is logged and re-raised.
        """
        try:
            self._frontstream.send_multipart(target + [json.dumps(data)])
        except ValueError:
            logger.error('Could not dump %s' % str(data))
            raise

    def _handle_recv_front(self, msg, tentative=0):
        """front => back

        All commands starting with CTRL_ are sent to the controller.

        Every frame but the last is the reply envelope; the last frame is
        the JSON payload. Invalid JSON and controller failures are reported
        back as ``{'error': ...}``. *tentative* is not used by the code
        visible here.
        """
        target = msg[:-1]

        try:
            # the payload is the frame right after the envelope, i.e. the
            # last one; this agrees with `target` for any frame count
            data = json.loads(msg[-1])
        except ValueError:
            exc = 'Invalid JSON received.'
            logger.exception(exc)
            self.send_json(target, {'error': exc})
            return

        cmd = data['command']

        # a command handled by the controller
        if cmd.startswith('CTRL_'):
            cmd = cmd[len('CTRL_'):]
            logger.debug('calling %s' % cmd)
            try:
                res = self.ctrl.run_command(cmd, msg, data)
            except Exception as e:
                logger.debug('Failed')
                # the error sent back is the exception text followed by
                # the formatted traceback entries
                exc = traceback.format_tb(sys.exc_info()[2])
                exc.insert(0, str(e))
                self.send_json(target, {'error': exc})
            else:
                # sending back a synchronous result if needed.
                if res is not None:
                    logger.debug('sync success %s' % str(res))
                    self.send_json(target, res)
                else:
                    logger.debug('async success')

        # misc commands
        elif cmd == 'PING':
            res = {'result': {'pid': os.getpid(),
                              'endpoints': self.endpoints,
                              'agents': self.ctrl.agents}}
            self.send_json(target, res)
Example #5
0
class TestBrokerController(unittest2.TestCase):
    """Tests for BrokerController, driven through a FakeBroker."""

    def setUp(self):
        """Create a controller on a temp db directory and stub pid checks."""
        self.dbdir = tempfile.mkdtemp()
        loop = ioloop.IOLoop()
        self.broker = FakeBroker()
        dboptions = {'directory': self.dbdir}
        self.ctrl = BrokerController(self.broker, loop, dboptions=dboptions)
        # every pid is reported as alive while a test runs
        self.old_exists = psutil.pid_exists
        psutil.pid_exists = lambda pid: True

    def tearDown(self):
        """Undo the psutil patch, clear recorded messages, drop the db dir."""
        psutil.pid_exists = self.old_exists
        Stream.msgs[:] = []
        shutil.rmtree(self.dbdir)

    def test_registration(self):
        """An agent can be registered, made busy, then unregistered."""
        self.ctrl.register_agent('1')
        self.assertIn('1', self.ctrl.agents)

        # make the agent busy before we unregister it
        self.ctrl.send_to_agent('1', ['something'])
        self.ctrl.reserve_agents(1, 'run')

        self.ctrl.unregister_agent('1')
        self.assertNotIn('1', self.ctrl.agents)

    def test_reserve_agents(self):
        """Reserving more agents than registered raises; otherwise works."""
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')

        self.assertRaises(NotEnoughWorkersError, self.ctrl.reserve_agents,
                          10, 'run')

        agents = sorted(self.ctrl.reserve_agents(2, 'run'))
        self.assertEqual(agents, ['1', '2'])

    def test_run_and_stop(self):
        """Stopping a run sends exactly one STOP command."""
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')
        self.ctrl.register_agent('3')

        self.ctrl.reserve_agents(1, 'run')
        self.ctrl.reserve_agents(2, 'run2')

        runs = sorted(self.ctrl.list_runs(None, None).keys())
        self.assertEqual(['run', 'run2'], runs)
        self.ctrl.stop_run(['somemsg'], {'run_id': 'run'})

        # make sure the STOP cmd made it through
        msgs = [msg for msg in Stream.msgs if '_STATUS' not in msg[-1]]
        self.assertEqual(msgs[0][-1], '{"command":"STOP"}')
        self.assertEqual(len(msgs), 1)

    def test_db_access(self):
        """Metadata and per-agent data round-trip through the db."""
        self.ctrl.register_agent('1')
        self.ctrl.reserve_agents(1, 'run')

        # metadata
        data = {'some': 'data'}
        self.ctrl.save_metadata('run', data)
        self.assertEqual(self.ctrl.get_metadata(None, {'run_id': 'run'}),
                         data)

        # save data by agent
        self.ctrl.save_data('1', data)
        self.ctrl.flush_db()

        # we get extra run_id key, set for us
        self.assertEqual(data['run_id'], 'run')

        back = self.ctrl.get_data(None, {'run_id': 'run'})
        # assertTrue(x, 'data') only checked truthiness ('data' was taken
        # as the failure message); compare the stored value for real.
        self.assertEqual(back[0]['some'], 'data')

        back2 = self.ctrl.get_data(None, {'run_id': 'run'})
        self.assertEqual(back, back2)

    def test_compute_observers(self):
        """Known observers are resolved; unknown names raise ImportError."""
        obs = ['irc', 'loads.observers.irc']
        observers = _compute_observers(obs)
        self.assertEqual(len(observers), 2)
        self.assertRaises(ImportError, _compute_observers, ['blah'])

    def test_run(self):
        """run() reports an error without agents, and the agents otherwise."""
        msg = ['somedata', '', 'target']
        data = {'agents': 1, 'args': {}}

        # not enough agents
        self.ctrl.run(msg, data)
        res = list(self.broker.msgs.values())[0]
        self.assertEqual(res, [{'error': 'Not enough agents'}])

        # one agent, we're good
        self.ctrl._agents.append('agent1')
        self.ctrl.run(msg, data)
        runs = list(self.broker.msgs.values())[0][-1]
        self.assertEqual(runs['result']['agents'], ['agent1'])

    def test_run_command(self):
        """RUN and AGENT_STATUS go through run_command to the back stream."""
        msg = ['somedata', '', 'target']
        data = {'agents': 1, 'args': {}, 'agent_id': '1'}
        self.ctrl.run_command('RUN', msg, data)
        self.ctrl.run_command('AGENT_STATUS', msg, data)
        runs = list(self.broker.msgs.values())[0][-1]
        self.assertEqual(runs['result']['agents'], ['agent1'])

        expected = {"command": "STATUS", "args": {}, "agents": 1,
                    "agent_id": "1"}
        expected = sorted(expected.items())

        # The original passed `1` as assertTrue's failure message, so only
        # non-emptiness was ever checked; that check is kept, made explicit.
        # NOTE(review): confirm the exact counts and tighten to assertEqual.
        back_msgs = self.broker._backstream.msgs
        self.assertGreaterEqual(len(back_msgs), 1)
        self.assertGreaterEqual(len(back_msgs[0]), 1)
        got = json.loads(back_msgs[0][3])
        got = sorted(got.items())
        self.assertEqual(expected, got)

    def test_clean(self):
        """clean() keeps agent times until the timeout has elapsed."""
        self.ctrl.agent_timeout = 0.1
        self.ctrl._associate('run', ['1', '2'])
        self.ctrl.clean()
        self.assertIn('1', self.ctrl._agent_times)
        self.assertIn('2', self.ctrl._agent_times)

        # wait longer than agent_timeout before cleaning again
        time.sleep(.2)
        self.ctrl.clean()

        self.assertEqual(self.ctrl._agent_times, {})
        self.ctrl.test_ended('run')
Example #6
0
class TestBrokerController(unittest2.TestCase):
    """Tests for BrokerController, driven through a FakeBroker."""

    def setUp(self):
        """Create a controller on a temp db directory and stub pid checks."""
        self.dbdir = tempfile.mkdtemp()
        loop = ioloop.IOLoop()
        broker = FakeBroker()
        dboptions = {'directory': self.dbdir}
        self.ctrl = BrokerController(broker, loop, dboptions=dboptions)
        # every pid is reported as alive while a test runs
        self.old_exists = psutil.pid_exists
        psutil.pid_exists = lambda pid: True

    def tearDown(self):
        """Undo the psutil patch, clear recorded messages, drop the db dir."""
        psutil.pid_exists = self.old_exists
        Stream.msgs[:] = []
        shutil.rmtree(self.dbdir)

    def test_registration(self):
        """An agent can be registered, made busy, then unregistered."""
        self.ctrl.register_agent('1')
        self.assertIn('1', self.ctrl.agents)

        # make the agent busy before we unregister it
        self.ctrl.send_to_agent('1', ['something'])
        self.ctrl.reserve_agents(1, 'run')

        self.ctrl.unregister_agent('1')
        self.assertNotIn('1', self.ctrl.agents)

    def test_reserve_agents(self):
        """Reserving more agents than registered raises; otherwise works."""
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')

        self.assertRaises(NotEnoughWorkersError, self.ctrl.reserve_agents,
                          10, 'run')

        agents = sorted(self.ctrl.reserve_agents(2, 'run'))
        self.assertEqual(agents, ['1', '2'])

    def test_run_and_stop(self):
        """Stopping a run sends exactly one STOP command."""
        self.ctrl.register_agent('1')
        self.ctrl.register_agent('2')
        self.ctrl.register_agent('3')

        self.ctrl.reserve_agents(1, 'run')
        self.ctrl.reserve_agents(2, 'run2')

        runs = sorted(self.ctrl.list_runs(None, None).keys())
        self.assertEqual(['run', 'run2'], runs)
        self.ctrl.stop_run(['somemsg'], {'run_id': 'run'})

        # make sure the STOP cmd made it through
        msgs = [msg for msg in Stream.msgs if '_STATUS' not in msg[-1]]
        self.assertEqual(msgs[0][-1], '{"command": "STOP"}')
        self.assertEqual(len(msgs), 1)

    def test_db_access(self):
        """Metadata and per-agent data round-trip through the db."""
        self.ctrl.register_agent('1')
        self.ctrl.reserve_agents(1, 'run')

        # metadata
        data = {'some': 'data'}
        self.ctrl.save_metadata('run', data)
        self.assertEqual(self.ctrl.get_metadata(None, {'run_id': 'run'}),
                         data)

        # save data by agent
        self.ctrl.save_data('1', data)
        self.ctrl.flush_db()

        # we get extra run_id key, set for us
        self.assertEqual(data['run_id'], 'run')

        back = self.ctrl.get_data(None, {'run_id': 'run'})
        # assertTrue(x, 'data') only checked truthiness ('data' was taken
        # as the failure message); compare the stored value for real.
        self.assertEqual(back[0]['some'], 'data')

        back2 = self.ctrl.get_data(None, {'run_id': 'run'})
        self.assertEqual(back, back2)
Example #7
0
class Broker(object):
    """Class that routes jobs to agents.

    Options:

    - **frontend**: the ZMQ socket to receive jobs.
    - **backend**: the ZMQ socket to communicate with agents.
    - **heartbeat**: the ZMQ socket to receive heartbeat requests.
    - **register** : the ZMQ socket to register agents.
    - **receiver**: the ZMQ socket that receives data from agents.
    - **publisher**: the ZMQ socket to publish agents data
    - **io_threads**: passed to ``zmq.Context``.
    - **agent_timeout**, **db**, **dboptions**: forwarded to the
      ``BrokerController``.
    """
    def __init__(self, frontend=DEFAULT_FRONTEND, backend=DEFAULT_BACKEND,
                 heartbeat=None, register=DEFAULT_REG,
                 io_threads=DEFAULT_IOTHREADS,
                 agent_timeout=DEFAULT_TIMEOUT_MOVF,
                 receiver=DEFAULT_BROKER_RECEIVER, publisher=DEFAULT_PUBLISHER,
                 db='python', dboptions=None):
        """Bind the broker sockets and wire them to a fresh I/O loop.

        Raises ``DuplicateBrokerError`` when ``verify_broker(frontend)``
        returns a pid.
        """
        # before doing anything, we verify if a broker is already up and
        # running
        logger.debug('Verifying if there is a running broker')
        pid = verify_broker(frontend)
        if pid is not None:    # oops. can't do this !
            logger.debug('Ooops, we have a running broker on that socket')
            raise DuplicateBrokerError(pid)

        self.endpoints = {'frontend': frontend,
                          'backend': backend,
                          'register': register,
                          'receiver': receiver,
                          'publisher': publisher}

        if heartbeat is not None:
            self.endpoints['heartbeat'] = heartbeat

        logger.debug('Initializing the broker.')

        # ipc endpoints are handed to register_ipc_file before binding
        for endpoint in self.endpoints.values():
            if endpoint.startswith('ipc'):
                register_ipc_file(endpoint)

        self.context = zmq.Context(io_threads=io_threads)

        # setting up the sockets
        self._frontend = self.context.socket(zmq.ROUTER)
        self._frontend.identity = 'broker-' + frontend
        self._frontend.bind(frontend)
        self._backend = self.context.socket(zmq.ROUTER)
        self._backend.bind(backend)
        self._registration = self.context.socket(zmq.PULL)
        self._registration.bind(register)
        self._receiver = self.context.socket(zmq.PULL)
        self._receiver.bind(receiver)
        self._publisher = self.context.socket(zmq.PUB)
        self._publisher.bind(publisher)

        # setting up the streams
        self.loop = ioloop.IOLoop()
        self._frontstream = zmqstream.ZMQStream(self._frontend, self.loop)
        self._frontstream.on_recv(self._handle_recv_front)
        self._backstream = zmqstream.ZMQStream(self._backend, self.loop)
        self._backstream.on_recv(self._handle_recv_back)
        self._regstream = zmqstream.ZMQStream(self._registration, self.loop)
        self._regstream.on_recv(self._handle_reg)
        self._rcvstream = zmqstream.ZMQStream(self._receiver, self.loop)
        self._rcvstream.on_recv(self._handle_recv)

        # heartbeat: only created when an endpoint was supplied
        if heartbeat is not None:
            self.pong = Heartbeat(heartbeat, io_loop=self.loop,
                                  ctx=self.context,
                                  onregister=self._deregister)
        else:
            self.pong = None

        # status
        self.started = False
        self.poll_timeout = None

        # controller
        self.ctrl = BrokerController(self, self.loop, db=db,
                                     dboptions=dboptions,
                                     agent_timeout=agent_timeout)

    def _handle_recv(self, msg):
        """Publish the data received from an agent, then store it.

        ``msg[0]`` is forwarded untouched to the publisher socket and then
        decoded as JSON.
        """
        # publishing all the data received from agents
        self._publisher.send(msg[0])

        # saving the data locally
        data = json.loads(msg[0])
        agent_id = str(data.get('agent_id'))
        self.ctrl.save_data(agent_id, data)

    def _deregister(self):
        """Heartbeat ``onregister`` callback: unregister every agent."""
        logger.debug('Unregistering all agents')
        self.ctrl.unregister_agents()

    def _handle_reg(self, msg):
        """Handle a REGISTER / UNREGISTER message from the registration socket.

        Messages whose first frame is neither of those are ignored.
        """
        if msg[0] == 'REGISTER':
            self.ctrl.register_agent(msg[1])
        elif msg[0] == 'UNREGISTER':
            self.ctrl.unregister_agent(msg[1])

    def _send_json(self, target, data):
        """Send *data*, JSON-encoded, after the *target* frames.

        *target* is a list of frames; the JSON payload is appended to it.
        A ``ValueError`` is logged and re-raised.
        """
        try:
            self._frontstream.send_multipart(target + [json.dumps(data)])
        except ValueError:
            logger.error('Could not dump %s' % str(data))
            raise

    def _handle_recv_front(self, msg, tentative=0):
        """front => back

        Decode the JSON request in ``msg[2]`` and dispatch on its
        ``command``. Broker-level commands (PING, LISTRUNS, STOPRUN,
        GET_DATA, GET_COUNTS, GET_METADATA, LIST, RUN) are answered through
        the front stream; any other command is sent to the agent named by
        ``agent_id``.

        Invalid JSON is answered with ``{'error': ...}``. When *tentative*
        is 3, commands past the first group are answered with a
        "No agent" error instead of being processed.
        """
        # every frame but the last is the reply envelope
        target = msg[:-1]

        try:
            data = json.loads(msg[2])
        except ValueError:
            # tell the client instead of failing without any reply
            exc = 'Invalid JSON received.'
            logger.exception(exc)
            self._send_json(target, {'error': exc})
            return

        cmd = data['command']
        if cmd == 'PING':
            # used as a health check
            res = {'result': {'pid': os.getpid(),
                              'endpoints': self.endpoints,
                              'agents': self.ctrl.agents}}
            self._send_json(target, res)
            return
        elif cmd == 'LISTRUNS':
            logger.debug('Asked for LISTRUNS')
            res = {'result': self.ctrl.list_runs()}
            logger.debug('Got %s' % str(res))
            self._send_json(target, res)
            return
        elif cmd == 'STOPRUN':
            run_id = data['run_id']
            stopped_agents = self.ctrl.stop_run(run_id, msg)

            # we give back the list of agents we stopped
            res = {'result': stopped_agents}
            self._send_json(target, res)
            return
        elif cmd == 'GET_DATA':
            # we send back the data we have in the db
            # XXX stream ?
            db_data = self.ctrl.get_data(data['run_id'],
                                         data_type=data.get('data_type'),
                                         groupby=data.get('groupby', False))
            self._send_json(target, {'result': db_data})
            return
        elif cmd == 'GET_COUNTS':
            counts = self.ctrl.get_counts(data['run_id'])
            self._send_json(target, {'result': counts})
            return
        elif cmd == 'GET_METADATA':
            metadata = self.ctrl.get_metadata(data['run_id'])
            self._send_json(target, {'result': metadata})
            return

        # other commands below this point are for agents
        if tentative == 3:
            logger.debug('No agents')
            self._send_json(target, {'error': 'No agent'})
            return

        # `data` and `cmd` are still the values decoded above; nothing on
        # the way here modified them, so the payload is not parsed again.

        # broker protocol
        if cmd == 'LIST':
            # we return a list of agent ids and their status
            self._send_json(target, {'result': self.ctrl.agents})
            return
        elif cmd == 'RUN':
            # create a unique id for this run
            run_id = str(uuid4())

            # get some agents
            try:
                agents = self.ctrl.reserve_agents(data['agents'], run_id)
            except NotEnoughWorkersError:
                self._send_json(target, {'error': 'Not enough agents'})
                return

            # send to every agent with the run_id and the receiver endpoint
            data['run_id'] = run_id
            data['args']['zmq_receiver'] = self.endpoints['receiver']

            # the payload is re-serialized before 'started' is added, so
            # the start time is only part of the saved metadata
            msg[2] = json.dumps(data)

            # notice when the test was started
            data['args']['started'] = time.time()

            # save the tests metadata in the db
            self.ctrl.save_metadata(run_id, data['args'])
            self.ctrl.flush_db()

            for agent_id in agents:
                self.ctrl.send_to_agent(agent_id, msg)

            # tell the client which agents where selected.
            res = {'result': {'agents': agents, 'run_id': run_id}}
            self._send_json(target, res)
            return

        if 'agent_id' not in data:
            raise NotImplementedError('DEAD CODE?')
        else:
            agent_id = str(data['agent_id'])
            self.ctrl.send_to_agent(agent_id, msg)

    def _handle_recv_back(self, msg):
        """back => front

        The first frame is the agent id; it is stripped before anything is
        forwarded. ``_STATUS`` results update the controller (and publish a
        ``run-finished`` event when ``update_status`` returns a run id) and
        are not forwarded. Everything else is passed to the front stream;
        a failure there is logged, not raised.
        """
        # let's remove the agent id and track the time it took
        agent_id = msg[0]
        msg = msg[1:]

        # grabbing the data to update the agents statuses if needed
        data = json.loads(msg[-1])
        if 'error' in data:
            result = data['error']
            logger.error(result.get('exception'))
        else:
            result = data['result']

        if result.get('command') == '_STATUS':
            statuses = result['status'].values()
            run_id = self.ctrl.update_status(agent_id, statuses)
            if run_id is not None:
                # if the tests are finished, publish this on the pubsub.
                self._publisher.send(json.dumps({'data_type': 'run-finished',
                                                 'run_id': run_id}))

            return

        # other things are pass-through
        try:
            self._frontstream.send_multipart(msg)
        except Exception as e:
            logger.error('Could not send to front')
            logger.error(msg)
            # we don't want to die on error. we just log it
            exc = traceback.format_tb(sys.exc_info()[2])
            exc.insert(0, str(e))
            logger.error('\n'.join(exc))