Beispiel #1
0
    def test_that_mgmt_messages_work(self):
        """A `MgmtMessage` built from three frames exposes and re-emits them."""
        topic, identity, data = "me", "myself", "and i"

        message = MgmtMessage([topic, identity, data])

        # each frame is reachable through its named attribute
        self.assertEqual(topic, message.topic)
        self.assertEqual(identity, message.identity)
        self.assertEqual(data, message.data)

        # serializing yields the original frames, and parsing them again
        # produces an equal message
        expected_frames = [topic, identity, data]
        self.assertEqual(expected_frames, message.serialize())
        round_tripped = MgmtMessage(message.serialize())
        self.assertEqual(message, round_tripped)
Beispiel #2
0
    def test_that_mgmt_messages_work(self):
        """Check attribute access, serialization and equality of `MgmtMessage`."""
        topic, identity, data = "me", "myself", "and i"

        parsed = MgmtMessage([topic, identity, data])

        # the three frames map onto topic, identity and data (in that order)
        self.assertEqual(topic, parsed.topic)
        self.assertEqual(identity, parsed.identity)
        self.assertEqual(data, parsed.data)

        # serialize() gives back the frame list ...
        self.assertEqual([topic, identity, data], parsed.serialize())
        # ... and a message rebuilt from it compares equal to the first one
        rebuilt = MgmtMessage(parsed.serialize())
        self.assertEqual(parsed, rebuilt)
Beispiel #3
0
 def assert_expected_result_and_stop(raw_msg):
     """
     Check the extraction-finished flag of the received message, then
     publish the worker QUIT management message.
     """
     received = DataMessage(raw_msg)
     finished_flag = received.curi.optional_vars[CURI_EXTRACTION_FINISHED]
     self.assertEqual(CURI_OPTIONAL_TRUE, finished_flag)

     quit_request = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                                data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     publisher = self._mgmt_sockets['master_pub']
     publisher.send_multipart(quit_request.serialize())
 def assert_expected_result_and_stop(raw_msg):
     """
     Expect a 304 status with an empty content body, then publish the
     worker QUIT management message.
     """
     curi = DataMessage(raw_msg).curi
     self.assertEqual(304, curi.status_code)
     self.assertEqual("", curi.content_body)

     quit_request = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                                data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     publisher = self._mgmt_sockets['master_pub']
     publisher.send_multipart(quit_request.serialize())
 def assert_expected_result_and_stop(raw_msg):
     """
     Assert that the received URI is flagged as extraction-finished and
     then send the worker QUIT management message via `master_pub`.
     """
     data_message = DataMessage(raw_msg)
     optional_vars = data_message.curi.optional_vars
     self.assertEqual(CURI_OPTIONAL_TRUE,
                      optional_vars[CURI_EXTRACTION_FINISHED])

     shutdown = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                            data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     master_pub = self._mgmt_sockets['master_pub']
     master_pub.send_multipart(shutdown.serialize())
 def assert_expected_result_and_stop(raw_msg):
     """
     Assert a 304 response without content, then send the worker QUIT
     management message via `master_pub`.
     """
     data_message = DataMessage(raw_msg)
     self.assertEqual(304, data_message.curi.status_code)
     self.assertEqual("", data_message.curi.content_body)

     shutdown = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                            data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     master_pub = self._mgmt_sockets['master_pub']
     master_pub.send_multipart(shutdown.serialize())
Beispiel #7
0
 def handle_shutdown_signal(_sig, _frame):
     """
     Signal handler: build a QUIT management message and hand its
     serialized form to `quit_worker`.
     """
     quit_request = MgmtMessage(data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     raw_quit = quit_request.serialize()
     quit_worker(raw_quit)
     # zmq 2.1 stops blocking calls, so the ioloop has to be started again
     io_loop.start()
 def assert_expected_result_and_stop(raw_msg):
     """
     Compare the received content body with the local ``static/robots.txt``
     fixture, then publish the worker QUIT management message.
     """
     msg = DataMessage(raw_msg)
     robots_path = os.path.join(os.path.dirname(__file__),
                                "static/robots.txt")
     # use a context manager so the fixture file is closed right away
     # instead of leaking the handle until garbage collection
     with open(robots_path) as robots_file:
         robots = robots_file.read()
     self.assertEqual(robots, msg.curi.content_body)
     death = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                         data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     self._mgmt_sockets['master_pub'].send_multipart(death.serialize())
Beispiel #9
0
 def publish(self, topic=None, identity=None, data=None):
     """
     Wrap the given parts in a `MgmtMessage` and send it on the out stream.

     `topic` and `data` must not be `None`; `identity` may be omitted.
     """
     assert topic is not None
     assert data is not None
     message = MgmtMessage(topic=topic, identity=identity, data=data)
     stream = self._out_stream
     stream.send_multipart(message.serialize())
Beispiel #10
0
 def assert_expected_result_and_stop(raw_msg):
     """
     Compare the received content body with the local ``static/robots.txt``
     fixture, then publish the worker QUIT management message.
     """
     msg = DataMessage(raw_msg)
     robots_path = os.path.join(os.path.dirname(__file__),
                                "static/robots.txt")
     # use a context manager so the fixture file is closed right away
     # instead of leaking the handle until garbage collection
     with open(robots_path) as robots_file:
         robots = robots_file.read()
     self.assertEqual(robots, msg.curi.content_body)
     death = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                         data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     self._mgmt_sockets['master_pub'].send_multipart(death.serialize())
Beispiel #11
0
 def quit_worker(raw_msg):
     """
     Handle a raw management message: on QUIT, schedule the io_loop to stop
     after 2 seconds and send a QUIT_ACK; any other data is ignored.
     """
     request = MgmtMessage(raw_msg)
     if ZMQ_SPYDER_MGMT_WORKER_QUIT != request.data:
         return

     logger.info("process::We have been asked to shutdown, do so")
     # the loop is stopped by a timer, not immediately
     stop_timer = DelayedCallback(io_loop.stop, 2000, io_loop)
     stop_timer.start()
     acknowledgement = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                                   identity=identity,
                                   data=ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK)
     mgmt._out_stream.send_multipart(acknowledgement.serialize())
    def test_that_creating_mgmt_works(self):
        """
        Create the worker management via `create_worker_management`, publish
        a QUIT message to it and run the io loop until the registered
        callback stops it. All sockets and streams are closed afterwards.
        """

        ctx = zmq.Context()
        io_loop = IOLoop.instance()

        def stop_looping(_msg):
            io_loop.stop()

        # route all endpoints through in-process transports
        settings = Settings()
        settings.ZEROMQ_MASTER_PUSH = 'inproc://spyder-zmq-master-push'
        settings.ZEROMQ_WORKER_PROC_FETCHER_PULL = \
            settings.ZEROMQ_MASTER_PUSH
        settings.ZEROMQ_MASTER_SUB = 'inproc://spyder-zmq-master-sub'
        settings.ZEROMQ_WORKER_PROC_EXTRACTOR_PUB = \
            settings.ZEROMQ_MASTER_SUB

        settings.ZEROMQ_MGMT_MASTER = 'inproc://spyder-zmq-mgmt-master'
        settings.ZEROMQ_MGMT_WORKER = 'inproc://spyder-zmq-mgmt-worker'

        # the test plays the master: it publishes on the master endpoint ...
        pubsocket = ctx.socket(zmq.PUB)
        pubsocket.bind(settings.ZEROMQ_MGMT_MASTER)
        pub_stream = ZMQStream(pubsocket, io_loop)

        # ... and subscribes to everything sent on the worker endpoint
        subsocket = ctx.socket(zmq.SUB)
        subsocket.setsockopt(zmq.SUBSCRIBE, "")
        subsocket.bind(settings.ZEROMQ_MGMT_WORKER)
        sub_stream = ZMQStream(subsocket, io_loop)

        mgmt = workerprocess.create_worker_management(settings, ctx, io_loop)
        mgmt.add_callback(ZMQ_SPYDER_MGMT_WORKER, stop_looping)
        mgmt.start()

        def assert_quit_message(raw_msg):
            # on_recv hands over the raw multipart frames, so they have to
            # be parsed into a MgmtMessage before `.data` can be read
            msg = MgmtMessage(raw_msg)
            self.assertEqual(ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK, msg.data)

        sub_stream.on_recv(assert_quit_message)

        death = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                            data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
        pub_stream.send_multipart(death.serialize())

        # blocks until stop_looping() is invoked
        io_loop.start()

        # close everything before terminating the context
        mgmt._out_stream.close()
        mgmt._in_stream.close()
        mgmt._publisher.close()
        mgmt._subscriber.close()
        pub_stream.close()
        pubsocket.close()
        sub_stream.close()
        subsocket.close()
        ctx.term()
    def test_that_creating_mgmt_works(self):
        """
        Create the worker management via `create_worker_management`, publish
        a QUIT message to it and run the io loop until the registered
        callback stops it. All sockets and streams are closed afterwards.
        """

        ctx = zmq.Context()
        io_loop = IOLoop.instance()

        def stop_looping(_msg):
            io_loop.stop()

        # route all endpoints through in-process transports
        settings = Settings()
        settings.ZEROMQ_MASTER_PUSH = 'inproc://spyder-zmq-master-push'
        settings.ZEROMQ_WORKER_PROC_FETCHER_PULL = \
            settings.ZEROMQ_MASTER_PUSH
        settings.ZEROMQ_MASTER_SUB = 'inproc://spyder-zmq-master-sub'
        settings.ZEROMQ_WORKER_PROC_EXTRACTOR_PUB = \
            settings.ZEROMQ_MASTER_SUB

        settings.ZEROMQ_MGMT_MASTER = 'inproc://spyder-zmq-mgmt-master'
        settings.ZEROMQ_MGMT_WORKER = 'inproc://spyder-zmq-mgmt-worker'

        # the test plays the master: it publishes on the master endpoint ...
        pubsocket = ctx.socket(zmq.PUB)
        pubsocket.bind(settings.ZEROMQ_MGMT_MASTER)
        pub_stream = ZMQStream(pubsocket, io_loop)

        # ... and subscribes to everything sent on the worker endpoint
        subsocket = ctx.socket(zmq.SUB)
        subsocket.setsockopt(zmq.SUBSCRIBE, "")
        subsocket.bind(settings.ZEROMQ_MGMT_WORKER)
        sub_stream = ZMQStream(subsocket, io_loop)

        mgmt = workerprocess.create_worker_management(settings, ctx, io_loop)
        mgmt.add_callback(ZMQ_SPYDER_MGMT_WORKER, stop_looping)
        mgmt.start()

        def assert_quit_message(raw_msg):
            # on_recv hands over the raw multipart frames, so they have to
            # be parsed into a MgmtMessage before `.data` can be read
            msg = MgmtMessage(raw_msg)
            self.assertEqual(ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK, msg.data)

        sub_stream.on_recv(assert_quit_message)

        death = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                            data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
        pub_stream.send_multipart(death.serialize())

        # blocks until stop_looping() is invoked
        io_loop.start()

        # close everything before terminating the context
        mgmt._out_stream.close()
        mgmt._in_stream.close()
        mgmt._publisher.close()
        mgmt._subscriber.close()
        pub_stream.close()
        pubsocket.close()
        sub_stream.close()
        subsocket.close()
        ctx.term()
Beispiel #14
0
    def test_simple_mgmt_session(self):
        """
        Run a management session: register callbacks, publish a test
        message and verify the QUIT_ACK answer as well as callback removal.
        """
        manager = ZmqMgmt(self._worker_sub, self._worker_pub,
                          io_loop=self._io_loop)
        manager.start()

        # a plain string is not accepted as a callback
        self.assertRaises(ValueError, manager.add_callback, "test", "test")

        manager.add_callback(self._topic, self.call_me)
        manager.add_callback(ZMQ_SPYDER_MGMT_WORKER, self.on_end)

        outgoing = MgmtMessage(topic=self._topic, data='test'.encode())
        self._master_pub.send_multipart(outgoing.serialize())

        def assert_correct_mgmt_answer(raw_msg):
            answer = MgmtMessage(raw_msg)
            self.assertEqual(ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK, answer.data)
            # once both callbacks are removed nothing may be left registered
            manager.remove_callback(self._topic, self.call_me)
            manager.remove_callback(ZMQ_SPYDER_MGMT_WORKER, self.on_end)
            self.assertEqual({}, manager._callbacks)

        self._master_sub.on_recv(assert_correct_mgmt_answer)

        self._io_loop.start()
Beispiel #15
0
 def echo_processing(self, crawl_uri):
     """
     Publish the worker QUIT management message and return `crawl_uri`
     unchanged.
     """
     quit_request = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                                data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     master_pub = self._mgmt_sockets['master_pub']
     master_pub.send_multipart(quit_request.serialize())
     return crawl_uri
Beispiel #16
0
 def echo_processing(self, data_message, out_socket):
     """
     Publish the worker QUIT management message, then forward the
     serialized `data_message` on `out_socket`.
     """
     quit_request = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                                data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     master_pub = self._mgmt_sockets['master_pub']
     master_pub.send_multipart(quit_request.serialize())
     # echo the incoming message back out
     out_socket.send_multipart(data_message.serialize())
Beispiel #17
0
 def echo_processing(self, crawl_uri):
     """
     Send the worker QUIT management message via `master_pub`; the given
     `crawl_uri` is handed back as is.
     """
     shutdown = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                            data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     publisher = self._mgmt_sockets['master_pub']
     publisher.send_multipart(shutdown.serialize())
     return crawl_uri
Beispiel #18
0
 def echo_processing(self, data_message, out_socket):
     """
     Send the worker QUIT management message via `master_pub` and write
     the serialized `data_message` to `out_socket`.
     """
     shutdown = MgmtMessage(
         topic=ZMQ_SPYDER_MGMT_WORKER,
         data=ZMQ_SPYDER_MGMT_WORKER_QUIT,
     )
     publisher = self._mgmt_sockets["master_pub"]
     publisher.send_multipart(shutdown.serialize())
     out_socket.send_multipart(data_message.serialize())
Beispiel #19
0
 def call_me(self, msg):
     """
     Callback for the test topic: verify topic and payload of `msg`, then
     publish the worker QUIT management message.
     """
     self.assertEqual(self._topic, msg.topic)
     expected_payload = 'test'.encode()
     self.assertEqual(expected_payload, msg.data)

     quit_request = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER,
                                data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
     self._master_pub.send_multipart(quit_request.serialize())
Beispiel #20
0
def main(settings):
    """
    The :meth:`main` method for worker processes.

    Here we will:

     - create a :class:`ZmqMgmt` instance

     - create a :class:`Fetcher` instance

     - initialize and instantiate the extractor chain

    The `settings` have to be loaded already.

    In addition this function attaches a ZeroMQ PUB logging handler to the
    root logger, announces the worker to the master, installs handlers for
    SIGINT and SIGTERM and then blocks inside the io loop. After the loop
    has returned, the fetcher, the extractor and the management are closed
    and the ZeroMQ context is terminated.
    """
    # create my own identity
    identity = "worker:%s:%s" % (socket.gethostname(), os.getpid())

    ctx = zmq.Context()
    io_loop = IOLoop.instance()

    # initialize the logging subsystem
    # (log records of the root logger are published on `log_pub`)
    log_pub = ctx.socket(zmq.PUB)
    log_pub.connect(settings.ZEROMQ_LOGGING)
    zmq_logging_handler = PUBHandler(log_pub)
    zmq_logging_handler.root_topic = "spyder.worker"
    logger = logging.getLogger()
    logger.addHandler(zmq_logging_handler)
    logger.setLevel(settings.LOG_LEVEL_WORKER)

    logger.info("process::Starting up another worker")

    mgmt = create_worker_management(settings, ctx, io_loop)

    logger.debug("process::Initializing fetcher, extractor and scoper")

    fetcher = create_worker_fetcher(settings, mgmt, ctx, zmq_logging_handler,
        io_loop)
    fetcher.start()
    extractor = create_worker_extractor(settings, mgmt, ctx,
        zmq_logging_handler, io_loop)
    extractor.start()

    def quit_worker(raw_msg):
        """
        When the worker should quit, stop the io_loop after 2 seconds.

        Messages whose data is not the QUIT constant are ignored. A QUIT is
        answered with a QUIT_ACK that carries this worker's identity.
        """
        msg = MgmtMessage(raw_msg)
        if ZMQ_SPYDER_MGMT_WORKER_QUIT == msg.data:
            logger.info("process::We have been asked to shutdown, do so")
            # stop via a timer instead of immediately
            DelayedCallback(io_loop.stop, 2000, io_loop).start()
            ack = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER, identity=identity,
                    data=ZMQ_SPYDER_MGMT_WORKER_QUIT_ACK)
            mgmt._out_stream.send_multipart(ack.serialize())

    # register the quit handler before the management starts processing
    mgmt.add_callback(ZMQ_SPYDER_MGMT_WORKER, quit_worker)
    mgmt.start()

    # notify the master that we are online
    msg = MgmtMessage(topic=ZMQ_SPYDER_MGMT_WORKER, identity=identity,
            data=ZMQ_SPYDER_MGMT_WORKER_AVAIL)
    mgmt._out_stream.send_multipart(msg.serialize())

    def handle_shutdown_signal(_sig, _frame):
        """
        Called from the os when a shutdown signal is fired.

        Reuses `quit_worker` by feeding it a locally built QUIT message.
        """
        msg = MgmtMessage(data=ZMQ_SPYDER_MGMT_WORKER_QUIT)
        quit_worker(msg.serialize())
        # zmq 2.1 stops blocking calls, restart the ioloop
        io_loop.start()

    # handle kill signals
    signal.signal(signal.SIGINT, handle_shutdown_signal)
    signal.signal(signal.SIGTERM, handle_shutdown_signal)

    logger.info("process::waiting for action")
    # this will block until the worker quits
    try:
        io_loop.start()
    except ZMQError:
        logger.debug("Caught a ZMQError. Hopefully during shutdown")
        logger.debug(traceback.format_exc())

    # release the resources of all modules before terminating the context
    for mod in [fetcher, extractor, mgmt]:
        mod.close()

    logger.info("process::Houston: Worker down")
    # NOTE(review): `log_pub` is not closed in this function before the
    # context is terminated - confirm that this cannot make `term()` block
    ctx.term()