Esempio n. 1
0
 def on_result(self, message):
     """Hand a result message to the handler selected by its ``'mode'`` entry.

     The handler is obtained from ``load_handler`` using the message's
     ``'mode'`` value (``HANDLER_MODE_DEFAULT`` when the key is absent) and
     its ``newtask`` method is called with the message.

     Any ``Exception`` raised while loading or running the handler is
     passed to ``self.error`` and is not re-raised.

     :param message: mapping-like object providing ``get``.
     """
     self.debug("got message: %s" % message)
     try:
         mode = message.get('mode', HANDLER_MODE_DEFAULT)
         worker = load_handler(mode, self.ctx, None)
         self.debug("Spider loaded handler: %s" % worker)
         worker.newtask(message)
         # Drop the local reference as soon as the handler has done its work.
         del worker
     except Exception as err:
         self.error(err)
Esempio n. 2
0
 def __init__(self, context, task, no_sync=False):
     """Set up logging and load the handler matching the task's mode.

     The parent initialiser receives the ``'handler'`` logger together with
     ``logging.DEBUG`` when ``context.obj`` has a truthy ``'debug'`` entry,
     otherwise ``logging.WARN``.

     :param context: object whose ``obj`` attribute provides ``get``.
     :param task: mapping-like object; its ``'mode'`` entry selects the
         handler (``HANDLER_MODE_DEFAULT`` when the key is absent).
     :param no_sync: stored in ``self.params`` and forwarded unchanged to
         ``load_handler``.
     :raises CDSpiderHandlerError: when ``load_handler`` raises
         ``ImportError``.
     """
     handler_logger = logging.getLogger('handler')
     level = logging.DEBUG if context.obj.get('debug', False) else logging.WARN
     super(Loader, self).__init__(handler_logger, level)
     handler_mode = task.get('mode', HANDLER_MODE_DEFAULT)
     # Kept on the instance; the same three values are passed to load_handler.
     self.params = {"context": context, "task": task, "no_sync": no_sync}
     try:
         self.handler = load_handler(
             handler_mode, context=context, task=task, no_sync=no_sync
         )
     except ImportError:
         raise CDSpiderHandlerError("handler not exists")
Esempio n. 3
0
    def schedule(self, message=None):
        """Collect route parameters from handlers and push them to the out-queue.

        One worker is started through ``run_in_thread`` for every
        handler/frequency pair.  Handlers come from ``self.mode`` (each key is
        loaded with ``load_handler``) or, when ``self.mode`` is falsy, from the
        ``"handler"`` extensions visited by ``call_extension``.  Frequencies
        come from ``self.frequency`` or, when that is falsy, from the keys of
        ``app_config['frequencymap']`` in ``self.ctx.obj``.

        Each worker calls ``handler.route(mode, frequency, save)`` repeatedly
        until a full pass yields no truthy item.  Every truthy item is merged
        with ``frequency`` and ``mode`` and put on ``self.outqueue`` unless
        ``self.testing_mode`` is truthy.

        :param message: not read by this method.
        """
        cls_name = self.__class__.__name__
        self.info("%s route starting..." % cls_name)

        def handler_schedule(handler, mode, frequency):
            # Worker body: drain handler.route() for one mode/frequency pair.
            self.info("%s loaded handler: %s by %s" % (cls_name, handler, frequency))
            # The same dict is handed to every route() call of this worker;
            # presumably the handler keeps paging state in it -- confirm.
            save = {}
            while True:
                has_item = False
                for item in handler.route(mode, frequency, save):
                    # Pre-format with % like every other log call here, so the
                    # call works whether or not info() accepts extra arguments.
                    self.info("%s got route parameter: %s" % (cls_name, json.dumps(item)))
                    if not item:
                        continue
                    has_item = True
                    # Keys in item override "frequency"/"mode" if they collide.
                    payload = {
                        "frequency": frequency,
                        "mode": mode,
                        **item
                    }
                    self.debug("%s route message: %s" % (cls_name, str(payload)))
                    if not self.testing_mode:
                        self.outqueue.put_nowait(payload)
                # Stop once a whole pass produced nothing usable.
                if not has_item:
                    break
                time.sleep(0.1)

        threads = []
        frequencymap = self.frequency if self.frequency else self.ctx.obj.get('app_config', {}).get('frequencymap', {}).keys()

        if self.mode:
            for key in self.mode:
                # One handler instance per mode, shared by that mode's workers.
                handler = load_handler(key, context=self.ctx, task=None)
                for frequency in frequencymap:
                    threads.append(run_in_thread(handler_schedule, handler, key, frequency))
        else:
            def execut(ext, data):
                # Callback for call_extension: one worker per frequency.
                for frequency in data["frequencymap"]:
                    threads.append(run_in_thread(handler_schedule, ext.obj, ext.name, frequency))

            call_extension("handler", execut, {"ctx": self.ctx, "frequencymap": frequencymap}, context=self.ctx, task=None)

        for each in threads:
            if not each.is_alive():
                continue
            # Only objects exposing terminate() are terminated before joining.
            if hasattr(each, 'terminate'):
                each.terminate()
            each.join()

        self.info("%s route end, %s threads was run" % (cls_name, len(threads)))
Esempio n. 4
0
 def schedule(self, message):
     """Run the handler named by ``message['mode']`` and dispatch its tasks.

     ``handler.schedule(message, save)`` is called repeatedly.  Every truthy
     item it yields is tagged with the handler mode and passed to
     ``self.send_task`` unless ``self.testing_mode`` is truthy.  The loop
     ends after the first pass when ``message`` contains ``'count'``, or as
     soon as a pass yields no truthy item.

     :param message: mapping with a non-empty ``'mode'`` entry.
     :raises CDSpiderError: when ``'mode'`` is missing or falsy.
     """
     cls_name = self.__class__.__name__
     self.debug("%s schedule got message: %s" % (cls_name, str(message)))
     if not ('mode' in message and message['mode']):
         raise CDSpiderError("%s handler mode is missing" % cls_name)
     self.info("%s schedule starting..." % cls_name)
     handler_mode = message['mode']
     worker = load_handler(handler_mode, context=self.ctx, task=None)
     self.info("%s loaded handler: %s" % (cls_name, worker))
     # The same dict, seeded with the start time, is passed to every pass.
     save = {"now": int(time.time())}
     while True:
         produced = False
         for task in worker.schedule(message, save):
             if not task:
                 continue
             task['mode'] = handler_mode
             self.debug("%s schedule task: %s" % (cls_name, str(task)))
             if not self.testing_mode:
                 self.send_task(task)
             produced = True
         # A message carrying 'count' gets exactly one pass.
         if 'count' in message or not produced:
             break
         time.sleep(0.1)
     del worker
     self.info("%s schedule end" % cls_name)
Esempio n. 5
0
 def status(self, message):
     """Forward a status message to the handler selected by its ``'mode'``.

     The handler is obtained from ``load_handler`` using the message's
     ``'mode'`` value (``HANDLER_MODE_DEFAULT`` when the key is absent) and
     its ``status`` method is called with the message.  Exceptions are not
     caught here.

     :param message: mapping-like object providing ``get``.
     """
     mode = message.get('mode', HANDLER_MODE_DEFAULT)
     worker = load_handler(mode, self.ctx, None)
     self.info("Spider loaded handler: %s" % worker)
     worker.status(message)
     # Drop the local reference once the handler has been invoked.
     del worker