class ArachnadoExecutionEngine(ExecutionEngine):
    """
    ExecutionEngine subclass that reports its state changes through signals.

    In addition to the inherited behaviour it sends:

    * ``signals.spider_closing`` the first time a spider close is requested;
    * ``signals.engine_paused`` / ``signals.engine_resumed`` after
      :meth:`pause` / :meth:`unpause`;
    * ``signals.engine_tick`` some time after each ``_next_request`` call.
    """
    def __init__(self, *args, **kwargs):
        """Initialise the base engine and prepare the delayed tick sender."""
        super(ArachnadoExecutionEngine, self).__init__(*args, **kwargs)
        # Wrapper used by ``_next_request`` to schedule the tick signal.
        self.send_tick = CallLaterOnce(self._send_tick_signal)

    def close_spider(self, spider, reason='cancelled'):
        """
        Close ``spider``, sending ``spider_closing`` before delegating.

        When the slot is already closing, its existing ``closing`` value is
        returned and nothing else happens (no signal, no second close).
        """
        in_progress = self.slot.closing
        if in_progress:
            return in_progress

        # Mark the crawler as stopped and announce it before the actual
        # close is handed over to the base class.
        self.crawler.crawling = False
        self.signals.send_catch_log(signals.spider_closing)
        base = super(ArachnadoExecutionEngine, self)
        return base.close_spider(spider, reason)

    def pause(self):
        """Pause the engine, then send ``engine_paused``."""
        super(ArachnadoExecutionEngine, self).pause()
        self.signals.send_catch_log(signals.engine_paused)

    def unpause(self):
        """Resume the engine, then send ``engine_resumed``."""
        super(ArachnadoExecutionEngine, self).unpause()
        self.signals.send_catch_log(signals.engine_resumed)

    def _next_request(self, spider):
        """Run the base ``_next_request`` and schedule a tick signal."""
        outcome = super(ArachnadoExecutionEngine, self)._next_request(spider)
        # The 0.1 s delay keeps the tick signal from being sent too often.
        self.send_tick.schedule(0.1)
        return outcome

    def _send_tick_signal(self):
        """Send ``engine_tick`` (callback wrapped by ``self.send_tick``)."""
        self.signals.send_catch_log_deferred(signals.engine_tick)
Example #2
0
class ArachnadoExecutionEngine(ExecutionEngine):
    """
    Execution engine that emits extra lifecycle signals.

    Signals sent on top of the base class behaviour: ``spider_closing``
    (when a close is first requested), ``engine_paused``,
    ``engine_resumed`` and a delayed ``engine_tick``.
    """

    def __init__(self, *args, **kwargs):
        """Set up the base engine plus the delayed ``engine_tick`` sender."""
        super(ArachnadoExecutionEngine, self).__init__(*args, **kwargs)
        tick_sender = CallLaterOnce(self._send_tick_signal)
        self.send_tick = tick_sender

    def close_spider(self, spider, reason="cancelled"):
        """
        Request closing of ``spider`` and signal ``spider_closing``.

        If ``self.slot.closing`` is already truthy it is returned as-is and
        no signal is sent.
        """
        pending_close = self.slot.closing
        if not pending_close:
            # First close request: flag the crawler, notify listeners and
            # let the base class do the real work.
            self.crawler.crawling = False
            self.signals.send_catch_log(signals.spider_closing)
            engine = super(ArachnadoExecutionEngine, self)
            return engine.close_spider(spider, reason)
        return pending_close

    def pause(self):
        """Pause the execution engine and send ``engine_paused``."""
        super(ArachnadoExecutionEngine, self).pause()
        self.signals.send_catch_log(signals.engine_paused)

    def unpause(self):
        """Resume the execution engine and send ``engine_resumed``."""
        super(ArachnadoExecutionEngine, self).unpause()
        self.signals.send_catch_log(signals.engine_resumed)

    def _next_request(self, spider):
        """Delegate to the base class, then schedule the tick signal."""
        result = super(ArachnadoExecutionEngine, self)._next_request(spider)
        # Delayed by 0.1 s to avoid sending the signal too often.
        self.send_tick.schedule(0.1)
        return result

    def _send_tick_signal(self):
        """Send the ``engine_tick`` signal."""
        self.signals.send_catch_log_deferred(signals.engine_tick)
Example #3
0
class MyselfExecutionEngine(ExecutionEngine):
    """Extended execution engine that sends signals on state changes.

    Sends ``spider_closing`` when a spider close is first requested,
    ``engine_paused`` / ``engine_resumed`` on pause / unpause, and a delayed
    ``engine_tick`` after each ``_next_request`` call.
    """
    def __init__(self, *args, **kwargs):
        """Initialise the base engine and create the delayed tick sender."""
        super(MyselfExecutionEngine, self).__init__(*args, **kwargs)
        self.send_tick = CallLaterOnce(self._send_tick_signal)

    # TODO
    def close_spider(self, spider, reason='cancelled'):
        """Close the spider (the original note adds: and clear its
        outstanding requests), sending ``spider_closing`` first.

        Returns the existing ``self.slot.closing`` value unchanged when a
        close is already in progress.
        """
        # NOTE(review): per the original comment, self.slot uses
        # twisted.reactor to schedule the engine's _next_request method
        # (the core loop) - not verifiable from this file.
        current_close = self.slot.closing
        if current_close:
            return current_close

        self.crawler.crawling = False
        self.signals.send_catch_log(signals.spider_closing)
        parent = super(MyselfExecutionEngine, self)
        return parent.close_spider(spider, reason)

    def pause(self):
        """Pause the execution engine and send ``engine_paused``."""
        super(MyselfExecutionEngine, self).pause()
        self.signals.send_catch_log(signals.engine_paused)

    def unpause(self):
        """Resume the paused engine and send ``engine_resumed``."""
        super(MyselfExecutionEngine, self).unpause()
        self.signals.send_catch_log(signals.engine_resumed)

    def _next_request(self, spider):
        """Run the base scheduling step, then schedule the tick signal."""
        step_result = super(MyselfExecutionEngine, self)._next_request(spider)
        # Tick is requested with a 0.1 s delay.
        self.send_tick.schedule(0.1)
        return step_result

    def _send_tick_signal(self):
        """Send the ``engine_tick`` signal."""
        self.signals.send_catch_log_deferred(signals.engine_tick)
def createSpiderTask(site_info, settings, CHECK_POINT,
                     task_interval=3, initial_delay=0.5):
    """Start a periodic task that runs the spider callable named in site_info.

    The callable is looked up from ``site_info["SpiderName"]`` and wrapped in
    a ``CallLaterOnce`` together with ``site_info``, an iterator over the
    ``select(...)`` results for this site, and ``CHECK_POINT``. A
    ``task.LoopingCall`` then re-schedules that wrapper every
    ``task_interval`` seconds.

    Args:
        site_info: Mapping that provides at least the ``"site_id"`` and
            ``"SpiderName"`` keys.
        settings: Passed through to ``select`` unchanged.
        CHECK_POINT: Passed through to the spider callable unchanged.
        task_interval: Seconds between heartbeat invocations (default 3,
            the value that used to be hard-coded).
        initial_delay: Delay in seconds for the first scheduling request
            (default 0.5, the value that used to be hard-coded).

    Returns:
        The started ``task.LoopingCall``, so that the caller can stop the
        periodic task later. Previously the handle was discarded and the
        function returned ``None``.
    """
    results = iter(select(settings, SITE_ID=site_info["site_id"]))

    # SECURITY NOTE(review): eval() executes whatever expression is stored
    # in site_info["SpiderName"]. This is only acceptable if site_info comes
    # from a trusted source; otherwise replace it with an explicit
    # name -> callable registry.
    spider_callable = eval(site_info["SpiderName"])

    nextcall = CallLaterOnce(spider_callable, site_info, results, CHECK_POINT)
    heartbeat = task.LoopingCall(nextcall.schedule)

    # Request the first callback after `initial_delay` seconds.
    nextcall.schedule(delay=initial_delay)
    # Produce one task every `task_interval` seconds.
    heartbeat.start(task_interval)
    return heartbeat