def process(self, *args, **kwargs):
    """Run *args* through every pipeline step in order.

    Each step's ``process`` is invoked as a Tornado ``gen.Task``; the step
    yields ``(action, result)``.  The result of one step becomes the sole
    positional argument of the next.  Per-step timing/call metrics are
    recorded under ``pipeline:<name>:time`` / ``pipeline:<name>:calls``.

    A step returning ``Step.STOP``, or raising, ends the pipeline early.
    If a ``callback`` keyword was supplied, it is finally called with the
    last result produced (``None`` when no step ran).
    """
    if not args:
        args = [None]
    # Use a default so a missing 'callback' doesn't raise KeyError —
    # the guard at the bottom already treats it as optional.
    callback = kwargs.pop('callback', None)
    nargs = args
    # Pre-seed so callback(res) is safe even when self.steps is empty.
    res = None
    for item in self.steps:
        logging.debug("Running %s %r", item.name, args[0])
        try:
            t0 = time.time()
            yvalue = yield gen.Task(item.process, *nargs, **kwargs)
            dt = time.time() - t0
            systemMetrics.add('pipeline:%s:time' % item.name, dt)
            systemMetrics.incr('pipeline:%s:calls' % item.name)
        except Exception as e:
            logging.error("Pipeline Exception", exc_info=e)
            # The original fell through here and unpacked an undefined
            # yvalue (NameError on first-step failure); stop instead.
            break
        action, res = yvalue
        if action == Step.STOP:
            logging.debug("Processing stopped by %s" % item.__class__.__name__)
            break
        # Feed this step's result into the next step.
        nargs = [res]
    if callback:
        callback(res)
def loop(self):
    """Run one iteration of the crawler loop, then reschedule itself.

    Flow: bail out when stopping; back off 1s when there is no queue or
    crawling is disabled; pop a task (retrying after ``crawl_delay`` on
    failure or empty pop); run it through the pipeline; record metrics and
    a LogEvent; finally schedule the next iteration on the IOLoop.
    """
    if self._stopping:
        return
    if not self.queue or not self.app.user_settings.crawler_running:
        # Nothing to do yet — poll again in a second.
        self.ioloop.add_timeout(timedelta(seconds=1), self.loop)
        return
    task = None
    try:
        task, complete_cb = yield gen.Task(self.queue.pop)
    except Exception:
        # Was a silent `pass`; log so queue failures are visible.
        # task stays None, so we fall into the retry branch below.
        logging.exception("Failed to pop task from queue")
    if not task:
        # NOTE(review): settings are read via self.user_settings here but
        # self.app.user_settings above — confirm which is canonical.
        self.ioloop.add_timeout(
            timedelta(seconds=self.user_settings.crawl_delay), self.loop)
        return
    logging.debug("Starting task url=%s" % task.url)
    self.running_fetchers += 1
    yield gen.Task(self.pipeline.process, task)
    complete_cb(True, task)
    self.total_fetch_count += 1
    self.running_fetchers -= 1
    if task.response:
        systemMetrics.add('response:%s' % task.url_host,
                          task.response.request_time)
        models.LogEvent("Crawled %d %s" % (task.response.code,
                                           task.url)).save()
    else:
        models.LogEvent("NOT Crawled %s" % (task.url)).save()
    logging.debug("Finished task url=%s" % task.url)
    # Immediately queue the next iteration.
    self.ioloop.add_callback(self.loop)