Esempio n. 1
0
 def execute(cls, stage, state, data, next_allowed_exec_time=None):
     """Execute the operation, rate limiting allowing.

     Builds a ``Context`` from ``state`` and ``stage`` and then does one of:

     * re-queues the task when ``next_allowed_exec_time`` is set and still
       in the future;
     * does nothing when the crawler is disabled;
     * runs the task under ``rate_limiter`` when the stage has a rate
       limit, re-queueing it with a random delay on ``RateLimitException``;
     * runs the task directly otherwise.

     Any exception raised along the way is logged and swallowed. Afterwards
     the crawler's pending count is decremented and, if the crawler is no
     longer running, its ``aggregate`` hook is called.

     :param stage: stage identifier passed to ``Context.from_state``.
     :param state: serialized state passed to ``Context.from_state``.
     :param data: payload handed to ``context.execute``.
     :param next_allowed_exec_time: optional ``datetime`` compared against
         ``datetime.utcnow()``; the task is deferred while it is in the
         future.
     """
     # Bind the name up front: if Context.from_state() raises, the cleanup
     # in ``finally`` must not fail with a NameError that would mask the
     # failure already logged below.
     context = None
     try:
         context = Context.from_state(state, stage)
         now = datetime.utcnow()
         if next_allowed_exec_time and now < next_allowed_exec_time:
             # task not allowed to run yet; put it back in the queue
             Queue.queue(stage, state, data, delay=next_allowed_exec_time)
         elif context.crawler.disabled:
             pass
         elif context.stage.rate_limit:
             try:
                 with rate_limiter(context):
                     context.execute(data)
             except RateLimitException:
                 # Back off for a random whole number of seconds between 1
                 # and the interval implied by the rate limit (min. 1).
                 delay = max(1, 1.0 / context.stage.rate_limit)
                 delay = random.randint(1, int(delay))
                 context.log.info("Rate limit exceeded, delaying %d sec.",
                                  delay)
                 Queue.queue(stage, state, data, delay=delay)
         else:
             context.execute(data)
     except Exception:
         log.exception("Task failed to execute:")
     finally:
         # Without a context there is no crawler to account against.
         if context is not None:
             # Decrease the pending task count after executing a task.
             Queue.decr_pending(context.crawler)
             # If we don't have anymore tasks to execute, time to clean up.
             if not context.crawler.is_running:
                 context.crawler.aggregate(context)
Esempio n. 2
0
    def execute(cls, stage, state, data, next_allowed_exec_time=None):
        """Execute the operation, rate limiting allowing.

        The task is re-queued without running when ``next_allowed_exec_time``
        is set and still in the future, and silently dropped when the
        crawler is disabled. When the stage has a rate limit the task runs
        under ``rate_limiter``; on ``RateLimitException`` it is re-queued
        with a random delay and is NOT executed in this call.

        :param stage: stage identifier passed to ``Context.from_state``.
        :param state: serialized state passed to ``Context.from_state``.
        :param data: payload handed to ``context.execute``.
        :param next_allowed_exec_time: optional ``datetime`` compared against
            ``datetime.utcnow()``; the task is deferred while it is in the
            future.
        """
        now = datetime.utcnow()
        if next_allowed_exec_time and now < next_allowed_exec_time:
            # task not allowed to run yet; put it back in the queue
            Queue.queue(stage, state, data, delay=next_allowed_exec_time)
            return

        context = Context.from_state(state, stage)
        if context.crawler.disabled:
            return

        if not context.stage.rate_limit:
            # No rate limit configured for this stage: run right away.
            context.execute(data)
            return

        try:
            with rate_limiter(context):
                context.execute(data)
        except RateLimitException:
            # Back off for a random whole number of seconds between 1 and
            # the interval implied by the rate limit (at least 1 second).
            delay = max(1, 1.0 / context.stage.rate_limit)
            delay = random.randint(1, int(delay))
            context.log.info("Rate limit exceeded, delaying %d sec.",
                             delay)
            # Only re-queue here. Falling through to another
            # context.execute() would bypass the rate limit and run the
            # task twice (now and again when the queued copy is picked up).
            Queue.queue(stage, state, data, delay=delay)
Esempio n. 3
0
 def handle(self, task):
     """Apply the task context, then execute the task's payload.

     The stage name is taken from ``task.stage.stage`` after passing it
     through ``CrawlerStage.detach_namespace``; the ``Context`` is built
     from ``task.context`` and that stage name.
     """
     apply_task_context(task)
     payload = task.payload
     stage_name = CrawlerStage.detach_namespace(task.stage.stage)
     Context.from_state(task.context, stage_name).execute(payload)
Esempio n. 4
0
 def after_task(self, task):
     """Post-task hook: aggregate once the job is done, then check timeouts.

     When ``task.job.is_done()`` is true, a ``Context`` is built from the
     task and handed to its crawler's ``aggregate``. The timeout expiration
     check runs on every call regardless.
     """
     job_finished = task.job.is_done()
     if job_finished:
         stage_name = CrawlerStage.detach_namespace(task.stage.stage)
         ctx = Context.from_state(task.context, stage_name)
         ctx.crawler.aggregate(ctx)
     self.timeout_expiration_check()
Esempio n. 5
0
 def handle(self, task):
     """Execute the task's payload unless its crawler is disabled.

     The ``Context`` is built from ``task.context`` and ``task.stage.stage``
     via ``Context.from_state``.
     """
     payload = task.payload
     stage_name = task.stage.stage
     ctx = Context.from_state(task.context, stage_name)
     if not ctx.crawler.disabled:
         ctx.execute(payload)
Esempio n. 6
0
 def test_dump_load_state(self, context, crawler, stage):
     """A context rebuilt from its dumped state matches the original.

     Checks the type, run id, crawler name and stage name of the restored
     context, and that every key/value pair of the original state is
     present in the restored state.
     """
     restored = Context.from_state(context.dump_state(), stage.name)
     assert isinstance(restored, Context)
     assert restored.run_id == context.run_id
     assert restored.crawler.name == crawler.name
     assert restored.stage.name == stage.name
     # The restored state may hold extra entries, but none may be lost.
     assert not any((key, value) not in restored.state.items()
                    for key, value in context.state.items())
Esempio n. 7
0
 def after_task(self, task):
     """Post-task hook: run the crawler's aggregate step once the job is done.

     Does nothing while ``task.job.is_done()`` is false.
     """
     if not task.job.is_done():
         return
     stage_name = task.stage.stage
     ctx = Context.from_state(task.context, stage_name)
     ctx.crawler.aggregate(ctx)
Esempio n. 8
0
 def handle(self, task):
     """Build a ``Context`` for the task and execute its payload.

     The context comes from ``Context.from_state`` called with
     ``task.context`` and ``task.stage.stage``.
     """
     payload = task.payload
     stage_name = task.stage.stage
     Context.from_state(task.context, stage_name).execute(payload)