def emit(self, rule="pass", stage=None, data={}, delay=None, optional=False): """Invoke the next stage, either based on a handling rule, or by calling the `pass` rule by default.""" if stage is None: stage = self.stage.handlers.get(rule) if optional and stage is None: return if stage is None or stage not in self.crawler.stages: self.log.info("No next stage: %s (%s)" % (stage, rule)) return if settings.DEBUG: # sampling rate is a float between 0.0 to 1.0. If it's 0.2, we # aim to execute only 20% of the crawler's tasks. sampling_rate = self.get("sampling_rate") if sampling_rate and random.random() > float(sampling_rate): self.log.info("Skipping emit due to sampling rate") return if is_sync_mode(): # In sync mode we use a in-memory backend for the task queue. # Make a copy of the data to avoid mutation in that case. data = deepcopy(data) state = self.dump_state() stage = self.crawler.get(stage) delay = delay or self.params.get("delay", 0) or self.crawler.delay self.sleep(delay) Queue.queue(stage, state, data)
def emit(self, rule='pass', stage=None, data={}, delay=None):
    """Invoke the next stage, either based on a handling rule, or by
    calling the `pass` rule by default."""
    if stage is None:
        stage = self.stage.handlers.get(rule)
    if stage is None or stage not in self.crawler.stages:
        self.log.info("No next stage: %s (%s)" % (stage, rule))
        return
    state = self.dump_state()
    delay = delay or self.crawler.delay
    Queue.queue(stage, state, data, delay)
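# A minimal, hypothetical sketch of how `emit` is typically called from inside
# a stage handler. The `parse` function name, the `context`/`data` argument
# names, the `fetch` rule, and the `urls` key are illustrative assumptions;
# only the `emit(rule=..., data=...)` call shape comes from the method above.
def parse(context, data):
    # Hand each discovered URL to whichever stage is mapped to the
    # `fetch` rule in the crawler's handler configuration.
    for url in data.get("urls", []):
        context.emit(rule="fetch", data={"url": url})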
def run(self, incremental=None, run_id=None):
    """Queue the execution of a particular crawler."""
    state = {
        "crawler": self.name,
        "run_id": run_id or Job.random_id(),
        "incremental": settings.INCREMENTAL,
        "continue_on_error": settings.CONTINUE_ON_ERROR,
    }
    if incremental is not None:
        state["incremental"] = incremental

    # Cancel previous runs:
    self.cancel()
    init_stage = self.get(self.init_stage)
    Queue.queue(init_stage, state, {})
def run(self, incremental=None, run_id=None):
    """Queue the execution of a particular crawler."""
    state = {
        'crawler': self.name,
        'run_id': run_id or Job.random_id(),
        'incremental': settings.INCREMENTAL
    }
    if incremental is not None:
        state['incremental'] = incremental

    # Cancel previous runs:
    self.cancel()
    # Flush out previous events data but keep the counts:
    Event.delete_data(self)
    Queue.queue(self.init_stage, state, {})
def run(self, incremental=None, run_id=None):
    """Queue the execution of a particular crawler."""
    state = {
        'crawler': self.name,
        'run_id': run_id,
        'incremental': settings.INCREMENTAL
    }
    if incremental is not None:
        state['incremental'] = incremental
    # TaskRunner.execute(stage.name, state, {})
    Queue.queue(self.init_stage, state, {})
    if not settings.REDIS_HOST:
        TaskRunner.run()
def run(self, incremental=None, run_id=None):
    """Queue the execution of a particular crawler."""
    state = {
        "crawler": self.name,
        "run_id": run_id or Job.random_id(),
        "incremental": settings.INCREMENTAL,
    }
    if incremental is not None:
        state["incremental"] = incremental

    # Cancel previous runs:
    self.cancel()
    # Flush out previous events data but keep the counts:
    Event.delete_data(self)
    init_stage = self.get(self.init_stage)
    Queue.queue(init_stage, state, {})
def run(self, incremental=None, run_id=None):
    """Queue the execution of a particular crawler."""
    state = {
        'crawler': self.name,
        'run_id': run_id,
        'incremental': settings.INCREMENTAL
    }
    if incremental is not None:
        state['incremental'] = incremental

    # Cancel previous runs:
    self.cancel()
    # Flush out previous events:
    Event.delete(self)
    Queue.queue(self.init_stage, state, {})
    if not settings.REDIS_HOST:
        TaskRunner.run()
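# A hypothetical launch snippet showing how the `run` variants above are meant
# to be used: `run` only queues the crawler's init stage, and the queued task
# is then executed by whatever worker backend is configured (or in-process via
# TaskRunner when no Redis host is set, in the variants that check REDIS_HOST).
# The `get_crawler` lookup and the crawler name are assumptions for
# illustration, not part of the code above.
crawler = get_crawler("example_crawler")   # assumed registry/manager lookup
crawler.run(incremental=False)             # force a full (non-incremental) run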
def emit(self, rule='pass', stage=None, data={}, delay=None, optional=False):
    """Invoke the next stage, either based on a handling rule, or by
    calling the `pass` rule by default."""
    if stage is None:
        stage = self.stage.handlers.get(rule)
    if optional and stage is None:
        return
    if stage is None or stage not in self.crawler.stages:
        self.log.info("No next stage: %s (%s)" % (stage, rule))
        return
    if settings.DEBUG:
        # The sampling rate is a float between 0.0 and 1.0. If it's 0.2, we
        # aim to execute only 20% of the crawler's tasks.
        sampling_rate = self.get('sampling_rate')
        if sampling_rate and random.random() > float(sampling_rate):
            self.log.info("Skipping emit due to sampling rate")
            return
    state = self.dump_state()
    stage = self.crawler.get(stage)
    delay = delay or self.params.get('delay', 0) or self.crawler.delay
    self.sleep(delay)
    Queue.queue(stage, state, data)