def execute(self, data):
    """Execute the crawler stage and record the operation in the database.

    Skips execution entirely when the run has already been aborted. A
    ``QueueTooBigError`` is reported as a warning without failing the
    run; any other exception is reported and re-raised unless
    ``continue_on_error`` is set. Whatever the outcome, the operation is
    closed out and the per-operation scratch directory is removed.

    :param data: payload passed through to the stage method.
    :returns: whatever the stage method returns (or ``None`` on
        abort/handled error).
    """
    if Crawl.is_aborted(self.crawler, self.run_id):
        return
    try:
        Crawl.operation_start(self.crawler, self.stage, self.run_id)
        # Lazy %-style args keep log formatting off the hot path.
        self.log.info(
            "[%s->%s(%s)]: %s",
            self.crawler.name,
            self.stage.name,
            self.stage.method_name,
            self.run_id,
        )
        return self.stage.method(self, data)
    except QueueTooBigError as qtbe:
        self.emit_warning(str(qtbe))
    except Exception as exc:
        self.emit_exception(exc)
        if not self.continue_on_error:
            # Bare `raise` re-raises with the original traceback intact;
            # `raise exc` would add a redundant frame at this line.
            raise
    finally:
        Crawl.operation_end(self.crawler, self.run_id)
        shutil.rmtree(self.work_path)
def test_operation_reporting(self, crawler, context):
    """Starting an operation registers a run; ending plus flush clears it."""
    first_stage = next(iter(crawler.stages))
    Crawl.operation_start(crawler, first_stage, context.run_id)
    # The fresh operation must be visible through every reporting accessor.
    assert crawler.latest_runid == context.run_id
    assert len(list(crawler.runs)) == 1
    assert crawler.op_count == 1
    assert isinstance(crawler.last_run, datetime.datetime)
    # Closing the operation and flushing must leave no trace behind.
    Crawl.operation_end(crawler, context.run_id)
    crawler.flush()
    assert crawler.op_count == 0
    assert len(list(crawler.runs)) == 0
def execute(self, data):
    """Execute the crawler and create a database record of having done so."""
    crawler, run_id = self.crawler, self.run_id
    if Crawl.is_aborted(crawler, run_id):
        return
    try:
        Crawl.operation_start(crawler, self.stage, run_id)
        self.log.info('[%s->%s(%s)]: %s', crawler.name, self.stage.name,
                      self.stage.method_name, run_id)
        return self.stage.method(self, data)
    except Exception as exc:
        # Failures are reported as crawler events rather than propagated.
        self.emit_exception(exc)
    finally:
        # Always close out the operation and drop the scratch directory.
        Crawl.operation_end(crawler, run_id)
        shutil.rmtree(self.work_path)
def cancel(self):
    """Abort every run of this crawler, then discard its queued tasks."""
    Crawl.abort_all(self)
    self.queue.cancel()
def flush(self):
    """Drop everything this crawler produced at run time."""
    # Queued work first, then crawl records, then tags.
    self.queue.cancel()
    Crawl.flush(self)
    self.flush_tags()
def latest_runid(self):
    """Return the identifier of this crawler's most recent run (via Crawl)."""
    return Crawl.latest_runid(self)
def runs(self):
    """Delegate to ``Crawl.runs`` for this crawler's recorded runs."""
    return Crawl.runs(self)
def op_count(self):
    """Total number of operations performed by this crawler."""
    return Crawl.op_count(self)
def last_run(self):
    """Timestamp of this crawler's most recent operation (via Crawl)."""
    return Crawl.last_run(self)
def flush(self):
    """Wipe all run-time state this crawler has accumulated."""
    # Discard pending work, then events, then crawl records.
    self.queue.cancel()
    Event.delete(self)
    Crawl.flush(self)
def op_count(self):
    """Total number of operations performed for this stage."""
    return Crawl.op_count(self.crawler, self)
def emit_heartbeat(self):
    """Record a heartbeat for this context's crawler via Crawl."""
    Crawl.heartbeat(self.crawler)
def crawler_change_schedule(crawler):
    """HTTP handler: set a crawler's schedule from the JSON request body."""
    crawler = get_crawler(crawler)
    # Keep the current schedule when the body omits the field.
    requested = request.json.get("schedule", crawler.schedule)
    Crawl.set_schedule(crawler, requested)
    return jsonify(success=True)
def schedule(self):
    """Return the effective schedule, or 'disabled' when it is not a known one."""
    # Stored schedule wins; fall back to the statically configured default.
    configured = Crawl.get_schedule(self) or self._schedule
    if configured in self.SCHEDULES:
        return configured
    return 'disabled'
def cancel(self):
    """Abort all runs of this crawler and flush its task queue."""
    Crawl.abort_all(self)
    Queue.flush(self)
def flush(self):
    """Remove every piece of run-time data this crawler has produced."""
    # Queue first so no new work lands while records are being removed.
    Queue.flush(self)
    Tag.delete(self)
    Event.delete(self)
    Crawl.flush(self)