Beispiel #1
0
    def execute(self, data):
        """Run this stage's method on ``data`` and record the operation.

        If the crawl run has been aborted, nothing is executed. Otherwise the
        operation is registered with ``Crawl.operation_start``, the stage
        method is called with this context and ``data``, and the operation is
        closed with ``Crawl.operation_end``. Once the operation has been
        attempted, ``self.work_path`` is always removed.

        Args:
            data: Payload handed through unchanged to the stage method.

        Returns:
            The stage method's return value, or ``None`` if the run was
            aborted or an error was handled here without being re-raised.

        Raises:
            Exception: Any error other than ``QueueTooBigError`` is reported
                via ``emit_exception`` and then re-raised, unless
                ``self.continue_on_error`` is truthy.
        """
        if Crawl.is_aborted(self.crawler, self.run_id):
            # NOTE(review): work_path is not removed on this early-exit path;
            # confirm that is intended.
            return

        try:
            Crawl.operation_start(self.crawler, self.stage, self.run_id)
            self.log.info(
                "[%s->%s(%s)]: %s",
                self.crawler.name,
                self.stage.name,
                self.stage.method_name,
                self.run_id,
            )
            return self.stage.method(self, data)
        except QueueTooBigError as qtbe:
            # Reported as a warning only; this error is never re-raised.
            self.emit_warning(str(qtbe))
        except Exception as exc:
            self.emit_exception(exc)
            if not self.continue_on_error:
                # Bare ``raise`` re-raises the active exception unchanged.
                raise
        finally:
            try:
                Crawl.operation_end(self.crawler, self.run_id)
            finally:
                # Remove the working directory even when closing the
                # operation fails, so it is not left behind.
                shutil.rmtree(self.work_path)
Beispiel #2
0
 def test_operation_reporting(self, crawler, context):
     """Check the crawler's run bookkeeping around one start/end cycle.

     After an operation is started on the first stage, the context's run
     must be the latest one, exactly one run must be listed, one operation
     must be counted and ``last_run`` must be a ``datetime``. After the
     operation is ended and the crawler flushed, no operations or runs must
     remain.
     """
     first_stage = list(crawler.stages)[0]
     run_id = context.run_id

     # Open an operation and verify that the crawler reflects it.
     Crawl.operation_start(crawler, first_stage, run_id)
     assert crawler.latest_runid == run_id
     active_runs = list(crawler.runs)
     assert len(active_runs) == 1
     assert crawler.op_count == 1
     assert isinstance(crawler.last_run, datetime.datetime)

     # Close the operation, flush, and verify that nothing is left over.
     Crawl.operation_end(crawler, run_id)
     crawler.flush()
     assert crawler.op_count == 0
     remaining_runs = list(crawler.runs)
     assert len(remaining_runs) == 0
Beispiel #3
0
    def execute(self, data):
        """Run this stage's method on ``data`` and record the operation.

        If the crawl run has been aborted, nothing is executed. Otherwise the
        operation is registered with ``Crawl.operation_start``, the stage
        method is called with this context and ``data``, and the operation is
        closed with ``Crawl.operation_end``. Once the operation has been
        attempted, ``self.work_path`` is always removed.

        Args:
            data: Payload handed through unchanged to the stage method.

        Returns:
            The stage method's return value, or ``None`` if the run was
            aborted or the operation raised an exception.
        """
        if Crawl.is_aborted(self.crawler, self.run_id):
            # NOTE(review): work_path is not removed on this early-exit path;
            # confirm that is intended.
            return

        try:
            Crawl.operation_start(self.crawler, self.stage, self.run_id)
            self.log.info('[%s->%s(%s)]: %s', self.crawler.name,
                          self.stage.name, self.stage.method_name, self.run_id)
            return self.stage.method(self, data)
        except Exception as exc:
            # Errors are reported through emit_exception and not re-raised,
            # so the caller receives None.
            self.emit_exception(exc)
        finally:
            try:
                Crawl.operation_end(self.crawler, self.run_id)
            finally:
                # Remove the working directory even when closing the
                # operation fails, so it is not left behind.
                shutil.rmtree(self.work_path)