예제 #1
0
    def execute(self, data):
        """Run this stage's method on ``data`` and record the operation.

        Returns ``None`` straight away, without starting an operation, when
        ``Crawl.is_aborted`` reports the run as aborted. Otherwise the
        operation is registered through ``Crawl.operation_start``, the stage
        method is invoked as ``self.stage.method(self, data)`` and its result
        is returned.

        A ``QueueTooBigError`` is reported with ``emit_warning`` and
        swallowed. Any other ``Exception`` is reported with
        ``emit_exception`` and re-raised unless ``self.continue_on_error`` is
        truthy. When an exception is swallowed the method returns ``None``.

        Once an operation has been attempted, ``Crawl.operation_end`` is
        called and ``self.work_path`` is removed regardless of the outcome.
        """
        if Crawl.is_aborted(self.crawler, self.run_id):
            return

        try:
            Crawl.operation_start(self.crawler, self.stage, self.run_id)
            self.log.info(
                "[%s->%s(%s)]: %s",
                self.crawler.name,
                self.stage.name,
                self.stage.method_name,
                self.run_id,
            )
            return self.stage.method(self, data)
        except QueueTooBigError as qtbe:
            self.emit_warning(str(qtbe))
        except Exception as exc:
            self.emit_exception(exc)
            if not self.continue_on_error:
                # Bare ``raise`` re-raises the exception being handled with
                # its original traceback.
                raise
        finally:
            try:
                Crawl.operation_end(self.crawler, self.run_id)
            finally:
                # Nested so the work directory is still removed when
                # ``operation_end`` itself raises.
                shutil.rmtree(self.work_path)
예제 #2
0
 def test_operation_reporting(self, crawler, context):
     """Check the crawler's reported state around one start/end cycle.

     Starts an operation on the first stage, asserts the run id, run list,
     operation count and last-run timestamp, then ends the operation,
     flushes the crawler and asserts the counters are back to zero.
     """
     first_stage = list(crawler.stages)[0]
     run_id = context.run_id

     # State expected while a single operation is in flight.
     Crawl.operation_start(crawler, first_stage, run_id)
     assert crawler.latest_runid == run_id
     active_runs = list(crawler.runs)
     assert len(active_runs) == 1
     assert crawler.op_count == 1
     assert isinstance(crawler.last_run, datetime.datetime)

     # State expected once the operation ended and the crawler was flushed.
     Crawl.operation_end(crawler, run_id)
     crawler.flush()
     assert crawler.op_count == 0
     remaining_runs = list(crawler.runs)
     assert len(remaining_runs) == 0
예제 #3
0
    def execute(self, data):
        """Run this stage's method on ``data`` and record the operation.

        Returns ``None`` straight away, without starting an operation, when
        ``Crawl.is_aborted`` reports the run as aborted. Otherwise the
        operation is registered through ``Crawl.operation_start``, the stage
        method is invoked as ``self.stage.method(self, data)`` and its result
        is returned.

        Any ``Exception`` raised while doing so is reported with
        ``emit_exception`` and swallowed, in which case the method returns
        ``None``.

        Once an operation has been attempted, ``Crawl.operation_end`` is
        called and ``self.work_path`` is removed regardless of the outcome.
        """
        if Crawl.is_aborted(self.crawler, self.run_id):
            return

        try:
            Crawl.operation_start(self.crawler, self.stage, self.run_id)
            self.log.info('[%s->%s(%s)]: %s', self.crawler.name,
                          self.stage.name, self.stage.method_name, self.run_id)
            return self.stage.method(self, data)
        except Exception as exc:
            self.emit_exception(exc)
        finally:
            try:
                Crawl.operation_end(self.crawler, self.run_id)
            finally:
                # Nested so the work directory is still removed when
                # ``operation_end`` itself raises.
                shutil.rmtree(self.work_path)
예제 #4
0
 def cancel(self):
     """Call ``Crawl.abort_all`` for this object, then cancel its queue."""
     Crawl.abort_all(self)
     self.queue.cancel()
예제 #5
0
 def flush(self):
     """Delete all run-time data generated by this crawler.

     Runs three steps in order: cancel the queue, call ``Crawl.flush`` for
     this object, then call ``self.flush_tags()``.
     """
     self.queue.cancel()
     Crawl.flush(self)
     self.flush_tags()
예제 #6
0
 def latest_runid(self):
     """Return what ``Crawl.latest_runid`` reports for this object."""
     run_id = Crawl.latest_runid(self)
     return run_id
예제 #7
0
 def runs(self):
     """Return what ``Crawl.runs`` reports for this object."""
     known_runs = Crawl.runs(self)
     return known_runs
예제 #8
0
 def op_count(self):
     """Return the crawler-wide operation count from ``Crawl.op_count``."""
     total = Crawl.op_count(self)
     return total
예제 #9
0
 def last_run(self):
     """Return what ``Crawl.last_run`` reports for this object."""
     latest = Crawl.last_run(self)
     return latest
예제 #10
0
 def flush(self):
     """Delete all run-time data generated by this crawler.

     Runs three steps in order: cancel the queue, call ``Event.delete`` for
     this object, then call ``Crawl.flush`` for this object.
     """
     self.queue.cancel()
     Event.delete(self)
     Crawl.flush(self)
예제 #11
0
 def op_count(self):
     """Return the operation count for this stage from ``Crawl.op_count``.

     The lookup is made with this stage's crawler and the stage itself.
     """
     owner = self.crawler
     return Crawl.op_count(owner, self)
예제 #12
0
 def emit_heartbeat(self):
     """Call ``Crawl.heartbeat`` with this object's crawler."""
     crawler = self.crawler
     Crawl.heartbeat(crawler)
예제 #13
0
def crawler_change_schedule(crawler):
    """Store a schedule for a crawler, taken from the JSON request body.

    ``crawler`` is resolved through ``get_crawler``. The ``"schedule"`` key of
    ``request.json`` is used when present; otherwise the resolved crawler's
    current ``schedule`` is written back. Responds with
    ``jsonify(success=True)``.
    """
    target = get_crawler(crawler)
    payload = request.json
    new_schedule = payload.get("schedule", target.schedule)
    Crawl.set_schedule(target, new_schedule)
    return jsonify(success=True)
예제 #14
0
 def schedule(self):
     """Return the effective schedule name for this object.

     Uses the value from ``Crawl.get_schedule`` when it is truthy, otherwise
     ``self._schedule``. Any value not found in ``self.SCHEDULES`` is
     reported as ``'disabled'``.
     """
     chosen = Crawl.get_schedule(self)
     if not chosen:
         chosen = self._schedule
     if chosen in self.SCHEDULES:
         return chosen
     return 'disabled'
예제 #15
0
 def cancel(self):
     """Call ``Crawl.abort_all`` and then ``Queue.flush`` for this object."""
     Crawl.abort_all(self)
     Queue.flush(self)
예제 #16
0
 def flush(self):
     """Delete all run-time data generated by this crawler.

     Runs four steps in order, each for this object: ``Queue.flush``,
     ``Tag.delete``, ``Event.delete`` and finally ``Crawl.flush``.
     """
     Queue.flush(self)
     Tag.delete(self)
     Event.delete(self)
     Crawl.flush(self)