Example #1
0
def scraper_worker_handler(event, context):
    """AWS Lambda entry point: run one council's scraper from an SQS message.

    Expects a single SQS record whose body is JSON containing ``council``
    and ``scraper_type``. Logs progress to a rich Console; on failure the
    traceback is stored on the run log and AWS tidy-up is invoked manually.
    """
    console = Console(file=sys.stdout, record=True)
    run_log = settings.RUN_LOGGER(start=datetime.datetime.utcnow())

    payload = json.loads(event["Records"][0]["body"])
    council = payload["council"]
    command_name = payload["scraper_type"]

    console.log(f"Fetching Scraper for: {council}")
    scraper_cls = load_scraper(council, command_name)
    if not scraper_cls:
        # No scraper registered for this council/command — nothing to do.
        return

    console.log(f"Begin attempting to scrape: {council}")
    scraper = scraper_cls(
        {"council": council, "verbose": True, "aws_lambda": True}, console
    )
    try:
        if scraper.disabled:
            console.log(f"Scraper for {council} is disabled")
        else:
            scraper.run(run_log)
    except Exception as exc:
        scraper.console.log(exc)
        run_log.error = traceback.format_exc()
        # This probably means aws_tidy_up hasn't been called.
        # Let's do that ourselves then
        scraper.aws_tidy_up(run_log)

    console.log(f"Finished running scraper for: {council}")
Example #2
0
    def handle(self, options):
        """Run the scraper for each selected council.

        Honours the ``refresh`` filter (skip scrapers that ran recently),
        the ``tags`` filter (comma-separated list; every tag must be present
        on the scraper), and the scraper's own ``disabled`` flag.
        """
        self.options = options
        for council in self.councils_to_run():
            self.options["council"] = council
            scraper_cls = load_scraper(council, self.command_name)
            # load_scraper can return None (see the guards in the other
            # runners); skip such councils instead of crashing on
            # "'NoneType' object is not callable".
            if not scraper_cls:
                continue
            with scraper_cls(self.options) as scraper:
                should_run = True
                if scraper.disabled:
                    should_run = False

                # --refresh: don't re-run a scraper that has run recently.
                if should_run and options["refresh"]:
                    if scraper.run_since():
                        should_run = False

                # --tags: only run scrapers carrying every requested tag.
                if should_run and options["tags"]:
                    required_tags = set(options["tags"].split(","))
                    scraper_tags = set(scraper.get_tags)
                    if not required_tags.issubset(scraper_tags):
                        should_run = False

                if should_run:
                    if options.get("verbose"):
                        print(council)

                    self._run_single(scraper)
Example #3
0
 def disabled(self):
     """Return info dicts for every council whose scraper is disabled.

     Each entry carries the council ``code`` and official ``name``;
     the result is ordered by council code.
     """
     found = []
     for council in self.all_councils:
         scraper = load_scraper(council.council_id, self.command_name)
         if not (scraper and scraper.disabled):
             continue
         found.append(
             {
                 "code": council.council_id,
                 "name": council.metadata["official_name"],
             }
         )
     found.sort(key=lambda entry: entry["code"])
     return found
Example #4
0
    def missing(self):
        """Return info dicts for current councils that have no scraper.

        Each entry carries the council ``code`` and official ``name``;
        the result is ordered by council code.
        """
        missing_councils = [
            {
                "code": council.council_id,
                "name": council.metadata["official_name"],
            }
            for council in self.all_councils
            # non-current councils are never classed as missing
            if council.current
            and not load_scraper(council.council_id, self.command_name)
        ]
        return sorted(missing_councils, key=lambda entry: entry["code"])
Example #5
0
 def run_council(self, council):
     """Run the scraper for a single council, if one exists and applies.

     Skips silently when no scraper class is registered, the scraper is
     disabled, ``refresh`` filtering says it ran recently, or the requested
     ``tags`` are not all present on the scraper.
     """
     self.options["council"] = council
     self.options["council_info"] = load_council_info(council)
     scraper_cls = load_scraper(council, self.command_name)
     if not scraper_cls:
         return
     with scraper_cls(self.options, self.console) as scraper:
         if scraper.disabled:
             return
         # --refresh: skip if the scraper has run recently.
         if self.options["refresh"] and scraper.run_since():
             return
         # --tags: every requested tag must be on the scraper.
         if self.options["tags"]:
             wanted = set(self.options["tags"].split(","))
             if not wanted.issubset(set(scraper.get_tags)):
                 return
         self._run_single(scraper)