Beispiel #1
0
    def get_missing_dates_response(self, url):
        """Fetch ``url`` and return the JSON-decoded body of the response.

        The request timeout is taken from ``self.config["timeout"]`` and
        defaults to 10 when that key is absent. ``raise_for_status()`` is
        invoked on the response before its body is decoded, so an HTTP error
        status surfaces as an exception instead of a parsed payload.
        """
        logger.debug("Fetching...")
        timeout = self.config.get("timeout", 10)
        response = get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
Beispiel #2
0
    def _get_feed(self, url):
        """Fetch ``url`` through ``self.session`` and return the body as text.

        The request timeout is taken from ``self.config["timeout"]`` and
        defaults to 10 when that key is absent. ``raise_for_status()`` is
        invoked on the response before its text is returned.
        """
        logger.debug("Fetching...")
        timeout = self.config.get("timeout", 10)
        response = self.session.get(url, timeout=timeout)
        response.raise_for_status()
        return response.text
Beispiel #3
0
    def _get_start_url(self, start_date):
        """Build the start URL for ``start_date``.

        ``self.url_template`` is filled in with the given ``start_date`` and
        with ``self.coverage``. The resulting URL is reported through the
        module logger at debug level and then returned.
        """
        logger.debug(f"Start date: {start_date}")
        logger.debug(self.start_date)

        # Format the template once and reuse the result; the URL goes to the
        # logger rather than stdout so it follows the configured log level.
        start_url = self.url_template.format(start_date=start_date,
                                             coverage=self.coverage)
        logger.debug(start_url)
        return start_url
Beispiel #4
0
 def _convert(self, entry):
     """Convert an entry into an ``Item``.

     The identifier and the source date are obtained from
     ``self._get_identifier`` and ``self._get_ingestiondate``; the item's
     content is ``entry.encode()``. Both derived values are logged at debug
     level.
     """
     # Derive each value once and reuse it for both the log line and the item
     # instead of running the helper twice per entry.
     identifier = self._get_identifier(entry)
     logger.debug(f"Found {identifier}")
     ingestion_date = self._get_ingestiondate(entry)
     logger.debug(f"Found {ingestion_date}")
     return Item(
         harvester=self.name,
         source=self.source,
         identifier=identifier,
         source_date=ingestion_date,
         content=entry.encode(),
     )
Beispiel #5
0
 def _convert(self, entry):
     """Wrap ``entry`` in an ``Item``.

     The item's identifier is ``self.coverage`` and ``entry`` joined by an
     underscore, its source date is ``entry`` itself, and it carries no
     content (``None``).
     """
     logger.debug(f"Found {self.coverage}_{entry}")
     logger.debug(f"Found {entry}")
     item_id = f"{self.coverage}_{entry}"
     item = Item(
         harvester=self.name,
         source=self.source,
         identifier=item_id,
         source_date=entry,
         content=None,
     )
     return item
Beispiel #6
0
    def _harvest(self, url):
        """Run one harvesting step against ``url``.

        Fetches the missing-dates response for ``url``, turns it into a set,
        and combines it with the dates of the month to obtain the entries.
        ``self.start_date`` is then replaced by the value returned from
        ``get_new_start_date``.

        Returns a tuple ``(entries, next_url)`` where ``next_url`` comes from
        ``self.make_next_url()``.
        """
        logger.debug(self.start_date)
        logger.debug(f"fetching with {url}")

        response = self.get_missing_dates_response(url)
        missing_dates = self.make_missing_dates_set(response)
        month_dates = self.get_dates_in_month()
        entries = self.get_dates_with_products(missing_dates, month_dates)

        # Update the start date before asking for the next URL; the original
        # ordering is kept (presumably make_next_url reads self.start_date --
        # TODO confirm).
        self.start_date = self.get_new_start_date(month_dates)
        return entries, self.make_next_url()
Beispiel #7
0
    def _get_start_url(self, start_date):
        """Build the start URL for ``start_date``.

        ``self.url_template`` is filled in with the given ``start_date`` and
        with ``self.end_date``.
        """
        logger.debug(f"Start date: {start_date}")

        template_values = {"start_date": start_date, "end_date": self.end_date}
        return self.url_template.format(**template_values)