Example #1
0
def __download_climate_observations_data(remote_file: str) -> bytes:

    try:
        zip_file = download_file_from_dwd(remote_file)
    except InvalidURL as e:
        raise InvalidURL(
            f"Error: the station data {remote_file} could not be reached."
        ) from e
    except Exception:
        raise FailedDownload(f"Download failed for {remote_file}")

    try:
        zip_file_opened = ZipFile(zip_file)

        # Files of archive
        archive_files = zip_file_opened.namelist()

        for file in archive_files:
            # If found file load file in bytes, close zipfile and return bytes
            if file.startswith(PRODUCT_FILE_IDENTIFIER):
                file_in_bytes = zip_file_opened.open(file).read()

                zip_file_opened.close()

                return file_in_bytes

        # If whatsoever no file was found and returned already throw exception
        raise ProductFileNotFound(
            f"The archive of {remote_file} does not hold a 'produkt' file.")

    except BadZipFile as e:
        raise BadZipFile(
            f"The archive of {remote_file} seems to be corrupted.") from e
Example #2
0
    def _download_stations() -> Tuple[BytesIO, int]:
        """
        Download station list from ECCC FTP server.

        :return: CSV payload, source identifier
        """

        gdrive_url = "https://drive.google.com/uc?id=1HDRnj41YBWpMioLPwAFiLlK4SK8NV72C"
        http_url = (
            "https://github.com/earthobservations/testdata/raw/main/ftp.tor.ec.gc.ca/Pub/"
            "Get_More_Data_Plus_de_donnees/Station%20Inventory%20EN.csv.gz"
        )

        payload = None
        source = None
        try:
            payload = download_file(gdrive_url, CacheExpiry.METAINDEX)
            source = 0
        except Exception:
            log.exception(f"Unable to access Google drive server at {gdrive_url}")

            # Fall back to different source.
            try:
                response = download_file(http_url, CacheExpiry.METAINDEX)
                with gzip.open(response, mode="rb") as f:
                    payload = BytesIO(f.read())
                source = 1
            except Exception:
                log.exception(f"Unable to access HTTP server at {http_url}")

        if payload is None:
            raise FailedDownload("Unable to acquire ECCC stations_result list")

        return payload, source
Example #3
0
def _extract_radolan_data(
    date_time: datetime, archive_in_bytes: BytesIO
) -> RadarResult:
    """
    Function used to extract RADOLAN_CDC file for the requested datetime
    from the downloaded archive.

    Args:
        date_time: requested datetime of RADOLAN
        archive_in_bytes: downloaded archive of RADOLAN file

    Returns:
        the datetime formatted as string and the RADOLAN file for the datetime
    """
    # Need string of datetime to check if one of the files in the archive contains
    # the requested datetime
    date_time_string = date_time.strftime(DatetimeFormat.ymdhm.value)

    # First try to unpack archive from archive (case for historical data)
    try:
        # Have to seek(0) as the archive might be reused
        archive_in_bytes.seek(0)

        with gzip.GzipFile(fileobj=archive_in_bytes, mode="rb") as gz_file:
            file_in_archive = BytesIO(gz_file.read())

            with tarfile.open(fileobj=file_in_archive) as tar_file:
                for file in tar_file.getmembers():
                    if date_time_string in file.name:
                        return RadarResult(
                            data=BytesIO(tar_file.extractfile(file).read()),
                            timestamp=date_time,
                            filename=file.name,
                        )

                raise FileNotFoundError(
                    f"RADOLAN file for {date_time_string} not found."
                )  # pragma: no cover

    except EOFError as ex:
        raise FailedDownload(
            f"RADOLAN file for {date_time_string} is invalid: {ex}"
        )  # pragma: no cover

    # Otherwise if there's an error the data is from recent time period and only has to
    # be unpacked once
    except tarfile.ReadError:
        # Seek again for reused purpose
        archive_in_bytes.seek(0)

        with gzip.GzipFile(fileobj=archive_in_bytes, mode="rb") as gz_file:
            return RadarResult(
                data=BytesIO(gz_file.read()), timestamp=date_time, filename=gz_file.name
            )
Example #4
0
def _download_climate_observations_data_parallel(
        remote_file: Union[str, Path]) -> BytesIO:
    """
    This function downloads the station data for which the link is
    provided by the 'select_dwd' function. It checks the shortened filepath (just
    the zipfile) for its parameters, creates the full filepath and downloads the
    file(s) according to the set up folder.

    Args:
        remote_file: contains path to file that should be downloaded
            and the path to the folder to store the files

    Returns:
        stores data on local file system

    """
    try:
        zip_file = download_file_from_dwd(remote_file,
                                          DWDCDCBase.CLIMATE_OBSERVATIONS)
    except InvalidURL as e:
        raise InvalidURL(
            f"Error: the station data {remote_file} couldn't be reached."
        ) from e
    except Exception:
        raise FailedDownload(f"Download failed for {remote_file}")

    try:
        zip_file_opened = ZipFile(zip_file)

        # Files of archive
        archive_files = zip_file_opened.namelist()

        for file in archive_files:
            # If found file load file in bytes, close zipfile and return bytes
            if file.startswith(PRODUCT_FILE_IDENTIFIER):
                file_in_bytes = BytesIO(zip_file_opened.open(file).read())

                zip_file_opened.close()

                return file_in_bytes

        # If whatsoever no file was found and returned already throw exception
        raise ProductFileNotFound(
            f"The archive of {remote_file} does not hold a 'produkt' file.")

    except BadZipFile as e:
        raise BadZipFile(
            f"The archive of {remote_file} seems to be corrupted.") from e
Example #5
0
def __download_climate_observations_data(remote_file: str) -> bytes:

    try:
        file = download_file(remote_file, ttl=CacheExpiry.FIVE_MINUTES)
    except InvalidURL as e:
        raise InvalidURL(f"Error: the station data {remote_file} could not be reached.") from e
    except Exception:
        raise FailedDownload(f"Download failed for {remote_file}")

    try:
        zfs = ZipFileSystem(file)
    except BadZipFile as e:
        raise BadZipFile(f"The archive of {remote_file} seems to be corrupted.") from e

    product_file = zfs.glob("produkt*")

    if len(product_file) != 1:
        raise ProductFileNotFound(f"The archive of {remote_file} does not hold a 'produkt' file.")

    return zfs.open(product_file[0]).read()
Example #6
0
    def _download_stations() -> bytes:
        """
        Download station list from ECCC FTP server.

        :return: CSV payload
        """

        ftp_url = (
            "ftp://*****:*****@ftp.tor.ec.gc.ca"
            "/Pub/Get_More_Data_Plus_de_donnees/Station Inventory EN.csv"
        )

        http_url = (
            "https://raw.githubusercontent.com/earthobservations/testdata"
            "/main/ftp.tor.ec.gc.ca/Pub/Get_More_Data_Plus_de_donnees"
            "/Station%20Inventory%20EN.csv.gz"
        )

        payload = None

        # Try original source.
        session = FTPSession()
        try:
            response = session.retr(ftp_url)
            payload = response.content
        except Exception:
            log.exception(f"Unable to access FTP server at {ftp_url}")

            # Fall back to different source.
            try:
                response = requests.get(http_url)
                response.raise_for_status()
                with gzip.open(BytesIO(response.content), mode="rb") as f:
                    payload = f.read()
            except Exception:
                log.exception(f"Unable to access HTTP server at {http_url}")

        if payload is None:
            raise FailedDownload("Unable to acquire ECCC stations list")

        return payload