Beispiel #1
0
def _download_dwd_data(download_specification: Tuple[Union[str, Path],
                                                     Union[str, Path]]):
    """
    Download one station-data file selected by the 'select_dwd' function.

    The shortened remote filepath (just the zipfile) is expanded to the full
    server path, the target subfolder is created if needed, and the file is
    fetched over FTP into the local folder.

    Args:
        download_specification: tuple of (path of the remote file to
            download, local folder in which to store the file)

    Returns:
        None -- the data is stored on the local file system.

    Raises:
        NameError: if the file couldn't be downloaded; the original
            exception is chained as the cause.
    """
    remote_file, folder = download_specification

    create_folder(subfolder=SUB_FOLDER_STATIONDATA, folder=folder)

    file_server = create_remote_file_name(remote_file)
    file_local = create_local_file_name(remote_file, folder)

    try:
        # Open a connection to the FTP server; the context manager
        # guarantees the connection is closed again.
        with FTP(DWD_SERVER) as ftp:
            ftp.login()
            ftp_file_download(ftp, Path(file_server), Path(file_local))

    except Exception as e:
        # Keep the historical NameError type for existing callers, but
        # chain the original exception so the real failure isn't swallowed.
        raise NameError(
            f"The file\n {file_local} \n couldn't be downloaded!") from e
Beispiel #2
0
def create_metaindex(parameter: Parameter, time_resolution: TimeResolution,
                     period_type: PeriodType) -> pd.DataFrame:
    """ The function is used to create a simple metadata DataFrame parsed from the text files that are located in each
    data section of the station data directory of the weather service.

    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
    Return:
        DataFrame with parsed columns of the corresponding text file. Columns are translated into English and data is
        not yet complete as file existence is not checked.

    Raises:
        ftplib.Error: if the file list couldn't be retrieved from the server.
        urllib.error.URLError: if reading the metadata file failed.
    """
    server_path = PurePosixPath(DWD_PATH, time_resolution.value,
                                parameter.value, period_type.value)

    try:
        with FTP(DWD_SERVER) as ftp:
            ftp.login()
            files_server = ftp.list_files(remote_path=str(server_path),
                                          also_subfolders=False)

    except ftplib.all_errors as e:
        # BUGFIX: ftplib.all_errors is a *tuple* of exception classes, not a
        # callable -- the previous ``raise ftplib.all_errors(...)`` raised a
        # TypeError instead of the intended error. Raise a concrete
        # ftplib.Error and chain the original cause.
        raise ftplib.Error(
            "Error: couldn't retrieve filelist from server.\n"
            f"{str(e)}") from e

    # Take the first file whose (lowercased) name contains all metadata
    # matchstrings; .pop(0) raises IndexError if no such file exists.
    metafile_server = [
        file for file in files_server
        if find_all_matchstrings_in_string(file.lower(), METADATA_MATCHSTRINGS)
    ].pop(0)

    # NOTE(review): str.lstrip strips a *character set*, not a prefix --
    # this works only while no filename starts with a char from DWD_PATH.
    # TODO confirm, or switch to str.removeprefix on Python >= 3.9.
    metafile_server = create_remote_file_name(metafile_server.lstrip(DWD_PATH))

    try:
        with urllib.request.urlopen(metafile_server) as request:
            file = BytesIO(request.read())

    except urllib.error.URLError as e:
        raise urllib.error.URLError("Error: reading metadata file failed.\n"
                                    f"{str(e)}") from e

    # The DWD metadata files are fixed-width text in Latin-1; the second
    # physical row (units/dashes) is skipped.
    metaindex = pd.read_fwf(filepath_or_buffer=file,
                            colspecs=METADATA_FIXED_COLUMN_WIDTH,
                            skiprows=[1],
                            dtype=str,
                            encoding="ISO-8859-1")

    # Fix column names, as header is not aligned to fixed column widths:
    # drop pandas' "Unnamed: N" placeholders and re-split the joined header.
    metaindex.columns = "".join([
        column for column in metaindex.columns
        if "unnamed" not in column.lower()
    ]).split(" ")

    metaindex = metaindex.rename(columns=str.upper).rename(
        columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)

    return metaindex.astype(METADATA_DTYPE_MAPPING)
Beispiel #3
0
def metaindex_for_1minute_data(
        parameter: Parameter, time_resolution: TimeResolution) -> pd.DataFrame:
    """
    A helping function to create a raw index of metadata for stations of the set of
    parameters as given. This raw metadata is then used by other functions. This
    second/alternative function must be used for high resolution data, where the
    metadata is not available as file but instead saved in external files per each
    station.
    - especially for precipitation/1_minute/historical!

    Args:
        parameter: observation measure
        time_resolution: must be TimeResolution.MINUTE_1

    Returns:
        DataFrame of station metadata, sorted by station id.

    Raises:
        ValueError: if a time resolution other than 1 minute is passed.
    """
    # Validate with an explicit exception instead of ``assert`` -- asserts
    # are silently stripped when Python runs with ``-O``.
    if time_resolution != TimeResolution.MINUTE_1:
        raise ValueError("Wrong TimeResolution, only 1 minute is valid ")

    metadata_path = PurePosixPath(DWD_PATH, time_resolution.value,
                                  parameter.value, FTP_METADATA_NAME)

    with FTP(DWD_SERVER) as ftp:
        ftp.login()

        metadata_filepaths = ftp.list_files(remote_path=str(metadata_path),
                                            also_subfolders=False)

    # NOTE(review): str.lstrip strips a *character set*, not a prefix --
    # works only while no filename starts with a char from DWD_PATH.
    metadata_filepaths = [
        create_remote_file_name(file.lstrip(DWD_PATH))
        for file in metadata_filepaths
    ]

    statids = [
        re.findall(STATID_REGEX, file).pop(0) for file in metadata_filepaths
    ]

    # Use a single worker pool for both map steps and close it deterministically
    # (the old code leaked two unclosed Pool objects).
    with Pool() as pool:
        metadata_files = pool.map(download_metadata_file_for_1minute_data,
                                  metadata_filepaths)

        metadata_dfs = pool.map(combine_geo_and_par_file_to_metadata_df,
                                zip(metadata_files, statids))

    # pd.concat replaces the deprecated (and removed in pandas 2.0)
    # DataFrame.append; the empty frame pins the expected column set.
    metaindex_df = pd.concat(
        [pd.DataFrame(None, columns=METADATA_COLUMNS), *metadata_dfs],
        ignore_index=True)

    metaindex_df = metaindex_df.astype(METADATA_DTYPE_MAPPING)

    return metaindex_df.sort_values(
        DWDColumns.STATION_ID.value).reset_index(drop=True)
Beispiel #4
0
def _download_dwd_data(remote_file: Union[str, Path]) -> BytesIO:
    """
    Download one station-data zipfile selected by the 'select_dwd' function
    and extract the contained "produkt" data file.

    Args:
        remote_file: shortened path of the file to download; expanded to the
            full server URL via create_remote_file_name.

    Returns:
        BytesIO buffer with the contents of the extracted data file.

    Raises:
        urllib.error.URLError: if the station-data URL couldn't be reached.
        FailedDownload: for any other download failure.
        zipfile.BadZipFile: if the downloaded zipfile is corrupted.
    """
    file_server = create_remote_file_name(remote_file)

    try:
        with urllib.request.urlopen(file_server) as url_request:
            zip_file = BytesIO(url_request.read())
    except urllib.error.URLError as e:
        # BUGFIX: the old code called the exception *instance* (``raise
        # e(...)``), which is a TypeError. Re-raise a proper URLError and
        # chain the original cause.
        raise urllib.error.URLError(
            f"Error: the stationdata {file_server} couldn't be reached."
        ) from e
    except Exception as e:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # still propagate; chain the cause for diagnosis.
        raise FailedDownload(f"Download failed for {file_server}") from e

    try:
        with zipfile.ZipFile(zip_file) as zip_file_opened:
            # First archive member whose name contains all matchstrings;
            # .pop(0) raises IndexError if no such member exists.
            produkt_file = [
                file_in_zip for file_in_zip in zip_file_opened.namelist()
                if find_all_matchstrings_in_string(file_in_zip,
                                                   STATIONDATA_MATCHSTRINGS)
            ].pop(0)
            # Close the member file handle deterministically as well.
            with zip_file_opened.open(produkt_file) as produkt:
                file = BytesIO(produkt.read())
    except zipfile.BadZipFile as e:
        raise zipfile.BadZipFile(
            f"The zipfile seems to be corrupted.\n {str(e)}") from e

    return file