Beispiel #1
0
def create_metaindex(parameter: Parameter, time_resolution: TimeResolution,
                     period_type: PeriodType) -> pd.DataFrame:
    """Create a metadata DataFrame from the station description text file.

    The description file is located in the data section of the station data
    directory on the server that corresponds to the given parameter, time
    resolution and period type.

    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files

    Returns:
        DataFrame with parsed columns of the corresponding text file. Columns
        are translated into English and data is not yet complete as file
        existence is not checked.

    Raises:
        ftplib.Error: if the file list could not be retrieved from the server.
        IndexError: if no metadata file is found in the server directory.
        urllib.error.URLError: if the metadata file could not be downloaded.
    """
    server_path = PurePosixPath(DWD_PATH, time_resolution.value,
                                parameter.value, period_type.value)

    try:
        with FTP(DWD_SERVER) as ftp:
            ftp.login()
            files_server = ftp.list_files(remote_path=str(server_path),
                                          also_subfolders=False)

    except ftplib.all_errors as e:
        # ftplib.all_errors is a tuple of exception classes and cannot be
        # instantiated; raise a concrete member of that tuple instead.
        raise ftplib.Error(
            "Error: couldn't retrieve filelist from server.\n"
            f"{str(e)}") from e

    metafile_candidates = [
        file for file in files_server
        if find_all_matchstrings_in_string(file.lower(), METADATA_MATCHSTRINGS)
    ]

    if not metafile_candidates:
        # Same exception type as the former ``[].pop(0)``, but with a message.
        raise IndexError(f"No metadata file found in {server_path}")

    metafile_server = metafile_candidates[0].lstrip("/")

    # Remove the DWD_PATH *prefix*. ``str.lstrip(DWD_PATH)`` would strip any
    # leading characters contained in DWD_PATH and could eat into the
    # remaining relative path.
    path_prefix = DWD_PATH.strip("/") + "/"
    if metafile_server.startswith(path_prefix):
        metafile_server = metafile_server[len(path_prefix):]

    metafile_server = create_remote_file_name(metafile_server)

    try:
        with urllib.request.urlopen(metafile_server) as request:
            metafile_buffer = BytesIO(request.read())

    except urllib.error.URLError as e:
        raise urllib.error.URLError("Error: reading metadata file failed.\n"
                                    f"{str(e)}") from e

    # The second line of the file is skipped; everything is read as string
    # first and cast to the final dtypes at the end.
    metaindex = pd.read_fwf(filepath_or_buffer=metafile_buffer,
                            colspecs=METADATA_FIXED_COLUMN_WIDTH,
                            skiprows=[1],
                            dtype=str,
                            encoding="ISO-8859-1")

    # Fix column names, as header is not aligned to fixed column widths
    metaindex.columns = "".join([
        column for column in metaindex.columns
        if "unnamed" not in column.lower()
    ]).split(" ")

    metaindex = metaindex.rename(columns=str.upper).rename(
        columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)

    return metaindex.astype(METADATA_DTYPE_MAPPING)
Beispiel #2
0
def connect(host: str) -> Any:
    """Connect anonymously to ``host`` via FTP over TLS and list its root.

    Args:
        host: host name or address of the FTP server.

    Returns:
        The server response to the ``QUIT`` command, or ``None`` if any FTP or
        network error occurred (the error is printed).
    """
    # Bound up front so the failure path returns a defined value.
    resp = None
    try:
        with FTP_TLS(host) as ftp:
            ftp.login()
            ftp.dir()
            resp = ftp.quit()
    # ftplib.all_errors is a tuple of exception classes; it must not be called.
    except ftplib.all_errors as err:
        print(err)
    return resp
Beispiel #3
0
def connect():
    """Open an anonymous FTP session and hand it to the command loop.

    Prints the welcome banner, the current directory and a directory listing,
    then passes the open connection to ``ftp_command``. Any FTP or network
    error is printed instead of being raised.
    """
    try:
        # The context manager closes the connection when the block is left.
        with ftplib.FTP('ftp1.at.proftpd.org') as ftp:
            ftp.login()
            print(ftp.getwelcome())
            print("Current Directory", ftp.pwd())
            # ftp.dir() prints the listing itself and returns None, so it must
            # not be interpolated into the message.
            print("All Files in the Directory-")
            ftp.dir()
            print('Valid commands are cd/get/ls/exit - ex: get readme.txt')
            ftp_command(ftp)
    # ftplib.all_errors is a tuple of exception classes; it must not be called.
    except ftplib.all_errors as err:
        print(f"The Error is - {err}")
Beispiel #4
0
 def ftp_open(self, req):
     """Open the FTP resource described by ``req``.

     Args:
         req: request object providing ``get_host()``, ``get_selector()`` and
             ``get_full_url()``.

     Returns:
         The result of ``addinfourl`` wrapping the retrieved file object, the
         generated headers and the full URL.

     Raises:
         IOError: if no host is given or an FTP error occurs.
         URLError: if the host name cannot be resolved.
     """
     host = req.get_host()
     if not host:
         raise IOError('ftp error', 'no host given')
     # Split off an explicit port *before* resolving the name; resolving
     # "host:port" as a host name would always fail.
     host, port = splitport(host)
     if port is None:
         port = ftplib.FTP_PORT
     else:
         port = int(port)
     # XXX handle custom username & password
     try:
         host = socket.gethostbyname(host)
     except socket.error as msg:
         raise URLError(msg)
     path, attrs = splitattr(req.get_selector())
     path = unquote(path)
     dirs = path.split('/')
     dirs, file = dirs[:-1], dirs[-1]
     if dirs and not dirs[0]:
         dirs = dirs[1:]
     user = passwd = ''  # XXX
     try:
         fw = self.connect_ftp(user, passwd, host, port, dirs)
         # Default transfer type: 'I' when a file name is given, otherwise
         # 'D'; a ";type=" URL attribute overrides it.
         transfer_type = 'I' if file else 'D'
         for attr in attrs:
             attr, value = splitattr(attr)
             if attr.lower() == 'type' and \
                value in ('a', 'A', 'i', 'I', 'd', 'D'):
                 transfer_type = value.upper()
         fp, retrlen = fw.retrfile(file, transfer_type)
         headers = ""
         mtype = mimetypes.guess_type(req.get_full_url())[0]
         if mtype:
             headers += "Content-Type: %s\n" % mtype
         if retrlen is not None and retrlen >= 0:
             headers += "Content-Length: %d\n" % retrlen
         sf = StringIO(headers)
         headers = mimetools.Message(sf)
         return addinfourl(fp, headers, req.get_full_url())
     except ftplib.all_errors as msg:
         # Re-raise as IOError while keeping the original traceback.
         raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])
Beispiel #5
0
 def ftp_open(self, req):
     """Open the FTP resource described by ``req``.

     Args:
         req: request object providing ``get_host()``, ``get_selector()`` and
             ``get_full_url()``.

     Returns:
         The result of ``addinfourl`` wrapping the retrieved file object, the
         generated headers and the full URL.

     Raises:
         IOError: if no host is given or an FTP error occurs.
         URLError: if the host name cannot be resolved.
     """
     host = req.get_host()
     if not host:
         raise IOError('ftp error', 'no host given')
     # Split off an explicit port *before* resolving the name; resolving
     # "host:port" as a host name would always fail.
     host, port = splitport(host)
     if port is None:
         port = ftplib.FTP_PORT
     else:
         port = int(port)
     # XXX handle custom username & password
     try:
         host = socket.gethostbyname(host)
     except socket.error as msg:
         raise URLError(msg)
     path, attrs = splitattr(req.get_selector())
     path = unquote(path)
     dirs = path.split('/')
     dirs, file = dirs[:-1], dirs[-1]
     if dirs and not dirs[0]:
         dirs = dirs[1:]
     user = passwd = '' # XXX
     try:
         fw = self.connect_ftp(user, passwd, host, port, dirs)
         # Default transfer type: 'I' when a file name is given, otherwise
         # 'D'; a ";type=" URL attribute overrides it.
         transfer_type = 'I' if file else 'D'
         for attr in attrs:
             attr, value = splitattr(attr)
             if attr.lower() == 'type' and \
                value in ('a', 'A', 'i', 'I', 'd', 'D'):
                 transfer_type = value.upper()
         fp, retrlen = fw.retrfile(file, transfer_type)
         headers = ""
         mtype = mimetypes.guess_type(req.get_full_url())[0]
         if mtype:
             headers += "Content-Type: %s\n" % mtype
         if retrlen is not None and retrlen >= 0:
             headers += "Content-Length: %d\n" % retrlen
         sf = StringIO(headers)
         headers = mimetools.Message(sf)
         return addinfourl(fp, headers, req.get_full_url())
     except ftplib.all_errors as msg:
         # Re-raise as IOError while keeping the original traceback.
         raise IOError('ftp error', msg).with_traceback(sys.exc_info()[2])
Beispiel #6
0
def create_fileindex(parameter: Parameter,
                     time_resolution: TimeResolution,
                     period_type: PeriodType,
                     folder: str = DWD_FOLDER_MAIN) -> None:
    """Create a local index of the data archives currently on the server.

    The server directory for the given parameter, time resolution and period
    type is listed including subfolders. Only entries matching ARCHIVE_FORMAT
    are kept, which excludes description files. The resulting table of file
    id, station id and file name is written as CSV into the metadata
    subfolder, after any previous file list has been removed.

    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        folder: local main folder in which the metadata subfolder is located

    Raises:
        ftplib.Error: if the file list could not be retrieved from the server.
    """
    filename_col = DWDColumns.FILENAME.value
    fileid_col = DWDColumns.FILEID.value
    station_id_col = DWDColumns.STATION_ID.value

    # Check for folder and create if necessary
    create_folder(subfolder=DWD_FOLDER_METADATA, folder=folder)

    filelist_local_path = Path(
        folder, DWD_FOLDER_METADATA, f"{FILELIST_NAME}_{parameter.value}_"
        f"{time_resolution.value}_"
        f"{period_type.value}{DATA_FORMAT}")

    server_path = PurePosixPath(DWD_PATH, time_resolution.value,
                                parameter.value, period_type.value)

    try:
        with FTP(DWD_SERVER) as ftp:
            ftp.login()
            files_server = ftp.list_files(remote_path=str(server_path),
                                          also_subfolders=True)

    except ftplib.all_errors as e:
        # ftplib.all_errors is a tuple of exception classes and cannot be
        # instantiated; raise a concrete member of that tuple instead.
        raise ftplib.Error(
            "Error: creating a filelist currently not possible.\n"
            f"{str(e)}") from e

    files_server = pd.DataFrame(files_server,
                                columns=[filename_col],
                                dtype='str')

    # Remove the DWD_PATH *prefix*. ``str.lstrip(DWD_PATH + '/')`` strips any
    # leading characters contained in that string and therefore also eats the
    # start of the relative path (e.g. "monthly/..." or "annual/...").
    path_prefix = DWD_PATH.strip('/') + '/'

    def _strip_path_prefix(filename: str) -> str:
        filename = filename.lstrip('/')
        if filename.startswith(path_prefix):
            return filename[len(path_prefix):]
        return filename

    files_server[filename_col] = files_server[filename_col].apply(
        _strip_path_prefix)

    # Keep only archive files; copy so the following column assignments work
    # on an independent frame rather than on a slice of the original.
    files_server = files_server[files_server[filename_col].str.contains(
        ARCHIVE_FORMAT)].copy()

    # The file id is the position of the file in the unfiltered listing.
    files_server[fileid_col] = files_server.index

    # The station id is parsed from the bare file name (last path component).
    file_names = files_server[filename_col].str.split("/").str[-1]

    files_server[station_id_col] = file_names.apply(
        lambda name: re.findall(STATID_REGEX, name).pop(0))

    files_server[station_id_col] = files_server[station_id_col].astype(int)

    files_server = files_server[[fileid_col, station_id_col, filename_col]]

    files_server = files_server.sort_values(by=[station_id_col])

    remove_old_file(file_type=FILELIST_NAME,
                    parameter=parameter,
                    time_resolution=time_resolution,
                    period_type=period_type,
                    file_postfix=DATA_FORMAT,
                    folder=folder,
                    subfolder=DWD_FOLDER_METADATA)

    files_server.to_csv(path_or_buf=filelist_local_path,
                        header=True,
                        index=False)