def create_metaindex(parameter: Parameter,
                     time_resolution: TimeResolution,
                     period_type: PeriodType) -> pd.DataFrame:
    """
    Create a simple metadata DataFrame parsed from the text file that is
    located in each data section of the station data directory of the
    weather service.

    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files

    Return:
        DataFrame with parsed columns of the corresponding text file.
        Columns are translated into English and data is not yet complete,
        as file existence is not checked.

    Raises:
        ftplib.Error: if the file list cannot be retrieved from the server.
        IndexError: if no file in the listing matches the metadata patterns.
        urllib.error.URLError: if the metadata file cannot be downloaded.
    """
    server_path = PurePosixPath(DWD_PATH, time_resolution.value,
                                parameter.value, period_type.value)

    try:
        with FTP(DWD_SERVER) as ftp:
            ftp.login()
            files_server = ftp.list_files(remote_path=str(server_path),
                                          also_subfolders=False)
    except ftplib.all_errors as e:
        # ftplib.all_errors is a *tuple* of exception classes and cannot be
        # called; raise ftplib.Error (a member of that tuple) instead so that
        # callers catching ftplib.all_errors still see this failure.
        raise ftplib.Error(
            "Error: couldn't retrieve filelist from server.\n"
            f"{str(e)}") from e

    metafiles = [
        file for file in files_server
        if find_all_matchstrings_in_string(file.lower(),
                                           METADATA_MATCHSTRINGS)
    ]
    if not metafiles:
        raise IndexError(f"No metadata file found in '{server_path}'.")
    metafile_server = metafiles[0]

    # str.lstrip() removes a *set of characters*, not a prefix, and would
    # also swallow leading characters of the following path segment.
    # Remove the server root explicitly instead.
    remote_root = DWD_PATH.strip("/") + "/"
    metafile_relative = metafile_server.lstrip("/")
    if metafile_relative.startswith(remote_root):
        metafile_relative = metafile_relative[len(remote_root):]

    metafile_url = create_remote_file_name(metafile_relative)

    try:
        with urllib.request.urlopen(metafile_url) as request:
            file = BytesIO(request.read())
    except urllib.error.URLError as e:
        raise urllib.error.URLError(
            "Error: reading metadata file failed.\n"
            f"{str(e)}") from e

    metaindex = pd.read_fwf(filepath_or_buffer=file,
                            colspecs=METADATA_FIXED_COLUMN_WIDTH,
                            skiprows=[1],
                            dtype=str,
                            encoding="ISO-8859-1")

    # Fix column names, as header is not aligned to fixed column widths
    metaindex.columns = "".join([
        column for column in metaindex.columns
        if "unnamed" not in column.lower()
    ]).split(" ")

    metaindex = metaindex.rename(columns=str.upper).rename(
        columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)

    return metaindex.astype(METADATA_DTYPE_MAPPING)
def connect(host: str) -> Any:
    """Open an FTP-over-TLS session to *host*, list the directory and quit.

    Args:
        host: name or address of the FTP server to connect to.

    Returns:
        The server's reply to the QUIT command, or ``None`` when any ftplib
        error occurred (the error is printed instead of being raised).
    """
    # Default result so that the final return is valid on the error path.
    resp = None
    try:
        with FTP_TLS(host) as ftp:
            ftp.login()
            ftp.dir()
            resp = ftp.quit()
    # ftplib.all_errors is already a tuple of exception classes; calling it
    # would raise TypeError while handling the original error.
    except ftplib.all_errors as err:
        print(err)
    return resp
def connect():
    """Connect anonymously to ftp1.at.proftpd.org and start the command loop.

    Prints the welcome banner, the current directory and a directory listing,
    then hands the session to ``ftp_command``. Any ftplib error is printed
    rather than raised. The connection is closed when the function returns.
    """
    try:
        # The context manager makes sure the connection is closed even if
        # the command loop exits through an exception.
        with ftplib.FTP('ftp1.at.proftpd.org') as ftp:
            ftp.login()
            print(ftp.getwelcome())
            print("Current Directory", ftp.pwd())
            # FTP.dir() writes the listing to stdout itself and returns None,
            # so print the heading first and call it separately.
            print("All Files in the Directory-")
            ftp.dir()
            print('Valid commands are cd/get/ls/exit - ex: get readme.txt')
            ftp_command(ftp)
    # ftplib.all_errors is a tuple of exception classes and must not be called.
    except ftplib.all_errors as err:
        print(f"The Error is - {err}")
def ftp_open(self, req):
    """Open the FTP URL described by *req* and return a response object.

    Args:
        req: request object providing ``get_host()``, ``get_selector()``
            and ``get_full_url()``.

    Returns:
        The result of ``addinfourl(fp, headers, url)`` where ``fp`` is the
        data stream returned by the FTP wrapper's ``retrfile``.

    Raises:
        IOError: ``('ftp error', ...)`` if no host is given or an ftplib
            error occurs during the transfer.
        URLError: if the host name cannot be resolved.
    """
    host = req.get_host()
    if not host:
        raise IOError('ftp error', 'no host given')

    # Split off an explicit port *before* resolving the name; resolving
    # "host:port" as a whole would fail.
    host, port = splitport(host)
    if port is None:
        port = ftplib.FTP_PORT
    else:
        port = int(port)

    # XXX handle custom username & password
    try:
        host = socket.gethostbyname(host)
    except socket.error as msg:
        raise URLError(msg) from msg

    path, attrs = splitattr(req.get_selector())
    path = unquote(path)
    dirs = path.split('/')
    dirs, file = dirs[:-1], dirs[-1]
    if dirs and not dirs[0]:
        dirs = dirs[1:]
    user = passwd = ''  # XXX

    try:
        fw = self.connect_ftp(user, passwd, host, port, dirs)
        # Binary transfer for a file, directory listing otherwise.
        transfer_type = 'I' if file else 'D'
        for attr in attrs:
            # Each attribute has the form "name=value"; split on '=' so that
            # a ";type=<x>" suffix can actually override the transfer type.
            # NOTE(review): assumes splitattr() only splits on ';' (as in
            # urllib) and therefore cannot separate name from value here.
            name, _, value = attr.partition('=')
            if name.lower() == 'type' and \
                    value in ('a', 'A', 'i', 'I', 'd', 'D'):
                transfer_type = value.upper()
        fp, retrlen = fw.retrfile(file, transfer_type)

        headers = ""
        mtype = mimetypes.guess_type(req.get_full_url())[0]
        if mtype:
            headers += "Content-Type: %s\n" % mtype
        if retrlen is not None and retrlen >= 0:
            headers += "Content-Length: %d\n" % retrlen
        sf = StringIO(headers)
        headers = mimetools.Message(sf)
        return addinfourl(fp, headers, req.get_full_url())
    # ftplib.all_errors is a tuple of exception classes and must not be called.
    except ftplib.all_errors as msg:
        raise IOError('ftp error', msg) from msg
def create_fileindex(parameter: Parameter,
                     time_resolution: TimeResolution,
                     period_type: PeriodType,
                     folder: str = DWD_FOLDER_MAIN) -> None:
    """
    Receive the current files on the server as a list, excluding description
    files and only containing those files that have measuring data, and
    write that list as a CSV file into the local metadata folder.

    Args:
        parameter: observation measure
        time_resolution: frequency/granularity of measurement interval
        period_type: recent or historical files
        folder: local base folder in which the metadata subfolder is kept

    Raises:
        ftplib.Error: if the file list cannot be retrieved from the server.
    """
    # Check for folder and create if necessary
    create_folder(subfolder=DWD_FOLDER_METADATA, folder=folder)

    filelist_local_path = Path(
        folder, DWD_FOLDER_METADATA, f"{FILELIST_NAME}_{parameter.value}_"
        f"{time_resolution.value}_"
        f"{period_type.value}{DATA_FORMAT}")

    server_path = PurePosixPath(DWD_PATH, time_resolution.value,
                                parameter.value, period_type.value)

    try:
        with FTP(DWD_SERVER) as ftp:
            ftp.login()
            files_server = ftp.list_files(remote_path=str(server_path),
                                          also_subfolders=True)
    except ftplib.all_errors as e:
        # ftplib.all_errors is a *tuple* of exception classes and cannot be
        # called; raise ftplib.Error (a member of that tuple) instead so that
        # callers catching ftplib.all_errors still see this failure.
        raise ftplib.Error(
            "Error: creating a filelist currently not possible.\n"
            f"{str(e)}") from e

    filename_col = DWDColumns.FILENAME.value
    fileid_col = DWDColumns.FILEID.value
    station_col = DWDColumns.STATION_ID.value

    # str.lstrip() removes a *set of characters*, not a prefix, and would
    # also swallow leading characters of the following path segment.
    # Remove the server root explicitly instead.
    remote_root = DWD_PATH.strip("/") + "/"

    def _relative_to_root(filename: str) -> str:
        filename = filename.lstrip("/")
        if filename.startswith(remote_root):
            return filename[len(remote_root):]
        return filename

    files_server = pd.DataFrame(files_server,
                                columns=[filename_col],
                                dtype='str')

    files_server[filename_col] = \
        files_server[filename_col].apply(_relative_to_root)

    # Keep only archive files; copy so that the column assignments below
    # operate on an independent frame rather than on a slice.
    files_server = files_server[
        files_server[filename_col].str.contains(ARCHIVE_FORMAT)].copy()

    files_server[fileid_col] = files_server.index

    # Bare file name (last path component) carries the station id.
    file_names = files_server[filename_col].str.split("/").apply(
        lambda parts: parts[-1])

    files_server[station_col] = file_names.apply(
        lambda x: re.findall(STATID_REGEX, x).pop(0))

    # Column order of the written file: file id, station id, file name.
    files_server = files_server[[fileid_col, station_col, filename_col]]

    files_server[station_col] = files_server[station_col].astype(int)

    files_server = files_server.sort_values(by=[station_col])

    remove_old_file(file_type=FILELIST_NAME,
                    parameter=parameter,
                    time_resolution=time_resolution,
                    period_type=period_type,
                    file_postfix=DATA_FORMAT,
                    folder=folder,
                    subfolder=DWD_FOLDER_METADATA)

    files_server.to_csv(path_or_buf=filelist_local_path,
                        header=True,
                        index=False)