import os
from datetime import date

import pandas
import geopandas as gpd

import file_fct  # project-local path/file helpers used throughout
import df_fct    # project-local dataframe helpers used by main()


def export_df(list_df, list_prop):
    # list_df = [['name', df], ...], list_prop = ['processed' or 'raw', ...]
    data_dir = file_fct.get_parent_dir(2, 'data')

    for df_prop, a_prop in zip(list_df, list_prop):
        df_name, df = df_prop
        source_df = read_db_list(a_prop)
        file_path = file_fct.creation_folder(
            f'{data_dir}/{a_prop}',
            [f'{source_df.loc[df_name, "sub_dir"]}'])
        export_path = f'{file_path[0]}/{source_df.loc[df_name, "file_name"]}'
        export_format = source_df.loc[df_name, "file_name"].split('.')[-1]

        if source_df.loc[df_name, 'type'] == 'Pandas':
            if export_format == 'csv':
                df.to_csv(export_path, index=True)
            elif export_format == 'json':
                df.to_json(export_path, orient="table", indent=4)
            else:
                print(f"File {source_df.loc[df_name, 'file_name']} couldn't be saved, "
                      f"please change extension")

        elif source_df.loc[df_name, 'type'] == 'GeoPandas':
            if export_format == 'shp':
                df.to_file(export_path, index=True)
            elif export_format == 'geojson':
                # GeoDataFrame.to_file takes no orient/indent kwargs;
                # the driver selects GeoJSON output.
                df.to_file(export_path, driver="GeoJSON")
            elif export_format == 'feather':
                df.to_feather(export_path)
            else:
                print(f"File {source_df.loc[df_name, 'file_name']} couldn't be saved, "
                      f"please change extension")
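# Usage sketch for export_df (assumptions: 'daily_cases' is a hypothetical
# entry in list_processed_data.json with a .csv file_name, and the data/ tree
# exists). Each (name, df) pair is routed to the path and format recorded in
# the corresponding db list.
def _example_export():
    df = pandas.DataFrame({'cases': [10, 12]},
                          index=pandas.Index(['2020-03-01', '2020-03-02'],
                                             name='date'))
    export_df([['daily_cases', df]], ['processed'])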
def import_df(list_df, list_prop):
    # list_df = ['name', ...], list_prop = ['processed' or 'raw', ...]
    data_dir = file_fct.get_parent_dir(2, 'data')
    list_final_df = []

    for df_name, a_prop in zip(list_df, list_prop):
        source_df = read_db_list(a_prop)
        import_path = os.path.normcase(
            f'{data_dir}/{a_prop}/{source_df.loc[df_name, "sub_dir"]}/'
            f'{source_df.loc[df_name, "file_name"]}')
        import_format = source_df.loc[df_name, "file_name"].split('.')[-1]
        importing_df = None  # stays None if the type/format is unknown

        if source_df.loc[df_name, 'type'] == 'Pandas':
            if import_format == 'csv':
                importing_df = pandas.read_csv(
                    import_path,
                    sep=source_df.loc[df_name, 'sep'],
                    encoding=source_df.loc[df_name, 'encoding'])
            elif import_format == 'json':
                importing_df = pandas.read_json(import_path, orient="table")

        elif source_df.loc[df_name, 'type'] == 'GeoPandas':
            if import_format in ('csv', 'shp', 'json', 'geojson'):
                # geopandas has no read_json; read_file handles all of these.
                importing_df = gpd.read_file(import_path)
            elif import_format == 'feather':
                importing_df = gpd.read_feather(import_path)

        list_final_df.append(importing_df)

    return list_final_df
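# Usage sketch for import_df (assumption: 'daily_cases' and 'regions_geo' are
# hypothetical names registered in the processed/raw db lists). The returned
# list keeps the order of list_df; unknown formats come back as None.
def _example_import():
    daily_cases, regions = import_df(['daily_cases', 'regions_geo'],
                                     ['processed', 'raw'])
    return daily_cases, regions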
def main(update_limit):
    data_dir = file_fct.get_parent_dir(2, 'data')   # .../COVID19/data
    db_list = df_fct.read_db_list('raw')

    last_update = last_update_db(data_dir, db_list)
    import_static(data_dir, db_list)
    import_daily(data_dir, db_list, last_update, update_limit)
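# Entry-point sketch (assumptions: update_limit is in days, since it is
# compared against the 'delta_day' column in import_daily, so 0 re-downloads
# any source at least one day old; last_update_db, import_static, get_dates
# and import_and_save are project helpers defined alongside these functions).
if __name__ == '__main__':
    main(0)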
def read_db_list(db_type):
    data_dir = file_fct.get_parent_dir(2, 'data')

    if db_type == 'raw':
        db_list_path = os.path.normcase(f'{data_dir}/list_raw_data.json')
    elif db_type == 'processed':
        db_list_path = os.path.normcase(f'{data_dir}/list_processed_data.json')
    else:
        raise ValueError(f"Unknown db list type: {db_type!r} "
                         f"(expected 'raw' or 'processed')")

    db_list = pandas.read_json(db_list_path, orient="table")
    return db_list
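# A minimal sketch of what list_raw_data.json is assumed to contain: an
# orient="table" JSON with one row per database. The columns below are the
# ones this module actually reads (file_name, sub_dir, type, sep, encoding,
# update); the 'covid_tests' row and the index name are hypothetical.
def _write_example_db_list(path):
    example = pandas.DataFrame(
        [['tests.csv', 'tests', 'Pandas', ';', 'utf-8', True]],
        columns=['file_name', 'sub_dir', 'type', 'sep', 'encoding', 'update'],
        index=pandas.Index(['covid_tests'], name='name'))
    example.to_json(path, orient="table", indent=4)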
def read_db_files():
    data_dir = file_fct.get_parent_dir(2, 'data')
    db_file = pandas.read_json(f'{data_dir}/list_files.json', orient="table")

    list_dir, list_files = file_fct.list_dir_files(f'{data_dir}')
    if 'list_files_date.json' in list_files:
        db_file_date = pandas.read_json(f'{data_dir}/list_files_date.json',
                                        orient="table")
    else:
        # First run: no date-tracking file yet, start with empty dates.
        db_file_date = pandas.DataFrame(index=db_file.index, columns=['date'])
    db_file_date.loc[:, 'date'] = pandas.to_datetime(db_file_date.loc[:, 'date'])

    return db_file, db_file_date
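# Usage sketch: on a first run list_files_date.json does not exist yet, so
# read_db_files returns an all-NaT date column that import code can fill in.
def _example_read_registry():
    db_file, db_file_date = read_db_files()
    missing = db_file_date[db_file_date['date'].isna()]
    print(f'{len(missing)} files have never been dated')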
def import_daily(data_dir, db_list, last_update_db, limit):
    raw_data_dir = os.path.normcase(f'{data_dir}/raw')
    df_daily = db_list[db_list.loc[:, 'update'] == True]

    for a_df_name in df_daily.index:
        file_name = df_daily.loc[a_df_name, 'file_name']

        if a_df_name not in last_update_db.index:
            # Source never downloaded before: fetch it and record its date.
            print(f"Creating and downloading {file_name}...", end='')
            df = import_and_save(a_df_name, raw_data_dir, df_daily)
            delta_spaces = " " * (len(f"Creating and downloading {file_name}...")
                                  - len(f"\r{file_name} was downloaded"))
            print(f"\r{file_name} was downloaded {delta_spaces}")
            last_update_db.loc[a_df_name, 'date'] = get_dates(df, a_df_name, db_list)

        elif last_update_db.loc[a_df_name, 'delta_day'] > limit:
            # Local copy is older than the limit (in days): refresh it.
            print(f"Downloading {file_name}...", end='')
            df = import_and_save(a_df_name, raw_data_dir, df_daily)
            delta_spaces = " " * (len(f"Downloading {file_name}...")
                                  - len(f"\r{file_name} was downloaded"))
            print(f"\r{file_name} was downloaded {delta_spaces}")
            last_update_db.loc[a_df_name, 'date'] = get_dates(df, a_df_name, db_list)

    # Recompute each source's age in days, then persist the update log.
    last_update_db['delta_day'] = last_update_db.apply(
        lambda x: (pandas.to_datetime('today') - x["date"]).days, axis=1)
    print(last_update_db)
    last_update_db.loc[:, 'date'] = last_update_db.apply(
        lambda x: x["date"].strftime("%Y-%m-%d"), axis=1)
    last_update_db.to_json(f'{data_dir}/last_update.json', orient="table", indent=4)
    print('\n\n')
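# Sketch of the freshness logic above (the 'covid_tests' row is hypothetical).
# After a run, last_update.json holds one row per daily source with its last
# download date and its age in days:
#
#                 date        delta_day
#   covid_tests   2020-04-01  3
#
# so import_daily(..., limit=2) would re-download it, while limit=5 would not.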
def path_to_file(self):
    self.return_path()
    root = os.path.normcase(file_fct.get_parent_dir(2, 'src/data_transfer'))

    if self.type_transfer == 'daily':
        file_path = os.path.normcase(f'{root}/export_file.txt')
    elif self.type_transfer == 'article':
        today = date.today().strftime("%Y-%m-%d")
        file_path = os.path.normcase(f'{root}/export_file_{today}.txt')
    else:
        raise ValueError(f"Unknown transfer type: {self.type_transfer!r}")

    # Write one entry per line, skipping directory paths (trailing '/').
    with open(file_path, 'w') as export_file:
        for a_name in self.list_names:
            if a_name[-1] != '/':
                export_file.write(a_name + '\n')
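# Sketch of the resulting export file (assumption: self.list_names mixes file
# and directory paths; only file entries are kept, one per line):
#
#   data/processed/daily_cases.csv
#   data/processed/regions.geojson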
def save_db_files(db_file, db_file_date):
    data_dir = file_fct.get_parent_dir(2, 'data')
    db_file.to_json(f'{data_dir}/list_files.json', orient="table", indent=4)
    db_file_date.to_json(f'{data_dir}/list_files_date.json', orient="table", indent=4)
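# Round-trip note: orient="table" JSON stores the schema (index name, dtypes),
# so save_db_files and read_db_files preserve the registry index across runs.
# A minimal sketch with a hypothetical one-row registry:
def _example_registry_round_trip():
    db_file = pandas.DataFrame({'file_name': ['tests.csv']},
                               index=pandas.Index(['covid_tests'], name='name'))
    db_file_date = pandas.DataFrame({'date': [pandas.to_datetime('today')]},
                                    index=db_file.index)
    save_db_files(db_file, db_file_date)
    return read_db_files()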