def find_match_files():
    """Index the per-keyword match files found under the output directory.

    Scans ``<output_data>/individual_keyword_matches/``, treating each
    immediate entry as a dataset directory, and collects the full path of
    every file inside it.

    Returns:
        dict: maps each dataset directory name to a list of full file paths
        (``<matches_storage_path><dataset>/<file>``) contained in it.
    """
    matches_storage_path = ds.output_data + "individual_keyword_matches/"
    # One dict entry per dataset directory; each value is the list of the
    # files it contains, rebuilt as full paths. (Replaces the original
    # dict + append loop with equivalent comprehensions.)
    return {
        path: [
            matches_storage_path + path + "/" + file
            for file in helpers.path_fetcher(matches_storage_path + path)
        ]
        for path in helpers.path_fetcher(matches_storage_path)
    }
def date_selection():
    """Restrict merged dataset extractions to Aug 2013 through Dec 2018.

    For each file in ``<output_data>/merged_dataset_extraction/``, keeps rows
    from August-December 2013 plus all rows from 2014-2018 (the frames are
    assumed to expose ``year`` and ``month`` columns -- TODO confirm against
    the upstream extraction step), then writes the filtered frame to
    ``<output_data>/time_filtered_dataset_extraction/`` under the same name.

    Returns:
        list: paths of the filtered CSV files written out.
    """
    output_files = []
    path = ds.output_data + "merged_dataset_extraction/"
    for file in helpers.path_fetcher(path):
        df = helpers.load_dataset(path + file)
        # BUG FIX: the original filtered the 2013 subset with boolean masks
        # built from the *full* frame (e.g. ``df_2013[df.month == 8]``).
        # Masks must be built from the frame being indexed; modern pandas
        # rejects unalignable boolean indexers outright.
        df_2013 = df[df.year == 2013]
        # Preserve the original output ordering: Aug..Dec 2013 month blocks
        # first, then every row from 2014-2018.
        parts = [df_2013[df_2013.month == m] for m in range(8, 13)]
        parts.append(df[df.year.isin([2014, 2015, 2016, 2017, 2018])])
        df = pd.concat(parts)

        storage_path = ds.output_data + "time_filtered_dataset_extraction/"
        helpers.path_checker(storage_path)
        helpers.dataframe_to_csv(df, storage_path + file)
        output_files.append(storage_path + file)
    return output_files
def get_results_filenames(version_path):
    """Return the filenames found under *version_path*.

    Thin wrapper around ``helpers.path_fetcher``; the directory listing is
    returned exactly as the helper provides it.
    """
    return helpers.path_fetcher(version_path)
def get_metric_storage_filenames():
    """Return the filenames in the configured metric storage directory.

    Thin wrapper: lists ``ds.metric_storage_location`` via
    ``helpers.path_fetcher`` and returns the result unchanged.
    """
    return helpers.path_fetcher(ds.metric_storage_location)