def find_match_files():
    """Map each dataset entry to the paths of its keyword-match files.

    Fetches the entries under ``individual_keyword_matches/`` inside
    ``ds.output_data`` with ``helpers.path_fetcher`` and, for each entry,
    fetches its contents the same way.

    Returns:
        dict: entry name -> list of ``<root><entry>/<file>`` path strings,
        where ``<root>`` is ``ds.output_data + "individual_keyword_matches/"``.
    """
    root = ds.output_data + "individual_keyword_matches/"
    return {
        dataset: [
            root + dataset + "/" + name
            for name in helpers.path_fetcher(root + dataset)
        ]
        for dataset in helpers.path_fetcher(root)
    }
# Example #2
# 0
def _select_aug_2013_through_2018(df):
    """Return the rows of *df* dated August 2013 through the end of 2018.

    Expects *df* to expose ``year`` and ``month`` columns. The result is
    ordered as: 2013 rows grouped by month (8, 9, 10, 11, 12), followed by
    the 2014-2018 rows in their original order.
    """
    df_2013 = df[df.year == 2013]
    # Build every month mask from df_2013 itself so the mask's index matches
    # the frame being filtered; a mask taken from the unfiltered df forces
    # pandas to reindex it and emits a "Boolean Series key will be reindexed"
    # warning.
    late_2013 = [df_2013[df_2013.month == month] for month in range(8, 13)]
    later_years = df[df.year.isin([2014, 2015, 2016, 2017, 2018])]
    return pd.concat(late_2013 + [later_years])


def date_selection():
    """Filter every merged dataset down to August 2013 - December 2018.

    Loads each file found under ``merged_dataset_extraction/`` in
    ``ds.output_data``, keeps only the rows in the date window, and writes
    the result under ``time_filtered_dataset_extraction/`` with the same
    file name.

    Returns:
        list: the output path of every file written, in processing order.
    """
    source_dir = ds.output_data + "merged_dataset_extraction/"
    storage_path = ds.output_data + "time_filtered_dataset_extraction/"
    output_files = []
    for file in helpers.path_fetcher(source_dir):
        df = helpers.load_dataset(source_dir + file)
        filtered = _select_aug_2013_through_2018(df)
        # Kept inside the loop so it is only invoked when there is at least
        # one file to write, as before.
        helpers.path_checker(storage_path)
        helpers.dataframe_to_csv(filtered, storage_path + file)
        output_files.append(storage_path + file)
    return output_files
def get_results_filenames(version_path):
    """Return whatever ``helpers.path_fetcher`` yields for *version_path*.

    Args:
        version_path: location handed straight to ``helpers.path_fetcher``.
    """
    return helpers.path_fetcher(version_path)
# Example #4
# 0
def get_metric_storage_filenames():
    """Return whatever ``helpers.path_fetcher`` yields for the metric store.

    The location queried is ``ds.metric_storage_location``.
    """
    return helpers.path_fetcher(ds.metric_storage_location)