Exemple #1
0
def transformed_to_all_stacked():
    """Merge every transformed CSV into one frame and save it as ``full.csv``.

    Reads all ``csv`` files found under ``TRANSFORMED_DIR``, concatenates
    them along ``axis=1`` and writes the single resulting frame to
    ``ALL_STACKED_DIR / 'full.csv'``. The value returned by
    ``write_dfs_to_filepaths`` is printed (presumably the number of files
    written -- confirm against that helper).
    """
    source_paths = get_filepaths(TRANSFORMED_DIR, ext='csv')
    # NOTE(review): axis=1 places the frames side by side (column-wise)
    # rather than stacking rows; confirm this matches the "stacked" intent.
    combined = pd.concat(read_csvs_to_dfs(source_paths), axis=1, sort=True)
    destination = ALL_STACKED_DIR / 'full.csv'
    # The writer takes parallel lists, hence the single-element wrappers.
    print(write_dfs_to_filepaths([combined], [destination]))
Exemple #2
0
def apply_ts_features():
    """Run ``apply_features`` over the stacked CSVs and save the results.

    Loads every ``csv`` under ``STACKED_DIR``, passes the loaded frames
    through ``apply_features`` and writes the output to the destination
    paths that ``make_equiv_image_dest_fps`` derives for ``FEATURED_DIR``
    from the source paths. The value returned by
    ``write_dfs_to_filepaths`` is printed (presumably a count of saved
    files -- confirm against that helper).
    """
    source_paths = get_filepaths(STACKED_DIR, ext='csv')
    with_features = apply_features(read_csvs_to_dfs(source_paths))
    dest_paths = make_equiv_image_dest_fps(
        STACKED_DIR, FEATURED_DIR, source_paths)
    print(write_dfs_to_filepaths(with_features, dest_paths))
Exemple #3
0
def clean_football_data_co_uk():
    """Clean the scoped ``football-data-co-uk`` CSVs and save the results.

    Loads every ``csv`` under ``SCOPED_DIR / 'football-data-co-uk'``, runs
    the frames through ``clean_up_dfs`` and writes them to paths built by
    ``make_filepaths_from_dfs`` under ``CLEANED_DIR``. The value returned
    by ``write_dfs_to_filepaths`` is printed (presumably a count of saved
    files -- confirm against that helper).
    """
    source_dir = SCOPED_DIR / 'football-data-co-uk'
    raw_frames = read_csvs_to_dfs(get_filepaths(source_dir, ext='csv'))
    tidy_frames = clean_up_dfs(raw_frames)
    # Destination paths are derived from the cleaned frames themselves,
    # not from the source file names.
    dest_paths = make_filepaths_from_dfs(
        CLEANED_DIR, tidy_frames, data_origin='football-data-co-uk')
    print(write_dfs_to_filepaths(tidy_frames, dest_paths))
def clean_indatabet_com():
    """Clean the scoped ``indatabet-com`` CSVs and save the results.

    Loads every ``csv`` under ``SCOPED_DIR / 'indatabet-com'``, runs the
    frames through ``clean_up_dfs`` and writes them to paths built by
    ``make_filepaths_from_dfs`` under ``CLEANED_DIR``. The value returned
    by ``write_dfs_to_filepaths`` is printed (presumably a count of saved
    files -- confirm against that helper).
    """
    source_dir = SCOPED_DIR / 'indatabet-com'
    raw_frames = read_csvs_to_dfs(get_filepaths(source_dir, ext='csv'))
    tidy_frames = clean_up_dfs(raw_frames)
    # Destination paths are derived from the cleaned frames themselves,
    # not from the source file names.
    dest_paths = make_filepaths_from_dfs(
        CLEANED_DIR, tidy_frames, data_origin='indatabet-com')
    print(write_dfs_to_filepaths(tidy_frames, dest_paths))
def run_transform_ts_to_supervised():
    """Apply ``transform_ts_to_supervised`` season by season and save it.

    For each frame loaded from the ``csv`` files under ``FEATURED_DIR``:
    the rows are grouped by the ``season`` column, every season group is
    transformed on its own, the transformed groups are concatenated
    row-wise again, and the result is passed through
    ``clean_up_league_df``. The resulting frames are written to the
    destination paths that ``make_equiv_image_dest_fps`` derives for
    ``TRANSFORMED_DIR`` from the source paths, and the value returned by
    ``write_dfs_to_filepaths`` is printed (presumably a count of saved
    files -- confirm against that helper).
    """
    source_paths = get_filepaths(FEATURED_DIR, ext='csv')
    # No explicit feature selection is supplied to the transform.
    features = None
    supervised_frames = []
    for league_frame in read_csvs_to_dfs(source_paths):
        # Transform each season separately, then restack the seasons.
        per_season = [
            transform_ts_to_supervised(season_rows, features)
            for _, season_rows in league_frame.groupby(by='season')
        ]
        restacked = pd.concat(per_season, sort=True, axis=0)
        supervised_frames.append(clean_up_league_df(restacked))
    dest_paths = make_equiv_image_dest_fps(
        FEATURED_DIR, TRANSFORMED_DIR, source_paths)
    print(write_dfs_to_filepaths(supervised_frames, dest_paths))