def transformed_to_all_stacked():
    """Combine every transformed CSV into one frame and write it to ``full.csv``.

    Collects the ``csv`` files under ``TRANSFORMED_DIR``, loads them with
    ``read_csvs_to_dfs``, concatenates the loaded frames with
    ``pd.concat(..., axis=1, sort=True)`` and hands the single combined frame
    to ``write_dfs_to_filepaths`` with ``ALL_STACKED_DIR / 'full.csv'`` as the
    destination. Whatever ``write_dfs_to_filepaths`` returns is printed.
    """
    source_paths = get_filepaths(TRANSFORMED_DIR, ext='csv')
    frames = read_csvs_to_dfs(source_paths)
    # NOTE(review): axis=1 places the frames side by side (column-wise). The
    # "stacked" naming hints that row-wise (axis=0) may have been intended -
    # confirm against the consumers of full.csv before changing.
    combined = pd.concat(frames, axis=1, sort=True)
    destination = ALL_STACKED_DIR / 'full.csv'
    # The writer takes parallel lists, so wrap the single frame/path pair.
    n_written = write_dfs_to_filepaths([combined], [destination])
    print(n_written)
def apply_ts_features():
    """Run ``apply_features`` over the stacked CSVs and save the results.

    Collects the ``csv`` files under ``STACKED_DIR``, loads them, passes the
    loaded frames through ``apply_features`` and writes the output to the
    paths produced by ``make_equiv_image_dest_fps(STACKED_DIR, FEATURED_DIR,
    ...)``. Whatever ``write_dfs_to_filepaths`` returns is printed.
    """
    stacked_paths = get_filepaths(STACKED_DIR, ext='csv')
    featured_frames = apply_features(read_csvs_to_dfs(stacked_paths))
    # Destination paths are derived from the source paths, one per input file.
    destination_paths = make_equiv_image_dest_fps(
        STACKED_DIR, FEATURED_DIR, stacked_paths
    )
    n_saved = write_dfs_to_filepaths(featured_frames, destination_paths)
    print(n_saved)
def clean_football_data_co_uk():
    """Clean the scoped ``football-data-co-uk`` CSVs and save them.

    Collects the ``csv`` files under ``SCOPED_DIR / 'football-data-co-uk'``,
    loads them, runs ``clean_up_dfs`` on the loaded frames and writes the
    cleaned frames to paths built by ``make_filepaths_from_dfs`` under
    ``CLEANED_DIR``. Whatever ``write_dfs_to_filepaths`` returns is printed.
    """
    origin = 'football-data-co-uk'
    scoped_paths = get_filepaths(SCOPED_DIR / origin, ext='csv')
    cleaned_frames = clean_up_dfs(read_csvs_to_dfs(scoped_paths))
    # Output paths are built from the cleaned frames, not from the input paths.
    destination_paths = make_filepaths_from_dfs(
        CLEANED_DIR, cleaned_frames, data_origin=origin
    )
    n_saved = write_dfs_to_filepaths(cleaned_frames, destination_paths)
    print(n_saved)
def clean_indatabet_com():
    """Clean the scoped ``indatabet-com`` CSVs and save them.

    Collects the ``csv`` files under ``SCOPED_DIR / 'indatabet-com'``, loads
    them, runs ``clean_up_dfs`` on the loaded frames and writes the cleaned
    frames to paths built by ``make_filepaths_from_dfs`` under
    ``CLEANED_DIR``. Whatever ``write_dfs_to_filepaths`` returns is printed.
    """
    origin = 'indatabet-com'
    scoped_paths = get_filepaths(SCOPED_DIR / origin, ext='csv')
    cleaned_frames = clean_up_dfs(read_csvs_to_dfs(scoped_paths))
    # Output paths are built from the cleaned frames, not from the input paths.
    destination_paths = make_filepaths_from_dfs(
        CLEANED_DIR, cleaned_frames, data_origin=origin
    )
    n_saved = write_dfs_to_filepaths(cleaned_frames, destination_paths)
    print(n_saved)
def run_transform_ts_to_supervised():
    """Apply ``transform_ts_to_supervised`` season by season to each featured CSV.

    Collects the ``csv`` files under ``FEATURED_DIR`` and loads them. Each
    loaded frame is grouped by its ``season`` column, every group is passed
    to ``transform_ts_to_supervised`` (with ``features=None``), the
    per-season results are concatenated row-wise and the result is passed
    through ``clean_up_league_df``. The processed frames are written to the
    paths produced by ``make_equiv_image_dest_fps(FEATURED_DIR,
    TRANSFORMED_DIR, ...)``, and whatever ``write_dfs_to_filepaths`` returns
    is printed.
    """
    featured_paths = get_filepaths(FEATURED_DIR, ext='csv')
    features = None
    supervised_frames = []
    for league_frame in read_csvs_to_dfs(featured_paths):
        # The transform has to be applied within a single season, so split
        # by season first ...
        per_season = [
            transform_ts_to_supervised(season_frame, features)
            for _, season_frame in league_frame.groupby(by='season')
        ]
        # ... and restack the seasons into one frame afterwards.
        restacked = pd.concat(per_season, sort=True, axis=0)
        supervised_frames.append(clean_up_league_df(restacked))
    destination_paths = make_equiv_image_dest_fps(
        FEATURED_DIR, TRANSFORMED_DIR, featured_paths
    )
    n_saved = write_dfs_to_filepaths(supervised_frames, destination_paths)
    print(n_saved)