def stackseasons():
    """Stack every season file of each league into one frame and save it.

    For each (nation, league) pair in the scope derived from the files under
    JOINED_DIR, the matching season CSVs are concatenated row-wise, ordered
    by ``season`` then ``date``, and re-indexed. Each stacked frame is saved
    under ``STACKED_DIR / nation / league`` in a file named
    ``<first season>__<last season>.csv``. The value returned by
    ``write_dfs_to_filepaths`` is printed.
    """
    joined_fps = get_filepaths(JOINED_DIR)
    scope = get_actual_scope_dict(joined_fps)

    frames = []
    targets = []
    for nation, league in zip(scope['nations'], scope['leagues']):
        # A file belongs to this league when both names occur in its path.
        matching_fps = []
        for fp in joined_fps:
            fp_text = str(fp)
            if nation in fp_text and league in fp_text:
                matching_fps.append(fp)

        season_frames = read_csvs_to_dfs(matching_fps)
        stacked = (
            pd.concat(season_frames, axis=0, sort=True)
            .sort_values(by=['season', 'date'])
            .reset_index(drop=True)
        )
        frames.append(stacked)

        # The file name spans the first and last season in the stacked frame.
        seasons = get_df_seasons(stacked)
        first_season, last_season = seasons[0], seasons[-1]
        file_name = first_season + '__' + last_season + '.csv'
        targets.append(STACKED_DIR / nation / league / file_name)

    # NOTE(review): the write happens once, after all leagues are collected.
    n_written = write_dfs_to_filepaths(frames, targets)
    print(n_written)
def scope_football_data_co_uk():
    """Load the raw football-data-co-uk files in scope and save them.

    The scope dictionary is unpickled from SCOPE_DICT_DIR and used to build
    the raw file paths under RAW_DIR. The loaded frames are written to paths
    generated under SCOPED_DIR with data origin ``'football-data-co-uk'``.
    The value returned by ``write_dfs_to_filepaths`` is printed.
    """
    scope = load_pickle(SCOPE_DICT_DIR)
    raw_fps = make_fdcuk_filepaths(RAW_DIR, scope)
    season_frames = load_fdcuk_filepaths_as_dfs(raw_fps)

    target_fps = make_filepaths_from_dfs(
        SCOPED_DIR,
        season_frames,
        data_origin='football-data-co-uk',
    )
    print(write_dfs_to_filepaths(season_frames, target_fps))
def standardize_all_dfs(source_dir):
    """Standardize the cleaned CSVs of one source and save the results.

    Args:
        source_dir: Sub-directory of CLEANED_DIR holding the source's CSV
            files; also passed on to ``make_filepaths_from_dfs`` when the
            output paths under STDZED_DIR are built.

    The value returned by ``write_dfs_to_filepaths`` is printed.
    """
    cleaned_fps = get_filepaths(CLEANED_DIR / source_dir, ext='csv')
    cleaned_frames = read_csvs_to_dfs(cleaned_fps)

    # Not all standardized dictionaries are available yet, so some frames
    # come back empty; only frames with at least one row are kept.
    kept_frames = []
    for frame in standardize_dfs(cleaned_frames, REF_DIR):
        if len(frame) > 0:
            kept_frames.append(frame)

    target_fps = make_filepaths_from_dfs(STDZED_DIR, kept_frames, source_dir)
    print(write_dfs_to_filepaths(kept_frames, target_fps))
def scope_indatabet_com():
    """Split the monolithic indatabet-com workbook by scope and save it.

    The scope dictionary is unpickled from SCOPE_DICT_DIR. The workbook at
    INDATABET_FILEPATH is read, cleaned and prepared, then split by
    ``scope_monolithic_df``. The resulting frames are written to paths
    generated under SCOPED_DIR with data origin ``'indatabet-com'``. The
    value returned by ``write_dfs_to_filepaths`` is printed.
    """
    scope = load_pickle(SCOPE_DICT_DIR)

    # Read -> clean -> prep, applied innermost first.
    prepared = prep_monolithic_for_scoping(
        clean_monolithic_df(pandas_read_xlsb_file(INDATABET_FILEPATH))
    )
    frames = scope_monolithic_df(prepared, scope)

    target_fps = make_filepaths_from_dfs(
        SCOPED_DIR,
        frames,
        data_origin='indatabet-com',
    )
    print(write_dfs_to_filepaths(frames, target_fps))
def join_within_league_seasons(sources):
    """Merge the standardized CSVs of two sources and save the results.

    Args:
        sources: Indexable whose first two items name the sub-directories
            of STDZED_DIR to merge; item 0 supplies the left frames and
            item 1 the right frames.

    The CSV paths of both sources are reduced by ``get_matching_filepaths``
    before loading. The frames are combined by ``do_merge`` and written to
    paths generated under JOINED_DIR. The value returned by
    ``write_dfs_to_filepaths`` is printed.
    """
    left_source = sources[0]
    left_fps = get_filepaths(STDZED_DIR / left_source, ext='csv')
    right_source = sources[1]
    right_fps = get_filepaths(STDZED_DIR / right_source, ext='csv')

    left_fps, right_fps = get_matching_filepaths(
        left_fps, right_fps, left_source, right_source
    )

    joined = do_merge(read_csvs_to_dfs(left_fps), read_csvs_to_dfs(right_fps))
    target_fps = make_filepaths_from_dfs(JOINED_DIR, joined, '')
    print(write_dfs_to_filepaths(joined, target_fps))
def join_images_within_seasons():
    """Copy heatmap and shotmap images and save the updated frames.

    For each image type, the PNG files under
    ``STDZED_DIR / 'whoscored-com-<type>s'`` are copied to the paths from
    ``make_merged_image_filepaths``. The frames from
    ``insert_rel_path_into_dfs`` are then passed through
    ``update_for_image_file_exists`` and written to paths generated under
    JOINED_DIR. The values returned by ``copy_files`` and
    ``write_dfs_to_filepaths`` are printed for each image type.
    """
    for image_type in ('heatmap', 'shotmap'):
        source_dir_name = 'whoscored-com-' + image_type + 's'
        frames = insert_rel_path_into_dfs(image_type)

        # Copy the existing images to their merged locations.
        source_fps = get_filepaths(STDZED_DIR / source_dir_name, ext='png')
        target_fps = make_merged_image_filepaths(
            source_fps, source_dir_name, image_type
        )
        print(copy_files(source_fps, target_fps))

        # Update the frames only after the copy, then save them.
        frames = update_for_image_file_exists(frames, image_type)
        frame_fps = make_filepaths_from_dfs(JOINED_DIR, frames, '')
        print(write_dfs_to_filepaths(frames, frame_fps))