def join_within_league_seasons(sources):
    """Merge the standardized CSVs of two sources and save the result.

    Args:
        sources: Indexable whose first two items name the sub-directories
            of STDZED_DIR holding the left and right CSV files.

    The CSV paths of both sources are narrowed down by
    get_matching_filepaths, loaded, merged with do_merge and written to
    paths built from JOINED_DIR. The value returned by
    write_dfs_to_filepaths is printed; nothing is returned.
    """
    # Index 0 is the left-hand side of the merge, index 1 the right-hand side.
    csvs_by_side = [
        get_filepaths(STDZED_DIR / sources[side], ext='csv')
        for side in (0, 1)
    ]
    left_paths, right_paths = get_matching_filepaths(
        csvs_by_side[0], csvs_by_side[1], sources[0], sources[1]
    )
    merged = do_merge(
        read_csvs_to_dfs(left_paths),
        read_csvs_to_dfs(right_paths),
    )
    out_paths = make_filepaths_from_dfs(JOINED_DIR, merged, '')
    print(write_dfs_to_filepaths(merged, out_paths))
def stackseasons():
    """Stack the joined season files of each league into a single CSV.

    For every (nation, league) pair reported by get_actual_scope_dict, the
    files under JOINED_DIR whose path text contains both names are
    concatenated row-wise, ordered by 'season' then 'date', and re-indexed.
    Each stacked frame is saved as
    STACKED_DIR / nation / league / '<first season>__<last season>.csv'.
    The value returned by write_dfs_to_filepaths is printed; nothing is
    returned.
    """
    joined_paths = get_filepaths(JOINED_DIR)
    scope = get_actual_scope_dict(joined_paths)

    frames = []
    destinations = []
    for nation, league in zip(scope['nations'], scope['leagues']):
        # Plain substring match on the path text selects a league's files.
        matching = []
        for path in joined_paths:
            path_text = str(path)
            if nation in path_text and league in path_text:
                matching.append(path)

        stacked = (
            pd.concat(read_csvs_to_dfs(matching), axis=0, sort=True)
            .sort_values(by=['season', 'date'])
            .reset_index(drop=True)
        )
        frames.append(stacked)

        # The file name spans the first and last season found in the frame.
        seasons = get_df_seasons(stacked)
        filename = seasons[0] + '__' + seasons[-1] + '.csv'
        destinations.append(STACKED_DIR / nation / league / filename)

    print(write_dfs_to_filepaths(frames, destinations))
def standardize_all_dfs(source_dir):
    """Standardize the cleaned CSVs of one source and save the results.

    Args:
        source_dir: Sub-directory of CLEANED_DIR to read CSV files from;
            also passed on when building the destination paths.

    Frames with zero rows after standardize_dfs are not written. The value
    returned by write_dfs_to_filepaths is printed; nothing is returned.
    """
    cleaned_paths = get_filepaths(CLEANED_DIR / source_dir, ext='csv')
    candidates = standardize_dfs(read_csvs_to_dfs(cleaned_paths), REF_DIR)

    # Not every standardization dictionary is available yet, so frames
    # that come back with no rows are skipped.
    usable = []
    for frame in candidates:
        if len(frame) > 0:
            usable.append(frame)

    out_paths = make_filepaths_from_dfs(STDZED_DIR, usable, source_dir)
    print(write_dfs_to_filepaths(usable, out_paths))
def clean_whoscored_shotmap_images():
    """Copy the scoped WhoScored shotmap PNGs to their CLEANED_DIR paths.

    Destination paths come from make_equiv_image_dest_fps, given
    SCOPED_DIR, CLEANED_DIR and the source paths. The value returned by
    copy_files is printed; nothing is returned.
    """
    shotmap_dir = 'whoscored-com-shotmaps'
    source_paths = get_filepaths(SCOPED_DIR / shotmap_dir, ext='png')
    # NOTE: no image clean-up step runs here. An earlier draft
    # (clean_up_images -> write_images_to_filepaths) was disabled, so the
    # files are handed straight to copy_files.
    target_paths = make_equiv_image_dest_fps(
        SCOPED_DIR, CLEANED_DIR, source_paths
    )
    print(copy_files(source_paths, target_paths))
def join_images_within_seasons():
    """Attach heatmap and shotmap image files to the joined season data.

    For each image type in turn ('heatmap', then 'shotmap'):
      1. load the joined frames via insert_rel_path_into_dfs,
      2. copy the PNGs under STDZED_DIR / 'whoscored-com-<type>s' to the
         paths given by make_merged_image_filepaths,
      3. pass the frames through update_for_image_file_exists and write
         them to paths built from JOINED_DIR.

    The values returned by copy_files and write_dfs_to_filepaths are
    printed for each image type; nothing is returned.
    """
    for image_type in ('heatmap', 'shotmap'):
        # Frames are loaded before the copy, keeping the original order.
        frames = insert_rel_path_into_dfs(image_type)

        top_dir = f'whoscored-com-{image_type}s'
        source_images = get_filepaths(STDZED_DIR / top_dir, ext='png')
        merged_images = make_merged_image_filepaths(
            source_images, top_dir, image_type
        )
        print(copy_files(source_images, merged_images))

        frames = update_for_image_file_exists(frames, image_type)
        out_paths = make_filepaths_from_dfs(JOINED_DIR, frames, '')
        print(write_dfs_to_filepaths(frames, out_paths))
def standardize_image_team_names(source_dir):
    """Copy cleaned PNGs of one source to their standardized file paths.

    Args:
        source_dir: Sub-directory of CLEANED_DIR to read PNG files from;
            also passed on when building the standardized destinations.

    Images whose destination comes back as False from
    standardize_filepaths are skipped. The value returned by copy_files is
    printed; nothing is returned.
    """
    cleaned_src_fps = get_filepaths(CLEANED_DIR / source_dir, ext='png')
    stdzed_fns = standardize_filenames(cleaned_src_fps, REF_DIR)
    stdzed_dest_fps = standardize_filepaths(
        cleaned_src_fps, stdzed_fns, STDZED_DIR, source_dir
    )

    # Not all the standardization dictionaries are available yet, so only
    # images that were actually standardized (destination is not False)
    # are copied. Source and destination are filtered together as pairs:
    # filtering the source list first and then zipping it against the
    # unfiltered destinations would misalign the two lists and silently
    # drop or mismatch files.
    copy_pairs = [
        (src_fp, dest_fp)
        for src_fp, dest_fp in zip(cleaned_src_fps, stdzed_dest_fps)
        if dest_fp is not False
    ]
    cleaned_src_fps = [src_fp for src_fp, _ in copy_pairs]
    stdzed_dest_fps = [dest_fp for _, dest_fp in copy_pairs]

    n_copied = copy_files(cleaned_src_fps, stdzed_dest_fps)
    print(n_copied)
def insert_rel_path_into_dfs(image_type):
    """Load the joined CSVs and run them through insert_filepaths.

    Args:
        image_type: Image kind forwarded unchanged to insert_filepaths.

    Returns:
        Whatever insert_filepaths returns for the frames read from
        JOINED_DIR (presumably the frames with image paths added; confirm
        against insert_filepaths).
    """
    joined_frames = read_csvs_to_dfs(get_filepaths(JOINED_DIR))
    return insert_filepaths(joined_frames, image_type)