Ejemplo n.º 1
0
def stackseasons():
    """Stack each league's joined season files into a single CSV per league.

    For every (nation, league) pair listed in the dictionary returned by
    ``get_actual_scope_dict``, the files found under ``JOINED_DIR`` whose
    path text contains both names are read, concatenated row-wise, ordered
    by ``season`` then ``date`` and re-indexed. Each stacked frame is saved
    to ``STACKED_DIR / nation / league / <a>__<b>.csv`` where ``<a>`` and
    ``<b>`` are the first and last entries returned by ``get_df_seasons``.

    The value returned by ``write_dfs_to_filepaths`` is printed; nothing is
    returned.
    """
    filepaths = get_filepaths(JOINED_DIR)
    actual_scope_dict = get_actual_scope_dict(filepaths)
    # Stringify every path once instead of twice per league iteration.
    path_strs = [(fp, str(fp)) for fp in filepaths]

    stacked_leagues = []
    stacked_filepaths = []
    for nation, league in zip(actual_scope_dict['nations'],
                              actual_scope_dict['leagues']):
        # Membership is a plain substring test against the path text.
        league_seasons = [
            fp for fp, fp_str in path_strs
            if nation in fp_str and league in fp_str
        ]
        league_seasons_dfs = read_csvs_to_dfs(league_seasons)
        # sort=True orders the columns when the season files do not share
        # an identical column layout.
        league_season_df = (
            pd.concat(league_seasons_dfs, axis=0, sort=True)
            .sort_values(by=['season', 'date'])
            .reset_index(drop=True)
        )
        stacked_leagues.append(league_season_df)

        seasons = get_df_seasons(league_season_df)
        fn = seasons[0] + '__' + seasons[-1] + '.csv'
        stacked_filepaths.append(STACKED_DIR / nation / league / fn)

    n_written = write_dfs_to_filepaths(stacked_leagues, stacked_filepaths)
    print(n_written)
Ejemplo n.º 2
0
def scope_football_data_co_uk():
    """Load the in-scope football-data-co-uk files and save them under SCOPED_DIR.

    The scope dictionary is unpickled from ``SCOPE_DICT_DIR`` and, together
    with ``RAW_DIR``, passed to ``make_fdcuk_filepaths``; the resulting paths
    are loaded into DataFrames. Output paths are derived from those frames
    with ``data_origin='football-data-co-uk'`` and the frames are written
    there. The value returned by ``write_dfs_to_filepaths`` is printed.
    """
    scope = load_pickle(SCOPE_DICT_DIR)
    season_frames = load_fdcuk_filepaths_as_dfs(
        make_fdcuk_filepaths(RAW_DIR, scope))

    target_paths = make_filepaths_from_dfs(
        SCOPED_DIR,
        season_frames,
        data_origin='football-data-co-uk',
    )
    saved_count = write_dfs_to_filepaths(season_frames, target_paths)
    print(saved_count)
Ejemplo n.º 3
0
def standardize_all_dfs(source_dir):
    """Standardize one source's cleaned CSVs and save the non-empty results.

    Args:
        source_dir: Sub-directory of ``CLEANED_DIR`` holding the CSV files to
            read. It is also forwarded to ``make_filepaths_from_dfs`` as its
            third positional argument.

    The value returned by ``write_dfs_to_filepaths`` is printed; nothing is
    returned.
    """
    cleaned_paths = get_filepaths(CLEANED_DIR / source_dir, ext='csv')
    standardized = standardize_dfs(read_csvs_to_dfs(cleaned_paths), REF_DIR)

    # Not all standardization dictionaries are available yet, so keep only
    # the frames that have at least one row (len() of such a frame is truthy).
    non_empty = list(filter(len, standardized))

    target_paths = make_filepaths_from_dfs(STDZED_DIR, non_empty, source_dir)
    saved_count = write_dfs_to_filepaths(non_empty, target_paths)
    print(saved_count)
Ejemplo n.º 4
0
def scope_indatabet_com():
    """Split the indatabet-com workbook into scoped frames and save them.

    The workbook at ``INDATABET_FILEPATH`` is read, passed through
    ``clean_monolithic_df`` and ``prep_monolithic_for_scoping``, then split by
    ``scope_monolithic_df`` using the scope dictionary unpickled from
    ``SCOPE_DICT_DIR``. Output paths are derived from the resulting frames
    with ``data_origin='indatabet-com'`` and the frames are written there.
    The value returned by ``write_dfs_to_filepaths`` is printed.
    """
    scope = load_pickle(SCOPE_DICT_DIR)

    # One variable carries the monolithic frame through each stage in turn.
    monolithic = pandas_read_xlsb_file(INDATABET_FILEPATH)
    monolithic = clean_monolithic_df(monolithic)
    monolithic = prep_monolithic_for_scoping(monolithic)

    per_scope_frames = scope_monolithic_df(monolithic, scope)
    target_paths = make_filepaths_from_dfs(
        SCOPED_DIR,
        per_scope_frames,
        data_origin='indatabet-com',
    )
    saved_count = write_dfs_to_filepaths(per_scope_frames, target_paths)
    print(saved_count)
Ejemplo n.º 5
0
def join_within_league_seasons(sources):
    """Merge the standardized CSVs of two sources and save them to JOINED_DIR.

    Args:
        sources: Indexable whose first two items name sub-directories of
            ``STDZED_DIR``; item 0 supplies the left-hand files and item 1
            the right-hand files.

    The CSV paths of both sources are reduced to matching pairs by
    ``get_matching_filepaths``, read, and merged with ``do_merge``. The
    merged frames are written to paths built under ``JOINED_DIR`` and the
    value returned by ``write_dfs_to_filepaths`` is printed.
    """
    left_source = sources[0]
    left_paths = get_filepaths(STDZED_DIR / left_source, ext='csv')
    right_source = sources[1]
    right_paths = get_filepaths(STDZED_DIR / right_source, ext='csv')

    left_paths, right_paths = get_matching_filepaths(
        left_paths, right_paths, left_source, right_source)

    joined_frames = do_merge(read_csvs_to_dfs(left_paths),
                             read_csvs_to_dfs(right_paths))

    joined_paths = make_filepaths_from_dfs(JOINED_DIR, joined_frames, '')
    saved_count = write_dfs_to_filepaths(joined_frames, joined_paths)
    print(saved_count)
def join_images_within_seasons():
    """Copy heatmap and shotmap images and save the frames that reference them.

    For each image type, in the order ``'heatmap'`` then ``'shotmap'``:

    1. ``insert_rel_path_into_dfs`` produces the frames for that type.
    2. The PNG files under ``STDZED_DIR / 'whoscored-com-<type>s'`` are
       copied to the paths built by ``make_merged_image_filepaths``; the
       value returned by ``copy_files`` is printed.
    3. The frames are passed through ``update_for_image_file_exists`` and
       written to paths built under ``JOINED_DIR``; the value returned by
       ``write_dfs_to_filepaths`` is printed.
    """
    for kind in ('heatmap', 'shotmap'):
        frames = insert_rel_path_into_dfs(kind)

        # Source directory name is the image type in plural form.
        source_root = 'whoscored-com-' + kind + 's'
        png_paths = get_filepaths(STDZED_DIR / source_root, ext='png')
        copy_targets = make_merged_image_filepaths(png_paths, source_root,
                                                   kind)
        print(copy_files(png_paths, copy_targets))

        # The update runs only after the copy step above has finished.
        frames = update_for_image_file_exists(frames, kind)
        csv_paths = make_filepaths_from_dfs(JOINED_DIR, frames, '')
        print(write_dfs_to_filepaths(frames, csv_paths))