コード例 #1
0
def make():
    """Generate all pairings of different teams from the test data and persist them.

    Loads the test data via ``make_dataset.load_test_data()``, pairs every row
    with every other row (constant-key cross join), discards pairings where
    both sides carry the same ``TeamID``, adds the ``GameID``, ``TeamA_ID``
    and ``TeamB_ID`` key columns, removes the raw ``TeamID_x``/``TeamID_y``
    columns and writes the result to
    ``transformed/2019GeneratedMatchups.csv``.

    Returns:
        The matchup DataFrame that was written to disk.
    """
    fr = IntermediateFilePersistence('transformed/2019GeneratedMatchups.csv')

    print("Generating test matchups...")
    df_test = make_dataset.load_test_data()

    df_a = df_test
    # Season is removed from the right-hand side so the merged frame keeps a
    # single, unsuffixed 'Season' column (taken from the left-hand side).
    df_b = df_test.drop('Season', axis=1)

    # Merging on a constant key yields the cross product of the two frames.
    # ``axis`` is passed by keyword: the positional form was removed in
    # pandas 2.0.
    df = (df_a.assign(key=1)
              .merge(df_b.assign(key=1), on='key')
              .drop('key', axis=1))
    # Take an explicit copy so the column assignments below act on an
    # independent frame instead of a filtered selection
    # (avoids SettingWithCopyWarning).
    df = df[df['TeamID_x'] != df['TeamID_y']].copy()

    df['GameID'] = feature_utils.create_game_key(df['Season'], df['TeamID_x'],
                                                 df['TeamID_y'])
    df['TeamA_ID'] = feature_utils.create_key_from_season_team(
        df['Season'], df['TeamID_x'])
    df['TeamB_ID'] = feature_utils.create_key_from_season_team(
        df['Season'], df['TeamID_y'])

    df = df.drop(labels=['TeamID_x', 'TeamID_y'], axis=1)

    print("Writing test values to disk...")

    fr.write_to_csv(df)

    return df
コード例 #2
0
def load_seed_data():
    """Read the tournament seeds CSV, key it by season and team, and persist it.

    A ``TeamSeasonId`` column is derived from ``Season`` and ``TeamID`` via
    ``feature_utils.create_key_from_season_team``; the raw ``TeamID`` column
    is then removed and the frame is written to ``transformed/SeedData.csv``.

    Returns:
        The transformed seed DataFrame that was written to disk.
    """
    seeds = pd.read_csv(tourney_seeds_csv)

    team_season_key = feature_utils.create_key_from_season_team(
        seeds['Season'], seeds['TeamID'])
    seeds['TeamSeasonId'] = team_season_key

    # The combined key replaces the plain team identifier.
    seeds = seeds.drop(labels='TeamID', axis=1)

    IntermediateFilePersistence('transformed/SeedData.csv').write_to_csv(seeds)
    return seeds
コード例 #3
0
def load_advanced_team_data(start=START_YEAR, end=END_YEAR):
    """Load the per-year advanced school stats, parse them and persist the result.

    For every year in ``[start, end]`` the file
    ``<base_dir>external/bball_reference/advanced/<year>SchoolAdvanced.csv``
    is read and tagged with a ``Year`` column. The yearly frames are stacked,
    passed through ``helper.parse_advanced``, rows containing missing values
    are dropped, and the result is written to
    ``transformed/SeasonRatings.csv``.

    Args:
        start: First year to load (inclusive).
        end: Last year to load (inclusive).

    Returns:
        The parsed DataFrame with incomplete rows removed.
    """
    bball_ref_dir = base_dir + 'external/bball_reference/advanced/'

    # Collect the yearly frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated data on every call.
    yearly_frames = []
    for year in range(start, end + 1):
        advanced_csv = bball_ref_dir + str(year) + 'SchoolAdvanced.csv'
        # header=1: column names come from the second line of the file.
        df_advanced = pd.read_csv(advanced_csv, header=1)
        df_advanced['Year'] = year
        yearly_frames.append(df_advanced)

    # pd.concat rejects an empty list; keep the original behaviour of handing
    # an empty frame to the parser when the year range is empty.
    if yearly_frames:
        df_regular_season_aggregated_advanced = pd.concat(yearly_frames)
    else:
        df_regular_season_aggregated_advanced = pd.DataFrame()

    df = helper.parse_advanced(df_regular_season_aggregated_advanced)
    # ``axis`` must be passed by keyword: dropna is keyword-only in pandas 2.0.
    df = df.dropna(axis=0)
    fr = IntermediateFilePersistence('transformed/SeasonRatings.csv')
    fr.write_to_csv(df)
    return df
コード例 #4
0
def load_season_team_data(start=START_YEAR, end=END_YEAR):
    """Load the per-year regular-season team stats and persist the parsed form.

    For every year in ``[start, end]`` the file
    ``<base_dir>/external/bball_reference/<year>_season.csv`` is read and
    tagged with a ``Year`` column. The yearly frames are stacked, parsed with
    ``helper.parse_single_season_team_data`` and the parsed result is written
    to ``transformed/SeasonRawStats.csv``.

    Args:
        start: First year to load (inclusive).
        end: Last year to load (inclusive).

    Returns:
        The stacked DataFrame that was passed to the parser, not the parser's
        return value that is written to disk.
    """
    bball_ref_dir = base_dir + '/external/bball_reference/'

    # Collect the yearly frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated data on every call.
    yearly_frames = []
    for year in range(start, end + 1):
        regular_season_csv = bball_ref_dir + str(year) + '_season.csv'
        # header=1: column names come from the second line of the file.
        df_regular_season = pd.read_csv(regular_season_csv, header=1)
        df_regular_season['Year'] = year
        yearly_frames.append(df_regular_season)

    # pd.concat rejects an empty list; keep the original behaviour of handing
    # an empty frame to the parser when the year range is empty.
    if yearly_frames:
        df_regular_season_aggregated = pd.concat(yearly_frames)
    else:
        df_regular_season_aggregated = pd.DataFrame()

    df = helper.parse_single_season_team_data(df_regular_season_aggregated)
    fr = IntermediateFilePersistence('transformed/SeasonRawStats.csv')
    fr.write_to_csv(df)
    # NOTE(review): the aggregated input is returned rather than the parsed
    # ``df`` that was persisted; kept as-is for callers - confirm this is
    # intended.
    return df_regular_season_aggregated
コード例 #5
0
def persist(df):
    """Write ``df`` to ``NormalizedFeatureData.csv`` via IntermediateFilePersistence."""
    IntermediateFilePersistence('NormalizedFeatureData.csv').write_to_csv(df)
コード例 #6
0
def persist(df):
    """Write ``df`` to ``CanonicalFeatureData.csv`` via IntermediateFilePersistence."""
    IntermediateFilePersistence('CanonicalFeatureData.csv').write_to_csv(df)
コード例 #7
0
def persist(df):
    """Write ``df`` to ``TeamData.csv`` via IntermediateFilePersistence."""
    IntermediateFilePersistence('TeamData.csv').write_to_csv(df)