Exemple #1
0
def make_output():
    """Write pairwise predictions for the 2011-2014 tourney fields to CSV.

    For each season, every unordered pair of teams listed in
    ``../Data/tourney_seeds.csv`` is scored with ``calc_new_features`` and
    the first element of its result is stored as the prediction.  Rows are
    ``[id, pred]`` with ``id`` formatted as ``<season>_<team1>_<team2>``,
    where ``team1`` sorts before ``team2``.  Each row is also printed as it
    is produced.  The result is written to ``../Data/out.csv``.
    """
    tourney_seeds = read_csv_as_np_array('../Data/tourney_seeds.csv',
                                         header=False)
    neutralized_season_data = neutralize_season_data(
        '../Data/regular_season_detailed_results.csv',
        '../Data/tourney_detailed_results.csv',
        '../Data/neutralized_season_data.csv'
    )

    tuning_params = dict(std=0.05)
    output = [['id', 'pred']]
    for season in ['2011', '2012', '2013', '2014']:
        # Column 0 is matched against the season; column 2 is used as the
        # team identifier (presumably the team id column -- confirm against
        # the seeds file layout).
        season_rows = np.where(tourney_seeds[:, 0] == season)[0]
        tourney_teams = np.sort(tourney_seeds[season_rows][:, 2])

        # Visit each pair once, always with the lower-sorted team first.
        for position, team1 in enumerate(tourney_teams[:-1]):
            for team2 in tourney_teams[position + 1:]:
                prob = calc_new_features(
                    neutralized_season_data,
                    season,
                    team1,
                    team2,
                    tuning_params
                )[0]
                game_id = [season + '_' + team1 + '_' + team2, str(prob)]
                output.append(game_id)
                print(game_id)

    # The context manager guarantees the file is flushed and closed even if
    # writing fails.  Binary mode is kept because this is Python 2 code and
    # its csv module expects files opened with the 'b' flag.
    with open('../Data/out.csv', 'wb') as output_file:
        csv.writer(output_file).writerows(output)
Exemple #2
0
def make_output_2015():
    """Write pairwise predictions and predicted scores for the 2015 field.

    Every unordered pair of teams listed for season ``'2015'`` in
    ``../Data/tourney_seeds_2015.csv`` is scored with ``calc_new_features``.
    Element 0 of the result is stored as the prediction; elements 2 and 3
    are rounded to integers and stored as ``score1`` and ``score2``.  Rows
    are ``[id, pred, score1, score2]`` with ``id`` formatted as
    ``<season>_<team1>_<team2>``, where ``team1`` sorts before ``team2``.
    Each row is also printed as it is produced.  The result is written to
    ``../Data/out_2015_with_scores.csv``.
    """
    tourney_seeds = read_csv_as_np_array('../Data/tourney_seeds_2015.csv',
                                         header=False)
    neutralized_season_data = neutralize_season_data(
        '../Data/regular_season_detailed_results_combined.csv',
        '../Data/tourney_detailed_results.csv',
        '../Data/neutralized_season_data_combined.csv'
    )

    tuning_params = dict(std=0.05)
    output = [['id', 'pred', 'score1', 'score2']]
    for season in ['2015']:
        # Column 0 is matched against the season; column 2 is used as the
        # team identifier (presumably the team id column -- confirm against
        # the seeds file layout).
        season_rows = np.where(tourney_seeds[:, 0] == season)[0]
        tourney_teams = np.sort(tourney_seeds[season_rows][:, 2])

        # Visit each pair once, always with the lower-sorted team first.
        for position, team1 in enumerate(tourney_teams[:-1]):
            for team2 in tourney_teams[position + 1:]:
                features = calc_new_features(
                    neutralized_season_data,
                    season,
                    team1,
                    team2,
                    tuning_params
                )
                prob = features[0]
                score1 = np.round(features[2]).astype(int)
                score2 = np.round(features[3]).astype(int)
                game_id = [season + '_' + team1 + '_' + team2,
                           str(prob), str(score1), str(score2)]
                output.append(game_id)
                print(game_id)

    # The context manager guarantees the file is flushed and closed even if
    # writing fails.  Binary mode is kept because this is Python 2 code and
    # its csv module expects files opened with the 'b' flag.
    with open('../Data/out_2015_with_scores.csv', 'wb') as output_file:
        csv.writer(output_file).writerows(output)
Exemple #3
0
def main_2015_03_12():
    """Print the per-season log loss of the model on past tourney games.

    Every row of ``../Data/tourney_detailed_results.csv`` is scored with
    ``calc_new_features`` and element 0 of the result is treated as the
    predicted probability for a randomly generated 0/1 label.  For each
    distinct season the function prints ``std``, the season, the log loss
    of the raw predictions, and the log loss of the same predictions after
    min-max rescaling into ``[0.01, 0.99]``.

    The labels come from ``np.random``, so the printed numbers vary between
    runs unless the caller seeds the generator.
    """
    tourney_data = read_csv_as_np_array('../Data/tourney_detailed_results.csv',
                                        header=False)

    neutralized_season_data = neutralize_season_data(
        '../Data/regular_season_detailed_results.csv',
        '../Data/tourney_detailed_results.csv',
        '../Data/neutralized_season_data.csv'
    )

    train_data = tourney_data
    seasons = train_data[:, 0]

    # Single value for now; swap in e.g. np.linspace(0.01, 0.1, 10) to sweep.
    for std in [0.05]:
        tuning_params = dict(std=std)

        # Scramble which side is passed as team1: per the original note,
        # team1 is always the winner in the tourney data, so without this
        # every label would be 1.  A label of 1 keeps the file's order
        # (column 2 as team1, column 4 as team2); 0 swaps the two.
        results = np.random.randint(0, 2, len(train_data))

        features = []
        for row, keep_order in zip(train_data, results):
            if keep_order == 1:
                team1, team2 = row[2], row[4]
            else:
                team1, team2 = row[4], row[2]

            features.append(
                calc_new_features(
                    neutralized_season_data,
                    row[0],
                    team1,
                    team2,
                    tuning_params
                )
            )
        features = np.array(features)

        for season in np.unique(seasons):
            idx = np.where(seasons == season)[0]
            outcomes = results[idx]
            probs = features[idx, 0]

            log_loss = -np.mean(outcomes*np.log(probs)
                                + (1 - outcomes)*np.log(1.0 - probs))

            # Min-max rescale the season's predictions into [0.01, 0.99].
            # NOTE: if all predictions in a season are identical the range
            # is zero and this evaluates to nan.
            prob_min = probs.min()
            prob_range = probs.max() - prob_min
            junk = 0.98*(probs - prob_min)/prob_range + 0.01
            junk_log_loss = -np.mean(outcomes*np.log(junk)
                                     + (1 - outcomes)*np.log(1.0 - junk))

            # Single formatted string prints identically to the original
            # space-separated print statement.
            print('%s %s %s %s' % (std, season, log_loss, junk_log_loss))