def make_output():
    """Write predictions for every pairing of tournament teams, 2011-2014.

    Reads ``../Data/tourney_seeds.csv``; column 0 is matched against the
    season string and column 2 is used as the team ID.  For each season the
    team IDs are sorted with ``np.sort`` and every unordered pair is scored
    exactly once, with the lower-sorting ID as ``team1``.

    Each output row is ``['<season>_<team1>_<team2>', '<pred>']`` where
    ``pred`` is element 0 of the ``calc_new_features`` result.  Rows are
    echoed to stdout as they are produced and finally written, below an
    ``id,pred`` header, to ``../Data/out.csv``.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    from itertools import combinations

    tourney_seeds = read_csv_as_np_array('../Data/tourney_seeds.csv', header=False)
    # NOTE(review): the third path looks like a cache/output location for the
    # neutralized data -- confirm against neutralize_season_data.
    neutralized_season_data = neutralize_season_data(
        '../Data/regular_season_detailed_results.csv',
        '../Data/tourney_detailed_results.csv',
        '../Data/neutralized_season_data.csv'
    )
    tuning_params = dict(std=0.05)

    output = [['id', 'pred']]
    for season in ['2011', '2012', '2013', '2014']:
        season_rows = np.where(tourney_seeds[:, 0] == season)[0]
        tourney_teams = np.sort(tourney_seeds[season_rows][:, 2])
        # combinations() yields (teams[i], teams[j]) for i < j in input order,
        # i.e. the same pairs, in the same order, as repeatedly popping the
        # first team and pairing it with all remaining ones.
        for team1, team2 in combinations(tourney_teams, 2):
            prob = calc_new_features(
                neutralized_season_data, season, team1, team2, tuning_params
            )[0]
            game_id = [season + '_' + team1 + '_' + team2, str(prob)]
            output.append(game_id)
            # Single-argument form: prints the same text under Python 2 and 3.
            print(game_id)

    # The context manager guarantees the file is flushed and closed even if
    # writing fails.  Binary mode is what the Python 2 csv module expects.
    with open('../Data/out.csv', 'wb') as output_file:
        csv.writer(output_file).writerows(output)
def make_output_2015():
    """Write predictions and rounded scores for every 2015 team pairing.

    Reads ``../Data/tourney_seeds_2015.csv``; column 0 is matched against
    the season string and column 2 is used as the team ID.  The team IDs are
    sorted with ``np.sort`` and every unordered pair is scored exactly once,
    with the lower-sorting ID as ``team1``.

    Each output row is
    ``['<season>_<team1>_<team2>', '<pred>', '<score1>', '<score2>']`` where
    ``pred`` is element 0 of the ``calc_new_features`` result and
    ``score1`` / ``score2`` are elements 2 and 3 rounded to the nearest
    integer.  Rows are echoed to stdout and finally written, below an
    ``id,pred,score1,score2`` header, to
    ``../Data/out_2015_with_scores.csv``.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    from itertools import combinations

    tourney_seeds = read_csv_as_np_array('../Data/tourney_seeds_2015.csv', header=False)
    # NOTE(review): the third path looks like a cache/output location for the
    # neutralized data -- confirm against neutralize_season_data.
    neutralized_season_data = neutralize_season_data(
        '../Data/regular_season_detailed_results_combined.csv',
        '../Data/tourney_detailed_results.csv',
        '../Data/neutralized_season_data_combined.csv'
    )
    tuning_params = dict(std=0.05)

    output = [['id', 'pred', 'score1', 'score2']]
    for season in ['2015']:
        season_rows = np.where(tourney_seeds[:, 0] == season)[0]
        tourney_teams = np.sort(tourney_seeds[season_rows][:, 2])
        # combinations() yields (teams[i], teams[j]) for i < j in input order,
        # i.e. the same pairs, in the same order, as repeatedly popping the
        # first team and pairing it with all remaining ones.
        for team1, team2 in combinations(tourney_teams, 2):
            features = calc_new_features(
                neutralized_season_data, season, team1, team2, tuning_params
            )
            prob = features[0]
            # NOTE(review): elements 2 and 3 are presumably the predicted
            # points for team1 and team2 -- confirm against calc_new_features.
            score1 = np.round(features[2]).astype(int)
            score2 = np.round(features[3]).astype(int)
            game_id = [
                season + '_' + team1 + '_' + team2,
                str(prob),
                str(score1),
                str(score2),
            ]
            output.append(game_id)
            # Single-argument form: prints the same text under Python 2 and 3.
            print(game_id)

    # The context manager guarantees the file is flushed and closed even if
    # writing fails.  Binary mode is what the Python 2 csv module expects.
    with open('../Data/out_2015_with_scores.csv', 'wb') as output_file:
        csv.writer(output_file).writerows(output)
def _binary_log_loss(outcomes, probs):
    """Return the mean binary cross-entropy of ``probs`` against 0/1 ``outcomes``."""
    return -np.mean(outcomes*np.log(probs) + (1-outcomes)*np.log(1.0 - probs))


def main_2015_03_12():
    """Score the model against historical tournament games, season by season.

    For every row of ``../Data/tourney_detailed_results.csv`` the two teams
    (columns 2 and 4) are assigned to ``team1`` / ``team2`` in a random
    order, ``calc_new_features`` is evaluated for that ordering, and element
    0 of its result is treated as the predicted probability that ``team1``
    is the column-2 team's side of the coin flip (``results == 1``).

    For each distinct season (column 0) one line is printed:
    ``std season log_loss junk_log_loss`` where

    * ``log_loss`` is the binary log loss of the raw predictions, and
    * ``junk_log_loss`` is the log loss after min-max rescaling that
      season's predictions onto ``[0.01, 0.99]``.

    Rows in Kaggle-submission form (``season_lowId_highId``, probability
    for the lower ID) are also collected for seasons >= 2011, but writing
    them to disk is currently disabled.
    """
    tourney_data = read_csv_as_np_array('../Data/tourney_detailed_results.csv', header=False)
    # NOTE(review): the third path looks like a cache/output location for the
    # neutralized data -- confirm against neutralize_season_data.
    neutralized_season_data = neutralize_season_data(
        '../Data/regular_season_detailed_results.csv',
        '../Data/tourney_detailed_results.csv',
        '../Data/neutralized_season_data.csv'
    )

    # Replace the list with e.g. np.linspace(0.01, 0.1, 10) to sweep std.
    for std in [0.05]:
        tuning_params = dict(std=std)

        # Scramble the team order: per the original author's note, team1 is
        # always the winner in the tourney data, so a fixed order would make
        # every label identical.
        results = np.random.randint(0, 2, len(tourney_data))

        features = []
        output = [['id', 'pred']]
        for game, keep_order in zip(tourney_data, results):
            season = game[0]
            if keep_order == 1:
                team1, team2 = game[2], game[4]
            else:
                team1, team2 = game[4], game[2]

            features.append(
                calc_new_features(
                    neutralized_season_data, season, team1, team2, tuning_params
                )
            )

            if int(season) >= 2011:
                # Submission ids list the numerically smaller team first, so
                # flip the probability when the random order put it second.
                if int(team1) < int(team2):
                    game_id = [season + '_' + team1 + '_' + team2,
                               str(features[-1][0])]
                else:
                    game_id = [season + '_' + team2 + '_' + team1,
                               str(1.0 - features[-1][0])]
                output.append(game_id)

        # Writing the collected rows is intentionally disabled:
        # output_file = open('../Data/out.csv', 'wb')
        # csv.writer(output_file).writerows(output)

        features = np.array(features)
        for season in np.unique(tourney_data[:, 0]):
            idx = np.where(tourney_data[:, 0] == season)[0]
            season_probs = features[idx, 0]
            season_results = results[idx]

            log_loss = _binary_log_loss(season_results, season_probs)

            # Min-max rescale this season's predictions onto [0.01, 0.99].
            # NOTE: if every prediction in the season is identical the
            # denominator is zero and the result is not finite.
            junk = 0.98*(season_probs - season_probs.min())/(season_probs.max() - season_probs.min()) + 0.01
            junk_log_loss = _binary_log_loss(season_results, junk)

            # Space-joined str() values reproduce the output of the Python 2
            # statement `print std, season, log_loss, junk_log_loss`.
            print(' '.join(str(value) for value in (std, season, log_loss, junk_log_loss)))