def sim_scheduled_games(sch_game_dicts, n):
    """Simulate every scheduled game and tabulate the results per trifecta.

    A table of all trifectas is built from ``CombosAndPermuts().get_trifectas()``
    with columns ``First``/``Second``/``Third`` and an index made of the three
    positions concatenated as a string.  Every game in ``sch_game_dicts`` for
    which ``get_enhanced_player_list`` returns a player list is simulated with
    ``sc.runGame_get_WPS_frequencies`` and stored under its game name.  The
    per-game results are merged onto the trifecta table by index, written to
    ``fh.prediction_dict_file_path``, and each game column of the merged table
    is divided by ``n``.

    Args:
        sch_game_dicts: mapping of game name -> scheduled game data, in the
            form accepted by ``get_enhanced_player_list``.
        n: forwarded to ``sc.runGame_get_WPS_frequencies`` and used as the
            divisor for the game columns (presumably the number of simulated
            runs per game -- confirm against ``sc``).

    Returns:
        tuple: ``(full_df, prediction_dict)`` -- the merged DataFrame (game
        columns divided by ``n``) and the undivided per-game results.
    """
    trifectas = CombosAndPermuts().get_trifectas()
    trifecta_labels = [str(t[0]) + str(t[1]) + str(t[2]) for t in trifectas]
    wps_table = pd.DataFrame(
        data={
            'First': [t[0] for t in trifectas],
            'Second': [t[1] for t in trifectas],
            'Third': [t[2] for t in trifectas],
        },
        index=trifecta_labels,
    )

    prediction_dict = {}
    for game_name, game_data in sch_game_dicts.items():
        # NOTE(review): fetched once per game, exactly as before.  Hoisting it
        # out of the loop is only safe if neither get_pwin_dict() nor the
        # simulation mutates the returned dict -- not visible from here.
        pwin_dict = rds.get_pwin_dict()
        enh_player_list = get_enhanced_player_list(game_data)
        if enh_player_list is None:
            # No usable player list: the game gets no prediction (and hence
            # no column in the merged table).
            continue
        player_list = [
            ':'.join(ut.sorted_and_lowered(player_dict['name']))
            for player_dict in enh_player_list
        ]
        prediction_dict[game_name] = sc.runGame_get_WPS_frequencies(
            player_list, pwin_dict, n)

    pred_dict_df = pd.DataFrame.from_dict(prediction_dict)
    full_df = pd.merge(wps_table, pred_dict_df,
                       left_index=True, right_index=True)
    fh.write_dict(fh.prediction_dict_file_path, prediction_dict)

    for game_name in sch_game_dicts:
        # Only games that were actually simulated have a column.  Checking
        # explicitly (instead of a bare ``except: pass``) keeps genuine
        # errors in the division visible.
        if game_name in full_df.columns:
            full_df[game_name] = full_df[game_name] / n
    return full_df, prediction_dict
def get_pos_freq_actuals(prediction_dict, res_games_dict):
    """Build and persist a per-game tally of the actual trifecta results.

    Does nothing (and returns ``None``) when
    ``fh.prediction_actual_results_dict_file_path`` already exists.

    Otherwise a results structure with the same games and the same inner keys
    as ``prediction_dict`` is created with every value set to 0.  For each
    game whose results are found in ``res_games_dict`` (looked up under the
    game name with ``"sch"`` replaced by ``"res"``), the entry keyed by the
    concatenated first elements of ``'WIN'``, ``'PLACE'`` and ``'SHOW'`` is
    incremented by one.  The structure is written to the results file and
    returned.

    ``prediction_dict`` itself is left untouched: each inner mapping is
    copied before being zeroed.  (A shallow ``prediction_dict.copy()`` shares
    the inner mappings and would wipe the caller's predictions.)

    Args:
        prediction_dict: mapping of game name -> mapping of trifecta key ->
            value (only the keys are used here).
        res_games_dict: mapping of result-game name -> result data exposing
            ``'WIN'``, ``'PLACE'`` and ``'SHOW'`` sequences.

    Returns:
        dict or None: the tally per game, or ``None`` if the results file
        already existed.

    Raises:
        KeyError: if an actual trifecta key is not among the game's
            prediction keys (for plain ``dict`` inner mappings).
    """
    if fh.file_exists(fh.prediction_actual_results_dict_file_path):
        return None

    res_freq_dict = {}
    for game_name, frequencies in prediction_dict.items():
        # .copy() keeps the inner mapping's type but detaches it from the
        # caller's prediction data before the values are reset.
        zeroed = frequencies.copy()
        for trifecta_key in list(zeroed.keys()):
            zeroed[trifecta_key] = 0
        res_freq_dict[game_name] = zeroed

    for game_name in prediction_dict:
        res_dict = res_games_dict.get(game_name.replace("sch", "res"))
        if res_dict is None:
            # No results available for this scheduled game.
            continue
        top_123 = (str(res_dict['WIN'][0])
                   + str(res_dict['PLACE'][0])
                   + str(res_dict['SHOW'][0]))
        res_freq_dict[game_name][top_123] += 1

    fh.write_dict(fh.prediction_actual_results_dict_file_path, res_freq_dict)
    return res_freq_dict
def simulate_scheduled_games(sch_game_dicts, ji, n=12):
    """Simulate each scheduled game and persist the per-player results.

    Every game in ``sch_game_dicts`` for which ``get_enhanced_player_list``
    returns a player list is run through
    ``sc.runGame_get_WPSL_percentages`` and the result is stored under the
    game name.  Games without a player list are left out.  The collected
    results are written to ``fh.prediction_dict_file_path``.

    Args:
        sch_game_dicts: mapping of game name -> scheduled game data, in the
            form accepted by ``get_enhanced_player_list``.
        ji: unused.  It used to be incremented locally, which never reached
            the caller; the parameter is kept so existing call sites still
            work.
        n: forwarded to ``sc.runGame_get_WPSL_percentages`` (presumably the
            number of simulated runs per game -- confirm against ``sc``).

    Returns:
        dict: game name -> value returned by
        ``sc.runGame_get_WPSL_percentages`` for that game.
    """
    prediction_dict = {}
    for game_name, game_data in sch_game_dicts.items():
        enh_player_list = get_enhanced_player_list(game_data)
        if enh_player_list is None:
            continue
        # Each team is identified by its normalised player names joined
        # with ':'.
        player_list = [
            ':'.join(ut.sorted_and_lowered(player_dict['name']))
            for player_dict in enh_player_list
        ]
        prediction_dict[game_name] = sc.runGame_get_WPSL_percentages(
            player_list, n)

    fh.write_dict(fh.prediction_dict_file_path, prediction_dict)
    return prediction_dict
def get_player_season_records():
    """Load the player season records, creating and saving them if absent.

    Paths are set up for ``DATA_DIR`` first.  If the file at
    ``fh.player_season_records_file_path`` exists it is read and returned;
    otherwise the records are built with ``create_player_season_records()``,
    written to that path and returned.

    Returns:
        dict: the season records for all players.  Per the original author's
        note each entry has the form
        ``{eggy:richard [[0, 0, 0, 0], ..., [0, 0, 0, 3]]}``, i.e. a
        ``':'``-joined name mapped to a list of four-element lists.
    """
    fh.set_up_paths(DATA_DIR)

    # Fast path: the records were already generated for this data directory.
    if fh.file_exists(fh.player_season_records_file_path):
        return fh.read_dict(fh.player_season_records_file_path)

    # First use: build the records and cache them on disk for next time.
    season_records = create_player_season_records()
    fh.write_dict(fh.player_season_records_file_path, season_records)
    return season_records
def get_prediction_actual_results_dict(prediction_dict, res_game_dicts):
    """Collect the actual result of every predicted team, game by game.

    For each game in ``prediction_dict`` whose results are found in
    ``res_game_dicts`` (looked up under the game name with ``"sch"`` replaced
    by ``"res"``), a list with one single-entry dict per team is built: the
    key is the team's normalised ``':'``-joined name and the value is
    whatever ``dp.get_player_game_result`` returns for that team.  Games
    without results are omitted.  The outcome is written to
    ``fh.prediction_actual_results_dict_file_path``.

    The structure mirrors the prediction dict.  The original author's example
    was::

        ma-01-01-1996_res_Milford_14
            [{'aragues:lasa': [0, 1, 0, 0]},
             {'azpiri:zabala': [1, 0, 0, 0]},
             {'acin:beitia': [0, 0, 0, 0]}, ...]

    NOTE(review): the returned dict is keyed by the game names of
    ``prediction_dict`` (unchanged), not by the ``"res"`` name shown in the
    example above.

    Args:
        prediction_dict: mapping of game name -> list of single-entry dicts
            keyed by ``':'``-joined team name.
        res_game_dicts: mapping of result-game name -> result data accepted
            by ``dp.get_player_game_result``.

    Returns:
        dict: game name -> list of ``{team_name: result}`` dicts.
    """
    actual_results_dict = {}
    for game_name, game_data in prediction_dict.items():
        res_dict = res_game_dicts.get(game_name.replace("sch", "res"))
        if res_dict is None:
            # No results for this game; nothing to record.
            continue

        game_results = []
        for player_entry in game_data:
            # Each entry holds exactly one team; its key is "name:name".
            # list(...)[0] works for both Python 2 and Python 3 dict views.
            players = list(player_entry)[0].split(":")
            # The result is looked up before the key is normalised, matching
            # the original evaluation order.
            result = dp.get_player_game_result(res_dict, players)
            team_key = ':'.join(ut.sorted_and_lowered(players))
            game_results.append({team_key: result})
        actual_results_dict[game_name] = game_results

    fh.write_dict(fh.prediction_actual_results_dict_file_path,
                  actual_results_dict)
    return actual_results_dict
def simulate_scheduled_games_freq(sch_game_dicts, n = 12):
    """Simulate each scheduled game and persist the returned frequencies.

    For every game in ``sch_game_dicts`` the pwin dict is fetched from
    ``rds.get_pwin_dict()``; if ``get_enhanced_player_list`` yields a player
    list, the game is run through ``sc.runGame_get_WPS_frequencies`` and the
    result is stored under the game name.  The collected results are written
    to ``fh.prediction_dict_file_path``.

    Args:
        sch_game_dicts: mapping of game name -> scheduled game data.
        n: forwarded to ``sc.runGame_get_WPS_frequencies``.

    Returns:
        dict: game name -> value returned by
        ``sc.runGame_get_WPS_frequencies`` for that game.
    """
    results_by_game = {}
    for name, data in sch_game_dicts.items():
        win_probabilities = rds.get_pwin_dict()
        enhanced_players = get_enhanced_player_list(data)
        if enhanced_players is None:
            # Games without a usable player list are not simulated.
            continue

        # One "name:name" identifier per team, built from normalised names.
        team_names = [
            ':'.join(ut.sorted_and_lowered(entry['name']))
            for entry in enhanced_players
        ]
        results_by_game[name] = sc.runGame_get_WPS_frequencies(
            team_names, win_probabilities, n)

    fh.write_dict(fh.prediction_dict_file_path, results_by_game)
    return results_by_game
def run_correlations():
    """Correlate pwin values between pairs of data directories and save them.

    For every ``(first_dir, second_dir)`` pair and every threshold, the pwin
    dict of the first directory is built with
    ``rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD`` set to the threshold, and the
    pwin dict of the second directory with the fixed low threshold (20).  The
    values of the keys present in both dicts are paired and their Pearson
    correlation is computed; with fewer than five pairs the result is
    recorded as ``(0, 0)``.

    Side effects: reassigns ``rds.DATA_DIR`` and
    ``rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD``, re-runs ``fh.set_up_paths``,
    clears ``fh.data_summaries_path`` for each directory, writes
    ``corr_result.txt`` and ``corr_result.xls`` under
    ``<parent of this file's directory>/jaialai/analysis`` and prints the
    elapsed time.

    Returns:
        None
    """
    start_time = time.time()

    # Only a small subset is run.  The full sweep previously sketched here
    # was every ('correl_<i>m', 'correl_<j>w') pair for i in 1..14 and
    # j in 1..4, with thresholds
    # [20, 40, ..., 280, 300, 350, 400, 500].
    dir_list = [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')]
    thresholds = [100, 500]
    low_threshold = 20

    results_dir = {}
    for dir_pair in dir_list:
        for threshold in thresholds:
            rds.DATA_DIR = dir_pair[0]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = threshold
            pwin_dict_1 = rds.get_pwin_dict()

            rds.DATA_DIR = dir_pair[1]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = low_threshold
            pwin_dict_2 = rds.get_pwin_dict()

            # Pair the values key by key.  Storing the pairs as
            # {value_1: value_2} would silently merge every pair sharing the
            # same first value and drop data points from the correlation.
            common_keys = [key for key in pwin_dict_1 if key in pwin_dict_2]
            x = np.array([pwin_dict_1[key] for key in common_keys])
            y = np.array([pwin_dict_2[key] for key in common_keys])

            if len(x) < 5:
                # Too few pairs for a meaningful correlation.
                res = (0, 0)
            else:
                res = pearsonr(x, y)
            num = len(x)

            result_key = (str(dir_pair[0]) + '_' + dir_pair[1]
                          + '_' + str(threshold))
            results_dir[result_key] = [
                res[0], res[1], num, threshold,
                dir_pair[0].split('_')[1], dir_pair[1].split('_')[1],
            ]

    # One row per (pair, threshold) combination.
    df = pd.DataFrame(results_dir).transpose()

    curr_path = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    analysis_dir = os.path.join(curr_path, 'jaialai', 'analysis')
    fh.write_dict(os.path.join(analysis_dir, 'corr_result.txt'), results_dir)

    df.columns = ['corr', 'p value', 'number players', 'threshold',
                  'before', 'after']
    df.to_excel(os.path.join(analysis_dir, 'corr_result.xls'))

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("To complete %.0f correlations took %.2f minutes "
          % (len(results_dir), (time.time() - start_time) / 60))
def _print_fields(*fields):
    """Print the fields space-separated on one line (Python 2 and 3 alike)."""
    print(' '.join(str(field) for field in fields))


def run_experiment():
    """Run the betting experiment over replicates, gamma and serve advantage.

    For every combination of replicate (1..5), ``gamma`` and serve advantage:

    * any existing ``fh.prediction_actual_results_dict_file_path`` is removed
      and ``rds.run_data_setup()`` is run,
    * ``psp.gamma`` and ``psp.doublesServerAdvantage`` are set,
    * the scheduled and result games are loaded, the scheduled games are
      simulated with ``sim_scheduled_games`` and a ``BetTable`` with the
      configured thresholds builds its bet list,
    * ``get_pos_freq_actuals`` records the actual results and
      ``bet_table.get_returns`` yields stakes, gross return and net return,
    * the settings and returns of the run are stored in the experiment dict
      under ``"<gamma>_<serve advantage>_<replicate>"`` and printed.

    Side effects: sets ``rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD``,
    ``rds.DATA_DIR``, ``psp.gamma`` and ``psp.doublesServerAdvantage``;
    writes the collected results to ``fh.experiment_results_file_path``;
    logs progress (elapsed minutes) and prints a summary line.

    Returns:
        None
    """
    replicates = 5
    #gamma_range = [0.2,0.4,0.6,0.8,1.0,1.2]
    gamma_range = [0.6]
    serve_adv_range = [-0.05]
    n = 100000
    WPS_THRESHOLD = 0.3
    TRIFECTA_THRESHOLD = 1
    QUINIELA_THRESHOLD = 1
    EXACTA_THRESHOLD = 1
    rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = 140

    start_time = time.time()

    def elapsed_minutes():
        """Minutes elapsed since the experiment started."""
        return (time.time() - start_time) / 60

    experiment_dict = {}
    logging.info('starting at %s', elapsed_minutes())
    fh.set_up_paths(DATA_DIR)
    rds.DATA_DIR = DATA_DIR
    logging.info('run_data_setup() at %s', elapsed_minutes())

    for replicate in range(1, replicates + 1):
        logging.info('Replicate : %s at %s', replicate, elapsed_minutes())
        for gamma in gamma_range:
            logging.info('gamma : %s at %s', gamma, elapsed_minutes())
            psp.gamma = gamma
            for serve_adv in serve_adv_range:
                # get_pos_freq_actuals() does nothing when this file already
                # exists, so it is removed before every run.
                if fh.file_exists(fh.prediction_actual_results_dict_file_path):
                    os.remove(fh.prediction_actual_results_dict_file_path)
                logging.info('removing files at %s', elapsed_minutes())
                rds.run_data_setup()
                logging.info('run_data_setup at %s', elapsed_minutes())
                logging.info('serve adv : %s at %s', serve_adv,
                             elapsed_minutes())

                psp.doublesServerAdvantage = serve_adv
                sch_game_dicts = set_up_sch_game_dicts(DATA_DIR)
                logging.info('set_up_sch_game_dicts complete at %s',
                             elapsed_minutes())
                res_game_dicts = set_up_res_games_dicts(DATA_DIR,
                                                        sch_game_dicts)

                wps_prob_table, prediction_dict = sim_scheduled_games(
                    sch_game_dicts, n)
                logging.info('serve adv : sim_scheduled_games complete at %s',
                             elapsed_minutes())

                bet_table = BetTable(wps_prob_table)
                bet_table.WPS_THRESHOLD = WPS_THRESHOLD
                bet_table.EXACTA_THRESHOLD = EXACTA_THRESHOLD
                bet_table.QUINIELA_THRESHOLD = QUINIELA_THRESHOLD
                bet_table.TRIFECTA_THRESHOLD = TRIFECTA_THRESHOLD
                bet_table.create_bet_list(wps_prob_table)
                logging.info(
                    'serve adv : bet_table.create_bet_list complete at %s',
                    elapsed_minutes())

                get_pos_freq_actuals(prediction_dict, res_game_dicts)
                logging.info('get_pos_freq_actuals complete at %s',
                             elapsed_minutes())

                my_stakes, my_gross_return, my_return = bet_table.get_returns(
                    prediction_dict, res_game_dicts)

                # Record of this run: the settings in force plus the returns.
                run_dict = {}
                run_dict['rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD'] = rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD
                run_dict['rds.DATA_DIR'] = rds.DATA_DIR
                run_dict['DATA_DIR'] = DATA_DIR
                run_dict['n'] = n
                run_dict['psp.doublesServerAdvantage'] = psp.doublesServerAdvantage
                run_dict['psp.gamma'] = psp.gamma
                run_dict['my_stakes'] = my_stakes
                run_dict['my_gross_return'] = my_gross_return
                run_dict['replicate'] = replicate
                run_dict['my_return'] = my_return
                run_dict['WPS_THRESHOLD'] = bet_table.WPS_THRESHOLD
                run_dict['EXACTA_THRESHOLD'] = bet_table.EXACTA_THRESHOLD
                run_dict['QUINIELA_THRESHOLD'] = bet_table.QUINIELA_THRESHOLD
                run_dict['TRIFECTA_THRESHOLD'] = bet_table.TRIFECTA_THRESHOLD

                run_key = (str(psp.gamma) + '_'
                           + str(psp.doublesServerAdvantage) + '_'
                           + str(replicate))
                experiment_dict[run_key] = run_dict

                _print_fields()
                for k, v in run_dict.items():
                    _print_fields(k, v)
                _print_fields('out ', run_dict['my_stakes'])
                _print_fields('in ', run_dict['my_gross_return'])
                _print_fields('NET : ', run_dict['my_return'])
                logging.info('instance completed at %s', elapsed_minutes())

    fh.write_dict(fh.experiment_results_file_path, experiment_dict)

    num_trials = len(experiment_dict)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("To complete %.0f replicates of %.0f runs took %.2f minutes at n = %.0f over %.0f games"
          % (replicates, num_trials, elapsed_minutes(), n,
             len(prediction_dict)))