def set_up_sch_game_dicts(DATA_DIR):
    """
    Build a dictionary of game dictionaries from every scheduled-games file.

    DATA_DIR is handed to fh.set_up_paths() before the scheduled file paths
    are requested from fh.

    Returns a dict whose keys are the names produced by
    dp.convert_str_to_dict(), documented as looking like
    'ma-01-01-1998_sch_Milford_1', and whose values are the matching game
    dictionaries, documented as looking like:

        {'DATE': '01/01/1998', 'DAY': 'Thursday', 'ABSOLUTE-DATE': 35795,
         'FRONTON': 'Milford', 'GAME': 1, 'GAME-COUNT': 15,
         'S/D': 'Doubles', 'POINTS': 7,
         'POS-1': ['Ara'], 'POS-1-ID': [4],
         'POS-2': ['Douglas'], 'POS-2-ID': [42],
         'POS-3': ['Liam'], 'POS-3-ID': [9],
         'POS-4': ['Tino'], 'POS-4-ID': [17],
         'POS-5': ['Aja'], 'POS-5-ID': [13],
         'POS-6': ['Aitor'], 'POS-6-ID': [52],
         'POS-7': ['Zarandona'], 'POS-7-ID': [38],
         'POS-8': ['Eggy'], 'POS-8-ID': [2],
         'POS-SUB': ['Altuna'], 'POS-SUB-ID': [15]}

    If two game strings yield the same name, the later one replaces the
    earlier one.

    NOTE: games with known substitutions are not filtered out here; only a
    placeholder for that step ever existed.
    """
    fh.set_up_paths(DATA_DIR)

    all_games_dict = {}
    for scheduled_file_path in fh.get_scheduled_file_paths():
        # One file holds several games; split its lines into one string per game.
        file_lines = fh.file_to_list(scheduled_file_path)
        game_strings = ut.format_file_list_to_game_string(file_lines)
        for game_string in game_strings:
            game_name, game_dict = dp.convert_str_to_dict(game_string, scheduled_file_path)
            all_games_dict[game_name] = game_dict

    return all_games_dict
def set_up_res_games_dicts(DATA_DIR, sch_games_dicts):
    """
    Build the dictionary of results games that correspond to scheduled games.

    Every results file reported by fh.get_results_file_paths() is parsed into
    game dictionaries with dp.convert_str_to_dict(). Only results whose name
    matches a scheduled game are kept, so a results file with no matching
    schedule entry is ignored.

    DATA_DIR        -- data directory handed to fh.set_up_paths().
    sch_games_dicts -- dict of scheduled games keyed by game name (only the
                       keys are used), e.g. the return value of
                       set_up_sch_game_dicts().

    Returns a dict mapping each matched results-game name to its game
    dictionary.
    """
    fh.set_up_paths(DATA_DIR)

    all_results_games_dict = {}
    for results_file_path in fh.get_results_file_paths():
        results_list = fh.file_to_list(results_file_path)
        for game_event_string in ut.format_file_list_to_game_string(results_list):
            game_name, game_dict = dp.convert_str_to_dict(game_event_string, results_file_path)
            all_results_games_dict[game_name] = game_dict

    # Keep only the results that relate to a scheduled game.
    fin_all_results_games_dict = {}
    for sch_name in sch_games_dicts:
        # NOTE(review): this rewrites every "sch" substring, not only the
        # schedule marker in the key -- confirm no other key component can
        # contain "sch".
        res_name = sch_name.replace("sch", "res")
        res_game = all_results_games_dict.get(res_name)
        if res_game is not None:
            fin_all_results_games_dict[res_name] = res_game

    return fin_all_results_games_dict
def get_roster_list():
    """
    Return the roster list for the module-level DATA_DIR.

    Takes no arguments; the data directory comes from DATA_DIR.

    If the roster list file does not exist, the roster is built with
    create_roster_list(), written to fh.roster_list_file and returned.

    If the file exists, its lines are read, each line is additionally split
    on ':' and appended to the same list, and that combined list is passed to
    dp.get_names_from_list(). The names it returns that are not themselves
    members of the combined list are returned.

    Documented return shape: a list of lists holding each player and each
    two-player team, e.g. [['player1'], ['player2', 'player3'], ...].
    """
    fh.set_up_paths(DATA_DIR)

    if not fh.file_exists(fh.roster_list_file):
        fin_list = create_roster_list()
        fh.write_list_of_lists_file(fin_list, fh.roster_list_file)
        return fin_list

    roster_list = fh.file_to_list(fh.roster_list_file)
    # Snapshot the raw lines first: the list is extended while they are processed.
    raw_lines = list(roster_list)
    roster_list.extend(raw_line.split(':') for raw_line in raw_lines)

    candidate_names = dp.get_names_from_list(roster_list)
    return [name for name in candidate_names if name not in roster_list]
def run_data_setup():
    """
    Generate the data summary files for the module-level DATA_DIR.

    Sets up the paths for DATA_DIR, then calls the roster, player season
    record and pwin getters in that order. The return values are discarded;
    the calls are made for the files they create when missing (true of the
    two getters visible in this module; presumably also of get_pwin_dict --
    confirm).
    """
    fh.set_up_paths(DATA_DIR)

    get_roster_list()
    get_player_season_records()
    get_pwin_dict()
def get_player_season_records():
    """
    Return the player season records for the module-level DATA_DIR.

    If fh.player_season_records_file_path exists, the records are read from
    it. Otherwise they are built with create_player_season_records(), written
    to that path and returned.

    Documented shape: a dictionary keyed by player (or team, e.g.
    'eggy:richard') whose value is a list of eight rows of four counts:

        {'eggy:richard': [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                          [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0],
                          [0, 0, 0, 0], [0, 0, 0, 3]]}
    """
    fh.set_up_paths(DATA_DIR)

    if fh.file_exists(fh.player_season_records_file_path):
        return fh.read_dict(fh.player_season_records_file_path)

    all_player_season_records = create_player_season_records()
    fh.write_dict(fh.player_season_records_file_path, all_player_season_records)
    return all_player_season_records
def create_pwin_dict():
    """
    Compute a p_win value for each player from their season record.

    For every player returned by get_player_season_records(), each count in
    the record is weighted by the matching pair from fh.get_point_factors():
    element 0 of the pair accumulates into points won, element 1 into points
    played. The outer index of the record is the post position; the inner
    index was named WPSL_pos originally (presumably win/place/show/lose --
    confirm).

    p_win is points won divided by points played. A player is included only
    when their points played exceeds PWIN_DICT_POINTS_PLAYED_THRESHOLD.

    Returns a dict mapping player to p_win.
    """
    fh.set_up_paths(DATA_DIR)

    pwin_dict = {}
    points_factors = fh.get_point_factors()
    season_records = get_player_season_records()

    for player, season_record in season_records.items():
        points_won = 0
        points_played = 0
        for post_pos, finish_counts in enumerate(season_record):
            for finish_idx, freq in enumerate(finish_counts):
                factor_pair = points_factors[post_pos][finish_idx]
                points_won += freq * factor_pair[0]
                points_played += freq * factor_pair[1]

        # Players at or below the threshold are left out of the result.
        if points_played > PWIN_DICT_POINTS_PLAYED_THRESHOLD:
            pwin_dict[player] = points_won / points_played

    return pwin_dict
def run_correlations():
    """
    Correlate player p_win values between pairs of data directories.

    For every (first_dir, second_dir) pair and every threshold:
      * the first directory's data summaries are cleared and its pwin dict
        is fetched with rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = threshold;
      * the second directory's data summaries are cleared and its pwin dict
        is fetched with the fixed low threshold;
      * the Pearson correlation is computed over the players present in
        both dicts. With fewer than five shared players the result is
        recorded as (0, 0).

    Side effects: rebinds rds.DATA_DIR and
    rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD, clears fh.data_summaries_path for
    each directory visited, writes corr_result.txt and corr_result.xls under
    <parent of this file's directory>/jaialai/analysis, and prints a timing
    summary.
    """
    start_time = time.time()

    # Directory pairs to compare. A full sweep would be every
    # ('correl_<i>m', 'correl_<j>w') pair for i in 1..14 and j in 1..4, and
    # thresholds from 20 to 500; only this short selection is enabled.
    dir_list = [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')]
    thresholds = [100, 500]
    low_threshold = 20

    results_dir = {}
    for dir_pair in dir_list:
        for threshold in thresholds:
            rds.DATA_DIR = dir_pair[0]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = threshold
            pwin_dict_1 = rds.get_pwin_dict()

            rds.DATA_DIR = dir_pair[1]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = low_threshold
            pwin_dict_2 = rds.get_pwin_dict()

            # Pair the two values per player. Pairing through a dict keyed on
            # the first p_win value would merge players whose first-period
            # p_win happens to be equal and lose data points.
            shared_players = [player for player in pwin_dict_1 if player in pwin_dict_2]
            x = np.array([pwin_dict_1[player] for player in shared_players])
            y = np.array([pwin_dict_2[player] for player in shared_players])

            if len(x) < 5:
                # Too few shared players for a meaningful correlation.
                res = (0, 0)
            else:
                res = pearsonr(x, y)
            num = len(x)

            result_key = str(dir_pair[0]) + '_' + dir_pair[1] + '_' + str(threshold)
            results_dir[result_key] = [res[0], res[1], num, threshold,
                                       dir_pair[0].split('_')[1],
                                       dir_pair[1].split('_')[1]]

    # One row per (pair, threshold) run.
    df = pd.DataFrame(results_dir)
    df = df.transpose()

    curr_path = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    fh.write_dict(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.txt'), results_dir)

    df.columns = ['corr', 'p value', 'number players', 'threshold', 'before', 'after']
    df.to_excel(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.xls'))

    print("To complete %.0f correlations took %.2f minutes " % (len(results_dir), (time.time() - start_time)/60))
def trial_calculate_quadratic_loss_function():
    """
    Set up the paths for the module-level DATA_DIR, then call
    calculate_quadratic_loss_function(). Its result is discarded.
    """
    fh.set_up_paths(DATA_DIR)
    calculate_quadratic_loss_function()
def run_experiment():
    """
    Run the betting simulation for every replicate / gamma / serve-advantage
    combination and record the returns of each run.

    Each run:
      * removes fh.prediction_actual_results_dict_file_path if it exists and
        re-runs rds.run_data_setup();
      * loads the scheduled games and their matching results;
      * simulates the scheduled games with n iterations;
      * builds a BetTable with the configured thresholds and its bet list;
      * calls get_pos_freq_actuals() and bet_table.get_returns();
      * stores the run settings and returns in experiment_dict under the key
        '<gamma>_<serve advantage>_<replicate>' and prints them.

    After all runs, experiment_dict is written to
    fh.experiment_results_file_path and a timing summary is printed.

    Side effects: rebinds rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD,
    rds.DATA_DIR, psp.gamma and psp.doublesServerAdvantage.
    """
    replicates = 5
    gamma_range = [0.6]
    serve_adv_range = [-0.05]
    n = 100000
    WPS_THRESHOLD = 0.3
    TRIFECTA_THRESHOLD = 1
    QUINIELA_THRESHOLD = 1
    EXACTA_THRESHOLD = 1
    rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = 140

    start_time = time.time()

    def minutes_elapsed():
        # Minutes since the experiment started.
        return (time.time() - start_time)/60

    def emit(*fields):
        # Fields separated by single spaces, then a newline -- the same text
        # the statement form `print a, b` produces.
        print(' '.join(str(field) for field in fields))

    experiment_dict = {}
    logging.info('starting at %s', minutes_elapsed())
    fh.set_up_paths(DATA_DIR)
    rds.DATA_DIR = DATA_DIR
    logging.info('run_data_setup() at %s', minutes_elapsed())

    for replicate in range(1, replicates + 1):
        logging.info('Replicate : %s at %s', replicate, minutes_elapsed())
        for gamma in gamma_range:
            logging.info('gamma : %s at %s', gamma, minutes_elapsed())
            psp.gamma = gamma
            for serve_adv in serve_adv_range:
                # Drop the previous run's prediction/actual results file.
                if fh.file_exists(fh.prediction_actual_results_dict_file_path):
                    os.remove(fh.prediction_actual_results_dict_file_path)
                logging.info('removing files at %s', minutes_elapsed())
                rds.run_data_setup()
                logging.info('run_data_setup at %s', minutes_elapsed())
                logging.info('serve adv : %s at %s', serve_adv, minutes_elapsed())

                run_dict = {}
                psp.doublesServerAdvantage = serve_adv

                sch_game_dicts = set_up_sch_game_dicts(DATA_DIR)
                logging.info('set_up_sch_game_dicts complete at %s', minutes_elapsed())
                res_game_dicts = set_up_res_games_dicts(DATA_DIR, sch_game_dicts)

                wps_prob_table, prediction_dict = sim_scheduled_games(sch_game_dicts, n)
                logging.info('serve adv : sim_scheduled_games complete at %s', minutes_elapsed())

                bet_table = BetTable(wps_prob_table)
                bet_table.WPS_THRESHOLD = WPS_THRESHOLD
                bet_table.EXACTA_THRESHOLD = EXACTA_THRESHOLD
                bet_table.QUINIELA_THRESHOLD = QUINIELA_THRESHOLD
                bet_table.TRIFECTA_THRESHOLD = TRIFECTA_THRESHOLD
                bet_table.create_bet_list(wps_prob_table)
                logging.info('serve adv : bet_table.create_bet_list complete at %s', minutes_elapsed())

                get_pos_freq_actuals(prediction_dict, res_game_dicts)
                logging.info('get_pos_freq_actuals complete at %s', minutes_elapsed())

                my_stakes, my_gross_return, my_return = bet_table.get_returns(prediction_dict, res_game_dicts)

                # Record the settings in force for this run with its returns.
                run_dict['rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD'] = rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD
                run_dict['rds.DATA_DIR'] = rds.DATA_DIR
                run_dict['DATA_DIR'] = DATA_DIR
                run_dict['n'] = n
                run_dict['psp.doublesServerAdvantage'] = psp.doublesServerAdvantage
                run_dict['psp.gamma'] = psp.gamma
                run_dict['my_stakes'] = my_stakes
                run_dict['my_gross_return'] = my_gross_return
                run_dict['replicate'] = replicate
                run_dict['my_return'] = my_return
                run_dict['WPS_THRESHOLD'] = bet_table.WPS_THRESHOLD
                run_dict['EXACTA_THRESHOLD'] = bet_table.EXACTA_THRESHOLD
                run_dict['QUINIELA_THRESHOLD'] = bet_table.QUINIELA_THRESHOLD
                run_dict['TRIFECTA_THRESHOLD'] = bet_table.TRIFECTA_THRESHOLD

                run_key = str(psp.gamma) + '_' + str(psp.doublesServerAdvantage) + '_' + str(replicate)
                experiment_dict[run_key] = run_dict

                emit()
                for setting, value in run_dict.items():
                    emit(setting, value)
                emit('out ', run_dict['my_stakes'])
                emit('in ', run_dict['my_gross_return'])
                emit('NET : ', run_dict['my_return'])
                logging.info('instance completed at %s', minutes_elapsed())

    fh.write_dict(fh.experiment_results_file_path, experiment_dict)

    num_trials = len(experiment_dict)
    print("To complete %.0f replicates of %.0f runs took %.2f minutes at n = %.0f over %.0f games" % (replicates, num_trials, minutes_elapsed(), n, len(prediction_dict)))