def sim_scheduled_games(sch_game_dicts, n):
    """Simulate the scheduled games and tabulate the results per trifecta.

    Every game whose players all have a win probability (i.e. for which
    ``get_enhanced_player_list`` does not return ``None``) is passed to
    ``sc.runGame_get_WPS_frequencies``.  The raw per-game results are
    written to ``fh.prediction_dict_file_path`` and also joined, column by
    column, onto a table listing every trifecta returned by
    ``CombosAndPermuts().get_trifectas()``.

    Args:
        sch_game_dicts: mapping of game name -> scheduled-game dictionary
            in the form accepted by ``get_enhanced_player_list``.
        n: forwarded to ``sc.runGame_get_WPS_frequencies`` and used as the
            divisor for each game's column in the returned frame
            (presumably the number of simulated runs -- confirm in ``sc``).

    Returns:
        A ``(full_df, prediction_dict)`` tuple.  ``full_df`` is indexed by
        the trifecta string (the three positions concatenated), has the
        columns ``First``, ``Second`` and ``Third`` plus one column per
        simulated game holding that game's values divided by ``n``.
        ``prediction_dict`` maps game name -> undivided simulator output.
    """
    trifectas = CombosAndPermuts().get_trifectas()
    trifecta_labels = [str(t[0]) + str(t[1]) + str(t[2]) for t in trifectas]
    wps_table = pd.DataFrame(
        data={
            'First': [t[0] for t in trifectas],
            'Second': [t[1] for t in trifectas],
            'Third': [t[2] for t in trifectas],
        },
        index=trifecta_labels,
    )

    prediction_dict = {}
    for game_name, game_data in sch_game_dicts.items():
        # NOTE(review): fetched once per game, as before.  It looks
        # loop-invariant, but rds is not visible here, so it is not hoisted.
        pwin_dict = rds.get_pwin_dict()
        enh_player_list = get_enhanced_player_list(game_data)
        if enh_player_list is None:
            # At least one player has no win probability: skip this game.
            continue
        player_list = [':'.join(ut.sorted_and_lowered(entry['name']))
                       for entry in enh_player_list]
        prediction_dict[game_name] = sc.runGame_get_WPS_frequencies(
            player_list, pwin_dict, n)

    pred_dict_df = pd.DataFrame.from_dict(prediction_dict)
    # Index-on-index join (pandas default: inner).  Assumes the simulator
    # keys its output by the same trifecta strings -- TODO confirm in sc.
    full_df = pd.merge(wps_table, pred_dict_df,
                       left_index=True, right_index=True)

    fh.write_dict(fh.prediction_dict_file_path, prediction_dict)

    # Skipped games have no column in full_df; test for the column
    # explicitly instead of hiding every possible error behind a bare except.
    for game_name in sch_game_dicts:
        if game_name in full_df.columns:
            full_df[game_name] = full_df[game_name] / n

    return full_df, prediction_dict
def get_enhanced_player_list(sch_game_dict):
    """Attach a win probability to every entrant of one scheduled game.

    ``sch_game_dict`` is the dictionary of a single game, with the
    entrants stored under integer post-position keys, e.g.::

        {1: ['Ara', 'Arruti'], 2: ['Douglas', 'Edward'],
         3: ['Liam', 'Fitz'], 4: ['Tino', 'Aritz'],
         'S/D': 'Doubles', 'GAME': 1, 'DATE': '01/01/1998',
         'POINTS': 7, 'FRONTON': 'Milford', ...}

    (i.e. the value stored under a game name such as
    ``'ma-01-01-1998_sch_Milford_1'``, not the name-keyed wrapper).
    Posts 1 through 13 are examined; posts that are absent are skipped
    because some games have fewer players.

    Each entrant is looked up in ``rds.get_pwin_dict()`` under the key
    ``':'.join(ut.sorted_and_lowered(entrant))``.

    Returns:
        A list in post-position order, formatted like::

            [{'name': ['Ara', 'Arruti'], 'pwin': 0.56},
             {'name': ['Douglas', 'Edward'], 'pwin': 0.65},
             {'name': ['Liam', 'Fitz'], 'pwin': 0.63}]

        or ``None`` if any entrant has no probability of winning
        associated with him.
    """
    # Collect the entrants in post-position order, skipping empty posts.
    entrants = []
    for post in range(1, 14):
        entrant = sch_game_dict.get(post)
        if entrant is not None:
            entrants.append(entrant)

    pwin_dict = rds.get_pwin_dict()

    real_player_list = []
    for player in entrants:
        # A missing key and a stored None both mean "no probability"; in
        # either case the whole game is rejected rather than silently
        # dropping the player and shifting the remaining post positions.
        pwin = pwin_dict.get(':'.join(ut.sorted_and_lowered(player)))
        if pwin is None:
            return None
        real_player_list.append({'name': player, 'pwin': pwin})

    return real_player_list
def simulate_scheduled_games_freq(sch_game_dicts, n=12):
    """Run the frequency simulation for each scheduled game.

    Games for which ``get_enhanced_player_list`` returns ``None`` are left
    out.  For every other game the player keys, the win-probability
    dictionary and ``n`` are handed to ``sc.runGame_get_WPS_frequencies``.
    The collected results are written to ``fh.prediction_dict_file_path``
    before being returned.

    Args:
        sch_game_dicts: mapping of game name -> scheduled-game dictionary.
        n: forwarded unchanged to ``sc.runGame_get_WPS_frequencies``.

    Returns:
        Dictionary mapping game name -> simulator output for that game.
    """
    predictions = {}

    for name, game in sch_game_dicts.items():
        win_probs = rds.get_pwin_dict()
        entrants = get_enhanced_player_list(game)
        if entrants is None:
            # No usable win probability for at least one player.
            continue

        # The simulator identifies players by their normalised key.
        keys = [':'.join(ut.sorted_and_lowered(entry['name']))
                for entry in entrants]
        predictions[name] = sc.runGame_get_WPS_frequencies(keys, win_probs, n)

    fh.write_dict(fh.prediction_dict_file_path, predictions)
    return predictions
def _load_pwin_dict(data_dir, points_played_threshold):
    """Point rds/fh at ``data_dir`` and return its win-probability dict."""
    rds.DATA_DIR = data_dir
    fh.set_up_paths(rds.DATA_DIR)
    # Presumably drops cached summaries so the dictionary is rebuilt for
    # this directory/threshold -- TODO confirm against clear_dir/rds.
    clear_dir(fh.data_summaries_path)
    rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = points_played_threshold
    return rds.get_pwin_dict()


def run_correlations(dir_list=None, thresholds=None):
    """Correlate players' win probabilities between pairs of data sets.

    For every ``(first_dir, second_dir)`` pair and every threshold, the
    win-probability dictionary of ``first_dir`` (built with that threshold)
    is compared with the one of ``second_dir`` (built with a fixed
    threshold of 20).  The Pearson correlation is computed over the players
    present in both dictionaries; with fewer than 5 shared players the
    result is recorded as ``(0, 0)``.

    The results are written to ``jaialai/analysis/corr_result.txt`` and
    ``jaialai/analysis/corr_result.xls`` under the parent of this file's
    directory, and a timing summary is printed.

    Side effects: ``rds.DATA_DIR`` and
    ``rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD`` are reassigned and left at
    the values used for the last pair.

    Args:
        dir_list: iterable of ``(first_dir, second_dir)`` name pairs of the
            form ``'correl_<label>'``.  Defaults to
            ``[('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')]``.
        thresholds: iterable of points-played thresholds applied to the
            first directory of each pair.  Defaults to ``[100, 500]``.
    """
    start_time = time.time()

    if dir_list is None:
        dir_list = [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')]
    if thresholds is None:
        thresholds = [100, 500]
    low_threshold = 20

    results_dir = {}
    for first_dir, second_dir in dir_list:
        for threshold in thresholds:
            pwin_dict_1 = _load_pwin_dict(first_dir, threshold)
            pwin_dict_2 = _load_pwin_dict(second_dir, low_threshold)

            # Pair the values through the shared player keys.  Keying a
            # dict by the first value would merge players that happen to
            # have the same win probability and lose data points.
            shared_players = [key for key in pwin_dict_1 if key in pwin_dict_2]
            x = np.array([pwin_dict_1[key] for key in shared_players])
            y = np.array([pwin_dict_2[key] for key in shared_players])
            num = len(shared_players)

            if num < 5:
                res = (0, 0)
            else:
                res = pearsonr(x, y)

            result_key = str(first_dir) + '_' + second_dir + '_' + str(threshold)
            results_dir[result_key] = [
                res[0],
                res[1],
                num,
                threshold,
                first_dir.split('_')[1],
                second_dir.split('_')[1],
            ]

    # One row per (pair, threshold) after transposing.
    df = pd.DataFrame(results_dir)
    df = df.transpose()

    curr_path = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    fh.write_dict(
        os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.txt'),
        results_dir)

    if results_dir:
        # An empty result set has no columns to label.
        df.columns = ['corr', 'p value', 'number players', 'threshold',
                      'before', 'after']
    df.to_excel(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.xls'))

    # Single parenthesised argument: same output on Python 2 and Python 3.
    print("To complete %.0f correlations took %.2f minutes " % (
        len(results_dir), (time.time() - start_time) / 60))