def sim_scheduled_games(sch_game_dicts, n):
    """
    Simulate every scheduled game and tabulate the results per trifecta.

    Parameters:
        sch_game_dicts: dict mapping a game name to that game's data dict
            (the form accepted by get_enhanced_player_list()).
        n: forwarded to sc.runGame_get_WPS_frequencies() and afterwards used
            as the divisor of every simulated game's column. Presumably the
            number of simulated runs per game -- confirm against sc.

    Returns:
        A tuple (full_df, prediction_dict):
        * prediction_dict maps each simulated game name to whatever
          sc.runGame_get_WPS_frequencies() returned for it (undivided).
        * full_df is the First/Second/Third trifecta table, indexed by the
          concatenated trifecta string, merged on its index with one column
          per simulated game; each game column is divided by n.

    Side effects:
        prediction_dict is written to fh.prediction_dict_file_path.

    Games for which get_enhanced_player_list() returns None are not simulated
    and therefore get no entry in prediction_dict and no column in full_df.
    """
    # Reference table: one row per trifecta, indexed by e.g. '123'.
    trifectas = CombosAndPermuts().get_trifectas()
    trifecta_labels = [str(t[0]) + str(t[1]) + str(t[2]) for t in trifectas]
    wpsTable = pd.DataFrame(
        data={
            'First': [t[0] for t in trifectas],
            'Second': [t[1] for t in trifectas],
            'Third': [t[2] for t in trifectas],
        },
        index=trifecta_labels
    )

    prediction_dict = {}
    for game_name, game_data in sch_game_dicts.items():
        # Fetched once per game, as before, so every simulation receives its
        # own mapping from rds.
        pwin_dict = rds.get_pwin_dict()

        enh_player_list = get_enhanced_player_list(game_data)
        if enh_player_list is None:
            # No usable player list for this game: skip it.
            continue

        # The simulator is given the same ':'-joined keys that are used to
        # look players up in the pwin dictionary.
        player_list = [
            ':'.join(ut.sorted_and_lowered(player_dict['name']))
            for player_dict in enh_player_list
        ]
        prediction_dict[game_name] = sc.runGame_get_WPS_frequencies(
            player_list, pwin_dict, n)

    pred_dict_df = pd.DataFrame.from_dict(prediction_dict)
    full_df = pd.merge(wpsTable, pred_dict_df,
                       left_index=True, right_index=True)

    fh.write_dict(fh.prediction_dict_file_path, prediction_dict)

    # Only simulated games have a column. Test for it explicitly instead of
    # swallowing every exception, so genuine errors are no longer hidden.
    for game_name in sch_game_dicts:
        if game_name in full_df.columns:
            full_df[game_name] = full_df[game_name] / n

    return full_df, prediction_dict
def get_enhanced_player_list(sch_game_dict):
    """
    Build the post-ordered player list, with win probabilities, for one game.

    Accepts the data dictionary of a single scheduled game (not the outer
    dictionary keyed by game name), for example:
    {1: ['Ara', 'Arruti'], 2: ['Douglas', 'Edward'], 3: ['Liam', 'Fitz'],
     'S/D': 'Doubles', 'GAME': 1, 'DATE': '01/01/1998', 'POINTS': 7,
     'FRONTON': 'Milford', 'POS-1-ID': [4, 28], ...}
    Only the integer keys 1 through 13 are read; each present key holds the
    player (or team) starting at that post. Some games have fewer posts than
    others, so missing keys are simply skipped.

    Each player is looked up in rds.get_pwin_dict() under the key
    ':'.join(ut.sorted_and_lowered(player)).

    Returns a list, in post order, formatted like:
    [{'name': ['Ara', 'Arruti'], 'pwin': 0.56},
     {'name': ['Douglas', 'Edward'], 'pwin': 0.65},
     {'name': ['Liam', 'Fitz'], 'pwin': 0.63}]

    If any player in the game has no probability of winning associated with
    him (the key is missing, or it maps to None) then returns None.
    """
    pwin_dict = rds.get_pwin_dict()

    real_player_list = []
    for post in range(1, 14):
        player = sch_game_dict.get(post)
        if player is None:
            continue

        pwin = pwin_dict.get(':'.join(ut.sorted_and_lowered(player)))
        if pwin is None:
            # Covers both a missing key and a key stored with a None value.
            # Dropping just this player would shift every later post
            # position, so the whole game is rejected instead.
            return None

        real_player_list.append({'name': player, 'pwin': pwin})

    return real_player_list
def simulate_scheduled_games_freq(sch_game_dicts, n=12):
    """
    Simulate each scheduled game and collect the results by game name.

    Parameters:
        sch_game_dicts: dict mapping a game name to that game's data dict
            (the form accepted by get_enhanced_player_list()).
        n: forwarded unchanged to sc.runGame_get_WPS_frequencies().

    Returns:
        dict mapping each simulated game name to the value returned by
        sc.runGame_get_WPS_frequencies() for it. Games for which
        get_enhanced_player_list() returns None are left out.

    Side effects:
        The resulting dict is written to fh.prediction_dict_file_path.
    """
    results_by_game = {}

    for game_name, game_data in sch_game_dicts.items():
        win_probabilities = rds.get_pwin_dict()
        enhanced_players = get_enhanced_player_list(game_data)

        # Nothing to simulate when the player list could not be built.
        if enhanced_players is None:
            continue

        # Players are passed to the simulator as ':'-joined keys.
        player_keys = [
            ':'.join(ut.sorted_and_lowered(entry['name']))
            for entry in enhanced_players
        ]
        results_by_game[game_name] = sc.runGame_get_WPS_frequencies(
            player_keys, win_probabilities, n)

    fh.write_dict(fh.prediction_dict_file_path, results_by_game)
    return results_by_game
# Example #4
# 0
def run_correlations():
    """
    Correlate players' win probabilities between pairs of data directories.

    For every (first_dir, second_dir) pair and every points-played threshold:
      1. point rds/fh at first_dir, clear fh.data_summaries_path, set
         rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD to the threshold and read the
         pwin dictionary;
      2. do the same for second_dir using the fixed low threshold (20);
      3. compute the Pearson correlation of the two probabilities over the
         players present in both dictionaries. With fewer than 5 shared
         players the result is recorded as (0, 0) instead.

    Side effects:
        * rds.DATA_DIR and rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD are
          reassigned and left at their last values.
        * The raw results are written with fh.write_dict() to
          <parent of this file's directory>/jaialai/analysis/corr_result.txt
          and as a table to corr_result.xls in the same directory.
        * The elapsed time is printed.

    Returns None.
    """
    start_time = time.time()

    # Directory pairs and thresholds currently in effect. (A full sweep would
    # pair 'correl_<1..14>m' with 'correl_<1..4>w' over a finer threshold
    # grid; only this reduced set is run.)
    dir_list = [('correl_1m', 'correl_1w'), ('correl_1m', 'correl_2w')]
    thresholds = [100, 500]
    low_threshold = 20
    results_dir = {}

    for dir_pair in dir_list:
        for threshold in thresholds:
            rds.DATA_DIR = dir_pair[0]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = threshold
            pwin_dict_1 = rds.get_pwin_dict()

            rds.DATA_DIR = dir_pair[1]
            fh.set_up_paths(rds.DATA_DIR)
            clear_dir(fh.data_summaries_path)
            rds.PWIN_DICT_POINTS_PLAYED_THRESHOLD = low_threshold
            pwin_dict_2 = rds.get_pwin_dict()

            # Pair the two probabilities per shared player. Keeping them in
            # parallel lists (rather than a dict keyed by the first
            # probability) preserves players whose first probability happens
            # to be equal, so every shared player counts as one data point.
            shared_players = [key for key in pwin_dict_1 if key in pwin_dict_2]
            x = np.array([pwin_dict_1[key] for key in shared_players])
            y = np.array([pwin_dict_2[key] for key in shared_players])

            if len(x) < 5:
                # Too few points for a meaningful correlation.
                res = (0, 0)
            else:
                res = pearsonr(x, y)
            num = len(x)

            result_key = str(dir_pair[0]) + '_' + dir_pair[1] + '_' + str(threshold)
            results_dir[result_key] = [
                res[0],
                res[1],
                num,
                threshold,
                dir_pair[0].split('_')[1],
                dir_pair[1].split('_')[1],
            ]

    # One row per (pair, threshold) after transposing.
    df = pd.DataFrame(results_dir)
    df = df.transpose()

    curr_path = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    fh.write_dict(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.txt'), results_dir)
    df.columns = ['corr', 'p value', 'number players', 'threshold', 'before', 'after']
    df.to_excel(os.path.join(curr_path, 'jaialai', 'analysis', 'corr_result.xls'))

    # Single-argument print(...) produces the same text on Python 2 and 3.
    print("To complete %.0f correlations took %.2f minutes " % (len(results_dir), (time.time() - start_time) / 60))