Python Extraction_Database.groupbyの例、pyhorse.Database_Management.Extraction_Database.groupby Pythonの例

コード例 #1

0

ファイルを表示

def CC_REC_DAY_PT3(Dataframe, HNAME_List, Raceday):

    """
    Predicted days until next Top 3

    For each horse: days elapsed since its last Top 3 finish minus the
    horse's average gap between Top 3 finishes. The same figure is built
    from the date of the best Beyer speed figure and the gaps between the
    three best Beyer runs.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the queries
    Raceday : Date of Race, parsed with the format %Y%m%d
    Return
    ------
    Dataframe [HNAME, CC_REC_DAY_PT3]
    """

    Date_Format = '%Y%m%d'
    #Date used for horses without any matching history
    Default_Date = '20120101'

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction_Performance = Extraction_Database("""
                                                 Select HNAME, First_Date, Last_T3_Date, Top_Beyer_Date from (
                                                 Select HNAME, First_Date, Top_Beyer_Date from (
                                                 Select HNAME, min(RADAT) First_Date from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) FIRST
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_BEYER, RADAT Top_Beyer_Date, max(BEYER_SPEED) from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) BEYER
                                                 ON FIRST.HNAME = BEYER.HNAME_BEYER) FIRST_BEYER
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_W, max(RADAT) Last_T3_Date from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                                 Group by HNAME) WIN
                                                 ON WIN.HNAME_W = FIRST_BEYER.HNAME
                                                 """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Extraction_Avg = Extraction_Database("""
                                         Select HNAME, RADAT, RESFP, BEYER_SPEED from Race_PosteriorDb
                                         where RADAT < {Raceday} and HNAME in {HNAME_List}
                                         """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    #Average gap between Top 3 finishes / between the three best Beyer runs
    Avg_Date = []
    for name, group in Extraction_Avg.groupby('HNAME'):
        T3_Dates = group.loc[group.loc[:, 'RESFP'] <= 3, 'RADAT']
        Beyer_Dates = group.nlargest(3,'BEYER_SPEED').loc[:, 'RADAT']
        Avg_T3 = pd.to_datetime(T3_Dates, format = Date_Format).diff().mean()
        Avg_Beyer = pd.to_datetime(Beyer_Dates, format = Date_Format).diff().mean()
        Avg_Date.append([name, Avg_T3, Avg_Beyer])
    Avg_Date = pd.DataFrame(Avg_Date, columns=['HNAME','Avg_T3','Avg_Beyer'])
    #Force timedelta dtype: a column holding only NaT would otherwise be
    #inferred as datetime64 and break the arithmetic below
    for Column in ['Avg_T3', 'Avg_Beyer']:
        Avg_Date[Column] = pd.to_timedelta(Avg_Date[Column].tolist())

    #The queries need the raw Raceday, so it is only parsed from here on
    Raceday = pd.to_datetime(Raceday, format = Date_Format)
    Default_Gap = Raceday - pd.to_datetime(Default_Date, format = Date_Format)

    Feature_DF = Feature_DF.merge(Extraction_Performance, how='left').merge(Avg_Date, how='left')
    Date_Columns = ['First_Date','Last_T3_Date','Top_Beyer_Date']
    Gap_Columns = ['Avg_T3','Avg_Beyer']
    Feature_DF.loc[:, Date_Columns] = Feature_DF.loc[:, Date_Columns].fillna(Default_Date)
    Feature_DF.loc[:, Gap_Columns] = Feature_DF.loc[:, Gap_Columns].fillna(Default_Gap)

    #The averages are signed by row order, hence the abs()
    Since_T3 = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Last_T3_Date'], format = Date_Format)
    Feature_DF['T3'] = Since_T3 - Feature_DF.loc[:, 'Avg_T3'].abs()
    Since_Beyer = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Top_Beyer_Date'], format = Date_Format)
    Feature_DF['Beyer'] = Since_Beyer - Feature_DF.loc[:, 'Avg_Beyer'].abs()

    #NOTE(review): both inputs of T3 are filled with defaults above, so T3
    #appears never to be missing and this fallback looks unreachable - confirm intent
    Days = Feature_DF.loc[:, 'T3'].fillna(Feature_DF.loc[:, 'Beyer'])
    #Whole days via the timedelta accessor instead of parsing str(Timedelta)
    Feature_DF['CC_REC_DAY_PT3'] = pd.to_timedelta(Days).dt.days
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAY_PT3']]

    return Feature_DF

コード例 #2

0

ファイルを表示

def CC_CLS_CL(Dataframe, HNAME_List, Raceday):

    """
    Horse's Competition Level

    For each of the horse's last 5 races (highest RARID before Raceday) the
    level is (mean of the 3 highest HJRAT in the race - horse's HJRAT) divided
    by the horse's finishing position. The per-race levels are then
    exponentially smoothed into one figure per horse.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and HJRAT
    HNAME_List : String of List of Horse Names (not used by this feature)
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_CL]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HJRAT']]

    Horse_Comp = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe['HNAME'].tolist():
        #Double any single quote so names like O'BRIEN stay a valid SQL literal
        Quoted_Horse = "'" + Horse.replace("'", "''") + "'"
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, HJRAT, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday = Raceday, Horse = Quoted_Horse))

        for RARID, race in Extraction.groupby('RARID'):
            Is_Horse = race.loc[:,'HNAME']==Horse
            Horse_Rat = race.loc[Is_Horse,'HJRAT'].to_list()[0]
            Horse_FP = race.loc[Is_Horse,'RESFP'].to_list()[0]
            Comp_Rat = race.nlargest(3, 'HJRAT').loc[:,'HJRAT'].mean()
            Comp_Level = (Comp_Rat - Horse_Rat) / Horse_FP
            Horse_Comp.append([Horse,Comp_Level])
    Horse_Comp = pd.DataFrame(Horse_Comp, columns=['HNAME', 'Comp_Level'])

    #Recency Weighting
    Comp = []
    for name, group in Horse_Comp.groupby('HNAME'):
        Comp_Figure = group.loc[:,'Comp_Level'].dropna().values
        if len(Comp_Figure) == 0:
            #Nothing usable: leave the horse out so it is filled below
            continue
        try :
            model = SimpleExpSmoothing(Comp_Figure)
            model = model.fit()
            Comp.append([name, model.forecast()[0]])
        except Exception:
            #Best effort: fall back to the first figure if smoothing fails
            Comp.append([name,Comp_Figure[0]])
    Comp = pd.DataFrame(Comp, columns=['HNAME','CC_CLS_CL'])

    Feature_DF = Feature_DF.merge(Comp, how='left')
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Lowest = Feature_DF.loc[:, 'CC_CLS_CL'].min()
    Feature_DF['CC_CLS_CL'] = Feature_DF.loc[:, 'CC_CLS_CL'].fillna(Lowest)
    Feature_DF['CC_CLS_CL'] = Feature_DF.loc[:, 'CC_CLS_CL'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_CL']]

    return Feature_DF

コード例 #3

0

ファイルを表示

def OD_PR_FAVB(Dataframe, HNAME_List, Raceday):
    """
    Number of favourites that ran behind the underlying horse in the last 5 races.

    The favourite of a race is every runner with the lowest RESFO. A race
    counts when the horse is listed first after sorting the favourite(s) and
    the horse by RESFP; races where the horse is the only favourite count 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RESFO
    HNAME_List : String of List of Horse Names (not used by this feature)
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_FAVB]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Fav_Beaten_List = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe['HNAME'].tolist():
        #Double any single quote so names like O'BRIEN stay a valid SQL literal
        Quoted_Horse = "'" + Horse.replace("'", "''") + "'"
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, RESFO, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday=Raceday,
                                                    Horse=Quoted_Horse))

        Won_Fav_tot = 0
        for RARID, race in Extraction.groupby('RARID'):
            fav_con = race.loc[:, 'RESFO'] == race.loc[:, 'RESFO'].min()
            horse_con = race.loc[:, 'HNAME'] == Horse
            Only_Fav_Horse = race.loc[fav_con | horse_con,
                                      ['HNAME', 'RESFP']].sort_values(
                                          'RESFP').reset_index(drop=True)
            if len(Only_Fav_Horse) != 1:
                #Position 0 after sorting by RESFP means the horse finished
                #ahead of the favourite(s)
                Horse_Rank = Only_Fav_Horse.index[
                    Only_Fav_Horse.loc[:, 'HNAME'] == Horse]
                Won_Fav = float(Horse_Rank[0] == 0)
            else:
                Won_Fav = 0
            Won_Fav_tot += Won_Fav
        Fav_Beaten_List.append([Horse, Won_Fav_tot])
    Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_FAVB'])

    Feature_DF = Feature_DF.merge(Fav_Beaten, how='left')
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Feature_DF['OD_PR_FAVB'] = Feature_DF.loc[:, 'OD_PR_FAVB'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_FAVB']]

    return Feature_DF

コード例 #4

0

ファイルを表示

def Weight_Aug_Reg(Raceday):

    """
    Fit and save the two auxiliary weight regressions

    Race-to-race changes in BEYER_SPEED are regressed on the changes in HWEIC,
    once for "more weight, lower speed figure" and once for "less weight,
    higher speed figure". Each fitted model is dumped with joblib under
    Aux_Reg_Path. Nothing is fitted or saved when there is no history or when
    either slice is empty.

    Parameter
    ---------
    Raceday : Date of Race, only earlier races are used
    Return
    ------
    None
    """

    History = Extraction_Database("""
                                     Select A.HNAME, A.RARID, BEYER_SPEED, HWEIC from
                                     (Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                     where RADAT < {Raceday}) A,
                                     (Select HNAME, RARID, HWEIC from RaceDb where RADAT < {Raceday}) B
                                     where A.HNAME = B.HNAME and A.RARID = B.RARID
                                     """.format(Raceday = Raceday))
    History.fillna(0, inplace=True)

    if len(History) == 0:
        return None

    #Race-to-race differences, computed horse by horse
    Changes = []
    for _, Races in History.groupby('HNAME'):
        Per_Horse = pd.DataFrame({'Speed': Races['BEYER_SPEED'].diff().values,
                                  'Weight': Races['HWEIC'].diff().values})
        Per_Horse = Per_Horse.replace([np.inf, -np.inf], np.nan).dropna()
        Changes.append(Per_Horse)
    Changes = pd.concat(Changes)

    Slower = Changes['Speed'] < 0
    Faster = Changes['Speed'] > 0
    Heavier = Changes['Weight'] > 0
    Lighter = Changes['Weight'] < 0

    #Increase in weight together with a decrease in Speed Figure
    Increase_Weight = Changes.loc[Slower & Heavier, :]
    #Decrease in weight together with an increase in Speed Figure
    Decrease_Weight = Changes.loc[Faster & Lighter, :]

    #Do not fit any model if one of the slices has no races
    if Increase_Weight.empty or Decrease_Weight.empty:
        return None

    #Same fit-and-save step for both slices, increase first
    Jobs = ((Increase_Weight, 'CC_WEI_INC_Model.joblib'),
            (Decrease_Weight, 'CC_WEI_DEC_Model.joblib'))
    for Subset, File_Name in Jobs:
        Regressor = LinearRegression()
        Regressor.fit(Subset['Weight'].values.reshape(-1,1), Subset['Speed'])
        with open(Aux_Reg_Path + File_Name, 'wb') as Handle:
            joblib.dump(Regressor, Handle)

    return None

コード例 #5

0

ファイルを表示

def OD_PR_BFAV(Dataframe, HNAME_List, Raceday):
    """
    Number of races the underlying horse is a beaten favourite in the last 5 races.

    A race counts when the horse has the lowest RESFO of the race (it is the
    favourite) and its RESFP is not 1 (it did not win).

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RESFO
    HNAME_List : String of List of Horse Names (not used by this feature)
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_BFAV]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Fav_Beaten_List = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe.loc[:, 'HNAME'].tolist():
        #Double any single quote so names like O'BRIEN stay a valid SQL literal
        Quoted_Horse = "'" + Horse.replace("'", "''") + "'"
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, RESFO, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday=Raceday,
                                                    Horse=Quoted_Horse))

        Lost_Fav_tot = 0
        for RARID, race in Extraction.groupby('RARID'):
            fav_con = race.loc[:, 'RESFO'] == race.loc[:, 'RESFO'].min()
            horse_con = race.loc[:, 'HNAME'] == Horse
            Only_Fav_Horse = race.loc[fav_con & horse_con, ['HNAME', 'RESFP']]
            if len(Only_Fav_Horse) == 1:
                #Beaten favourite = favourite that did not finish first.
                #The previous test (RESFP == 1) counted the races the
                #favourite won. A missing RESFP also counts as beaten here.
                Lost_Fav = float(Only_Fav_Horse.loc[:, 'RESFP'].iloc[0] != 1)
            else:
                Lost_Fav = 0
            Lost_Fav_tot += Lost_Fav
        Fav_Beaten_List.append([Horse, Lost_Fav_tot])
    Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_BFAV'])

    Feature_DF = Feature_DF.merge(Fav_Beaten, how='left')
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Feature_DF['OD_PR_BFAV'] = Feature_DF.loc[:, 'OD_PR_BFAV'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_BFAV']]

    return Feature_DF

コード例 #6

0

ファイルを表示

def CC_BWEI_DT3(Dataframe, HNAME_List, Raceday):

    """
    Absolute Difference in Bodyweight compared to average Top 3 finish of horse in percentage of Bodyweight
    Abs((Current Bodyweight - Average Top 3 Bodyweight ) / Average Top 3 Bodyweight)

    Horses without a Top 3 finish use the bodyweight recorded in the race of
    their highest BEYER_SPEED instead. Horses with neither get the largest
    value found among the other horses, or 0 if there is none.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and HBWEI
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the queries
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_DT3]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HBWEI']]

    Extraction_T3 = Extraction_Database("""
                                        Select HNAME, avg(HBWEI) T3_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Bodyweight = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI Best from RaceDb
                                     where HNAME in {HNAME_List} and RADAT < {Raceday}
                                     """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    #Rows where the horse ran its highest speed figure ('max' by name, as
    #passing the builtin max to transform is deprecated in pandas)
    Top_Speed = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform('max')
    idx = Top_Speed == Speed_Ratings['BEYER_SPEED']
    Speed_Ratings_Weight = Speed_Ratings[idx].merge(Bodyweight).loc[:,['HNAME','Best']]
    #Several races can tie for the best figure: keep the heaviest bodyweight
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index()

    Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left')
    Feature_DF['Filled_Weight'] = Feature_DF.loc[:,'T3_Weight'].fillna(Feature_DF.loc[:,'Best'])

    Feature_DF['CC_BWEI_DT3'] = ((Feature_DF.loc[:,'HBWEI'] - Feature_DF.loc[:,'Filled_Weight']) / Feature_DF.loc[:,'Filled_Weight']).abs()
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Largest = Feature_DF.loc[:,'CC_BWEI_DT3'].max()
    Feature_DF['CC_BWEI_DT3'] = Feature_DF.loc[:,'CC_BWEI_DT3'].fillna(Largest)
    Feature_DF['CC_BWEI_DT3'] = Feature_DF.loc[:,'CC_BWEI_DT3'].fillna(0)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_DT3']]

    return Feature_DF

コード例 #7

0

ファイルを表示

def CC_BWEI_DWIN(Dataframe, HNAME_List, Raceday):

    """
    Bodyweight difference with Winning Performance
    Abs((Current Bodyweight - Average Winning Bodyweight ) / Average Winning Bodyweight)

    Horses without a win (RESWL = 1) use the bodyweight recorded in the race
    of their highest BEYER_SPEED instead. Horses with neither get the largest
    value found among the other horses, or 0 if there is none.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and HBWEI
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the queries
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_DWIN]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HBWEI']]

    Extraction_Win = Extraction_Database("""
                                        Select HNAME, avg(HBWEI) Win_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESWL = 1
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Bodyweight = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI Best from RaceDb
                                     where HNAME in {HNAME_List} and RADAT < {Raceday}
                                     """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    #Rows where the horse ran its highest speed figure ('max' by name, as
    #passing the builtin max to transform is deprecated in pandas)
    Top_Speed = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform('max')
    idx = Top_Speed == Speed_Ratings['BEYER_SPEED']
    Speed_Ratings_Weight = Speed_Ratings[idx].merge(Bodyweight).loc[:,['HNAME','Best']]
    #Several races can tie for the best figure: keep the heaviest bodyweight
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index()

    Feature_DF = Feature_DF.merge(Extraction_Win, how='left').merge(Speed_Ratings_Weight, how='left')
    Feature_DF['Filled_Weight'] = Feature_DF.loc[:,'Win_Weight'].fillna(Feature_DF.loc[:,'Best'])

    Feature_DF['CC_BWEI_DWIN'] = ((Feature_DF.loc[:,'HBWEI'] - Feature_DF.loc[:,'Filled_Weight']) / Feature_DF.loc[:,'Filled_Weight']).abs()
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Largest = Feature_DF.loc[:,'CC_BWEI_DWIN'].max()
    Feature_DF['CC_BWEI_DWIN'] = Feature_DF.loc[:,'CC_BWEI_DWIN'].fillna(Largest)
    Feature_DF['CC_BWEI_DWIN'] = Feature_DF.loc[:,'CC_BWEI_DWIN'].fillna(0)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_DWIN']]

    return Feature_DF

コード例 #8

0

ファイルを表示

def CC_WEI_MAX(Dataframe, HNAME_List, Raceday):

    """
    Weight carrying threshold

    Average HWEIC carried in the horse's Top 3 finishes; horses without a
    Top 3 finish get today's HWEIC. When no horse has a speed rating joined
    to a carried weight, the feature is 0 for every horse.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and HWEIC
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the queries
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_WEI_MAX]
    """

    #Copy so the column assignment in the early return never touches Dataframe
    Feature_DF = Dataframe.loc[:,['HNAME', 'HWEIC']].copy()

    Extraction_T3 = Extraction_Database("""
                                        Select HNAME, Avg(HWEIC) T3_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    Weight = Extraction_Database("""
                                 Select HNAME, RARID, HWEIC SP_WEI from RaceDb
                                 where HNAME in {HNAME_List} and RADAT < {Raceday}
                                 """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select Distinct HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    #Rows where the horse ran its highest speed figure ('max' by name, as
    #passing the builtin max to transform is deprecated in pandas)
    Top_Speed = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform('max')
    idx = Top_Speed == Speed_Ratings['BEYER_SPEED']
    Speed_Ratings_Weight = Speed_Ratings[idx].merge(Weight, on = ['HNAME', 'RARID']).loc[:,['HNAME','SP_WEI']]
    #Highest weight per horse among its best-speed races. A direct groupby
    #max keeps HNAME as a column without relying on groupby.apply passing
    #the grouping column to the applied function.
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME', as_index = False)['SP_WEI'].max()

    if len(Speed_Ratings_Weight) == 0:
        Feature_DF['CC_WEI_MAX'] = 0
        Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_MAX']]
        return Feature_DF

    Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left')
    #NOTE(review): SP_WEI is merged in but never used - the fallback is
    #today's HWEIC. Confirm whether SP_WEI was meant to be the fallback.
    Feature_DF['CC_WEI_MAX'] = Feature_DF.loc[:,'T3_Weight'].fillna(Feature_DF.loc[:,'HWEIC'])
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_MAX']]

    return Feature_DF

コード例 #9

0

ファイルを表示

def OD_PR_LPAVG(Dataframe, HNAME_List, Raceday):
    """
    Average Log Odds implied Probability

    Every past RESFO is turned into log((1 - 0.175) / RESFO). With more than
    one value the series is exponentially smoothed and the forecast is used;
    a single value is used as is. Horses without history get the lowest value
    found among the other horses, or 0 if there is none.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the query
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_LPAVG]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, RESFO from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday=Raceday,
                                                HNAME_List=HNAME_List))

    Odds = []
    #NOTE(review): rows are smoothed in the order the query returns them -
    #confirm that this is chronological
    for name, group in Extraction.groupby('HNAME'):
        Probi = group.loc[:, 'RESFO'].map(lambda x: np.log(
            (1 - 0.175) / x)).dropna().values
        if len(Probi) > 1:
            model = SimpleExpSmoothing(Probi)
            model = model.fit()
            Odds.append([name, model.forecast()[0]])
        elif len(Probi) == 1:
            Odds.append([name, Probi[0]])
        else:
            Odds.append([name, 0])
    Odds = pd.DataFrame(Odds, columns=['HNAME', 'OD_PR_LPAVG'])

    Feature_DF = Feature_DF.merge(Odds, how='left')
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Lowest = Feature_DF.loc[:, 'OD_PR_LPAVG'].min()
    Feature_DF['OD_PR_LPAVG'] = Feature_DF.loc[:, 'OD_PR_LPAVG'].fillna(Lowest)
    Feature_DF['OD_PR_LPAVG'] = Feature_DF.loc[:, 'OD_PR_LPAVG'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_LPAVG']]

    return Feature_DF

コード例 #10

0

ファイルを表示

def CC_BWEI_D(Dataframe, HNAME_List, Raceday):

    """
    Change in Bodyweight of Horse

    Race-to-race change in HBWEI relative to the later bodyweight. With more
    than one change the series is exponentially smoothed and the forecast is
    used; a single change is used as is. The result is taken as an absolute
    value. Horses without history get the largest value found among the other
    horses, or 0 if there is none.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the query
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_D]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    HBWEI = []
    #NOTE(review): diff() follows the order the query returns the rows in -
    #confirm that this is chronological
    for name, group in Extraction.groupby('HNAME'):
        Weight = (group.loc[:,'HBWEI'].diff() / group.loc[:,'HBWEI']).dropna().values
        if len(Weight) >1:
            model = SimpleExpSmoothing(Weight)
            model = model.fit()
            HBWEI.append([name, model.forecast()[0]])
        elif len(Weight) == 1:
            HBWEI.append([name,Weight[0]])
        else :
            HBWEI.append([name,0])
    HBWEI = pd.DataFrame(HBWEI, columns=['HNAME','CC_BWEI_D'])

    Feature_DF = Feature_DF.merge(HBWEI, how='left')
    Feature_DF['CC_BWEI_D'] = Feature_DF.loc[:,'CC_BWEI_D'].abs()
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Largest = Feature_DF.loc[:,'CC_BWEI_D'].max()
    Feature_DF['CC_BWEI_D'] = Feature_DF.loc[:,'CC_BWEI_D'].fillna(Largest)
    Feature_DF['CC_BWEI_D'] = Feature_DF.loc[:,'CC_BWEI_D'].fillna(0)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_D']]

    return Feature_DF

コード例 #11

0

ファイルを表示

def CC_CLS_D(Dataframe, HNAME_List, Raceday):

    """
    Change in HKJC Rating

    Race-to-race change in HJRAT relative to the later rating. With more than
    one change the series is exponentially smoothed and the forecast is used;
    a single change is used as is. Horses without history get the lowest
    value found among the other horses, or 0 if there is none.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the query
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_D]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, HJRAT from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    JRat = []
    #NOTE(review): diff() follows the order the query returns the rows in -
    #confirm that this is chronological
    for name, group in Extraction.groupby('HNAME'):
        Rating = (group.loc[:,'HJRAT'].diff() / group.loc[:,'HJRAT']).dropna().values
        if len(Rating) >1:
            model = SimpleExpSmoothing(Rating)
            model = model.fit()
            JRat.append([name, model.forecast()[0]])
        elif len(Rating) == 1:
            JRat.append([name,Rating[0]])
        else :
            JRat.append([name,0])
    JRat = pd.DataFrame(JRat, columns=['HNAME','CC_CLS_D'])

    Feature_DF = Feature_DF.merge(JRat, how='left')
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Lowest = Feature_DF.loc[:,'CC_CLS_D'].min()
    Feature_DF['CC_CLS_D'] = Feature_DF.loc[:,'CC_CLS_D'].fillna(Lowest)
    Feature_DF['CC_CLS_D'] = Feature_DF.loc[:,'CC_CLS_D'].fillna(0)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_CLS_D']]

    return Feature_DF

コード例 #12

0

ファイルを表示

def CC_CLS_CC(Dataframe, HNAME_List, Raceday):

    """
    Horse's Competitive Class

    Class of today's race (mean of the 3 highest HJRAT in Dataframe) minus the
    average class of the races in which the horse finished in the Top 3, where
    the class of a past race is the mean of its 3 highest HJRAT. Horses
    without a Top 3 finish get 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and HJRAT
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the queries
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_CC]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HJRAT']]
    #Average only the rating column: DataFrame.mean() over the HNAME strings
    #raises on pandas >= 2
    Underlying_Class = Feature_DF.loc[:, 'HJRAT'].nlargest(3).mean()

    Races = Extraction_Database("""
                                Select HNAME, RARID, HJRAT CC_CLS_CC from RaceDb
                                where RARID in (
                                Select RARID from RaceDb
                                where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3)
                                """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    #Class of every past race = mean of its 3 highest ratings
    Races_AvgHJRAT = Races.groupby('RARID')['CC_CLS_CC'].apply(lambda x: x.nlargest(3).mean())
    Races_AvgHJRAT = Races_AvgHJRAT.reset_index()
    Race_IDs = Extraction_Database("""
                                   Select HNAME, RARID from RaceDb
                                   where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                   """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Merged = Race_IDs.merge(Races_AvgHJRAT)
    #Select the column before averaging so RARID is never aggregated
    Horse_Class = Merged.groupby('HNAME')['CC_CLS_CC'].mean().reset_index()
    Feature_DF = Feature_DF.merge(Horse_Class, how='left')
    #Assign the results back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Feature_DF['CC_CLS_CC'] = Feature_DF.loc[:, 'CC_CLS_CC'].fillna(Underlying_Class)
    Feature_DF['CC_CLS_CC'] = Underlying_Class - Feature_DF.loc[:, 'CC_CLS_CC']
    Feature_DF['CC_CLS_CC'] = Feature_DF.loc[:, 'CC_CLS_CC'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_CC']]

    return Feature_DF

コード例 #13

0

ファイルを表示

def OD_PR_LPW(Dataframe, HNAME_List, Raceday):
    """
    Average Winning Odds

    Abs((log prob. of today's odds - log prob. of reference odds) / log prob.
    of reference odds), with log prob. = log((1 - 0.175) / odds). The
    reference odds are the horse's average RESFO in its wins (RESWL = 1) or,
    without a win, its average RESFO in the race(s) of its highest
    BEYER_SPEED. Horses with neither get 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RESFO
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the queries
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_LPW]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Extraction_Win = Extraction_Database("""
                                        Select HNAME, avg(RESFO) Win_Odds from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESWL = 1
                                        Group by HNAME
                                        """.format(Raceday=Raceday,
                                                   HNAME_List=HNAME_List))

    Odds = Extraction_Database("""
                               Select HNAME, RARID, RESFO Odds from RaceDb
                               where HNAME in {HNAME_List} and RADAT < {Raceday}
                               """.format(HNAME_List=HNAME_List,
                                          Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List,
                                                   Raceday=Raceday))

    #Rows where the horse ran its highest speed figure ('max' by name, as
    #passing the builtin max to transform is deprecated in pandas)
    Top_Speed = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform('max')
    idx = Top_Speed == Speed_Ratings['BEYER_SPEED']
    Speed_Ratings_Odds = Speed_Ratings[idx].merge(Odds).loc[:,
                                                            ['HNAME', 'Odds']]
    if not Speed_Ratings_Odds.empty:
        #An empty result (first season) needs no averaging
        try:
            Speed_Ratings_Odds = Speed_Ratings_Odds.groupby(
                'HNAME', as_index=False)['Odds'].mean()
        except Exception:
            #Best effort as before: continue with the unaggregated rows,
            #but no longer swallow KeyboardInterrupt / SystemExit
            pass
    Feature_DF = Feature_DF.merge(Extraction_Win,
                                  how='left').merge(Speed_Ratings_Odds,
                                                    how='left')
    Feature_DF['Filled_Odds'] = Feature_DF.loc[:, 'Win_Odds'].fillna(
        Feature_DF.loc[:, 'Odds'])
    Feature_DF['Filled_Odds'] = Feature_DF.loc[:, 'Filled_Odds'].map(
        lambda x: np.log((1 - 0.175) / x))
    Feature_DF['RESFO'] = Feature_DF.loc[:, 'RESFO'].map(
        lambda x: np.log((1 - 0.175) / x))

    Feature_DF['OD_PR_LPW'] = (
        (Feature_DF.loc[:, 'RESFO'] - Feature_DF.loc[:, 'Filled_Odds']) /
        Feature_DF.loc[:, 'Filled_Odds']).abs()
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Feature_DF['OD_PR_LPW'] = Feature_DF.loc[:, 'OD_PR_LPW'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_LPW']]

    return Feature_DF

コード例 #14

0

ファイルを表示

def CC_REC_NUM_LT3(Dataframe, HNAME_List, Raceday):

    """
    Predicted number of races until next Top 3

    Each horse's past races are numbered in RARID order. The feature is the
    number of races since the last Top 3 finish minus the average number of
    races between Top 3 finishes. Horses without a Top 3 finish use the same
    figure built from the race of the best Beyer figure and the three best
    Beyer runs. Horses without history get the largest value found among the
    other horses, or 0 if there is none.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, needs the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the queries
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_REC_NUM_LT3]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction_Performance = Extraction_Database("""
                                                 Select HNAME, First_RARID, Beyer_RARID, Last_T3_RARID from (
                                                 Select HNAME, First_RARID, Beyer_RARID from (
                                                 Select HNAME, min(RARID) First_RARID from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) FIRST
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_BEYER, RARID Beyer_RARID, max(BEYER_SPEED)from Race_PosteriorDb
                                                  where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                  Group by HNAME) BEYER
                                                 ON FIRST.HNAME = BEYER.HNAME_BEYER) FIRST_BEYER
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_T3, max(RARID) Last_T3_RARID from Race_PosteriorDb
                                                  where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                                  Group by HNAME) T3
                                                 ON T3.HNAME_T3 = FIRST_BEYER.HNAME
                                                 """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Extraction_Avg = Extraction_Database("""
                                         Select HNAME, RARID, RESFP, BEYER_SPEED from Race_PosteriorDb
                                         where RADAT < {Raceday} and HNAME in {HNAME_List}
                                         """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Avg_Date = []
    for name, group in Extraction_Avg.groupby('HNAME'):
        #Number the horse's races 0..n-1 in RARID order (column 'index')
        group = group.sort_values('RARID').reset_index(drop = True).reset_index()
        Avg_T3 = group.loc[group.loc[:, 'RESFP'] <= 3, 'index'].diff().abs().mean()
        Avg_Beyer = group.nlargest(3,'BEYER_SPEED').loc[:, 'index'].diff().abs().mean()
        #Today's race would be the next number in the sequence
        Today = group.loc[:,'index'].max()+1

        Performance = Extraction_Performance.loc[Extraction_Performance.loc[:,'HNAME']==name, :]
        Last_Beyer = Performance.loc[:, 'Beyer_RARID'].values[0]
        Last_Beyer = group.loc[group.loc[:,'RARID'] == Last_Beyer,'index'].values[0]
        Diff_Beyer = Today - Last_Beyer - Avg_Beyer

        #Last_T3_RARID is missing for horses without a Top 3 finish, so the
        #lookup can come back empty; check that instead of a bare except
        Last_T3 = Performance.loc[:, 'Last_T3_RARID'].values[0]
        T3_Position = group.loc[group.loc[:,'RARID'] == Last_T3,'index'].values
        if len(T3_Position) > 0:
            Diff_T3 = Today - T3_Position[0] - Avg_T3
        else:
            #np.nan, not np.NaN: the alias was removed in NumPy 2.0
            Diff_T3 = np.nan
        Avg_Date.append([name, Diff_T3, Diff_Beyer])
    Avg_Date = pd.DataFrame(Avg_Date, columns=['HNAME','Diff_T3','Diff_Beyer'])

    Feature_DF = Feature_DF.merge(Avg_Date, how='left')
    Feature_DF['CC_REC_NUM_LT3'] = Feature_DF.loc[:,'Diff_T3'].fillna(Feature_DF.loc[:,'Diff_Beyer'])
    #Assign the result back: fillna(inplace=True) on a .loc selection is not
    #guaranteed to modify Feature_DF
    Largest = Feature_DF.loc[:,'CC_REC_NUM_LT3'].max()
    Feature_DF['CC_REC_NUM_LT3'] = Feature_DF.loc[:,'CC_REC_NUM_LT3'].fillna(Largest)
    Feature_DF['CC_REC_NUM_LT3'] = Feature_DF.loc[:,'CC_REC_NUM_LT3'].fillna(0)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_NUM_LT3']]

    return Feature_DF