Exemple #1
0
def CC_FRB(Dataframe, HNAME_List, Raceday):

    """
    Binary indicator on whether the underlying race is the horse’s first race.
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_FRB]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME','RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, count(RARID) CC_FRB from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     Group by HNAME
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    Extraction.loc[:,'CC_FRB'] = Extraction.loc[:,'CC_FRB'].apply(lambda x : int(x<1))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'CC_FRB'].fillna(1, inplace=True)

    Feature_DF = Feature_DF.loc[:,['HNAME','CC_FRB']]

    return Feature_DF
Exemple #2
0
def CC_REC_DAY_PT3(Dataframe, HNAME_List, Raceday):

    """
    Predicted days until next Top 3
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_REC_DAY_PT3]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction_Performance = Extraction_Database("""
                                                 Select HNAME, First_Date, Last_T3_Date, Top_Beyer_Date from (
                                                 Select HNAME, First_Date, Top_Beyer_Date from (
                                                 Select HNAME, min(RADAT) First_Date from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) FIRST
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_BEYER, RADAT Top_Beyer_Date, max(BEYER_SPEED) from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) BEYER
                                                 ON FIRST.HNAME = BEYER.HNAME_BEYER) FIRST_BEYER
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_W, max(RADAT) Last_T3_Date from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                                 Group by HNAME) WIN
                                                 ON WIN.HNAME_W = FIRST_BEYER.HNAME
                                                 """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Extraction_Avg = Extraction_Database("""
                                         Select HNAME, RADAT, RESFP, BEYER_SPEED from Race_PosteriorDb
                                         where RADAT < {Raceday} and HNAME in {HNAME_List}
                                         """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    Avg_Date = []
    for name, group in Extraction_Avg.groupby('HNAME'):
        Avg_T3 = group.loc[group.loc[:, 'RESFP'] <= 3, 'RADAT'].apply(lambda x: pd.to_datetime(x, format = '%Y%m%d')).diff().mean()
        Avg_Beyer = group.nlargest(3,'BEYER_SPEED').loc[:, 'RADAT'].apply(lambda x: pd.to_datetime(x, format = '%Y%m%d')).diff().mean()
        Avg_Date.append([name, Avg_T3, Avg_Beyer])
    Avg_Date = pd.DataFrame(Avg_Date, columns=['HNAME','Avg_T3','Avg_Beyer'])

    Raceday = pd.to_datetime(Raceday, format = '%Y%m%d')
    Feature_DF = Feature_DF.merge(Extraction_Performance, how='left').merge(Avg_Date, how='left')
    Feature_DF.loc[:,['First_Date','Last_T3_Date','Top_Beyer_Date']]=Feature_DF.loc[:,['First_Date','Last_T3_Date','Top_Beyer_Date']].fillna('20120101')
    Feature_DF.loc[:,['Avg_T3','Avg_Beyer']]=Feature_DF.loc[:,['Avg_T3','Avg_Beyer']].fillna(Raceday-pd.to_datetime('20120101',format='%Y%m%d'))

    Feature_DF.loc[:,'T3'] = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Last_T3_Date'], format = '%Y%m%d')
    Feature_DF.loc[:,'T3'] = Feature_DF.loc[:,'T3'] - Feature_DF.loc[:,'Avg_T3'].abs()
    Feature_DF.loc[:,'Beyer'] = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Top_Beyer_Date'], format = '%Y%m%d')
    Feature_DF.loc[:,'Beyer'] = Feature_DF.loc[:,'Beyer'] - Feature_DF.loc[:,'Avg_Beyer'].abs()

    Feature_DF.loc[:,'CC_REC_DAY_PT3'] = Feature_DF.loc[:,'T3'].fillna(Feature_DF.loc[:,'Beyer'])
    Feature_DF.loc[:,'CC_REC_DAY_PT3'] = Feature_DF.loc[:,'CC_REC_DAY_PT3'].apply(lambda x : int(str(x).split('days')[0]))
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAY_PT3']]

    return Feature_DF
Exemple #3
0
def CC_WEI_SP(Dataframe, HNAME_List, Raceday):

    """
    Weight Carried’s effect on Speed Figure
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_WEI_SP]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HWEIC']]

    Extraction_Weight = Extraction_Database("""
                                            Select HNAME, max(RARID), HWEIC Last_Weight from RaceDb
                                            where RADAT < {Raceday} and HNAME in {HNAME_List}
                                            Group by HNAME
                                            """.format(Raceday = Raceday, HNAME_List = HNAME_List))


    Extraction_Speed = Extraction_Database("""
                                           Select HNAME, BEYER_SPEED, max(RARID) from Race_PosteriorDb
                                           where RADAT < {Raceday} and HNAME in {HNAME_List}
                                           Group by HNAME
                                           """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Feature_DF = Feature_DF.merge(Extraction_Weight, how='left')
    Feature_DF.loc[:,'Weight_Change'] = Feature_DF.loc[:,'HWEIC'] - Feature_DF.loc[:,'Last_Weight']

    try :
        #Load Models
        with open(Aux_Reg_Path + 'CC_WEI_INC_Model.joblib', 'rb') as location:
            Inc_model = joblib.load(location)
        with open(Aux_Reg_Path + 'CC_WEI_DEC_Model.joblib', 'rb') as location:
            Dec_model = joblib.load(location)

        SP_Change = []
        for index, row in Feature_DF.iterrows():
            if row['Weight_Change'] < 0:
                SP_Change.append(Dec_model.predict(np.array([[row.loc['Weight_Change']]])))
            else :
                SP_Change.append(Inc_model.predict(np.array([[row.loc['Weight_Change']]])))
    except :
        pass

    Feature_DF = Feature_DF.merge(Extraction_Speed, how='left')
    try :
        Feature_DF.loc[:,'CC_WEI_SP'] = Feature_DF.loc[:,'BEYER_SPEED'] + Feature_DF.loc[:,'SP_Change']
    except :
        Feature_DF.loc[:,'CC_WEI_SP'] = Feature_DF.loc[:,'BEYER_SPEED']
    Feature_DF.loc[:,'CC_WEI_SP'].fillna(Feature_DF.loc[:,'CC_WEI_SP'].min(), inplace = True)
    Feature_DF.loc[:,'CC_WEI_SP'].fillna(0, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_SP']]

    return Feature_DF
Exemple #4
0
def CC_CLS_CL(Dataframe, HNAME_List, Raceday):

    """
    Horse's Competition Level
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_CL]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HJRAT']]

    Horse_Comp = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe['HNAME'].tolist():
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, HJRAT, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday = Raceday, Horse = "'" + Horse + "'"))

        for RARID, race in Extraction.groupby('RARID'):
            Horse_Rat = race.loc[race.loc[:,'HNAME']==Horse,'HJRAT'].to_list()[0]
            Horse_FP = race.loc[race.loc[:,'HNAME']==Horse,'RESFP'].to_list()[0]
            Comp_Rat = race.nlargest(3, 'HJRAT').loc[:,'HJRAT'].mean()
            Comp_Level = (Comp_Rat - Horse_Rat) / Horse_FP
            Horse_Comp.append([Horse,Comp_Level])
    Horse_Comp = pd.DataFrame(Horse_Comp, columns=['HNAME', 'Comp_Level'])

    #Recency Weighting
    Comp = []
    for name, group in Horse_Comp.groupby('HNAME'):
        Comp_Figure = group.loc[:,'Comp_Level'].dropna().values
        try :
            model = SimpleExpSmoothing(Comp_Figure)
            model = model.fit()
            Comp.append([name, model.forecast()[0]])
        except :
            Comp.append([name,Comp_Figure[0]])
    Comp = pd.DataFrame(Comp, columns=['HNAME','CC_CLS_CL'])

    Feature_DF = Feature_DF.merge(Comp, how='left')
    Feature_DF.loc[:, 'CC_CLS_CL'].fillna(Feature_DF.loc[:, 'CC_CLS_CL'].min(), inplace = True)
    Feature_DF.loc[:, 'CC_CLS_CL'].fillna(0, inplace=True)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_CL']]

    return Feature_DF
Exemple #5
0
def OD_PR_FAVB(Dataframe, HNAME_List, Raceday):
    """
    Number of favourites that ran behind the underlying horse in the last 5 races.
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_FAVB]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Fav_Beaten_List = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe['HNAME'].tolist():
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, RESFO, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday=Raceday,
                                                    Horse="'" + Horse + "'"))

        Won_Fav_tot = 0
        for RARID, race in Extraction.groupby('RARID'):
            fav_con = race.loc[:, 'RESFO'] == race.loc[:, 'RESFO'].min()
            horse_con = race.loc[:, 'HNAME'] == Horse
            Only_Fav_Horse = race.loc[fav_con | horse_con,
                                      ['HNAME', 'RESFP']].sort_values(
                                          'RESFP').reset_index(drop=True)
            if len(Only_Fav_Horse) != 1:
                Won_Fav = float(not bool(Only_Fav_Horse.loc[
                    Only_Fav_Horse.loc[:, 'HNAME'] == Horse, :].index.values))
            else:
                Won_Fav = 0
            Won_Fav_tot += Won_Fav
        Fav_Beaten_List.append([Horse, Won_Fav_tot])
    Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_FAVB'])

    Feature_DF = Feature_DF.merge(Fav_Beaten, how='left')
    Feature_DF.loc[:, 'OD_PR_FAVB'].fillna(0, inplace=True)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_FAVB']]

    return Feature_DF
Exemple #6
0
def Weight_Aug_Reg(Raceday):

    Extraction = Extraction_Database("""
                                     Select A.HNAME, A.RARID, BEYER_SPEED, HWEIC from
                                     (Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                     where RADAT < {Raceday}) A,
                                     (Select HNAME, RARID, HWEIC from RaceDb where RADAT < {Raceday}) B
                                     where A.HNAME = B.HNAME and A.RARID = B.RARID
                                     """.format(Raceday = Raceday))
    Extraction.fillna(0, inplace=True)

    if len(Extraction) == 0:
        return None

    DF = []
    for name, group in Extraction.groupby('HNAME'):
        Speed_Figure = group.loc[:,'BEYER_SPEED'].diff().values
        Weight = group.loc[:,'HWEIC'].diff().values
        One_Horse = pd.DataFrame({'Speed': Speed_Figure, 'Weight':Weight})
        One_Horse.replace([np.inf, -np.inf], np.nan, inplace = True)
        One_Horse.dropna(inplace = True)
        DF.append(One_Horse)
    DF = pd.concat(DF)

    #Slice in increase in weight leading to decrease in Speed Figure
    Increase_Weight = DF.loc[(DF.loc[:,'Speed'] < 0) & (DF.loc[:,'Weight'] > 0), :]

    #Slice in decrease in weight leading to increase in Speed Figure
    Decrease_Weight = DF.loc[(DF.loc[:,'Speed'] > 0) & (DF.loc[:,'Weight'] < 0), :]

    #NO not fit model if there is no races
    if len(Increase_Weight) == 0 or len(Decrease_Weight) == 0:
        return None

    #Model Fitting
    model = LinearRegression()
    model.fit(Increase_Weight.loc[:,'Weight'].values.reshape(-1,1), Increase_Weight.loc[:,'Speed'])
    #Save Model
    with open(Aux_Reg_Path + 'CC_WEI_INC_Model.joblib', 'wb') as location:
        joblib.dump(model, location)

    #Model Fitting
    model = LinearRegression()
    model.fit(Decrease_Weight.loc[:,'Weight'].values.reshape(-1,1), Decrease_Weight.loc[:,'Speed'])
    #Save Model
    with open(Aux_Reg_Path + 'CC_WEI_DEC_Model.joblib', 'wb') as location:
        joblib.dump(model, location)

    return None
Exemple #7
0
def RS_ELO_JP(Dataframe, HNAME_List, Raceday):

    """
    Jockey’s ELO Score Implied Probability
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_JP]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'JNAME']]
    JNAME_List = '('+str(Dataframe['JNAME'].tolist())[1:-1]+')'

    Extraction = Extraction_Database("""
                                     Select JNAME, JELO RS_ELO_J from RS_JOCKEY_ELO
                                     where JNAME in {JNAME_List}
                                     """.format(JNAME_List = JNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'RS_ELO_J'].fillna(1500, inplace = True)

    #Implied Probability Function
    Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'JNAME').loc[:,'Expected_Score'], on = 'JNAME')
    Feature_DF.rename(columns={'Expected_Score':'RS_ELO_JP'},inplace=True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_JP']]

    return Feature_DF
Exemple #8
0
def CC_REC_NUMM(Dataframe, HNAME_List, Raceday):

    """
    Number of meters ran in last 3 months
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_REC_NUMM]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]
    Offset_Raceday = (pd.to_datetime(Raceday) + pd.tseries.offsets.DateOffset(months=-3)).strftime("%Y%m%d")

    Extraction = Extraction_Database("""
                                     Select HNAME, sum(RADIS) CC_REC_NUMM from RaceDb
                                     where RADAT < {Raceday} and RADAT > {Offset_Raceday} and HNAME in {HNAME_List}
                                     Group by HNAME
                                     """.format(Raceday = Raceday, Offset_Raceday = Offset_Raceday, HNAME_List = HNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_NUMM']].fillna(0)

    return Feature_DF
Exemple #9
0
def RS_ELO_S(Dataframe, HNAME_List, Raceday):

    """
    Stable’s ELO Score
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_S]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'SNAME']]
    SNAME_List = '('+str(Dataframe['SNAME'].tolist())[1:-1]+')'

    Extraction = Extraction_Database("""
                                     Select SNAME, SELO RS_ELO_S from RS_STABLE_ELO
                                     where SNAME in {SNAME_List}
                                     """.format(SNAME_List = SNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, on='SNAME', how='left')
    Feature_DF.loc[:,'RS_ELO_S'].fillna(1500, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_S']]

    return Feature_DF
Exemple #10
0
def RS_ELO_HP_SUR(Dataframe, HNAME_List, Raceday):

    """
    Horse’s ELO Score Implied Probability on Surface
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_HP_SUR]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]
    Surface = Dataframe.loc[:,'RATRA'].values[0]
    if Surface == 'T' :
        Target = 'HELO_TURF'
    elif Surface == 'AW':
        Target = 'HELO_AW'

    Extraction = Extraction_Database("""
                                     Select HNAME, {Target} RS_ELO_H_SUR from RS_HORSE_ELO
                                     where HNAME in {HNAME_List}
                                     """.format(HNAME_List = HNAME_List, Target=Target))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'RS_ELO_H_SUR'].fillna(1500, inplace = True)

    #Implied Probability Function
    Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME')
    Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_SUR'},inplace=True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_SUR']]

    return Feature_DF
Exemple #11
0
def RS_ELO_SP(Dataframe, HNAME_List, Raceday):

    """
    Stable’s ELO Score Implied Probability
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_SP]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'SNAME']]
    SNAME_List = '('+str(Dataframe['SNAME'].tolist())[1:-1]+')'

    Extraction = Extraction_Database("""
                                     Select SNAME, SELO RS_ELO_S from RS_STABLE_ELO
                                     where SNAME in {SNAME_List}
                                     """.format(SNAME_List = SNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, on='SNAME', how='left')
    Feature_DF.loc[:,'RS_ELO_S'].fillna(1500, inplace = True)

    #Implied Probability Function
    Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'SNAME').loc[:,'Expected_Score'].reset_index(), on = 'SNAME')
    Feature_DF.rename(columns={'Expected_Score':'RS_ELO_SP'},inplace=True)
    Feature_DF = Feature_DF.groupby('HNAME').mean().reset_index()
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_SP']]

    return Feature_DF
Exemple #12
0
def RS_ELO_HP(Dataframe, HNAME_List, Raceday):

    """
    Horse's ELO Score Implied Probability
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_HP]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, HELO RS_ELO_H from RS_HORSE_ELO
                                     where HNAME in {HNAME_List}
                                     """.format(HNAME_List = HNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'RS_ELO_H'].fillna(1500, inplace = True)

    #Implied Probability Function
    Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME')
    Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP'},inplace=True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP']]

    return Feature_DF
Exemple #13
0
def RS_ELO_HP_PFL(Dataframe, HNAME_List, Raceday):

    """
    Horse’s ELO Score Implied Probability on Profile
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_HP_PFL]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]
    Target = Dataframe.loc[:,'RALOC'].values[0] + "_" + str(Dataframe.loc[:,'RADIS'].values[0]) + "_" +Dataframe.loc[:,'RATRA'].values[0]

    Extraction = Extraction_Database("""
                                     Select HNAME, {Target} RS_ELO_H_PFL from RS_HORSE_ELO
                                     where HNAME in {HNAME_List}
                                     """.format(HNAME_List = HNAME_List, Target=Target))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'RS_ELO_H_PFL'].fillna(1500, inplace = True)

    #Implied Probability Function
    Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME')
    Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_PFL'},inplace=True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_PFL']]

    return Feature_DF
Exemple #14
0
def RS_ELO_J(Dataframe, HNAME_List, Raceday):

    """
    Jockey’s ELO Score
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_J]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'JNAME']]
    JNAME_List = '('+str(Dataframe['JNAME'].tolist())[1:-1]+')'

    Extraction = Extraction_Database("""
                                     Select JNAME, JELO RS_ELO_J from RS_JOCKEY_ELO
                                     where JNAME in {JNAME_List}
                                     """.format(JNAME_List = JNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'RS_ELO_J'].fillna(1500, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_J']]

    return Feature_DF
Exemple #15
0
def RS_ELO_H(Dataframe, HNAME_List, Raceday):

    """
    ELO Score of Horse
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, RS_ELO_H]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, HELO RS_ELO_H from RS_HORSE_ELO
                                     where HNAME in {HNAME_List}
                                     """.format(HNAME_List = HNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'RS_ELO_H'].fillna(1500, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_H']]

    return Feature_DF
Exemple #16
0
def RS_ELO_H_PFL(Dataframe, HNAME_List, Raceday):

    """
    Horse’s ELO Score on Profile
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_H_PFL]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]
    Target = Dataframe.loc[:,'RALOC'].values[0] + "_" + str(Dataframe.loc[:,'RADIS'].values[0]) + "_" +Dataframe.loc[:,'RATRA'].values[0]

    Extraction = Extraction_Database("""
                                     Select HNAME, {Target} RS_ELO_H_PFL from RS_HORSE_ELO
                                     where HNAME in {HNAME_List}
                                     """.format(HNAME_List = HNAME_List, Target=Target))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'RS_ELO_H_PFL'].fillna(1500, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_H_PFL']]

    return Feature_DF
Exemple #17
0
def OD_PR_BFAV(Dataframe, HNAME_List, Raceday):
    """
    Number of races the underlying horse is a beaten favourite is the last 5 races.
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_BFAV]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Fav_Beaten_List = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe.loc[:, 'HNAME'].tolist():
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, RESFO, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday=Raceday,
                                                    Horse="'" + Horse + "'"))

        Lost_Fav_tot = 0
        for RARID, race in Extraction.groupby('RARID'):
            fav_con = race.loc[:, 'RESFO'] == race.loc[:, 'RESFO'].min()
            horse_con = race.loc[:, 'HNAME'] == Horse
            Only_Fav_Horse = race.loc[fav_con & horse_con, ['HNAME', 'RESFP']]
            if len(Only_Fav_Horse) == 1:
                Lost_Fav = float(Only_Fav_Horse.loc[:, 'RESFP'] == 1)
            else:
                Lost_Fav = 0
            Lost_Fav_tot += Lost_Fav
        Fav_Beaten_List.append([Horse, Lost_Fav_tot])
    Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_BFAV'])

    Feature_DF = Feature_DF.merge(Fav_Beaten, how='left')
    Feature_DF.loc[:, 'OD_PR_BFAV'].fillna(0, inplace=True)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_BFAV']]

    return Feature_DF
Exemple #18
0
def CC_BWEI_DT3(Dataframe, HNAME_List, Raceday):

    """
    Absolute Difference in Bodyweight compared to average Top 3 finish of horse in percentage of Bodyweight
    Abs((Current Bodyweight - Average Top 3 Bodyweight ) / Average Top 3 Bodyweight)
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_DT3]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HBWEI']]

    Extraction_T3 = Extraction_Database("""
                                        Select HNAME, avg(HBWEI) T3_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Bodyweight = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI Best from RaceDb
                                     where HNAME in {HNAME_List} and RADAT < {Raceday}
                                     """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    idx = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform(max) == Speed_Ratings['BEYER_SPEED']
    Speed_Ratings_Weight = Speed_Ratings[idx].merge(Bodyweight).loc[:,['HNAME','Best']]
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index()

    Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left')
    Feature_DF.loc[:,'Filled_Weight'] = Feature_DF.loc[:,'T3_Weight'].fillna(Feature_DF.loc[:,'Best'])

    Feature_DF.loc[:,'CC_BWEI_DT3'] = ((Feature_DF.loc[:,'HBWEI'] - Feature_DF.loc[:,'Filled_Weight']) / Feature_DF.loc[:,'Filled_Weight']).abs()
    Feature_DF.loc[:,'CC_BWEI_DT3'].fillna(Feature_DF.loc[:,'CC_BWEI_DT3'].max(), inplace = True)
    Feature_DF.loc[:,'CC_BWEI_DT3'].fillna(0, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_DT3']]

    return Feature_DF
Exemple #19
0
def CC_BWEI_DWIN(Dataframe, HNAME_List, Raceday):

    """
    Bodyweight difference with Winning Performance
    Abs(Current Bodyweight - Average Winning Bodyweight ) / Average Winning Bodyweight)
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_DWIN]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','HBWEI']]

    Extraction_Win = Extraction_Database("""
                                        Select HNAME, avg(HBWEI) Win_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESWL = 1
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Bodyweight = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI Best from RaceDb
                                     where HNAME in {HNAME_List} and RADAT < {Raceday}
                                     """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    idx = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform(max) == Speed_Ratings['BEYER_SPEED']
    Speed_Ratings_Weight = Speed_Ratings[idx].merge(Bodyweight).loc[:,['HNAME','Best']]
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index()

    Feature_DF = Feature_DF.merge(Extraction_Win, how='left').merge(Speed_Ratings_Weight, how='left')
    Feature_DF.loc[:,'Filled_Weight'] = Feature_DF.loc[:,'Win_Weight'].fillna(Feature_DF.loc[:,'Best'])

    Feature_DF.loc[:,'CC_BWEI_DWIN'] = ((Feature_DF.loc[:,'HBWEI'] - Feature_DF.loc[:,'Filled_Weight']) / Feature_DF.loc[:,'Filled_Weight']).abs()
    Feature_DF.loc[:,'CC_BWEI_DWIN'].fillna(Feature_DF.loc[:,'CC_BWEI_DWIN'].max(), inplace = True)
    Feature_DF.loc[:,'CC_BWEI_DWIN'].fillna(0, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_DWIN']]

    return Feature_DF
Exemple #20
0
def CC_WEI_MAX(Dataframe, HNAME_List, Raceday):

    """
    Weight carrying threshold
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_WEI_MAX]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'HWEIC']]

    Extraction_T3 = Extraction_Database("""
                                        Select HNAME, Avg(HWEIC) T3_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    Weight = Extraction_Database("""
                                 Select HNAME, RARID, HWEIC SP_WEI from RaceDb
                                 where HNAME in {HNAME_List} and RADAT < {Raceday}
                                 """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select Distinct HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    idx = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform(max) == Speed_Ratings['BEYER_SPEED']
    Speed_Ratings_Weight = Speed_Ratings[idx].merge(Weight, on = ['HNAME', 'RARID']).loc[:,['HNAME','SP_WEI']]
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').apply(lambda x : x.sort_values('SP_WEI').max()).reset_index(drop = True)

    if len(Speed_Ratings_Weight) == 0:
        Feature_DF.loc[:,'CC_WEI_MAX'] = 0
        Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_MAX']]
        return Feature_DF

    Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left')
    Feature_DF.loc[:,'CC_WEI_MAX'] = Feature_DF.loc[:,'T3_Weight'].fillna(Feature_DF.loc[:,'HWEIC'])
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_MAX']]

    return Feature_DF
Exemple #21
0
def OD_PR_LPAVG(Dataframe, HNAME_List, Raceday):
    """
    Average Log Odds implied Probability
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_LPAVG]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, RESFO from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday=Raceday,
                                                HNAME_List=HNAME_List))

    Odds = []
    for name, group in Extraction.groupby('HNAME'):
        Probi = group.loc[:, 'RESFO'].map(lambda x: np.log(
            (1 - 0.175) / x)).dropna().values
        if len(Probi) > 1:
            model = SimpleExpSmoothing(Probi)
            model = model.fit()
            Odds.append([name, model.forecast()[0]])
        elif len(Probi) == 1:
            Odds.append([name, Probi[0]])
        else:
            Odds.append([name, 0])
    Odds = pd.DataFrame(Odds, columns=['HNAME', 'OD_PR_LPAVG'])

    Feature_DF = Feature_DF.merge(Odds, how='left')
    Feature_DF.loc[:,
                   'OD_PR_LPAVG'].fillna(Feature_DF.loc[:,
                                                        'OD_PR_LPAVG'].min(),
                                         inplace=True)
    Feature_DF.loc[:, 'OD_PR_LPAVG'].fillna(0, inplace=True)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_LPAVG']]

    return Feature_DF
Exemple #22
0
def CC_BWEI_D(Dataframe, HNAME_List, Raceday):

    """
    Change in Bodyweight of Horse
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_D]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    HBWEI = []
    for name, group in Extraction.groupby('HNAME'):
        Weight = (group.loc[:,'HBWEI'].diff() / group.loc[:,'HBWEI']).dropna().values
        if len(Weight) >1:
            model = SimpleExpSmoothing(Weight)
            model = model.fit()
            HBWEI.append([name, model.forecast()[0]])
        elif len(Weight) == 1:
            HBWEI.append([name,Weight[0]])
        else :
            HBWEI.append([name,0])
    HBWEI = pd.DataFrame(HBWEI, columns=['HNAME','CC_BWEI_D'])

    Feature_DF = Feature_DF.merge(HBWEI, how='left')
    Feature_DF.loc[:,'CC_BWEI_D'] = Feature_DF.loc[:,'CC_BWEI_D'].abs()
    Feature_DF.loc[:,'CC_BWEI_D'].fillna(Feature_DF.loc[:,'CC_BWEI_D'].max(), inplace = True)
    Feature_DF.loc[:,'CC_BWEI_D'].fillna(0, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_D']]

    return Feature_DF
Exemple #23
0
def CC_CLS_D(Dataframe, HNAME_List, Raceday):

    """
    Change in HKJC Rating
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_D]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, HJRAT from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    JRat = []
    for name, group in Extraction.groupby('HNAME'):
        Rating = (group.loc[:,'HJRAT'].diff() / group.loc[:,'HJRAT']).dropna().values
        if len(Rating) >1:
            model = SimpleExpSmoothing(Rating)
            model = model.fit()
            JRat.append([name, model.forecast()[0]])
        elif len(Rating) == 1:
            JRat.append([name,Rating[0]])
        else :
            JRat.append([name,0])
    JRat = pd.DataFrame(JRat, columns=['HNAME','CC_CLS_D'])

    Feature_DF = Feature_DF.merge(JRat, how='left')
    Feature_DF.loc[:,'CC_CLS_D'].fillna(Feature_DF.loc[:,'CC_CLS_D'].min(), inplace = True)
    Feature_DF.loc[:,'CC_CLS_D'].fillna(0, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_CLS_D']]

    return Feature_DF
Exemple #24
0
def RS_ELO_HP_GO(Dataframe, HNAME_List, Raceday):

    """
    Horse’s ELO Score Implied Probability on Going
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_HP_GO]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]
    Going = Dataframe.loc[:,'RAGOG'].values[0].strip()
    Going_Dict = Going_Similarity(Going)
    Surface = Dataframe.loc[:,'RATRA'].values[0]
    if Surface == 'T' :
        Target_Col = 'HELO_GF, HELO_G, HELO_GL, HELO_L, HELO_LS, HELO_S'
    elif Surface == 'AW':
        Target_Col = 'HELO_WF, HELO_MF, HELO_MG, HELO_WS'

    Extraction = Extraction_Database("""
                                     Select HNAME, {Target_Col}
                                     from RS_HORSE_ELO
                                     where HNAME in {HNAME_List}
                                     """.format(HNAME_List = HNAME_List, Target_Col=Target_Col))
    Feature_DF = Feature_DF.merge(Extraction, how='left')

    ELO_GOG = []
    for index, row in Feature_DF.iterrows():
        row
        Horse = row['HNAME']
        try :
            df = row.reset_index().loc[2:,:].dropna()
            df.columns = ['RAGOG', 'ELO']
            df.loc[:,'RAGOG'] = df.loc[:,'RAGOG'].apply(lambda x : str(x[5:]))
            df.replace({'RAGOG': Going_Dict}, inplace = True)
            df.loc[:,'RAGOG'] = np.exp(df.loc[:,'RAGOG']) / np.exp(df.loc[:,'RAGOG']).sum()
            ELO = df.loc[:,'RAGOG'].dot(df.loc[:,'ELO'])
            ELO_GOG.append([Horse, ELO])
        except :
            ELO_GOG.append([Horse, 1500])
    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]
    Feature_DF = Feature_DF.merge(pd.DataFrame(ELO_GOG, columns = ['HNAME','RS_ELO_H_GO']), how='left')
    Feature_DF.loc[:,'RS_ELO_H_GO'].fillna(1500, inplace=True)

    #Implied Probability Function
    Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME')
    Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_GO'},inplace=True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_GO']]

    return Feature_DF
Exemple #25
0
def Feature_Creation(Dataframe):
    """
    Creates Feature from Feature List and Insert into FeatureDb
    Parameter
    ---------
    Dataframe : MatchDay Data Format
    Return
    ------
    Dataframe
    """
    #Start Timer
    start_time = time.time()

    for RADAT, Race_Day in Dataframe.groupby('RADAT'):
        print(RADAT)
        """
        Day by Day
        """
        for RARID, Race in Race_Day.groupby('RARID'):
            """
            Race to Race
            """
            # print(RARID)
            One_Race_Feature(Race)
        """
        Post-Day
        """
        Result_DF = Extraction_Database(
            """ Select * from RaceDb where RADAT = ? """, [RADAT])
        Features_Dataframe = Extraction_Database(
            """ Select * from FeatureDb where RARID BETWEEN ? and ? """,
            [int(str(RADAT) + '00'),
             int(str(RADAT) + '99')])
        Post_Raceday_Update(RADAT, Features_Dataframe, Result_DF)

    print("---- %s Races are Created to FeatureDb in %s seconds ----" \
                 %(Dataframe['RARID'].nunique(), (str(round((time.time() - start_time),4)))))

    return None
Exemple #26
0
def CC_REC_DAYL(Dataframe, HNAME_List, Raceday):

    """
    Number of Days since Last Race
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_REC_DAYL]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, max(RADAT) Last_Race from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     Group by HNAME
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    #For First Runners
    if len(Extraction) == 0:
        Feature_DF.loc[:,'CC_REC_DAYL'] = 0
        Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAYL']]
        return Feature_DF

    Raceday = pd.to_datetime(Raceday, format = '%Y%m%d')
    Extraction.loc[:,'Day_Last'] = Raceday - pd.to_datetime(Extraction.loc[:, 'Last_Race'], format = '%Y%m%d')

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[Feature_DF.loc[:,'Day_Last'].notna(),'CC_REC_DAYL'] = Feature_DF.loc[Feature_DF.loc[:,'Day_Last'].notna(),'Day_Last']\
                                                                        .apply(lambda x : int(str(x).split('days')[0]))
    Feature_DF.loc[:,'CC_REC_DAYL'].fillna(Feature_DF.loc[:,'CC_REC_DAYL'].min(), inplace = True)
    Feature_DF.loc[:,'CC_REC_DAYL'].fillna(0, inplace = True)
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAYL']]

    return Feature_DF
Exemple #27
0
def MatchDay_Dataset(Race_ID):
    """
    Extracting MatchDay Data from RaceDb
    Parameter
    ---------
    Race_ID : Dataframe of RaceID
    Return
    ------
    Dataframe
    """
    #Start Timer
    start_time = time.time()

    Dataset = pd.DataFrame()
    if len(Race_ID) > 1:
        Race_ID_List = [i for i in Race_ID['RARID'].tolist()]
        Dataset = Extraction_Database("""
                                      Select Distinct RARID, HNAME, HAGEI, HBWEI, HDRAW, HJRAT, HWEIC, JNAME, RESFO, RACLS, RADAT, RARAL,
                                      RADIS, RAGOG, RALOC, RARUN, RATRA, SNAME
                                      from RaceDb where RARID in {RARID}
                                      Order By RARID, HNAME
                                      """.format(
            RARID='(' + str(Race_ID_List)[1:-1] + ')'))
    else:
        Dataset = Extraction_Database(
            """
                                      Select Distinct RARID, HNAME, HAGEI, HBWEI, HDRAW, HJRAT, HWEIC, JNAME, RESFO, RACLS, RADAT, RARAL,
                                      RADIS, RAGOG, RALOC, RARUN, RATRA, SNAME
                                      from RaceDb where RARID = ?
                                      Order By HNAME
                                      """, [int(list(Race_ID.values)[0])])
    #Print Time Taken to Load
    print("---- %s Races are Extracted from RaceDb in %s seconds / %s minutes----" \
    %(len(Race_ID), (str(round((time.time() - start_time),4))),(str(round(((time.time() - start_time))/60,4)))))

    return Dataset
Exemple #28
0
def RS_ELO_HP_DIST(Dataframe, HNAME_List, Raceday):

    """
    Horse’s ELO Score Implied Probability on Distance
    Parameter
    ---------
    Matchday : Matchday Dataframe
    Return
    ------
    Dataframe [HNAME, RS_ELO_HP_DIST]
    """

    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]
    Distance = Dataframe.loc[:,'RADIS'].values[0]
    Dist_Dict = Distance_Similarity(Distance)

    Extraction = Extraction_Database("""
                                     Select HNAME, HELO_1000, HELO_1200, HELO_1400, HELO_1600,
                                     HELO_1650, HELO_1800, HELO_2000, HELO_2200, HELO_2400
                                     from RS_HORSE_ELO
                                     where HNAME in {HNAME_List}
                                     """.format(HNAME_List = HNAME_List))
    Feature_DF = Feature_DF.merge(Extraction, how='left')

    ELO_DIST = []
    for index, row in Feature_DF.iterrows():
        Horse = row['HNAME']
        try :
            df = row.reset_index().loc[2:,:].dropna()
            df.columns = ['RADIS', 'ELO']
            df.loc[:,'RADIS'] = df.loc[:,'RADIS'].apply(lambda x : int(x[5:]))
            df.replace({'RADIS': Dist_Dict}, inplace = True)
            df.loc[:,'RADIS'] = np.exp(df.loc[:,'RADIS']) / np.exp(df.loc[:,'RADIS']).sum()
            ELO = df.loc[:,'RADIS'].dot(df.loc[:,'ELO'])
            ELO_DIST.append([Horse, ELO])
        except :
            ELO_DIST.append([Horse, 1500])
    Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']]
    Feature_DF = Feature_DF.merge(pd.DataFrame(ELO_DIST, columns = ['HNAME','RS_ELO_H_DIST']), how='left')
    Feature_DF.loc[:,'RS_ELO_H_DIST'].fillna(1500, inplace=True)

    #Implied Probability Function
    Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME')
    Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_DIST'},inplace=True)
    Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_DIST']]

    return Feature_DF
Exemple #29
0
def Get_Stable():

    #Start Timer
    start_time = time.time()
    #Selenium Options
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--log-level=OFF")
    web_driver = webdriver.Chrome(ChromeDriverManager().install(),
                                  options=chrome_options)

    #Get Current list of Stables on record
    Data = Extraction_Database("""
                               Select Distinct SNum from SnameDb
                               """)['SNum'].values.tolist()
    To_scrap = list(set(map(str, range(1, 1000))) - set(Data))
    Stable_DF = pd.DataFrame()
    for i in To_scrap:
        web_driver.get(
            'https://racing.appledaily.com.hk/search-horse/result?trainer=' +
            i)
        time.sleep(3)
        _Login(web_driver)
        #Wait for the page to load
        time.sleep(3)
        raw_html = web_driver.page_source
        try:
            name = str(raw_html).split('養馬數目')[0].split(
                '<div data-v-6bfbe05a="">')[-2].split('</div>')[0]
            if len(name) < 10:
                Stable_DF = Stable_DF.append(pd.DataFrame([[name, i]]))
        except:
            pass
    #Closing Session
    web_driver.close()

    if len(Stable_DF) != 0:
        #Reset Index
        Stable_DF.reset_index(inplace=True, drop=True)
        Stable_DF.columns = ['SNAME', 'SNum']
        #Loading to Dataset
        Load_Dataset_toDatabase('SNameDb', Stable_DF)
    print("---- %s Stable Names are Scrapped in %s minutes ----" %
          (len(Stable_DF), str(round((time.time() - start_time) / 60, 4))))

    return None
Exemple #30
0
def CC_REC_DAY_LWIN(Dataframe, HNAME_List, Raceday):

    """
    Number of Days since Last Win or Best Beyer Speed Figure
    Parameter
    ---------
    Matchday : Matchday Dataframe
    HNAME_List : String of List of Horse Names
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_REC_DAY_LWIN]
    """

    Feature_DF = Dataframe.loc[:,['HNAME','RARID']]

    Extraction = Extraction_Database("""
                                     Select HN HNAME, Beyer_Date, Win_Date, First_Date from
                                     (Select HNAME HN, min(RADAT) First_Date from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     Group by HNAME) A
                                     LEFT OUTER JOIN
                                     (Select HNAME, Beyer_Date, Win_Date from (
                                     Select HNAME, RADAT Beyer_Date, max(BEYER_SPEED) from Race_PosteriorDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     Group by HNAME) BEYER
                                     LEFT OUTER JOIN
                                     (Select HNAME HNAME_WIN, RADAT Win_Date from Race_PosteriorDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP = 1
                                     Group by HNAME) WIN
                                     ON BEYER.HNAME = WIN.HNAME_WIN) B
                                     ON A.HN = B.HNAME
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Feature_DF = Feature_DF.merge(Extraction, how='left')
    Feature_DF.loc[:,'Win_Date'].fillna(Feature_DF.loc[:,'Beyer_Date'], inplace = True)
    Feature_DF.loc[:,'Win_Date'].fillna(Feature_DF.loc[:,'First_Date'], inplace = True)
    Feature_DF.loc[:,'Win_Date'].fillna(Raceday, inplace = True)

    Raceday = pd.to_datetime(Raceday, format = '%Y%m%d')
    Feature_DF.loc[:,'CC_REC_DAY_LWIN'] = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Win_Date'], format = '%Y%m%d')
    Feature_DF.loc[:,'CC_REC_DAY_LWIN'] = Feature_DF.loc[:,'CC_REC_DAY_LWIN'].apply(lambda x : int(str(x).split('days')[0]))
    Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAY_LWIN']]

    return Feature_DF