def CC_FRB(Dataframe, HNAME_List, Raceday): """ Binary indicator on whether the underlying race is the horse’s first race. Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_FRB] """ Feature_DF = Dataframe.loc[:, ['HNAME','RARID']] Extraction = Extraction_Database(""" Select HNAME, count(RARID) CC_FRB from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Extraction.loc[:,'CC_FRB'] = Extraction.loc[:,'CC_FRB'].apply(lambda x : int(x<1)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'CC_FRB'].fillna(1, inplace=True) Feature_DF = Feature_DF.loc[:,['HNAME','CC_FRB']] return Feature_DF
def CC_REC_DAY_PT3(Dataframe, HNAME_List, Raceday): """ Predicted days until next Top 3 Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_REC_DAY_PT3] """ Feature_DF = Dataframe.loc[:,['HNAME','RARID']] Extraction_Performance = Extraction_Database(""" Select HNAME, First_Date, Last_T3_Date, Top_Beyer_Date from ( Select HNAME, First_Date, Top_Beyer_Date from ( Select HNAME, min(RADAT) First_Date from Race_PosteriorDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME) FIRST LEFT OUTER JOIN (Select HNAME HNAME_BEYER, RADAT Top_Beyer_Date, max(BEYER_SPEED) from Race_PosteriorDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME) BEYER ON FIRST.HNAME = BEYER.HNAME_BEYER) FIRST_BEYER LEFT OUTER JOIN (Select HNAME HNAME_W, max(RADAT) Last_T3_Date from Race_PosteriorDb where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3 Group by HNAME) WIN ON WIN.HNAME_W = FIRST_BEYER.HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Extraction_Avg = Extraction_Database(""" Select HNAME, RADAT, RESFP, BEYER_SPEED from Race_PosteriorDb where RADAT < {Raceday} and HNAME in {HNAME_List} """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Avg_Date = [] for name, group in Extraction_Avg.groupby('HNAME'): Avg_T3 = group.loc[group.loc[:, 'RESFP'] <= 3, 'RADAT'].apply(lambda x: pd.to_datetime(x, format = '%Y%m%d')).diff().mean() Avg_Beyer = group.nlargest(3,'BEYER_SPEED').loc[:, 'RADAT'].apply(lambda x: pd.to_datetime(x, format = '%Y%m%d')).diff().mean() Avg_Date.append([name, Avg_T3, Avg_Beyer]) Avg_Date = pd.DataFrame(Avg_Date, columns=['HNAME','Avg_T3','Avg_Beyer']) Raceday = pd.to_datetime(Raceday, format = '%Y%m%d') Feature_DF = Feature_DF.merge(Extraction_Performance, how='left').merge(Avg_Date, how='left') Feature_DF.loc[:,['First_Date','Last_T3_Date','Top_Beyer_Date']]=Feature_DF.loc[:,['First_Date','Last_T3_Date','Top_Beyer_Date']].fillna('20120101') Feature_DF.loc[:,['Avg_T3','Avg_Beyer']]=Feature_DF.loc[:,['Avg_T3','Avg_Beyer']].fillna(Raceday-pd.to_datetime('20120101',format='%Y%m%d')) Feature_DF.loc[:,'T3'] = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Last_T3_Date'], format = '%Y%m%d') Feature_DF.loc[:,'T3'] = Feature_DF.loc[:,'T3'] - Feature_DF.loc[:,'Avg_T3'].abs() Feature_DF.loc[:,'Beyer'] = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Top_Beyer_Date'], format = '%Y%m%d') Feature_DF.loc[:,'Beyer'] = Feature_DF.loc[:,'Beyer'] - Feature_DF.loc[:,'Avg_Beyer'].abs() Feature_DF.loc[:,'CC_REC_DAY_PT3'] = Feature_DF.loc[:,'T3'].fillna(Feature_DF.loc[:,'Beyer']) Feature_DF.loc[:,'CC_REC_DAY_PT3'] = Feature_DF.loc[:,'CC_REC_DAY_PT3'].apply(lambda x : int(str(x).split('days')[0])) Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAY_PT3']] return Feature_DF
def CC_WEI_SP(Dataframe, HNAME_List, Raceday): """ Weight Carried’s effect on Speed Figure Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_WEI_SP] """ Feature_DF = Dataframe.loc[:,['HNAME','HWEIC']] Extraction_Weight = Extraction_Database(""" Select HNAME, max(RARID), HWEIC Last_Weight from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Extraction_Speed = Extraction_Database(""" Select HNAME, BEYER_SPEED, max(RARID) from Race_PosteriorDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Feature_DF = Feature_DF.merge(Extraction_Weight, how='left') Feature_DF.loc[:,'Weight_Change'] = Feature_DF.loc[:,'HWEIC'] - Feature_DF.loc[:,'Last_Weight'] try : #Load Models with open(Aux_Reg_Path + 'CC_WEI_INC_Model.joblib', 'rb') as location: Inc_model = joblib.load(location) with open(Aux_Reg_Path + 'CC_WEI_DEC_Model.joblib', 'rb') as location: Dec_model = joblib.load(location) SP_Change = [] for index, row in Feature_DF.iterrows(): if row['Weight_Change'] < 0: SP_Change.append(Dec_model.predict(np.array([[row.loc['Weight_Change']]]))) else : SP_Change.append(Inc_model.predict(np.array([[row.loc['Weight_Change']]]))) except : pass Feature_DF = Feature_DF.merge(Extraction_Speed, how='left') try : Feature_DF.loc[:,'CC_WEI_SP'] = Feature_DF.loc[:,'BEYER_SPEED'] + Feature_DF.loc[:,'SP_Change'] except : Feature_DF.loc[:,'CC_WEI_SP'] = Feature_DF.loc[:,'BEYER_SPEED'] Feature_DF.loc[:,'CC_WEI_SP'].fillna(Feature_DF.loc[:,'CC_WEI_SP'].min(), inplace = True) Feature_DF.loc[:,'CC_WEI_SP'].fillna(0, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_SP']] return Feature_DF
def CC_CLS_CL(Dataframe, HNAME_List, Raceday): """ Horse's Competition Level Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_CLS_CL] """ Feature_DF = Dataframe.loc[:,['HNAME','HJRAT']] Horse_Comp = [] #For each horse, get data for last 5 races for Horse in Dataframe['HNAME'].tolist(): Extraction = Extraction_Database(""" Select HNAME, RARID, HJRAT, RESFP from RaceDb where RARID in ( Select RARID from RaceDb where RADAT < {Raceday} and HNAME = {Horse} ORDER BY RARID DESC LIMIT 5) """.format(Raceday = Raceday, Horse = "'" + Horse + "'")) for RARID, race in Extraction.groupby('RARID'): Horse_Rat = race.loc[race.loc[:,'HNAME']==Horse,'HJRAT'].to_list()[0] Horse_FP = race.loc[race.loc[:,'HNAME']==Horse,'RESFP'].to_list()[0] Comp_Rat = race.nlargest(3, 'HJRAT').loc[:,'HJRAT'].mean() Comp_Level = (Comp_Rat - Horse_Rat) / Horse_FP Horse_Comp.append([Horse,Comp_Level]) Horse_Comp = pd.DataFrame(Horse_Comp, columns=['HNAME', 'Comp_Level']) #Recency Weighting Comp = [] for name, group in Horse_Comp.groupby('HNAME'): Comp_Figure = group.loc[:,'Comp_Level'].dropna().values try : model = SimpleExpSmoothing(Comp_Figure) model = model.fit() Comp.append([name, model.forecast()[0]]) except : Comp.append([name,Comp_Figure[0]]) Comp = pd.DataFrame(Comp, columns=['HNAME','CC_CLS_CL']) Feature_DF = Feature_DF.merge(Comp, how='left') Feature_DF.loc[:, 'CC_CLS_CL'].fillna(Feature_DF.loc[:, 'CC_CLS_CL'].min(), inplace = True) Feature_DF.loc[:, 'CC_CLS_CL'].fillna(0, inplace=True) Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_CL']] return Feature_DF
def OD_PR_FAVB(Dataframe, HNAME_List, Raceday): """ Number of favourites that ran behind the underlying horse in the last 5 races. Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, OD_PR_FAVB] """ Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']] Fav_Beaten_List = [] #For each horse, get data for last 5 races for Horse in Dataframe['HNAME'].tolist(): Extraction = Extraction_Database(""" Select HNAME, RARID, RESFO, RESFP from RaceDb where RARID in ( Select RARID from RaceDb where RADAT < {Raceday} and HNAME = {Horse} ORDER BY RARID DESC LIMIT 5) """.format(Raceday=Raceday, Horse="'" + Horse + "'")) Won_Fav_tot = 0 for RARID, race in Extraction.groupby('RARID'): fav_con = race.loc[:, 'RESFO'] == race.loc[:, 'RESFO'].min() horse_con = race.loc[:, 'HNAME'] == Horse Only_Fav_Horse = race.loc[fav_con | horse_con, ['HNAME', 'RESFP']].sort_values( 'RESFP').reset_index(drop=True) if len(Only_Fav_Horse) != 1: Won_Fav = float(not bool(Only_Fav_Horse.loc[ Only_Fav_Horse.loc[:, 'HNAME'] == Horse, :].index.values)) else: Won_Fav = 0 Won_Fav_tot += Won_Fav Fav_Beaten_List.append([Horse, Won_Fav_tot]) Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_FAVB']) Feature_DF = Feature_DF.merge(Fav_Beaten, how='left') Feature_DF.loc[:, 'OD_PR_FAVB'].fillna(0, inplace=True) Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_FAVB']] return Feature_DF
def Weight_Aug_Reg(Raceday): Extraction = Extraction_Database(""" Select A.HNAME, A.RARID, BEYER_SPEED, HWEIC from (Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb where RADAT < {Raceday}) A, (Select HNAME, RARID, HWEIC from RaceDb where RADAT < {Raceday}) B where A.HNAME = B.HNAME and A.RARID = B.RARID """.format(Raceday = Raceday)) Extraction.fillna(0, inplace=True) if len(Extraction) == 0: return None DF = [] for name, group in Extraction.groupby('HNAME'): Speed_Figure = group.loc[:,'BEYER_SPEED'].diff().values Weight = group.loc[:,'HWEIC'].diff().values One_Horse = pd.DataFrame({'Speed': Speed_Figure, 'Weight':Weight}) One_Horse.replace([np.inf, -np.inf], np.nan, inplace = True) One_Horse.dropna(inplace = True) DF.append(One_Horse) DF = pd.concat(DF) #Slice in increase in weight leading to decrease in Speed Figure Increase_Weight = DF.loc[(DF.loc[:,'Speed'] < 0) & (DF.loc[:,'Weight'] > 0), :] #Slice in decrease in weight leading to increase in Speed Figure Decrease_Weight = DF.loc[(DF.loc[:,'Speed'] > 0) & (DF.loc[:,'Weight'] < 0), :] #NO not fit model if there is no races if len(Increase_Weight) == 0 or len(Decrease_Weight) == 0: return None #Model Fitting model = LinearRegression() model.fit(Increase_Weight.loc[:,'Weight'].values.reshape(-1,1), Increase_Weight.loc[:,'Speed']) #Save Model with open(Aux_Reg_Path + 'CC_WEI_INC_Model.joblib', 'wb') as location: joblib.dump(model, location) #Model Fitting model = LinearRegression() model.fit(Decrease_Weight.loc[:,'Weight'].values.reshape(-1,1), Decrease_Weight.loc[:,'Speed']) #Save Model with open(Aux_Reg_Path + 'CC_WEI_DEC_Model.joblib', 'wb') as location: joblib.dump(model, location) return None
def RS_ELO_JP(Dataframe, HNAME_List, Raceday): """ Jockey’s ELO Score Implied Probability Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_JP] """ Feature_DF = Dataframe.loc[:,['HNAME', 'JNAME']] JNAME_List = '('+str(Dataframe['JNAME'].tolist())[1:-1]+')' Extraction = Extraction_Database(""" Select JNAME, JELO RS_ELO_J from RS_JOCKEY_ELO where JNAME in {JNAME_List} """.format(JNAME_List = JNAME_List)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'RS_ELO_J'].fillna(1500, inplace = True) #Implied Probability Function Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'JNAME').loc[:,'Expected_Score'], on = 'JNAME') Feature_DF.rename(columns={'Expected_Score':'RS_ELO_JP'},inplace=True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_JP']] return Feature_DF
def CC_REC_NUMM(Dataframe, HNAME_List, Raceday): """ Number of meters ran in last 3 months Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_REC_NUMM] """ Feature_DF = Dataframe.loc[:,['HNAME','RARID']] Offset_Raceday = (pd.to_datetime(Raceday) + pd.tseries.offsets.DateOffset(months=-3)).strftime("%Y%m%d") Extraction = Extraction_Database(""" Select HNAME, sum(RADIS) CC_REC_NUMM from RaceDb where RADAT < {Raceday} and RADAT > {Offset_Raceday} and HNAME in {HNAME_List} Group by HNAME """.format(Raceday = Raceday, Offset_Raceday = Offset_Raceday, HNAME_List = HNAME_List)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_NUMM']].fillna(0) return Feature_DF
def RS_ELO_S(Dataframe, HNAME_List, Raceday): """ Stable’s ELO Score Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_S] """ Feature_DF = Dataframe.loc[:,['HNAME', 'SNAME']] SNAME_List = '('+str(Dataframe['SNAME'].tolist())[1:-1]+')' Extraction = Extraction_Database(""" Select SNAME, SELO RS_ELO_S from RS_STABLE_ELO where SNAME in {SNAME_List} """.format(SNAME_List = SNAME_List)) Feature_DF = Feature_DF.merge(Extraction, on='SNAME', how='left') Feature_DF.loc[:,'RS_ELO_S'].fillna(1500, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_S']] return Feature_DF
def RS_ELO_HP_SUR(Dataframe, HNAME_List, Raceday): """ Horse’s ELO Score Implied Probability on Surface Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_HP_SUR] """ Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Surface = Dataframe.loc[:,'RATRA'].values[0] if Surface == 'T' : Target = 'HELO_TURF' elif Surface == 'AW': Target = 'HELO_AW' Extraction = Extraction_Database(""" Select HNAME, {Target} RS_ELO_H_SUR from RS_HORSE_ELO where HNAME in {HNAME_List} """.format(HNAME_List = HNAME_List, Target=Target)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'RS_ELO_H_SUR'].fillna(1500, inplace = True) #Implied Probability Function Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME') Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_SUR'},inplace=True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_SUR']] return Feature_DF
def RS_ELO_SP(Dataframe, HNAME_List, Raceday): """ Stable’s ELO Score Implied Probability Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_SP] """ Feature_DF = Dataframe.loc[:,['HNAME', 'SNAME']] SNAME_List = '('+str(Dataframe['SNAME'].tolist())[1:-1]+')' Extraction = Extraction_Database(""" Select SNAME, SELO RS_ELO_S from RS_STABLE_ELO where SNAME in {SNAME_List} """.format(SNAME_List = SNAME_List)) Feature_DF = Feature_DF.merge(Extraction, on='SNAME', how='left') Feature_DF.loc[:,'RS_ELO_S'].fillna(1500, inplace = True) #Implied Probability Function Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'SNAME').loc[:,'Expected_Score'].reset_index(), on = 'SNAME') Feature_DF.rename(columns={'Expected_Score':'RS_ELO_SP'},inplace=True) Feature_DF = Feature_DF.groupby('HNAME').mean().reset_index() Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_SP']] return Feature_DF
def RS_ELO_HP(Dataframe, HNAME_List, Raceday): """ Horse's ELO Score Implied Probability Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_HP] """ Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Extraction = Extraction_Database(""" Select HNAME, HELO RS_ELO_H from RS_HORSE_ELO where HNAME in {HNAME_List} """.format(HNAME_List = HNAME_List)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'RS_ELO_H'].fillna(1500, inplace = True) #Implied Probability Function Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME') Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP'},inplace=True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP']] return Feature_DF
def RS_ELO_HP_PFL(Dataframe, HNAME_List, Raceday): """ Horse’s ELO Score Implied Probability on Profile Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_HP_PFL] """ Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Target = Dataframe.loc[:,'RALOC'].values[0] + "_" + str(Dataframe.loc[:,'RADIS'].values[0]) + "_" +Dataframe.loc[:,'RATRA'].values[0] Extraction = Extraction_Database(""" Select HNAME, {Target} RS_ELO_H_PFL from RS_HORSE_ELO where HNAME in {HNAME_List} """.format(HNAME_List = HNAME_List, Target=Target)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'RS_ELO_H_PFL'].fillna(1500, inplace = True) #Implied Probability Function Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME') Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_PFL'},inplace=True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_PFL']] return Feature_DF
def RS_ELO_J(Dataframe, HNAME_List, Raceday): """ Jockey’s ELO Score Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_J] """ Feature_DF = Dataframe.loc[:,['HNAME', 'JNAME']] JNAME_List = '('+str(Dataframe['JNAME'].tolist())[1:-1]+')' Extraction = Extraction_Database(""" Select JNAME, JELO RS_ELO_J from RS_JOCKEY_ELO where JNAME in {JNAME_List} """.format(JNAME_List = JNAME_List)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'RS_ELO_J'].fillna(1500, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_J']] return Feature_DF
def RS_ELO_H(Dataframe, HNAME_List, Raceday): """ ELO Score of Horse Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, RS_ELO_H] """ Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Extraction = Extraction_Database(""" Select HNAME, HELO RS_ELO_H from RS_HORSE_ELO where HNAME in {HNAME_List} """.format(HNAME_List = HNAME_List)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'RS_ELO_H'].fillna(1500, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_H']] return Feature_DF
def RS_ELO_H_PFL(Dataframe, HNAME_List, Raceday): """ Horse’s ELO Score on Profile Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_H_PFL] """ Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Target = Dataframe.loc[:,'RALOC'].values[0] + "_" + str(Dataframe.loc[:,'RADIS'].values[0]) + "_" +Dataframe.loc[:,'RATRA'].values[0] Extraction = Extraction_Database(""" Select HNAME, {Target} RS_ELO_H_PFL from RS_HORSE_ELO where HNAME in {HNAME_List} """.format(HNAME_List = HNAME_List, Target=Target)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'RS_ELO_H_PFL'].fillna(1500, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_H_PFL']] return Feature_DF
def OD_PR_BFAV(Dataframe, HNAME_List, Raceday): """ Number of races the underlying horse is a beaten favourite is the last 5 races. Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, OD_PR_BFAV] """ Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']] Fav_Beaten_List = [] #For each horse, get data for last 5 races for Horse in Dataframe.loc[:, 'HNAME'].tolist(): Extraction = Extraction_Database(""" Select HNAME, RARID, RESFO, RESFP from RaceDb where RARID in ( Select RARID from RaceDb where RADAT < {Raceday} and HNAME = {Horse} ORDER BY RARID DESC LIMIT 5) """.format(Raceday=Raceday, Horse="'" + Horse + "'")) Lost_Fav_tot = 0 for RARID, race in Extraction.groupby('RARID'): fav_con = race.loc[:, 'RESFO'] == race.loc[:, 'RESFO'].min() horse_con = race.loc[:, 'HNAME'] == Horse Only_Fav_Horse = race.loc[fav_con & horse_con, ['HNAME', 'RESFP']] if len(Only_Fav_Horse) == 1: Lost_Fav = float(Only_Fav_Horse.loc[:, 'RESFP'] == 1) else: Lost_Fav = 0 Lost_Fav_tot += Lost_Fav Fav_Beaten_List.append([Horse, Lost_Fav_tot]) Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_BFAV']) Feature_DF = Feature_DF.merge(Fav_Beaten, how='left') Feature_DF.loc[:, 'OD_PR_BFAV'].fillna(0, inplace=True) Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_BFAV']] return Feature_DF
def CC_BWEI_DT3(Dataframe, HNAME_List, Raceday): """ Absolute Difference in Bodyweight compared to average Top 3 finish of horse in percentage of Bodyweight Abs((Current Bodyweight - Average Top 3 Bodyweight ) / Average Top 3 Bodyweight) Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_BWEI_DT3] """ Feature_DF = Dataframe.loc[:,['HNAME','HBWEI']] Extraction_T3 = Extraction_Database(""" Select HNAME, avg(HBWEI) T3_Weight from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3 Group by HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Bodyweight = Extraction_Database(""" Select HNAME, RARID, HBWEI Best from RaceDb where HNAME in {HNAME_List} and RADAT < {Raceday} """.format(HNAME_List=HNAME_List, Raceday=Raceday)) Speed_Ratings = Extraction_Database(""" Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb where HNAME in {HNAME_List} and RADAT < {Raceday} """.format(HNAME_List=HNAME_List, Raceday=Raceday)) idx = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform(max) == Speed_Ratings['BEYER_SPEED'] Speed_Ratings_Weight = Speed_Ratings[idx].merge(Bodyweight).loc[:,['HNAME','Best']] Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index() Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left') Feature_DF.loc[:,'Filled_Weight'] = Feature_DF.loc[:,'T3_Weight'].fillna(Feature_DF.loc[:,'Best']) Feature_DF.loc[:,'CC_BWEI_DT3'] = ((Feature_DF.loc[:,'HBWEI'] - Feature_DF.loc[:,'Filled_Weight']) / Feature_DF.loc[:,'Filled_Weight']).abs() Feature_DF.loc[:,'CC_BWEI_DT3'].fillna(Feature_DF.loc[:,'CC_BWEI_DT3'].max(), inplace = True) Feature_DF.loc[:,'CC_BWEI_DT3'].fillna(0, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_DT3']] return Feature_DF
def CC_BWEI_DWIN(Dataframe, HNAME_List, Raceday): """ Bodyweight difference with Winning Performance Abs(Current Bodyweight - Average Winning Bodyweight ) / Average Winning Bodyweight) Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_BWEI_DWIN] """ Feature_DF = Dataframe.loc[:,['HNAME','HBWEI']] Extraction_Win = Extraction_Database(""" Select HNAME, avg(HBWEI) Win_Weight from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} and RESWL = 1 Group by HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Bodyweight = Extraction_Database(""" Select HNAME, RARID, HBWEI Best from RaceDb where HNAME in {HNAME_List} and RADAT < {Raceday} """.format(HNAME_List=HNAME_List, Raceday=Raceday)) Speed_Ratings = Extraction_Database(""" Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb where HNAME in {HNAME_List} and RADAT < {Raceday} """.format(HNAME_List=HNAME_List, Raceday=Raceday)) idx = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform(max) == Speed_Ratings['BEYER_SPEED'] Speed_Ratings_Weight = Speed_Ratings[idx].merge(Bodyweight).loc[:,['HNAME','Best']] Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index() Feature_DF = Feature_DF.merge(Extraction_Win, how='left').merge(Speed_Ratings_Weight, how='left') Feature_DF.loc[:,'Filled_Weight'] = Feature_DF.loc[:,'Win_Weight'].fillna(Feature_DF.loc[:,'Best']) Feature_DF.loc[:,'CC_BWEI_DWIN'] = ((Feature_DF.loc[:,'HBWEI'] - Feature_DF.loc[:,'Filled_Weight']) / Feature_DF.loc[:,'Filled_Weight']).abs() Feature_DF.loc[:,'CC_BWEI_DWIN'].fillna(Feature_DF.loc[:,'CC_BWEI_DWIN'].max(), inplace = True) Feature_DF.loc[:,'CC_BWEI_DWIN'].fillna(0, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_DWIN']] return Feature_DF
def CC_WEI_MAX(Dataframe, HNAME_List, Raceday): """ Weight carrying threshold Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_WEI_MAX] """ Feature_DF = Dataframe.loc[:,['HNAME', 'HWEIC']] Extraction_T3 = Extraction_Database(""" Select HNAME, Avg(HWEIC) T3_Weight from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3 Group by HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Weight = Extraction_Database(""" Select HNAME, RARID, HWEIC SP_WEI from RaceDb where HNAME in {HNAME_List} and RADAT < {Raceday} """.format(HNAME_List=HNAME_List, Raceday=Raceday)) Speed_Ratings = Extraction_Database(""" Select Distinct HNAME, RARID, BEYER_SPEED from Race_PosteriorDb where HNAME in {HNAME_List} and RADAT < {Raceday} """.format(HNAME_List=HNAME_List, Raceday=Raceday)) idx = Speed_Ratings.groupby(['HNAME'])['BEYER_SPEED'].transform(max) == Speed_Ratings['BEYER_SPEED'] Speed_Ratings_Weight = Speed_Ratings[idx].merge(Weight, on = ['HNAME', 'RARID']).loc[:,['HNAME','SP_WEI']] Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').apply(lambda x : x.sort_values('SP_WEI').max()).reset_index(drop = True) if len(Speed_Ratings_Weight) == 0: Feature_DF.loc[:,'CC_WEI_MAX'] = 0 Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_MAX']] return Feature_DF Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left') Feature_DF.loc[:,'CC_WEI_MAX'] = Feature_DF.loc[:,'T3_Weight'].fillna(Feature_DF.loc[:,'HWEIC']) Feature_DF = Feature_DF.loc[:,['HNAME','CC_WEI_MAX']] return Feature_DF
def OD_PR_LPAVG(Dataframe, HNAME_List, Raceday): """ Average Log Odds implied Probability Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, OD_PR_LPAVG] """ Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']] Extraction = Extraction_Database(""" Select HNAME, RARID, RESFO from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} """.format(Raceday=Raceday, HNAME_List=HNAME_List)) Odds = [] for name, group in Extraction.groupby('HNAME'): Probi = group.loc[:, 'RESFO'].map(lambda x: np.log( (1 - 0.175) / x)).dropna().values if len(Probi) > 1: model = SimpleExpSmoothing(Probi) model = model.fit() Odds.append([name, model.forecast()[0]]) elif len(Probi) == 1: Odds.append([name, Probi[0]]) else: Odds.append([name, 0]) Odds = pd.DataFrame(Odds, columns=['HNAME', 'OD_PR_LPAVG']) Feature_DF = Feature_DF.merge(Odds, how='left') Feature_DF.loc[:, 'OD_PR_LPAVG'].fillna(Feature_DF.loc[:, 'OD_PR_LPAVG'].min(), inplace=True) Feature_DF.loc[:, 'OD_PR_LPAVG'].fillna(0, inplace=True) Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_LPAVG']] return Feature_DF
def CC_BWEI_D(Dataframe, HNAME_List, Raceday): """ Change in Bodyweight of Horse Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_BWEI_D] """ Feature_DF = Dataframe.loc[:,['HNAME','RARID']] Extraction = Extraction_Database(""" Select HNAME, RARID, HBWEI from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} """.format(Raceday = Raceday, HNAME_List = HNAME_List)) HBWEI = [] for name, group in Extraction.groupby('HNAME'): Weight = (group.loc[:,'HBWEI'].diff() / group.loc[:,'HBWEI']).dropna().values if len(Weight) >1: model = SimpleExpSmoothing(Weight) model = model.fit() HBWEI.append([name, model.forecast()[0]]) elif len(Weight) == 1: HBWEI.append([name,Weight[0]]) else : HBWEI.append([name,0]) HBWEI = pd.DataFrame(HBWEI, columns=['HNAME','CC_BWEI_D']) Feature_DF = Feature_DF.merge(HBWEI, how='left') Feature_DF.loc[:,'CC_BWEI_D'] = Feature_DF.loc[:,'CC_BWEI_D'].abs() Feature_DF.loc[:,'CC_BWEI_D'].fillna(Feature_DF.loc[:,'CC_BWEI_D'].max(), inplace = True) Feature_DF.loc[:,'CC_BWEI_D'].fillna(0, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME','CC_BWEI_D']] return Feature_DF
def CC_CLS_D(Dataframe, HNAME_List, Raceday): """ Change in HKJC Rating Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_CLS_D] """ Feature_DF = Dataframe.loc[:,['HNAME','RARID']] Extraction = Extraction_Database(""" Select HNAME, RARID, HJRAT from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} """.format(Raceday = Raceday, HNAME_List = HNAME_List)) JRat = [] for name, group in Extraction.groupby('HNAME'): Rating = (group.loc[:,'HJRAT'].diff() / group.loc[:,'HJRAT']).dropna().values if len(Rating) >1: model = SimpleExpSmoothing(Rating) model = model.fit() JRat.append([name, model.forecast()[0]]) elif len(Rating) == 1: JRat.append([name,Rating[0]]) else : JRat.append([name,0]) JRat = pd.DataFrame(JRat, columns=['HNAME','CC_CLS_D']) Feature_DF = Feature_DF.merge(JRat, how='left') Feature_DF.loc[:,'CC_CLS_D'].fillna(Feature_DF.loc[:,'CC_CLS_D'].min(), inplace = True) Feature_DF.loc[:,'CC_CLS_D'].fillna(0, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME','CC_CLS_D']] return Feature_DF
def RS_ELO_HP_GO(Dataframe, HNAME_List, Raceday): """ Horse’s ELO Score Implied Probability on Going Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_HP_GO] """ Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Going = Dataframe.loc[:,'RAGOG'].values[0].strip() Going_Dict = Going_Similarity(Going) Surface = Dataframe.loc[:,'RATRA'].values[0] if Surface == 'T' : Target_Col = 'HELO_GF, HELO_G, HELO_GL, HELO_L, HELO_LS, HELO_S' elif Surface == 'AW': Target_Col = 'HELO_WF, HELO_MF, HELO_MG, HELO_WS' Extraction = Extraction_Database(""" Select HNAME, {Target_Col} from RS_HORSE_ELO where HNAME in {HNAME_List} """.format(HNAME_List = HNAME_List, Target_Col=Target_Col)) Feature_DF = Feature_DF.merge(Extraction, how='left') ELO_GOG = [] for index, row in Feature_DF.iterrows(): row Horse = row['HNAME'] try : df = row.reset_index().loc[2:,:].dropna() df.columns = ['RAGOG', 'ELO'] df.loc[:,'RAGOG'] = df.loc[:,'RAGOG'].apply(lambda x : str(x[5:])) df.replace({'RAGOG': Going_Dict}, inplace = True) df.loc[:,'RAGOG'] = np.exp(df.loc[:,'RAGOG']) / np.exp(df.loc[:,'RAGOG']).sum() ELO = df.loc[:,'RAGOG'].dot(df.loc[:,'ELO']) ELO_GOG.append([Horse, ELO]) except : ELO_GOG.append([Horse, 1500]) Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Feature_DF = Feature_DF.merge(pd.DataFrame(ELO_GOG, columns = ['HNAME','RS_ELO_H_GO']), how='left') Feature_DF.loc[:,'RS_ELO_H_GO'].fillna(1500, inplace=True) #Implied Probability Function Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME') Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_GO'},inplace=True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_GO']] return Feature_DF
def Feature_Creation(Dataframe): """ Creates Feature from Feature List and Insert into FeatureDb Parameter --------- Dataframe : MatchDay Data Format Return ------ Dataframe """ #Start Timer start_time = time.time() for RADAT, Race_Day in Dataframe.groupby('RADAT'): print(RADAT) """ Day by Day """ for RARID, Race in Race_Day.groupby('RARID'): """ Race to Race """ # print(RARID) One_Race_Feature(Race) """ Post-Day """ Result_DF = Extraction_Database( """ Select * from RaceDb where RADAT = ? """, [RADAT]) Features_Dataframe = Extraction_Database( """ Select * from FeatureDb where RARID BETWEEN ? and ? """, [int(str(RADAT) + '00'), int(str(RADAT) + '99')]) Post_Raceday_Update(RADAT, Features_Dataframe, Result_DF) print("---- %s Races are Created to FeatureDb in %s seconds ----" \ %(Dataframe['RARID'].nunique(), (str(round((time.time() - start_time),4))))) return None
def CC_REC_DAYL(Dataframe, HNAME_List, Raceday): """ Number of Days since Last Race Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_REC_DAYL] """ Feature_DF = Dataframe.loc[:,['HNAME','RARID']] Extraction = Extraction_Database(""" Select HNAME, max(RADAT) Last_Race from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) #For First Runners if len(Extraction) == 0: Feature_DF.loc[:,'CC_REC_DAYL'] = 0 Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAYL']] return Feature_DF Raceday = pd.to_datetime(Raceday, format = '%Y%m%d') Extraction.loc[:,'Day_Last'] = Raceday - pd.to_datetime(Extraction.loc[:, 'Last_Race'], format = '%Y%m%d') Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[Feature_DF.loc[:,'Day_Last'].notna(),'CC_REC_DAYL'] = Feature_DF.loc[Feature_DF.loc[:,'Day_Last'].notna(),'Day_Last']\ .apply(lambda x : int(str(x).split('days')[0])) Feature_DF.loc[:,'CC_REC_DAYL'].fillna(Feature_DF.loc[:,'CC_REC_DAYL'].min(), inplace = True) Feature_DF.loc[:,'CC_REC_DAYL'].fillna(0, inplace = True) Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAYL']] return Feature_DF
def MatchDay_Dataset(Race_ID): """ Extracting MatchDay Data from RaceDb Parameter --------- Race_ID : Dataframe of RaceID Return ------ Dataframe """ #Start Timer start_time = time.time() Dataset = pd.DataFrame() if len(Race_ID) > 1: Race_ID_List = [i for i in Race_ID['RARID'].tolist()] Dataset = Extraction_Database(""" Select Distinct RARID, HNAME, HAGEI, HBWEI, HDRAW, HJRAT, HWEIC, JNAME, RESFO, RACLS, RADAT, RARAL, RADIS, RAGOG, RALOC, RARUN, RATRA, SNAME from RaceDb where RARID in {RARID} Order By RARID, HNAME """.format( RARID='(' + str(Race_ID_List)[1:-1] + ')')) else: Dataset = Extraction_Database( """ Select Distinct RARID, HNAME, HAGEI, HBWEI, HDRAW, HJRAT, HWEIC, JNAME, RESFO, RACLS, RADAT, RARAL, RADIS, RAGOG, RALOC, RARUN, RATRA, SNAME from RaceDb where RARID = ? Order By HNAME """, [int(list(Race_ID.values)[0])]) #Print Time Taken to Load print("---- %s Races are Extracted from RaceDb in %s seconds / %s minutes----" \ %(len(Race_ID), (str(round((time.time() - start_time),4))),(str(round(((time.time() - start_time))/60,4))))) return Dataset
def RS_ELO_HP_DIST(Dataframe, HNAME_List, Raceday): """ Horse’s ELO Score Implied Probability on Distance Parameter --------- Matchday : Matchday Dataframe Return ------ Dataframe [HNAME, RS_ELO_HP_DIST] """ Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Distance = Dataframe.loc[:,'RADIS'].values[0] Dist_Dict = Distance_Similarity(Distance) Extraction = Extraction_Database(""" Select HNAME, HELO_1000, HELO_1200, HELO_1400, HELO_1600, HELO_1650, HELO_1800, HELO_2000, HELO_2200, HELO_2400 from RS_HORSE_ELO where HNAME in {HNAME_List} """.format(HNAME_List = HNAME_List)) Feature_DF = Feature_DF.merge(Extraction, how='left') ELO_DIST = [] for index, row in Feature_DF.iterrows(): Horse = row['HNAME'] try : df = row.reset_index().loc[2:,:].dropna() df.columns = ['RADIS', 'ELO'] df.loc[:,'RADIS'] = df.loc[:,'RADIS'].apply(lambda x : int(x[5:])) df.replace({'RADIS': Dist_Dict}, inplace = True) df.loc[:,'RADIS'] = np.exp(df.loc[:,'RADIS']) / np.exp(df.loc[:,'RADIS']).sum() ELO = df.loc[:,'RADIS'].dot(df.loc[:,'ELO']) ELO_DIST.append([Horse, ELO]) except : ELO_DIST.append([Horse, 1500]) Feature_DF = Dataframe.loc[:,['HNAME', 'RARID']] Feature_DF = Feature_DF.merge(pd.DataFrame(ELO_DIST, columns = ['HNAME','RS_ELO_H_DIST']), how='left') Feature_DF.loc[:,'RS_ELO_H_DIST'].fillna(1500, inplace=True) #Implied Probability Function Feature_DF = Feature_DF.merge(ELO_Expected(Feature_DF, 'HNAME').loc[:,'Expected_Score'], on = 'HNAME') Feature_DF.rename(columns={'Expected_Score':'RS_ELO_HP_DIST'},inplace=True) Feature_DF = Feature_DF.loc[:,['HNAME', 'RS_ELO_HP_DIST']] return Feature_DF
def Get_Stable(): #Start Timer start_time = time.time() #Selenium Options chrome_options = Options() chrome_options.add_argument("--headless") chrome_options.add_argument("--log-level=OFF") web_driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options) #Get Current list of Stables on record Data = Extraction_Database(""" Select Distinct SNum from SnameDb """)['SNum'].values.tolist() To_scrap = list(set(map(str, range(1, 1000))) - set(Data)) Stable_DF = pd.DataFrame() for i in To_scrap: web_driver.get( 'https://racing.appledaily.com.hk/search-horse/result?trainer=' + i) time.sleep(3) _Login(web_driver) #Wait for the page to load time.sleep(3) raw_html = web_driver.page_source try: name = str(raw_html).split('養馬數目')[0].split( '<div data-v-6bfbe05a="">')[-2].split('</div>')[0] if len(name) < 10: Stable_DF = Stable_DF.append(pd.DataFrame([[name, i]])) except: pass #Closing Session web_driver.close() if len(Stable_DF) != 0: #Reset Index Stable_DF.reset_index(inplace=True, drop=True) Stable_DF.columns = ['SNAME', 'SNum'] #Loading to Dataset Load_Dataset_toDatabase('SNameDb', Stable_DF) print("---- %s Stable Names are Scrapped in %s minutes ----" % (len(Stable_DF), str(round((time.time() - start_time) / 60, 4)))) return None
def CC_REC_DAY_LWIN(Dataframe, HNAME_List, Raceday): """ Number of Days since Last Win or Best Beyer Speed Figure Parameter --------- Matchday : Matchday Dataframe HNAME_List : String of List of Horse Names Raceday : Date of Race Return ------ Dataframe [HNAME, CC_REC_DAY_LWIN] """ Feature_DF = Dataframe.loc[:,['HNAME','RARID']] Extraction = Extraction_Database(""" Select HN HNAME, Beyer_Date, Win_Date, First_Date from (Select HNAME HN, min(RADAT) First_Date from RaceDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME) A LEFT OUTER JOIN (Select HNAME, Beyer_Date, Win_Date from ( Select HNAME, RADAT Beyer_Date, max(BEYER_SPEED) from Race_PosteriorDb where RADAT < {Raceday} and HNAME in {HNAME_List} Group by HNAME) BEYER LEFT OUTER JOIN (Select HNAME HNAME_WIN, RADAT Win_Date from Race_PosteriorDb where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP = 1 Group by HNAME) WIN ON BEYER.HNAME = WIN.HNAME_WIN) B ON A.HN = B.HNAME """.format(Raceday = Raceday, HNAME_List = HNAME_List)) Feature_DF = Feature_DF.merge(Extraction, how='left') Feature_DF.loc[:,'Win_Date'].fillna(Feature_DF.loc[:,'Beyer_Date'], inplace = True) Feature_DF.loc[:,'Win_Date'].fillna(Feature_DF.loc[:,'First_Date'], inplace = True) Feature_DF.loc[:,'Win_Date'].fillna(Raceday, inplace = True) Raceday = pd.to_datetime(Raceday, format = '%Y%m%d') Feature_DF.loc[:,'CC_REC_DAY_LWIN'] = Raceday - pd.to_datetime(Feature_DF.loc[:, 'Win_Date'], format = '%Y%m%d') Feature_DF.loc[:,'CC_REC_DAY_LWIN'] = Feature_DF.loc[:,'CC_REC_DAY_LWIN'].apply(lambda x : int(str(x).split('days')[0])) Feature_DF = Feature_DF.loc[:,['HNAME','CC_REC_DAY_LWIN']] return Feature_DF