def CC_REC_DAY_PT3(Dataframe, HNAME_List, Raceday):

    """
    Predicted days until next Top 3

    For every horse: days elapsed since its last Top 3 finish minus the
    absolute average gap between its Top 3 finishes. If that value is missing,
    the same figure built from the horse's best BEYER_SPEED races is used.
    Missing dates default to 2012-01-01 and missing average gaps default to
    (Raceday - 2012-01-01).

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race, substituted into the SQL and parsed with format '%Y%m%d'
    Return
    ------
    Dataframe [HNAME, CC_REC_DAY_PT3] with CC_REC_DAY_PT3 as whole days
    """

    Default_Date = '20120101'

    def To_Date(Value):
        return pd.to_datetime(Value, format = '%Y%m%d')

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']]

    #First race date, date of best speed figure and date of last Top 3 per horse
    Extraction_Performance = Extraction_Database("""
                                                 Select HNAME, First_Date, Last_T3_Date, Top_Beyer_Date from (
                                                 Select HNAME, First_Date, Top_Beyer_Date from (
                                                 Select HNAME, min(RADAT) First_Date from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) FIRST
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_BEYER, RADAT Top_Beyer_Date, max(BEYER_SPEED) from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) BEYER
                                                 ON FIRST.HNAME = BEYER.HNAME_BEYER) FIRST_BEYER
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_W, max(RADAT) Last_T3_Date from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                                 Group by HNAME) WIN
                                                 ON WIN.HNAME_W = FIRST_BEYER.HNAME
                                                 """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Extraction_Avg = Extraction_Database("""
                                         Select HNAME, RADAT, RESFP, BEYER_SPEED from Race_PosteriorDb
                                         where RADAT < {Raceday} and HNAME in {HNAME_List}
                                         """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    #Average gap between Top 3 finishes and between the three best speed figures
    Avg_Date = []
    for name, group in Extraction_Avg.groupby('HNAME'):
        T3_Dates = group.loc[group['RESFP'] <= 3, 'RADAT'].apply(To_Date)
        Beyer_Dates = group.nlargest(3, 'BEYER_SPEED')['RADAT'].apply(To_Date)
        Avg_Date.append([name, T3_Dates.diff().mean(), Beyer_Dates.diff().mean()])
    Avg_Date = pd.DataFrame(Avg_Date, columns=['HNAME', 'Avg_T3', 'Avg_Beyer'])

    #The queries above need the raw Raceday, so it is only parsed here
    Raceday = To_Date(Raceday)
    Default_Gap = Raceday - To_Date(Default_Date)

    Feature_DF = Feature_DF.merge(Extraction_Performance, how='left').merge(Avg_Date, how='left')

    #Whole-column assignment: the fill values have a different dtype than the
    #columns they go into, which an in-place .loc[:, cols] = ... may reject
    Date_Columns = ['First_Date', 'Last_T3_Date', 'Top_Beyer_Date']
    Feature_DF[Date_Columns] = Feature_DF[Date_Columns].fillna(Default_Date)
    Gap_Columns = ['Avg_T3', 'Avg_Beyer']
    Feature_DF[Gap_Columns] = Feature_DF[Gap_Columns].fillna(Default_Gap)

    Since_T3 = Raceday - To_Date(Feature_DF['Last_T3_Date'])
    Since_T3 = Since_T3 - Feature_DF['Avg_T3'].abs()
    Since_Beyer = Raceday - To_Date(Feature_DF['Top_Beyer_Date'])
    Since_Beyer = Since_Beyer - Feature_DF['Avg_Beyer'].abs()

    #Take the day component directly instead of parsing str(Timedelta);
    #to_timedelta also normalises an object column holding Timedelta values
    Days = pd.to_timedelta(Since_T3.fillna(Since_Beyer))
    Feature_DF['CC_REC_DAY_PT3'] = Days.dt.days

    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_REC_DAY_PT3']]

    return Feature_DF
def CC_CLS_CL(Dataframe, HNAME_List, Raceday):

    """
    Horse's Competition Level

    For each of a horse's last 5 races the competition level is
    (mean of the three highest HJRAT in the race - the horse's HJRAT) / the horse's RESFP.
    The per-race levels are combined with simple exponential smoothing and the
    one-step forecast is the feature. Horses without a value receive the
    minimum of the field, or 0 when no horse has a value.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and HJRAT
    HNAME_List : String of List of Horse Names (not used by this feature)
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_CL]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'HJRAT']]

    Horse_Comp = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe['HNAME'].tolist():
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, HJRAT, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday = Raceday, Horse = "'" + Horse + "'"))

        for RARID, race in Extraction.groupby('RARID'):
            Own_Row = race.loc[race['HNAME'] == Horse]
            Horse_Rat = Own_Row['HJRAT'].to_list()[0]
            Horse_FP = Own_Row['RESFP'].to_list()[0]
            Comp_Rat = race.nlargest(3, 'HJRAT')['HJRAT'].mean()
            Horse_Comp.append([Horse, (Comp_Rat - Horse_Rat) / Horse_FP])
    Horse_Comp = pd.DataFrame(Horse_Comp, columns=['HNAME', 'Comp_Level'])

    #Recency Weighting
    Comp = []
    for name, group in Horse_Comp.groupby('HNAME'):
        Comp_Figure = group['Comp_Level'].dropna().values
        if len(Comp_Figure) == 0:
            #Nothing to smooth and nothing to fall back on: leave the horse
            #out so that the field-level fill below takes care of it
            continue
        try:
            Forecast = SimpleExpSmoothing(Comp_Figure).fit().forecast()[0]
        except Exception:
            #Best effort kept from the original: if the smoother cannot be
            #fitted (presumably on very short series), use the first figure
            Forecast = Comp_Figure[0]
        Comp.append([name, Forecast])
    Comp = pd.DataFrame(Comp, columns=['HNAME', 'CC_CLS_CL'])

    Feature_DF = Feature_DF.merge(Comp, how='left')
    #Assign the filled column back: an inplace fillna on df.loc[:, col] works
    #on a temporary object and may never reach Feature_DF
    Feature_DF['CC_CLS_CL'] = Feature_DF['CC_CLS_CL'].fillna(Feature_DF['CC_CLS_CL'].min())
    Feature_DF['CC_CLS_CL'] = Feature_DF['CC_CLS_CL'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_CL']]

    return Feature_DF
def OD_PR_FAVB(Dataframe, HNAME_List, Raceday):

    """
    Number of favourites that ran behind the underlying horse in the last 5 races.

    In each of the horse's last 5 races the favourite is the runner with the
    lowest RESFO. The race counts when the horse is the first row after
    sorting the horse and the favourite(s) by RESFP. Races in which the horse
    is the only favourite do not count.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RESFO
    HNAME_List : String of List of Horse Names (not used by this feature)
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_FAVB]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Fav_Beaten_List = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe['HNAME'].tolist():
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, RESFO, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday=Raceday, Horse="'" + Horse + "'"))

        Won_Fav_tot = 0
        for RARID, race in Extraction.groupby('RARID'):
            fav_con = race['RESFO'] == race['RESFO'].min()
            horse_con = race['HNAME'] == Horse
            Only_Fav_Horse = race.loc[fav_con | horse_con, ['HNAME', 'RESFP']]
            Only_Fav_Horse = Only_Fav_Horse.sort_values('RESFP').reset_index(drop=True)
            if len(Only_Fav_Horse) > 1:
                #The horse finished ahead of the favourite(s) exactly when it
                #is the first row of the sorted frame
                Won_Fav = float(Only_Fav_Horse.loc[0, 'HNAME'] == Horse)
            else:
                #The horse was the sole favourite itself
                Won_Fav = 0
            Won_Fav_tot += Won_Fav
        Fav_Beaten_List.append([Horse, Won_Fav_tot])
    Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_FAVB'])

    Feature_DF = Feature_DF.merge(Fav_Beaten, how='left')
    #Assign the filled column back: an inplace fillna on df.loc[:, col] works
    #on a temporary object and may never reach Feature_DF
    Feature_DF['OD_PR_FAVB'] = Feature_DF['OD_PR_FAVB'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_FAVB']]

    return Feature_DF
def Weight_Aug_Reg(Raceday):

    """
    Fit and save the two auxiliary weight regressions

    Builds race-to-race changes in BEYER_SPEED and HWEIC per horse from all
    races before Raceday, then fits one LinearRegression (Speed on Weight) on
    the changes where weight went up and speed went down, and one on the
    changes where weight went down and speed went up. Both models are written
    with joblib below Aux_Reg_Path. Nothing is fitted or saved when there is
    no data or when either subset is empty.

    Parameter
    ---------
    Raceday : Date of Race
    Return
    ------
    None
    """

    Extraction = Extraction_Database("""
                                     Select A.HNAME, A.RARID, BEYER_SPEED, HWEIC from
                                     (Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb where RADAT < {Raceday}) A,
                                     (Select HNAME, RARID, HWEIC from RaceDb where RADAT < {Raceday}) B
                                     where A.HNAME = B.HNAME and A.RARID = B.RARID
                                     """.format(Raceday = Raceday))
    Extraction.fillna(0, inplace=True)
    if not len(Extraction):
        return None

    def Horse_Changes(History):
        #Race-to-race differences of one horse, without infinite or missing rows
        #NOTE(review): relies on the query returning races in chronological order - confirm
        Changes = pd.DataFrame({'Speed': History['BEYER_SPEED'].diff().values,
                                'Weight': History['HWEIC'].diff().values})
        return Changes.replace([np.inf, -np.inf], np.nan).dropna()

    All_Changes = pd.concat([Horse_Changes(History) for _, History in Extraction.groupby('HNAME')])

    Speed_Change = All_Changes['Speed']
    Weight_Change = All_Changes['Weight']
    #Slice in increase in weight leading to decrease in Speed Figure
    Increase_Weight = All_Changes.loc[(Speed_Change < 0) & (Weight_Change > 0), :]
    #Slice in decrease in weight leading to increase in Speed Figure
    Decrease_Weight = All_Changes.loc[(Speed_Change > 0) & (Weight_Change < 0), :]

    #Do not fit any model unless both slices contain races
    if len(Increase_Weight) == 0 or len(Decrease_Weight) == 0:
        return None

    Model_Files = ((Increase_Weight, 'CC_WEI_INC_Model.joblib'),
                   (Decrease_Weight, 'CC_WEI_DEC_Model.joblib'))
    for Subset, File_Name in Model_Files:
        #Model Fitting
        model = LinearRegression()
        model.fit(Subset['Weight'].values.reshape(-1, 1), Subset['Speed'])
        #Save Model
        with open(Aux_Reg_Path + File_Name, 'wb') as location:
            joblib.dump(model, location)

    return None
def OD_PR_BFAV(Dataframe, HNAME_List, Raceday):

    """
    Number of races in which the underlying horse was a beaten favourite in the last 5 races.

    The horse is the favourite of a race when it alone has the lowest RESFO.
    It is a beaten favourite when its RESFP in that race is known and is not 1.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RESFO
    HNAME_List : String of List of Horse Names (not used by this feature)
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_BFAV]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Fav_Beaten_List = []
    #For each horse, get data for last 5 races
    for Horse in Dataframe.loc[:, 'HNAME'].tolist():
        Extraction = Extraction_Database("""
                                         Select HNAME, RARID, RESFO, RESFP from RaceDb
                                         where RARID in (
                                         Select RARID from RaceDb
                                         where RADAT < {Raceday} and HNAME = {Horse}
                                         ORDER BY RARID DESC
                                         LIMIT 5)
                                         """.format(Raceday=Raceday, Horse="'" + Horse + "'"))

        Lost_Fav_tot = 0
        for RARID, race in Extraction.groupby('RARID'):
            fav_con = race['RESFO'] == race['RESFO'].min()
            horse_con = race['HNAME'] == Horse
            Only_Fav_Horse = race.loc[fav_con & horse_con, ['HNAME', 'RESFP']]
            if len(Only_Fav_Horse) == 1:
                Finish = Only_Fav_Horse['RESFP'].iloc[0]
                #Beaten means the favourite did NOT finish first. The previous
                #test (RESFP == 1) counted winning favourites, contradicting
                #the feature definition. A missing RESFP is not counted.
                Lost_Fav = float(pd.notna(Finish) and Finish != 1)
            else:
                Lost_Fav = 0
            Lost_Fav_tot += Lost_Fav
        Fav_Beaten_List.append([Horse, Lost_Fav_tot])
    Fav_Beaten = pd.DataFrame(Fav_Beaten_List, columns=['HNAME', 'OD_PR_BFAV'])

    Feature_DF = Feature_DF.merge(Fav_Beaten, how='left')
    #Assign the filled column back: an inplace fillna on df.loc[:, col] works
    #on a temporary object and may never reach Feature_DF
    Feature_DF['OD_PR_BFAV'] = Feature_DF['OD_PR_BFAV'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_BFAV']]

    return Feature_DF
def CC_BWEI_DT3(Dataframe, HNAME_List, Raceday):

    """
    Absolute Difference in Bodyweight compared to average Top 3 finish of horse
    in percentage of Bodyweight
    Abs((Current Bodyweight - Average Top 3 Bodyweight ) / Average Top 3 Bodyweight)

    When a horse has no Top 3 finish, the bodyweight of its highest
    BEYER_SPEED race is used as the reference instead. Horses without any
    reference receive the maximum of the field, or 0 when no horse has a value.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and HBWEI
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_DT3]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'HBWEI']]

    Extraction_T3 = Extraction_Database("""
                                        Select HNAME, avg(HBWEI) T3_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Bodyweight = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI Best from RaceDb
                                     where HNAME in {HNAME_List} and RADAT < {Raceday}
                                     """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    #Races in which each horse ran its highest speed figure. The string alias
    #replaces the builtin max, which pandas deprecates as a transform callable
    Top_Speed = Speed_Ratings.groupby('HNAME')['BEYER_SPEED'].transform('max')
    Best_Races = Speed_Ratings[Top_Speed == Speed_Ratings['BEYER_SPEED']]
    Speed_Ratings_Weight = Best_Races.merge(Bodyweight).loc[:, ['HNAME', 'Best']]
    #Several races can tie for the best figure: keep the heaviest bodyweight
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index()

    Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left')
    Filled_Weight = Feature_DF['T3_Weight'].fillna(Feature_DF['Best'])
    Deviation = ((Feature_DF['HBWEI'] - Filled_Weight) / Filled_Weight).abs()
    #Assign the filled series back instead of an inplace fillna on
    #df.loc[:, col], which works on a temporary and may never reach Feature_DF
    Deviation = Deviation.fillna(Deviation.max())
    Feature_DF['CC_BWEI_DT3'] = Deviation.fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_BWEI_DT3']]

    return Feature_DF
def CC_BWEI_DWIN(Dataframe, HNAME_List, Raceday):

    """
    Bodyweight difference with Winning Performance
    Abs((Current Bodyweight - Average Winning Bodyweight ) / Average Winning Bodyweight)

    Winning races are those with RESWL = 1. When a horse has none, the
    bodyweight of its highest BEYER_SPEED race is used as the reference
    instead. Horses without any reference receive the maximum of the field,
    or 0 when no horse has a value.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and HBWEI
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_DWIN]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'HBWEI']]

    Extraction_Win = Extraction_Database("""
                                         Select HNAME, avg(HBWEI) Win_Weight from RaceDb
                                         where RADAT < {Raceday} and HNAME in {HNAME_List} and RESWL = 1
                                         Group by HNAME
                                         """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Bodyweight = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI Best from RaceDb
                                     where HNAME in {HNAME_List} and RADAT < {Raceday}
                                     """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    #Races in which each horse ran its highest speed figure. The string alias
    #replaces the builtin max, which pandas deprecates as a transform callable
    Top_Speed = Speed_Ratings.groupby('HNAME')['BEYER_SPEED'].transform('max')
    Best_Races = Speed_Ratings[Top_Speed == Speed_Ratings['BEYER_SPEED']]
    Speed_Ratings_Weight = Best_Races.merge(Bodyweight).loc[:, ['HNAME', 'Best']]
    #Several races can tie for the best figure: keep the heaviest bodyweight
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME').max().reset_index()

    Feature_DF = Feature_DF.merge(Extraction_Win, how='left').merge(Speed_Ratings_Weight, how='left')
    Filled_Weight = Feature_DF['Win_Weight'].fillna(Feature_DF['Best'])
    Deviation = ((Feature_DF['HBWEI'] - Filled_Weight) / Filled_Weight).abs()
    #Assign the filled series back instead of an inplace fillna on
    #df.loc[:, col], which works on a temporary and may never reach Feature_DF
    Deviation = Deviation.fillna(Deviation.max())
    Feature_DF['CC_BWEI_DWIN'] = Deviation.fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_BWEI_DWIN']]

    return Feature_DF
def CC_WEI_MAX(Dataframe, HNAME_List, Raceday):

    """
    Weight carrying threshold

    Average HWEIC over the horse's Top 3 finishes. Horses without a Top 3
    finish receive their current HWEIC. When no horse has a race that matches
    its highest BEYER_SPEED, every horse receives 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and HWEIC
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_WEI_MAX]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'HWEIC']]

    Extraction_T3 = Extraction_Database("""
                                        Select HNAME, Avg(HWEIC) T3_Weight from RaceDb
                                        where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                        Group by HNAME
                                        """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Weight = Extraction_Database("""
                                 Select HNAME, RARID, HWEIC SP_WEI from RaceDb
                                 where HNAME in {HNAME_List} and RADAT < {Raceday}
                                 """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select Distinct HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    #Races in which each horse ran its highest speed figure. The string alias
    #replaces the builtin max, which pandas deprecates as a transform callable
    Top_Speed = Speed_Ratings.groupby('HNAME')['BEYER_SPEED'].transform('max')
    Best_Races = Speed_Ratings[Top_Speed == Speed_Ratings['BEYER_SPEED']]
    Speed_Ratings_Weight = Best_Races.merge(Weight, on = ['HNAME', 'RARID']).loc[:, ['HNAME', 'SP_WEI']]
    #Highest weight per horse among those races. A plain groupby max replaces
    #groupby.apply, which depended on the grouping column being handed to the
    #applied function (deprecated behaviour)
    Speed_Ratings_Weight = Speed_Ratings_Weight.groupby('HNAME', as_index=False)['SP_WEI'].max()

    if len(Speed_Ratings_Weight) == 0:
        Feature_DF = Feature_DF.assign(CC_WEI_MAX=0)
        Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_WEI_MAX']]
        return Feature_DF

    Feature_DF = Feature_DF.merge(Extraction_T3, how='left').merge(Speed_Ratings_Weight, how='left')
    #NOTE(review): SP_WEI is merged in but never used - the fallback below is
    #the current HWEIC. Confirm whether the best-speed weight was meant to be
    #the first fallback.
    Feature_DF['CC_WEI_MAX'] = Feature_DF['T3_Weight'].fillna(Feature_DF['HWEIC'])
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_WEI_MAX']]

    return Feature_DF
def OD_PR_LPAVG(Dataframe, HNAME_List, Raceday):

    """
    Average Log Odds implied Probability

    Every past RESFO of a horse is converted to log((1 - 0.175) / RESFO) and
    the series is combined with simple exponential smoothing. The one-step
    forecast is the feature. A single observation is used as is. Horses
    without a value receive the minimum of the field, or 0 when no horse has
    a value.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_LPAVG]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, RESFO from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday=Raceday, HNAME_List=HNAME_List))

    Odds = []
    for name, group in Extraction.groupby('HNAME'):
        #0.175 is presumably the takeout rate - TODO confirm
        Probi = group['RESFO'].map(lambda x: np.log((1 - 0.175) / x)).dropna().values
        if len(Probi) > 1:
            Smoothed = SimpleExpSmoothing(Probi).fit().forecast()[0]
        elif len(Probi) == 1:
            Smoothed = Probi[0]
        else:
            Smoothed = 0
        Odds.append([name, Smoothed])
    Odds = pd.DataFrame(Odds, columns=['HNAME', 'OD_PR_LPAVG'])

    Feature_DF = Feature_DF.merge(Odds, how='left')
    #Assign the filled column back: an inplace fillna on df.loc[:, col] works
    #on a temporary object and may never reach Feature_DF
    Feature_DF['OD_PR_LPAVG'] = Feature_DF['OD_PR_LPAVG'].fillna(Feature_DF['OD_PR_LPAVG'].min())
    Feature_DF['OD_PR_LPAVG'] = Feature_DF['OD_PR_LPAVG'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_LPAVG']]

    return Feature_DF
def CC_BWEI_D(Dataframe, HNAME_List, Raceday):

    """
    Change in Bodyweight of Horse

    Race-to-race change in HBWEI relative to the later race's HBWEI, combined
    with simple exponential smoothing. The absolute one-step forecast is the
    feature. A single observation is used as is. Horses without a value
    receive the maximum of the field, or 0 when no horse has a value.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_BWEI_D]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, HBWEI from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    HBWEI = []
    for name, group in Extraction.groupby('HNAME'):
        #NOTE(review): relies on the query returning races in chronological order - confirm
        Weight = (group['HBWEI'].diff() / group['HBWEI']).dropna().values
        if len(Weight) > 1:
            Change = SimpleExpSmoothing(Weight).fit().forecast()[0]
        elif len(Weight) == 1:
            Change = Weight[0]
        else:
            Change = 0
        HBWEI.append([name, Change])
    HBWEI = pd.DataFrame(HBWEI, columns=['HNAME', 'CC_BWEI_D'])

    Feature_DF = Feature_DF.merge(HBWEI, how='left')
    Feature_DF['CC_BWEI_D'] = Feature_DF['CC_BWEI_D'].abs()
    #Assign the filled column back: an inplace fillna on df.loc[:, col] works
    #on a temporary object and may never reach Feature_DF
    Feature_DF['CC_BWEI_D'] = Feature_DF['CC_BWEI_D'].fillna(Feature_DF['CC_BWEI_D'].max())
    Feature_DF['CC_BWEI_D'] = Feature_DF['CC_BWEI_D'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_BWEI_D']]

    return Feature_DF
def CC_CLS_D(Dataframe, HNAME_List, Raceday):

    """
    Change in HKJC Rating

    Race-to-race change in HJRAT relative to the later race's HJRAT, combined
    with simple exponential smoothing. The one-step forecast is the feature.
    A single observation is used as is. Horses without a value receive the
    minimum of the field, or 0 when no horse has a value.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_D]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']]

    Extraction = Extraction_Database("""
                                     Select HNAME, RARID, HJRAT from RaceDb
                                     where RADAT < {Raceday} and HNAME in {HNAME_List}
                                     """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    JRat = []
    for name, group in Extraction.groupby('HNAME'):
        #NOTE(review): relies on the query returning races in chronological order - confirm
        Rating = (group['HJRAT'].diff() / group['HJRAT']).dropna().values
        if len(Rating) > 1:
            Change = SimpleExpSmoothing(Rating).fit().forecast()[0]
        elif len(Rating) == 1:
            Change = Rating[0]
        else:
            Change = 0
        JRat.append([name, Change])
    JRat = pd.DataFrame(JRat, columns=['HNAME', 'CC_CLS_D'])

    Feature_DF = Feature_DF.merge(JRat, how='left')
    #Assign the filled column back: an inplace fillna on df.loc[:, col] works
    #on a temporary object and may never reach Feature_DF
    Feature_DF['CC_CLS_D'] = Feature_DF['CC_CLS_D'].fillna(Feature_DF['CC_CLS_D'].min())
    Feature_DF['CC_CLS_D'] = Feature_DF['CC_CLS_D'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_D']]

    return Feature_DF
def CC_CLS_CC(Dataframe, HNAME_List, Raceday):

    """
    Horse's Competitive Class

    The class of a race is the mean of its three highest HJRAT. The feature
    is the class of today's race minus the average class of the races in
    which the horse finished in the Top 3. Horses without a Top 3 finish
    receive 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and HJRAT
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_CLS_CC]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'HJRAT']]
    #Average only the rating column: a frame-wide mean() also visits the
    #string column HNAME, which raises on current pandas
    Underlying_Class = Feature_DF['HJRAT'].nlargest(3).mean()

    #Every runner of the races in which one of the horses finished Top 3
    Races = Extraction_Database("""
                                Select HNAME, RARID, HJRAT CC_CLS_CC from RaceDb
                                where RARID in (
                                Select RARID from RaceDb
                                where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3)
                                """.format(Raceday = Raceday, HNAME_List = HNAME_List))
    Races_AvgHJRAT = Races.groupby('RARID')['CC_CLS_CC'].apply(lambda x: x.nlargest(3).mean())
    Races_AvgHJRAT = Races_AvgHJRAT.reset_index()

    Race_IDs = Extraction_Database("""
                                   Select HNAME, RARID from RaceDb
                                   where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                   """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Merged = Race_IDs.merge(Races_AvgHJRAT)
    #Select the column before aggregating so that only CC_CLS_CC is averaged
    Horse_Class = Merged.groupby('HNAME')['CC_CLS_CC'].mean().reset_index()

    Feature_DF = Feature_DF.merge(Horse_Class, how='left')
    #Assign the filled column back: an inplace fillna on df.loc[:, col] works
    #on a temporary object and may never reach Feature_DF
    Feature_DF['CC_CLS_CC'] = Feature_DF['CC_CLS_CC'].fillna(Underlying_Class)
    Feature_DF['CC_CLS_CC'] = Underlying_Class - Feature_DF['CC_CLS_CC']
    Feature_DF['CC_CLS_CC'] = Feature_DF['CC_CLS_CC'].fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_CLS_CC']]

    return Feature_DF
def OD_PR_LPW(Dataframe, HNAME_List, Raceday):

    """
    Average Winning Odds

    Abs((Current Log Probability - Reference Log Probability) / Reference Log Probability)
    where a log probability is log((1 - 0.175) / odds), the current odds are
    RESFO of the Matchday Dataframe and the reference odds are the horse's
    average RESFO over its wins (RESWL = 1). When a horse has no win, the
    average RESFO of its highest BEYER_SPEED race(s) is the reference.
    Horses without any reference receive 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RESFO
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, OD_PR_LPW]
    """

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RESFO']]

    Extraction_Win = Extraction_Database("""
                                         Select HNAME, avg(RESFO) Win_Odds from RaceDb
                                         where RADAT < {Raceday} and HNAME in {HNAME_List} and RESWL = 1
                                         Group by HNAME
                                         """.format(Raceday=Raceday, HNAME_List=HNAME_List))

    Odds = Extraction_Database("""
                               Select HNAME, RARID, RESFO Odds from RaceDb
                               where HNAME in {HNAME_List} and RADAT < {Raceday}
                               """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    Speed_Ratings = Extraction_Database("""
                                        Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb
                                        where HNAME in {HNAME_List} and RADAT < {Raceday}
                                        """.format(HNAME_List=HNAME_List, Raceday=Raceday))

    #Races in which each horse ran its highest speed figure. The string alias
    #replaces the builtin max, which pandas deprecates as a transform callable
    Top_Speed = Speed_Ratings.groupby('HNAME')['BEYER_SPEED'].transform('max')
    Best_Races = Speed_Ratings[Top_Speed == Speed_Ratings['BEYER_SPEED']]
    Speed_Ratings_Odds = Best_Races.merge(Odds).loc[:, ['HNAME', 'Odds']]

    #Exception for first season: with no history the odds column is empty and
    #non-numeric, which used to make the aggregation fail inside a bare
    #except. Coercing to numeric first lets the mean run on an empty frame too.
    Speed_Ratings_Odds = Speed_Ratings_Odds.assign(
        Odds=pd.to_numeric(Speed_Ratings_Odds['Odds'], errors='coerce'))
    Speed_Ratings_Odds = Speed_Ratings_Odds.groupby('HNAME', as_index=False)['Odds'].mean()

    Feature_DF = Feature_DF.merge(Extraction_Win, how='left').merge(Speed_Ratings_Odds, how='left')

    #0.175 is presumably the takeout rate - TODO confirm
    Filled_Odds = Feature_DF['Win_Odds'].fillna(Feature_DF['Odds'])
    Reference_LogP = Filled_Odds.map(lambda x: np.log((1 - 0.175) / x))
    Current_LogP = Feature_DF['RESFO'].map(lambda x: np.log((1 - 0.175) / x))

    Deviation = ((Current_LogP - Reference_LogP) / Reference_LogP).abs()
    #Assign the filled series back instead of an inplace fillna on
    #df.loc[:, col], which works on a temporary and may never reach Feature_DF
    Feature_DF['OD_PR_LPW'] = Deviation.fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'OD_PR_LPW']]

    return Feature_DF
def CC_REC_NUM_LT3(Dataframe, HNAME_List, Raceday):

    """
    Predicted number of races until next Top 3

    Races are numbered per horse in RARID order. The feature is the number of
    races since the horse's last Top 3 finish minus the average number of
    races between its Top 3 finishes. When that is missing, the same figure
    built from the horse's best BEYER_SPEED races is used. Horses without a
    value receive the maximum of the field, or 0 when no horse has a value.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain the columns HNAME and RARID
    HNAME_List : String of List of Horse Names, substituted verbatim into the SQL "in" clause
    Raceday : Date of Race
    Return
    ------
    Dataframe [HNAME, CC_REC_NUM_LT3]
    """

    def Race_Position(History, Race_IDs):
        #Position of the first listed race id inside History, NaN when the id
        #is missing or the race is not part of History
        if len(Race_IDs) == 0:
            return np.nan
        Match = History.loc[History['RARID'] == Race_IDs[0], 'index'].values
        return Match[0] if len(Match) else np.nan

    Feature_DF = Dataframe.loc[:, ['HNAME', 'RARID']]

    #First race, race of best speed figure and last Top 3 race per horse
    Extraction_Performance = Extraction_Database("""
                                                 Select HNAME, First_RARID, Beyer_RARID, Last_T3_RARID from (
                                                 Select HNAME, First_RARID, Beyer_RARID from (
                                                 Select HNAME, min(RARID) First_RARID from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) FIRST
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_BEYER, RARID Beyer_RARID, max(BEYER_SPEED)from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List}
                                                 Group by HNAME) BEYER
                                                 ON FIRST.HNAME = BEYER.HNAME_BEYER) FIRST_BEYER
                                                 LEFT OUTER JOIN
                                                 (Select HNAME HNAME_T3, max(RARID) Last_T3_RARID from Race_PosteriorDb
                                                 where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3
                                                 Group by HNAME) T3
                                                 ON T3.HNAME_T3 = FIRST_BEYER.HNAME
                                                 """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Extraction_Avg = Extraction_Database("""
                                         Select HNAME, RARID, RESFP, BEYER_SPEED from Race_PosteriorDb
                                         where RADAT < {Raceday} and HNAME in {HNAME_List}
                                         """.format(Raceday = Raceday, HNAME_List = HNAME_List))

    Avg_Date = []
    for name, group in Extraction_Avg.groupby('HNAME'):
        #Number the horse's races 0..n-1 in RARID order (column 'index')
        group = group.sort_values('RARID').reset_index(drop = True).reset_index()
        Avg_T3 = group.loc[group['RESFP'] <= 3, 'index'].diff().abs().mean()
        Avg_Beyer = group.nlargest(3, 'BEYER_SPEED')['index'].diff().abs().mean()
        #Today's race would be the next one in the sequence
        Today = group['index'].max() + 1

        Record = Extraction_Performance.loc[Extraction_Performance['HNAME'] == name]
        Last_Beyer = Race_Position(group, Record['Beyer_RARID'].values)
        Last_T3 = Race_Position(group, Record['Last_T3_RARID'].values)

        #A missing position propagates as NaN and is filled below
        Diff_Beyer = Today - Last_Beyer - Avg_Beyer
        Diff_T3 = Today - Last_T3 - Avg_T3
        Avg_Date.append([name, Diff_T3, Diff_Beyer])
    Avg_Date = pd.DataFrame(Avg_Date, columns=['HNAME', 'Diff_T3', 'Diff_Beyer'])

    Feature_DF = Feature_DF.merge(Avg_Date, how='left')
    Races_To_T3 = Feature_DF['Diff_T3'].fillna(Feature_DF['Diff_Beyer'])
    #Assign the filled series back instead of an inplace fillna on
    #df.loc[:, col], which works on a temporary and may never reach Feature_DF
    Races_To_T3 = Races_To_T3.fillna(Races_To_T3.max())
    Feature_DF['CC_REC_NUM_LT3'] = Races_To_T3.fillna(0)
    Feature_DF = Feature_DF.loc[:, ['HNAME', 'CC_REC_NUM_LT3']]

    return Feature_DF