def CC_CLS_CC(Dataframe, HNAME_List, Raceday):
    """
    Horse's Competitive Class

    The class of a race is taken as the mean of the three highest HJRAT
    values among its runners. The feature is today's race class minus the
    average class of the past races (RADAT < Raceday) in which the horse
    has RESFP <= 3. Horses without such a race get 0.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain HNAME and HJRAT
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the SQL statements
    Raceday : Date of Race, inserted verbatim into the SQL statements

    Return
    ------
    Dataframe [HNAME, CC_CLS_CC]
    """
    Feature_DF = Dataframe.loc[:, ['HNAME', 'HJRAT']]

    # Class of today's race. The numeric column is selected before
    # averaging because DataFrame.mean() over the string HNAME column
    # raises on pandas >= 2.0.
    Underlying_Class = Feature_DF['HJRAT'].nlargest(3).mean()

    # Every runner of every past race in which one of today's horses
    # has RESFP <= 3 (HJRAT is aliased to CC_CLS_CC by the query).
    Races = Extraction_Database(
        (
            " Select HNAME, RARID, HJRAT CC_CLS_CC from RaceDb "
            "where RARID in ( "
            "Select RARID from RaceDb "
            "where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3) "
        ).format(Raceday=Raceday, HNAME_List=HNAME_List))

    # Class of each of those past races.
    Races_AvgHJRAT = (
        Races.groupby('RARID')['CC_CLS_CC']
        .apply(lambda ratings: ratings.nlargest(3).mean())
        .reset_index()
    )

    # (horse, race) pairs telling which past race belongs to which horse.
    Race_IDs = Extraction_Database(
        (
            " Select HNAME, RARID from RaceDb "
            "where RADAT < {Raceday} and HNAME in {HNAME_List} and RESFP <= 3 "
        ).format(Raceday=Raceday, HNAME_List=HNAME_List))

    Merged = Race_IDs.merge(Races_AvgHJRAT, on='RARID')

    # Average past class per horse; only the class column is averaged so
    # the RARID column never takes part in the aggregation.
    Horse_Class = Merged.groupby('HNAME')['CC_CLS_CC'].mean().reset_index()
    Feature_DF = Feature_DF.merge(Horse_Class, on='HNAME', how='left')

    # Assign the filled result back instead of calling fillna(inplace=True)
    # on a .loc slice, which does not reliably write through to the frame.
    Past_Class = Feature_DF['CC_CLS_CC'].fillna(Underlying_Class)

    # The trailing fillna(0) covers the case where today's class is NaN.
    Feature_DF['CC_CLS_CC'] = (Underlying_Class - Past_Class).fillna(0)

    return Feature_DF.loc[:, ['HNAME', 'CC_CLS_CC']]
def CC_WEI_EXP(Dataframe, HNAME_List, Raceday):
    """
    Weight Carrying Experience of Horse

    Primary value : highest BEYER_SPEED of the horse in past races
                    (RADAT < Raceday) with RADIS >= today's RADIS and
                    HWEIC >= today's HWEIC.
    Back up value : mean BEYER_SPEED of the horse over its past race(s)
                    with the highest HWEIC and RADIS >= today's RADIS.
    Horses with neither value get the lowest value found among the other
    horses, or 0 when no horse has one.

    Parameter
    ---------
    Dataframe : Matchday Dataframe, must contain HNAME, HWEIC and RADIS
    HNAME_List : String of List of Horse Names, inserted verbatim after
                 "HNAME in" in the SQL statements
    Raceday : Date of Race, inserted verbatim into the SQL statements

    Return
    ------
    Dataframe [HNAME, CC_WEI_EXP]
    """
    # Work on a copy so adding columns never touches the caller's frame.
    Feature_DF = Dataframe.loc[:, ['HNAME', 'HWEIC']].copy()
    Distance = Dataframe.loc[:, 'RADIS'].values[0]

    def Speed_Ratings_Of(Race_Frame):
        # BEYER_SPEED of today's horses in the races listed in Race_Frame.
        Race_ID_List = '(' + str(Race_Frame['RARID'].tolist())[1:-1] + ')'
        return Extraction_Database(
            (
                " Select HNAME, RARID, BEYER_SPEED from Race_PosteriorDb "
                "where RARID in {Race_ID_List} and HNAME in {HNAME_List} "
            ).format(Race_ID_List=Race_ID_List, HNAME_List=HNAME_List))

    # One "(HNAME = '...' and HWEIC >= ...)" condition per horse. Single
    # quotes inside a name are doubled so the name cannot terminate the
    # SQL string literal.
    Horse_Weight_Req = ' or '.join(
        "(HNAME = '" + Name.replace("'", "''") + "' and HWEIC >= " + str(Weight) + ')'
        for Name, Weight in zip(Feature_DF['HNAME'], Feature_DF['HWEIC'])
    )

    # Primary : past races at today's weight or above.
    Races_above_today = Extraction_Database(
        (
            " Select HNAME, RARID from "
            "(Select HNAME, RARID, HWEIC, RADAT, RADIS from RaceDb "
            "where {Horse_Weight_Req}) "
            "where RADAT < {Raceday} and RADIS >= {Distance} "
        ).format(Raceday=Raceday, Horse_Weight_Req=Horse_Weight_Req, Distance=Distance))
    Speed_Ratings_tdy = Speed_Ratings_Of(Races_above_today)
    Best_Speed_Rating = (
        Races_above_today
        .merge(Speed_Ratings_tdy, on=['HNAME', 'RARID'], how='left')
        .groupby('HNAME')['BEYER_SPEED'].max()
        .reset_index()
        .rename(columns={'BEYER_SPEED': 'Primary'})
    )

    # Back up : every past race over at least today's distance.
    Race_heaviest = Extraction_Database(
        (
            " Select HNAME, RARID, HWEIC from RaceDb "
            "where RADAT < {Raceday} and HNAME in {HNAME_List} and RADIS >= {Distance} "
        ).format(Raceday=Raceday, HNAME_List=HNAME_List, Distance=Distance))
    Speed_Ratings_heavy = Speed_Ratings_Of(Race_heaviest)
    Backup_Speed_Rating = Race_heaviest.merge(
        Speed_Ratings_heavy, on=['HNAME', 'RARID'], how='left')

    # No past race at all : the feature is 0 for every horse.
    if Backup_Speed_Rating.empty:
        Feature_DF['CC_WEI_EXP'] = 0
        return Feature_DF.loc[:, ['HNAME', 'CC_WEI_EXP']]

    # Missing speed figures take the lowest known figure, or 0 when none
    # is known. The result is assigned back rather than filled in place
    # through a .loc slice, which does not reliably write to the frame.
    Backup_Speed = Backup_Speed_Rating['BEYER_SPEED']
    Backup_Speed_Rating['BEYER_SPEED'] = (
        Backup_Speed.fillna(Backup_Speed.min()).fillna(0))

    # Keep, per horse, only the race(s) run under its highest HWEIC.
    Is_Heaviest = (
        Backup_Speed_Rating.groupby('HNAME')['HWEIC'].transform('max')
        == Backup_Speed_Rating['HWEIC']
    )
    Backup_Speed_Rating = (
        Backup_Speed_Rating[Is_Heaviest]
        .groupby('HNAME')['BEYER_SPEED'].mean()
        .reset_index()
        .rename(columns={'BEYER_SPEED': 'Back_Up'})
    )

    Feature_DF = (
        Feature_DF
        .merge(Best_Speed_Rating, on='HNAME', how='left')
        .merge(Backup_Speed_Rating, on='HNAME', how='left')
    )

    # Primary first, back up second, then the field minimum, then 0.
    Weight_Exp = Feature_DF['Primary'].fillna(Feature_DF['Back_Up'])
    Feature_DF['CC_WEI_EXP'] = Weight_Exp.fillna(Weight_Exp.min()).fillna(0)

    return Feature_DF.loc[:, ['HNAME', 'CC_WEI_EXP']]
def Dataset_Extraction(Race_ID_List):
    """
    Load the X and Y datasets of the given races from FeatureDb / RaceDb

    Only races that have at least one FeatureDb row with CC_FRB = 0 are
    kept. The same extraction is meant to serve every dataset split
    (Feature, Modelling, Ensemble, Testing) so that all of them pass
    through the same preprocessing pipeline.

    Parameter
    --------
    Race_ID_List : pd.Dataframe with a RARID column

    Return
    ------
    X_Dataset : Dataframe, RaceDb columns RARID, HNAME, RADIS, RALOC,
                RATRA merged with the FeatureDb columns on HNAME, RARID
    Y_Dataset : Dataframe [RARID, HNAME, RESFO, RESWL, RESFP, ODPLA]
    Every column after the first two of the FeatureDb frame and of
    Y_Dataset is cast to float.
    """
    started_at = time.time()

    def rarid_clause(frame):
        # "(id1, id2, ...)" built from the RARID column of frame
        return '(' + str(frame['RARID'].tolist())[1:-1] + ')'

    def floats_after_keys(frame):
        # Cast every column behind the first two to float
        for name in frame.columns[2:]:
            frame[name] = frame[name].astype(float)
        return frame

    # Races usable for extraction
    kept_races = Extraction_Database(
        (
            " Select Distinct RARID from FeatureDb "
            "where RARID in {RARID} and CC_FRB = 0 "
        ).format(RARID=rarid_clause(Race_ID_List)))
    kept_clause = rarid_clause(kept_races)

    # X : engineered features joined onto the race conditions
    features = floats_after_keys(Extraction_Database(
        (
            " Select * from FeatureDb "
            "where RARID in {RARID} "
            "Order By RARID, HNAME "
        ).format(RARID=kept_clause)))
    conditions = Extraction_Database(
        (
            " Select RARID, HNAME, RADIS, RALOC, RATRA from RaceDb "
            "where RARID in {RARID} "
            "Order by RARID, HNAME "
        ).format(RARID=kept_clause))
    X_Dataset = conditions.merge(features, on=['HNAME', 'RARID'])

    # Y : result columns
    Y_Dataset = floats_after_keys(Extraction_Database(
        (
            " Select RARID, HNAME, RESFO, RESWL, RESFP, ODPLA from RaceDb "
            "where RARID in {RARID} "
            "Order By RARID, HNAME "
        ).format(RARID=kept_clause)))

    # Report how long the load took
    elapsed = str(round(time.time() - started_at, 4))
    print("---- %s Races are Extracted from FeatureDb in %s seconds ----" %
          (len(kept_races), elapsed))

    return X_Dataset, Y_Dataset