'day_of_week': 'dow' }, inplace=True) #---encode day of week--- #---merge df_V and df_date--- df_V_train = df_V_train.merge(df_date, on='visit_date') df_V_train_feat = df_V_train_feat.merge(df_date, on='visit_date') df_V_train_pred = df_V_train_pred.merge(df_date, on='visit_date') df_V_eval = df_V_eval.merge(df_date, on='visit_date') df_V = df_V.merge(df_date, on='visit_date') df_test = df_test.merge(df_date, on='visit_date') #---merge df_V and df_date--- #---other date-related features--- df_V_train_feat = extractDateFeatures(df_V_train_feat) df_V_train_pred = extractDateFeatures(df_V_train_feat, df_V_train_pred) df_V_train = extractDateFeatures(df_V_train) df_V_eval = extractDateFeatures(df_V_train, df_V_eval) df_V = extractDateFeatures(df_V) df_test = extractDateFeatures(df_V, df_test) #---other date-related features--- #===date-related features=== #===store-related features=== #---encoding categorical features in df_S--- df_S['air_genre_name'] = LabelEncoder().fit_transform(df_S.air_genre_name) df_S['air_area_name'] = LabelEncoder().fit_transform(df_S.air_area_name) #---encoding categorical features in df_S---
# Flag every calendar row whose date appears in rng (built earlier in the
# file) by setting 'gldn_flg' to 1.
df_date.loc[df_date.calendar_date.isin(rng), 'gldn_flg'] = 1
#---golden week---

#---encode day of week---
# Overwrite 'day_of_week' with the integer weekday from the .dt accessor
# (pandas numbers Monday as 0 through Sunday as 6).
# Bracket assignment is deliberate: attribute-style assignment
# (df_date.day_of_week = ...) only writes a column when that column already
# exists; otherwise pandas sets a plain attribute and the rename below would
# silently find nothing to rename.
df_date['day_of_week'] = df_date['calendar_date'].dt.dayofweek
df_date.rename(columns={'calendar_date': 'visit_date',
                        'day_of_week': 'dow'},
               inplace=True)
#---encode day of week---

#---merge df_V and df_date---
# Join the calendar columns onto the visit and test frames by 'visit_date'.
# merge() is called with its default join type (inner), so rows whose
# visit_date has no match in df_date are dropped.
df_V = df_V.merge(df_date, on='visit_date')
df_test = df_test.merge(df_date, on='visit_date')
#---merge df_V and df_date---

#---other date-related features---
# extractDateFeatures is defined outside this chunk. df_V must be processed
# first because its result is passed as the first argument for df_test.
# NOTE(review): presumably df_V acts as the reference frame for the df_test
# features -- confirm against extractDateFeatures.
df_V = extractDateFeatures(df_V)
df_test = extractDateFeatures(df_V, df_test)
#---other date-related features---
#===date-related features===

#===store-related features===
#---encoding categorical features in df_S---
# Each column gets its own freshly fitted encoder; the encoded values replace
# the original column in df_S.
# NOTE(review): LabelEncoder is presumably sklearn.preprocessing.LabelEncoder
# -- confirm against the file's imports.
df_S['air_genre_name'] = LabelEncoder().fit_transform(df_S.air_genre_name)
df_S['air_area_name'] = LabelEncoder().fit_transform(df_S.air_area_name)
#---encoding categorical features in df_S---

#---scale lon and lat in df_S---
# Replace the raw coordinates with the output of scale().
# NOTE(review): scale is presumably sklearn.preprocessing.scale -- confirm
# against the file's imports.
df_S['latitude'] = scale(df_S.latitude)
df_S['longitude'] = scale(df_S.longitude)
#---scale lon and lat in df_S---