def transform(self, X: dt.Frame):
    """Flag German public holidays, nationwide and per federal state.

    Selects ``self.time_column`` from ``X``, parses it with
    ``pd.to_datetime`` and looks up every row's (year, day-of-year) pair in
    the holiday tables stored in ``self.memos`` (keyed by ``'country'`` and
    by the 16 state codes listed in ``regions`` below).

    The stored tables are only read: the indicator column needed for the
    merge is added to a copy, so repeated calls leave ``self.memos`` as it
    was fitted.

    Side effects: sets ``self._output_feature_names`` and
    ``self._feature_desc`` to the list of produced column names.

    :param X: datatable frame containing ``self.time_column``.
    :return: pandas DataFrame with one ``is_DE_holiday_<region>`` column per
        region, holding 1 where the date is in that region's table and 0
        otherwise.
    """
    # Single source of truth for the regions and their column order.
    regions = (
        'country',
        'BW', 'BY', 'BE', 'BB', 'HB', 'HH', 'HE', 'MV',
        'NI', 'NW', 'RP', 'SL', 'SN', 'ST', 'SH', 'TH',
    )
    keys = ['year', 'doy']

    # Keep the date column only and convert it to pandas datetimes.
    frame = X[:, self.time_column].to_pandas()
    frame[self.time_column] = pd.to_datetime(frame[self.time_column])

    # Year and day of year are the keys shared with the stored holiday tables.
    frame['year'] = frame[self.time_column].dt.year
    frame['doy'] = frame[self.time_column].dt.dayofyear

    features = []
    for region in regions:
        feature = f'is_DE_holiday_{region}'
        # assign() returns a new frame, so the fitted table is not modified.
        lookup = self.memos[region].assign(**{feature: 1})
        # Only the key columns take part in the merge; rows without a match
        # get NaN in the indicator column, which becomes 0.
        matched = frame[keys].merge(lookup, on=keys, how='left')
        frame[feature] = matched[feature].fillna(0)
        features.append(feature)

    frame.drop([self.time_column, 'year', 'doy'], axis=1, inplace=True)

    self._output_feature_names = list(features)
    self._feature_desc = list(features)
    return frame
def transform(self, X: dt.Frame):
    """Flag German holidays using the ``holidays`` package.

    Produces one boolean column for ``holidays.DE()`` (``is_ge_holiday``) and
    one per state code for ``holidays.DE(state=<code>)``
    (``is_ge_holiday_<code>``). Each value is the result of testing the row's
    date for membership in the corresponding calendar.

    :param X: datatable frame containing ``self.time_column``.
    :return: pandas DataFrame holding only the holiday flag columns.
    """
    frame = X[:, self.time_column].to_pandas()
    dates = frame[self.time_column]

    def membership(calendar):
        # Element-wise "is this date in the calendar?" test.
        return dates.apply(lambda day: day in calendar)

    frame["is_ge_holiday"] = membership(holidays.DE())

    state_codes = (
        "BW", "BY", "BE", "BB", "HB", "HH", "HE", "MV",
        "NI", "NW", "RP", "SL", "SN", "ST", "SH", "TH",
    )
    for code in state_codes:
        frame[f"is_ge_holiday_{code}"] = membership(holidays.DE(state=code))

    # The raw date column is not part of the output.
    return frame.drop(columns=self.time_column)
def transform(self, X: dt.Frame):
    """Return a single 0/1 ``is_holiday`` column for ``self.time_column``.

    The time column is parsed with the format stored in
    ``self.datetime_formats``; each row's (year, day-of-year) pair is then
    left-merged against ``self.memo``. Rows with a match get 1, all others 0.

    Non-string time columns are cast to int64 and then to string before
    parsing; this path only accepts the formats ``"%Y%m%d"`` and
    ``"%Y%m%d%H%M"``.

    ``self.memo`` is only read: the indicator column used by the merge is
    added to a copy.

    :param X: datatable frame containing ``self.time_column``.
    :return: pandas DataFrame with the single integer column ``is_holiday``.
    :raises AssertionError: if the column is not a string column and its
        format is not one of the two supported numeric formats.
    """
    fmt = self.datetime_formats[self.time_column]

    X = X[:, self.time_column]
    if X[:, self.time_column].ltypes[0] != dt.ltype.str:
        assert fmt in ["%Y%m%d", "%Y%m%d%H%M"], \
            "Unsupported format %s for non-string time column" % fmt
        X[:, self.time_column] = dt.stype.str32(dt.stype.int64(dt.f[0]))
    # Treat empty strings and the literal 'None' as missing values.
    X.replace(['', 'None'], None)
    X = X.to_pandas()

    # Plain column assignment replaces the column together with its dtype.
    # ``X.loc[:, col] = ...`` writes into the existing string column in place
    # on pandas >= 2.0, which leaves it as object dtype and breaks ``.dt``.
    X[self.time_column] = pd.to_datetime(X[self.time_column], format=fmt)
    X['year'] = X[self.time_column].dt.year
    X['doy'] = X[self.time_column].dt.dayofyear
    X.drop(self.time_column, axis=1, inplace=True)

    feat = 'is_holiday'
    # assign() works on a copy, so self.memo never carries the helper column,
    # even if the merge below raises.
    lookup = self.memo.assign(**{feat: 1})
    X = X.merge(lookup, how='left', on=['year', 'doy']).fillna(0)
    X = X[[feat]].astype(int)
    return X
def create_data(X: dt.Frame = None) -> pd.DataFrame:
    """Keep the feature subset chosen by mlxtend's ExhaustiveFeatureSelector.

    The selector is configured from the module-level settings ``ESTIMATOR``,
    ``MIN_FEATURES``, ``MAX_FEATURES``, ``SCORING`` and ``CV`` and is fitted
    on every column except ``TARGET_COLUMN``, which is used as the target.

    :param X: input datatable frame; when ``None`` an empty list is returned.
    :return: pandas DataFrame restricted to the selected feature columns
        (the target column is not included).
    """
    if X is None:
        return []

    # Imported lazily so the dependency is only needed when data is supplied.
    from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS

    frame = X.to_pandas()
    target = frame[TARGET_COLUMN].values
    predictors = frame.drop(columns=TARGET_COLUMN)

    selector = EFS(
        ESTIMATOR,
        min_features=MIN_FEATURES,
        max_features=MAX_FEATURES,
        scoring=SCORING,
        cv=CV,
        n_jobs=-1,
    )
    selector.fit(predictors, target)

    chosen = list(selector.best_idx_)
    return predictors.iloc[:, chosen]
def create_data(X: dt.Frame = None) -> pd.DataFrame:
    """Keep the feature subset chosen by mlxtend's SequentialFeatureSelector.

    The selector is run with ``forward=False`` and ``floating=False`` and is
    configured from the module-level settings ``ESTIMATOR``, ``K_FEATURES``,
    ``SCORING`` and ``CV``. It is fitted on every column except
    ``TARGET_COLUMN``, which is used as the target.

    :param X: input datatable frame; when ``None`` an empty list is returned.
    :return: pandas DataFrame restricted to the selected feature columns
        (the target column is not included).
    """
    if X is None:
        return []

    # Imported lazily so the dependency is only needed when data is supplied.
    from mlxtend.feature_selection import SequentialFeatureSelector as SFS

    frame = X.to_pandas()
    target = frame[TARGET_COLUMN].values
    predictors = frame.drop(columns=TARGET_COLUMN)

    selector = SFS(
        ESTIMATOR,
        k_features=K_FEATURES,
        forward=False,
        floating=False,
        scoring=SCORING,
        cv=CV,
        n_jobs=-1,
    )
    selector.fit(predictors, target)

    chosen = list(selector.k_feature_idx_)
    return predictors.iloc[:, chosen]
def transform(self, X: dt.Frame):
    """Return a single 0/1 ``is_ramadan`` column for ``self.time_column``.

    The time column is parsed with the format stored in
    ``self.datetime_formats``; each row's (year, day-of-year) pair is then
    left-merged against ``self.memo``. Rows with a match get 1, all others 0.

    Non-string time columns are cast to int64 and then to string before
    parsing; this path only supports the formats ``"%Y%m%d"``,
    ``"%Y%m%d%H%M"``, ``"%Y"`` and ``"%Y%m"``. For any other format an
    all-zero array is returned instead of raising.

    ``self.memo`` is only read: the indicator column used by the merge is
    added to a copy.

    :param X: datatable frame containing ``self.time_column``.
    :return: pandas DataFrame with the single integer column ``is_ramadan``,
        or a ``(n_rows, 1)`` NumPy array of zeros for an unsupported format.
    """
    fmt = self.datetime_formats[self.time_column]

    X = X[:, self.time_column]
    if X[:, self.time_column].ltypes[0] != dt.ltype.str:
        if fmt not in ["%Y%m%d", "%Y%m%d%H%M", "%Y", "%Y%m"]:
            # Deliberately not raising here: just return bad data, so the
            # feature is not pruned and does not break backend tuning.
            return np.zeros((X.shape[0], 1))
        X[:, self.time_column] = dt.stype.str32(dt.stype.int64(dt.f[0]))
    # Treat empty strings and the literal 'None' as missing values.
    X.replace(['', 'None'], None)
    X = X.to_pandas()

    # Plain column assignment replaces the column together with its dtype.
    # ``X.loc[:, col] = ...`` writes into the existing string column in place
    # on pandas >= 2.0, which leaves it as object dtype and breaks ``.dt``.
    X[self.time_column] = pd.to_datetime(X[self.time_column], format=fmt)
    X['year'] = X[self.time_column].dt.year
    X['doy'] = X[self.time_column].dt.dayofyear
    X.drop(self.time_column, axis=1, inplace=True)

    feat = 'is_ramadan'
    # assign() works on a copy, so self.memo never carries the helper column,
    # even if the merge below raises.
    lookup = self.memo.assign(**{feat: 1})
    X = X.merge(lookup, how='left', on=['year', 'doy']).fillna(0)
    X = X[[feat]].astype(int)
    return X