Ejemplo n.º 1
0
    def transform(self, X: dt.Frame):
        """Flag each row's date as a German national and/or state holiday.

        The ``self.time_column`` column of ``X`` is converted to pandas
        datetimes, split into ``year`` and ``doy`` (day of year), and looked up
        in the holiday tables stored in ``self.memos``: one table under the key
        ``'country'`` and one per two-letter state code listed below.

        Side effects: ``self._output_feature_names`` and ``self._feature_desc``
        are both set to the list of produced column names. The tables in
        ``self.memos`` are left untouched.

        :param X: datatable Frame containing ``self.time_column``.
        :return: pandas DataFrame with one ``is_DE_holiday_<region>`` column per
            region, holding 1 where the date matches a stored holiday and 0
            otherwise.
        """
        # Single source of truth for the regions, so the produced columns and
        # the reported feature names cannot drift apart.
        regions = ['country', 'BW', 'BY', 'BE', 'BB', 'HB', 'HH', 'HE', 'MV',
                   'NI', 'NW', 'RP', 'SL', 'SN', 'ST', 'SH', 'TH']

        # Keep date only
        X = X[:, self.time_column].to_pandas()
        # Transform to pandas date time
        X[self.time_column] = pd.to_datetime(X[self.time_column])
        # Create year and day of year so that we can merge with stored holidays
        X['year'] = X[self.time_column].dt.year
        X['doy'] = X[self.time_column].dt.dayofyear

        features = []
        for region in regions:
            feat = f'is_DE_holiday_{region}'
            # assign() returns a copy carrying the marker column, so the table
            # kept in self.memos is not modified by transform().
            holi_df = self.memos[region].assign(**{feat: 1})
            # Rows without a match get NaN from the left merge; only the marker
            # column is filled, the other columns are not needed.
            # NOTE(review): the result is aligned back to X by index, which
            # assumes at most one row per (year, doy) in each stored table -
            # confirm against the code that builds self.memos.
            X[feat] = X.merge(
                holi_df, on=['year', 'doy'], how='left'
            )[feat].fillna(0)
            features.append(feat)

        X.drop([self.time_column, 'year', 'doy'], axis=1, inplace=True)

        self._output_feature_names = list(features)
        self._feature_desc = list(features)

        return X
Ejemplo n.º 2
0
 def transform(self, X: dt.Frame):
     """Build boolean German-holiday indicators for ``self.time_column``.

     Each value of the time column is tested for membership in
     ``holidays.DE()`` (column ``is_ge_holiday``) and in
     ``holidays.DE(state=<code>)`` for every state code listed below (columns
     ``is_ge_holiday_<code>``).

     :param X: datatable Frame containing ``self.time_column``.
     :return: pandas DataFrame with the indicator columns only; the time
         column itself is dropped.
     """
     state_codes = (
         "BW", "BY", "BE", "BB", "HB", "HH", "HE", "MV",
         "NI", "NW", "RP", "SL", "SN", "ST", "SH", "TH",
     )

     frame = X[:, self.time_column].to_pandas()

     # Country-level calendar first.
     national = holidays.DE()

     def is_national_holiday(day):
         return day in national

     frame["is_ge_holiday"] = frame[self.time_column].apply(is_national_holiday)

     # Then one calendar per state, in the order given above.
     for code in state_codes:
         regional = holidays.DE(state=code)
         column = f"is_ge_holiday_{code}"
         frame[column] = frame[self.time_column].apply(
             lambda day, calendar=regional: day in calendar)

     frame.drop(self.time_column, axis=1, inplace=True)
     return frame
Ejemplo n.º 3
0
    def transform(self, X: dt.Frame):
        """Return a single ``is_holiday`` 0/1 column for ``self.time_column``.

        The time column is parsed with the format stored in
        ``self.datetime_formats[self.time_column]``, reduced to ``year`` and
        ``doy`` (day of year) and left-merged with ``self.memo`` on those two
        keys. Rows that find a match get 1, all others 0.

        :param X: datatable Frame containing ``self.time_column``.
        :return: pandas DataFrame with the single integer column
            ``is_holiday``.
        :raises AssertionError: if the column is not a string column and its
            stored format is neither ``"%Y%m%d"`` nor ``"%Y%m%d%H%M"``.
        """
        X = X[:, self.time_column]
        fmt = self.datetime_formats[self.time_column]
        if X[:, self.time_column].ltypes[0] != dt.ltype.str:
            # Raised explicitly (same exception type as the former ``assert``)
            # so the check is not stripped when Python runs with -O.
            if fmt not in ("%Y%m%d", "%Y%m%d%H%M"):
                raise AssertionError(
                    f"Unsupported datetime format {fmt!r} "
                    f"for column {self.time_column!r}")
            # Non-string column: cast to int and then to string so that it
            # can be parsed with the format above.
            X[:, self.time_column] = dt.stype.str32(dt.stype.int64(dt.f[0]))
        X.replace(['', 'None'], None)
        X = X.to_pandas()
        # Plain column assignment replaces the column (and its dtype). With
        # ``X.loc[:, col] = ...`` pandas >= 2.0 writes into the existing column
        # instead, which can leave it as object dtype and break ``.dt`` below.
        X[self.time_column] = pd.to_datetime(X[self.time_column], format=fmt)

        X['year'] = X[self.time_column].dt.year
        X['doy'] = X[self.time_column].dt.dayofyear
        X.drop(self.time_column, axis=1, inplace=True)
        feat = 'is_holiday'
        # assign() works on a copy, so self.memo is never modified - no
        # add-then-drop dance that would leave the marker behind on an error.
        marked = self.memo.assign(**{feat: 1})
        X = X.merge(marked, how='left', on=['year', 'doy'])
        # Unmatched rows carry NaN after the left merge; turn them into 0.
        X = X[[feat]].fillna(0).astype(int)
        return X
    def create_data(X: dt.Frame = None) -> pd.DataFrame:
        """Keep only the feature subset chosen by mlxtend's exhaustive selector.

        The ``TARGET_COLUMN`` column is split off as the target, an
        ``ExhaustiveFeatureSelector`` configured from ``ESTIMATOR``,
        ``MIN_FEATURES``, ``MAX_FEATURES``, ``SCORING`` and ``CV`` is fitted on
        the remaining columns, and the columns at ``best_idx_`` are returned.

        :param X: input datatable Frame, or ``None``.
        :return: pandas DataFrame restricted to the selected columns, or an
            empty list when ``X`` is ``None``.
        """
        # No input: hand back an empty list.
        if X is None:
            return []

        from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS

        frame = X.to_pandas()
        target = frame[TARGET_COLUMN].values
        predictors = frame.drop(columns=TARGET_COLUMN)

        selector = EFS(
            ESTIMATOR,
            min_features=MIN_FEATURES,
            max_features=MAX_FEATURES,
            scoring=SCORING,
            cv=CV,
            n_jobs=-1,
        )
        selector.fit(predictors, target)

        chosen = list(selector.best_idx_)
        return predictors.iloc[:, chosen]
    def create_data(X: dt.Frame = None) -> pd.DataFrame:
        """Keep only the feature subset chosen by mlxtend's sequential selector.

        The ``TARGET_COLUMN`` column is split off as the target, a
        ``SequentialFeatureSelector`` (``forward=False``, ``floating=False``)
        configured from ``ESTIMATOR``, ``K_FEATURES``, ``SCORING`` and ``CV``
        is fitted on the remaining columns, and the columns at
        ``k_feature_idx_`` are returned.

        :param X: input datatable Frame, or ``None``.
        :return: pandas DataFrame restricted to the selected columns, or an
            empty list when ``X`` is ``None``.
        """
        # No input: hand back an empty list.
        if X is None:
            return []

        from mlxtend.feature_selection import SequentialFeatureSelector as SFS

        frame = X.to_pandas()
        target = frame[TARGET_COLUMN].values
        predictors = frame.drop(columns=TARGET_COLUMN)

        selector = SFS(
            ESTIMATOR,
            k_features=K_FEATURES,
            forward=False,
            floating=False,
            scoring=SCORING,
            cv=CV,
            n_jobs=-1,
        )
        selector.fit(predictors, target)

        chosen = list(selector.k_feature_idx_)
        return predictors.iloc[:, chosen]
Ejemplo n.º 6
0
 def transform(self, X: dt.Frame):
     """Return a single ``is_ramadan`` 0/1 column for ``self.time_column``.

     The time column is parsed with the format stored in
     ``self.datetime_formats[self.time_column]``, reduced to ``year`` and
     ``doy`` (day of year) and left-merged with ``self.memo`` on those two
     keys. Rows that find a match get 1, all others 0.

     :param X: datatable Frame containing ``self.time_column``.
     :return: pandas DataFrame with the single integer column
         ``is_ramadan``; or a ``(n_rows, 1)`` numpy array of zeros when the
         column is not a string column and its stored format is not one of
         the supported ones.
     """
     X = X[:, self.time_column]
     fmt = self.datetime_formats[self.time_column]
     if X[:, self.time_column].ltypes[0] != dt.ltype.str:
         if fmt not in ("%Y%m%d", "%Y%m%d%H%M", "%Y", "%Y%m"):
             # Unsupported format: deliberately return a constant zero
             # column rather than raising (original author's note: "just
             # return bad data, so feature is not pruned and breaks backend
             # tuning").
             return np.zeros((X.shape[0], 1))
         # Non-string column: cast to int and then to string so that it can
         # be parsed with the format above.
         X[:, self.time_column] = dt.stype.str32(dt.stype.int64(dt.f[0]))
     X.replace(['', 'None'], None)
     X = X.to_pandas()
     # Plain column assignment replaces the column (and its dtype). With
     # ``X.loc[:, col] = ...`` pandas >= 2.0 writes into the existing column
     # instead, which can leave it as object dtype and break ``.dt`` below.
     X[self.time_column] = pd.to_datetime(X[self.time_column], format=fmt)
     X['year'] = X[self.time_column].dt.year
     X['doy'] = X[self.time_column].dt.dayofyear
     X.drop(self.time_column, axis=1, inplace=True)
     feat = 'is_ramadan'
     # assign() works on a copy, so self.memo is never modified - no
     # add-then-drop dance that would leave the marker behind on an error.
     marked = self.memo.assign(**{feat: 1})
     X = X.merge(marked, how='left', on=['year', 'doy'])
     # Unmatched rows carry NaN after the left merge; turn them into 0.
     X = X[[feat]].fillna(0).astype(int)
     return X