Exemple #1
0
 def processing(self, featureVecs):
     """Expand features polynomially, then min-max scale them with stored bounds.

     The polynomial expansion is configured from the parameter dict stored in
     ``../models/params/poly_params.npy``; the scaling bounds are read from
     ``min_scale.npy`` and ``max_scale.npy`` in the same directory. All paths
     are relative to the current working directory.

     Args:
         featureVecs: Feature matrix handed to ``PolynomialFeatures``
             (presumably 2-D, samples x features -- confirm against callers).

     Returns:
         The expanded features scaled as ``(x - min) / (max - min)``, with
         NaN and infinite entries replaced by ``np.nan_to_num``.
     """
     # The file stores a dict inside a 0-d object array (hence ``.item()``).
     # NumPy >= 1.16.3 refuses to load object arrays unless allow_pickle=True.
     # NOTE: unpickling can execute arbitrary code -- only load trusted files.
     params = np.load("../models/params/poly_params.npy",
                      allow_pickle=True).item()

     poly = PolynomialFeatures(degree=2, include_bias=False)
     # Apply the stored hyper-parameters BEFORE fitting: the fitted state is
     # derived from the parameters in effect at fit time, so changing them
     # after ``fit`` leaves the transformer inconsistent.
     poly.set_params(**params)
     poly.fit(featureVecs)
     featureVecs = poly.transform(featureVecs)

     min_xval = np.load("../models/params/min_scale.npy")
     max_xval = np.load("../models/params/max_scale.npy")
     # Where the stored max equals the stored min this divides by zero;
     # nan_to_num below turns the resulting NaN into 0 and +/-inf into the
     # largest finite floats.
     featureVecs = (featureVecs - min_xval) / (max_xval - min_xval)
     featureVecs = np.nan_to_num(featureVecs)
     return featureVecs
Exemple #2
0
 def set_params(self, **params):
     """Route keyword parameters to the two base estimators.

     Keys found in ``self.__PF_params`` are forwarded to
     ``PolynomialFeatures.set_params`` and keys found in
     ``self.__LR_params`` to ``LinearRegression.set_params``. Keys that
     appear in neither collection are silently ignored.

     Args:
         **params: Parameter names mapped to their new values.

     Returns:
         ``self``, matching scikit-learn's ``set_params`` contract so that
         callers which chain on the return value keep working.
     """
     pf_kwargs = {k: v for k, v in params.items() if k in self.__PF_params}
     lr_kwargs = {k: v for k, v in params.items() if k in self.__LR_params}
     PolynomialFeatures.set_params(self, **pf_kwargs)
     LinearRegression.set_params(self, **lr_kwargs)
     return self
Exemple #3
0
class ARXFeatureProcessor(BaseEstimator, TransformerMixin):
    """Turn time-indexed frames into lag-feature matrices.

    The input is first passed through an optional transformer (optionally
    preceded by interaction-only polynomial features). Lag features are then
    produced by ``MetaLagFeatureProcessor``, once per group of the storm
    index level when the input has a ``MultiIndex``, and four sine/cosine
    seasonality columns can be appended.

    Parameters
    ----------
    auto_order : int, default=0
        Forwarded to ``MetaLagFeatureProcessor`` as the order applied to the
        target column (presumably the auto-regressive order -- confirm).
    exog_order : int, default=2
        Forwarded to ``MetaLagFeatureProcessor`` once per non-target column.
    include_interactions : bool, default=False
        If True, prepend interaction-only ``PolynomialFeatures``.
    interactions_degree : int, default=2
        ``degree`` of those polynomial features.
    seasonality : bool, default=False
        If True, append sin/cos of day-of-year and hour-of-day.
    time_resolution : str, default='5T'
        Frequency string the time index must match exactly.
    transformer : estimator or None, default=None
        Transformer applied to ``X`` before lagging.
    fit_transformer : bool, default=True
        Whether ``fit`` should fit the transformer.
    **transformer_params
        Parameters set on the transformer during ``fit``.
    """

    def __init__(self,
                 auto_order=0,
                 exog_order=2,
                 include_interactions=False,
                 interactions_degree=2,
                 seasonality=False,
                 time_resolution='5T',
                 transformer=None,
                 fit_transformer=True,
                 **transformer_params):
        self.auto_order = auto_order
        self.exog_order = exog_order
        self.include_interactions = include_interactions
        self.interactions_degree = interactions_degree
        self.seasonality = seasonality
        self.time_resolution = time_resolution
        self.transformer = transformer
        self.fit_transformer = fit_transformer
        self.transformer_params = transformer_params

    def fit(self,
            X,
            y=None,
            target_column=None,
            storm_level=0,
            time_level=1,
            keep_pd=False,
            **transformer_fit_params):
        """Configure (and optionally fit) the transformer and record settings.

        Parameters
        ----------
        X : pandas object
            Training data handed to ``_pd_fit`` when the transformer is fit.
        y : ignored
        target_column : int or None, default=None
            Positional index of the target column inside the transformed
            data; ``None`` means there is no target column.
        storm_level : int or str, default=0
            Index level used to group a ``MultiIndex`` input in ``transform``.
        time_level : int or str, default=1
            Index level holding the timestamps of a ``MultiIndex`` input.
        keep_pd : bool, default=False
            Currently unused.
        **transformer_fit_params
            Extra keyword arguments forwarded to ``_pd_fit``.

        Returns
        -------
        self
        """
        # NOTE(review): this method rebinds ``self.transformer``; calling
        # fit() again with include_interactions=True therefore wraps the
        # already-wrapped pipeline one more time.
        interactions = None
        if self.include_interactions:
            interactions = PolynomialFeatures(
                degree=self.interactions_degree,
                interaction_only=True,
                include_bias=False)

        if self.transformer is not None:
            # Configure the transformer exactly once, *before* it may be
            # wrapped: a Pipeline rejects un-prefixed step parameters, so
            # repeating set_params on the wrapped object raised ValueError
            # whenever transformer_params was non-empty.
            self.transformer = self.transformer.set_params(
                **self.transformer_params)
            if interactions is not None:
                self.transformer = make_pipeline(interactions,
                                                 self.transformer)
        elif interactions is not None:
            # No user transformer: the interaction features are the
            # transformer and receive the parameters themselves.
            self.transformer = interactions.set_params(
                **self.transformer_params)

        self.target_column_ = target_column
        self.storm_level_ = storm_level
        self.time_level_ = time_level

        # NOTE: auto_order / exog_order are used as given; the conversion
        # from minutes to time steps below is currently disabled.
        # time_res_minutes = to_offset(self.time_resolution).delta.seconds / 60
        # self.auto_order_timesteps_ = np.rint(self.auto_order /
        #                                      time_res_minutes).astype(int)
        # self.exog_order_timesteps_ = np.rint(self.exog_order /
        #                                      time_res_minutes).astype(int)

        if self.transformer is not None and self.fit_transformer:
            self.transformer = _pd_fit(self.transformer, X,
                                       **transformer_fit_params)

        return self

    def transform(self, X, y=None):
        """Transform ``X`` and build the lag-feature matrix.

        Parameters
        ----------
        X : pandas object
            Data to transform. With a ``MultiIndex`` the lag features are
            computed separately for each group of the storm level and the
            per-group results are stacked vertically.
        y : ignored

        Returns
        -------
        features : ndarray
            The feature matrix returned by ``check_array``; NaN entries are
            allowed.
        """
        check_is_fitted(self)

        X = _pd_transform(self.transformer, X)
        if isinstance(X.index, pd.MultiIndex):
            per_storm = X.groupby(level=self.storm_level_).apply(
                self._transform_one_storm)
            features = np.vstack(per_storm)
        else:
            features = self._transform_one_storm(X)

        return check_array(features, force_all_finite='allow-nan')

    def _transform_one_storm(self, X):
        """Build lag (and optional seasonality) features for one storm.

        Raises
        ------
        ValueError
            If the inferred frequency of the time index differs from
            ``time_resolution``.
        """
        if isinstance(X.index, pd.MultiIndex):
            times = X.index.get_level_values(level=self.time_level_)
        else:
            times = X.index

        # Check that time has regular increments.
        # NOTE: This is a plain string comparison, so it breaks down when
        # 'min' is used instead of 'T' to specify the resolution. Fix later.
        if times.inferred_freq != self.time_resolution:
            raise ValueError(
                "X does not have regular time increments with the specified time resolution."
            )

        if self.target_column_ is None:
            y_ = None
            X_ = X.to_numpy()
        else:
            # Split the target column off from the remaining columns.
            y_ = X.iloc[:, self.target_column_].to_numpy()
            X_ = X.drop(columns=X.columns[self.target_column_]).to_numpy()

        # TODO: write my own version
        n_exog = X_.shape[1]
        p = MetaLagFeatureProcessor(X_, y_, self.auto_order,
                                    [self.exog_order] * n_exog,
                                    [0] * n_exog)
        features = p.generate_lag_features()

        if self.seasonality:
            # Map day-of-year onto a 365-day cycle and hour onto a 24-hour
            # cycle, then encode each angle as a sine/cosine pair.
            yr_term = ((2 * np.pi * times.dayofyear) / 365).to_numpy().reshape(
                -1, 1)
            day_term = ((2 * np.pi * times.hour) / 24).to_numpy().reshape(
                -1, 1)
            features = np.concatenate(
                (features, np.sin(yr_term), np.cos(yr_term),
                 np.sin(day_term), np.cos(day_term)),
                axis=1)

        return features
Exemple #4
0
# verbose: controls the level of detail (verbosity)


# Adding features via polynomial expansion.
# Walk-through of PolynomialFeatures: the same estimator object is
# reconfigured step by step with set_params, so the settings accumulate.
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures()
a = np.array([[0, 1],
              [1, 2],
              [2, 3],
              [3, 4]])
poly.fit_transform(a)
# Parameters: PolynomialFeatures(degree=2, interaction_only=False, include_bias=True)

# degree : integer
#   The degree of the polynomial features. Default = 2.
poly.set_params(degree=1)  # degree-1 polynomial: 1, x1, x2
poly.fit_transform(a)
# interaction_only : boolean, default = False
#   If true, only interaction features are produced: features that are
#   products of at most degree distinct input features (so not x[1] ** 2,
#   x[0] * x[2] ** 3, etc.). # Only cross terms; each feature appears at most once.
poly.set_params(interaction_only=True)
poly.fit_transform(a)  # terms such as x1 squared are no longer produced
# include_bias : boolean
#   If True (default), then include a bias column, the feature in which all
#   polynomial powers are zero (i.e. a column of ones - acts as an intercept
#   term in a linear model).
poly.set_params(include_bias=False)
poly.fit_transform(a)  # no constant (bias) term

# TODO: for the other options, see the sklearn.preprocessing documentation