def processing(self, featureVecs, params_dir="../models/params"):
    """Project feature vectors with a pre-configured polynomial expansion and
    min-max scale them using parameters persisted on disk.

    Parameters
    ----------
    featureVecs : array-like of shape (n_samples, n_features)
        Raw feature matrix to transform.
    params_dir : str, optional
        Directory holding ``poly_params.npy`` (a pickled dict of
        ``PolynomialFeatures`` keyword arguments) plus the ``min_scale.npy``
        and ``max_scale.npy`` scaling vectors. Defaults to the previously
        hard-coded location, so existing callers are unaffected.

    Returns
    -------
    numpy.ndarray
        Polynomial-expanded, min-max-scaled features with NaN/inf coerced
        to finite values.
    """
    # poly_params.npy stores a pickled dict, so allow_pickle is required
    # (np.load rejects object arrays by default since NumPy 1.16.3).
    params = np.load(f"{params_dir}/poly_params.npy",
                     allow_pickle=True).item()
    poly = PolynomialFeatures(degree=2, include_bias=False)
    # Apply the persisted parameters BEFORE fitting: the original code called
    # set_params after fit, so the fitted expansion could ignore the saved
    # configuration (behavior depends on the sklearn version).
    poly.set_params(**params)
    poly.fit(featureVecs)
    featureVecs = poly.transform(featureVecs)
    # print("feats after polynomial projection: ", featureVecs.shape)

    # Min-max scale using the training-time extrema.
    min_xval = np.load(f"{params_dir}/min_scale.npy")
    max_xval = np.load(f"{params_dir}/max_scale.npy")
    featureVecs = (featureVecs - min_xval) / (max_xval - min_xval)
    # Constant columns produce 0/0 -> NaN here; coerce to finite numbers.
    featureVecs = np.nan_to_num(featureVecs)
    return featureVecs
def set_params(self, **params):
    """Split *params* between the two parent estimators and apply each subset.

    Keywords recognised by ``PolynomialFeatures`` are forwarded to its
    ``set_params``; keywords recognised by ``LinearRegression`` are forwarded
    to its ``set_params``. Unrecognised keys are silently ignored.
    """
    for accepted, parent in ((self.__PF_params, PolynomialFeatures),
                             (self.__LR_params, LinearRegression)):
        subset = {name: value for name, value in params.items()
                  if name in accepted}
        parent.set_params(self, **subset)
class ARXFeatureProcessor(BaseEstimator, TransformerMixin):
    """Build lagged (ARX-style) feature matrices from time-indexed data.

    Optionally applies a user-supplied transformer and/or pairwise
    interaction terms before generating autoregressive (target lags) and
    exogenous (input-column lags) features, and can append seasonal
    day-of-year / hour-of-day sine-cosine terms.
    """

    def __init__(self, auto_order=0, exog_order=2, include_interactions=False,
                 interactions_degree=2, seasonality=False, time_resolution='5T',
                 transformer=None, fit_transformer=True, **transformer_params):
        # auto_order: number of target lags; exog_order: number of lags per
        # exogenous column (units appear to be time steps here — the
        # minutes-to-steps conversion below is commented out; TODO confirm).
        self.auto_order = auto_order
        self.exog_order = exog_order
        # Whether to prepend a PolynomialFeatures interaction-only expansion.
        self.include_interactions = include_interactions
        self.interactions_degree = interactions_degree
        # Whether to append sin/cos day-of-year and hour-of-day columns.
        self.seasonality = seasonality
        # Expected pandas frequency string of the input index (e.g. '5T').
        self.time_resolution = time_resolution
        # Optional preprocessing estimator applied before lag generation.
        self.transformer = transformer
        self.fit_transformer = fit_transformer
        # Extra keyword arguments forwarded to transformer.set_params.
        self.transformer_params = transformer_params

    def fit(self, X, y=None, target_column=None, storm_level=0, time_level=1,
            keep_pd=False, **transformer_fit_params):
        """Configure (and optionally fit) the preprocessing transformer.

        target_column is the positional index of the target inside X (or
        None); storm_level/time_level are the MultiIndex levels identifying
        storms and timestamps. keep_pd is accepted but not used in this body.
        """
        if self.transformer is not None:
            self.transformer = self.transformer.set_params(
                **self.transformer_params)
            if self.include_interactions:
                # Prepend an interaction-only polynomial expansion ahead of
                # the user transformer.
                self.transformer = make_pipeline(
                    PolynomialFeatures(degree=self.interactions_degree,
                                       interaction_only=True,
                                       include_bias=False),
                    self.transformer)
        elif self.include_interactions:
            # No user transformer: the interaction expansion alone becomes
            # the preprocessing step.
            self.transformer = PolynomialFeatures(
                degree=self.interactions_degree,
                interaction_only=True,
                include_bias=False)
        self.target_column_ = target_column
        self.storm_level_ = storm_level
        self.time_level_ = time_level
        # convert auto_order, exog_order to time steps
        # time_res_minutes = to_offset(self.time_resolution).delta.seconds / 60
        # self.auto_order_timesteps_ = np.rint(self.auto_order /
        #                                      time_res_minutes).astype(int)
        # self.exog_order_timesteps_ = np.rint(self.exog_order /
        #                                      time_res_minutes).astype(int)
        if self.transformer is not None:
            # NOTE(review): set_params is applied a second time here, after
            # the transformer may have been rebuilt/wrapped above. If it was
            # wrapped in a pipeline (or replaced by a bare PolynomialFeatures),
            # transformer_params keys may no longer match its parameter names
            # — confirm this double application is intended.
            self.transformer.set_params(**self.transformer_params)
            if self.fit_transformer:
                # _pd_fit presumably fits while preserving pandas metadata —
                # helper defined elsewhere; verify its contract.
                self.transformer = _pd_fit(self.transformer, X,
                                           **transformer_fit_params)
        return self

    def transform(self, X, y=None):
        """Apply the fitted transformer, then generate lag features per storm.

        Returns a 2-D numpy array (NaNs allowed) with one block of rows per
        storm when X carries a MultiIndex, else one block for the whole X.
        """
        check_is_fitted(self)
        X = _pd_transform(self.transformer, X)
        if isinstance(X.index, pd.MultiIndex):
            # Process each storm independently, then stack row-wise.
            features = X.groupby(level=self.storm_level_).apply(
                self._transform_one_storm)
            features = np.vstack(features)
        else:
            features = self._transform_one_storm(X)
        features = check_array(features, force_all_finite='allow-nan')
        return features

    def _transform_one_storm(self, X):
        """Generate the lag-feature matrix for a single storm's data frame."""
        # Check if time has regular increments
        if isinstance(X.index, pd.MultiIndex):
            times = X.index.get_level_values(level=self.time_level_)
        else:
            times = X.index
        # NOTE: This breaks down when we use 'min' instead of 'T' when
        # specifying time resolution. Fix later.
        if times.inferred_freq != self.time_resolution:
            raise ValueError(
                "X does not have regular time increments with the specified time resolution."
            )
        if self.target_column_ is None:
            # No target: every column is treated as exogenous input.
            y_ = None
            X_ = X.to_numpy()
        else:
            # Split the target column out; remaining columns are exogenous.
            y_ = X.iloc[:, self.target_column_].to_numpy()
            X_ = X.drop(columns=X.columns[self.target_column_]).to_numpy()
        # TODO: write my own version
        # MetaLagFeatureProcessor is an external helper; signature appears to
        # be (X, y, auto_order, per-column exog orders, per-column delays).
        p = MetaLagFeatureProcessor(X_, y_, self.auto_order,
                                    [self.exog_order] * X_.shape[1],
                                    [0] * X_.shape[1])
        features = p.generate_lag_features()
        if self.seasonality:
            # Encode annual and daily cycles as sin/cos pairs so midnight and
            # 23:59 (or Dec 31 and Jan 1) are close in feature space.
            yr_term = ((2 * np.pi * times.dayofyear) / 365).to_numpy().reshape(
                -1, 1)
            day_term = ((2 * np.pi * times.hour) / 24).to_numpy().reshape(
                -1, 1)
            sin_yr = np.sin(yr_term)
            cos_yr = np.cos(yr_term)
            sin_day = np.sin(day_term)
            cos_day = np.cos(day_term)
            features = np.concatenate(
                (features, sin_yr, cos_yr, sin_day, cos_day), axis=1)
        return features
# verbose: controls the amount of logging detail
# Demo: expanding features with polynomial terms
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures()
demo_X = np.array([[0, 1], [1, 2], [2, 3], [3, 4]])
poly.fit_transform(demo_X)

# Signature: PolynomialFeatures(degree=2, interaction_only=False, include_bias=True)
# degree : integer
#     The degree of the polynomial features. Default = 2.
poly.set_params(degree=1)  # first-degree polynomial: 1, x1, x2
poly.fit_transform(demo_X)

# interaction_only : boolean, default = False
#     If true, only interaction features are produced: features that are
#     products of at most degree distinct input features (so not x[1] ** 2,
#     x[0] * x[2] ** 3, etc.).
# Cross terms only: each input feature appears at most once per term.
poly.set_params(interaction_only=True)
poly.fit_transform(demo_X)  # no squared terms such as x1**2 any more

# include_bias : boolean
#     If True (default), then include a bias column, the feature in which all
#     polynomial powers are zero (i.e. a column of ones - acts as an intercept
#     term in a linear model).
poly.set_params(include_bias=False)
poly.fit_transform(demo_X)  # no constant column

# TODO: explore the remaining options in the sklearn.preprocessing docs