def fit(self, x, y=None):
    """Fit a sparse regression model mapping ``x`` to its derivatives.

    When ``y`` is omitted, targets are estimated numerically via
    ``self.derivative.transform(x)``.  A feature library is built
    (symbolic features if ``self.operators`` is set, otherwise plain
    polynomials of ``self.degree``) and one ``STRidge`` regressor is
    fitted per output column through a ``MultiOutputRegressor``.
    Returns ``self`` for chaining.
    """
    # Targets default to numerically estimated derivatives.
    xdot = y if y is not None else self.derivative.transform(x)

    # Pick the feature library.  NOTE(review): linspace yields float
    # exponents 1..degree here — presumably intentional; confirm.
    if self.operators is not None:
        features = SymbolicFeatures(
            exponents=np.linspace(1, self.degree, self.degree),
            operators=self.operators,
        )
    else:
        features = PolynomialFeatures(degree=self.degree, include_bias=False)

    pipeline = Pipeline([
        ("features", features),
        ("model", STRidge(alpha=self.alpha, threshold=self.threshold, **self.kw)),
    ])
    self.model = MultiOutputRegressor(pipeline, n_jobs=self.n_jobs)
    self.model.fit(x, xdot)

    # Mirror the fitted feature transformer's dimensions on the estimator
    # (all per-output pipelines share the same feature construction).
    fitted_features = self.model.estimators_[0].steps[0][1]
    self.n_input_features_ = fitted_features.n_input_features_
    self.n_output_features_ = fitted_features.n_output_features_
    return self
def __init__(self, strategy, **kw):
    """Assemble a three-stage pipeline driven by *strategy*.

    Stages: column selection on ``strategy.index``, symbolic feature
    generation from ``strategy.exponents`` / ``strategy.operators`` /
    ``strategy.consider_products``, then a warm-started regressor
    created by ``strategy.base``.  Extra keyword arguments are stored
    on ``self.kw`` and forwarded to the regressor constructor.
    """
    self.strategy = strategy
    self.kw = kw

    # Build each stage separately, then hand them to the Pipeline base.
    selection = ColumnSelector(index=self.strategy.index)
    features = SymbolicFeatures(
        exponents=self.strategy.exponents,
        operators=self.strategy.operators,
        consider_products=self.strategy.consider_products,
    )
    regression = strategy.base(warm_start=True, **self.kw)

    super().__init__(steps=[
        ("selection", selection),
        ("features", features),
        ("regression", regression),
    ])
from sklearn.metrics import make_scorer from sklearn.model_selection import GridSearchCV from sklearn.model_selection import train_test_split from sklearn.utils.validation import check_random_state from sparsereg.model.base import print_model from sparsereg.model.group_lasso import SparseGroupLasso from sparsereg.preprocessing.symfeat import SymbolicFeatures rng = check_random_state(42) x = rng.normal(size=(10000, 1)) y = np.cos(x[:, 0]) + x[:, 0]**2 + x[:, 0]**3 # + 0.01*rng.normal(size=1000) x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=rng) pre = SymbolicFeatures(exponents=[1, 2], operators={ "sin": np.sin, "cos": np.cos }).fit(x_train) features_train = pre.transform(x_train) features_test = pre.transform(x_test) km = AgglomerativeClustering(n_clusters=4).fit(features_train.T) labels = defaultdict(list) for k, v in zip(pre.get_feature_names(), km.labels_): labels[v].append(k) print(labels) params = {"alpha": [0.001, 0.01, 0.02, 0.05], "normalize": [True]} scorer = make_scorer(explained_variance_score) sgl = SparseGroupLasso(groups=km.labels_, rho=0.3, alpha=0.02) l = Lasso() for model in [sgl, l]: grid = GridSearchCV(model, params, n_jobs=1, scoring=scorer,