def test_transform_target_regressor_functions_multioutput():
    """Check func/inverse_func handling when y has two output columns."""
    X = friedman[0]
    # Stack two targets: the original one and a strictly positive variant.
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    model = TransformedTargetRegressor(regressor=LinearRegression(),
                                       func=np.log, inverse_func=np.exp)
    predictions = model.fit(X, y).predict(X)

    # The fitted transformer must apply func/inverse_func element-wise.
    transformed = model.transformer_.transform(y)
    assert_allclose(np.log(y), transformed)
    assert_allclose(y, model.transformer_.inverse_transform(transformed))

    # Predictions keep the multioutput shape and round-trip through exp.
    assert y.shape == predictions.shape
    assert_allclose(predictions,
                    model.inverse_func(model.regressor_.predict(X)))

    # The inner regressor must match a LinearRegression fit on func(y).
    reference = LinearRegression().fit(X, model.func(y))
    assert_allclose(model.regressor_.coef_.ravel(),
                    reference.coef_.ravel())
def test_transform_target_regressor_functions():
    """Check func/inverse_func handling for a single 1d target."""
    X, y = friedman
    model = TransformedTargetRegressor(regressor=LinearRegression(),
                                       func=np.log, inverse_func=np.exp)
    predictions = model.fit(X, y).predict(X)

    # The fitted transformer works on 2d input, so reshape the 1d target
    # into a column, transform, and squeeze back for comparison.
    transformed = model.transformer_.transform(y.reshape(-1, 1)).squeeze()
    assert_allclose(np.log(y), transformed)
    round_trip = model.transformer_.inverse_transform(
        transformed.reshape(-1, 1)).squeeze()
    assert_allclose(y, round_trip)

    # Predictions keep y's shape and round-trip through exp.
    assert y.shape == predictions.shape
    assert_allclose(predictions,
                    model.inverse_func(model.regressor_.predict(X)))

    # The inner regressor must match a LinearRegression fit on func(y).
    reference = LinearRegression().fit(X, model.func(y))
    assert_allclose(model.regressor_.coef_.ravel(),
                    reference.coef_.ravel())
class SemiSup_RandomizedSearchCV(BaseEstimator):
    """Randomized hyper-parameter search with semi-supervised K-fold CV.

    For each sampled parameter set and each fold, the labels of the
    held-out fold are masked with ``-1`` (the conventional "unlabelled"
    marker) via a :class:`TransformedTargetRegressor` wrapper, the
    estimator is fit on the partially labelled data (optionally through a
    ``pseudo_fit`` self-training routine), and it is scored on the
    held-out fold.  Results are accumulated into ``cv_results_``.

    Parameters
    ----------
    estimator : estimator object
        The estimator whose hyper-parameters are searched.
    param_distributions : dict
        Distributions to sample from, as accepted by ``ParameterSampler``.
    n_iter : int, default=100
        Number of parameter settings sampled.
    cv : int, default=5
        Number of K-fold splits.
    scoring : callable, default=metrics.accuracy_score
        ``scoring(y_true, y_pred)`` -> float.
    pseudo : bool, default=True
        If True, fit through the estimator's ``pseudo_fit`` routine
        (attached dynamically from the module-level ``pseudo_fit``);
        otherwise use the wrapper's plain ``fit``.
    """

    def __init__(self, estimator, param_distributions, n_iter=100, cv=5,
                 scoring=metrics.accuracy_score, pseudo=True):
        # Mirror sklearn's RandomizedSearchCV constructor shape.
        self.estimator = estimator
        self.scoring = scoring
        self.pseudo = pseudo
        # Placeholder wrapper; it is rebuilt per fold in ``fit`` with the
        # actual train indices.  check_inverse=False because masking labels
        # with -1 is intentionally non-invertible.
        self.transformedtargetestimator = TransformedTargetRegressor(
            regressor=estimator,
            func=lambda x: x if np.random.rand() > 1 / cv else -1,
            inverse_func=lambda x: x,
            check_inverse=False)
        self.sampler = ParameterSampler(param_distributions, n_iter)
        # Empty results table; one row is appended per parameter setting.
        self.cv_results_ = pd.DataFrame({
            'mean_test_score': np.empty(shape=[0]),
            'std_test_score': np.empty(shape=[0]),
            'mean_score_time': np.empty(shape=[0]),
            'std_score_time': np.empty(shape=[0]),
            'params': None})
        self.folds = KFold(n_splits=cv)

    def fit(self, X, y, sample_weight=None):
        """Run the randomized search over all folds.

        ``y`` is expected to be a pandas Series (its ``.index`` is used to
        align fold indices).  Returns ``self`` with ``cv_results_`` sorted
        by descending mean test score.
        """
        for params in self.sampler:
            self.estimator.set_params(**params)
            scores = []
            times = []
            for train_index, test_index in self.folds.split(X):
                # Semi-supervised target: labels outside the train fold are
                # replaced with -1.  Bind train_index as a default argument
                # so the lambda does not rely on late-bound closure state.
                # np.isin replaces np.in1d (deprecated in NumPy 2.0).
                self.transformedtargetestimator = TransformedTargetRegressor(
                    regressor=self.estimator,
                    func=lambda x, idx=train_index: np.where(
                        np.isin(x.index, idx), x, -1),
                    inverse_func=lambda x: x,
                    check_inverse=False)
                if self.pseudo:
                    # Attach the module-level pseudo_fit as a bound method
                    # and fit via self-training on the masked labels.
                    self.transformedtargetestimator.regressor.pseudo_fit = \
                        pseudo_fit.__get__(
                            self.transformedtargetestimator.regressor)
                    self.transformedtargetestimator = \
                        self.transformedtargetestimator.regressor.pseudo_fit(
                            X, self.transformedtargetestimator.func(y))
                else:
                    self.transformedtargetestimator.fit(X, y, sample_weight)
                # Score on the held-out fold only, timing the scoring call.
                score_index = np.isin(y.index, test_index)
                start = time()
                scores.append(self.scoring(
                    y[score_index],
                    self.transformedtargetestimator.predict(
                        X=X[score_index])))
                times.append(time() - start)
            # DataFrame.append was removed in pandas 2.0; use pd.concat.
            fold_summary = pd.DataFrame({
                'mean_test_score': np.mean(scores),
                'std_test_score': np.std(scores),
                'mean_score_time': np.mean(times),
                'std_score_time': np.std(times),
                'params': [params]})
            self.cv_results_ = pd.concat(
                [self.cv_results_, fold_summary], ignore_index=True)
        self.cv_results_ = self.cv_results_.sort_values(
            'mean_test_score', ascending=False).reset_index(drop=True)
        return self