예제 #1
0
def test_transform_target_regressor_functions_multioutput():
    """Exercise TransformedTargetRegressor with log/exp on a stacked target.

    The target is built by stacking the friedman target with
    ``target ** 2 + 1`` and transposing the result.
    """
    features, base_target = friedman[0], friedman[1]
    targets = np.vstack((base_target, base_target ** 2 + 1)).T

    model = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    model.fit(features, targets)
    predicted = model.predict(features)

    # The fitted transformer must apply log and round-trip back to the input.
    transformer = model.transformer_
    transformed = transformer.transform(targets)
    assert_allclose(np.log(targets), transformed)
    assert_allclose(targets, transformer.inverse_transform(transformed))

    # Predictions keep the target shape and equal the inverse-transformed
    # output of the inner regressor.
    assert targets.shape == predicted.shape
    inner_prediction = model.regressor_.predict(features)
    assert_allclose(predicted, model.inverse_func(inner_prediction))

    # The inner regressor must match a plain regression on func(targets).
    reference = LinearRegression()
    reference.fit(features, model.func(targets))
    assert_allclose(model.regressor_.coef_.ravel(), reference.coef_.ravel())
예제 #2
0
def test_transform_target_regressor_functions_multioutput():
    """Check log/exp target transformation on a target stacked from two arrays.

    The second stacked array is ``friedman[1] ** 2 + 1``; the stack is
    transposed before fitting.
    """
    data, first_target = friedman[0], friedman[1]
    second_target = first_target ** 2 + 1
    stacked = np.vstack((first_target, second_target)).T

    estimator = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    fitted = estimator.fit(data, stacked)
    prediction = fitted.predict(data)

    # Transformer output: forward pass is log, inverse recovers the input.
    log_target = estimator.transformer_.transform(stacked)
    assert_allclose(np.log(stacked), log_target)
    recovered = estimator.transformer_.inverse_transform(log_target)
    assert_allclose(stacked, recovered)

    # Prediction shape and value agree with the inner regressor's output
    # passed through inverse_func.
    assert stacked.shape == prediction.shape
    assert_allclose(
        prediction, estimator.inverse_func(estimator.regressor_.predict(data))
    )

    # Regressor output: same coefficients as fitting directly on func(y).
    baseline = LinearRegression().fit(data, estimator.func(stacked))
    fitted_coef = estimator.regressor_.coef_.ravel()
    assert_allclose(fitted_coef, baseline.coef_.ravel())
예제 #3
0
def test_transform_target_regressor_functions():
    """Exercise TransformedTargetRegressor with log/exp on the friedman target."""
    features, target = friedman
    model = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    model.fit(features, target)
    predicted = model.predict(features)

    # The transformer is fed a column vector here, so reshape on the way in
    # and squeeze on the way out before comparing with the original target.
    transformer = model.transformer_
    target_column = target.reshape(-1, 1)
    transformed = transformer.transform(target_column).squeeze()
    assert_allclose(np.log(target), transformed)
    round_trip = transformer.inverse_transform(transformed.reshape(-1, 1))
    assert_allclose(target, round_trip.squeeze())

    # Predictions keep the target shape and equal the inverse-transformed
    # output of the inner regressor.
    assert target.shape == predicted.shape
    inner_prediction = model.regressor_.predict(features)
    assert_allclose(predicted, model.inverse_func(inner_prediction))

    # The inner regressor must match a plain regression on func(target).
    reference = LinearRegression()
    reference.fit(features, model.func(target))
    assert_allclose(model.regressor_.coef_.ravel(), reference.coef_.ravel())
예제 #4
0
def test_transform_target_regressor_functions():
    """Check log/exp target transformation against a direct regression."""
    data, response = friedman
    estimator = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    fitted = estimator.fit(data, response)
    prediction = fitted.predict(data)

    # Transformer output: it is called with a column vector, and the result
    # is squeezed for comparison with the original response.
    log_response = estimator.transformer_.transform(
        response.reshape(-1, 1)
    ).squeeze()
    assert_allclose(np.log(response), log_response)
    recovered = estimator.transformer_.inverse_transform(
        log_response.reshape(-1, 1)
    ).squeeze()
    assert_allclose(response, recovered)

    # Prediction shape and value agree with the inner regressor's output
    # passed through inverse_func.
    assert response.shape == prediction.shape
    assert_allclose(
        prediction, estimator.inverse_func(estimator.regressor_.predict(data))
    )

    # Regressor output: same coefficients as fitting directly on func(y).
    baseline = LinearRegression().fit(data, estimator.func(response))
    fitted_coef = estimator.regressor_.coef_.ravel()
    assert_allclose(fitted_coef, baseline.coef_.ravel())
class SemiSup_RandomizedSearchCV(BaseEstimator):
    """Randomized hyper-parameter search that hides held-out targets as -1.

    For every sampled parameter setting and every K-fold split, the targets
    outside the training fold are replaced by ``-1`` before fitting, and the
    fitted model is then scored on the held-out fold.

    Parameters
    ----------
    estimator : estimator object
        Must support ``set_params``. When ``pseudo`` is False it is wrapped
        in a ``TransformedTargetRegressor`` and fitted through it.
    param_distributions : dict
        Passed unchanged to ``ParameterSampler``.
    n_iter : int, default=100
        Number of parameter settings, passed to ``ParameterSampler``.
    cv : int, default=5
        Number of splits, passed to ``KFold(n_splits=cv)``.
    scoring : callable, default=metrics.accuracy_score
        Called as ``scoring(y_true, y_pred)`` on the held-out fold.
    pseudo : bool, default=True
        If True, the module-level ``pseudo_fit`` function is bound to the
        estimator and used for fitting instead of ``fit``.

    Attributes
    ----------
    cv_results_ : pandas.DataFrame
        One row per evaluated parameter setting with the columns
        ``mean_test_score``, ``std_test_score``, ``mean_score_time``,
        ``std_score_time`` and ``params``. After ``fit`` it is sorted by
        ``mean_test_score`` in descending order.
    """

    _RESULT_COLUMNS = ['mean_test_score', 'std_test_score',
                       'mean_score_time', 'std_score_time', 'params']

    def __init__(self, estimator, param_distributions, n_iter=100, cv=5, scoring=metrics.accuracy_score, pseudo=True):
        # Keep every constructor argument as an attribute of the same name so
        # that BaseEstimator.get_params(), clone() and repr() can read it back.
        self.estimator = estimator
        self.param_distributions = param_distributions
        self.n_iter = n_iter
        self.cv = cv
        self.scoring = scoring
        self.pseudo = pseudo

        # Placeholder wrapper; fit() reassigns this attribute for every fold.
        self.transformedtargetestimator = TransformedTargetRegressor(regressor=estimator,
                                                    func=lambda x: x if np.random.rand() > 1/cv else -1,
                                                    inverse_func=lambda x: x, check_inverse=False)
        self.sampler = ParameterSampler(param_distributions, n_iter)
        self.cv_results_ = pd.DataFrame({'mean_test_score': np.empty(shape=[0]),
                                         'std_test_score': np.empty(shape=[0]),
                                         'mean_score_time': np.empty(shape=[0]),
                                         'std_score_time': np.empty(shape=[0]),
                                         'params': None})
        self.folds = KFold(n_splits=cv)

    @staticmethod
    def _mask_targets(targets, keep_mask):
        """Return ``targets`` as an array with rows outside ``keep_mask`` set to -1."""
        values = np.asarray(targets)
        # Add trailing axes so the per-row mask broadcasts over 2-D targets
        # (TransformedTargetRegressor hands a column vector to ``func``).
        aligned = keep_mask.reshape((-1,) + (1,) * (values.ndim - 1))
        return np.where(aligned, values, -1)

    def fit(self, X, y, sample_weight=None):
        """Evaluate every sampled parameter setting with K-fold splits.

        Parameters
        ----------
        X : indexable
            Samples; must support boolean-mask row selection ``X[mask]``.
        y : indexable
            Targets; must support ``len`` and boolean-mask selection.
        sample_weight : optional
            Forwarded to the wrapped estimator's ``fit`` as a keyword
            argument when given. Only used when ``pseudo`` is False.

        Returns
        -------
        self
        """
        n_samples = len(y)
        rows = []

        for params in self.sampler:
            # NOTE: this mutates the estimator supplied by the caller.
            self.estimator.set_params(**params)
            scores = []
            times = []

            for train_index, test_index in self.folds.split(X):
                # KFold yields positional indices, so build positional masks
                # rather than comparing them with y's index labels.
                train_mask = np.zeros(n_samples, dtype=bool)
                train_mask[train_index] = True
                test_mask = np.zeros(n_samples, dtype=bool)
                test_mask[test_index] = True

                # Bind the mask as a default argument so the callable keeps
                # this fold's mask even after the loop advances.
                self.transformedtargetestimator = TransformedTargetRegressor(
                    regressor=self.estimator,
                    func=lambda t, mask=train_mask: SemiSup_RandomizedSearchCV._mask_targets(t, mask),
                    inverse_func=lambda x: x, check_inverse=False)

                if self.pseudo:
                    # Attach the module-level pseudo_fit as a bound method and
                    # fit on the masked targets. Its return value replaces the
                    # wrapper and is presumably a fitted model exposing
                    # predict() -- TODO confirm against pseudo_fit.
                    regressor = self.transformedtargetestimator.regressor
                    regressor.pseudo_fit = pseudo_fit.__get__(regressor)
                    masked_y = self.transformedtargetestimator.func(y)
                    self.transformedtargetestimator = regressor.pseudo_fit(X, masked_y)
                else:
                    # Pass sample_weight by keyword and only when provided, so
                    # the call works with both the ``sample_weight=None`` and
                    # the ``**fit_params`` signatures of fit().
                    fit_params = {}
                    if sample_weight is not None:
                        fit_params['sample_weight'] = sample_weight
                    self.transformedtargetestimator.fit(X, y, **fit_params)

                # The recorded time covers prediction plus scoring.
                start = time()
                scores.append(self.scoring(y[test_mask], self.transformedtargetestimator.predict(X=X[test_mask])))
                times.append(time() - start)

            rows.append({'mean_test_score': np.mean(scores),
                         'std_test_score': np.std(scores),
                         'mean_score_time': np.mean(times),
                         'std_score_time': np.std(times),
                         'params': params})

        # DataFrame.append was removed in pandas 2.0; build the new rows once
        # and concatenate them with results from any earlier fit() call.
        frames = [self.cv_results_] if len(self.cv_results_) else []
        frames.append(pd.DataFrame(rows, columns=self._RESULT_COLUMNS))
        self.cv_results_ = pd.concat(frames, ignore_index=True)
        self.cv_results_ = self.cv_results_.sort_values('mean_test_score', ascending=False).reset_index(drop=True)
        return self