def check_regressors_pickle(name, Regressor): X, y = _boston_subset() y = StandardScaler().fit_transform(y) # X is already scaled y = multioutput_estimator_convert_y_2d(name, y) if name == 'OrthogonalMatchingPursuitCV': # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): regressor = Regressor() set_fast_parameters(regressor) if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'): # linear regressors need to set alpha, but not generalized CV ones regressor.alpha = 0.01 if name in CROSS_DECOMPOSITION: y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y regressor.fit(X, y_) y_pred = regressor.predict(X) # store old predictions pickled_regressor = pickle.dumps(regressor) unpickled_regressor = pickle.loads(pickled_regressor) pickled_y_pred = unpickled_regressor.predict(X) assert_array_almost_equal(pickled_y_pred, y_pred)
def check_regressors_int(name, Regressor, X, y): if name == 'OrthogonalMatchingPursuitCV': # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): # separate estimators to control random seeds regressor_1 = Regressor() regressor_2 = Regressor() set_random_state(regressor_1) set_random_state(regressor_2) if name in ('_PLS', 'PLSCanonical', 'PLSRegression'): y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y # fit regressor_1.fit(X, y_) pred1 = regressor_1.predict(X) regressor_2.fit(X, y_.astype(np.float)) pred2 = regressor_2.predict(X) assert_array_almost_equal(pred1, pred2, 2, name)
def check_regressors_train(name, Regressor, X, y): if name == 'OrthogonalMatchingPursuitCV': # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): regressor = Regressor() if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'): # linear regressors need to set alpha, but not generalized CV ones regressor.alpha = 0.01 # raises error on malformed input for fit assert_raises(ValueError, regressor.fit, X, y[:-1]) # fit if name in ('PLSCanonical', 'PLSRegression', 'CCA'): y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y set_random_state(regressor) regressor.fit(X, y_) regressor.predict(X) # TODO: find out why PLS and CCA fail. RANSAC is random # and furthermore assumes the presence of outliers, hence # skipped if name not in ('PLSCanonical', 'CCA', 'RANSACRegressor'): assert_greater(regressor.score(X, y_), 0.5)
def check_regressors_int(name, Regressor): X, _ = _boston_subset() X = X[:50] rnd = np.random.RandomState(0) y = rnd.randint(3, size=X.shape[0]) y = multioutput_estimator_convert_y_2d(name, y) if name == 'OrthogonalMatchingPursuitCV': # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): # separate estimators to control random seeds regressor_1 = Regressor() regressor_2 = Regressor() set_fast_parameters(regressor_1) set_fast_parameters(regressor_2) set_random_state(regressor_1) set_random_state(regressor_2) if name in CROSS_DECOMPOSITION: y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y # fit regressor_1.fit(X, y_) pred1 = regressor_1.predict(X) regressor_2.fit(X, y_.astype(np.float)) pred2 = regressor_2.predict(X) assert_array_almost_equal(pred1, pred2, 2, name)
def check_regressors_train(name, Regressor): X, y = _boston_subset() y = StandardScaler().fit_transform(y) # X is already scaled y = multioutput_estimator_convert_y_2d(name, y) if name == 'OrthogonalMatchingPursuitCV': # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): regressor = Regressor() set_fast_parameters(regressor) if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'): # linear regressors need to set alpha, but not generalized CV ones regressor.alpha = 0.01 # raises error on malformed input for fit assert_raises(ValueError, regressor.fit, X, y[:-1]) # fit if name in CROSS_DECOMPOSITION: y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y set_random_state(regressor) regressor.fit(X, y_) regressor.fit(X.tolist(), y_.tolist()) regressor.predict(X) # TODO: find out why PLS and CCA fail. RANSAC is random # and furthermore assumes the presence of outliers, hence # skipped if name not in ('PLSCanonical', 'CCA', 'RANSACRegressor'): assert_greater(regressor.score(X, y_), 0.5)
def test_omp_cv(): check_skip_travis() y_ = y[:, 0] gamma_ = gamma[:, 0] ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False, max_iter=10, cv=5) ompcv.fit(X, y_) assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs) assert_array_almost_equal(ompcv.coef_, gamma_) omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False, n_nonzero_coefs=ompcv.n_nonzero_coefs_) omp.fit(X, y_) assert_array_almost_equal(ompcv.coef_, omp.coef_)
def test_omp_cv(): # FIXME: This test is unstable on Travis, see issue #3190 for more detail. check_skip_travis() y_ = y[:, 0] gamma_ = gamma[:, 0] ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False, max_iter=10, cv=5) ompcv.fit(X, y_) assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs) assert_array_almost_equal(ompcv.coef_, gamma_) omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False, n_nonzero_coefs=ompcv.n_nonzero_coefs_) omp.fit(X, y_) assert_array_almost_equal(ompcv.coef_, omp.coef_)
def test_regressors_int(): # test if regressors can cope with integer labels (by converting them to # float) regressors = all_estimators(type_filter='regressor') X, _ = _boston_subset() X = X[:50] rnd = np.random.RandomState(0) y = rnd.randint(3, size=X.shape[0]) for name, Regressor in regressors: if name in dont_test or name in ('CCA'): continue elif name in ('OrthogonalMatchingPursuitCV'): # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() yield (check_regressors_int, name, Regressor, X, multioutput_estimator_convert_y_2d(name, y))
def set_fast_parameters(estimator): # speed up some estimators params = estimator.get_params() if estimator.__class__.__name__ == 'OrthogonalMatchingPursuitCV': # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() if ("n_iter" in params and estimator.__class__.__name__ != "TSNE"): estimator.set_params(n_iter=5) if "max_iter" in params: # NMF if estimator.max_iter is not None: estimator.set_params(max_iter=min(5, estimator.max_iter)) # LinearSVR if estimator.__class__.__name__ == 'LinearSVR': estimator.set_params(max_iter=20) if "n_resampling" in params: # randomized lasso estimator.set_params(n_resampling=5) if "n_estimators" in params: # especially gradient boosting with default 100 estimator.set_params(n_estimators=min(5, estimator.n_estimators)) if "max_trials" in params: # RANSAC estimator.set_params(max_trials=10) if "n_init" in params: # K-Means estimator.set_params(n_init=2) if estimator.__class__.__name__ == "SelectFdr": # be tolerant of noisy datasets (not actually speed) estimator.set_params(alpha=.5) if estimator.__class__.__name__ == "TheilSenRegressor": estimator.max_subpopulation = 100 if isinstance(estimator, BaseRandomProjection): # Due to the jl lemma and often very few samples, the number # of components of the random matrix projection will be probably # greater than the number of features. # So we impose a smaller number (avoid "auto" mode) estimator.set_params(n_components=1) if isinstance(estimator, SelectKBest): # SelectKBest has a default of k=10 # which is more feature than we have in most case. estimator.set_params(k=1)
def check_regressors_pickle(name, Regressor, X, y): if name == 'OrthogonalMatchingPursuitCV': # FIXME: This test is unstable on Travis, see issue #3190. check_skip_travis() rnd = np.random.RandomState(0) # catch deprecation warnings with warnings.catch_warnings(record=True): regressor = Regressor() if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'): # linear regressors need to set alpha, but not generalized CV ones regressor.alpha = 0.01 if name in ('PLSCanonical', 'PLSRegression', 'CCA'): y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))]) y_ = y_.T else: y_ = y regressor.fit(X, y_) y_pred = regressor.predict(X) # store old predictions pickled_regressor = pickle.dumps(regressor) unpickled_regressor = pickle.loads(pickled_regressor) pickled_y_pred = unpickled_regressor.predict(X) assert_array_almost_equal(pickled_y_pred, y_pred)