def test_sag_regressor(): """tests if the sag regressor performs well""" xmin, xmax = -5, 5 n_samples = 20 tol = .001 max_iter = 50 alpha = 0.1 rng = np.random.RandomState(0) X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1) # simple linear function without noise y = 0.5 * X.ravel() clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter, alpha=alpha * n_samples, random_state=rng) clf2 = clone(clf1) clf1.fit(X, y) clf2.fit(sp.csr_matrix(X), y) score1 = clf1.score(X, y) score2 = clf2.score(X, y) assert score1 > 0.99 assert score2 > 0.99 # simple linear function with noise y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter, alpha=alpha * n_samples) clf2 = clone(clf1) clf1.fit(X, y) clf2.fit(sp.csr_matrix(X), y) score1 = clf1.score(X, y) score2 = clf2.score(X, y) score2 = clf2.score(X, y) assert score1 > 0.5 assert score2 > 0.5
def test_huber_better_r2_score(): # Test that huber returns a better r2 score than non-outliers""" X, y = make_regression_with_outliers() huber = HuberRegressor(alpha=0.01) huber.fit(X, y) linear_loss = np.dot(X, huber.coef_) + huber.intercept_ - y mask = np.abs(linear_loss) < huber.epsilon * huber.scale_ huber_score = huber.score(X[mask], y[mask]) huber_outlier_score = huber.score(X[~mask], y[~mask]) # The Ridge regressor should be influenced by the outliers and hence # give a worse score on the non-outliers as compared to the huber # regressor. ridge = Ridge(alpha=0.01) ridge.fit(X, y) ridge_score = ridge.score(X[mask], y[mask]) ridge_outlier_score = ridge.score(X[~mask], y[~mask]) assert huber_score > ridge_score # The huber model should also fit poorly on the outliers. assert ridge_outlier_score > huber_outlier_score