def test_regression_corrupted_weights(weighting):
    """The corrupted first sample must receive a lower weight than the rest."""
    estimator = RobustWeightedRegressor(
        max_iter=100,
        weighting=weighting,
        k=5,
        c=1,
        burn_in=0,
        random_state=rng,
    )
    estimator.fit(X_rc, y_rc)
    # The outlier sits at index 0 of (X_rc, y_rc): its learned weight should
    # fall below the average weight of the remaining samples.
    assert estimator.weights_[0] < np.mean(estimator.weights_[1:])
def test_corrupted_regression(loss, weighting, k, c):
    """Median absolute error stays small on corrupted data with ``c=None``.

    NOTE: the parametrized ``c`` argument is accepted but not forwarded —
    the estimator is exercised with ``c=None`` here.
    """
    model = RobustWeightedRegressor(
        loss=loss,
        max_iter=50,
        weighting=weighting,
        k=k,
        c=None,
        random_state=rng,
    )
    model.fit(X_rc, y_rc)
    # Robust fit should keep the median absolute error low despite corruption.
    assert median_absolute_error(model.predict(X_rc), y_rc) < 0.2
def test_corrupted_regression(loss, weighting, k, c):
    """Fitted parameters stay close to the ground truth on corrupted data."""
    model = RobustWeightedRegressor(
        loss=loss,
        max_iter=50,
        weighting=weighting,
        k=k,
        c=c,
        random_state=rng,
        n_iter_no_change=20,
    )
    model.fit(X_rc, y_rc)
    # Ground truth used by the fixture: slope 1, intercept 0.
    assert np.abs(model.coef_[0] - 1) < 0.1
    assert np.abs(model.intercept_[0]) < 0.1
def test_vs_huber():
    """Huber weighting should give a slope close to scikit-learn's HuberRegressor."""
    robust = RobustWeightedRegressor(
        max_iter=100,
        weighting="huber",
        k=5,
        c=1,
        burn_in=0,
        sgd_args={"learning_rate": "adaptive"},  # also exercises sgd_args
        random_state=rng,
    )
    baseline = HuberRegressor()
    robust.fit(X_rcy, y_rcy)
    baseline.fit(X_rcy, y_rcy)
    # The two Huber-based estimators should agree on the first coefficient.
    assert np.abs(robust.coef_[0] - baseline.coef_[0]) < 1e-2
def test_not_robust_regression(loss, weighting):
    """With robustness disabled, predictions should match plain SGD.

    Setting ``k=0``, a huge ``c`` and no burn-in effectively turns off the
    robust weighting, so RobustWeightedRegressor must behave like an
    unweighted SGDRegressor on the same (uncorrupted) data.
    """
    # Consistency fix: this file's regression tests use ``reg`` naming
    # (see the sibling test_not_robust_regression), not ``clf``.
    reg = RobustWeightedRegressor(
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    reg_not_rob = SGDRegressor(loss=loss, random_state=rng)
    reg.fit(X_r, y_r)
    reg_not_rob.fit(X_r, y_r)
    pred1 = reg.predict(X_r)
    pred2 = reg_not_rob.predict(X_r)
    # Mean per-sample prediction gap must be small; iterate pairs directly
    # instead of indexing with range(len(...)).
    difference = [np.linalg.norm(p1 - p2) for p1, p2 in zip(pred1, pred2)]
    assert np.mean(difference) < 1e-1
def test_not_robust_regression(loss, weighting):
    """Robustness disabled (k=0, huge c): behave like a plain SGDRegressor."""
    robust = RobustWeightedRegressor(
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    plain = SGDRegressor(loss=loss, random_state=rng)
    robust.fit(X_r, y_r)
    plain.fit(X_r, y_r)
    pred_robust = robust.predict(X_r)
    pred_plain = plain.predict(X_r)
    # Average per-sample gap between the two prediction vectors.
    gaps = [np.linalg.norm(a - b) for a, b in zip(pred_robust, pred_plain)]
    assert np.mean(gaps) < 1
    # ``score`` must agree with the R^2 of the estimator's own predictions.
    assert_almost_equal(robust.score(X_r, y_r), r2_score(y_r, robust.predict(X_r)))
y[-5:] = -1 # Shuffle the data so that we don't know where the outlier is. X, y = shuffle(X, y, random_state=rng) estimators = [ ("OLS", LinearRegression()), ("Theil-Sen", TheilSenRegressor(random_state=rng)), ("RANSAC", RANSACRegressor(random_state=rng)), ("HuberRegressor", HuberRegressor()), ( "SGD epsilon loss", SGDRegressor(loss="epsilon_insensitive", random_state=rng), ), ( "RobustWeightedRegressor", RobustWeightedRegressor(weighting="mom", k=7, random_state=rng), # The parameter k is set larger to the number of outliers # because here we know it. ), ] colors = { "OLS": "turquoise", "Theil-Sen": "gold", "RANSAC": "lightgreen", "HuberRegressor": "black", "RobustWeightedRegressor": "magenta", "SGD epsilon loss": "purple", } linestyle = { "OLS": "-",
# Using GridSearchCV, we do a light tuning of the parameters for SGDRegressor # and RobustWeightedEstimator. A fine tune is possible but not necessary to # illustrate the problem of outliers in the output. estimators = [ ( "SGD", SGDRegressor(learning_rate="adaptive", eta0=1e-2), ), ( "RobustWeightedRegressor", RobustWeightedRegressor( weighting="huber", c=0.01, eta0=1e-2, sgd_args={ "learning_rate": "adaptive", "eta0": 1e-3, }, ), ), ("RANSAC", RANSACRegressor()), ("TheilSen", TheilSenRegressor()), ] M = 10 res = np.zeros(shape=[len(estimators), M, 2]) for f in range(M): print("\r Progress: %s / %s" % (f + 1, M), end="")
X = RobustScaler().fit_transform(X) # Using GridSearchCV, we do a light tuning of the parameters for SGDRegressor # and RobustWeightedEstimator. A fine tune is possible but not necessary to # illustrate the problem of outliers in the output. estimators = [ ( "SGD", SGDRegressor(learning_rate="adaptive", eta0=1e-2), ), ( "RobustWeightedRegressor", RobustWeightedRegressor( weighting="huber", c=0.1, eta0=1e-2, sgd_args={ "learning_rate": "invscaling", }, ), ), ("RANSAC", RANSACRegressor()), ("TheilSen", TheilSenRegressor()), ] M = 10 res = np.zeros(shape=[len(estimators), M, 2]) for f in range(M): print("\r Progress: %s / %s" % (f + 1, M), end="") rng = np.random.RandomState(f)