Example #1
def test_regression_corrupted_weights(weighting):
    reg = RobustWeightedRegressor(
        max_iter=100,
        weighting=weighting,
        k=5,
        c=1,
        burn_in=0,
        random_state=rng,
    )
    reg.fit(X_rc, y_rc)
    assert reg.weights_[0] < np.mean(reg.weights_[1:])
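This test fits on fixtures the excerpt never defines: rng, X_rc and y_rc. The assertion expects the sample at index 0 to receive a lower weight than the rest, so the fixtures presumably hold corrupted regression data with the outlier at index 0; Example #3's checks below (coef_ ≈ 1, intercept_ ≈ 0) suggest the clean relation is y = x. A minimal sketch of what such fixtures might look like; every name and value here is an assumption, not the test module's actual code:

import numpy as np

# Hypothetical fixtures; the real test module may build them differently.
rng = np.random.RandomState(42)

# Clean 1-D data with slope 1 and intercept 0 ...
X_rc = rng.uniform(-1, 1, size=(100, 1))
y_rc = X_rc.ravel() + 0.1 * rng.normal(size=100)

# ... with a single corrupted label at index 0, which a robust fit
# should down-weight.
y_rc[0] = 10.0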
Example #2
def test_corrupted_regression(loss, weighting, k, c):
    reg = RobustWeightedRegressor(
        loss=loss,
        max_iter=50,
        weighting=weighting,
        k=k,
        c=None,
        random_state=rng,
    )
    reg.fit(X_rc, y_rc)
    score = median_absolute_error(reg.predict(X_rc), y_rc)
    assert score < 0.2
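test_corrupted_regression takes loss, weighting, k and c as arguments, which suggests a pytest parametrization the excerpt omits. A plausible (assumed) grid; the actual values in the test suite may differ:

import pytest

# Hypothetical parameter grid. Note that "squared_error" was spelled
# "squared_loss" in older scikit-learn releases.
@pytest.mark.parametrize("loss", ["squared_error", "huber"])
@pytest.mark.parametrize("weighting", ["huber", "mom"])
@pytest.mark.parametrize("k", [0, 1, 5])
@pytest.mark.parametrize("c", [None, 1e-1, 1])
def test_corrupted_regression(loss, weighting, k, c):
    ...  # body as in the example above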
Example #3
def test_corrupted_regression(loss, weighting, k, c):
    reg = RobustWeightedRegressor(
        loss=loss,
        max_iter=50,
        weighting=weighting,
        k=k,
        c=c,
        random_state=rng,
        n_iter_no_change=20,
    )
    reg.fit(X_rc, y_rc)
    assert np.abs(reg.coef_[0] - 1) < 0.1
    assert np.abs(reg.intercept_[0]) < 0.1
Example #4
def test_vs_huber():
    reg1 = RobustWeightedRegressor(
        max_iter=100,
        weighting="huber",
        k=5,
        c=1,
        burn_in=0,
        sgd_args={"learning_rate": "adaptive"},  # test sgd_args
        random_state=rng,
    )
    reg2 = HuberRegressor()
    reg1.fit(X_rcy, y_rcy)
    reg2.fit(X_rcy, y_rcy)
    assert np.abs(reg1.coef_[0] - reg2.coef_[0]) < 1e-2
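test_vs_huber pits the "huber" weighting against scikit-learn's HuberRegressor on X_rcy, y_rcy; by its name, data whose corruption is confined to y, the setting Huber regression is designed for. A sketch of such a fixture, continuing the assumed setup from the first sketch above:

# Inputs are clean; only a few labels are corrupted. Huber-type
# down-weighting targets exactly this kind of y-outlier.
X_rcy = rng.uniform(-1, 1, size=(100, 1))
y_rcy = X_rcy.ravel() + 0.1 * rng.normal(size=100)
y_rcy[:3] = 10.0  # a few corrupted labels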
Example #5
def test_not_robust_regression(loss, weighting):
    clf = RobustWeightedRegressor(
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    clf_not_rob = SGDRegressor(loss=loss, random_state=rng)
    clf.fit(X_r, y_r)
    clf_not_rob.fit(X_r, y_r)
    pred1 = clf.predict(X_r)
    pred2 = clf_not_rob.predict(X_r)
    difference = [
        np.linalg.norm(pred1[i] - pred2[i]) for i in range(len(pred1))
    ]
    assert np.mean(difference) < 1e-1
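With k=0 and a very large threshold c=1e7 the weighting step is effectively neutralized — every sample keeps (nearly) full weight — so the estimator should track a plain SGDRegressor, which is what this test verifies on uncorrupted data X_r, y_r. A sketch of such a clean fixture, again under the assumed setup above:

# Uncorrupted linear data: with robustness switched off, the two
# estimators should produce very similar predictions.
X_r = rng.uniform(-1, 1, size=(100, 1))
y_r = X_r.ravel() + 0.1 * rng.normal(size=100)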
Example #6
def test_not_robust_regression(loss, weighting):
    reg = RobustWeightedRegressor(
        loss=loss,
        max_iter=100,
        weighting=weighting,
        k=0,
        c=1e7,
        burn_in=0,
        random_state=rng,
    )
    reg_not_rob = SGDRegressor(loss=loss, random_state=rng)
    reg.fit(X_r, y_r)
    reg_not_rob.fit(X_r, y_r)
    pred1 = reg.predict(X_r)
    pred2 = reg_not_rob.predict(X_r)
    difference = [
        np.linalg.norm(pred1[i] - pred2[i]) for i in range(len(pred1))
    ]
    assert np.mean(difference) < 1
    assert_almost_equal(reg.score(X_r, y_r), r2_score(y_r, reg.predict(X_r)))
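None of these test excerpts show their imports. The names they use all come from public APIs, presumably along these lines:

import numpy as np
from numpy.testing import assert_almost_equal
from sklearn.linear_model import HuberRegressor, SGDRegressor
from sklearn.metrics import median_absolute_error, r2_score
from sklearn_extra.robust import RobustWeightedRegressor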
Example #7
y[-5:] = -1

# Shuffle the data so that we don't know where the outliers are.
X, y = shuffle(X, y, random_state=rng)
estimators = [
    ("OLS", LinearRegression()),
    ("Theil-Sen", TheilSenRegressor(random_state=rng)),
    ("RANSAC", RANSACRegressor(random_state=rng)),
    ("HuberRegressor", HuberRegressor()),
    (
        "SGD epsilon loss",
        SGDRegressor(loss="epsilon_insensitive", random_state=rng),
    ),
    (
        "RobustWeightedRegressor",
        RobustWeightedRegressor(weighting="mom", k=7, random_state=rng),
        # The parameter k is set slightly larger than the number of
        # outliers because here we know how many there are.
    ),
]

colors = {
    "OLS": "turquoise",
    "Theil-Sen": "gold",
    "RANSAC": "lightgreen",
    "HuberRegressor": "black",
    "RobustWeightedRegressor": "magenta",
    "SGD epsilon loss": "purple",
}
linestyle = {
    "OLS": "-",
Example #8
# Using GridSearchCV, we did a light tuning of the parameters for
# SGDRegressor and RobustWeightedRegressor. Finer tuning is possible but
# not necessary to illustrate the problem of outliers in the output.
estimators = [
    (
        "SGD",
        SGDRegressor(learning_rate="adaptive", eta0=1e-2),
    ),
    (
        "RobustWeightedRegressor",
        RobustWeightedRegressor(
            weighting="huber",
            c=0.01,
            eta0=1e-2,
            sgd_args={
                "learning_rate": "adaptive",
                "eta0": 1e-3,
            },
        ),
    ),
    ("RANSAC", RANSACRegressor()),
    ("TheilSen", TheilSenRegressor()),
]

M = 10
res = np.zeros(shape=[len(estimators), M, 2])

for f in range(M):
    print("\r Progress: %s / %s" % (f + 1, M), end="")
X = RobustScaler().fit_transform(X)

# Using GridSearchCV, we did a light tuning of the parameters for
# SGDRegressor and RobustWeightedRegressor. Finer tuning is possible but
# not necessary to illustrate the problem of outliers in the output.
estimators = [
    (
        "SGD",
        SGDRegressor(learning_rate="adaptive", eta0=1e-2),
    ),
    (
        "RobustWeightedRegressor",
        RobustWeightedRegressor(
            weighting="huber",
            c=0.1,
            eta0=1e-2,
            sgd_args={
                "learning_rate": "invscaling",
            },
        ),
    ),
    ("RANSAC", RANSACRegressor()),
    ("TheilSen", TheilSenRegressor()),
]

M = 10
res = np.zeros(shape=[len(estimators), M, 2])

for f in range(M):
    print("\r Progress: %s / %s" % (f + 1, M), end="")

    rng = np.random.RandomState(f)
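Both Example #8 excerpts stop inside the Monte-Carlo loop. Since res has shape (len(estimators), M, 2), each trial presumably stores two numbers per estimator — plausibly errors on a train and a test split. A hedged sketch of what the loop body might do; the split, the metric and the indexing are assumptions, not the example's actual code:

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

for f in range(M):
    print("\r Progress: %s / %s" % (f + 1, M), end="")
    rng = np.random.RandomState(f)

    # Assumed body: a fresh split per trial, one error per
    # (estimator, trial, train/test) cell of `res`.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=rng
    )
    for i, (name, est) in enumerate(estimators):
        est.fit(X_train, y_train)
        res[i, f, 0] = mean_squared_error(y_train, est.predict(X_train))
        res[i, f, 1] = mean_squared_error(y_test, est.predict(X_test))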