import random

import numpy as np
import pytest
from matplotlib import pyplot as plt

# LinearRegression is the implementation under test; adjust this import to
# wherever the class lives in this repo.
from linear_regression import LinearRegression


def test_2dimx_1dimy():
    X_train = np.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9],
                            [1, 2, 3, 4, 6, 5, 7, 8, 9]])
    Y_train = np.array([3, 5, 7, 9, 11, 13, 15, 17, 19])

    model = LinearRegression()
    model.fit(X_train, Y_train)

    # test fit: y depends only on the first feature (y = 2*x1 + 1)
    assert model.coef_[0] == pytest.approx(2)
    assert model.intercept_ == pytest.approx(1)

    # test predictions
    X_test = np.transpose([[1.5, 2.5, 3.5, 7.5], [1.5, 2.5, 3.5, 7.5]])
    Y_test = np.array([4, 6, 8, 16])
    Y_pred = model.predict(X_test)
    assert np.all(Y_pred == pytest.approx(Y_test))


def test_2d_nonzero_intercept():
    X_train = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])[:, np.newaxis]
    Y_train = np.array([3, 5, 7, 9, 11, 13, 15, 17, 19])

    model = LinearRegression()
    model.fit(X_train, Y_train)

    # test fit
    assert model.coef_[0] == pytest.approx(2)
    assert model.intercept_ == pytest.approx(1)

    # test predictions
    X_test = np.array([1.5, 2.5, 3.5, 7.5])[:, np.newaxis]
    Y_test = np.array([4, 6, 8, 16])
    Y_pred = model.predict(X_test)
    assert np.all(Y_pred == pytest.approx(Y_test))
    # residuals_ stays None unless the model is built with calculate_residuals=True
    assert model.residuals_ is None


def test_2d_zero_intercept():
    X_train = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])[:, np.newaxis]
    Y_train = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18])

    model = LinearRegression(calculate_residuals=True)
    model.fit(X_train, Y_train)

    # test fit
    assert model.coef_[0] == pytest.approx(2)
    assert model.intercept_ == pytest.approx(0)

    assert model.residuals_.shape == (9, )
    assert np.all(model.residuals_ == pytest.approx(0))

    # test predictions
    X_test = np.array([1.5, 2.5, 3.5, 7.5])[:, np.newaxis]
    Y_test = np.array([3, 5, 7, 15])
    Y_pred = model.predict(X_test)
    assert np.all(Y_pred == pytest.approx(Y_test))
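

# For reference: the exact fits asserted above can be reproduced in a couple
# of lines with numpy's least-squares solver. This is only a sketch of the
# expected behaviour, not the class under test, and _reference_least_squares
# is a hypothetical name.
def _reference_least_squares(X, y):
    # prepend a column of ones so the first solved weight is the intercept
    design = np.hstack([np.ones((X.shape[0], 1)), X])
    weights, *_ = np.linalg.lstsq(design, y, rcond=None)
    return weights[1:], weights[0]  # (coef_, intercept_)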
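

# The two helpers below are referenced by the demos that follow but are not
# defined in this excerpt. These are hypothetical stand-ins: poly_func is
# assumed to be some fixed ground-truth polynomial, and PolynomialRegression
# is assumed to wrap the usual sklearn PolynomialFeatures + LinearRegression
# pipeline. Swap in the repo's real definitions if they differ.
def poly_func(x):
    # assumed ground-truth polynomial used to generate the demo data
    return 0.1 * x ** 3 - x ** 2 + 2 * x + 3


def PolynomialRegression(degree):
    # standard sklearn idiom: expand x into [x, x^2, ..., x^degree], then fit
    # an ordinary least-squares model on the expanded features
    from sklearn.linear_model import LinearRegression as SklLinearRegression
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import PolynomialFeatures

    return make_pipeline(PolynomialFeatures(degree), SklLinearRegression())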


def hacky_polynomial():
    """Polynomial regression via explicit feature expansion.

    As we only have a single feature, x, we perform polynomial regression by
    adding features for x^n up to the desired power. It's slightly hacky in
    that it doesn't give us a nice way to produce predictions other than
    performing the same power calculations on the x_test features.

    NB: this starts to diverge from the sklearn version for large powers
    (~15). I haven't figured out why, but I suspect floating-point precision:
    with x up to 10, the x^16 column dwarfs the lower-order columns, so the
    design matrix becomes badly conditioned.
    """
    min_x, max_x = -5, 10

    # Plot the true function with a green line
    X_show = np.linspace(min_x, max_x, 200)[:, np.newaxis]
    Y_show = poly_func(X_show)
    plt.plot(X_show, Y_show, color='green')

    # Define our training set; add noise to the y values to make it interesting
    n_samples = 500
    X_train = np.random.uniform(min_x, max_x, size=(n_samples, 1))
    Y_exact = poly_func(X_train)
    Y_noise = 5 * np.random.standard_normal(size=n_samples)[:, np.newaxis]
    Y_train = Y_exact + Y_noise

    max_pow = 16
    powers = np.array(range(1, max_pow + 1))
    X_train_pow = np.power.outer(X_train[:, 0], powers)

    # Now train a regression model
    model = LinearRegression()
    model.fit(X_train_pow, Y_train)

    # Compare with the sklearn pipeline equivalent
    skl_model = PolynomialRegression(max_pow)
    skl_model.fit(X_train, Y_train)

    # Plot the results
    X_show_pow = np.power.outer(X_show[:, 0], powers)
    plt.scatter(X_train, Y_train, s=1)
    plt.plot(X_show, model.predict(X_show_pow), c='red')
    plt.plot(X_show, skl_model.predict(X_show), c='blue')

    plt.show()


def linear():
    # First invent some data
    n_samples = 300
    n_dim = 1
    x_max = 100
    X_train = np.random.uniform(0, x_max, size=(n_samples, n_dim))
    coefs = np.random.uniform(1, 10, size=n_dim)
    intercept = random.uniform(0, 100)
    noise = np.sqrt(x_max) * np.random.standard_normal(size=(n_samples, 1))
    Y_mult = np.sum(X_train * coefs, axis=1)[:, np.newaxis]

    Y_train = Y_mult + intercept + noise

    # Now train a regression model
    model = LinearRegression()
    model.fit(X_train, Y_train)

    # Evaluate the line of best fit for plotting
    X_fit = np.linspace(0, x_max, 20)[:, np.newaxis]
    Y_fit = model.predict(X_fit)

    print(model.score(X_train, Y_train))
    # if we're in 1 dimension we can plot this
    if n_dim == 1:
        plt.figure(facecolor="w", figsize=(15, 10))
        plt.scatter(X_train, Y_train, s=1)
        plt.plot(X_fit, Y_fit, color='green')
        plt.show()


def test_invalid_dimensions():
    X_train = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])[:, np.newaxis]
    Y_train = np.array([3, 5, 7, 9, 11, 13, 15, 17, 19])

    model = LinearRegression()
    # a 2-D target is rejected ...
    with pytest.raises(ValueError):
        model.fit(X_train, Y_train[:, np.newaxis])
    # ... as is a 1-D feature array
    with pytest.raises(ValueError):
        model.fit(X_train.ravel(), Y_train)


def test_nd_nonzero_intercept(n_samples=1000, dim=20):
    X_train = np.random.uniform(0, 100, size=(n_samples, dim))
    coefs = np.random.uniform(1, 10, size=dim)
    intercept = random.uniform(-10, 10)
    Y_train_values = [
        sum(point * coefs) + intercept + random.uniform(-0.2, 0.2)
        for point in X_train
    ]
    Y_train = np.array(Y_train_values)

    model = LinearRegression()
    model.fit(X_train, Y_train)

    # test fit
    assert model.intercept_ == pytest.approx(intercept, abs=0.2)
    assert np.all(model.coef_ == pytest.approx(coefs, abs=0.2))

    # test predictions
    X_test = np.random.uniform(0, 100, size=(50, dim))
    Y_test = np.array([sum(point * coefs) + intercept for point in X_test])
    Y_pred = model.predict(X_test)
    assert np.all(Y_pred == pytest.approx(Y_test, abs=0.2))
    assert model.score(X_test, Y_test) > 0.999
    assert model.residuals_ is None


def test_invalid_method():
    with pytest.raises(ValueError):
        _ = LinearRegression(method='bob')


def test_gradient_desc():
    # The two feature columns are identical, so the normal-equations matrix is
    # singular and the default closed-form solver cannot handle it
    X_train = np.transpose([[1, 2, 3, 4, 5, 6, 7, 8, 9],
                            [1, 2, 3, 4, 5, 6, 7, 8, 9]])
    Y_train = np.array([6, 10, 14, 18, 22, 26, 30, 34, 38])

    model_la = LinearRegression()
    with pytest.raises(np.linalg.LinAlgError):
        model_la.fit(X_train, Y_train)

    # gradient descent with default hyper-parameters is expected to warn,
    # presumably because it fails to converge on this data
    model_gd_no_params = LinearRegression(method='gradient_descent')
    with pytest.warns(RuntimeWarning):
        model_gd_no_params.fit(X_train, Y_train)

    gd_params = {'tol': 0.001, 'max_iter': 2000, 'learning_rate': 2}
    model_gd = LinearRegression(method='gradient_descent', params=gd_params)
    model_gd.fit(X_train, Y_train)

    # with identical feature columns only the coefficient sum (2 + 2 = 4)
    # is identifiable
    assert np.sum(model_gd.coef_) == pytest.approx(4, abs=0.1)
    assert model_gd.intercept_ == pytest.approx(2, abs=0.2)

    # test predictions
    X_test = np.transpose([[1.5, 2.5, 3.5, 7.5], [1.5, 2.5, 3.5, 7.5]])
    Y_test = np.array([8, 12, 16, 32])
    Y_pred = model_gd.predict(X_test)
    assert np.all(Y_pred == pytest.approx(Y_test, abs=0.2))
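

# For context, a minimal gradient-descent fit using the hyper-parameter names
# exercised above (tol, max_iter, learning_rate). A sketch assuming a
# mean-squared-error objective; gradient_descent_fit is a hypothetical helper,
# not the repo's actual implementation.
def gradient_descent_fit(X, y, learning_rate=0.01, max_iter=1000, tol=1e-6):
    n_samples, n_features = X.shape
    coef = np.zeros(n_features)
    intercept = 0.0
    for _ in range(max_iter):
        residual = X @ coef + intercept - y  # current prediction error
        # gradients of the mean squared error w.r.t. coef and intercept
        grad_coef = 2 * X.T @ residual / n_samples
        grad_intercept = 2 * residual.mean()
        coef -= learning_rate * grad_coef
        intercept -= learning_rate * grad_intercept
        # stop once the gradient is (almost) zero
        if max(np.max(np.abs(grad_coef)), abs(grad_intercept)) < tol:
            break
    return coef, intercept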