Example #1
def run_LinearRegression_fit():
    """
    Run the fit but now using astroML's LinearRegression
    """
    x, y, y_obs, sigma = generate_points()
    m = LinearRegression()
    m.fit(x[:, None], y_obs, sigma)
    print("Intercept={0} Slope={1}".format(m.coef_[0], m.coef_[1]))
Example #2
def test_LinearRegression_simple():
    """
    Test a simple linear regression
    """
    x = np.arange(10.).reshape((10, 1))
    y = np.arange(10.) + 1
    dy = 1

    clf = LinearRegression().fit(x, y, dy)
    y_true = clf.predict(x)

    assert_allclose(y, y_true, atol=1E-10)
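These test snippets rely on a common set of imports that is omitted here. The astroML path matches the one shown explicitly in a later example; the scikit-learn alias is inferred from the skLinearRegression name used below:

import numpy as np
from numpy.testing import assert_allclose

from astroML.linear_model import LinearRegression
# later examples compare against scikit-learn under this alias
from sklearn.linear_model import LinearRegression as skLinearRegression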
Example #3
def test_error_transform_diag(N=20, rseed=0):
    rng = np.random.RandomState(rseed)
    X = rng.rand(N, 2)
    yerr = 0.05 * (1 + rng.rand(N))
    y = (X[:, 0]**2 + X[:, 1]) + yerr * rng.randn(N)
    Sigma = np.eye(N) * yerr**2

    X1, y1 = LinearRegression._scale_by_error(X, y, yerr)
    X2, y2 = LinearRegression._scale_by_error(X, y, Sigma)

    assert_allclose(X1, X2)
    assert_allclose(y1, y2)
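The test asserts that a 1-D array of standard deviations and the matching diagonal covariance produce the same scaled system. A minimal sketch of the diagonal case (illustrative only, not astroML's actual _scale_by_error implementation):

import numpy as np

def scale_by_diag_error(X, y, yerr):
    # Whitening for independent errors: dividing each equation by its
    # sigma makes ordinary least squares on (X2, y2) minimize chi-squared.
    return X / yerr[:, None], y / yerr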
Example #4
def test_LinearRegression_fit_intercept():
    np.random.seed(0)
    X = np.random.random((10, 1))
    y = np.random.random(10)

    clf1 = LinearRegression(fit_intercept=False).fit(X, y)
    clf2 = skLinearRegression(fit_intercept=False).fit(X, y)

    assert_allclose(clf1.coef_, clf2.coef_)
Example #5
def linearregression(M, z_spec, M_B, z_spec_B):
    # apply linear regression to M to find a function for z_phot
    model = LinearRegression(fit_intercept=True)
    res = model.fit(M, z_spec)
    coeff = list(res.coef_)
    print("The best fit model is:")
    print("z_phot = {0:.3f} + {1:.3f} (u-g) + {2:.3f} (g-r) + {3:.3f} (r-i)"
          " + {4:.3f} (i-z)".format(coeff[0], coeff[1], coeff[2],
                                    coeff[3], coeff[4]))

    # test whether z_phot is predicted to be close to z_spec
    z_phot = model.predict(M)
    print("The training error on the fit is:", Training_Error(z_phot, z_spec))

    # estimate the error on the test set from file B
    z_phot_B = (coeff[0] + coeff[1] * M_B[:, 0] + coeff[2] * M_B[:, 1]
                + coeff[3] * M_B[:, 2] + coeff[4] * M_B[:, 3])
    print("The estimated error for the test file B is:",
          Training_Error(z_phot_B, z_spec_B))
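Training_Error is not defined in this snippet. A plausible stand-in, assuming it is the root-mean-square residual (the definition is hypothetical):

import numpy as np

def Training_Error(z_phot, z_spec):
    # Hypothetical helper: RMS difference between predicted and
    # spectroscopic redshifts.
    return np.sqrt(np.mean((z_phot - z_spec) ** 2))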
Example #6
def test_LinearRegression_err():
    """
    Test that errors are correctly accounted for
    By comparing to scikit-learn LinearRegression
    """
    np.random.seed(0)
    X = np.random.random((10, 1))
    y = np.random.random(10) + 1
    dy = 0.1

    y = np.random.normal(y, dy)

    X_fit = np.linspace(0, 1, 10)[:, None]
    clf1 = LinearRegression().fit(X, y, dy)
    clf2 = skLinearRegression().fit(X / dy, y / dy)

    assert_allclose(clf1.coef_[1:], clf2.coef_)
    assert_allclose(clf1.coef_[0], clf2.intercept_ * dy)
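The comparison works because, for a constant error dy, weighted least squares is ordinary least squares on rescaled data:

\chi^2 = \sum_i \frac{(y_i - \theta_0 - \theta_1 x_i)^2}{dy^2}
       = \sum_i \left( \frac{y_i}{dy} - \frac{\theta_0}{dy} - \theta_1 \frac{x_i}{dy} \right)^2

The slope fitted on (X/dy, y/dy) is therefore unchanged, while the fitted intercept corresponds to theta_0/dy, which is why the test multiplies clf2.intercept_ by dy.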
Example #7
def test_error_transform_full(N=20, rseed=0):
    rng = np.random.RandomState(rseed)
    X = rng.rand(N, 2)

    # generate a pos-definite error matrix
    Sigma = 0.05 * rng.randn(N, N)
    u, s, v = np.linalg.svd(Sigma)
    Sigma = np.dot(u * s, u.T)

    # draw y from this error distribution
    y = (X[:, 0]**2 + X[:, 1])
    y = rng.multivariate_normal(y, Sigma)

    X2, y2 = LinearRegression._scale_by_error(X, y, Sigma)

    # check that the form entering the chi^2 is correct
    assert_allclose(np.dot(X2.T, X2), np.dot(X.T, np.linalg.solve(Sigma, X)))
    assert_allclose(np.dot(y2, y2), np.dot(y, np.linalg.solve(Sigma, y)))
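For a full covariance, the analogous transform whitens the system with a matrix square root of Sigma. A minimal sketch of the property being asserted, assuming a Cholesky-based whitening (astroML's internals may differ):

import numpy as np

def scale_by_full_error(X, y, Sigma):
    # Whiten with inv(L), where Sigma = L @ L.T is the Cholesky factorization,
    # so that X2.T @ X2 == X.T @ inv(Sigma) @ X and y2 @ y2 == y @ inv(Sigma) @ y.
    L = np.linalg.cholesky(Sigma)
    return np.linalg.solve(L, X), np.linalg.solve(L, y)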
Example #8
def test_LinearRegressionwithErrors():
    """
    Test for small errors agrees with fit with y errors only
    """

    from astroML.linear_model import LinearRegressionwithErrors

    np.random.seed(0)
    X = np.random.random(10) + 1
    dy = np.random.random(10) * 0.1
    y = X * 2 + 1 + (dy - 0.05)
    dx = np.random.random(10) * 0.01
    X = X + (dx - 0.005)

    clf1 = LinearRegression().fit(X[:, None], y, dy)
    clf2 = LinearRegressionwithErrors().fit(np.atleast_2d(X), y, dy, dx)

    assert_allclose(clf1.coef_, clf2.coef_, 0.2)
Example #9
#------------------------------------------------------------
# Generate data
z_sample, mu_sample, dmu = generate_mu_z(100, random_state=0)

cosmo = Cosmology()
z = np.linspace(0.01, 2, 1000)
mu_true = np.asarray([cosmo.mu(zi) for zi in z])

#------------------------------------------------------------
# Define our classifiers
basis_mu = np.linspace(0, 2, 15)[:, None]
basis_sigma = 3 * (basis_mu[1] - basis_mu[0])

subplots = [221, 222, 223, 224]
classifiers = [
    LinearRegression(),
    PolynomialRegression(4),
    BasisFunctionRegression('gaussian', mu=basis_mu, sigma=basis_sigma),
    NadarayaWatson('gaussian', h=0.1)
]
text = [
    'Straight-line Regression', '4th degree Polynomial\n Regression',
    'Gaussian Basis Function\n Regression', 'Gaussian Kernel\n Regression'
]

# number of constraints in each model; because Nadaraya-Watson
# is just a weighted mean, it has only one constraint
n_constraints = [2, 5, len(basis_mu) + 1, 1]

#------------------------------------------------------------
# Plot the results
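The snippet ends before the plotting loop. A sketch of how it might continue, assuming matplotlib is imported as plt and following the fit/predict pattern of the astroML regressors above:

fig = plt.figure(figsize=(8, 6))
for i, clf in enumerate(classifiers):
    ax = fig.add_subplot(subplots[i])

    # fit each model to the sampled (z, mu) data, weighted by dmu
    clf.fit(z_sample[:, None], mu_sample, dmu)
    mu_fit = clf.predict(z[:, None])

    ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
    ax.plot(z, mu_fit, '-k', label=text[i])
    ax.plot(z, mu_true, '--', c='gray')
    ax.legend(loc='lower right')
plt.show()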
Example #10
def plot_regressions(ksi,
                     eta,
                     x,
                     y,
                     sigma_x,
                     sigma_y,
                     add_regression_lines=False,
                     alpha_in=1,
                     beta_in=0.5,
                     basis='linear'):

    figure = plt.figure(figsize=(8, 6))
    ax = figure.add_subplot(111)
    ax.scatter(x, y, alpha=0.5)
    ax.errorbar(x, y, xerr=sigma_x, yerr=sigma_y, alpha=0.3, ls='')
    ax.set_xlabel('x')
    ax.set_ylabel('y')

    x0 = np.linspace(np.min(x) - 0.5, np.max(x) + 0.5, 20)

    # True regression line

    if alpha_in is not None and beta_in is not None:
        if basis == 'linear':
            y0 = alpha_in + x0 * beta_in
        elif basis == 'poly':
            y0 = (alpha_in + beta_in[0] * x0 + beta_in[1] * x0 ** 2
                  + beta_in[2] * x0 ** 3)

        ax.plot(x0, y0, color='black', label='True regression')
    else:
        y0 = None

    if add_regression_lines:
        for label, data, *target in [['fit no errors', x, y, 1],
                                     ['fit y errors only', x, y, sigma_y],
                                     ['fit x errors only', y, x, sigma_x]]:
            linreg = LinearRegression()
            linreg.fit(data[:, None], *target)
            if label == 'fit x errors only' and y0 is not None:
                x_fit = linreg.predict(y0[:, None])
                ax.plot(x_fit, y0, label=label)
            else:
                y_fit = linreg.predict(x0[:, None])
                ax.plot(x0, y_fit, label=label)

        # TLS
        X = np.vstack((x, y)).T
        dX = np.zeros((len(x), 2, 2))
        dX[:, 0, 0] = sigma_x
        dX[:, 1, 1] = sigma_y

        def min_func(beta):
            return -TLS_logL(beta, X, dX)

        beta_fit = optimize.fmin(min_func, x0=[-1, 1])
        m_fit, b_fit = get_m_b(beta_fit)
        x_fit = np.linspace(-10, 10, 20)
        ax.plot(x_fit, m_fit * x_fit + b_fit, label='TLS')

    ax.set_xlim(np.min(x) - 0.5, np.max(x) + 0.5)
    ax.set_ylim(np.min(y) - 0.5, np.max(y) + 0.5)
    ax.legend()
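A hypothetical invocation with synthetic data matching the default true line (alpha_in=1, beta_in=0.5); it assumes the helpers used inside the function (TLS_logL, get_m_b, scipy's optimize) are importable in the same namespace:

import numpy as np

rng = np.random.RandomState(0)
ksi = 10 * rng.rand(50)               # latent x values
eta = 1 + 0.5 * ksi                   # latent y values on the true line
sigma_x = 0.2 * np.ones(50)
sigma_y = 0.2 * np.ones(50)
x = rng.normal(ksi, sigma_x)          # observed x with errors
y = rng.normal(eta, sigma_y)          # observed y with errors

plot_regressions(ksi, eta, x, y, sigma_x, sigma_y,
                 add_regression_lines=True)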
Example #11
import sys
import pickle

import numpy as np


def pickle_from_file(fname):
    # Load pickled data from fname; return None on failure.
    try:
        fh = open(fname, 'rb')
        data = pickle.load(fh)
        fh.close()
    except Exception:
        print("Loading pickled data failed!", sys.exc_info()[0])
        data = None

    return data


d = pickle_from_file('points_example1.pkl')

x = d['x']
yobs = d['y']
sigma = d['sigma']

M = x[:, None]
model = LinearRegression(fit_intercept=True)
res = model.fit(M, yobs, sigma)
model.predict(M)
print(res.coef_)


def lnprob(theta, x, yobs, sigma):
    a, b = theta
    model = b * x + a
    inv_sigma2 = 1.0 / (sigma**2)
    return -0.5 * (np.sum((yobs - model)**2 * inv_sigma2))


p_init = res.coef_
ndim, nwalkers = 2, 100
pos = [p_init + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]
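With the walkers initialized, the sampler would typically be run like this (standard emcee v3 usage; the step and burn-in counts are arbitrary choices here):

import emcee

sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                args=(x, yobs, sigma))
sampler.run_mcmc(pos, 1000)

# discard burn-in and flatten the walkers before inspecting the posterior
samples = sampler.get_chain(discard=200, flat=True)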
Example #12
widths = 0.2
X = gaussian_basis(z_sample[:, np.newaxis], centers, widths)

#------------------------------------------------------------
# Set up the figure to plot the results
fig = plt.figure(figsize=(5, 2.7))
fig.subplots_adjust(left=0.1, right=0.95,
                    bottom=0.1, top=0.95,
                    hspace=0.15, wspace=0.2)

regularization = ['none', 'l2', 'l1']
kwargs = [dict(), dict(alpha=0.005), dict(alpha=0.001)]
labels = ['Linear Regression', 'Ridge Regression', 'Lasso Regression']

for i in range(3):
    clf = LinearRegression(regularization=regularization[i],
                           fit_intercept=True, kwds=kwargs[i])
    clf.fit(X, mu_sample, dmu)
    w = clf.coef_[1:]
    fit = clf.predict(gaussian_basis(z[:, None], centers, widths))

    # plot fit
    ax = fig.add_subplot(231 + i)
    ax.xaxis.set_major_formatter(plt.NullFormatter())

    # plot curves for regularized fits
    if i == 0:
        ax.set_ylabel(r'$\mu$')
    else:
        ax.yaxis.set_major_formatter(plt.NullFormatter())
        curves = 37 + w * gaussian_basis(z[:, np.newaxis], centers, widths)
        curves = curves[:, abs(w) > 0.01]
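gaussian_basis and centers are defined earlier in the full script and not shown here. A minimal stand-in consistent with how gaussian_basis is called above (one Gaussian bump per center; illustrative only):

import numpy as np

def gaussian_basis(x, mu, sigma):
    # Rows index the samples in x, columns index the basis centers mu;
    # sigma is a common width shared by all basis functions.
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2)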