def run_LinearRegression_fit():
    """Run the fit, but now using astroML's LinearRegression."""
    x, y, y_obs, sigma = generate_points()
    m = LinearRegression()
    m.fit(x[:, None], y_obs, sigma)
    print("Intercept={0} Slope={1}".format(m.coef_[0], m.coef_[1]))
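# `generate_points` is not defined in this snippet.  A minimal sketch of
# a compatible helper, assuming straight-line data with Gaussian noise;
# the parameter values and sample size below are illustrative assumptions.
def generate_points(N=50, a=25.0, b=4.0, sigma=10.0, rseed=42):
    rng = np.random.RandomState(rseed)
    x = 100 * rng.rand(N)         # abscissa values
    y = a + b * x                 # noiseless ordinates
    y_obs = rng.normal(y, sigma)  # observed ordinates with Gaussian noise
    return x, y, y_obs, sigma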
def test_LinearRegression_simple():
    """Test a simple linear regression."""
    x = np.arange(10.).reshape((10, 1))
    y = np.arange(10.) + 1
    dy = 1
    clf = LinearRegression().fit(x, y, dy)
    y_true = clf.predict(x)
    assert_allclose(y, y_true, atol=1E-10)
def test_error_transform_diag(N=20, rseed=0):
    rng = np.random.RandomState(rseed)
    X = rng.rand(N, 2)
    yerr = 0.05 * (1 + rng.rand(N))
    y = (X[:, 0] ** 2 + X[:, 1]) + yerr * rng.randn(N)

    # a diagonal error matrix should give the same result
    # as the vector of per-point errors
    Sigma = np.eye(N) * yerr ** 2
    X1, y1 = LinearRegression._scale_by_error(X, y, yerr)
    X2, y2 = LinearRegression._scale_by_error(X, y, Sigma)

    assert_allclose(X1, X2)
    assert_allclose(y1, y2)
def test_LinearRegression_fit_intercept():
    np.random.seed(0)
    X = np.random.random((10, 1))
    y = np.random.random(10)
    clf1 = LinearRegression(fit_intercept=False).fit(X, y)
    clf2 = skLinearRegression(fit_intercept=False).fit(X, y)
    assert_allclose(clf1.coef_, clf2.coef_)
def linearregression(M, z_spec, M_B, z_spec_B):
    # apply linear regression on M to find a model for z_phot
    model = LinearRegression(fit_intercept=True)
    res = model.fit(M, z_spec)
    coeff = list(res.coef_)
    print("The best fit model is:")
    print("z_phot = {0:.3f} + {1:.3f} (u-g) + {2:.3f} (g-r) "
          "+ {3:.3f} (r-i) + {4:.3f} (i-z)".format(coeff[0], coeff[1],
                                                   coeff[2], coeff[3],
                                                   coeff[4]))

    # test whether z_phot is predicted to have a value close to z_spec
    z_phot = model.predict(M)
    print("The training error on the fit is:",
          Training_Error(z_phot, z_spec))

    # estimate the error using test file B
    z_phot_B = (coeff[0] + coeff[1] * M_B[:, 0] + coeff[2] * M_B[:, 1]
                + coeff[3] * M_B[:, 2] + coeff[4] * M_B[:, 3])
    print("The estimated error for the test file B is:",
          Training_Error(z_phot_B, z_spec_B))
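# `Training_Error` is referenced above but not defined in this snippet.
# A minimal sketch, assuming it is the root-mean-square difference between
# photometric and spectroscopic redshifts (an assumption, not the original):
def Training_Error(z_phot, z_spec):
    return np.sqrt(np.mean((z_phot - z_spec) ** 2))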
def test_LinearRegression_err():
    """Test that errors are correctly accounted for
    by comparing to scikit-learn's LinearRegression.
    """
    np.random.seed(0)
    X = np.random.random((10, 1))
    y = np.random.random(10) + 1
    dy = 0.1
    y = np.random.normal(y, dy)

    clf1 = LinearRegression().fit(X, y, dy)
    clf2 = skLinearRegression().fit(X / dy, y / dy)

    assert_allclose(clf1.coef_[1:], clf2.coef_)
    assert_allclose(clf1.coef_[0], clf2.intercept_ * dy)
def test_error_transform_full(N=20, rseed=0):
    rng = np.random.RandomState(rseed)
    X = rng.rand(N, 2)

    # generate a positive-definite error matrix
    Sigma = 0.05 * rng.randn(N, N)
    u, s, v = np.linalg.svd(Sigma)
    Sigma = np.dot(u * s, u.T)

    # draw y from this error distribution
    y = X[:, 0] ** 2 + X[:, 1]
    y = rng.multivariate_normal(y, Sigma)

    X2, y2 = LinearRegression._scale_by_error(X, y, Sigma)

    # check that the form entering the chi^2 is correct
    assert_allclose(np.dot(X2.T, X2), np.dot(X.T, np.linalg.solve(Sigma, X)))
    assert_allclose(np.dot(y2, y2), np.dot(y, np.linalg.solve(Sigma, y)))
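# The transform being tested whitens the data by the error covariance:
# with Sigma = L L^T (Cholesky), X2 = L^{-1} X and y2 = L^{-1} y, so that
# X2^T X2 = X^T Sigma^{-1} X.  A minimal standalone sketch of that idea
# (an illustration, not astroML's actual `_scale_by_error` source):
from scipy.linalg import cholesky, solve_triangular

def scale_by_error_sketch(X, y, Sigma):
    L = cholesky(Sigma, lower=True)           # Sigma = L @ L.T
    X2 = solve_triangular(L, X, lower=True)   # L^{-1} X
    y2 = solve_triangular(L, y, lower=True)   # L^{-1} y
    return X2, y2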
def test_LinearRegressionwithErrors():
    """Test that, for small errors, the fit agrees with
    a fit using y errors only.
    """
    from astroML.linear_model import LinearRegressionwithErrors

    np.random.seed(0)
    X = np.random.random(10) + 1
    dy = np.random.random(10) * 0.1
    y = X * 2 + 1 + (dy - 0.05)
    dx = np.random.random(10) * 0.01
    X = X + (dx - 0.005)

    clf1 = LinearRegression().fit(X[:, None], y, dy)
    clf2 = LinearRegressionwithErrors().fit(np.atleast_2d(X), y, dy, dx)

    assert_allclose(clf1.coef_, clf2.coef_, 0.2)
#------------------------------------------------------------
# Generate data
z_sample, mu_sample, dmu = generate_mu_z(100, random_state=0)

cosmo = Cosmology()
z = np.linspace(0.01, 2, 1000)
mu_true = np.asarray([cosmo.mu(zi) for zi in z])

#------------------------------------------------------------
# Define our classifiers
basis_mu = np.linspace(0, 2, 15)[:, None]
basis_sigma = 3 * (basis_mu[1] - basis_mu[0])

subplots = [221, 222, 223, 224]
classifiers = [LinearRegression(),
               PolynomialRegression(4),
               BasisFunctionRegression('gaussian',
                                       mu=basis_mu, sigma=basis_sigma),
               NadarayaWatson('gaussian', h=0.1)]
text = ['Straight-line Regression',
        '4th degree Polynomial\n Regression',
        'Gaussian Basis Function\n Regression',
        'Gaussian Kernel\n Regression']

# number of constraints of each model; because Nadaraya-Watson
# is just a weighted mean, it has only one constraint
n_constraints = [2, 5, len(basis_mu) + 1, 1]

#------------------------------------------------------------
# Plot the results
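# The plotting loop itself is not shown above.  A minimal sketch of how it
# could look, assuming matplotlib is imported as plt; the figure styling
# and subplot annotations here are illustrative assumptions.
fig = plt.figure(figsize=(8, 6))
for i, clf in enumerate(classifiers):
    ax = fig.add_subplot(subplots[i])

    clf.fit(z_sample[:, None], mu_sample, dmu)
    mu_fit = clf.predict(z[:, None])

    # reduced chi-squared, penalizing each model by its
    # number of constraints from n_constraints
    mu_sample_fit = clf.predict(z_sample[:, None])
    chi2_dof = (np.sum(((mu_sample_fit - mu_sample) / dmu) ** 2)
                / (len(mu_sample) - n_constraints[i]))

    ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
    ax.plot(z, mu_fit, '-k')
    ax.plot(z, mu_true, '--', c='gray')
    ax.text(0.05, 0.95, text[i], transform=ax.transAxes, va='top')
    ax.text(0.95, 0.05, r'$\chi^2_{\rm dof} = %.2f$' % chi2_dof,
            transform=ax.transAxes, ha='right')
plt.show()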
def plot_regressions(ksi, eta, x, y, sigma_x, sigma_y,
                     add_regression_lines=False,
                     alpha_in=1, beta_in=0.5, basis='linear'):

    figure = plt.figure(figsize=(8, 6))
    ax = figure.add_subplot(111)
    ax.scatter(x, y, alpha=0.5)
    ax.errorbar(x, y, xerr=sigma_x, yerr=sigma_y, alpha=0.3, ls='')
    ax.set_xlabel('x')
    ax.set_ylabel('y')

    x0 = np.linspace(np.min(x) - 0.5, np.max(x) + 0.5, 20)

    # True regression line
    if alpha_in is not None and beta_in is not None:
        if basis == 'linear':
            y0 = alpha_in + x0 * beta_in
        elif basis == 'poly':
            y0 = (alpha_in + beta_in[0] * x0 + beta_in[1] * x0 ** 2
                  + beta_in[2] * x0 ** 3)
        ax.plot(x0, y0, color='black', label='True regression')
    else:
        y0 = None

    if add_regression_lines:
        for label, data, *target in [['fit no errors', x, y, 1],
                                     ['fit y errors only', x, y, sigma_y],
                                     ['fit x errors only', y, x, sigma_x]]:
            linreg = LinearRegression()
            linreg.fit(data[:, None], *target)
            if label == 'fit x errors only' and y0 is not None:
                # for the x-error fit, the roles of x and y are swapped
                x_fit = linreg.predict(y0[:, None])
                ax.plot(x_fit, y0, label=label)
            else:
                y_fit = linreg.predict(x0[:, None])
                ax.plot(x0, y_fit, label=label)

        # TLS
        X = np.vstack((x, y)).T
        dX = np.zeros((len(x), 2, 2))
        dX[:, 0, 0] = sigma_x
        dX[:, 1, 1] = sigma_y

        def min_func(beta):
            return -TLS_logL(beta, X, dX)

        beta_fit = optimize.fmin(min_func, x0=[-1, 1])
        m_fit, b_fit = get_m_b(beta_fit)
        x_fit = np.linspace(-10, 10, 20)
        ax.plot(x_fit, m_fit * x_fit + b_fit, label='TLS')

    ax.set_xlim(np.min(x) - 0.5, np.max(x) + 0.5)
    ax.set_ylim(np.min(y) - 0.5, np.max(y) + 0.5)
    ax.legend()
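# `get_m_b` is used above but not defined in this snippet.  A minimal
# sketch, assuming the parameterization from the astroML TLS examples in
# which beta is a two-vector normal to the line (an assumption, not
# necessarily the definition used elsewhere in this codebase):
def get_m_b(beta):
    b = np.dot(beta, beta) / beta[1]  # intercept
    m = -beta[0] / beta[1]            # slope
    return m, b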
def pickle_from_file(fname):
    # NOTE: the opening lines of this helper were missing from the
    # source; the try/open/load lines are a minimal reconstruction.
    try:
        fh = open(fname, 'rb')
        data = pickle.load(fh)
        fh.close()
    except Exception:
        print("Loading pickled data failed!", sys.exc_info()[0])
        data = None
    return data


d = pickle_from_file('points_example1.pkl')
x = d['x']
yobs = d['y']
sigma = d['sigma']

M = x[:, None]
model = LinearRegression(fit_intercept=True)
res = model.fit(M, yobs, sigma)
model.predict(M)
print(res.coef_)


def lnprob(theta, x, yobs, sigma):
    a, b = theta
    model = b * x + a
    inv_sigma2 = 1.0 / sigma ** 2
    return -0.5 * np.sum((yobs - model) ** 2 * inv_sigma2)


p_init = res.coef_
ndim, nwalkers = 2, 100
pos = [p_init + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]
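# The walker initialization above matches emcee's EnsembleSampler API.
# A minimal sketch of running the sampler (assumes emcee is installed;
# the step counts are arbitrary illustrative choices):
import emcee

sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob,
                                args=(x, yobs, sigma))
sampler.run_mcmc(pos, 500)
samples = sampler.get_chain(discard=100, flat=True)  # drop burn-in
print("posterior mean (intercept, slope):", samples.mean(axis=0))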
widths = 0.2
X = gaussian_basis(z_sample[:, np.newaxis], centers, widths)

#------------------------------------------------------------
# Set up the figure to plot the results
fig = plt.figure(figsize=(5, 2.7))
fig.subplots_adjust(left=0.1, right=0.95,
                    bottom=0.1, top=0.95,
                    hspace=0.15, wspace=0.2)

regularization = ['none', 'l2', 'l1']
kwargs = [dict(), dict(alpha=0.005), dict(alpha=0.001)]
labels = ['Linear Regression', 'Ridge Regression', 'Lasso Regression']

for i in range(3):
    clf = LinearRegression(regularization=regularization[i],
                           fit_intercept=True, kwds=kwargs[i])
    clf.fit(X, mu_sample, dmu)
    w = clf.coef_[1:]
    fit = clf.predict(gaussian_basis(z[:, None], centers, widths))

    # plot fit
    ax = fig.add_subplot(231 + i)
    ax.xaxis.set_major_formatter(plt.NullFormatter())

    # plot curves for regularized fits
    if i == 0:
        ax.set_ylabel(r'$\mu$')
    else:
        ax.yaxis.set_major_formatter(plt.NullFormatter())

    curves = 37 + w * gaussian_basis(z[:, np.newaxis], centers, widths)
    curves = curves[:, abs(w) > 0.01]
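    # The snippet above ends mid-loop.  A sketch of how the loop body
    # could conclude, drawing the surviving basis curves, the combined
    # fit, and the data (illustrative styling, an assumption rather than
    # the original figure's exact commands):
    ax.plot(z, curves, c='gray', lw=1, alpha=0.5)
    ax.plot(z, fit, '-k')
    ax.errorbar(z_sample, mu_sample, dmu, fmt='.k', ecolor='gray', lw=1)
    ax.set_title(labels[i])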