import pickle
import sys

import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize

from astroML.linear_model import LinearRegression, TLS_logL


def run_LinearRegression_fit():
    """Run the fit, but now using astroML's LinearRegression."""
    x, y, y_obs, sigma = generate_points()
    m = LinearRegression()
    m.fit(x[:, None], y_obs, sigma)
    print("Intercept={0} Slope={1}".format(m.coef_[0], m.coef_[1]))
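# `generate_points` is not defined in this section; a minimal sketch, assuming
# a straight line observed with homoscedastic Gaussian errors (the slope,
# intercept, and error level below are illustrative assumptions, not the
# original values):
def generate_points(n=50, intercept=25.0, slope=0.5, rseed=42):
    rng = np.random.RandomState(rseed)
    x = 100 * rng.random_sample(n)      # random abscissas
    y = intercept + slope * x           # noise-free ordinates
    sigma = 10.0 * np.ones(n)           # 1-sigma measurement errors
    y_obs = y + sigma * rng.standard_normal(n)
    return x, y, y_obs, sigma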
def linearregression(M, z_spec, M_B, z_spec_B):
    """Apply linear regression to the colors M to find a function for z_phot."""
    model = LinearRegression(fit_intercept=True)
    res = model.fit(M, z_spec)
    coeff = list(res.coef_)
    print("The best fit model is:")
    print("z_phot = {0:.3f} + {1:.3f} (u-g) + {2:.3f} (g-r) + "
          "{3:.3f} (r-i) + {4:.3f} (i-z)".format(
              coeff[0], coeff[1], coeff[2], coeff[3], coeff[4]))

    # Test whether z_phot is predicted to have a value close to z_spec
    z_phot = model.predict(M)
    print("The training error on the fit is:", Training_Error(z_phot, z_spec))

    # Estimate the generalization error using file B
    z_phot_B = (coeff[0] + coeff[1] * M_B[:, 0] + coeff[2] * M_B[:, 1]
                + coeff[3] * M_B[:, 2] + coeff[4] * M_B[:, 3])
    print("The estimated error for the test file B is:",
          Training_Error(z_phot_B, z_spec_B))
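# `Training_Error` is called above but not defined in this section; a minimal
# sketch, assuming it returns the root-mean-square residual between the
# photometric and spectroscopic redshifts (the exact metric is an assumption):
def Training_Error(z_phot, z_spec):
    """RMS difference between predicted and true redshifts."""
    return np.sqrt(np.mean((z_phot - z_spec) ** 2))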
def plot_regressions(ksi, eta, x, y, sigma_x, sigma_y,
                     add_regression_lines=False,
                     alpha_in=1, beta_in=0.5, basis='linear'):
    figure = plt.figure(figsize=(8, 6))
    ax = figure.add_subplot(111)
    ax.scatter(x, y, alpha=0.5)
    ax.errorbar(x, y, xerr=sigma_x, yerr=sigma_y, alpha=0.3, ls='')
    ax.set_xlabel('x')
    ax.set_ylabel('y')

    x0 = np.linspace(np.min(x) - 0.5, np.max(x) + 0.5, 20)

    # True regression line
    if alpha_in is not None and beta_in is not None:
        if basis == 'linear':
            y0 = alpha_in + x0 * beta_in
        elif basis == 'poly':
            y0 = (alpha_in + beta_in[0] * x0
                  + beta_in[1] * x0 ** 2 + beta_in[2] * x0 ** 3)
        ax.plot(x0, y0, color='black', label='True regression')
    else:
        y0 = None

    if add_regression_lines:
        for label, data, *target in [['fit no errors', x, y, 1],
                                     ['fit y errors only', x, y, sigma_y],
                                     ['fit x errors only', y, x, sigma_x]]:
            linreg = LinearRegression()
            linreg.fit(data[:, None], *target)
            if label == 'fit x errors only' and y0 is not None:
                # x was regressed on y, so predict x on the y0 grid
                x_fit = linreg.predict(y0[:, None])
                ax.plot(x_fit, y0, label=label)
            else:
                y_fit = linreg.predict(x0[:, None])
                ax.plot(x0, y_fit, label=label)

        # Total least squares: maximize TLS_logL over the line parameters.
        # TLS_logL expects per-point covariance matrices, so the diagonal
        # entries are variances (sigma squared), not standard deviations.
        X = np.vstack((x, y)).T
        dX = np.zeros((len(x), 2, 2))
        dX[:, 0, 0] = sigma_x ** 2
        dX[:, 1, 1] = sigma_y ** 2

        def min_func(beta):
            return -TLS_logL(beta, X, dX)

        beta_fit = optimize.fmin(min_func, x0=[-1, 1])
        m_fit, b_fit = get_m_b(beta_fit)
        x_fit = np.linspace(-10, 10, 20)
        ax.plot(x_fit, m_fit * x_fit + b_fit, label='TLS')

    ax.set_xlim(np.min(x) - 0.5, np.max(x) + 0.5)
    ax.set_ylim(np.min(y) - 0.5, np.max(y) + 0.5)
    ax.legend()
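# `get_m_b` is not defined in this section; a sketch assuming the astroML-book
# convention, in which beta is the vector from the origin perpendicular to the
# line (that convention is an assumption here):
def get_m_b(beta):
    """Convert the TLS normal-vector parameters to slope and intercept."""
    b = np.dot(beta, beta) / beta[1]  # intercept
    m = -beta[0] / beta[1]            # slope
    return m, b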
def pickle_from_file(fname):
    """Load pickled data from fname, returning None on failure."""
    try:
        with open(fname, 'rb') as fh:
            data = pickle.load(fh)
    except Exception:
        print("Loading pickled data failed!", sys.exc_info()[0])
        data = None
    return data


d = pickle_from_file('points_example1.pkl')
x = d['x']
yobs = d['y']
sigma = d['sigma']

# Maximum-likelihood starting point from astroML's LinearRegression
M = x[:, None]
model = LinearRegression(fit_intercept=True)
res = model.fit(M, yobs, sigma)
model.predict(M)
print(res.coef_)


def lnprob(theta, x, yobs, sigma):
    """Log-posterior (flat priors) of a straight line y = a + b*x."""
    a, b = theta
    model = b * x + a
    inv_sigma2 = 1.0 / sigma ** 2
    return -0.5 * np.sum((yobs - model) ** 2 * inv_sigma2)


# Initialize the emcee walkers in a small ball around the best-fit coefficients
p_init = res.coef_
ndim, nwalkers = 2, 100
pos = [p_init + 1e-4 * np.random.randn(ndim) for i in range(nwalkers)]
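# The walkers above are ready for an MCMC run that this section does not show;
# a minimal sketch using emcee (assumes emcee >= 3 for get_chain; the step
# count and burn-in length are illustrative choices, not the original values):
import emcee

sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, yobs, sigma))
sampler.run_mcmc(pos, 1000)
flat_samples = sampler.get_chain(discard=100, flat=True)  # drop burn-in
print("posterior medians (a, b):", np.median(flat_samples, axis=0))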
#------------------------------------------------------------
# Set up the figure to plot the results
fig = plt.figure(figsize=(5, 2.7))
fig.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95,
                    hspace=0.15, wspace=0.2)

regularization = ['none', 'l2', 'l1']
kwargs = [dict(), dict(alpha=0.005), dict(alpha=0.001)]
labels = ['Linear Regression', 'Ridge Regression', 'Lasso Regression']

for i in range(3):
    clf = LinearRegression(regularization=regularization[i],
                           fit_intercept=True, kwds=kwargs[i])
    clf.fit(X, mu_sample, dmu)
    w = clf.coef_[1:]
    fit = clf.predict(gaussian_basis(z[:, None], centers, widths))

    # plot fit
    ax = fig.add_subplot(231 + i)
    ax.xaxis.set_major_formatter(plt.NullFormatter())

    # plot curves for regularized fits
    if i == 0:
        ax.set_ylabel(r'$\mu$')
    else:
        ax.yaxis.set_major_formatter(plt.NullFormatter())

    # thin gray curves: individual basis functions with non-negligible weights
    curves = 37 + w * gaussian_basis(z[:, np.newaxis], centers, widths)
    curves = curves[:, abs(w) > 0.01]
    ax.plot(z, curves, c='gray', lw=1, alpha=0.5)
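# `gaussian_basis` is used above but not defined in this section; a minimal
# sketch, assuming Gaussian radial basis functions evaluated at the given
# centers and widths (the exact form is an assumption):
def gaussian_basis(x, mu, sigma):
    """Gaussian RBF features: one column per (mu, sigma) pair."""
    return np.exp(-0.5 * ((x - mu) / sigma) ** 2)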