# ---------------------------------------------------------------------------
# Toy experiment: synthetic regression data where only some columns carry
# signal, fitted with BayesianLasso at several values of its sigma2 argument.
# ---------------------------------------------------------------------------

# Total number of feature columns: signal columns plus pure-noise columns.
n_cols = n_sig_cols + n_noise_cols

# One standard-normal column of length n_data per feature, keyed 'x0', 'x1', ...
dictionary = {
    'x{}'.format(i): np.random.randn(n_data)
    for i in range(n_cols)
}
X = pd.DataFrame(dictionary)

# Random true coefficients; the first n_noise_cols entries are forced to zero
# so the corresponding columns contribute nothing to y.
coeffs = np.random.randn(n_cols)
coeffs[:n_noise_cols] = 0.0

# Target = linear combination of the columns plus Gaussian noise scaled by
# `noise`.
y = pd.DataFrame(np.dot(X, coeffs) + noise * np.random.randn(n_data))

X_train, X_test, y_train, y_test = time_series_split(X, y, test_size=0.2)

# Investigate the performance of the Lasso regressor at each level of alpha.
# Each alpha is handed to BayesianLasso as its `sigma2` argument.
alphas = [.05, .1, .3, 1, 50]
n_alphas = len(alphas)

# One column of panels per alpha: training fit on the top row, testing fit on
# the bottom row.
plt.figure(figsize=(n_alphas * 10, 10))
models = []
for i, alpha in enumerate(alphas):
    model = BayesianLasso(sigma2=alpha)
    models.append(model)
    model.fit(X_train, y_train)

    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    # Show the fitted coefficients next to the true ones.  A single
    # pre-formatted argument prints the same text under Python 2 and 3.
    print("{} {}".format(np.array(model._map_betas), coeffs))

    train_title = 'Training Set for alpha={}'.format(alpha)
    test_title = 'Testing Set for alpha={}'.format(alpha)

    # Top row: panel i + 1.
    plt.subplot(2, n_alphas, i + 1)
    plot_comparison(np.array(y_train), np.array(pred_train), train_title)

    # Bottom row: panel n_alphas + i + 1.
    plt.subplot(2, n_alphas, n_alphas + i + 1)
    plot_comparison(np.array(y_test), np.array(pred_test), test_title)

plt.savefig('plots/bayesian_lasso_fit_comp_toy.png')
# ---------------------------------------------------------------------------
# Scaled-data experiment: fit BayesianLasso (MCMC mode) at three values of
# sigma^2, compare predictions on the train/test splits, then plot the
# coefficients of each fitted model.
# ---------------------------------------------------------------------------

# NOTE(review): the scaler is fit on all of X before the split, so rows that
# end up in the test split also influence the scaling -- confirm whether
# that is acceptable for this comparison.
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = time_series_split(X_scaled, y, test_size=0.2)

# Investigate the performance of the Bayesian Lasso at 3 levels of sigma^2.
sigma2s = [.001, 1, 10000]
plt.figure(figsize=(len(sigma2s) * 10, 10))
models = []
for i, sigma2 in enumerate(sigma2s):
    model = BayesianLasso(sigma2=sigma2, use_mcmc=True)
    models.append(model)
    # Single-argument parenthesised print behaves identically on Python 2
    # and Python 3.
    print("fitting model with sigma2={}".format(sigma2))

    # Fit on the training split only.  Fitting on (X_scaled, y) would let the
    # model see the rows plotted below as the "Testing Set", turning that
    # panel into an in-sample comparison.
    model.fit(X_train, y_train)

    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    # Top row: fit on the training split.
    plt.subplot(2, len(sigma2s), 1 + i)
    plot_comparison(y_train, pred_train,
                    'Training Set for sigma^2={}'.format(sigma2))

    # Bottom row: fit on the held-out split.
    plt.subplot(2, len(sigma2s), len(sigma2s) + 1 + i)
    plot_comparison(y_test, pred_test,
                    'Testing Set for sigma^2={}'.format(sigma2))
plt.savefig('plots/bayesian_lasso_fit_comp_2.png')

# Investigate the value of the Lasso's coefficients at the same 3 levels of
# sigma^2: one panel per fitted model, side by side.
plt.figure(figsize=(len(sigma2s) * 10, 10))
for i, model in enumerate(models):
    subplot = plt.subplot(1, len(models), 1 + i)
    plot_coeffs(model, features, subplot, confidence=True)
plt.savefig('plots/bayesian_lasso_coeff_comp_2.png')