def load_globals(config):
    import mapreduce as GLOBAL  # access to global variables
    # `sparse` (scipy.sparse) and `nesterov_tv` are imported at module level
    GLOBAL.DATA = GLOBAL.load_data(config["data"])
    IM_SHAPE = config["im_shape"]
    A = sparse.vstack(nesterov_tv.linear_operator_from_shape(IM_SHAPE))
    N_COMP = config["n_comp"]
    GLOBAL.A, GLOBAL.N_COMP = A, N_COMP
def test_tvhelper_linear_operator_from_shape(self):
    import parsimony.functions.nesterov.tv as tv
    dx = 5  # p should be odd
    shape = (dx, dx, dx)
    # A_from_shape
    p = np.prod(shape)
    beta = np.zeros(p)
    beta[0:p:2] = 1  # checkerboard of 0 and 1
    A = tv.linear_operator_from_shape(shape)
    tvfunc = tv.TotalVariation(l=1.0, A=A)
    assert tvfunc.f(beta) == self._f_checkerboard_cube(shape)
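    # Sketch of an extra consistency check (an addition, assuming
    # linear_operator_from_shape returns the three sparse difference
    # matrices (Ax, Ay, Az)): isotropic TV is the sum, over voxels, of the
    # Euclidean norm of the three directional finite differences, so
    # tvfunc.f(beta) should match this direct evaluation.
    import numpy as np

    def tv_from_operator(A, beta):
        Ax, Ay, Az = A
        grad = np.vstack([Ax.dot(beta), Ay.dot(beta), Az.dot(beta)])
        return np.sum(np.sqrt(np.sum(grad ** 2, axis=0)))

    assert np.isclose(tvfunc.f(beta), tv_from_operator(A, beta))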
                                     alpha=alpha / 1000 * n_train,
                                     l1_ratio=.5, fit_intercept=True)
MODELS["2d_l1l2_inter_fista"] = \
    estimators.ElasticNetLogisticRegression(alpha=alpha / 10, l=.5,
                                            penalty_start=1,
                                            algorithm_params=algorithm_params)

## LogisticRegressionL1L2TV, Parsimony only
# Minimize:
#     f(beta, X, y) = - loglik/n_train
#                     + l2/2 * ||beta||^2_2
#                     + l1 * ||beta||_1
#                     + tv * TV(beta)
A = nesterov_tv.linear_operator_from_shape(beta3d.shape)
l1, l2, tv = alpha * np.array((.05, .75, .2))  # l1, l2, tv penalties
MODELS["2d_l1l2tv_fista"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, A,
        algorithm=algorithms.proximal.FISTA(),
        algorithm_params=algorithm_params)

MODELS["2d_l1l2tv_inter_fista"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, A, penalty_start=1,
        algorithm=algorithms.proximal.FISTA(),
        algorithm_params=algorithm_params)
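# Sketch of a driver loop (hypothetical, not in the original script;
# Xtr, ytr, Xte stand for the train/test arrays this script is assumed
# to define elsewhere): fit every registered model and collect test
# predictions.
PREDICTIONS = {}
for name, model in MODELS.items():
    PREDICTIONS[name] = model.fit(Xtr, ytr).predict(Xte)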
X = X3d.reshape((n_samples, np.prod(shape)))

###############################################################################
# Plain PCA (scikit-learn), for reference
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(X)
print(pca.explained_variance_ratio_)

###############################################################################
# PCA-TV
l1max = PCAL1L2TV.l1_max(X)
alpha = l1max
l1, l2, tv = alpha * np.array((.05, 1, .1))  # l1, l2, tv penalties
Atv = nesterov_tv.linear_operator_from_shape(shape)
pcatv = PCAL1L2TV(l1, l2, tv, Atv, n_components=3,
                  eps=1e-6, max_iter=100, inner_max_iter=int(1e3),
                  verbose=True)
pcatv.fit(X)
# Number of non-zero loadings per component
np.sum(np.abs(pcatv.V) > 1e-6, axis=0)

###############################################################################
# Plot
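# Sketch of the plots, assuming a 2D grid stored as shape = (nx, ny, 1)
# and one loading vector per column of pcatv.V:
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 3)
for k, ax in enumerate(axes):
    ax.imshow(np.asarray(pcatv.V)[:, k].reshape(shape[:2]), cmap="RdBu_r")
    ax.set_title("PCA-TV comp. %i" % (k + 1))
plt.show()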
@license: BSD 3-clause.
"""
import numpy as np

import parsimony.datasets
import parsimony.functions.nesterov.tv as tv
from parsimony.functions import PCA_L1_TV
from parsimony.algorithms.multiblock import MultiblockProjectedGradientMethod
import parsimony.utils.weights as weights

n_samples = 500
shape = (30, 30, 1)

X3d, y, beta3d = parsimony.datasets.make_regression_struct(
    n_samples=n_samples, shape=shape, r2=.75, random_seed=1)
X = X3d.reshape((n_samples, np.prod(shape)))
A = tv.linear_operator_from_shape(shape)

start_vector = weights.RandomStartVector()
w = start_vector.get_vector(X.shape[1])

alpha = 10.
k, l, g = alpha * np.array((.1, .4, .5))  # l2, l1, tv penalties

func = PCA_L1_TV(X, k, l, g, A, mu=0.0001)

# algo = MultiblockProjectedGradientMethod(max_iter=10)
# w_x = start_vector_x.get_vector(X.shape[1])
# w_y = start_vector_y.get_vector(Y.shape[1])
# algo.run(func, w)
def test_tvhelper_linear_operator_from_mask(self):
    import parsimony.functions.nesterov.tv as tv

    ## Simple mask with offset
    shape = (5, 4)
    mask = np.zeros(shape)
    mask[1:(shape[0] - 1), 0:(shape[1] - 1)] = 1
    Ax_ = np.matrix(
        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, -1, 0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, -1, 0, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, -1, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, -1, 0, 0, 1, 0, 0],
         [0, 0, 0, 0, 0, -1, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, -1, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    Ay_ = np.matrix(
        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, -1, 1, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, -1, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, -1, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, -1, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, -1, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, -1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    A = tv.linear_operator_from_mask(mask, offset=1)
    Ax, Ay, Az = A
    assert np.all(Ax.todense() == Ax_)
    assert np.all(Ay.todense() == Ay_)
    # Az must be all zeros for a 2D mask (np.all, not np.sum, is the
    # meaningful assertion here)
    assert np.all(Az.todense() == 0)

    #######################################################################
    ## GROUP TV
    shape = (6, 4)
    mask = np.zeros(shape, dtype=int)
    mask[:3, :3] = 1
    mask[3:6, 1:4] = 2
    Ax_ = np.matrix(
        [[-1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    Ay_ = np.matrix(
        [[-1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    A = tv.linear_operator_from_mask(mask)
    Ax, Ay, Az = A
    assert np.all(Ax.todense() == Ax_)
    assert np.all(Ay.todense() == Ay_)
    assert np.all(Az.todense() == 0)

    #######################################################################
    ## test function tv on checkerboard
    #######################################################################
    dx = 5  # p should be odd
    shape = (dx, dx, dx)

    # linear_operator_from_mask
    mask = np.zeros(shape)
    mask[1:(dx - 1), 1:(dx - 1), 1:(dx - 1)] = 1
    p = np.prod((dx - 2, dx - 2, dx - 2))
    beta = np.zeros(p)
    beta[0:p:2] = 1  # checkerboard of 0 and 1
    A = tv.linear_operator_from_mask(mask)
    tvfunc = tv.TotalVariation(l=1., A=A)
    assert tvfunc.f(beta) == self._f_checkerboard_cube((dx - 2, dx - 2,
                                                        dx - 2))

    # linear_operator_from_mask with groups
    mask = np.zeros(shape)
    # 4 groups (integer division keeps the indices valid under Python 3)
    mask[0:(dx // 2), 0:(dx // 2), :] = 1
    mask[0:(dx // 2), (dx // 2):dx, :] = 2
    mask[(dx // 2):dx, 0:(dx // 2), :] = 3
    mask[(dx // 2):dx, (dx // 2):dx, :] = 4
    p = np.prod((dx, dx, dx))
    beta = np.zeros(p)
    beta[0:p:2] = 1  # checkerboard of 0 and 1
    A = tv.linear_operator_from_mask(mask)
    tvfunc = tv.TotalVariation(l=1., A=A)
    assert np.allclose(
        tvfunc.f(beta),
        self._f_checkerboard_cube((dx // 2, dx // 2, dx))
        + self._f_checkerboard_cube((dx // 2, dx // 2 + 1, dx))
        + self._f_checkerboard_cube((dx // 2 + 1, dx // 2, dx))
        + self._f_checkerboard_cube((dx // 2 + 1, dx // 2 + 1, dx)))

    ## Weighted operator from shape
    shape = (2, 3)
    mask = np.ones(shape)
    weights1D = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
    # weights2D = np.reshape(weights1D, shape)
    A_shape = tv.linear_operator_from_shape(shape, weights1D)
    # A_mask = tv.linear_operator_from_subset_mask(mask, weights2D)
    A_true = (np.array([[-1., 1., 0., 0., 0., 0.],
                        [0., -2., 2., 0., 0., 0.],
                        [0., 0., 0., 0., 0., 0.],
                        [0., 0., 0., -4., 4., 0.],
                        [0., 0., 0., 0., -5., 5.],
                        [0., 0., 0., 0., 0., 0.]]),
              np.array([[-1., 0., 0., 1., 0., 0.],
                        [0., -2., 0., 0., 2., 0.],
                        [0., 0., -3., 0., 0., 3.],
                        [0., 0., 0., 0., 0., 0.],
                        [0., 0., 0., 0., 0., 0.],
                        [0., 0., 0., 0., 0., 0.]]),
              np.zeros((6, 6)))
    # The corresponding A_mask comparisons are disabled above, together
    # with linear_operator_from_subset_mask.
    assert np.array_equal(A_true[0], A_shape[0].todense())
    assert np.array_equal(A_true[1], A_shape[1].todense())
    assert np.array_equal(A_true[2], A_shape[2].todense())
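    # Sketch of an extra consistency check (an addition, assuming
    # linear_operator_from_shape returns three sparse matrices): each row
    # of the weighted operator should be the corresponding row of the
    # unweighted operator scaled by the weight of its voxel, i.e.
    # A_weighted = diag(weights) * A_unweighted.
    import scipy.sparse as sparse
    A_unw = tv.linear_operator_from_shape(shape)
    W = sparse.diags(weights1D)
    for Aw, Au in zip(A_shape, A_unw):
        assert np.allclose(W.dot(Au).todense(), Aw.todense())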
def test_combo_smooth(self):
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.tv as tv
    import parsimony.datasets.simulate.l1_l2_tvmu as l1_l2_tvmu
    import parsimony.utils.start_vectors as start_vectors

    np.random.seed(42)

    px = 4
    py = 4
    pz = 4
    shape = (pz, py, px)
    n, p = 50, np.prod(shape)

    l = 0.618
    k = 1.0 - l
    g = 1.1

    start_vector = start_vectors.RandomStartVector(normalise=True)
    beta = start_vector.get_vector(p)

    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0
    A = tv.linear_operator_from_shape(shape)
    mu_min = 5e-8
    X, y, beta_star = l1_l2_tvmu.load(l=l, k=k, g=g, beta=beta, M=M,
                                      e=e, A=A, mu=mu_min, snr=snr)

    eps = 1e-8
    max_iter = 5300

    beta_start = start_vector.get_vector(p)
    mus = [5e-2, 5e-4, 5e-6, 5e-8]
    # max_iter must be an int for FISTA
    fista = proximal.FISTA(eps=eps, max_iter=int(max_iter / len(mus)))

    beta_parsimony = beta_start
    for mu in mus:
        # function = functions.LinearRegressionL1L2GL(X, y, l, k, g,
        #                                             A=A, mu=mu,
        #                                             penalty_start=0)
        function = CombinedFunction()
        function.add_function(functions.losses.LinearRegression(X, y,
                                                                mean=False))
        function.add_penalty(tv.TotalVariation(l=g, A=A, mu=mu,
                                               penalty_start=0))
        function.add_penalty(functions.penalties.L2Squared(l=k))
        function.add_prox(functions.penalties.L1(l=l))

        beta_parsimony = fista.run(function, beta_parsimony)

    berr = np.linalg.norm(beta_parsimony - beta_star)
    assert berr < 5e-3

    f_parsimony = function.f(beta_parsimony)
    f_star = function.f(beta_star)
    ferr = abs(f_parsimony - f_star)
    assert ferr < 5e-5
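    # Sketch of an equivalent fit (an aside, not part of the original test):
    # parsimony's CONESTA algorithm automates the decreasing-mu continuation
    # done manually in the loop above.
    import parsimony.estimators as estimators
    enettv = estimators.LinearRegressionL1L2TV(
        l, k, g, A=A, algorithm=proximal.CONESTA(eps=eps, max_iter=max_iter))
    beta_conesta = enettv.fit(X, y).beta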
ridge_prsmy = estimators.RidgeLogisticRegression(alpha)
yte_pred_ridge_prsmy = ridge_prsmy.fit(Xtr, ytr).predict(Xte)
_, recall_ridge_prsmy, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_ridge_prsmy, average=None)

# ElasticNet
enet = estimators.ElasticNetLogisticRegression(l=0.5, alpha=alpha)
yte_pred_enet = enet.fit(Xtr, ytr).predict(Xte)
_, recall_enet, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_enet, average=None)

# GraphNet
# l1, l2, gn = alpha * np.array((.05, .75, .2))
l1, l2, gn = alpha * np.array((.33, .33, 33))  # l1, l2, gn penalties
A = sparse.vstack(nesterov_tv.linear_operator_from_shape(shape))
enetgn = estimators.LogisticRegressionL1L2GraphNet(l1, l2, gn, A)
yte_pred_enetgn = enetgn.fit(Xtr, ytr).predict(Xte)
_, recall_enetgn, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_enetgn, average=None)

# LogisticRegressionL1L2TV
l1, l2, tv = alpha * np.array((.05, .75, .2))  # l1, l2, tv penalties
# l1, l2, tv = alpha * np.array((.33, .33, 33))
A = nesterov_tv.linear_operator_from_shape(beta3d.shape)
enettv = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
                                             algorithm_params=dict(eps=1e-5))
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)
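# Sketch of a summary step (an addition, assuming the recall_* arrays
# computed above): report per-class recall for each fitted classifier.
_, recall_enettv, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_enettv, average=None)
for name, recall in [("Ridge (parsimony)", recall_ridge_prsmy),
                     ("ElasticNet", recall_enet),
                     ("GraphNet", recall_enetgn),
                     ("L1+L2+TV", recall_enettv)]:
    print("%s recall per class: %s" % (name, recall))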
ridge_prsmy = estimators.RidgeLogisticRegression(alpha)
yte_pred_ridge_prsmy = ridge_prsmy.fit(Xtr, ytr).predict(Xte)
_, recall_ridge_prsmy, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_ridge_prsmy, average=None)

###########################################################################
## LogisticRegressionL1L2TV
# Minimize:
#     f(beta, X, y) = - loglik/n_train
#                     + l2/2 * ||beta||^2_2
#                     + l1 * ||beta||_1
#                     + tv * TV(beta)
l1, l2, tv = alpha * np.array((.05, .75, .2))  # l1, l2, tv penalties
## Limit the precision to 1e-3
A = nesterov_tv.linear_operator_from_shape(beta3d.shape)
enettv = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
                                             algorithm_params=dict(eps=1e-3))
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)
_, recall_enettv, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_enettv, average=None)

###########################################################################
## Plot
plot = plt.subplot(221)
limits = None  # np.array((beta3d.min(), beta3d.max())) / 2.
utils.plot_map2d(beta3d.reshape(shape), plot, title="beta star")
plot = plt.subplot(222)
utils.plot_map2d(enettv.beta.reshape(shape), plot, limits=limits,
                 title="L1+L2+TV (%.2f, %.2f)" % tuple(recall_enettv))
plot = plt.subplot(223)
utils.plot_map2d(ridge_sklrn.coef_.reshape(shape), plot, limits=limits,
                 title="Ridge (sklearn)")
ytr = y[:n_train]
Xte = X[n_train:, :]
yte = y[n_train:]

alpha = 1.  # global penalty

###############################################################################
# Estimators

# Fit RidgeRegression
rr = estimators.RidgeRegression(l=alpha)
yte_pred_rr = rr.fit(Xtr, ytr).predict(Xte)

# Fit GraphNet
l1, l2, gn = alpha * np.array((.33, .33, 33))  # l1, l2, gn penalties
A = sparse.vstack(nesterov_tv.linear_operator_from_shape(shape))
enetgn = estimators.LinearRegressionL1L2GraphNet(l1, l2, gn, A)
yte_pred_enetgn = enetgn.fit(Xtr, ytr).predict(Xte)

# Fit LinearRegressionL1L2TV
l1, l2, tv = alpha * np.array((.33, .33, .33))  # l1, l2, tv penalties
Atv = nesterov_tv.linear_operator_from_shape(shape)
enettv = estimators.LinearRegressionL1L2TV(l1, l2, tv, Atv,
                                           algorithm_params=dict(max_iter=500))
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)

###############################################################################
# Plot
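# Sketch of a numeric comparison before plotting (an addition, assuming
# scikit-learn is available, as in the other examples here): test-set R^2
# for each estimator.
from sklearn.metrics import r2_score
for name, pred in [("Ridge", yte_pred_rr),
                   ("GraphNet", yte_pred_enetgn),
                   ("L1+L2+TV", yte_pred_enettv)]:
    print("%s R2: %.3f" % (name, r2_score(yte, pred)))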
def test_smoothed_l1tv(self):
    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.penalties as penalties
    import parsimony.functions.nesterov.tv as tv
    import parsimony.functions.nesterov.l1tv as l1tv
    import parsimony.utils.start_vectors as start_vectors
    import parsimony.datasets.simulate as simulate

    np.random.seed(42)

    px = 10
    py = 1
    pz = 1
    shape = (pz, py, px)
    n, p = 5, np.prod(shape)

    l = 0.618
    k = 0.01
    g = 1.1

    start_vector = start_vectors.RandomStartVector(normalise=True)
    beta = start_vector.get_vector(p)

    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0
    mu = 5e-3

    A = tv.linear_operator_from_shape(shape)
    funs = [simulate.grad.L1(l),
            simulate.grad.L2Squared(k),
            simulate.grad.TotalVariation(g, A)]
    lr = simulate.LinearRegressionData(funs, M, e, snr=snr,
                                       intercept=False)
    X, y, beta_star = lr.load(beta)

    eps = 1e-8
    max_iter = 810

    alg = proximal.FISTA(eps=eps, max_iter=max_iter)

    function = CombinedFunction()
    function.add_loss(functions.losses.LinearRegression(X, y, mean=False))
    function.add_penalty(penalties.L2Squared(l=k))
    A = l1tv.linear_operator_from_shape(shape, p)
    function.add_prox(l1tv.L1TV(l, g, A=A, mu=mu, penalty_start=0))
    # Equivalent formulation with a smoothed TV penalty and a separate
    # L1 prox:
    # A = tv.linear_operator_from_shape(shape)
    # function.add_penalty(tv.TotalVariation(l=g, A=A, mu=mu,
    #                                        penalty_start=0))
    # function.add_prox(penalties.L1(l=l))

    beta_start = start_vector.get_vector(p)
    beta = alg.run(function, beta_start)

    berr = np.linalg.norm(beta - beta_star)
    assert_less(berr, 5e-1, "The found regression vector is not correct.")

    f_parsimony = function.f(beta)
    f_star = function.f(beta_star)
    ferr = abs(f_parsimony - f_star)
    assert_less(ferr, 5e-3, "The found function value is not correct.")
#
# Empirically set the global penalty, based on the maximum l1 penalty
alpha = l1_max_logistic_loss(X, y)

###############################################################################
# Penalization parameters are now vectors of equal length
l1 = alpha * np.array([0.5, 0.5, 0.5])
l2 = alpha * np.array([0.5, 0.5, 0.5])
tv = alpha * np.array([0.01, 0.2, 0.8])
max_iter = 1000

###############################################################################
# Build the linear operator and fit the model:
A = nesterov_tv.linear_operator_from_shape(beta3d.shape, calc_lambda_max=True)
enettv = estimators.LogisticRegressionL1L2TV(
    l1=l1, l2=l2, tv=tv, A=A,
    algorithm_params=dict(max_iter=max_iter))
enettv.fit(X, y)

###############################################################################
# Plot coefficient maps
plt.clf()
plot = plt.subplot(221)
utils.plots.map2d(beta3d, plot, title="beta star")
# Create data
n = 20
natural_shape = px, py, pz = (10, 10, 10)
p = np.prod(natural_shape)
data_shape = n, p

# Uniform data
X = np.random.rand(n, p)
# Multiply some variables to increase variance along them
X[:, 0] = 3 * X[:, 0]
X[:, 1] = 5 * X[:, 1]
# Center (scale without dividing by the standard deviation)
X = sklearn.preprocessing.scale(X, with_mean=True, with_std=False)

# A matrices
Atv = nesterov_tv.linear_operator_from_shape(natural_shape)

# Test function
l1 = 1
l2 = 1
ltv = 1
u = np.random.rand(n, 1)
u /= np.linalg.norm(u)
f = pca_tv.RightSingularL1L2SmoothedTV_CONESTA(X, u, l1, l2, ltv, Atv)

# Fit an estimator without l1 and TV constraints and compare to PCA
e_con = pca_tv.PCAL1L2TV(l1=0.0, l2=1.0, ltv=6e-8, Atv=Atv, n_components=2,
                         eps=1e-6, max_iter=100)
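# Sketch of the comparison (an addition; assumes e_con.fit(X) and that
# e_con.V stores one loading vector per column, mirroring pcatv.V in the
# PCA-TV example): with l1 = 0 and a vanishing TV weight the loadings
# should match plain PCA up to sign.
from sklearn.decomposition import PCA
e_con.fit(X)
pca = PCA(n_components=2)
pca.fit(X)
for comp in range(2):
    corr = np.corrcoef(pca.components_[comp, :],
                       np.asarray(e_con.V[:, comp]).ravel())[0, 1]
    print("component %i: |corr| with PCA = %.3f" % (comp + 1, abs(corr)))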
#        + alpha * l * ||beta||_1
#        + alpha * ((1.0 - l) / 2) * ||beta||^2_2
# Parsimony's ElasticNet is based on FISTA and is therefore slower than the
# scikit-learn one.
l1_ratio = .5
enet = estimators.ElasticNet(alpha=alpha, l=l1_ratio)
yte_pred_enet = enet.fit(Xtr, ytr).predict(Xte)

###########################################################################
## Fit LinearRegressionL1L2TV
# Min: (1 / (2 * n)) * ||X beta - y||^2_2
#      + l1 * ||beta||_1
#      + (l2 / 2) * ||beta||^2_2
#      + tv * TV(beta)
l1, l2, tv = alpha * np.array((.33, .33, .33))  # l1, l2, tv penalties
A = nesterov_tv.linear_operator_from_shape(shape)
algo = algorithms.proximal.CONESTA(max_iter=500)
enettv = estimators.LinearRegressionL1L2TV(l1, l2, tv, A, algorithm=algo)
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)

###########################################################################
## Plot
# TODO: Please remove dependence on scikit-learn. Add required functionality
# to parsimony instead.
plot = plt.subplot(131)
utils.plot_map2d(beta3d.reshape(shape), plot, title="beta star")
plot = plt.subplot(132)
utils.plot_map2d(enet.beta.reshape(shape), plot,
                 title="beta enet (R2=%.2f)" % r2_score(yte, yte_pred_enet))
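# Sketch of the third panel, following the subplot layout above (the beta
# attribute is used as in the other TV examples):
plot = plt.subplot(133)
utils.plot_map2d(enettv.beta.reshape(shape), plot,
                 title="beta enettv (R2=%.2f)" % r2_score(yte,
                                                          yte_pred_enettv))
plt.show()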