# Sizes and noise level handed to ``random_data`` below
# (presumably: samples, features, non-zero coefficients, constraints, noise --
# confirm against the ``random_data`` documentation).
m, d = 100, 200
d_nonzero, k = 5, 1
sigma = 0.5

(X, C, y), sol = random_data(m, d, d_nonzero, k, sigma, zerosum=True, seed=1)

# %%
# Remark: the non-zero entries of ``sol`` are the parameters that should be selected:

print(np.nonzero(sol))

# %%
# Define the classo instance
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Next, we define a default c-lasso problem instance from the generated data:

problem = classo_problem(X, y, C)

# %%
# Check parameters
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# You can inspect the generated problem instance by printing it:

print(problem)

# %%
# Solve optimization problems
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# We only use stability selection as default model selection strategy.
# The command also allows you to inspect the computed stability profile for all variables
from copy import deepcopy as dc

import numpy as np
import scipy.io as sio

from classo import classo_problem

# Prefix assigned to the ``save*`` attributes of the solution objects below.
path = "../../figures/examplePH/"

# Load the pH data and the table stored in ``taxTablepHData.mat``.
pH = sio.loadmat("pH_data/matlab/pHData.mat")
tax = sio.loadmat("pH_data/matlab/taxTablepHData.mat")["None"][0]

X = pH["X"]
Y_uncent = pH["Y"].T[0]
y = Y_uncent - np.mean(Y_uncent)  # center the response

# No constraint matrix is passed: zero sum is the default C.
problem = classo_problem(X, y)

# First run: additionally solve the entire path.
problem.model_selection.PATH = True
problem.solve()
problem.solution.PATH.save = path + "R3-"
problem.solution.StabSel.save1 = path + "R3-StabSel"
problem.solution.StabSel.save3 = path + "R3-StabSel-beta"
# Snapshot this run, because ``problem`` is reconfigured and re-solved next.
problem1 = dc(problem)

# Second run: same instance with the Huber option enabled.
# NOTE(review): the "R4-" output prefix suggests this line is meant to be
# active rather than commented out -- confirm.
problem.formulation.huber = True
problem.solve()
problem.solution.PATH.save = path + "R4-"
problem.solution.StabSel.save1 = path + "R4-StabSel"
problem.solution.StabSel.save3 = path + "R4-StabSel-beta"
problem2 = dc(problem)

# Print each configured problem together with its solution.
print(problem1, problem1.solution)
print(problem2, problem2.solution)
# Keep only the last "::"-separated component of every label.
label_short = np.array([full_label.split("::")[-1] for full_label in label])

# Log-transform the data after adding a pseudo-count.
pseudo_count = 1
X = np.log(pseudo_count + x)

# Column sums of A; used to normalise X.dot(A) and, below, to build the weights.
nleaves = np.sum(A, axis=0)
logGeom = X.dot(A) / nleaves

# Draw a random 80% of the row indices as the training subset.
# NOTE(review): this uses NumPy's global RNG; no seed is set in the visible code.
n, d = logGeom.shape
n_train = int(0.8 * n)
tr = np.random.permutation(n)[:n_train]

# %%
# Cross validation and Path Computation
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

problem = classo_problem(logGeom[tr], y[tr], label=label_short)

# Formulation: weights 1 / nleaves, with an intercept, non-concomitant.
problem.formulation.w = 1 / nleaves
problem.formulation.intercept = True
problem.formulation.concomitant = False

# Model selection: path and cross validation only, no stability selection.
problem.model_selection.StabSel = False
problem.model_selection.PATH = True
problem.model_selection.CV = True
# Other CV parameters one could change: logscale, Nsubset, oneSE.
problem.model_selection.CVparameters.seed = 6

print(problem)

problem.solve()
print(problem.solution)

# Drop the first entry so the intercept is excluded from the selection.
selection = problem.solution.CV.selected_param[1:]
# %%
# Set up design matrix and zero-sum constraints for 45 genera
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# Joint microbiome and covariate data, plus a column of ones as offset.
X = np.concatenate(
    (X0, X_C, X_F, np.ones((len(X0), 1))),
    axis=1,
)
label = np.concatenate([labels, np.array(['Calorie', 'Fat', 'Bias'])])

# One constraint row of ones; the last three entries (Calorie, Fat, Bias)
# are set to zero so those columns do not enter the zero-sum constraint.
C = np.ones((1, X.shape[1]))
C[0, -3:] = 0.0

# %%
# Set up c-lasso problem
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^

problem = classo_problem(X, y, C, label=label)

# %%
# Use stability selection with theoretical lambda [Combettes & Müller, 2020b]

problem.model_selection.StabSelparameters.method = 'lam'
problem.model_selection.StabSelparameters.threshold_label = 0.5

# %%
# Use formulation R3
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^

problem.formulation.concomitant = True

problem.solve()
print(problem)
print(problem.solution)