Esempio n. 1
0
# Settings forwarded, in this order, to random_data() below.
m = 100
d = 200
d_nonzero = 5
k = 1
sigma = 0.5

# random_data() hands back an (X, C, y) triple together with `sol`.
(X, C, y), sol = random_data(
    m, d, d_nonzero, k, sigma, zerosum=True, seed=1
)

# %%
# Remark: the positions of the nonzero entries of ``sol`` show which
# parameters should be selected:

print(np.nonzero(sol))

# %%
# Define the classo instance
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Build a default c-lasso problem instance from the generated data:

problem = classo_problem(X, y, C)

# %%
# Check parameters
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Printing the generated problem instance lets you look at it:

print(problem)

# %%
# Solve optimization problems
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Only stability selection, the default model selection strategy, is used here.
# The command also allows you to inspect the computed stability profile for all variables
Esempio n. 2
0
from copy import deepcopy as dc

import numpy as np
import scipy.io as sio
from classo import classo_problem

# Common prefix of every output location configured below.
path = "../../figures/examplePH/"

# Read both MATLAB files of the pH example.
pH = sio.loadmat("pH_data/matlab/pHData.mat")
tax = sio.loadmat("pH_data/matlab/taxTablepHData.mat")["None"][0]

X = pH["X"]
Y_uncent = pH["Y"].T[0]
y = Y_uncent - np.mean(Y_uncent)  # Center Y
problem = classo_problem(X, y)  # zero sum is default C

# First run: additionally solve the entire path.
problem.model_selection.PATH = True
problem.solve()
problem.solution.PATH.save = path + "R3-"
problem.solution.StabSel.save1 = path + "R3-StabSel"
problem.solution.StabSel.save3 = path + "R3-StabSel-beta"
# Deep copy so that the second run below works on a separate object.
problem1 = dc(problem)

# NOTE(review): the line `problem.formulation.huber = True` is commented out
# at this point, so no formulation or model_selection setting changes between
# the two solve() calls even though the second run is saved under "R4-".
# Confirm whether the Huber setting should be re-enabled.
# problem.formulation.huber = True

# Second run, saved under the "R4-" prefix.
problem.solve()
problem.solution.PATH.save = path + "R4-"
problem.solution.StabSel.save1 = path + "R4-StabSel"
problem.solution.StabSel.save3 = path + "R4-StabSel-beta"
problem2 = dc(problem)

# Show each problem followed by its solution.
print(problem1, problem1.solution)
print(problem2, problem2.solution)
Esempio n. 3
0
# Keep only the text after the last "::" of every label.
label_short = np.array([full_name.split("::")[-1] for full_name in label])

# Log-transform x after adding a pseudo-count.
pseudo_count = 1
X = np.log(pseudo_count + x)
# Sum of A along axis 0; used below both as divisor and as inverse weight.
nleaves = np.sum(A, axis=0)
logGeom = X.dot(A) / nleaves

n, d = logGeom.shape

# Randomly pick 80% of the row indices (rounded down); only these rows of
# logGeom and y are passed to the problem below.
n_train = int(0.8 * n)
tr = np.random.permutation(n)[:n_train]

# %%
# Cross validation and Path Computation
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

problem = classo_problem(logGeom[tr], y[tr], label=label_short)

# Formulation settings.
problem.formulation.w = 1 / nleaves
problem.formulation.intercept = True
problem.formulation.concomitant = False

# Model selection: switch stability selection off, enable path and CV.
problem.model_selection.StabSel = False
problem.model_selection.PATH = True
problem.model_selection.CV = True
problem.model_selection.CVparameters.seed = 6  # one could change logscale, Nsubset, oneSE
print(problem)

problem.solve()
print(problem.solution)

# Drop the first entry of the CV selection.
selection = problem.solution.CV.selected_param[1:]  # exclude the intercept
Esempio n. 4
0
# %%
# Set up design matrix and zero-sum constraints for 45 genera
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# Joint microbiome and covariate data, followed by a column of ones (offset).
bias_column = np.ones((len(X0), 1))
X = np.concatenate((X0, X_C, X_F, bias_column), axis=1)
label = np.concatenate([labels, np.array(['Calorie', 'Fat', 'Bias'])])

# A single constraint row of ones spanning every column of X ...
C = np.ones((1, len(X[0])))
# ... with the entries of the last three columns set to zero.
C[0, -1] = 0.
C[0, -2] = 0.
C[0, -3] = 0.

# %%
# Set up c-lasso problem
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^

problem = classo_problem(X, y, C, label=label)

# %%
# Use stability selection with theoretical lambda [Combettes & Müller, 2020b]
stabsel_parameters = problem.model_selection.StabSelparameters
stabsel_parameters.method = 'lam'
stabsel_parameters.threshold_label = 0.5

# %%
# Use formulation R3
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
problem.formulation.concomitant = True

# Solve, then show the problem followed by its solution.
problem.solve()
print(problem)
print(problem.solution)