def do_copy_results():
    # Copy nested cross-validation results of the model-selection run
    # (5 outer folds x 5 inner folds + refit) into the aggregated "5cv"
    # directory, prefixing copied outputs with "enettv".
    outer_str = ["cv%02d" % i for i in range(5)]
    inner_str = ["cvnested%02d" % i for i in range(5)] + ["refit"]
    # %s/%s placeholders are filled with (outer, inner) fold names by
    # copy_results (defined elsewhere in this file/project).
    SRC = "/neurospin/brainomics/2013_adni/MCIc-CTL-FS_cs_modselectcv/modselectcv/%s/%s"
    DST = "/neurospin/brainomics/2013_adni/MCIc-CTL-FS_cs_all/5cv/%s/%s"
    copy_results(SRC, DST, outer_str, inner_str, dst_prefix="enettv")


# NOTE(review): the three statements below look like a standalone smoke-test
# of the start-vector generator rather than part of do_copy_results; the
# original (whitespace-mangled) source does not show their indentation —
# confirm intended placement.
import parsimony.utils.weights as weights
start_vector = weights.RandomUniformWeights(normalise=True)
start_vector.get_weights(10)
def run(self, X, Y, start_vector=None):
    """Find the right-singular vector of the product of two matrices.

    Uses the power method on the implicit matrix (X.Y)'.(X.Y) without ever
    forming the product X.Y explicitly.

    Parameters
    ----------
    X : Numpy array with shape (n, p). The first matrix of the product.

    Y : Numpy array with shape (p, m). The second matrix of the product.

    start_vector : BaseStartVector. A start vector generator. Default is
            to use a random start vector.
    """
    # Reset any requested run information before starting.
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, False)
    if self.info_requested(utils.Info.time):
        time_start = utils.time()
    if self.info_requested(utils.Info.converged):
        self.info_set(utils.Info.converged, False)

    n_rows, n_cols = X.shape  # Unused below; also validates X is 2-D.

    if start_vector is None:
        start_vector = weights.RandomUniformWeights(normalise=True)
    v = start_vector.get_weights(Y.shape[1])

    iteration = 0
    while iteration < self.max_iter:
        iteration += 1

        v_prev = v
        # One power step: v <- (X.Y)'.(X.Y).v, evaluated right-to-left so
        # only matrix-vector products are formed.
        v = np.dot(Y.T, np.dot(X.T, np.dot(X, np.dot(Y, v_prev))))
        scale = 1.0 / maths.norm(v)
        v *= scale

        # Relative-change stopping criterion (only after min_iter steps).
        if iteration >= self.min_iter \
                and maths.norm(v_prev - v) / maths.norm(v) < self.eps:
            if self.info_requested(utils.Info.converged):
                self.info_set(utils.Info.converged, True)
            break

    if self.info_requested(utils.Info.time):
        self.info_set(utils.Info.time, utils.time() - time_start)
    if self.info_requested(utils.Info.func_val):
        # Largest singular value of X.Y.
        sigma_max = maths.norm(np.dot(X, np.dot(Y, v)))
        self.info_set(utils.Info.func_val, sigma_max)
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, True)

    return utils.direct_vector(v)
def __init__(self, l, mean=True, penalty_start=0,
             start_vector=None, eps=consts.TOLERANCE,
             info=None, max_iter=10000, min_iter=1):
    """Initialise the Lasso coordinate-descent algorithm.

    Parameters
    ----------
    l : Non-negative float. The L1 regularisation parameter.

    mean : Boolean. Whether to use the mean (True) or the sum (False)
            of the loss terms.

    penalty_start : Non-negative integer. The number of leading
            variables excluded from the penalty (e.g. an intercept).

    start_vector : BaseStartVector. Generator for the initial weight
            vector. Default (None) uses RandomUniformWeights with
            values in [-1, 1], normalised.

    eps : Positive float. Convergence tolerance; floored at
            consts.TOLERANCE.

    info : List or tuple of utils.Info. The run information to compute
            and return. Default (None) means none.

    max_iter : Non-negative integer. Maximum number of iterations.

    min_iter : Non-negative integer. Minimum number of iterations.
    """
    # Fix: the previous signature used mutable/stateful defaults
    # (info=[] and a RandomUniformWeights instance), both evaluated once
    # at definition time and therefore shared by every call relying on
    # the defaults. None sentinels give each call fresh objects while
    # keeping the external behaviour identical.
    super(LassoCoordinateDescent, self).__init__(
        info=[] if info is None else info,
        max_iter=max_iter,
        min_iter=min_iter)

    self.l = max(0.0, float(l))
    self.mean = bool(mean)
    self.penalty_start = max(0, int(penalty_start))
    if start_vector is None:
        start_vector = weights.RandomUniformWeights(normalise=True,
                                                    limits=(-1.0, 1.0))
    self.start_vector = start_vector
    self.eps = max(consts.TOLERANCE, float(eps))
def test_smooth_1D_l2(self):
    """Smoothed group-TV + L2 on a 1-D signal: FISTA with decreasing mu
    must recover the simulated beta_star and its function value.
    """
    from parsimony.functions import CombinedFunction
    import parsimony.functions as functions
    import parsimony.functions.nesterov.grouptv as grouptv
    import parsimony.datasets.simulate.l1_l2_grouptvmu as l1_l2_grouptvmu
    import parsimony.utils.weights as weights

    np.random.seed(1337)

    n, p = 10, 15
    shape = (1, 1, p)

    l = 0.0
    k = 0.1  # Must have some regularisation for all variables.
    g = 0.9

    start_vector = weights.RandomUniformWeights(normalise=True)
    beta = start_vector.get_weights(p)

    # Three (partially overlapping) 1-D group rectangles over the 15
    # coefficients. The diagram lists index [value] group-membership.
    rects = [[(0, 5)], [(4, 10)], [(13, 15)]]
    # 0 [ 5 ] 0
    # 1 [ 5 ] 0
    # 2 [ 5 ] 0
    # 3 [ 5 ] 0
    # 4 [ 4 ] 0 / 1
    beta[:5, :] = 5.0
    # 5 [ 3 ] 1
    beta[4, :] = 4.0
    # 6 [ 3 ] 1
    beta[5:10, :] = 3.0
    # 7 [ 3 ] 1
    beta[13:15, :] = 7.0
    # 8 [ 3 ] 1
    # 9 [ 3 ] 1
    # 0 [ x ] -
    # 1 [ x ] -
    # 2 [ x ] -
    # 3 [ 7 ] 2
    # 4 [ 7 ] 2

    # alpha = 1.0 makes Sigma the identity (uncorrelated features).
    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0

    A = grouptv.linear_operator_from_rects(rects, shape)

    mu_min = 5e-8
    X, y, beta_star = l1_l2_grouptvmu.load(l=l, k=k, g=g, beta=beta,
                                           M=M, e=e, A=A, mu=mu_min,
                                           snr=snr)

    eps = 1e-5
    max_iter = 12000

    beta_start = start_vector.get_weights(p)

    # Continuation over a decreasing smoothing parameter mu.
    # NOTE(review): max_iter / len(mus) is a float under Python 3 —
    # presumably FISTA coerces it internally; confirm.
    mus = [5e-2, 5e-4, 5e-6, 5e-8]
    fista = FISTA(eps=eps, max_iter=max_iter / len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        function = CombinedFunction()
        function.add_loss(functions.losses.LinearRegression(X, y,
                                                            mean=False))
        function.add_penalty(grouptv.GroupTotalVariation(l=g,
                                                         A=A, mu=mu,
                                                         penalty_start=0))
        function.add_penalty(functions.penalties.L2Squared(l=k,
                                                           penalty_start=0))

        beta_parsimony = fista.run(function, beta_parsimony)

    berr = np.linalg.norm(beta_parsimony - beta_star)
    # print "berr:", berr
    assert berr < 5e-2

    f_parsimony = function.f(beta_parsimony)
    f_star = function.f(beta_star)
    ferr = abs(f_parsimony - f_star)
    # print "ferr:", ferr
    assert ferr < 5e-5
if has_data: WEIGHTS_TRUTH = np.load(weights_filename(shape, n_samples)) # Ensure that train dataset is balanced tr = np.hstack([ np.where(y.ravel() == 1)[0][:int(n_train / 2)], np.where(y.ravel() == 0)[0][:int(n_train / 2)] ]) te = np.setdiff1d(np.arange(y.shape[0]), tr) X = X3d.reshape((n_samples, np.prod(beta3d.shape))) Xtr = X[tr, :] ytr = y[tr] Xte = X[te, :] yte = y[te] beta_start = weights.RandomUniformWeights().get_weights(Xtr.shape[1]) # check that ytr is balanced #assert ytr.sum() / ytr.shape[0] == 0.5 #assert yte.sum() / yte.shape[0] == 0.53500000000000003 # Dataset with intercept Xtr_i = np.c_[np.ones((Xtr.shape[0], 1)), Xtr] Xte_i = np.c_[np.ones((Xte.shape[0], 1)), Xte] beta_start_i = weights.RandomUniformWeights().get_weights(Xtr_i.shape[1]) # global penalty alpha = l1_max_logistic_loss(Xtr, ytr) from parsimony.algorithms.utils import Info info = [Info.converged, Info.num_iter, Info.time, Info.func_val]
def test_combo_overlapping_nonsmooth(self):
    """L1 + L2 + smoothed overlapping group lasso: FISTA with mu
    continuation must recover the simulated beta_star.
    """
    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.gl as gl
    import parsimony.datasets.simulate.l1_l2_gl as l1_l2_gl
    import parsimony.utils.weights as weights

    np.random.seed(42)

    # Note that p must be even!
    n, p = 25, 30
    # Two overlapping groups covering [0, 2p/3) and [p/3, p).
    groups = [list(range(0, 2 * int(p / 3))), list(range(int(p / 3), p))]
    group_weights = [1.5, 0.5]
    A = gl.linear_operator_from_groups(p, groups=groups,
                                       weights=group_weights)

    l = 0.618
    k = 1.0 - l
    g = 2.718

    start_vector = weights.RandomUniformWeights(normalise=True)
    beta = start_vector.get_weights(p)

    # alpha = 1.0 makes Sigma the identity (uncorrelated features).
    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0

    X, y, beta_star = l1_l2_gl.load(l, k, g, beta, M, e, A, snr=snr)

    eps = 1e-8
    max_iter = 10000

    beta_start = start_vector.get_weights(p)

    # Continuation over a decreasing smoothing parameter mu.
    # NOTE(review): max_iter / len(mus) is a float under Python 3 —
    # presumably FISTA coerces it internally; confirm.
    mus = [5e-0, 5e-2, 5e-4, 5e-6, 5e-8]
    fista = proximal.FISTA(eps=eps, max_iter=max_iter / len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        # function = functions.LinearRegressionL1L2GL(X, y, l, k, g,
        #                                             A=A, mu=mu,
        #                                             penalty_start=0)
        function = CombinedFunction()
        function.add_loss(
            functions.losses.LinearRegression(X, y, mean=False))
        function.add_penalty(functions.penalties.L2Squared(l=k))
        function.add_penalty(
            gl.GroupLassoOverlap(l=g, A=A, mu=mu, penalty_start=0))
        function.add_prox(functions.penalties.L1(l=l))

        beta_parsimony = fista.run(function, beta_parsimony)

    berr = np.linalg.norm(beta_parsimony - beta_star)
    # print berr
    assert berr < 5e-3

    f_parsimony = function.f(beta_parsimony)
    f_star = function.f(beta_star)
    # print abs(f_parsimony - f_star)
    assert abs(f_parsimony - f_star) < 5e-6
def test_nonoverlapping_smooth(self):
    """Smoothed non-overlapping group lasso: compare the parsimony FISTA
    solution against SPAMS (hard-coded reference values if SPAMS is not
    installed).
    """
    # Spams: http://spams-devel.gforge.inria.fr/doc-python/doc_spams.pdf
    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.gl as gl
    import parsimony.datasets.simulate.l1_l2_glmu as l1_l2_glmu
    import parsimony.utils.weights as weights

    np.random.seed(42)

    # Note that p must be even!
    n, p = 25, 20
    # Two disjoint groups: first and second half of the variables.
    groups = [list(range(0, int(p / 2))), list(range(int(p / 2), p))]
    # weights = [1.5, 0.5]

    A = gl.linear_operator_from_groups(p, groups=groups)  # , weights=weights)

    l = 0.0
    k = 0.0
    g = 0.9

    start_vector = weights.RandomUniformWeights(normalise=True)
    beta = start_vector.get_weights(p)

    # alpha = 1.0 makes Sigma the identity (uncorrelated features).
    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0

    mu_min = 5e-8
    X, y, beta_star = l1_l2_glmu.load(l, k, g, beta, M, e, A,
                                      mu=mu_min, snr=snr)

    eps = 1e-8
    max_iter = 18000

    beta_start = start_vector.get_weights(p)

    # Continuation over a decreasing smoothing parameter mu.
    # NOTE(review): max_iter / len(mus) is a float under Python 3 —
    # presumably FISTA coerces it internally; confirm.
    mus = [5e-0, 5e-2, 5e-4, 5e-6, 5e-8]
    fista = proximal.FISTA(eps=eps, max_iter=max_iter / len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        # function = functions.LinearRegressionL1L2GL(X, y, l, k, g,
        #                                             A=A, mu=mu,
        #                                             penalty_start=0)
        function = CombinedFunction()
        function.add_loss(
            functions.losses.LinearRegression(X, y, mean=False))
        function.add_penalty(
            gl.GroupLassoOverlap(l=g, A=A, mu=mu, penalty_start=0))

        beta_parsimony = fista.run(function, beta_parsimony)

    try:
        import spams

        params = {
            "loss": "square",
            "regul": "group-lasso-l2",
            "groups": np.array([1] * (int(p / 2)) + [2] * (int(p / 2)),
                               dtype=np.int32),
            "lambda1": g,
            "max_it": max_iter,
            "tol": eps,
            "ista": False,
            "numThreads": -1,
        }

        beta_spams, optim_info = \
            spams.fistaFlat(Y=np.asfortranarray(y),
                            X=np.asfortranarray(X),
                            W0=np.asfortranarray(beta_start),
                            return_optim_info=True,
                            **params)
        # print beta_spams
    except ImportError:
        # SPAMS not available: use reference values recorded from a
        # previous SPAMS run with the same seed.
        # beta_spams = np.asarray([[15.56784201],
        #                          [39.51679274],
        #                          [30.42583205],
        #                          [24.8816362],
        #                          [6.48671072],
        #                          [6.48350546],
        #                          [2.41477318],
        #                          [36.00285723],
        #                          [24.98522184],
        #                          [29.43128643],
        #                          [0.85520539],
        #                          [40.31463542],
        #                          [34.60084146],
        #                          [8.82322513],
        #                          [7.55741642],
        #                          [7.62364398],
        #                          [12.64594707],
        #                          [21.81113869],
        #                          [17.95400007],
        #                          [12.10507338]])
        beta_spams = np.asarray([[-11.93855944],
                                 [42.889350930],
                                 [22.076438880],
                                 [9.3869208300],
                                 [-32.73310431],
                                 [-32.73509107],
                                 [-42.05298794],
                                 [34.844819990],
                                 [9.6210946300],
                                 [19.799892400],
                                 [-45.62041548],
                                 [44.716039010],
                                 [31.634706630],
                                 [-27.37416567],
                                 [-30.27711859],
                                 [-30.12673231],
                                 [-18.62803747],
                                 [2.3561952400],
                                 [-6.476922020],
                                 [-19.86630857]])

    berr = np.linalg.norm(beta_parsimony - beta_spams)
    # print berr
    assert berr < 5e-3

    f_parsimony = function.f(beta_parsimony)
    f_spams = function.f(beta_spams)
    ferr = abs(f_parsimony - f_spams)
    # print ferr
    assert ferr < 5e-6
def run(self, X, start_vector=None):
    """Find the right-singular vector of the given sparse matrix.

    Uses scipy's ARPACK-based svds for small matrices and falls back to
    the power method for wide matrices or when ARPACK fails to converge.

    Parameters
    ----------
    X : Scipy sparse array. The sparse matrix to decompose.

    start_vector : BaseStartVector. A start vector generator. Default is
            to use a random start vector.
    """
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, False)
    if self.info_requested(utils.Info.time):
        _t = utils.time()
    if self.info_requested(utils.Info.converged):
        self.info_set(utils.Info.converged, False)

    if start_vector is None:
        start_vector = weights.RandomUniformWeights(normalise=True)
    # min(X.shape) matches the length needed by both power-method
    # branches below (M when M < N, otherwise N).
    # NOTE(review): svds' v0 is documented as a 1-D array — presumably
    # get_weights returns a compatible vector; confirm.
    v0 = start_vector.get_weights(np.min(X.shape))

    # Determine when to use the power method or scipy_sparse.
    # (Idiom fix: direct boolean instead of "True if ... else False".)
    use_power = X.shape[1] >= 10 ** 3

    if not use_power:
        try:
            if not sp.sparse.issparse(X):
                X = sp.sparse.csr_matrix(X)

            try:
                [_, _, v] = sparse_linalg.svds(X, k=1, v0=v0,
                                               tol=self.eps,
                                               maxiter=self.max_iter,
                                               return_singular_vectors=True)
            except TypeError:  # For scipy 0.9.0.
                [_, _, v] = sparse_linalg.svds(X, k=1, tol=self.eps)

            v = v.T

            if self.info_requested(utils.Info.converged):
                self.info_set(utils.Info.converged, True)
        except ArpackNoConvergence:
            use_power = True

    if use_power:  # Use the power method if scipy failed or if determined.
        # TODO: Use estimators for this!
        M, N = X.shape
        if M < N:
            # Wide matrix: iterate on the smaller M x M Gram matrix.
            K = X.dot(X.T)
            t = v0
            for it in range(self.max_iter):
                t_ = t
                t = K.dot(t_)
                t *= 1.0 / maths.norm(t)

                crit = float(maths.norm(t_ - t)) / float(maths.norm(t))
                if crit < consts.TOLERANCE:
                    if self.info_requested(utils.Info.converged):
                        self.info_set(utils.Info.converged, True)
                    break

            # Recover the right-singular vector from the left one.
            v = X.T.dot(t)
            v *= 1.0 / maths.norm(v)
        else:
            # Tall matrix: iterate on the N x N Gram matrix directly.
            K = X.T.dot(X)
            v = v0
            for it in range(self.max_iter):
                v_ = v
                v = K.dot(v_)
                v *= 1.0 / maths.norm(v)

                crit = float(maths.norm(v_ - v)) / float(maths.norm(v))
                if crit < consts.TOLERANCE:
                    if self.info_requested(utils.Info.converged):
                        self.info_set(utils.Info.converged, True)
                    break

    if self.info_requested(utils.Info.time):
        self.info_set(utils.Info.time, utils.time() - _t)
    if self.info_requested(utils.Info.func_val):
        _f = maths.norm(X.dot(v))  # Largest singular value.
        self.info_set(utils.Info.func_val, _f)
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, True)

    return utils.direct_vector(v)
def run(self, X, start_vector=None):
    """Find the right-singular vector of the given matrix.

    Tries ARPACK (svds) first; on non-convergence falls back to a full
    SVD for small matrices or to the power method otherwise.

    Parameters
    ----------
    X : Numpy array. The matrix to decompose.

    start_vector : BaseStartVector. A start vector generator. Default is
            to use a random start vector.
    """
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, False)
    if self.info_requested(utils.Info.time):
        t_start = utils.time()

    if start_vector is None:
        start_vector = weights.RandomUniformWeights(normalise=True)
    init_v = start_vector.get_weights(np.min(X.shape))

    need_fallback = False
    try:
        try:
            [_, _, v] = sparse_linalg.svds(X, k=1, v0=init_v,
                                           tol=self.eps,
                                           maxiter=self.max_iter,
                                           return_singular_vectors=True)
        except TypeError:  # For scipy 0.9.0.
            [_, _, v] = sparse_linalg.svds(X, k=1, tol=self.eps)

        v = v.T

        if self.info_requested(utils.Info.converged):
            self.info_set(utils.Info.converged, True)
    except ArpackNoConvergence:
        need_fallback = True

    if need_fallback:  # ARPACK did not converge; use the power method.
        n_rows, n_cols = X.shape
        if n_rows < 80 and n_cols < 80:
            # Very arbitrary threshold from one computer: small enough
            # for a dense full SVD.
            _, _, V = scipy.linalg.svd(X, full_matrices=True)
            v = V[[0], :].T
        elif n_rows < n_cols:
            # Wide matrix: iterate on the smaller Gram matrix X.X'.
            gram = np.dot(X, X.T)
            left = init_v
            for _ in range(self.max_iter):
                left_prev = left
                left = np.dot(gram, left_prev)
                left *= 1.0 / maths.norm(left)

                if maths.norm(left_prev - left) / maths.norm(left) \
                        < self.eps:
                    break

            # Recover the right-singular vector from the left one.
            v = np.dot(X.T, left)
            v *= 1.0 / maths.norm(v)
        else:
            # Tall matrix: iterate on X'.X directly.
            gram = np.dot(X.T, X)
            v = init_v
            for _ in range(self.max_iter):
                prev = v
                v = np.dot(gram, prev)
                v *= 1.0 / maths.norm(v)

                if maths.norm(prev - v) / maths.norm(v) < self.eps:
                    break

    if self.info_requested(utils.Info.time):
        self.info_set(utils.Info.time, utils.time() - t_start)
    if self.info_requested(utils.Info.func_val):
        sigma_max = maths.norm(np.dot(X, v))  # Largest singular value.
        self.info_set(utils.Info.func_val, sigma_max)
    if self.info_requested(utils.Info.ok):
        self.info_set(utils.Info.ok, True)

    return utils.direct_vector(v)
def test_combo_smooth(self):
    """L1 + L2 + smoothed total variation on a 4x4x4 volume: FISTA with
    mu continuation must recover the simulated beta_star.
    """
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.nesterov.tv as tv
    import parsimony.datasets.simulate.l1_l2_tvmu as l1_l2_tvmu
    import parsimony.utils.weights as weights

    np.random.seed(42)

    px = 4
    py = 4
    pz = 4
    shape = (pz, py, px)
    n, p = 50, np.prod(shape)

    l = 0.618
    k = 1.0 - l
    g = 1.1

    start_vector = weights.RandomUniformWeights(normalise=True)
    beta = start_vector.get_weights(p)

    # alpha = 1.0 makes Sigma the identity (uncorrelated features).
    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0

    A = tv.linear_operator_from_shape(shape)

    mu_min = 5e-8
    X, y, beta_star = l1_l2_tvmu.load(l=l, k=k, g=g, beta=beta, M=M,
                                      e=e, A=A, mu=mu_min, snr=snr)

    eps = 1e-8
    max_iter = 5300

    beta_start = start_vector.get_weights(p)

    # Continuation over a decreasing smoothing parameter mu.
    # NOTE(review): max_iter / len(mus) is a float under Python 3 —
    # presumably FISTA coerces it internally; confirm.
    mus = [5e-2, 5e-4, 5e-6, 5e-8]
    fista = proximal.FISTA(eps=eps, max_iter=max_iter / len(mus))

    beta_parsimony = beta_start
    for mu in mus:
        # function = functions.LinearRegressionL1L2GL(X, y, l, k, g,
        #                                             A=A, mu=mu,
        #                                             penalty_start=0)
        function = CombinedFunction()
        function.add_loss(
            functions.losses.LinearRegression(X, y, mean=False))
        function.add_penalty(
            tv.TotalVariation(l=g, A=A, mu=mu, penalty_start=0))
        function.add_penalty(functions.penalties.L2Squared(l=k))
        function.add_prox(functions.penalties.L1(l=l))

        beta_parsimony = fista.run(function, beta_parsimony)

    berr = np.linalg.norm(beta_parsimony - beta_star)
    # print "berr:", berr
    assert berr < 5e-3

    f_parsimony = function.f(beta_parsimony)
    f_star = function.f(beta_star)
    ferr = abs(f_parsimony - f_star)
    # print "ferr:", ferr
    assert ferr < 5e-5
def test_nonsmooth(self):
    """Pure L1 (no L2, no TV): FISTA must recover the sparse beta_star,
    both via the smoothed Nesterov penalty and via the exact proximal
    operator.
    """
    import numpy as np
    import parsimony.utils.consts as consts
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions.losses as losses
    import parsimony.functions.nesterov as nesterov
    import parsimony.utils.weights as weights
    import parsimony.datasets.simulate.l1_l2_tv as l1_l2_tv

    start_vector = weights.RandomUniformWeights(normalise=True)

    np.random.seed(42)

    n, p = 75, 100

    # Correlated design: Sigma is a convex combination of the identity
    # and a random PSD matrix.
    alpha = 0.9
    V = np.random.randn(p, p)
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.dot(V.T, V)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    # Sparsify the start vector so beta_star is truly sparse.
    beta_start = start_vector.get_weights(p)
    beta_start[np.abs(beta_start) < 0.1] = 0.0

    l = 0.618
    k = 0.0
    g = 0.0

    A = np.eye(p)
    A = [A, A, A]
    snr = 100.0
    X, y, beta_star = l1_l2_tv.load(l, k, g, beta_start, M, e, A,
                                    snr=snr)

    beta = beta_start

    # Smoothed L1 with decreasing mu (continuation).
    for mu in [5e-2, 5e-3, 5e-4, 5e-5]:
        function = CombinedFunction()
        function.add_loss(losses.LinearRegression(X, y, mean=False))

        A = nesterov.l1.linear_operator_from_variables(p,
                                                       penalty_start=0)
        function.add_penalty(nesterov.l1.L1(l, A=A, mu=mu,
                                            penalty_start=0))

        fista = proximal.FISTA(eps=consts.TOLERANCE, max_iter=2300)
        beta = fista.run(function, beta)

    berr = np.linalg.norm(beta - beta_star)
    # print "berr:", berr
    # assert berr < 5e-2
    assert_less(berr, 5e-2,
                "The found regression vector is not correct.")

    # Test proximal operator
    beta = beta_start

    function = CombinedFunction()
    function.add_loss(losses.LinearRegression(X, y, mean=False))

    A = nesterov.l1.linear_operator_from_variables(p, penalty_start=0)
    # function.add_penalty(nesterov.l1.L1(l, A=A, mu=mu_min,
    #                                     penalty_start=penalty_start))
    function.add_prox(nesterov.l1.L1(l, A=A, mu=5e-5, penalty_start=0))

    fista = proximal.FISTA(eps=consts.TOLERANCE, max_iter=2000)
    beta = fista.run(function, beta)

    berr = np.linalg.norm(beta - beta_star)
    # print "berr:", berr
    # assert berr < 5e-0
    assert_less(berr, 5e-0,
                "The found regression vector is not correct.")
def test_smoothed_l1tv(self):
    """Combined smoothed L1+TV proximal operator on a 1-D signal:
    FISTA must approach the simulated beta_star.
    """
    import numpy as np
    from parsimony.functions import CombinedFunction
    import parsimony.algorithms.proximal as proximal
    import parsimony.functions as functions
    import parsimony.functions.penalties as penalties
    import parsimony.functions.nesterov.tv as tv
    import parsimony.functions.nesterov.l1tv as l1tv
    import parsimony.utils.weights as weights
    import parsimony.datasets.simulate as simulate

    np.random.seed(42)

    px = 10
    py = 1
    pz = 1
    shape = (pz, py, px)
    n, p = 5, np.prod(shape)

    l = 0.618
    k = 0.01
    g = 1.1

    start_vector = weights.RandomUniformWeights(normalise=True)
    beta = start_vector.get_weights(p)

    # alpha = 1.0 makes Sigma the identity (uncorrelated features).
    alpha = 1.0
    Sigma = alpha * np.eye(p, p) \
        + (1.0 - alpha) * np.random.randn(p, p)
    mean = np.zeros(p)
    M = np.random.multivariate_normal(mean, Sigma, n)
    e = np.random.randn(n, 1)

    snr = 100.0
    mu = 5e-3

    A = tv.linear_operator_from_shape(shape)
    # X, y, beta_star = l1_l2_tvmu.load(l=l, k=k, g=g, beta=beta, M=M,
    #                                   e=e, A=A, mu=mu, snr=snr)
    funs = [
        simulate.grad.L1(l),
        simulate.grad.L2Squared(k),
        simulate.grad.TotalVariation(g, A)
    ]
    lr = simulate.LinearRegressionData(funs, M, e, snr=snr,
                                       intercept=False)

    X, y, beta_star = lr.load(beta)

    # Re-seed so the algorithm run is deterministic regardless of how
    # much randomness the data simulation consumed.
    np.random.seed(42)

    eps = 1e-8
    max_iter = 1000

    alg = proximal.FISTA(eps=eps, max_iter=max_iter)

    function = CombinedFunction()
    function.add_loss(functions.losses.LinearRegression(X, y,
                                                        mean=False))
    function.add_penalty(penalties.L2Squared(l=k))
    A = l1tv.linear_operator_from_shape(shape, p)
    function.add_prox(l1tv.L1TV(l, g, A=A, mu=mu, penalty_start=0))
    # A = tv.linear_operator_from_shape(shape)
    # function.add_penalty(tv.TotalVariation(l=g, A=A, mu=mu,
    #                                        penalty_start=0))
    # function.add_prox(penalties.L1(l=l))

    beta_start = start_vector.get_weights(p)
    beta = alg.run(function, beta_start)

    berr = np.linalg.norm(beta - beta_star)
    # print "berr:", berr
    # assert berr < 5e-1
    assert_less(berr, 0.5, "The found regression vector is not correct.")

    f_parsimony = function.f(beta)
    f_star = function.f(beta_star)
    ferr = abs(f_parsimony - f_star)
    # print "ferr:", ferr
    # assert ferr < 5e-3
    assert_less(ferr, 5e-3, "The found regression vector is not correct.")
def init():
    """Prepare the working directory for a nested-CV mapreduce run:
    copy inputs, build the (outer x inner) stratified CV resampling
    dict, write config_dCV.json and the cluster helper files.

    Relies on module-level names: WD, WD_CLUSTER, penalty_start, os,
    shutil, json, collections, np, weights, StratifiedKFold,
    """
    INPUT_DATA_X = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/data/30yo/X.npy'
    INPUT_DATA_y = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/data/30yo/y.npy'
    INPUT_MASK_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/data/30yo/mask.npy'
    INPUT_LINEAR_OPE_PATH = '/neurospin/brainomics/2016_schizConnect/analysis/NUSDAST/Freesurfer/data/30yo/Atv.npz'

    NFOLDS_OUTER = 5
    NFOLDS_INNER = 5

    os.makedirs(WD, exist_ok=True)
    shutil.copy(INPUT_DATA_y, WD)
    shutil.copy(INPUT_MASK_PATH, WD)
    shutil.copy(INPUT_LINEAR_OPE_PATH, WD)

    # Drop the first penalty_start columns (covariates) from the design.
    X = np.load(INPUT_DATA_X)
    np.save(os.path.join(WD, "X.npy"), X[:, penalty_start:])

    # Fixed-seed start vector shared by all jobs.
    # NOTE(review): np.save is given the weights *generator object*, not
    # an array — presumably it is pickled; confirm this is intended.
    start_vector = weights.RandomUniformWeights(normalise=True,
                                                seed=40004)
    np.save(os.path.join(WD, "start_vector.npy"), start_vector)

    y = np.load(INPUT_DATA_y)

    cv_outer = [[tr, te] for tr, te in StratifiedKFold(
        y.ravel(), n_folds=NFOLDS_OUTER, random_state=42)]
    if cv_outer[0] is not None:  # Make sure first fold is None
        # Fold 0 is the "all/all" null resampling: train and test are
        # both the full sample.
        cv_outer.insert(0, None)
        null_resampling = list()
        # NOTE(review): the trailing comma below makes this a throwaway
        # tuple of two append() results — harmless, but likely a typo
        # for two separate statements.
        null_resampling.append(np.arange(0, len(y))), null_resampling.append(
            np.arange(0, len(y)))
        cv_outer[0] = null_resampling

    # import collections
    # Keys are "cvXX/all" for outer folds and "cvXX/cvnestedYY" for
    # inner folds; indices are mapped back to the full sample via tr_val.
    cv = collections.OrderedDict()
    for cv_outer_i, (tr_val, te) in enumerate(cv_outer):
        if cv_outer_i == 0:
            cv["all/all"] = [tr_val, te]
        else:
            cv["cv%02d/all" % (cv_outer_i - 1)] = [tr_val, te]
            cv_inner = StratifiedKFold(y[tr_val].ravel(),
                                       n_folds=NFOLDS_INNER,
                                       random_state=42)
            for cv_inner_i, (tr, val) in enumerate(cv_inner):
                cv["cv%02d/cvnested%02d" % ((cv_outer_i - 1),
                                            cv_inner_i)] = \
                    [tr_val[tr], tr_val[val]]
    # JSON cannot serialise numpy arrays; convert to plain lists.
    for k in cv:
        cv[k] = [cv[k][0].tolist(), cv[k][1].tolist()]
    print(list(cv.keys()))

    # Each entry is [l1, l2, tv, global] hyper-parameter combination.
    params = [[0.01, 0.72, 0.08, 0.2], [0.01, 0.08, 0.72, 0.2],
              [0.01, 0.18, 0.02, 0.8], [0.1, 0.18, 0.02, 0.8],
              [0.1, 0.02, 0.18, 0.8], [0.01, 0.02, 0.18, 0.8],
              [0.1, 0.08, 0.72, 0.2], [0.1, 0.72, 0.08, 0.2]]
    assert len(params) == 8
    user_func_filename = "/home/ad247405/git/scripts/2017_parsimony_settings/warm_start/no_covariates/random_start/no_warm_restart_NUDAST_30yo_FS.py"

    config = dict(data=dict(X="X.npy", y="y.npy"),
                  params=params, resample=cv,
                  structure="mask.npy",
                  start_vector=dict(start_vector="start_vector.npy"),
                  structure_linear_operator_tv="Atv.npz",
                  map_output="model_selectionCV",
                  user_func=user_func_filename,
                  reduce_input="results/*/*",
                  reduce_group_by="params",
                  reduce_output="model_selectionCV.csv")
    json.dump(config, open(os.path.join(WD, "config_dCV.json"), "w"))

    # Build utils files: sync (push/pull) and PBS
    import brainomics.cluster_gabriel as clust_utils
    sync_push_filename, sync_pull_filename, _ = \
        clust_utils.gabriel_make_sync_data_files(WD, wd_cluster=WD_CLUSTER)
    cmd = "mapreduce.py --map %s/config_dCV.json" % WD_CLUSTER
    clust_utils.gabriel_make_qsub_job_files(WD, cmd, walltime="2500:00:00")