def mapper(key, output_collector): import mapreduce as GLOBAL # access to global variables: #raise ImportError("could not import ") # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A # GLOBAL.DATA ::= {"X":[Xtrain, ytrain], "y":[Xtest, ytest]} # key: list of parameters Xtr = GLOBAL.DATA_RESAMPLED["X"][0] Xte = GLOBAL.DATA_RESAMPLED["X"][1] ytr = GLOBAL.DATA_RESAMPLED["y"][0] yte = GLOBAL.DATA_RESAMPLED["y"][1] print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape # STRUCTURE = GLOBAL.STRUCTURE #alpha, ratio_l1, ratio_l2, ratio_tv, k = key #key = np.array(key) penalty_start = GLOBAL.CONFIG["penalty_start"] class_weight = "auto" # unbiased alpha = float(key[0]) l1, l2, tv, k = alpha * float(key[1]), alpha * float( key[2]), alpha * float(key[3]), key[4] print "l1:%f, l2:%f, tv:%f, k:%i" % (l1, l2, tv, k) if k != -1: k = int(k) aov = SelectKBest(k=k) aov.fit(Xtr[..., penalty_start:], ytr.ravel()) mask = GLOBAL.mask != 0 mask[mask] = aov.get_support() #print mask.sum() A, _ = tv_helper.nesterov_linear_operator_from_mesh( GLOBAL.mesh_coord, GLOBAL.mesh_triangles, mask) Xtr_r = np.hstack([ Xtr[:, :penalty_start], Xtr[:, penalty_start:][:, aov.get_support()] ]) Xte_r = np.hstack([ Xte[:, :penalty_start], Xte[:, penalty_start:][:, aov.get_support()] ]) else: mask = np.ones(Xtr.shape[0], dtype=bool) Xtr_r = Xtr Xte_r = Xte A = GLOBAL.A mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start, class_weight=class_weight) mod.fit(Xtr_r, ytr) y_pred = mod.predict(Xte_r) proba_pred = mod.predict_probability(Xte_r) ret = dict(y_pred=y_pred, y_true=yte, beta=mod.beta, mask=mask, proba_pred=proba_pred) if output_collector: output_collector.collect(key, ret) else: return ret
def run_all():
    """Fit the enet+TV logistic model on the full GM training set with a
    fixed parameter key, score it on the training data and collect the
    results (predictions, weights, 3D weight map) to disk."""
    work_dir = "/neurospin/brainomics/2014_mlc/GM"
    model_key = '0.01_0.01_0.98_0.01'
    out_dir = os.path.join(os.path.dirname(work_dir), 'logistictvenet_all',
                           model_key)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    X = np.load(os.path.join(work_dir, 'GMtrain.npy'))
    y = np.load(os.path.join(work_dir, 'ytrain.npy'))
    A, STRUCTURE = A_from_structure(os.path.join(work_dir, "mask.nii"))
    # Key encodes "alpha_l1ratio_l2ratio_tvratio"; scale ratios by alpha.
    params = np.array([float(p) for p in model_key.split("_")])
    l1, l2, tv = params[0] * params[1:]
    mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=1,
                                   class_weight="auto")
    mod.fit(X, y)  # takes roughly half an hour on the full data set
    y_pred = mod.predict(X)
    p, r, f, s = precision_recall_fscore_support(y, y_pred, average=None)
    n_ite = mod.algorithm.num_iter
    scores = dict(
        recall_0=r[0], recall_1=r[1], recall_mean=r.mean(),
        precision_0=p[0], precision_1=p[1], precision_mean=p.mean(),
        f1_0=f[0], f1_1=f[1], f1_mean=f.mean(),
        support_0=s[0], support_1=s[1],
        n_ite=n_ite, intercept=mod.beta[0, 0])
    # Project the penalized weights (beta[1:], intercept excluded) back into
    # the 3D brain volume for visualization.
    beta3d = np.zeros(STRUCTURE.get_data().shape)
    beta3d[STRUCTURE.get_data() != 0] = mod.beta[1:].ravel()
    out_im = nibabel.Nifti1Image(beta3d, affine=STRUCTURE.get_affine())
    ret = dict(y_pred=y_pred, y_true=y, beta=mod.beta, beta3d=out_im,
               scores=scores)
    # Collect with the same layout as /home/ed203246/bin/mapreduce.py runs.
    oc = OutputCollector(out_dir)
    oc.collect(key=model_key, value=ret)
def mapper(key, output_collector): import mapreduce as GLOBAL # access to global variables: #raise ImportError("could not import ") # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]} # key: list of parameters n_fold = GLOBAL.N_FOLD # data for model validation (2nd cross validation, outer loop) Xvalid = GLOBAL.DATA_RESAMPLED_VALIDMODEL["X"][0] Xcalib = GLOBAL.DATA_RESAMPLED_VALIDMODEL["X"][1] yvalid = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][0] ycalib = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][1] # data for model selection (1rst cross validation, outer loop) Xtest = GLOBAL.DATA_RESAMPLED_SELECTMODEL["X"][0] Xtrain = GLOBAL.DATA_RESAMPLED_SELECTMODEL["X"][1] ytest = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][0] ytrain = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][1] print key, "Data shape:", Xvalid.shape, Xcalib.shape, Xtest.shape, Xtrain.shape STRUCTURE = GLOBAL.STRUCTURE #(alpha, ratio_l1, ratio_l2, ratio_tv, ratio_k) = key #key = np.array(key) penalty_start = GLOBAL.PENALTY_START class_weight = "auto" # unbiased alpha = float(key[0]) l1, l2 = alpha * float(key[1]), alpha * float(key[2]) tv, k_ratio = alpha * float(key[3]), key[4] print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio) mask = STRUCTURE.get_data() != 0 A = GLOBAL.A info = [Info.num_iter] mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start, class_weight=class_weight, algorithm_params={'info': info}) mod.fit(Xtrain, ytrain) y_pred = mod.predict(Xtest) proba_pred = mod.predict_probability(Xtest) # a posteriori probability beta = mod.beta ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=ytest, X_calib=Xcalib, y_calib=ycalib, X_valid=Xvalid, y_test=yvalid, n_fold=n_fold, beta=beta, mask=mask, n_iter=mod.get_info()['num_iter']) if output_collector: output_collector.collect(key, ret) else: return ret
def mapper(key, output_collector):
    """Fit a LogisticRegressionL1L2TV model for one (alpha, k, l, g) key on
    the resampled train split and collect the fitted model, predictions and
    weights for the test split."""
    import mapreduce as GLOBAL  # access to global variables:
    resampled_X = GLOBAL.DATA_RESAMPLED["X"]
    resampled_y = GLOBAL.DATA_RESAMPLED["y"]
    Xtr, Xte = resampled_X[0], resampled_X[1]
    ytr, yte = resampled_y[0], resampled_y[1]
    # Scale each penalty ratio by the global alpha.
    alpha, ratio_k, ratio_l, ratio_g = key
    k, l, g = alpha * np.array((ratio_k, ratio_l, ratio_g))
    mod = LogisticRegressionL1L2TV(k, l, g, GLOBAL.A, class_weight="auto")
    mod.fit(Xtr, ytr)
    y_pred = mod.predict(Xte)
    ret = dict(model=mod, y_pred=y_pred, y_true=yte, beta=mod.beta)
    output_collector.collect(key, ret)
def mapper(key, output_collector): import mapreduce as GLOBAL # access to global variables: #raise ImportError("could not import ") # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]} # key: criterion used for the model selection Xvalid = GLOBAL.DATA_RESAMPLED["X"][0] Xcalib = GLOBAL.DATA_RESAMPLED["X"][1] yvalid = GLOBAL.DATA_RESAMPLED["y"][0] ycalib = GLOBAL.DATA_RESAMPLED["y"][1] criterion = '' for c in key: criterion += c print criterion, "Data shape:", Xcalib.shape, Xvalid.shape, ycalib.shape, yvalid.shape STRUCTURE = GLOBAL.STRUCTURE penalty_start = GLOBAL.PENALTY_START class_weight = "auto" # unbiased n_fold = GLOBAL.FOLD model = GLOBAL.MODEL[criterion][n_fold] model_params = model.split('_') alpha = float(model_params[0]) l1, l2 = alpha * float(model_params[1]), alpha * float(model_params[2]) tv, k_ratio = alpha * float(model_params[3]), float(model_params[4]) print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio) mask = STRUCTURE.get_data() != 0 A = GLOBAL.A info = [Info.num_iter] mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start, class_weight=class_weight, algorithm_params={'info': info}) mod.fit(Xcalib, ycalib) y_pred = mod.predict(Xvalid) proba_pred = mod.predict_probability(Xvalid) # a posteriori probability beta = mod.beta ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yvalid, beta=beta, mask=mask, model=model, n_iter=mod.get_info()['num_iter']) if output_collector: output_collector.collect(key, ret) else: return ret
def mapper(key, output_collector):
    """Fit an enet+TV logistic regression for one (alpha, l1, l2, tv) key on
    the global train split and collect predictions for the test split."""
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA ::= {"X":[Xtrain, ytrain], "y":[Xtest, ytest]}
    Xtr = GLOBAL.DATA["X"][0]
    Xte = GLOBAL.DATA["X"][1]
    ytr = GLOBAL.DATA["y"][0]
    yte = GLOBAL.DATA["y"][1]
    alpha, ratio_l1, ratio_l2, ratio_tv = key
    class_weight = "auto"  # unbiased
    # Scale the penalty ratios by the global alpha in one vector operation.
    l1, l2, tv = alpha * np.array((ratio_l1, ratio_l2, ratio_tv))
    mod = LogisticRegressionL1L2TV(l1, l2, tv, GLOBAL.A, penalty_start=3,
                                   class_weight=class_weight)
    mod.fit(Xtr, ytr)
    ret = dict(y_pred=mod.predict(Xte), y_true=yte, beta=mod.beta)
    output_collector.collect(key, ret)
def mapper_fix(key, output_collector):
    """This mapper does not fit: it re-uses the precomputed, stored beta and
    computes the posterior probability of the test samples.

    Call it using mapreduce.py -m -f config.json

    Parameters
    ----------
    key : sequence
        (alpha, l1_ratio, l2_ratio, tv_ratio, k); k == -1 means no
        univariate filtering was used in the original fit.
    output_collector : OutputCollector or None
        Must hold the previous run's results (beta, mask, y_pred).
    """
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, ytrain], "y":[Xtest, ytest]}
    # key: list of parameters
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    STRUCTURE = GLOBAL.STRUCTURE
    penalty_start = GLOBAL.CONFIG["penalty_start"]
    class_weight = "auto"  # unbiased
    # Scale the l1/l2/tv ratios by the global penalty alpha.
    alpha = float(key[0])
    l1, l2, tv, k = alpha * float(key[1]), alpha * float(
        key[2]), alpha * float(key[3]), key[4]
    # Load the previously collected results for this key.
    values = output_collector.load()
    if k != -1:
        k = int(k)
        # Mask chain: 3D volume -> 1D (all in-mask voxels) -> reduced 1D
        # (voxels kept by the univariate filter of the original fit).
        mask3d_to_1d = STRUCTURE.get_data() != 0
        mask3d_to_1dr = values["mask"]
        mask_1d_to_1dr = mask3d_to_1dr[mask3d_to_1d]
        # dummy A: beta is injected below, no fitting, so the TV operator's
        # content is irrelevant here.
        A, _ = tv_helper.A_from_shape((3, 3, 3))
        Xte_r = np.hstack([Xte[:, :penalty_start],
                           Xte[:, penalty_start:][:, mask_1d_to_1dr]])
    else:
        Xte_r = Xte
        A = GLOBAL.A
    mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start,
                                   class_weight=class_weight)
    mod.beta = values["beta"]  # re-use the stored weights; no fit
    # Check that previously predicted labels equal the new predictions.
    assert np.all(mod.predict(Xte_r) == values["y_pred"])
    proba_pred = mod.predict_probability(Xte_r)
    ret = dict(proba_pred=proba_pred)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def mapper(key, output_collector): import mapreduce as GLOBAL # access to global variables: #raise ImportError("could not import ") # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A # GLOBAL.DATA ::= {"X":[Xtrain, ytrain], "y":[Xtest, ytest]} # key: list of parameters nfold = GLOBAL.N_FOLD nrndperm = GLOBAL.NRNDPERM # data for model validation (2nd cross validation, outer loop) Xvalid = GLOBAL.DATA_RESAMPLED["X"][0] Xcalib = GLOBAL.DATA_RESAMPLED["X"][1] yvalid = GLOBAL.DATA_RESAMPLED["y"][0] ycalib = GLOBAL.DATA_RESAMPLED["y"][1] criterion = '' for c in key: criterion += c print criterion, "Data shape:", Xcalib.shape, Xvalid.shape, \ ycalib.shape, yvalid.shape penalty_start = GLOBAL.PENALTY_START class_weight = "auto" # unbiased selection = GLOBAL.SELECTION #set of parameters (alpha, l1, l2, tv) selected model = selection[(selection.n_fold == nfold) & \ (selection.permutation == nrndperm)] \ ['param_opt_' + criterion].values[0] model_params = model.split('_') alpha = float(model_params[0]) l1, l2 = alpha * float(model_params[1]), alpha * float(model_params[2]) tv, k_ratio = alpha * float(model_params[3]), float(model_params[4]) print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio) A = GLOBAL.A info = [Info.num_iter] mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start, class_weight=class_weight, algorithm_params={'info': info}) mod.fit(Xcalib, ycalib) y_pred = mod.predict(Xvalid) proba_pred = mod.predict_probability(Xvalid) # a posteriori probability ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yvalid) if output_collector: output_collector.collect(key, ret) else: return ret
def mapper(key, output_collector): import mapreduce as GLOBAL # access to global variables: #raise ImportError("could not import ") # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]} # key: list of parameters Xtr = GLOBAL.DATA_RESAMPLED["X"][0] Xte = GLOBAL.DATA_RESAMPLED["X"][1] ytr = GLOBAL.DATA_RESAMPLED["y"][0] yte = GLOBAL.DATA_RESAMPLED["y"][1] print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape STRUCTURE = GLOBAL.STRUCTURE penalty_start = GLOBAL.PENALTY_START class_weight = "auto" # unbiased alpha = float(key[0]) l1, l2 = alpha * float(key[1]), alpha * float(key[2]) tv, k_ratio = alpha * float(key[3]), key[4] print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio) mask = STRUCTURE.get_data() != 0 Xtr_r = Xtr Xte_r = Xte A = GLOBAL.A info = [Info.num_iter] mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start, class_weight=class_weight, algorithm_params={'info': info}) mod.fit(Xtr_r, ytr) y_pred = mod.predict(Xte_r) proba_pred = mod.predict_probability(Xte_r) # a posteriori probability beta = mod.beta ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yte, beta=beta, mask=mask, n_iter=mod.get_info()['num_iter']) if output_collector: output_collector.collect(key, ret) else: return ret
{l1_ratio: dict(y_pred=[], y_true=[]) for l1_ratio in L1_RATIOS} for alpha in ALPHAS } for fold, (train, test) in enumerate(utils.CV10): print "fold", fold Xtr = X[train, :] Xte = X[test, :] ytr = y[train, :] yte = y[test, :] for alpha in ALPHAS: for l1_ratio in L1_RATIOS: k, l, g = alpha * np.array([1 - l1_ratio, l1_ratio, 0]) mod = LogisticRegressionL1L2TV(k=k, l=l, g=g, A=A, penalty_start=1, class_weight="auto") mod.fit(Xtr, ytr) RES[alpha][l1_ratio]["y_pred"].append(mod.predict(Xte).ravel()) RES[alpha][l1_ratio]["y_true"].append(yte.ravel()) scores = list() for alpha in ALPHAS: for l1_ratio in L1_RATIOS: y_pred = np.concatenate(RES[alpha][l1_ratio]["y_pred"]) y_true = np.concatenate(RES[alpha][l1_ratio]["y_true"]) p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None) scores.append([alpha, l1_ratio] + r.tolist() + [r.mean()])
############################################################################# ## Fit on all if False: key = '0.01_0.001_0.999_0.0' OUTPUT = os.path.join(os.path.dirname(WD), 'logistictvenet_all', key) if not os.path.exists(OUTPUT): os.makedirs(OUTPUT) X = np.load(os.path.join(os.path.dirname(WD), 'X.npy')) y = np.load(os.path.join(os.path.dirname(WD), 'y.npy')) A, STRUCTURE = A_from_structure( os.path.join(os.path.dirname(WD), "mask.nii")) params = np.array([float(p) for p in key.split("_")]) l1, l2, tv = params[0] * params[1:] mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=3, class_weight="auto") mod.fit(X, y) #CPU times: user 1936.73 s, sys: 0.66 s, total: 1937.39 s # Wall time: 1937.13 s / 2042.58 s y_pred = mod.predict(X) p, r, f, s = precision_recall_fscore_support(y, y_pred, average=None) n_ite = mod.algorithm.num_iter scores = dict(recall_0=r[0], recall_1=r[1], recall_mean=r.mean(), precision_0=p[0], precision_1=p[1], precision_mean=p.mean(), f1_0=f[0],
def mapper(key, output_collector): import mapreduce as GLOBAL # access to global variables: #raise ImportError("could not import ") # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]} # key: list of parameters Xtr = GLOBAL.DATA_RESAMPLED["X"][0] Xte = GLOBAL.DATA_RESAMPLED["X"][1] ytr = GLOBAL.DATA_RESAMPLED["y"][0] yte = GLOBAL.DATA_RESAMPLED["y"][1] print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape STRUCTURE = GLOBAL.STRUCTURE #alpha, ratio_l1, ratio_l2, ratio_tv, k = key #key = np.array(key) penalty_start = GLOBAL.PENALTY_START class_weight = "auto" # unbiased alpha = float(key[0]) l1, l2 = alpha * float(key[1]), alpha * float(key[2]) tv, k_ratio = alpha * float(key[3]), key[4] print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio) n_voxels = np.count_nonzero(STRUCTURE.get_data()) if k_ratio != -1: k = n_voxels * k_ratio k = int(k) aov = SelectKBest(k=k) aov.fit(Xtr[..., penalty_start:], ytr.ravel()) mask = STRUCTURE.get_data() != 0 mask[mask] = aov.get_support() #print mask.sum() A, _ = tv_helper.A_from_mask(mask) Xtr_r = np.hstack([ Xtr[:, :penalty_start], Xtr[:, penalty_start:][:, aov.get_support()] ]) Xte_r = np.hstack([ Xte[:, :penalty_start], Xte[:, penalty_start:][:, aov.get_support()] ]) else: mask = STRUCTURE.get_data() != 0 Xtr_r = Xtr Xte_r = Xte A = GLOBAL.A info = [Info.num_iter] mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start, class_weight=class_weight, algorithm_params={'info': info}) mod.fit(Xtr_r, ytr) y_pred = mod.predict(Xte_r) proba_pred = mod.predict_probability(Xte_r) # a posteriori probability beta = mod.beta ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yte, beta=beta, mask=mask, n_iter=mod.get_info()['num_iter']) if output_collector: output_collector.collect(key, ret) else: return ret
# Training data & baseline pipelines (top-level script section).
# NOTE(review): relies on names defined earlier in the file (GM, pop_train,
# INPUT_ROI_TRAIN, WHICH, penalty_start, CV) — not visible in this chunk.
y_train = np.load(os.path.join(GM, 'ytrain.npy'))
# Sanity check: population table rows must be aligned with the label array.
assert np.all(pop_train.Label.values == y_train.ravel())
Xroi_train = pd.read_csv(INPUT_ROI_TRAIN, header=None).values
# enettv for GM: WHICH encodes "alpha_l1_l2_tv" or "alpha_l1_l2_tv_k".
arg = [float(p) for p in WHICH.split("_")]
if len(arg) == 4:
    alpha, l1, l2, tv = arg
else:
    alpha, l1, l2, tv, k = arg
# Scale the penalty ratios by the global alpha.
l1, l2, tv = alpha * l1, alpha * l2, alpha * tv
# NOTE(review): 0 is passed where the TV linear operator A is expected —
# presumably a placeholder replaced before fitting; confirm against callers.
enettv = LogisticRegressionL1L2TV(l1, l2, tv, 0, penalty_start=penalty_start,
                                  class_weight="auto")
C = 0.0022
# lr l2 for roi: standardized l2-penalized logistic regression baseline.
p_lr_l2 = Pipeline([
    ('scaler', StandardScaler()),
    # ('classifier', LogisticRegression(C=0.005, penalty='l2')),
    ('classifier', LogisticRegression(C=C, penalty='l2')),
])
print "=========="
print "== %s ==" % CV
print "=========="
#print "enettv", WHICH, GM
def mapper(key, output_collector):
    """Fit an enet+TV logistic regression for one parameter tuple on the
    resampled train split, supporting single-modality (MRI or PET) and
    joint MRI+PET data, with optional univariate feature filtering.

    Parameters
    ----------
    key : sequence
        (alpha, l1_ratio, l2_ratio, tv_ratio, k_ratio); k_ratio == -1
        disables the univariate filtering.
    output_collector : OutputCollector or None
        If provided, results are collected to it; otherwise returned.
    """
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: list of parameters
    MODALITY = GLOBAL.MODALITY
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape
    STRUCTURE = GLOBAL.STRUCTURE
    n_voxels = np.count_nonzero(STRUCTURE.get_data())
    penalty_start = GLOBAL.PENALTY_START
    class_weight = "auto"  # unbiased
    # Scale the l1/l2/tv ratios by the global penalty alpha.
    alpha = float(key[0])
    l1, l2 = alpha * float(key[1]), alpha * float(key[2])
    tv, k_ratio = alpha * float(key[3]), key[4]
    print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio)
    if np.logical_or(MODALITY == "MRI", MODALITY == "PET"):
        if k_ratio != -1:
            # Univariate filtering: keep the n_voxels * k_ratio best
            # features (ANOVA F-test), excluding unpenalized covariates.
            k = n_voxels * k_ratio
            k = int(k)
            aov = SelectKBest(k=k)
            aov.fit(Xtr[..., penalty_start:], ytr.ravel())
            mask = STRUCTURE.get_data() != 0
            mask[mask] = aov.get_support()
            # Rebuild the TV operator on the reduced voxel set.
            A, _ = tv_helper.A_from_mask(mask)
            Xtr_r = np.hstack([Xtr[:, :penalty_start],
                               Xtr[:, penalty_start:][:, aov.get_support()]])
            Xte_r = np.hstack([Xte[:, :penalty_start],
                               Xte[:, penalty_start:][:, aov.get_support()]])
        else:
            mask = STRUCTURE.get_data() != 0
            Xtr_r = Xtr
            Xte_r = Xte
            A = GLOBAL.A
    elif MODALITY == "MRI+PET":
        if k_ratio != -1:
            # Joint filtering over the concatenated MRI+PET features
            # (2 * n_voxels candidates).
            k = 2 * n_voxels * k_ratio
            k = int(k)
            aov = SelectKBest(k=k)
            aov.fit(Xtr[..., penalty_start:], ytr.ravel())
            support_mask = aov.get_support()
            # Create 3D masks: first n_voxels entries belong to MRI,
            # the remaining ones to PET.
            mask_MRI = STRUCTURE.get_data() != 0
            mask_MRI[mask_MRI] = support_mask[:n_voxels]
            mask_PET = STRUCTURE.get_data() != 0
            mask_PET[mask_PET] = support_mask[n_voxels:]
            # We construct matrix A, its size is k*k.
            # If k_MRI and k_PET are both different from 0 we construct
            # a matrix A for each modality and then concatenate them
            # (block-diagonal). If one of the modalities is empty, the
            # matrix A is constructed from the other modality only.
            k_MRI = np.count_nonzero(mask_MRI)
            k_PET = np.count_nonzero(mask_PET)
            # k_MRI and k_PET can not be simultaneously equal to zero.
            assert (k_MRI + k_PET == k)
            if (k_MRI == 0) and (k_PET != 0):
                A, _ = tv_helper.A_from_mask(mask_PET)
            if (k_PET == 0) and (k_MRI != 0):
                A, _ = tv_helper.A_from_mask(mask_MRI)
            if (k_MRI != 0) and (k_PET != 0):
                A1, _ = tv_helper.A_from_mask(mask_MRI)
                A2, _ = tv_helper.A_from_mask(mask_PET)
                A = []
                for i in range(3):
                    # Block-diagonal operator: no TV coupling across
                    # modalities.
                    a = sparse.bmat([[A1[i], None], [None, A2[i]]])
                    A.append(a)
            Xtr_r = np.hstack([Xtr[:, :penalty_start],
                               Xtr[:, penalty_start:][:, support_mask]])
            Xte_r = np.hstack([Xte[:, :penalty_start],
                               Xte[:, penalty_start:][:, support_mask]])
        else:
            # No filtering: both modalities kept in full.
            k_MRI = n_voxels
            k_PET = n_voxels
            mask_MRI = STRUCTURE.get_data() != 0
            mask_PET = STRUCTURE.get_data() != 0
            Xtr_r = Xtr
            Xte_r = Xte
            A = GLOBAL.A
    info = [Info.num_iter]
    mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start,
                                   class_weight=class_weight,
                                   algorithm_params={'info': info})
    mod.fit(Xtr_r, ytr)
    y_pred = mod.predict(Xte_r)
    proba_pred = mod.predict_probability(Xte_r)  # a posteriori probability
    beta = mod.beta
    if (MODALITY == "MRI") or (MODALITY == "PET"):
        ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yte,
                   beta=beta, mask=mask, n_iter=mod.get_info()['num_iter'])
    elif MODALITY == "MRI+PET":
        # Split the weight vector back per modality; both parts keep the
        # leading unpenalized covariates.
        beta_MRI = beta[:(penalty_start + k_MRI)]
        beta_PET = np.vstack(
            [beta[:penalty_start], beta[(penalty_start + k_MRI):]])
        ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yte,
                   beta=beta, beta_MRI=beta_MRI, beta_PET=beta_PET,
                   mask_MRI=mask_MRI, mask_PET=mask_PET,
                   n_iter=mod.get_info()['num_iter'])
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def mapper(key, output_collector):
    """Compare logistic-regression implementations (statsmodels vs parsimony,
    with and without null enet+TV penalties / early stopping) on the
    resampled train/test split.

    Parameters
    ----------
    key : sequence
        key[0] selects the method: "statsmodels", "log_parsimony",
        "enettv_parsimony" or "enettv_parsimony_early_stopping".
    output_collector : OutputCollector or None
        If provided, results are collected to it; otherwise returned.
    """
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: list of parameters
    print "key: ", key
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape
    method = key[0]
    if method == "statsmodels":
        # Logistic regression with the statsmodels tool (Logit).
        logit_mod = sm.Logit(ytr, Xtr)
        logit_res = logit_mod.fit(disp=0)
        prob_pred = logit_res.predict(Xte)
        # Threshold the posterior probability at 0.5 to get hard labels.
        y_pred = np.zeros((Xte.shape[0]))
        y_pred[prob_pred >= 0.5] = 1
        beta = logit_res.params.reshape(-1, 1)
    elif method == "log_parsimony":
        # Logistic regression with the parsimony tool (LogisticRegression).
        mod = LogisticRegression()
        mod.fit(Xtr, ytr)
        y_pred = mod.predict(Xte)
        prob_pred = mod.predict_probability(Xte)  # a posteriori probability
        beta = mod.beta
    elif method == "enettv_parsimony":
        # enet+TV with all penalties (l1, l2, tv) set to zero.
        l1, l2, tv = 0, 0, 0
        class_weight = "auto"
        penalty_start = 1
        # Dummy empty sparse operator; NOTE(review): presumably harmless
        # because tv == 0 — confirm against the estimator's contract.
        A = [sparse.csr_matrix((2, 2)) for i in xrange(3)]
        mod = LogisticRegressionL1L2TV(l1, l2, tv, A,
                                       penalty_start=penalty_start,
                                       class_weight=class_weight)
        mod.fit(Xtr, ytr)
        y_pred = mod.predict(Xte)
        prob_pred = mod.predict_probability(Xte)  # a posteriori probability
        beta = mod.beta
    elif method == 'enettv_parsimony_early_stopping':
        # Same null-penalty setup, but capped at 100 iterations.
        l1, l2, tv = 0, 0, 0
        class_weight = "auto"
        penalty_start = 1
        A = [sparse.csr_matrix((2, 2)) for i in xrange(3)]
        mod = LogisticRegressionL1L2TV(l1, l2, tv, A,
                                       penalty_start=penalty_start,
                                       class_weight=class_weight,
                                       algorithm_params={'max_iter': 100})
        mod.fit(Xtr, ytr)
        y_pred = mod.predict(Xte)
        prob_pred = mod.predict_probability(Xte)  # a posteriori probability
        beta = mod.beta
    ret = dict(y_pred=y_pred, prob_pred=prob_pred, y_true=yte, beta=beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret