Beispiel #1
0
def mapper(key, output_collector):
    """Mapper for one resampled train/test split: optionally pre-screen
    features with SelectKBest, then fit LogisticRegressionL1L2TV and emit
    predictions, probabilities, beta and the feature mask.

    key ::= (alpha, l1_ratio, l2_ratio, tv_ratio, k); k == -1 disables the
    univariate feature screening.  Results are pushed to ``output_collector``
    when given, otherwise returned as a dict.
    """
    import mapreduce as GLOBAL  # access to global variables:
    #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, ytrain], "y":[Xtest, ytest]}
    # key: list of parameters
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape
    # STRUCTURE = GLOBAL.STRUCTURE
    #alpha, ratio_l1, ratio_l2, ratio_tv, k = key
    #key = np.array(key)
    penalty_start = GLOBAL.CONFIG["penalty_start"]
    class_weight = "auto"  # unbiased
    alpha = float(key[0])
    # Each penalty ratio is scaled by the global regularization alpha.
    l1, l2, tv, k = alpha * float(key[1]), alpha * float(
        key[2]), alpha * float(key[3]), key[4]
    print "l1:%f, l2:%f, tv:%f, k:%i" % (l1, l2, tv, k)
    if k != -1:
        k = int(k)
        # Univariate screening on the penalized columns only; the first
        # `penalty_start` covariates are always kept.
        aov = SelectKBest(k=k)
        aov.fit(Xtr[..., penalty_start:], ytr.ravel())
        # Restrict the mesh mask to the k selected features.
        mask = GLOBAL.mask != 0
        mask[mask] = aov.get_support()
        #print mask.sum()
        # Rebuild the TV linear operator on the reduced mesh support.
        A, _ = tv_helper.nesterov_linear_operator_from_mesh(
            GLOBAL.mesh_coord, GLOBAL.mesh_triangles, mask)
        Xtr_r = np.hstack([
            Xtr[:, :penalty_start], Xtr[:, penalty_start:][:,
                                                           aov.get_support()]
        ])
        Xte_r = np.hstack([
            Xte[:, :penalty_start], Xte[:, penalty_start:][:,
                                                           aov.get_support()]
        ])
    else:
        # NOTE(review): Xtr.shape[0] is the number of SAMPLES, not features;
        # the other branch stores a feature mask (GLOBAL.mask != 0), so a
        # feature-sized mask looks intended here -- confirm.
        mask = np.ones(Xtr.shape[0], dtype=bool)
        Xtr_r = Xtr
        Xte_r = Xte
        A = GLOBAL.A
    mod = LogisticRegressionL1L2TV(l1,
                                   l2,
                                   tv,
                                   A,
                                   penalty_start=penalty_start,
                                   class_weight=class_weight)
    mod.fit(Xtr_r, ytr)
    y_pred = mod.predict(Xte_r)
    proba_pred = mod.predict_probability(Xte_r)
    ret = dict(y_pred=y_pred,
               y_true=yte,
               beta=mod.beta,
               mask=mask,
               proba_pred=proba_pred)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def run_all():
    """Fit the '0.01_0.01_0.98_0.01' model on the full GM training set and
    store predictions, scores and the beta map (as a NIfTI image) via an
    OutputCollector."""
    work_dir = "/neurospin/brainomics/2014_mlc/GM"
    param_key = '0.01_0.01_0.98_0.01'
    out_dir = os.path.join(os.path.dirname(work_dir), 'logistictvenet_all',
                           param_key)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    X = np.load(os.path.join(work_dir, 'GMtrain.npy'))
    y = np.load(os.path.join(work_dir, 'ytrain.npy'))
    A, STRUCTURE = A_from_structure(os.path.join(work_dir, "mask.nii"))
    params = np.array([float(p) for p in param_key.split("_")])
    # alpha (params[0]) scales the three penalty ratios.
    l1, l2, tv = params[0] * params[1:]
    mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=1,
                                   class_weight="auto")
    mod.fit(X, y)
    #CPU times: user 1936.73 s, sys: 0.66 s, total: 1937.39 s
    # Wall time: 1937.13 s / 2042.58 s
    # Resubstitution predictions (scored on the training data itself).
    y_pred = mod.predict(X)
    p, r, f, s = precision_recall_fscore_support(y, y_pred, average=None)
    n_ite = mod.algorithm.num_iter
    scores = dict(
        recall_0=r[0], recall_1=r[1], recall_mean=r.mean(),
        precision_0=p[0], precision_1=p[1], precision_mean=p.mean(),
        f1_0=f[0], f1_1=f[1], f1_mean=f.mean(),
        support_0=s[0], support_1=s[1], n_ite=n_ite,
        intercept=mod.beta[0, 0])
    # Project the penalized coefficients back into the 3D brain mask
    # (beta[0] is the intercept, hence beta[1:]).
    beta3d = np.zeros(STRUCTURE.get_data().shape)
    beta3d[STRUCTURE.get_data() != 0] = mod.beta[1:].ravel()
    out_im = nibabel.Nifti1Image(beta3d, affine=STRUCTURE.get_affine())
    ret = dict(y_pred=y_pred, y_true=y, beta=mod.beta, beta3d=out_im,
               scores=scores)
    # run /home/ed203246/bin/mapreduce.py
    oc = OutputCollector(out_dir)
    oc.collect(key=param_key, value=ret)
Beispiel #3
0
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables:
    #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: list of parameters
    n_fold = GLOBAL.N_FOLD

    # data for model validation (2nd cross validation, outer loop)
    Xvalid = GLOBAL.DATA_RESAMPLED_VALIDMODEL["X"][0]
    Xcalib = GLOBAL.DATA_RESAMPLED_VALIDMODEL["X"][1]
    yvalid = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][0]
    ycalib = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][1]

    # data for model selection (1rst cross validation, outer loop)
    Xtest = GLOBAL.DATA_RESAMPLED_SELECTMODEL["X"][0]
    Xtrain = GLOBAL.DATA_RESAMPLED_SELECTMODEL["X"][1]
    ytest = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][0]
    ytrain = GLOBAL.DATA_RESAMPLED_VALIDMODEL["y"][1]

    print key, "Data shape:", Xvalid.shape, Xcalib.shape, Xtest.shape,
    Xtrain.shape
    STRUCTURE = GLOBAL.STRUCTURE
    #(alpha, ratio_l1, ratio_l2, ratio_tv, ratio_k) = key
    #key = np.array(key)
    penalty_start = GLOBAL.PENALTY_START
    class_weight = "auto"  # unbiased
    alpha = float(key[0])
    l1, l2 = alpha * float(key[1]), alpha * float(key[2])
    tv, k_ratio = alpha * float(key[3]), key[4]
    print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio)
    mask = STRUCTURE.get_data() != 0
    A = GLOBAL.A
    info = [Info.num_iter]
    mod = LogisticRegressionL1L2TV(l1,
                                   l2,
                                   tv,
                                   A,
                                   penalty_start=penalty_start,
                                   class_weight=class_weight,
                                   algorithm_params={'info': info})
    mod.fit(Xtrain, ytrain)
    y_pred = mod.predict(Xtest)
    proba_pred = mod.predict_probability(Xtest)  # a posteriori probability
    beta = mod.beta
    ret = dict(y_pred=y_pred,
               proba_pred=proba_pred,
               y_true=ytest,
               X_calib=Xcalib,
               y_calib=ycalib,
               X_valid=Xvalid,
               y_test=yvalid,
               n_fold=n_fold,
               beta=beta,
               mask=mask,
               n_iter=mod.get_info()['num_iter'])
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
Beispiel #4
0
def mapper(key, output_collector):
    """Fit an L1/L2/TV-penalized logistic regression on the resampled split
    and hand the fitted model plus its test predictions to the collector."""
    import mapreduce as GLOBAL  # access to global variables:
    data = GLOBAL.DATA_RESAMPLED
    Xtrain, Xtest = data["X"][0], data["X"][1]
    ytrain, ytest = data["y"][0], data["y"][1]
    alpha, ratio_k, ratio_l, ratio_g = key
    # Scale the three penalty ratios by the global regularization alpha.
    k, l, g = alpha * np.array((ratio_k, ratio_l, ratio_g))
    mod = LogisticRegressionL1L2TV(k, l, g, GLOBAL.A, class_weight="auto")
    y_pred = mod.fit(Xtrain, ytrain).predict(Xtest)
    ret = dict(model=mod, y_pred=y_pred, y_true=ytest, beta=mod.beta)
    output_collector.collect(key, ret)
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables:
    #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: criterion used for the model selection
    Xvalid = GLOBAL.DATA_RESAMPLED["X"][0]
    Xcalib = GLOBAL.DATA_RESAMPLED["X"][1]
    yvalid = GLOBAL.DATA_RESAMPLED["y"][0]
    ycalib = GLOBAL.DATA_RESAMPLED["y"][1]

    criterion = ''
    for c in key:
        criterion += c
    print criterion, "Data shape:", Xcalib.shape, Xvalid.shape, ycalib.shape,
    yvalid.shape
    STRUCTURE = GLOBAL.STRUCTURE
    penalty_start = GLOBAL.PENALTY_START
    class_weight = "auto"  # unbiased
    n_fold = GLOBAL.FOLD
    model = GLOBAL.MODEL[criterion][n_fold]
    model_params = model.split('_')
    alpha = float(model_params[0])
    l1, l2 = alpha * float(model_params[1]), alpha * float(model_params[2])
    tv, k_ratio = alpha * float(model_params[3]), float(model_params[4])
    print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio)
    mask = STRUCTURE.get_data() != 0
    A = GLOBAL.A

    info = [Info.num_iter]
    mod = LogisticRegressionL1L2TV(l1,
                                   l2,
                                   tv,
                                   A,
                                   penalty_start=penalty_start,
                                   class_weight=class_weight,
                                   algorithm_params={'info': info})
    mod.fit(Xcalib, ycalib)
    y_pred = mod.predict(Xvalid)
    proba_pred = mod.predict_probability(Xvalid)  # a posteriori probability
    beta = mod.beta
    ret = dict(y_pred=y_pred,
               proba_pred=proba_pred,
               y_true=yvalid,
               beta=beta,
               mask=mask,
               model=model,
               n_iter=mod.get_info()['num_iter'])
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def mapper(key, output_collector):
    """Fit an L1/L2/TV-penalized logistic regression on GLOBAL.DATA's first
    split and collect predictions for the second split."""
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    alpha, ratio_l1, ratio_l2, ratio_tv = key
    # Each penalty ratio is scaled by the global regularization alpha.
    l1, l2, tv = alpha * np.array((ratio_l1, ratio_l2, ratio_tv))
    Xtrain, ytrain = GLOBAL.DATA["X"][0], GLOBAL.DATA["y"][0]
    Xtest, ytest = GLOBAL.DATA["X"][1], GLOBAL.DATA["y"][1]
    mod = LogisticRegressionL1L2TV(l1, l2, tv, GLOBAL.A, penalty_start=3,
                                   class_weight="auto")  # unbiased
    mod.fit(Xtrain, ytrain)
    y_pred = mod.predict(Xtest)
    ret = dict(y_pred=y_pred, y_true=ytest, beta=mod.beta)
    output_collector.collect(key, ret)
Beispiel #7
0
def mapper_fix(key, output_collector):
    """Do not fit: re-use the precomputed, stored beta and compute the
    posterior probabilities of the test samples. Call it using
    mapreduce.py -m -f config.json

    key ::= (alpha, l1_ratio, l2_ratio, tv_ratio, k); k == -1 means no
    feature screening was applied in the original run.
    """
    import mapreduce as GLOBAL  # access to global variables:
    #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, ytrain], "y":[Xtest, ytest]}
    # key: list of parameters
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    #print output_collector,
    STRUCTURE = GLOBAL.STRUCTURE
    penalty_start = GLOBAL.CONFIG["penalty_start"]
    class_weight = "auto"  # unbiased
    alpha = float(key[0])
    # Each penalty ratio is scaled by the global regularization alpha.
    l1, l2, tv, k = alpha * float(key[1]), alpha * float(
        key[2]), alpha * float(key[3]), key[4]
    # Previously stored results (beta, mask, y_pred) for this key.
    values = output_collector.load()
    if k != -1:
        k = int(k)
        # Re-derive the column selection of the original run: the stored 3D
        # mask (reduced support) indexed within the full structure mask gives
        # the boolean selection over the penalized columns of Xte.
        mask3d_to_1d = STRUCTURE.get_data() != 0
        mask3d_to_1dr = values["mask"]
        mask_1d_to_1dr = mask3d_to_1dr[mask3d_to_1d]
        # A is irrelevant here (no fitting happens) -- any valid shape works.
        A, _ = tv_helper.A_from_shape((3, 3, 3))  # dummy A
        Xte_r = np.hstack([
            Xte[:, :penalty_start], Xte[:, penalty_start:][:, mask_1d_to_1dr]
        ])
    else:
        Xte_r = Xte
        A = GLOBAL.A
    mod = LogisticRegressionL1L2TV(l1,
                                   l2,
                                   tv,
                                   A,
                                   penalty_start=penalty_start,
                                   class_weight=class_weight)
    # Inject the precomputed coefficients instead of fitting.
    mod.beta = values["beta"]
    # check previously predicted equals new predictions
    assert np.all(mod.predict(Xte_r) == values["y_pred"])
    proba_pred = mod.predict_probability(Xte_r)
    ret = dict(proba_pred=proba_pred)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables:
        #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, ytrain], "y":[Xtest, ytest]}
    # key: list of parameters
    nfold = GLOBAL.N_FOLD
    nrndperm = GLOBAL.NRNDPERM
    # data for model validation (2nd cross validation, outer loop)
    Xvalid = GLOBAL.DATA_RESAMPLED["X"][0]
    Xcalib = GLOBAL.DATA_RESAMPLED["X"][1]
    yvalid = GLOBAL.DATA_RESAMPLED["y"][0]
    ycalib = GLOBAL.DATA_RESAMPLED["y"][1]

    criterion = ''
    for c in key: criterion += c
    print criterion, "Data shape:", Xcalib.shape, Xvalid.shape, \
                                    ycalib.shape, yvalid.shape
    penalty_start = GLOBAL.PENALTY_START
    class_weight = "auto"  # unbiased
    selection = GLOBAL.SELECTION
    #set of parameters (alpha, l1, l2, tv) selected
    model = selection[(selection.n_fold == nfold) & \
                      (selection.permutation == nrndperm)] \
                     ['param_opt_' + criterion].values[0]
    model_params = model.split('_')
    alpha = float(model_params[0])
    l1, l2 = alpha * float(model_params[1]), alpha * float(model_params[2])
    tv, k_ratio = alpha * float(model_params[3]), float(model_params[4])
    print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio)
    A = GLOBAL.A
    info = [Info.num_iter]
    mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start,
                                   class_weight=class_weight,
                                   algorithm_params={'info': info})
    mod.fit(Xcalib, ycalib)
    y_pred = mod.predict(Xvalid)
    proba_pred = mod.predict_probability(Xvalid)  # a posteriori probability
    ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yvalid)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
Beispiel #9
0
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables:
        #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: list of parameters
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape
    STRUCTURE = GLOBAL.STRUCTURE
    penalty_start = GLOBAL.PENALTY_START
    class_weight = "auto"  # unbiased
    alpha = float(key[0])
    l1, l2 = alpha * float(key[1]), alpha * float(key[2])
    tv, k_ratio = alpha * float(key[3]), key[4]
    print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio)
    mask = STRUCTURE.get_data() != 0
    Xtr_r = Xtr
    Xte_r = Xte
    A = GLOBAL.A

    info = [Info.num_iter]
    mod = LogisticRegressionL1L2TV(l1, l2, tv, A, penalty_start=penalty_start,
                                   class_weight=class_weight,
                                   algorithm_params={'info': info})
    mod.fit(Xtr_r, ytr)
    y_pred = mod.predict(Xte_r)
    proba_pred = mod.predict_probability(Xte_r)  # a posteriori probability
    beta = mod.beta
    ret = dict(y_pred=y_pred, proba_pred=proba_pred, y_true=yte,
                   beta=beta,  mask=mask,
                   n_iter=mod.get_info()['num_iter'])
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
Beispiel #10
0
        {l1_ratio: dict(y_pred=[], y_true=[])
         for l1_ratio in L1_RATIOS}
        for alpha in ALPHAS
    }
    for fold, (train, test) in enumerate(utils.CV10):
        print "fold", fold
        Xtr = X[train, :]
        Xte = X[test, :]
        ytr = y[train, :]
        yte = y[test, :]
        for alpha in ALPHAS:
            for l1_ratio in L1_RATIOS:
                k, l, g = alpha * np.array([1 - l1_ratio, l1_ratio, 0])
                mod = LogisticRegressionL1L2TV(k=k,
                                               l=l,
                                               g=g,
                                               A=A,
                                               penalty_start=1,
                                               class_weight="auto")
                mod.fit(Xtr, ytr)
                RES[alpha][l1_ratio]["y_pred"].append(mod.predict(Xte).ravel())
                RES[alpha][l1_ratio]["y_true"].append(yte.ravel())

    scores = list()
    for alpha in ALPHAS:
        for l1_ratio in L1_RATIOS:
            y_pred = np.concatenate(RES[alpha][l1_ratio]["y_pred"])
            y_true = np.concatenate(RES[alpha][l1_ratio]["y_true"])
            p, r, f, s = precision_recall_fscore_support(y_true,
                                                         y_pred,
                                                         average=None)
            scores.append([alpha, l1_ratio] + r.tolist() + [r.mean()])
Beispiel #11
0
    #############################################################################
    ## Fit on all
    if False:
        key = '0.01_0.001_0.999_0.0'
        OUTPUT = os.path.join(os.path.dirname(WD), 'logistictvenet_all', key)
        if not os.path.exists(OUTPUT): os.makedirs(OUTPUT)
        X = np.load(os.path.join(os.path.dirname(WD), 'X.npy'))
        y = np.load(os.path.join(os.path.dirname(WD), 'y.npy'))
        A, STRUCTURE = A_from_structure(
            os.path.join(os.path.dirname(WD), "mask.nii"))
        params = np.array([float(p) for p in key.split("_")])
        l1, l2, tv = params[0] * params[1:]
        mod = LogisticRegressionL1L2TV(l1,
                                       l2,
                                       tv,
                                       A,
                                       penalty_start=3,
                                       class_weight="auto")
        mod.fit(X, y)
        #CPU times: user 1936.73 s, sys: 0.66 s, total: 1937.39 s
        # Wall time: 1937.13 s / 2042.58 s
        y_pred = mod.predict(X)
        p, r, f, s = precision_recall_fscore_support(y, y_pred, average=None)
        n_ite = mod.algorithm.num_iter
        scores = dict(recall_0=r[0],
                      recall_1=r[1],
                      recall_mean=r.mean(),
                      precision_0=p[0],
                      precision_1=p[1],
                      precision_mean=p.mean(),
                      f1_0=f[0],
def mapper(key, output_collector):
    """Mapper for one resampled split: optional SelectKBest screening of a
    `k_ratio` fraction of the voxels, then LogisticRegressionL1L2TV
    fit/predict.

    key ::= (alpha, l1_ratio, l2_ratio, tv_ratio, k_ratio); k_ratio == -1
    disables the univariate screening.
    """
    import mapreduce as GLOBAL  # access to global variables:
    #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: list of parameters
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape
    STRUCTURE = GLOBAL.STRUCTURE
    #alpha, ratio_l1, ratio_l2, ratio_tv, k = key
    #key = np.array(key)
    penalty_start = GLOBAL.PENALTY_START
    class_weight = "auto"  # unbiased
    alpha = float(key[0])
    # Each penalty ratio is scaled by the global regularization alpha.
    l1, l2 = alpha * float(key[1]), alpha * float(key[2])
    tv, k_ratio = alpha * float(key[3]), key[4]
    print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio)

    n_voxels = np.count_nonzero(STRUCTURE.get_data())
    if k_ratio != -1:
        # Keep the top `k_ratio` fraction of voxels by univariate score;
        # the first `penalty_start` covariates are always kept.
        k = n_voxels * k_ratio
        k = int(k)
        aov = SelectKBest(k=k)
        aov.fit(Xtr[..., penalty_start:], ytr.ravel())
        # Restrict the 3D brain mask to the selected voxels.
        mask = STRUCTURE.get_data() != 0
        mask[mask] = aov.get_support()
        #print mask.sum()
        # Rebuild the TV linear operator on the reduced support.
        A, _ = tv_helper.A_from_mask(mask)
        Xtr_r = np.hstack([
            Xtr[:, :penalty_start], Xtr[:, penalty_start:][:,
                                                           aov.get_support()]
        ])
        Xte_r = np.hstack([
            Xte[:, :penalty_start], Xte[:, penalty_start:][:,
                                                           aov.get_support()]
        ])
    else:
        # No screening: full structure mask and precomputed operator.
        mask = STRUCTURE.get_data() != 0
        Xtr_r = Xtr
        Xte_r = Xte
        A = GLOBAL.A
    info = [Info.num_iter]
    mod = LogisticRegressionL1L2TV(l1,
                                   l2,
                                   tv,
                                   A,
                                   penalty_start=penalty_start,
                                   class_weight=class_weight,
                                   algorithm_params={'info': info})
    mod.fit(Xtr_r, ytr)
    y_pred = mod.predict(Xte_r)
    proba_pred = mod.predict_probability(Xte_r)  # a posteriori probability
    beta = mod.beta
    ret = dict(y_pred=y_pred,
               proba_pred=proba_pred,
               y_true=yte,
               beta=beta,
               mask=mask,
               n_iter=mod.get_info()['num_iter'])
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
Beispiel #13
0
# Load training labels and sanity-check them against the population table.
# NOTE(review): GM, pop_train, INPUT_ROI_TRAIN, WHICH, penalty_start and CV
# are defined elsewhere in the file -- not visible here.
y_train = np.load(os.path.join(GM, 'ytrain.npy'))
assert np.all(pop_train.Label.values == y_train.ravel())

Xroi_train = pd.read_csv(INPUT_ROI_TRAIN, header=None).values

# enettv for GM
# WHICH encodes the selected parameters as "alpha_l1_l2_tv[_k]".
arg = [float(p) for p in WHICH.split("_")]
if len(arg) == 4:
    alpha, l1, l2, tv = arg
else:
    alpha, l1, l2, tv, k = arg

# Each penalty ratio is scaled by the global regularization alpha.
l1, l2, tv = alpha * l1, alpha * l2, alpha * tv
# NOTE(review): the linear operator argument is 0 here (no TV structure
# supplied) -- confirm this is intended for this estimator.
enettv = LogisticRegressionL1L2TV(l1,
                                  l2,
                                  tv,
                                  0,
                                  penalty_start=penalty_start,
                                  class_weight="auto")
C = 0.0022
# lr l2 for roi
p_lr_l2 = Pipeline([
    ('scaler', StandardScaler()),
    # ('classifier', LogisticRegression(C=0.005, penalty='l2')),
    ('classifier', LogisticRegression(C=C, penalty='l2')),
])

print "=========="
print "== %s ==" % CV
print "=========="

#print "enettv", WHICH, GM
Beispiel #14
0
def mapper(key, output_collector):
    """Mapper supporting single-modality ("MRI" or "PET") and dual-modality
    ("MRI+PET") inputs: optional SelectKBest voxel screening, per-modality
    TV operator construction, then LogisticRegressionL1L2TV fit/predict.

    key ::= (alpha, l1_ratio, l2_ratio, tv_ratio, k_ratio); k_ratio == -1
    disables the univariate screening.
    """
    import mapreduce as GLOBAL  # access to global variables:
    #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: list of parameters
    MODALITY = GLOBAL.MODALITY
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape
    STRUCTURE = GLOBAL.STRUCTURE
    n_voxels = np.count_nonzero(STRUCTURE.get_data())
    #alpha, ratio_l1, ratio_l2, ratio_tv, k = key
    #key = np.array(key)
    penalty_start = GLOBAL.PENALTY_START
    class_weight = "auto"  # unbiased
    alpha = float(key[0])
    # Each penalty ratio is scaled by the global regularization alpha.
    l1, l2 = alpha * float(key[1]), alpha * float(key[2])
    tv, k_ratio = alpha * float(key[3]), key[4]
    print "l1:%f, l2:%f, tv:%f, k_ratio:%f" % (l1, l2, tv, k_ratio)
    # NOTE(review): np.logical_or on two string comparisons works, but a
    # plain `MODALITY in ("MRI", "PET")` would be clearer.
    if np.logical_or(MODALITY == "MRI", MODALITY == "PET"):
        if k_ratio != -1:
            # Single modality: keep the top k_ratio fraction of voxels.
            k = n_voxels * k_ratio
            k = int(k)
            aov = SelectKBest(k=k)
            aov.fit(Xtr[..., penalty_start:], ytr.ravel())
            mask = STRUCTURE.get_data() != 0
            mask[mask] = aov.get_support()
            #print mask.sum()
            A, _ = tv_helper.A_from_mask(mask)
            Xtr_r = np.hstack([
                Xtr[:, :penalty_start], Xtr[:,
                                            penalty_start:][:,
                                                            aov.get_support()]
            ])
            Xte_r = np.hstack([
                Xte[:, :penalty_start], Xte[:,
                                            penalty_start:][:,
                                                            aov.get_support()]
            ])
        else:
            mask = STRUCTURE.get_data() != 0
            Xtr_r = Xtr
            Xte_r = Xte
            A = GLOBAL.A

    elif MODALITY == "MRI+PET":
        if k_ratio != -1:
            # Dual modality: columns are [MRI voxels | PET voxels], hence 2x.
            k = 2 * n_voxels * k_ratio
            k = int(k)
            aov = SelectKBest(k=k)
            aov.fit(Xtr[..., penalty_start:], ytr.ravel())
            support_mask = aov.get_support()
            # Create 3D mask for MRI
            mask_MRI = STRUCTURE.get_data() != 0
            mask_MRI[mask_MRI] = support_mask[:n_voxels]

            mask_PET = STRUCTURE.get_data() != 0
            mask_PET[mask_PET] = support_mask[n_voxels:]

            # We construct matrix A, it size is k*k
            # If k_MRI and k_PET are both different to 0 we construct
            # a matrix A for each modality and then concatenate them
            # If one of the modality is empty, the matrix A is constructed
            # from the other modality only
            k_MRI = np.count_nonzero(mask_MRI)
            k_PET = np.count_nonzero(mask_PET)
            # k_MRI and k_Pet can not be simultaneously equal to zero
            assert (k_MRI + k_PET == k)
            if (k_MRI == 0) and (k_PET != 0):
                A, _ = tv_helper.A_from_mask(mask_PET)
            if (k_PET == 0) and (k_MRI != 0):
                A, _ = tv_helper.A_from_mask(mask_MRI)
            if (k_MRI != 0) and (k_PET != 0):
                # Block-diagonal concatenation: no TV coupling across
                # modalities.
                A1, _ = tv_helper.A_from_mask(mask_MRI)
                A2, _ = tv_helper.A_from_mask(mask_PET)
                A = []
                for i in range(3):
                    a = sparse.bmat([[A1[i], None], [None, A2[i]]])
                    A.append(a)

            Xtr_r = np.hstack([
                Xtr[:, :penalty_start], Xtr[:, penalty_start:][:, support_mask]
            ])
            Xte_r = np.hstack([
                Xte[:, :penalty_start], Xte[:, penalty_start:][:, support_mask]
            ])

        else:
            k_MRI = n_voxels
            k_PET = n_voxels
            mask_MRI = STRUCTURE.get_data() != 0
            mask_PET = STRUCTURE.get_data() != 0
            Xtr_r = Xtr
            Xte_r = Xte
            A = GLOBAL.A
    # NOTE(review): if MODALITY is none of "MRI"/"PET"/"MRI+PET", A and Xtr_r
    # are undefined and the code below raises NameError -- confirm MODALITY
    # is validated upstream.
    info = [Info.num_iter]
    mod = LogisticRegressionL1L2TV(l1,
                                   l2,
                                   tv,
                                   A,
                                   penalty_start=penalty_start,
                                   class_weight=class_weight,
                                   algorithm_params={'info': info})
    mod.fit(Xtr_r, ytr)
    y_pred = mod.predict(Xte_r)
    proba_pred = mod.predict_probability(Xte_r)  # a posteriori probability
    beta = mod.beta
    if (MODALITY == "MRI") or (MODALITY == "PET"):
        ret = dict(y_pred=y_pred,
                   proba_pred=proba_pred,
                   y_true=yte,
                   beta=beta,
                   mask=mask,
                   n_iter=mod.get_info()['num_iter'])
    elif MODALITY == "MRI+PET":
        # Split the coefficient vector back per modality; the intercept /
        # covariate block (first penalty_start rows) is prepended to both.
        beta_MRI = beta[:(penalty_start + k_MRI)]
        beta_PET = np.vstack(
            [beta[:penalty_start], beta[(penalty_start + k_MRI):]])
        ret = dict(y_pred=y_pred,
                   proba_pred=proba_pred,
                   y_true=yte,
                   beta=beta,
                   beta_MRI=beta_MRI,
                   beta_PET=beta_PET,
                   mask_MRI=mask_MRI,
                   mask_PET=mask_PET,
                   n_iter=mod.get_info()['num_iter'])
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def mapper(key, output_collector):
    """Compare logistic-regression implementations on one resampled split.

    key[0] selects the method: "statsmodels", "log_parsimony",
    "enettv_parsimony" or "enettv_parsimony_early_stopping".  Each branch
    produces y_pred, prob_pred and beta for the test split.
    """
    import mapreduce as GLOBAL  # access to global variables:
    #raise ImportError("could not import ")
    # GLOBAL.DATA, GLOBAL.STRUCTURE, GLOBAL.A
    # GLOBAL.DATA ::= {"X":[Xtrain, Xtest], "y":[ytrain, ytest]}
    # key: list of parameters
    print "key: ", key
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape
    method = key[0]
    if method == "statsmodels":
        # Logistic Regression with statsmodels tool, Logit
        logit_mod = sm.Logit(ytr, Xtr)
        logit_res = logit_mod.fit(disp=0)
        prob_pred = logit_res.predict(Xte)
        # Threshold posterior probabilities at 0.5 to obtain class labels.
        y_pred = np.zeros((Xte.shape[0]))
        y_pred[prob_pred >= 0.5] = 1
        beta = logit_res.params.reshape(-1, 1)
    elif method == "log_parsimony":
        # Logistic Regression with parsimnoy tool, LogisticRegression
        # NOTE(review): predict_probability/beta imply this is parsimony's
        # LogisticRegression, not scikit-learn's -- confirm the import.
        mod = LogisticRegression()
        mod.fit(Xtr, ytr)
        y_pred = mod.predict(Xte)
        prob_pred = mod.predict_probability(Xte)  # a posteriori probability
        beta = mod.beta
    elif method == "enettv_parsimony":
        # enettv with l1, l2, tv null
        l1, l2, tv = 0, 0, 0
        class_weight = "auto"
        penalty_start = 1
        # Dummy sparse linear operator: TV weight is 0, so A is unused.
        A = [sparse.csr_matrix((2, 2)) for i in xrange(3)]
        mod = LogisticRegressionL1L2TV(l1,
                                       l2,
                                       tv,
                                       A,
                                       penalty_start=penalty_start,
                                       class_weight=class_weight)
        mod.fit(Xtr, ytr)
        y_pred = mod.predict(Xte)
        prob_pred = mod.predict_probability(Xte)  # a posteriori probability
        beta = mod.beta
    elif method == 'enettv_parsimony_early_stopping':
        # enettv with l1, l2, tv null
        l1, l2, tv = 0, 0, 0
        class_weight = "auto"
        penalty_start = 1
        A = [sparse.csr_matrix((2, 2)) for i in xrange(3)]
        # Same as above but capped at 100 iterations (early stopping).
        mod = LogisticRegressionL1L2TV(l1,
                                       l2,
                                       tv,
                                       A,
                                       penalty_start=penalty_start,
                                       class_weight=class_weight,
                                       algorithm_params={'max_iter': 100})
        mod.fit(Xtr, ytr)
        y_pred = mod.predict(Xte)
        prob_pred = mod.predict_probability(Xte)  # a posteriori probability
        beta = mod.beta
    # NOTE(review): an unrecognized `method` falls through and raises
    # NameError on y_pred below -- confirm keys are validated upstream.
    ret = dict(y_pred=y_pred, prob_pred=prob_pred, y_true=yte, beta=beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret