Example #1
def mapper(key, output_collector):
    """
    # debug mapper
    config = json.load(open(os.path.join(WD, "config_cv_largerange.json"), "r"))
    load_globals(config)
    resample(config, 'refit/refit')
    key = ('enettv', 0.01, 0.1, 0.3)
    """
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]

    # key = 'enettv_0.01_0.1_0.2'.split("_")
    algo, alpha, l1l2ratio, tvratio = key[0], float(key[1]), float(key[2]), float(key[3])

    tv = alpha * tvratio
    l1 = alpha * (1 - tv) * l1l2ratio
    l2 = alpha * (1 - tv) * (1 - l1l2ratio)

    print(key, algo, alpha, l1, l2, tv)
    # alpha = float(key[0])
    # l1, l2, tv = alpha * float(key[1]), alpha * float(key[2]), alpha * float(key[3])
    # print("l1:%f, l2:%f, tv:%f" % (l1, l2, tv))

    class_weight = "auto"  # unbiased

    # mask = np.ones(Xtr.shape[0], dtype=bool)

    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)

    if algo == 'enettv':
        conesta = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LogisticRegressionL1L2TV(
            l1, l2, tv, GLOBAL.Atv, algorithm=conesta,
            class_weight=class_weight,
            penalty_start=penalty_start)  # penalty_start: module-level global
        mod.fit(Xtr, ytr.ravel())
    elif algo == 'enetgn':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.LogisticRegressionL1L2GraphNet(l1, l2, tv, GLOBAL.Agn,
            algorithm=fista, class_weight=class_weight, penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
    elif algo == 'enet':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.ElasticNetLogisticRegression(l1l2ratio, alpha,
            algorithm=fista, class_weight=class_weight, penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
    else:
        raise Exception('Algo %s not handled' % algo)

    #mod.fit(Xtr, ytr.ravel())
    y_pred = mod.predict(Xte)
    proba_pred = mod.predict_probability(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, proba_pred=proba_pred, beta=mod.beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
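A minimal, self-contained sketch of the penalty split used by this mapper, assuming the same (alpha, l1l2ratio, tvratio) key convention as above; plain arithmetic, no parsimony import needed.

def split_penalties(alpha, l1l2ratio, tvratio):
    # Mirrors the mapper above: tv is a fraction of alpha, and the remaining
    # (1 - tv) mass is split between l1 and l2 by l1l2ratio.
    tv = alpha * tvratio
    l1 = alpha * (1 - tv) * l1l2ratio
    l2 = alpha * (1 - tv) * (1 - l1l2ratio)
    return l1, l2, tv

print(split_penalties(0.01, 0.1, 0.3))  # key = ('enettv', 0.01, 0.1, 0.3)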
Example #2
def mapper(key, output_collector):
    import mapreduce as GLOBAL 
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]

    penalty_start = 3
        
    alpha = float(key[0])
    l1, l2, tv = alpha * float(key[1]), alpha * float(key[2]), alpha * float(key[3])
    print("l1:%f, l2:%f, tv:%f" % (l1, l2, tv))

    class_weight = "auto"  # unbiased
    
    mask = np.ones(Xtr.shape[0], dtype=bool)
   
    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)
    A = GLOBAL.A
    
    conesta = algorithms.proximal.CONESTA(max_iter=500)
    mod = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A, algorithm=conesta,
                                              class_weight=class_weight,
                                              penalty_start=penalty_start)
    mod.fit(Xtr, ytr.ravel())
    y_pred = mod.predict(Xte)
    proba_pred = mod.predict_probability(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, proba_pred=proba_pred, beta=mod.beta,  mask=mask)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
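The DATA_RESAMPLED layout these mappers rely on (index 0 = train fold, index 1 = test fold) is built by the resample() shown in Example #8; a toy reconstruction, assuming nothing beyond numpy:

import numpy as np

DATA = {"X": np.arange(20).reshape(10, 2), "y": np.arange(10)}
resample = [np.array([0, 1, 2, 3, 4, 5, 6]),   # fold 0: train indices
            np.array([7, 8, 9])]               # fold 1: test indices
DATA_RESAMPLED = {k: [DATA[k][idx, ...] for idx in resample] for k in DATA}
Xtr, Xte = DATA_RESAMPLED["X"]   # same as ["X"][0] and ["X"][1] above
ytr, yte = DATA_RESAMPLED["y"]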
Example #3
def mapper(key, output_collector):
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]

    alpha = float(key[0])
    l1, l2, tv = alpha * float(key[1]), alpha * float(key[2]), alpha * float(
        key[3])
    print("l1:%f, l2:%f, tv:%f" % (l1, l2, tv))
    print(key)
    class_weight = "auto"  # unbiased
    print(output_collector.output_dir)
    beta_start = GLOBAL.BETA_START.all()[os.path.basename(
        output_collector.output_dir)]
    mask = np.ones(Xtr.shape[0], dtype=bool)

    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)
    A = GLOBAL.A

    info = [
        Info.converged, Info.num_iter, Info.time, Info.func_val, Info.mu,
        Info.gap
    ]
    conesta = algorithms.proximal.CONESTA()
    algorithm_params = dict(max_iter=50000, info=info)
    out_fista = os.path.join(WD, output_collector.output_dir,
                             "fista_ite_snapshots/")
    out_conesta = os.path.join(WD, output_collector.output_dir,
                               "conesta_ite_snapshots/")

    os.makedirs(out_fista, exist_ok=True)
    os.makedirs(out_conesta, exist_ok=True)

    snapshot_fista = AlgorithmSnapshot(out_fista, saving_period=1).save_fista
    snapshot_conesta = AlgorithmSnapshot(out_conesta,
                                         saving_period=1).save_conesta
    algorithm_params["callback_fista"] = snapshot_fista
    algorithm_params["callback_conesta"] = snapshot_conesta


    mod = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
                                              algorithm=conesta,
                                              algorithm_params=algorithm_params,
                                              class_weight=class_weight)
    mod.fit(Xtr, ytr.ravel(), beta=beta_start)
    y_pred = mod.predict(Xte)
    proba_pred = mod.predict_probability(Xte)
    ret = dict(y_pred=y_pred,
               y_true=yte,
               proba_pred=proba_pred,
               beta=mod.beta,
               mask=mask,
               beta_start=beta_start)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
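AlgorithmSnapshot above comes from the authors' codebase and is not shown here. A hypothetical minimal stand-in, assuming the solver callback hands back a dict of its current numeric state (that signature is an assumption, not parsimony's documented API):

import os
import numpy as np

class MinimalSnapshot:
    # Hypothetical: dump whatever numeric state the callback receives,
    # one .npz file per saving_period calls.
    def __init__(self, output_dir, saving_period=1):
        self.output_dir = output_dir
        self.saving_period = saving_period
        self.nb_calls = 0

    def save_conesta(self, state):
        self.nb_calls += 1
        if self.nb_calls % self.saving_period == 0:
            np.savez(os.path.join(self.output_dir,
                                  "ite_%05d.npz" % self.nb_calls),
                     **{k: v for k, v in state.items()
                        if isinstance(v, (int, float, np.ndarray))})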
Example #4
def test_conesta_do_not_enter_loop_if_criterium_satisfied():
    # beta_start = 0; with over penalized problem: should not enter loop
    mod = estimators.LogisticRegressionL1L2TV(
        l1*100, l2*100, tv*100, A,
        algorithm=algorithms.proximal.CONESTA(),
        algorithm_params=algorithm_params)
    mod.fit(Xtr, ytr, beta=np.zeros((Xtr.shape[1], 1), dtype=float))
    assert np.all(mod.beta == 0)
    assert mod.get_info()['num_iter'] == 0
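The test above assumes l1, l2, tv, A, Xtr, ytr and algorithm_params are already in scope; hypothetical fixtures in the spirit of the surrounding examples (shapes illustrative, import paths assumed):

import numpy as np
import parsimony.functions.nesterov.tv as nesterov_tv
from parsimony.algorithms.utils import Info

n, shape = 20, (5, 5)
Xtr = np.random.randn(n, shape[0] * shape[1])
ytr = (np.random.rand(n, 1) > 0.5).astype(float)
A = nesterov_tv.linear_operator_from_shape(shape)
l1, l2, tv = 0.1, 0.1, 0.1
algorithm_params = dict(max_iter=10000, info=[Info.num_iter])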
Example #5
def fitting(p):
    global_pen, l1_ratio, tv_ratio = p[0], p[1], p[2]

    ltv = global_pen * tv_ratio
    ll1 = l1_ratio * global_pen * (1 - tv_ratio)
    ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)

    #l1, l2, tv= alpha * float(p[0]), alpha * float(p[1]), alpha * float(p[2])
    clf = estimators.LogisticRegressionL1L2TV(ll1,
                                              ll2,
                                              ltv,
                                              A,
                                              algorithm=conesta)
    n = 0
    list_predict = list()
    list_true = list()
    list_proba_pred = list()
    coef = np.zeros((23, 63966))
    for i in range(1, 24):
        test_bool = (subject == i)
        train_bool = (subject != i)
        Xtest = T[test_bool, :]
        ytest = y[test_bool]
        Xtrain = np.vstack((T_IMA_diff, T[train_bool, :]))
        ytrain = np.hstack((y_IMA, y[train_bool]))
        list_true.append(ytest.ravel())
        scaler = preprocessing.StandardScaler().fit(Xtrain)
        Xtrain = scaler.transform(Xtrain)
        Xtest = scaler.transform(Xtest)
        clf.fit(Xtrain, ytrain.ravel())
        coef[n, :] = clf.beta[:, 0]
        pred = (clf.predict(Xtest))
        list_predict.append(pred)
        proba_pred = clf.predict_probability(Xtest)
        list_proba_pred.append(proba_pred)
        n = n + 1
        print(n)

    true = np.concatenate(list_true)
    pred = np.concatenate(list_predict)
    proba_pred = np.concatenate(list_proba_pred)
    precision, recall, f, s = precision_recall_fscore_support(true,
                                                              pred,
                                                              average=None)
    acc = metrics.accuracy_score(true, pred)
    auc = roc_auc_score(true, pred)
    current = [
        global_pen, l1_ratio, tv_ratio, acc, recall[0], recall[1],
        precision[0], precision[1], auc
    ]
    np.save(
        os.path.join(BASE_PATH, 'toward_on', 'Logistic_L1_L2_TV_with_HC',
                     'betas_subj.npy'), coef)
    return current
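The loop over subjects 1..23 in fitting() is a leave-one-subject-out cross-validation; an equivalent split with scikit-learn's LeaveOneGroupOut, as a sketch (the IMA augmentation on the train side kept as above):

from sklearn.model_selection import LeaveOneGroupOut

logo = LeaveOneGroupOut()
for train_idx, test_idx in logo.split(T, y, groups=subject):
    Xtrain = np.vstack((T_IMA_diff, T[train_idx, :]))
    ytrain = np.hstack((y_IMA, y[train_idx]))
    Xtest, ytest = T[test_idx, :], y[test_idx]
    # ... standardize, fit clf and collect predictions exactly as in fitting()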
Example #6
def mapper(key, output_collector):
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    print(key, "Data shape:", Xtr.shape, Xte.shape, ytr.shape, yte.shape)
    STRUCTURE = GLOBAL.STRUCTURE

    global_pen, l1_ratio, tv_ratio = key
    ltv = global_pen * tv_ratio
    ll1 = l1_ratio * global_pen * (1 - tv_ratio)
    ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)

    class_weight = "auto"  # unbiased

    mask = np.ones(Xtr.shape[0], dtype=bool)

    T_IMA = np.load(
        '/neurospin/brainomics/2016_classif_hallu_fmri/unsupervised_fmri/clustering_3rdcomp/cluster1/mapreduce/T_IMA.npy'
    )
    y_IMA = np.load(
        '/neurospin/brainomics/2016_classif_hallu_fmri/unsupervised_fmri/clustering_3rdcomp/cluster1/mapreduce/y_IMA.npy'
    )

    T = GLOBAL.DATA["X"]
    y = GLOBAL.DATA["y"]

    Tdiff = np.mean(T_IMA, axis=0) - np.mean(T[y == 0], axis=0)
    T_IMA_diff = T_IMA - Tdiff
    Xtr = np.vstack((T_IMA_diff, Xtr))
    ytr = np.hstack((y_IMA, ytr))

    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)
    A = GLOBAL.A

    conesta = algorithms.proximal.CONESTA(max_iter=500)
    mod = estimators.LogisticRegressionL1L2TV(ll1,
                                              ll2,
                                              ltv,
                                              A,
                                              algorithm=conesta,
                                              class_weight=class_weight)
    mod.fit(Xtr, ytr.ravel())
    y_pred = mod.predict(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, beta=mod.beta, mask=mask)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
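The T_IMA block above is shifted so its column means coincide with the class-0 means of T before being stacked onto the training set; a toy check of that invariant (illustrative data only):

import numpy as np

T_IMA = np.random.randn(5, 3) + 10.0
T = np.random.randn(8, 3)
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])
Tdiff = np.mean(T_IMA, axis=0) - np.mean(T[y == 0], axis=0)
T_IMA_diff = T_IMA - Tdiff
assert np.allclose(np.mean(T_IMA_diff, axis=0), np.mean(T[y == 0], axis=0))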
Example #7
def mapper(key, output_collector):
    import mapreduce as GLOBAL
    X = GLOBAL.DATA["X"]
    y = GLOBAL.DATA["y"]
    start_vector = GLOBAL.DATA["start_vector"]

    alpha = float(key[0])
    l1, l2, tv = alpha * float(key[1]), alpha * float(key[2]), alpha * float(
        key[3])
    print("l1:%f, l2:%f, tv:%f" % (l1, l2, tv))

    class_weight = "auto"  # unbiased
    mask = np.ones(X.shape[0], dtype=bool)

    scaler = preprocessing.StandardScaler().fit(X)
    X = scaler.transform(X)
    A = GLOBAL.A

    info = [
        Info.converged, Info.num_iter, Info.time, Info.func_val, Info.mu,
        Info.gap
    ]
    conesta = algorithms.proximal.CONESTA()
    algorithm_params = dict(max_iter=1000000, info=info)
    out = os.path.join(WD_CLUSTER, GLOBAL.DIR, "0",
                       str(key[0]) + "_" + str(key[1]) + "_" +
                       str(key[2]) + "_" + str(key[3]),
                       "conesta_ite_snapshots/")
    os.makedirs(out, exist_ok=True)

    snapshot = AlgorithmSnapshot(out, saving_period=1).save_conesta
    algorithm_params["callback"] = snapshot



    mod = estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, A, algorithm=conesta,
        algorithm_params=algorithm_params,
        class_weight=class_weight,
        penalty_start=penalty_start,  # penalty_start: module-level global
        start_vector=start_vector)
    mod.fit(X, y.ravel())
    y_pred = mod.predict(X)
    proba_pred = mod.predict_probability(X)
    ret = dict(y_pred=y_pred,
               y_true=y,
               proba_pred=proba_pred,
               beta=mod.beta,
               mask=mask)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
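This example warm-starts through the estimator's start_vector argument; Examples #3 and #15 use the other route seen in this collection, passing an explicit beta to fit(). A sketch of the latter, with the names used above:

mod = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A,
                                          algorithm=conesta,
                                          algorithm_params=algorithm_params,
                                          class_weight=class_weight)
mod.fit(X, y.ravel(), beta=beta_start)  # beta_start: (n_features, 1) array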
Example #8
def resample(config, resample_nb):
    import mapreduce as GLOBAL  # access to global variables
    #GLOBAL.DATA = GLOBAL.load_data(config["data"])
    resample = config["resample"][resample_nb]
    print("reslicing %d" % resample_nb)
    GLOBAL.DATA_RESAMPLED = {
        k: [GLOBAL.DATA[k][idx, ...] for idx in resample]
        for k in GLOBAL.DATA
    }
    print("done reslicing %d" % resample_nb)

    ###############################
    #weight computation for this fold
    ################################
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    p = Xtr.shape[1]
    groups = GLOBAL.groups
    # Compute A matrix (as penalty_start is 1, we only need p - 1 columns)
    Atv, n_compacts = parsimony.functions.nesterov.tv.A_from_shape(
        (p - PENALTY_START, ))
    eps = 1e-8
    max_iter = 2600
    info_conf = [Info.fvalue, Info.num_iter]
    logr_tv = estimators.LogisticRegressionL1L2TV(
        l1=0,
        l2=0,
        tv=0.001,
        penalty_start=PENALTY_START,
        A=Atv,
        algorithm=explicit.StaticCONESTA(eps=eps,
                                         max_iter=max_iter,
                                         info=info_conf),
        mean=False)
    logr_tv.fit(Xtr, ytr)
    beta_w = logr_tv.beta
    #    weights = [1./(np.linalg.norm(beta_w[group])) for group in groups]
    weights = [np.sqrt(len(group)) for group in groups]
    GLOBAL.ridge_coef = 1. / ((np.linalg.norm(beta_w)))
    GLOBAL.weights = weights
    # Store weights
    output_dir = os.path.join(config['map_output'], str(resample_nb))
    np.save(os.path.join(output_dir, "weights.npy"), GLOBAL.weights)
    np.save(os.path.join(output_dir, "ridge_coef.npy"), GLOBAL.ridge_coef)
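Two group-weight schemes appear in resample(): the size-based weights actually used, and the adaptive, norm-based weights left commented out. Side by side, as a sketch:

weights_size = [np.sqrt(len(group)) for group in groups]  # used above
weights_adaptive = [1. / np.linalg.norm(beta_w[group]) for group in groups]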
Example #9
def mapper(key, output_collector):
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]

    T_IMA = GLOBAL.DATA_IMA["X_IMA"]
    y_IMA = GLOBAL.DATA_IMA["y_IMA"]
    T = GLOBAL.DATA["X"]
    y = GLOBAL.DATA["y"]
    Tdiff = np.mean(T_IMA, axis=0) - np.mean(T[y == 0], axis=0)
    T_IMA_diff = T_IMA - Tdiff
    Xtr = np.vstack((T_IMA_diff, Xtr))
    ytr = np.hstack((y_IMA, ytr))

    alpha = float(key[0])
    l1, l2, tv = alpha * float(key[1]), alpha * float(key[2]), alpha * float(
        key[3])
    print("l1:%f, l2:%f, tv:%f" % (l1, l2, tv))

    class_weight = "auto"  # unbiased

    mask = np.ones(Xtr.shape[0], dtype=bool)

    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)
    A = GLOBAL.A

    conesta = algorithms.proximal.CONESTA(max_iter=500)
    mod = estimators.LogisticRegressionL1L2TV(l1,
                                              l2,
                                              tv,
                                              A,
                                              algorithm=conesta,
                                              class_weight=class_weight)
    mod.fit(Xtr, ytr.ravel())
    y_pred = mod.predict(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, beta=mod.beta, mask=mask)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
Example #10
def fitting(p):

    l1, l2, tv = alpha * float(p[0]), alpha * float(p[1]), alpha * float(p[2])
    clf = estimators.LogisticRegressionL1L2TV(l1, l2, tv, A, algorithm=conesta)
    n = 0
    list_predict = list()
    list_true = list()
    list_proba_pred = list()
    coef = np.zeros((23, sum(mask_bool)))
    for i in range(1, 24):
        test_bool = (subject == i)
        train_bool = (subject != i)
        Xtest = T[test_bool, :]
        ytest = y[test_bool]
        #        Xtrain=T[train_bool,:]
        #        ytrain=y[train_bool]
        Xtrain = np.vstack((T_IMA_diff, T[train_bool, :]))
        ytrain = np.hstack((y_IMA, y[train_bool]))
        list_true.append(ytest.ravel())
        scaler = preprocessing.StandardScaler().fit(Xtrain)
        Xtrain = scaler.transform(Xtrain)
        Xtest = scaler.transform(Xtest)
        clf.fit(Xtrain, ytrain.ravel())
        coef[n, :] = clf.beta[:, 0]
        pred = (clf.predict(Xtest))
        list_predict.append(pred)
        proba_pred = clf.predict_probability(Xtest)
        list_proba_pred.append(proba_pred)
        n = n + 1
        print(n)

    true = np.concatenate(list_true)
    pred = np.concatenate(list_predict)
    proba_pred = np.concatenate(list_proba_pred)
    precision, recall, f, s = precision_recall_fscore_support(true,
                                                              pred,
                                                              average=None)
    acc = metrics.accuracy_score(true, pred)
    auc = roc_auc_score(true, pred)
    current = [
        alpha, p[0], p[1], p[2], acc, recall[0], recall[1], precision[0],
        precision[1], auc
    ]
    return current
    estimators.ElasticNetLogisticRegression(alpha=alpha / 10, l=.5,
                                            penalty_start=1,
                                            algorithm_params=algorithm_params)

## LogisticRegressionL1L2TV, Parsimony only
# Minimize:
#    f(beta, X, y) = - loglik/n_train
#                    + k/2 * ||beta||^2_2
#                    + l * ||beta||_1
#                    + g * TV(beta)
A = nesterov_tv.linear_operator_from_shape(beta3d.shape)
l1, l2, tv = alpha * np.array((.05, .75, .2))  # l2, l1, tv penalties

MODELS["2d_l1l2tv_fista"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, A,
        algorithm=algorithms.proximal.FISTA(),
        algorithm_params=algorithm_params)

MODELS["2d_l1l2tv_inter_fista"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, A, penalty_start=1,
        algorithm=algorithms.proximal.FISTA(),
        algorithm_params=algorithm_params)


MODELS["2d_l1l2tv_static_conesta"] = \
    estimators.LogisticRegressionL1L2TV(
        l1, l2, tv, A,
        algorithm=algorithms.proximal.StaticCONESTA(),
        algorithm_params=algorithm_params)
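# With the MODELS registry filled, a sketch of the comparison loop such a
# registry typically feeds, assuming Xtr/ytr as in the other examples:
for name, model in MODELS.items():
    model.fit(Xtr, ytr)
    print(name, "||beta|| =", np.linalg.norm(model.beta))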
# GraphNet
# l1, l2, gn = alpha * np.array((.05, .75, .2))  # l1, l2, gn penalties
l1, l2, gn = alpha * np.array((.33, .33, 33))  # l1, l2, gn penalties
A = sparse.vstack(nesterov_tv.linear_operator_from_shape(shape))
enetgn = estimators.LogisticRegressionL1L2GraphNet(l1, l2, gn, A)
yte_pred_enetgn = enetgn.fit(Xtr, ytr).predict(Xte)
_, recall_enetgn, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_enetgn, average=None)

# LogisticRegressionL1L2TV
l1, l2, tv = alpha * np.array((.05, .75, .2))  # l1, l2, tv penalties
# l1, l2, tv = alpha * np.array((.33, .33, 33))  # l1, l2, gn penalties
A = nesterov_tv.linear_operator_from_shape(beta3d.shape)
enettv = estimators.LogisticRegressionL1L2TV(l1,
                                             l2,
                                             tv,
                                             A,
                                             algorithm_params=dict(eps=1e-5))
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)
_, recall_enettv, _, _ = \
    precision_recall_fscore_support(yte, yte_pred_enettv, average=None)

###############################################################################
# Plot
plot = plt.subplot(231)
utils.plots.map2d(beta3d.reshape(shape), plot, title="beta star")

plot = plt.subplot(232)
utils.plots.map2d(ridge_sklrn.coef_.reshape(shape),
                  plot,
                  title="Ridge (sklrn) (%.2f, %.2f)" %
assert X_pra.shape[1] == 299864

scaler = preprocessing.StandardScaler().fit(X_scz)
X_scz = scaler.transform(X_scz)
X_pra = scaler.transform(X_pra)

# 'enettv':
tvratio = 1.0
l1l2ratio = 0.1
alpha = 1.0
tv = alpha * tvratio
l1 = alpha * (1 - tv) * l1l2ratio
l2 = alpha * (1 - tv) * (1 - l1l2ratio)

conesta = algorithms.proximal.CONESTA(max_iter=10000)
mod = estimators.LogisticRegressionL1L2TV(l1, l2, tv, Atv,
                                          algorithm=conesta,
                                          class_weight=class_weight,
                                          penalty_start=penalty_start)

# 'enetgn':
tvratio = 0.8
l1l2ratio = 0.1
alpha = 0.1
tv = alpha * tvratio
l1 = alpha * (1 - tv) * l1l2ratio
l2 = alpha * (1 - tv) * (1 - l1l2ratio)
fista = algorithms.proximal.FISTA(max_iter=5000)
mod = estimators.LogisticRegressionL1L2GraphNet(l1, l2, tv, Agn,
                                                algorithm=fista,
                                                class_weight=class_weight,
                                                penalty_start=penalty_start)

# 'enet':
fista = algorithms.proximal.FISTA(max_iter=5000)
Example #14
y_test_pred = np.zeros(len(y))
y_test_prob_pred = np.zeros(len(y))
y_test_decfunc_pred = np.zeros(len(y))
y_train_pred = np.zeros(len(y))
coefs_cv = np.zeros((NFOLDS, X.shape[1]))

auc_test = list()
recalls_test = list()
acc_test = list()

for cv_i, (train, test) in enumerate(cv.split(X, y)):
    #for train, test in cv.split(X, y, None):
    print(cv_i)
    X_train, X_test, y_train, y_test = X[train, :], X[test, :], y[train], y[test]
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    conesta = algorithms.proximal.CONESTA(max_iter=10000)
    estimator = estimators.LogisticRegressionL1L2TV(l1, l2, tv, Atv, algorithm=conesta,
                                                    class_weight="auto", penalty_start=0)
    estimator.fit(X_train, y_train.ravel())
    # Store prediction for micro avg
    y_test_pred[test] = estimator.predict(X_test).ravel()
    y_test_prob_pred[test] = estimator.predict_probability(X_test).ravel()  # [:, 1]
    #y_test_decfunc_pred[test] = estimator.decision_function(X_test)
    y_train_pred[train] = estimator.predict(X_train).ravel()
    # Compute score for macro avg
    auc_test.append(metrics.roc_auc_score(y_test, estimator.predict_probability(X_test).ravel()))
    recalls_test.append(metrics.recall_score(y_test, estimator.predict(X_test).ravel(), average=None))
    acc_test.append(metrics.accuracy_score(y_test, estimator.predict(X_test).ravel()))

    coefs_cv[cv_i, :] = estimator.beta.ravel()

# Micro Avg
recall_test_microavg = metrics.recall_score(y, y_test_pred, average=None)
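The per-fold lists collected above give the macro averages; the pooled predictions give the micro averages (one of which is shown just above). A sketch of the macro side:

auc_test_macroavg = np.mean(auc_test)
acc_test_macroavg = np.mean(acc_test)
recall_test_macroavg = np.mean(np.array(recalls_test), axis=0)  # per class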
Example #15
def mapper(key, output_collector):
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]

    # key = 'enettv_0.01_0.1_0.2'.split("_")
    algo, alpha, l1l2ratio, tvratio = key[0], float(key[1]), float(
        key[2]), float(key[3])

    tv = alpha * tvratio
    l1 = alpha * float(1 - tv) * l1l2ratio
    l2 = alpha * float(1 - tv) * (1 - l1l2ratio)

    print(key, algo, alpha, l1, l2, tv)
    # alpha = float(key[0])
    # l1, l2, tv = alpha * float(key[1]), alpha * float(key[2]), alpha * float(key[3])
    # print("l1:%f, l2:%f, tv:%f" % (l1, l2, tv))

    class_weight = "auto"  # unbiased

    beta_start = GLOBAL.beta_start["lambda_%.4f" % alpha]
    print(beta_start.shape, Xtr.shape, beta_start.mean())
    # mask = np.ones(Xtr.shape[0], dtype=bool)

    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)

    if algo == 'enettv':
        conesta = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LogisticRegressionL1L2TV(l1,
                                                  l2,
                                                  tv,
                                                  GLOBAL.Atv,
                                                  algorithm=conesta,
                                                  class_weight=class_weight,
                                                  penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel(), beta=beta_start)
    elif algo == 'enetgn':
        fista = algorithms.proximal.FISTA(max_iter=500)
        mod = estimators.LogisticRegressionL1L2GraphNet(
            l1,
            l2,
            tv,
            GLOBAL.Agn,
            algorithm=fista,
            class_weight=class_weight,
            penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
    elif algo == 'enet':
        fista = algorithms.proximal.FISTA(max_iter=500)
        mod = estimators.ElasticNetLogisticRegression(
            l1l2ratio,
            alpha,
            algorithm=fista,
            class_weight=class_weight,
            penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
    else:
        raise Exception('Algo %s not handled' % algo)

    #mod.fit(Xtr, ytr.ravel())
    y_pred = mod.predict(Xte)
    proba_pred = mod.predict_probability(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, proba_pred=proba_pred,
               beta=mod.beta)  #, mask=mask)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
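GLOBAL.beta_start used above is a mapping from "lambda_%.4f" % alpha to a warm-start beta; a hypothetical construction with placeholder zero seeds (names and grid illustrative only):

import numpy as np

alphas = (0.01, 0.1, 1.0)       # assumed penalty grid
p = Xtr.shape[1]                # number of features after scaling
beta_start = {"lambda_%.4f" % a: np.zeros((p, 1)) for a in alphas}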
Example #16
    #

    # 4- build A matrix
    import parsimony.functions.nesterov.gl as gl
    import parsimony.algorithms.primaldual as explicit
    import parsimony.estimators as estimators
    Atv, n_compacts = parsimony.functions.nesterov.tv.A_from_shape((p_orig, ))
    eps = 1e-8
    max_iter = 2600
    conts = 2  # will be removed in a next version; effective iterations = max_iter * conts
    info_conf = [Info.fvalue, Info.num_iter]
    logr_tv = estimators.LogisticRegressionL1L2TV(
        l1=0,
        l2=0,
        tv=0.1,
        A=Atv,
        algorithm=explicit.StaticCONESTA(eps=eps,
                                         max_iter=max_iter,
                                         info=info_conf),
        mean=False)
    logr_tv.fit(X_orig, y)
    beta_w = logr_tv.beta
    #    plt.plot(beta_w[1:])
    #    plt.show()

    PENALTY_START = 1
    extended_groups = groups
    #    + [[i] for i in range(PENALTY_START, p-1)]
    # test with TV
    weights = [
        1. / (np.linalg.norm(beta_w[group])) for group in extended_groups
    ]