Example #1
0
def mapper(key, output_collector):
    """Fit an elastic-net logistic regression for one ``(alpha, l1l2ratio)``.

    The train/test split is read from ``mapreduce.DATA_RESAMPLED``: index 0
    of ``"X"``/``"y"`` is used for fitting, index 1 for prediction.  Both
    parts are standardised with a scaler fitted on the training part only.

    Parameters
    ----------
    key : sequence
        ``(alpha, l1l2ratio)``; both entries are converted with ``float``.
    output_collector : object or None
        When truthy, results are handed to
        ``output_collector.collect(key, ret)`` and nothing is returned.

    Returns
    -------
    dict or None
        ``dict(y_pred, y_true, proba_pred, beta)`` when ``output_collector``
        is falsy, otherwise ``None``.

    Notes
    -----
    ``penalty_start``, ``preprocessing``, ``algorithms`` and ``estimators``
    are resolved from the enclosing module.
    """
    import mapreduce as GLOBAL

    resampled = GLOBAL.DATA_RESAMPLED
    Xtr, Xte = resampled["X"][0], resampled["X"][1]
    ytr, yte = resampled["y"][0], resampled["y"][1]

    alpha = float(key[0])
    l1l2ratio = float(key[1])

    # l1 / l2 are only reported in the log line below; the estimator itself
    # is parameterised by (l1l2ratio, alpha).
    l1 = alpha * l1l2ratio
    l2 = alpha * (1 - l1l2ratio)
    print(key, alpha, l1, l2)

    # Standardise using statistics of the training part only.
    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)

    solver = algorithms.proximal.FISTA(max_iter=5000)
    mod = estimators.ElasticNetLogisticRegression(
        l1l2ratio,
        alpha,
        algorithm=solver,
        class_weight='auto',  # unbiased
        penalty_start=penalty_start,
    )
    mod.fit(Xtr, ytr.ravel())

    ret = {
        "y_pred": mod.predict(Xte),
        "y_true": yte,
        "proba_pred": mod.predict_probability(Xte),
        "beta": mod.beta,
    }
    if not output_collector:
        return ret
    output_collector.collect(key, ret)
Example #2
0
def mapper(key, output_collector):
    """Fit one penalised logistic regression selected by ``key``.

    The train/test split is read from ``mapreduce.DATA_RESAMPLED``: index 0
    of ``"X"``/``"y"`` is used for fitting, index 1 for prediction.  Both
    parts are standardised with a scaler fitted on the training part only.

    Parameters
    ----------
    key : sequence
        ``(algo, alpha, l1l2ratio, tvratio)``, e.g. the result of
        ``'enettv_0.01_0.1_0.2'.split("_")``.  ``algo`` is one of
        ``'enettv'``, ``'enetgn'`` or ``'enet'``; the remaining entries are
        converted with ``float``.
    output_collector : object or None
        When truthy, results are handed to
        ``output_collector.collect(key, ret)`` and nothing is returned.

    Returns
    -------
    dict or None
        ``dict(y_pred, y_true, proba_pred, beta)`` when ``output_collector``
        is falsy, otherwise ``None``.

    Raises
    ------
    Exception
        If ``algo`` is not one of the handled names.

    Notes
    -----
    ``penalty_start``, ``preprocessing``, ``algorithms`` and ``estimators``
    are resolved from the enclosing module.

    Debugging recipe kept from the original author::

        config = json.load(open(os.path.join(WD, "config_cv_largerange.json"), "r"))
        load_globals(config)
        resample(config, 'refit/refit')
        key = ('enettv', 0.01, 0.1, 0.3)
    """
    import mapreduce as GLOBAL

    resampled = GLOBAL.DATA_RESAMPLED
    Xtr, Xte = resampled["X"][0], resampled["X"][1]
    ytr, yte = resampled["y"][0], resampled["y"][1]

    algo = key[0]
    alpha = float(key[1])
    l1l2ratio = float(key[2])
    tvratio = float(key[3])

    tv = alpha * tvratio
    # NOTE(review): ``tv`` is already scaled by alpha here, so the remaining
    # weight is (1 - alpha * tvratio) rather than (1 - tvratio) -- confirm
    # this parameterisation is intended.
    remaining = alpha * float(1 - tv)
    l1 = remaining * l1l2ratio
    l2 = remaining * (1 - l1l2ratio)

    print(key, algo, alpha, l1, l2, tv)

    class_weight = "auto"  # unbiased

    # Standardise using statistics of the training part only.
    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)

    # Build the estimator for the requested algorithm; fitting is shared.
    if algo == 'enettv':
        solver = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LogisticRegressionL1L2TV(
            l1, l2, tv, GLOBAL.Atv,
            algorithm=solver,
            class_weight=class_weight,
            penalty_start=penalty_start)
    elif algo == 'enetgn':
        solver = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.LogisticRegressionL1L2GraphNet(
            l1, l2, tv, GLOBAL.Agn,
            algorithm=solver,
            class_weight=class_weight,
            penalty_start=penalty_start)
    elif algo == 'enet':
        solver = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.ElasticNetLogisticRegression(
            l1l2ratio, alpha,
            algorithm=solver,
            class_weight=class_weight,
            penalty_start=penalty_start)
    else:
        raise Exception('Algo%s not handled' % algo)

    mod.fit(Xtr, ytr.ravel())

    ret = {
        "y_pred": mod.predict(Xte),
        "y_true": yte,
        "proba_pred": mod.predict_probability(Xte),
        "beta": mod.beta,
    }
    if not output_collector:
        return ret
    output_collector.collect(key, ret)
# Register reference estimators in MODELS.  ``alpha``, ``algorithm_params``,
# ``has_sklearn`` and ``MODELS`` are defined outside this fragment.

# l2-penalised logistic regression with penalty_start=1
# (presumably leaves an intercept column unpenalised -- confirm).
MODELS["2d_l2_inter_grad_descnt"] = estimators.RidgeLogisticRegression(
    alpha,
    class_weight=None,
    mean=False,
    penalty_start=1,
    algorithm_params=algorithm_params,
)

# l1-penalised logistic regression, no intercept: scikit-learn reference.
if has_sklearn:
    MODELS["2d_l1_sklearn"] = sklearn.linear_model.LogisticRegression(
        C=1.0 / alpha,
        penalty="l1",
        fit_intercept=False,
        class_weight=None,
        dual=False,
        solver="liblinear",
    )

# Same l1 problem expressed as an elastic net with l=1.
MODELS["2d_l1_fista"] = estimators.ElasticNetLogisticRegression(
    alpha=alpha,
    l=1.0,
    class_weight=None,
    mean=False,
    algorithm_params=algorithm_params,
)

# l1-penalised logistic regression with intercept: scikit-learn reference.
if has_sklearn:
    MODELS["2d_l1_inter_sklearn"] = sklearn.linear_model.LogisticRegression(
        C=1.0 / alpha,
        penalty="l1",
        fit_intercept=True,
        class_weight=None,
        dual=False,
        solver="liblinear",
    )

MODELS["2d_l1_inter_fista"] = \
    estimators.ElasticNetLogisticRegression(alpha=alpha, l=1.,
                                            class_weight=None,
                                            mean=False,
                                            penalty_start=1,
# Compare per-class recall of several logistic-regression estimators.
# ``Xtr``, ``ytr``, ``Xte``, ``yte``, ``alpha``, ``n_train`` and ``shape``
# are defined outside this fragment.

# Ridge, scikit-learn: C is set to 1 / (alpha * n_train).
ridge_sklrn = LogisticRegression(
    C=1.0 / (alpha * n_train),
    fit_intercept=False,
)

yte_pred_ridge = ridge_sklrn.fit(Xtr, ytr.ravel()).predict(Xte)
(_, recall_ridge_sklrn, _, _) = precision_recall_fscore_support(
    yte, yte_pred_ridge, average=None
)

# Ridge Parsimony
#   Min  f(beta, X, y) = - loglik/n_train + k/2 * ||beta||^2_2
ridge_prsmy = estimators.RidgeLogisticRegression(alpha)

yte_pred_ridge_prsmy = ridge_prsmy.fit(Xtr, ytr).predict(Xte)
(_, recall_ridge_prsmy, _, _) = precision_recall_fscore_support(
    yte, yte_pred_ridge_prsmy, average=None
)

# ElasticNet
enet = estimators.ElasticNetLogisticRegression(l=0.5, alpha=alpha)
yte_pred_enet = enet.fit(Xtr, ytr).predict(Xte)
(_, recall_enet, _, _) = precision_recall_fscore_support(
    yte, yte_pred_enet, average=None
)

# GraphNet
# Alternative weights: l1, l2, gn = alpha * np.array((.05, .75, .2))
# NOTE(review): the third weight is 33, not .33 -- possibly a typo; confirm
# before relying on the GraphNet result.
l1, l2, gn = alpha * np.array((0.33, 0.33, 33))  # l1, l2, gn penalties
A = sparse.vstack(nesterov_tv.linear_operator_from_shape(shape))
enetgn = estimators.LogisticRegressionL1L2GraphNet(l1, l2, gn, A)
yte_pred_enetgn = enetgn.fit(Xtr, ytr).predict(Xte)
(_, recall_enetgn, _, _) = precision_recall_fscore_support(
    yte, yte_pred_enetgn, average=None
)

# LogisticRegressionL1L2TV
l1, l2, tv = alpha * np.array((0.05, 0.75, 0.2))  # l1, l2, tv penalties
#
# Train on the SCZ data and evaluate on the PRAGUE data.
# ``Agn``, ``class_weight``, ``penalty_start``, ``X_scz``, ``y_scz``,
# ``X_pra`` and ``y_pra`` are defined outside this fragment.

# 'enetgn':
tvratio = 0.8
l1l2ratio = 0.1
alpha = 0.1
tv = alpha * tvratio
# NOTE(review): ``tv`` is already scaled by alpha, so the remaining weight is
# (1 - alpha * tvratio) rather than (1 - tvratio) -- confirm this is intended.
l1 = alpha * float(1 - tv) * l1l2ratio
l2 = alpha * float(1 - tv) * (1 - l1l2ratio)
fista = algorithms.proximal.FISTA(max_iter=5000)
mod = estimators.LogisticRegressionL1L2GraphNet(
    l1, l2, tv, Agn,
    algorithm=fista,
    class_weight=class_weight,
    penalty_start=penalty_start)

# algo == 'enet':
# This assignment replaces the GraphNet model built above; the elastic-net
# model is the one that is fitted and evaluated below.
fista = algorithms.proximal.FISTA(max_iter=5000)
# Fix: the call used to be closed right after the two positional arguments,
# leaving the keyword arguments on a dangling line (a SyntaxError).
mod = estimators.ElasticNetLogisticRegression(
    0.01, 0.1,
    algorithm=fista,
    class_weight=class_weight,
    penalty_start=penalty_start)

mod.fit(X_scz, y_scz)
y_pred_pra = mod.predict(X_pra)
y_proba_pred_pra = mod.predict_probability(X_pra)

# With average=None the scores are returned per class; ``r`` holds recalls.
p, r, f, s = precision_recall_fscore_support(y_pra, y_pred_pra, average=None)
auc = roc_auc_score(y_pra, y_proba_pred_pra)

print("######################################")
print("Classification performance on PRAGUE dataset:")
print("Balanced accuracy : " + str(r.mean()))
print("Spe and Sen : " + str(r[0]) + " " + str(r[1]))
# Train on the SCZ data and evaluate on the PRAGUE data.
# ``alpha``, ``tv``, ``l1l2ratio``, ``Agn``, ``class_weight``,
# ``penalty_start``, ``X_scz``, ``y_scz``, ``X_pra`` and ``y_pra`` are
# defined before this fragment.
# NOTE(review): (1 - tv) uses the penalty value ``tv`` itself -- confirm
# that (1 - tvratio) was not intended.
l1 = alpha * float(1 - tv) * l1l2ratio
l2 = alpha * float(1 - tv) * (1 - l1l2ratio)
fista = algorithms.proximal.FISTA(max_iter=5000)
mod = estimators.LogisticRegressionL1L2GraphNet(
    l1, l2, tv, Agn,
    algorithm=fista,
    class_weight=class_weight,
    penalty_start=penalty_start,
)

# algo == 'enet':
# This assignment replaces the GraphNet model built above; the elastic-net
# model is the one that is fitted and evaluated below.
fista = algorithms.proximal.FISTA(max_iter=5000)
mod = estimators.ElasticNetLogisticRegression(
    0.1, 0.1,
    algorithm=fista,
    class_weight=class_weight,
    penalty_start=penalty_start,
)

mod.fit(X_scz, y_scz)
y_pred_pra = mod.predict(X_pra)
y_proba_pred_pra = mod.predict_probability(X_pra)

# With average=None the scores are returned per class; ``r`` holds recalls.
(p, r, f, s) = precision_recall_fscore_support(
    y_pra, y_pred_pra, average=None
)
auc = roc_auc_score(y_pra, y_proba_pred_pra)

print("######################################")
print("Classification performance on PRAGUE dataset:")
print("Balanced accuracy : " + str(r.mean()))
print("Spe and Sen : " + str(r[0]) + " " + str(r[1]))
print("AUC : " + str(auc))
def mapper(key, output_collector):
    """Fit one penalised logistic regression selected by ``key``.

    The train/test split is read from ``mapreduce.DATA_RESAMPLED``: index 0
    of ``"X"``/``"y"`` is used for fitting, index 1 for prediction.  Both
    parts are standardised with a scaler fitted on the training part only.
    A starting vector is looked up in ``mapreduce.beta_start`` under the key
    ``"lambda_%.4f" % alpha``; it is passed to ``fit`` only for ``'enettv'``.

    Parameters
    ----------
    key : sequence
        ``(algo, alpha, l1l2ratio, tvratio)``, e.g. the result of
        ``'enettv_0.01_0.1_0.2'.split("_")``.  ``algo`` is one of
        ``'enettv'``, ``'enetgn'`` or ``'enet'``; the remaining entries are
        converted with ``float``.
    output_collector : object or None
        When truthy, results are handed to
        ``output_collector.collect(key, ret)`` and nothing is returned.

    Returns
    -------
    dict or None
        ``dict(y_pred, y_true, proba_pred, beta)`` when ``output_collector``
        is falsy, otherwise ``None``.

    Raises
    ------
    Exception
        If ``algo`` is not one of the handled names.

    Notes
    -----
    ``penalty_start``, ``preprocessing``, ``algorithms`` and ``estimators``
    are resolved from the enclosing module.
    """
    import mapreduce as GLOBAL

    resampled = GLOBAL.DATA_RESAMPLED
    Xtr, Xte = resampled["X"][0], resampled["X"][1]
    ytr, yte = resampled["y"][0], resampled["y"][1]

    algo = key[0]
    alpha = float(key[1])
    l1l2ratio = float(key[2])
    tvratio = float(key[3])

    tv = alpha * tvratio
    # NOTE(review): ``tv`` is already scaled by alpha here, so the remaining
    # weight is (1 - alpha * tvratio) rather than (1 - tvratio) -- confirm
    # this parameterisation is intended.
    remaining = alpha * float(1 - tv)
    l1 = remaining * l1l2ratio
    l2 = remaining * (1 - l1l2ratio)

    print(key, algo, alpha, l1, l2, tv)

    class_weight = "auto"  # unbiased

    # The lookup happens for every algo, although only 'enettv' uses it.
    beta_start = GLOBAL.beta_start["lambda_%.4f" % alpha]
    print(beta_start.shape, Xtr.shape, beta_start.mean())

    # Standardise using statistics of the training part only.
    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)

    # Build the estimator for the requested algorithm; only 'enettv' is
    # warm-started from beta_start.
    fit_kwargs = {}
    if algo == 'enettv':
        solver = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LogisticRegressionL1L2TV(
            l1, l2, tv, GLOBAL.Atv,
            algorithm=solver,
            class_weight=class_weight,
            penalty_start=penalty_start)
        fit_kwargs["beta"] = beta_start
    elif algo == 'enetgn':
        solver = algorithms.proximal.FISTA(max_iter=500)
        mod = estimators.LogisticRegressionL1L2GraphNet(
            l1, l2, tv, GLOBAL.Agn,
            algorithm=solver,
            class_weight=class_weight,
            penalty_start=penalty_start)
    elif algo == 'enet':
        solver = algorithms.proximal.FISTA(max_iter=500)
        mod = estimators.ElasticNetLogisticRegression(
            l1l2ratio, alpha,
            algorithm=solver,
            class_weight=class_weight,
            penalty_start=penalty_start)
    else:
        raise Exception('Algo%s not handled' % algo)

    mod.fit(Xtr, ytr.ravel(), **fit_kwargs)

    ret = {
        "y_pred": mod.predict(Xte),
        "y_true": yte,
        "proba_pred": mod.predict_probability(Xte),
        "beta": mod.beta,
    }
    if not output_collector:
        return ret
    output_collector.collect(key, ret)