def mapper(key, output_collector):
    """Fit one L1 + L2 + group-lasso logistic regression and collect the result.

    Parameters
    ----------
    key : sequence
        Hyper-parameters of the run. Items 0..3 are read as
        ``(k, g, alpha, l)``. The fourth item is read but then superseded by
        ``GLOBAL.ridge_coef``.

    output_collector : object
        Its ``collect(key, result)`` method is called once with a dict holding
        ``y_pred``, ``y_true`` and ``beta``.
    """
    import mapreduce as GLOBAL  # access to global variables

    # Unpack the hyper-parameters; all four positions must exist in `key`.
    l1_ratio, gl_ratio, alpha, l2_ratio = key[0], key[1], key[2], key[3]

    # Index 0 of each resampled pair is used for fitting, index 1 for
    # prediction.
    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]
    y_train = GLOBAL.DATA_RESAMPLED["y"][0]
    y_test = GLOBAL.DATA_RESAMPLED["y"][1]

    # NOTE(review): the value taken from key[3] is discarded here in favour
    # of the global ridge coefficient -- confirm this is intended.
    l2_ratio = GLOBAL.ridge_coef

    n_columns = X_train.shape[1]
    group_list = GLOBAL.groups
    group_weights = GLOBAL.weights
    tolerance = 1e-8
    iteration_cap = 2600

    # Compute A matrix (we need only p-PENALTY_START columns)
    group_operator = gl.A_from_groups(n_columns - PENALTY_START,
                                      groups=group_list,
                                      weights=group_weights)

    # Every penalty is scaled by the global factor alpha. The three values
    # are passed positionally -- presumably (l1, l2, gl); confirm against
    # the estimator signature.
    first_penalty = alpha * l1_ratio
    second_penalty = alpha * l2_ratio
    third_penalty = alpha * gl_ratio
    solver = explicit.StaticCONESTA(eps=tolerance, max_iter=iteration_cap)

    # Original author's note on this call: "since we residualized BMI with
    # 2 categorical covariables (Gender and ImagingCentreCity - 8 columns)
    # and 2 ordinal variables (tiv_gaser and mean_pds - 2 columns)".
    model = estimators.LogisticRegressionL1L2GL(
        first_penalty,
        second_penalty,
        third_penalty,
        A=group_operator,
        algorithm=solver,
        penalty_start=PENALTY_START,
        mean=False
    )

    fitted = model.fit(X_train, y_train)
    predictions = fitted.predict(X_test)
    result = dict(y_pred=predictions, y_true=y_test, beta=model.beta)
    output_collector.collect(key, result)
def A_from_structure(structure_filepath):
    """User defined function, to build the A matrices.

    Parameters
    ----------
    structure : string, filepath to the structure

    Return
    ------
    A, structure
    Those two objects will be accessible via global variables: A and STRUCTURE
    """
    sys.path.append(
        os.path.join(os.getenv('HOME'), 'gits', 'scripts',
                     '2013_brainomics_genomics'))
    from bgutils.build_websters import group_pw_snp2
    #a terme on passe ici la fichier vers les données de contrainte
    group, group_names, snpList = group_pw_snp2(fic='go_synaptic_snps_gene',
                                                cache=True)
    tmp = []
    for i in group:
        tmp.extend(group[i])
    p = len(set(tmp))
    print "DEBUG: ", p
    import parsimony.functions.nesterov.gl as gl
    weights = [np.sqrt(len(group[i])) for i in group]
    A = gl.A_from_groups(p, groups=group, weights=weights)
    structure = None

    return A, structure
def A_from_structure(structure_filepath):
    """Build the group-lasso A matrices with weights derived from a prior fit.

    Parameters
    ----------
    structure_filepath : string, common prefix of two input files:
        ``<prefix>-unbiased-beta.npz`` (array stored under ``'arr_0'``) and
        ``<prefix>.pickle`` (a dict providing at least the ``'group'`` and
        ``'snpList'`` entries).

    Return
    ------
    A, STRUCTURE
    A is built by ``gl.A_from_groups`` over ``len(snpList)`` variables;
    STRUCTURE is the loaded unbiased beta array.
    """
    # read weights infos
    unbiased_beta = np.load(structure_filepath + '-unbiased-beta.npz')['arr_0']
    # Use a context manager so the pickle file handle is always released.
    # NOTE(review): unpickling runs arbitrary code -- only load trusted files.
    # NOTE(review): the file is opened in the default (text) mode as before;
    # binary pickles need 'rb' on some platforms -- confirm how it is written.
    with open(structure_filepath + '.pickle') as pickle_file:
        combo = pickle.load(pickle_file)
    group, snpList = combo['group'], combo['snpList']
    # One weight per group: the inverse square root of the L2 norm of the
    # unbiased beta restricted to that group.
    group_norms = [np.linalg.norm(unbiased_beta[group[i]]) for i in group]
    weights = 1. / np.sqrt(np.asarray(group_norms))
    import parsimony.functions.nesterov.gl as gl
    A = gl.A_from_groups(len(snpList), groups=group, weights=weights)
    STRUCTURE = unbiased_beta

    return A, STRUCTURE
                                    mean=True)
    enet_PP.fit(X_res, z)
    print "Compute beta values"
    beta = enet_PP.beta
    beta = beta[11:]  #do not consider covariates
    print "Compute the weights using Parsimony's ElasticNet algorithm."
    weights = [
        math.pow(abs(beta[j[0]]) + 1 / float(n), -gamma) for j in groups
    ]

    # Adaptive Elasticnet algorithm
    adaptive_enet = estimators.LinearRegressionL1L2GL(
        l1=0,
        l2=0.8,
        gl=0.006,
        A=gl.A_from_groups(p, groups, weights=weights, penalty_start=11),
        algorithm=proximal.FISTA(),
        algorithm_params=dict(max_iter=10000),
        penalty_start=11,
        mean=True)

    stime = time.time()
    print "================================================================="
    print "Now fitting the model"
    adaptive_enet.fit(X_res, z)
    print "Fit duration : ", time.time() - stime
    print "================================================================="

    # Interpretation
    beta_w = adaptive_enet.beta
    plt.plot(beta_w[11:])
Beispiel #5
0
    groups = [[j] for j in range(0, p)]
    print "Compute ElasticNet algorithm"
    enet_PP = estimators.ElasticNet(l=0.8,
                                    alpha=0.006,
                                    penalty_start=11,
                                    mean=True)
    enet_PP.fit(X_res, z)
    print "Compute beta values"
    beta = enet_PP.beta
    beta = beta[11:]  #do not consider covariates
    print "Compute the weights using Parsimony's ElasticNet algorithm."
    weights = [
        math.pow(abs(beta[j[0]]) + 1 / float(n), -gamma) for j in groups
    ]
    # Adaptive Elasticnet algorithm
    A = gl.A_from_groups(p, groups, weights=weights)
    adaptive_enet = estimators.LinearRegressionL1L2GL(
        l1=0,
        l2=0.8,
        gl=0.006,
        A=A,
        algorithm=proximal.FISTA(),
        algorithm_params=dict(max_iter=10000),
        penalty_start=11,
        mean=True)

    stime = time.time()
    print "================================================================="
    print "Now fitting the model"
    adaptive_enet.fit(X_res, z)
    print "Fit duration : ", time.time() - stime
Beispiel #6
0
def webs_logr_weight(basepath, pw_name, precomp_save=True):
    precomp_save = False
    # 1- read constraints : we do not use Group Constraint here
#    from bgutils.build_websters import group_pw_snp2,get_websters_logr, pw_gene_snp2
#    fic = 'go_synaptic_snps_gene'  #'go_synaptic_snps_gene10'
#    group, group_names, snpList = group_pw_snp2(fic=fic, cache=True)
#    pw, _ = pw_gene_snp2(fic=fic, cache=True)
    from bgutils.build_websters import get_websters_logr
    from bgutils.utils_pw import build_msigdb
    group, group_names, pw, snpList = build_msigdb(
                 pw_name= pw_name, 
                 mask = os.path.join(basepath,'data','geno','genetic_control_xpt'), 
                 outdir=os.path.join(basepath,'data'), cache=True)
    
    # 2- get the snps list to get a data set w/ y continous variable
    # convenient snp order
    # subject order granted by the method
    snp_subset=np.asarray(snpList,dtype=str).tolist()
    y, X = get_websters_logr(snp_subset=snp_subset)

    # 3- fix X : add a ones constant regressor
    p = (X.shape)[1]                            # keep orig size
    X = np.hstack((np.ones((X.shape[0],1)),X))  # add intercept
    
    eps = 1e-6
    max_iter = 200
    conts = 20        # will be removed next version current max_iter x cont
     
    if precomp_save:        
        # 4- build A matrix
        #normalement ne sert a rien
        import parsimony.functions.nesterov.gl as gl
        weights = [np.sqrt(len(group[i])) for i in group]
        A = gl.A_from_groups(p, groups=group, weights=weights)
        
        import parsimony.algorithms.explicit as explicit
        import parsimony.estimators as estimators
        # 5- Logistic regresssion
        k = 0.002 #ridge 
        l = 0.0 #lasso ( if ENET k+l should be 1
        g = 0.0 
        logr_gl = estimators.RidgeLogisticRegression_L1_GL(
                        k=k, l=l, g=g,
                        A=A,
                        output=True,
                        algorithm=explicit.StaticCONESTA(eps=eps,
                                                         continuations=conts,
                                                         max_iter=max_iter),
                        penalty_start=1,
                        mean=False)    #mean error of lST sq error
        stime = time.time()
        print "================================================================="
        print "Now fitting the model"
        logr_gl.fit(X, y )
        print "Fit duration : ", time.time() - stime
        print "================================================================="
    
    
        # 6- Interpretation
        beta = logr_gl.beta[1:] 
        np.savez(os.path.join(basepath,'data',pw_name+'-unbiased-beta'), beta)
    
    else:
        print "Now performing adaptive groupLasso"
        unbiased_beta = np.load(os.path.join(basepath,'data',pw_name+'-unbiased-beta.npz'))['arr_0']
        norme2 = np.linalg.norm(unbiased_beta)
        k = 1./norme2
        weights = [np.linalg.norm(unbiased_beta[group[i]]) for i in group]
#        weights = 1./np.asarray(weights)
        weights = 1./np.sqrt(np.asarray(weights))
        l = 0.
        g = 1.
        alpha = 50.
        
        import parsimony.functions.nesterov.gl as gl
        import parsimony.algorithms.explicit as explicit
        import parsimony.estimators as estimators

        A = gl.A_from_groups(p, groups=group, weights=weights)
        k, l, g = alpha * np.array((k, l , g))
        logr_gl = estimators.RidgeLogisticRegression_L1_GL(
                        k=k, l=l, g=g,
                        A=A,
                        output=True,
                        algorithm=explicit.StaticCONESTA(eps=eps,
                                                         continuations=conts,
                                                         max_iter=max_iter),
                        penalty_start=1,
                        mean=False)    #mean error of lST sq error
        stime = time.time()
        print "================================================================="
        print "Now fitting the model"
        logr_gl.fit(X, y )
        print "Fit duration : ", time.time() - stime
        print "================================================================="
        
        beta = logr_gl.beta[1:] 
        mask = (logr_gl.beta[1:] != 0.).ravel()
        mask = (beta*beta>1e-8)
        from bgutils.pway_interpret import pw_status, pw_beta_thresh
        pw_status(pw, snpList, mask.ravel())
    
        # 7- 
        from bgutils.pway_plot import plot_pw
        beta = logr_gl.beta[1:].copy()
        beta = beta / np.max(np.abs(beta))
    #    nbeta = pw_beta_thresh(beta, threshold=1e-2)
    #    nbeta[nbeta!=0.] = 0.8
    #    nbeta[nbeta==0.] = 0.1
#        plot_pw(beta, pway=pw, snplist=snpList, cache=True)    
#        plt.show()
#    
#        plt.plot(logr_gl.info['f'], '+')
#        plt.show()
        return(dict(
            model=logr_gl,group=group, group_names=group_names, 
            pw=pw, snpList=snpList 
            ))
    logr_tv.fit(X_orig, y)
    beta_w = logr_tv.beta
    #    plt.plot(beta_w[1:])
    #    plt.show()

    PENALTY_START = 1
    extended_groups = groups
    #    + [[i] for i in range(PENALTY_START, p-1)]
    #test avec tv
    weights = [
        1. / (np.linalg.norm(beta_w[group])) for group in extended_groups
    ]
    #test avec lengeur
    #    weights = [np.sqrt(len(group[i])) for i in group]
    A = gl.A_from_groups(p - PENALTY_START,
                         groups=extended_groups,
                         weights=weights)

    # 5- Logistic regresssion
    eps = 1e-8
    max_iter = 2600
    conts = 2
    alpha = 11  # will be removed next version current max_iter x cont
    k = (0.1) * (1. / (np.linalg.norm(beta_w)))
    l = 0.1  #lasso ( if ENET k+l should be 1
    g = 0.1
    logr_gl = estimators.LogisticRegressionL1L2GL(
        l1=alpha * l,
        l2=alpha * k,
        gl=alpha * g,
        A=A,