def mapper(key, output_collector):
    """Fit one group-lasso logistic regression and collect its predictions.

    Parameters
    ----------
    key : sequence of four values ``(k, g, alpha, l)``. The fourth entry is
        read but immediately superseded by ``GLOBAL.ridge_coef``.

    output_collector : object exposing ``collect(key, value)``. It receives a
        dict with the predictions on the second data split (``y_pred``), the
        matching targets (``y_true``) and the fitted coefficients (``beta``).
    """
    import mapreduce as GLOBAL  # access to global variables

    # key: list of parameters (the last one is overridden just below)
    k_val, g_val, scale, ridge = key[0], key[1], key[2], key[3]

    # Index 0 of each resampled entry is fitted on, index 1 is predicted on.
    X_fit = GLOBAL.DATA_RESAMPLED["X"][0]
    X_eval = GLOBAL.DATA_RESAMPLED["X"][1]
    y_fit = GLOBAL.DATA_RESAMPLED["y"][0]
    y_eval = GLOBAL.DATA_RESAMPLED["y"][1]
    ridge = GLOBAL.ridge_coef

    n_columns = X_fit.shape[1]

    # Compute A matrix (we need only p - PENALTY_START columns)
    linear_operator = gl.A_from_groups(n_columns - PENALTY_START,
                                       groups=GLOBAL.groups,
                                       weights=GLOBAL.weights)

    # Original author's note: BMI was residualized with 2 categorical
    # covariables (Gender and ImagingCentreCity - 8 columns) and 2 ordinal
    # variables (tiv_gaser and mean_pds - 2 columns).
    # NOTE(review): presumably this explains the value of PENALTY_START;
    # confirm against its definition.
    solver = explicit.StaticCONESTA(eps=1e-8, max_iter=2600)
    model = estimators.LogisticRegressionL1L2GL(
        scale * k_val, scale * ridge, scale * g_val,
        A=linear_operator,
        algorithm=solver,
        penalty_start=PENALTY_START,
        mean=False)

    predictions = model.fit(X_fit, y_fit).predict(X_eval)
    output_collector.collect(
        key, dict(y_pred=predictions, y_true=y_eval, beta=model.beta))
def A_from_structure(structure_filepath): """User defined function, to build the A matrices. Parameters ---------- structure : string, filepath to the structure Return ------ A, structure Those two objects will be accessible via global variables: A and STRUCTURE """ sys.path.append( os.path.join(os.getenv('HOME'), 'gits', 'scripts', '2013_brainomics_genomics')) from bgutils.build_websters import group_pw_snp2 #a terme on passe ici la fichier vers les données de contrainte group, group_names, snpList = group_pw_snp2(fic='go_synaptic_snps_gene', cache=True) tmp = [] for i in group: tmp.extend(group[i]) p = len(set(tmp)) print "DEBUG: ", p import parsimony.functions.nesterov.gl as gl weights = [np.sqrt(len(group[i])) for i in group] A = gl.A_from_groups(p, groups=group, weights=weights) structure = None return A, structure
def A_from_structure(structure_filepath):
    """Build the group-lasso operator A with adaptive per-group weights.

    Parameters
    ----------
    structure_filepath : string, common prefix of the two files read here:
        ``<prefix>-unbiased-beta.npz`` (array stored under ``'arr_0'``) and
        ``<prefix>.pickle`` (mapping providing at least the ``'group'`` and
        ``'snpList'`` entries).

    Return
    ------
    A, STRUCTURE
    A is the value returned by ``gl.A_from_groups``; STRUCTURE is the array
    loaded from the ``.npz`` file.
    """
    # read weights infos
    unbiased_beta = np.load(structure_filepath + '-unbiased-beta.npz')['arr_0']

    # NOTE: unpickling can execute arbitrary code; only use this function on
    # files coming from a trusted source.
    # The context manager guarantees the file is closed once it has been read
    # (the handle used to be left open).
    with open(structure_filepath + '.pickle') as pickle_file:
        combo = pickle.load(pickle_file)
    group, snpList = combo['group'], combo['snpList']

    # One weight per group: 1 / sqrt(||unbiased_beta[group]||_2).
    # NOTE(review): a group whose norm is 0 yields an infinite weight; the
    # weights are also built in the iteration order of `group` - confirm that
    # this order matches the one `gl.A_from_groups` uses for `groups`.
    norms = [np.linalg.norm(unbiased_beta[group[i]]) for i in group]
    weights = 1. / np.sqrt(np.asarray(norms))

    import parsimony.functions.nesterov.gl as gl
    A = gl.A_from_groups(len(snpList), groups=group, weights=weights)
    STRUCTURE = unbiased_beta
    return A, STRUCTURE
# NOTE(review): this excerpt starts in the middle of a call whose opening lies
# outside the visible source; `mean=True)` below only closes that call.
mean=True)
enet_PP.fit(X_res, z)

print "Compute beta values"
beta = enet_PP.beta
# Do not consider covariates: the first 11 coefficients are dropped, the same
# count that is passed as penalty_start below.
beta = beta[11:]

print "Compute the weights using Parsimony's ElasticNet algorithm."
# Adaptive weight of each group: (|beta[j0]| + 1/n) ** -gamma, where j0 is the
# first index listed in the group.
weights = [
    math.pow(abs(beta[j[0]]) + 1 / float(n), -gamma)
    for j in groups
]

# Adaptive Elasticnet algorithm
# The group-lasso operator is built inline with the adaptive weights.
adaptive_enet = estimators.LinearRegressionL1L2GL(
    l1=0, l2=0.8, gl=0.006,
    A=gl.A_from_groups(p, groups, weights=weights, penalty_start=11),
    algorithm=proximal.FISTA(),
    algorithm_params=dict(max_iter=10000),
    penalty_start=11,
    mean=True)

# Time the fit and report its duration.
stime = time.time()
print "================================================================="
print "Now fitting the model"
adaptive_enet.fit(X_res, z)
print "Fit duration : ", time.time() - stime
print "================================================================="

# Interpretation
# Plot the fitted coefficients without the first 11 entries.
beta_w = adaptive_enet.beta
plt.plot(beta_w[11:])
# One singleton group per index in [0, p).
groups = [[j] for j in range(0, p)]

# First pass: a plain ElasticNet whose coefficients drive the adaptive weights.
print "Compute ElasticNet algorithm"
enet_PP = estimators.ElasticNet(l=0.8,
                                alpha=0.006,
                                penalty_start=11,
                                mean=True)
enet_PP.fit(X_res, z)

print "Compute beta values"
beta = enet_PP.beta
# Do not consider covariates: the first 11 coefficients are dropped, the same
# count that is passed as penalty_start.
beta = beta[11:]

print "Compute the weights using Parsimony's ElasticNet algorithm."
# Adaptive weight of each group: (|beta[j0]| + 1/n) ** -gamma, where j0 is the
# group's single index.
weights = [
    math.pow(abs(beta[j[0]]) + 1 / float(n), -gamma)
    for j in groups
]

# Adaptive Elasticnet algorithm
# NOTE(review): A is built for p columns without a penalty_start argument while
# the estimator uses penalty_start=11 - confirm that p counts only the
# penalised variables.
A = gl.A_from_groups(p, groups, weights=weights)
adaptive_enet = estimators.LinearRegressionL1L2GL(
    l1=0, l2=0.8, gl=0.006,
    A=A,
    algorithm=proximal.FISTA(),
    algorithm_params=dict(max_iter=10000),
    penalty_start=11,
    mean=True)

# Time the fit and report its duration.
stime = time.time()
print "================================================================="
print "Now fitting the model"
adaptive_enet.fit(X_res, z)
print "Fit duration : ", time.time() - stime
def webs_logr_weight(basepath, pw_name, precomp_save=True): precomp_save = False # 1- read constraints : we do not use Group Constraint here # from bgutils.build_websters import group_pw_snp2,get_websters_logr, pw_gene_snp2 # fic = 'go_synaptic_snps_gene' #'go_synaptic_snps_gene10' # group, group_names, snpList = group_pw_snp2(fic=fic, cache=True) # pw, _ = pw_gene_snp2(fic=fic, cache=True) from bgutils.build_websters import get_websters_logr from bgutils.utils_pw import build_msigdb group, group_names, pw, snpList = build_msigdb( pw_name= pw_name, mask = os.path.join(basepath,'data','geno','genetic_control_xpt'), outdir=os.path.join(basepath,'data'), cache=True) # 2- get the snps list to get a data set w/ y continous variable # convenient snp order # subject order granted by the method snp_subset=np.asarray(snpList,dtype=str).tolist() y, X = get_websters_logr(snp_subset=snp_subset) # 3- fix X : add a ones constant regressor p = (X.shape)[1] # keep orig size X = np.hstack((np.ones((X.shape[0],1)),X)) # add intercept eps = 1e-6 max_iter = 200 conts = 20 # will be removed next version current max_iter x cont if precomp_save: # 4- build A matrix #normalement ne sert a rien import parsimony.functions.nesterov.gl as gl weights = [np.sqrt(len(group[i])) for i in group] A = gl.A_from_groups(p, groups=group, weights=weights) import parsimony.algorithms.explicit as explicit import parsimony.estimators as estimators # 5- Logistic regresssion k = 0.002 #ridge l = 0.0 #lasso ( if ENET k+l should be 1 g = 0.0 logr_gl = estimators.RidgeLogisticRegression_L1_GL( k=k, l=l, g=g, A=A, output=True, algorithm=explicit.StaticCONESTA(eps=eps, continuations=conts, max_iter=max_iter), penalty_start=1, mean=False) #mean error of lST sq error stime = time.time() print "=================================================================" print "Now fitting the model" logr_gl.fit(X, y ) print "Fit duration : ", time.time() - stime print 
"=================================================================" # 6- Interpretation beta = logr_gl.beta[1:] np.savez(os.path.join(basepath,'data',pw_name+'-unbiased-beta'), beta) else: print "Now performing adaptive groupLasso" unbiased_beta = np.load(os.path.join(basepath,'data',pw_name+'-unbiased-beta.npz'))['arr_0'] norme2 = np.linalg.norm(unbiased_beta) k = 1./norme2 weights = [np.linalg.norm(unbiased_beta[group[i]]) for i in group] # weights = 1./np.asarray(weights) weights = 1./np.sqrt(np.asarray(weights)) l = 0. g = 1. alpha = 50. import parsimony.functions.nesterov.gl as gl import parsimony.algorithms.explicit as explicit import parsimony.estimators as estimators A = gl.A_from_groups(p, groups=group, weights=weights) k, l, g = alpha * np.array((k, l , g)) logr_gl = estimators.RidgeLogisticRegression_L1_GL( k=k, l=l, g=g, A=A, output=True, algorithm=explicit.StaticCONESTA(eps=eps, continuations=conts, max_iter=max_iter), penalty_start=1, mean=False) #mean error of lST sq error stime = time.time() print "=================================================================" print "Now fitting the model" logr_gl.fit(X, y ) print "Fit duration : ", time.time() - stime print "=================================================================" beta = logr_gl.beta[1:] mask = (logr_gl.beta[1:] != 0.).ravel() mask = (beta*beta>1e-8) from bgutils.pway_interpret import pw_status, pw_beta_thresh pw_status(pw, snpList, mask.ravel()) # 7- from bgutils.pway_plot import plot_pw beta = logr_gl.beta[1:].copy() beta = beta / np.max(np.abs(beta)) # nbeta = pw_beta_thresh(beta, threshold=1e-2) # nbeta[nbeta!=0.] = 0.8 # nbeta[nbeta==0.] = 0.1 # plot_pw(beta, pway=pw, snplist=snpList, cache=True) # plt.show() # # plt.plot(logr_gl.info['f'], '+') # plt.show() return(dict( model=logr_gl,group=group, group_names=group_names, pw=pw, snpList=snpList ))
# Fit the first model; its coefficients provide the adaptive group weights.
logr_tv.fit(X_orig, y)
beta_w = logr_tv.beta
# plt.plot(beta_w[1:])
# plt.show()

PENALTY_START = 1
extended_groups = groups  # + [[i] for i in range(PENALTY_START, p-1)]

# test with tv
# One weight per group: the inverse of the norm of that group's coefficients.
# NOTE(review): beta_w is indexed with the group indices directly, while A is
# built for p - PENALTY_START columns - confirm that the group indices account
# for the leading PENALTY_START entries of beta_w.
weights = [
    1. / (np.linalg.norm(beta_w[group]))
    for group in extended_groups
]
# test with length
# weights = [np.sqrt(len(group[i])) for i in group]
A = gl.A_from_groups(p - PENALTY_START,
                     groups=extended_groups,
                     weights=weights)

# 5- Logistic regresssion
eps = 1e-8
max_iter = 2600
conts = 2
alpha = 11  # will be removed next version current max_iter x cont
k = (0.1) * (1. / (np.linalg.norm(beta_w)))
l = 0.1  # lasso ( if ENET k+l should be 1
g = 0.1
# l is passed as the l1 penalty, k as the l2 penalty and g as the group-lasso
# penalty, each scaled by alpha.
logr_gl = estimators.LogisticRegressionL1L2GL(
    l1=alpha * l, l2=alpha * k, gl=alpha * g,
    A=A,
    # NOTE(review): the visible source ends here, inside this call; the
    # remaining arguments lie outside the excerpt.