def mapper(key, output_collector):
    """Fit one penalized linear-regression model on a resampled fold.

    Reads the train/test split from mapreduce's DATA_RESAMPLED globals,
    standardizes X on the training statistics, fits the estimator selected
    by the key and collects y_pred / y_true / beta.

    key: (algo, alpha, l1l2ratio, tvratio) with algo in
        {'enettv', 'enetgn', 'enet'}; the three numbers are parsed as floats.
    output_collector: collector object, or None to return the result dict.
    """
    import mapreduce as GLOBAL
    X_train, X_test = GLOBAL.DATA_RESAMPLED["X"][0], GLOBAL.DATA_RESAMPLED["X"][1]
    y_train, y_test = GLOBAL.DATA_RESAMPLED["y"][0], GLOBAL.DATA_RESAMPLED["y"][1]

    # key = 'enettv_0.01_0.1_0.2'.split("_")
    algo = key[0]
    alpha, l1l2ratio, tvratio = float(key[1]), float(key[2]), float(key[3])

    # Split the global penalty alpha into l1 / l2 / tv terms.
    # NOTE(review): the (1 - tv) factor uses the already-scaled tv
    # (= alpha * tvratio), not (1 - tvratio) -- confirm this weighting
    # is intended.
    tv = alpha * tvratio
    l1 = alpha * (1.0 - tv) * l1l2ratio
    l2 = alpha * (1.0 - tv) * (1 - l1l2ratio)
    print(key, algo, alpha, l1, l2, tv)

    # Standardize features using training statistics only.
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    if algo == 'enettv':
        solver = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LinearRegressionL1L2TV(l1, l2, tv, GLOBAL.Atv,
                                                algorithm=solver)
    elif algo == 'enetgn':
        solver = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.LinearRegressionL1L2GraphNet(l1, l2, tv, GLOBAL.Agn,
                                                      algorithm=solver)
    elif algo == 'enet':
        solver = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.ElasticNet(l1l2ratio, algorithm=solver)
    else:
        raise Exception('Algo%s not handled' % algo)
    mod.fit(X_train, y_train.ravel())

    ret = dict(y_pred=mod.predict(X_test), y_true=y_test, beta=mod.beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
def mapper(key, output_collector):
    """Fit an ElasticNet on one resampled fold and collect predictions.

    key: (alpha, l1_ratio) penalty parameters.
    output_collector: mapreduce collector; receives a dict with
        z_pred, z_true and beta.
    """
    import mapreduce as GLOBAL  # access to global variables
    alpha, l1_ratio = key[0], key[1]
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ztr = GLOBAL.DATA_RESAMPLED["z"][0]
    zte = GLOBAL.DATA_RESAMPLED["z"][1]
    # FIX: Python 2 print statement converted to print() so the module is
    # Python 3 compatible (consistent with the other mappers in this file).
    print(key, "Data shape:", Xtr.shape, Xte.shape, ztr.shape, zte.shape)
    # mod = estimators.ElasticNet(alpha*l1_ratio, penalty_start=1, mean=True)
    # penalty_start=11: BMI was residualized with 2 categorical covariables
    # (8 columns) and 2 ordinal variables -- the leading 11 columns are
    # unpenalized covariates (presumably including an intercept; confirm).
    mod = estimators.ElasticNet(alpha * l1_ratio, penalty_start=11, mean=True)
    z_pred = mod.fit(Xtr, ztr).predict(Xte)
    ret = dict(z_pred=z_pred, z_true=zte, beta=mod.beta)
    output_collector.collect(key, ret)
def mapper(key, output_collector):
    """Fit an ElasticNet on one resampled fold and collect predictions.

    key: (alpha, l1_ratio) penalty parameters.
    output_collector: mapreduce collector; receives a dict with
        z_pred, z_true and beta.
    """
    import mapreduce as GLOBAL  # access to global variables (GLOBAL.DATA)
    alpha, l1_ratio = key[0], key[1]
    # mod = ElasticNet(alpha=key[0], l1_ratio=key[1])
    # FIX: Python 2 print statement converted to print().
    print("i am a work that works")
    # mod = estimators.ElasticNet(alpha*l1_ratio, penalty_start = 1, mean = True)
    # penalty_start=11: BMI is residualized with 2 categorical covariables
    # (8 columns) and 2 ordinal variables occupying the leading columns.
    mod = estimators.ElasticNet(alpha * l1_ratio, penalty_start=11, mean=True)
    z_pred = mod.fit(GLOBAL.DATA_RESAMPLED["X"][0],
                     GLOBAL.DATA_RESAMPLED["z"][0]).predict(
                         GLOBAL.DATA_RESAMPLED["X"][1])
    # BUG FIX: beta was previously passed as a keyword argument to
    # collect() instead of being stored inside the result dict, which is
    # what the sibling mappers (and the reducer) expect.
    output_collector.collect(key, dict(z_pred=z_pred,
                                       z_true=GLOBAL.DATA_RESAMPLED["z"][1],
                                       beta=mod.beta))
def mapper(key, output_collector):
    """Fit an ElasticNet (penalty_start=1) on one resampled fold.

    key: (alpha, l1_ratio) penalty parameters.
    output_collector: mapreduce collector; receives a dict with
        z_pred, z_true, beta and the fitted model itself.
    """
    import mapreduce as GLOBAL  # access to global variables
    alpha, l1_ratio = key[0], key[1]
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ztr = GLOBAL.DATA_RESAMPLED["z"][0]
    zte = GLOBAL.DATA_RESAMPLED["z"][1]
    # FIX: Python 2 print statement converted to print() for Python 3
    # compatibility (consistent with the other mappers in this file).
    print(key, "Data shape:", Xtr.shape, Xte.shape, ztr.shape, zte.shape)
    # penalty_start=1: only the first column is unpenalized
    # (presumably an intercept column -- confirm against the caller).
    mod_PP = estimators.ElasticNet(l1_ratio, alpha=alpha, penalty_start=1,
                                   mean=True)
    z_pred_PP = mod_PP.fit(Xtr, ztr).predict(Xte)
    ret = dict(z_pred=z_pred_PP, z_true=zte, beta=mod_PP.beta, model=mod_PP)
    output_collector.collect(key, ret)
def mapper(key, output_collector):
    """Fit an ElasticNet with the first 12 columns unpenalized.

    key: (alpha, l1_ratio) penalty parameters.
    output_collector: mapreduce collector; receives a dict with
        z_pred, z_true and beta.
    """
    import mapreduce as GLOBAL
    alpha, l1_ratio = key[0], key[1]
    Xtr = GLOBAL.DATA_RESAMPLED['X'][0]
    Xte = GLOBAL.DATA_RESAMPLED['X'][1]
    ztr = GLOBAL.DATA_RESAMPLED['z'][0]
    zte = GLOBAL.DATA_RESAMPLED['z'][1]
    # FIX: Python 2 print statement converted to print() for Python 3
    # compatibility (consistent with the other mappers in this file).
    print(key, "Data shape:", Xtr.shape, Xte.shape, ztr.shape, zte.shape)
    # penalty_start since we residualized BMI with 2 categorical covariables
    # (Gender and ImagingCentreCity - 8 columns) and 3 ordinal variables
    # (tiv_gaser, tiv_gaser^2 and mean_pds - 3 columns).
    # NOTE(review): 8 + 3 = 11, yet penalty_start is 12 -- presumably the
    # 12th unpenalized column is an intercept; confirm against the data prep.
    penalty_start = 12
    mod = estimators.ElasticNet(l1_ratio, alpha,
                                penalty_start=penalty_start, mean=True)
    z_pred = mod.fit(Xtr, ztr).predict(Xte)
    ret = dict(z_pred=z_pred, z_true=zte, beta=mod.beta)
    output_collector.collect(key, ret)
# Elasticnet regression via Pylearn-Parsimony: fit on (X, z), report R2,
# and prepare a voxel mask for drawing the beta map.
# FIX: Python 2 print statements converted to print(); the dead
# `beta_map = np.zeros(X.shape[1])` initialization was removed because the
# value is unconditionally overwritten by the fitted coefficients below.
print("Elasticnet algorithm")
alpha = 0.006
l1_ratio = 0.8
# l1_ratio = 0
# Since we residualized BMI with 2 categorical covariables (Gender and
# ImagingCentreCity - 8 columns) and 2 ordinal variables (tiv_gaser and
# mean_pds - 2 columns), the leading 11 columns are unpenalized.
penalty_start = 11
mod = estimators.ElasticNet(l1_ratio, alpha, penalty_start=penalty_start,
                            mean=True)
mod.fit(X, z)

print("Compute beta values")
beta_map = mod.beta

print("Compute R2")
# In-sample R2 of the fitted model.
r2 = r2_score(z, mod.predict(X))
print(r2)

# Use mask: boolean index of in-mask (non-zero) voxels from the mask image.
template_for_size_img = ni.load(MASK_PATH)
mask_data = template_for_size_img.get_data()
masked_data_index = (mask_data != 0.0)

# Draw beta map
print("Draw beta map")
if not os.path.exists(SHARED_DIR): os.makedirs(SHARED_DIR) # Load data print "Load data" X_init, X_res, z = load_residualized_bmi_data(cache=False) np.save(os.path.join(WD, 'X_res.npy'), X_res) np.save(os.path.join(WD, "z.npy"), z) # (n, p) = X_init.shape gamma = 1 groups = [[j] for j in range(0, p)] print "Compute ElasticNet algorithm" enet_PP = estimators.ElasticNet(l=0.8, alpha=0.006, penalty_start=11, mean=True) enet_PP.fit(X_res, z) print "Compute beta values" beta = enet_PP.beta beta = beta[11:] #do not consider covariates print "Compute the weights using Parsimony's ElasticNet algorithm." weights = [ math.pow(abs(beta[j[0]]) + 1 / float(n), -gamma) for j in groups ] # Adaptive Elasticnet algorithm adaptive_enet = estimators.LinearRegressionL1L2GL( l1=0, l2=0.8, gl=0.006,
# Compare Parsimony's ElasticNet against scikit-learn's on one CV fold.
# FIX: Python 2 print statements converted to print() for Python 3
# compatibility (consistent with the other scripts in this file).
FOLD = 0
TRAIN = 0
TEST = 1
Xtrain = X[cv[FOLD][TRAIN], ...]
Xtest = X[cv[FOLD][TEST], ...]
ztrain = z[cv[FOLD][TRAIN], ...]
ztest = z[cv[FOLD][TEST], ...]

# alpha l1_ratio
alpha = 1.0
l1_ratio = 0.9

# parsimony: prepend an explicit all-ones intercept column and leave it
# unpenalized via penalty_start=1.
XtrainPP = np.hstack((np.ones((ztrain.shape[0], 1)), Xtrain))
XtestPP = np.hstack((np.ones((ztest.shape[0], 1)), Xtest))
mod_PP = estimators.ElasticNet(l1_ratio, alpha=alpha, penalty_start=1,
                               mean=True)
time_curr = time.time()
z_pred_PP = mod_PP.fit(XtrainPP, ztrain).predict(XtestPP)
print("Parsimony elapsed time: ", time.time() - time_curr)
print("Parsimony r2:", r2_score(ztest, z_pred_PP))

# scikit-learn fits the intercept itself (fit_intercept=True).
time_curr = time.time()
mod_SL = ElasticNet(alpha, l1_ratio, fit_intercept=True)
z_pred_SL = mod_SL.fit(Xtrain, ztrain).predict(Xtest)
print("Scikit elapsed time: ", time.time() - time_curr)
print("Scikit r2:", r2_score(ztest, z_pred_SL))
# NOTE(review): this excerpt begins mid-assignment -- the Lasso(...) call
# below is the right-hand side of a MODELS["..."] entry whose left-hand
# side lies before this chunk.
sklearn.linear_model.Lasso(alpha=alpha / n_train, fit_intercept=True)
# Parsimony Lasso with an unpenalized leading column (penalty_start=1).
MODELS["l1_inter__fista"] = \
    estimators.Lasso(l=alpha, mean=False, penalty_start=1)

## Enet + fista
# Register the scikit-learn reference implementation only when sklearn is
# available; the parsimony (FISTA) counterpart is always registered.
if has_sklearn:
    MODELS["l1l2__sklearn"] = \
        sklearn.linear_model.ElasticNet(alpha=alpha, l1_ratio=.5,
                                        fit_intercept=False)
MODELS["l1l2__fista"] = \
    estimators.ElasticNet(alpha=alpha, l=.5)
if has_sklearn:
    MODELS["l1l2_inter__sklearn"] = \
        sklearn.linear_model.ElasticNet(alpha=alpha, l1_ratio=.5,
                                        fit_intercept=True)
MODELS["l1l2_inter__fista"] = \
    estimators.ElasticNet(alpha=alpha, l=.5, penalty_start=1)

## LinearRegressionL1L2TV, Parsimony only
# Minimize:
#    f(beta, X, y) = (1 / (2 * n)) * ||Xbeta - y||²_2
#                    + l1 * ||beta||_1
def mapper(key, output_collector):
    """Fit one model on a resampled fold and collect predictions.

    key: (algo, alpha, l1l2ratio, tvratio); the three numbers are parsed
        as floats. algo selects among parsimony estimators ('enettv',
        'enetgn', 'enet', 'Ridge', 'RidgeAGD') and scikit-learn baselines
        ('linearSklearn', 'SkRidge', 'SkRidgeInt').
    output_collector: collector object, or None to return the result dict.

    NOTE(review): this function reads a module-level `penalty_start` that
    is not defined here -- confirm it is set before mapping.
    """
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]
    # key = 'enettv_0.01_0.1_0.2'.split("_")
    algo = key[0]
    alpha, l1l2ratio, tvratio = float(key[1]), float(key[2]), float(key[3])
    # Split the global penalty alpha into l1 / l2 / tv terms.
    # NOTE(review): (1 - tv) uses the already-scaled tv (= alpha * tvratio);
    # confirm (1 - tvratio) was not intended.
    tv = alpha * tvratio
    l1 = alpha * float(1 - tv) * l1l2ratio
    l2 = alpha * float(1 - tv) * (1 - l1l2ratio)
    print(key, algo, alpha, l1, l2, tv)
    # Standardize all but the first column (assumed to be an unpenalized
    # intercept/covariate column -- confirm) using train statistics only.
    scaler = preprocessing.StandardScaler().fit(Xtr[:, 1:])
    Xtr[:, 1:] = scaler.transform(Xtr[:, 1:])
    Xte[:, 1:] = scaler.transform(Xte[:, 1:])

    # REFACTOR: the original duplicated mod.fit(...) and beta extraction in
    # every branch; each branch now only constructs the estimator.
    if algo == 'enettv':
        mod = estimators.LinearRegressionL1L2TV(
            l1, l2, tv, GLOBAL.Atv,
            algorithm=algorithms.proximal.CONESTA(max_iter=10000),
            penalty_start=penalty_start)
    elif algo == 'enetgn':
        mod = estimators.LinearRegressionL1L2GraphNet(
            l1, l2, tv, GLOBAL.Agn,
            algorithm=algorithms.proximal.FISTA(max_iter=5000),
            penalty_start=penalty_start)
    elif algo == 'enet':
        mod = estimators.ElasticNet(
            l1l2ratio,
            algorithm=algorithms.proximal.FISTA(max_iter=5000),
            penalty_start=penalty_start)
    elif algo == 'Ridge':
        mod = estimators.RidgeRegression(l1l2ratio,
                                         penalty_start=penalty_start)
    elif algo == 'RidgeAGD':
        mod = estimators.RidgeRegression(
            l1l2ratio,
            algorithm=gradient.GradientDescent(max_iter=1000),
            penalty_start=penalty_start)
    elif algo == 'linearSklearn':
        mod = linear_model.LinearRegression(fit_intercept=False)
    elif algo == 'SkRidge':
        mod = linear_model.Ridge(alpha=l1l2ratio, fit_intercept=False)
    elif algo == 'SkRidgeInt':
        mod = linear_model.Ridge(alpha=l1l2ratio, fit_intercept=True)
    else:
        raise Exception('Algo%s not handled' % algo)

    mod.fit(Xtr, ytr.ravel())
    # Parsimony estimators expose .beta (a column vector); scikit-learn
    # models expose .coef_ (1d), reshaped here to a column vector.
    if hasattr(mod, 'beta'):
        beta = mod.beta
    else:
        beta = mod.coef_.reshape(mod.coef_.shape[0], 1)

    y_pred = mod.predict(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, beta=beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
# Build the regression design: flatten the 3D images into row vectors and
# split into train / test.
X = X3d.reshape((n_samples, np.prod(shape)))
n_train = 100
Xtr = X[:n_train, :]
ytr = y[:n_train]
Xte = X[n_train:, :]
yte = y[n_train:]
alpha = 1.  # global penalty

###########################################################################
## Elasticnet
# Min: (1 / (2 * n)) * ||X * beta - y||²_2
#      + alpha * l * ||beta||_1
#      + alpha * ((1.0 - l) / 2) * ||beta||²_2
# Parsimony Elasticnet is based on FISTA, is then slower that scikit-learn one
l1_ratio = .5
# FIX: pass the l1_ratio variable instead of a duplicated hard-coded .5,
# so changing l1_ratio above actually affects the fit.
enet = estimators.ElasticNet(alpha=alpha, l=l1_ratio)
yte_pred_enet = enet.fit(Xtr, ytr).predict(Xte)

###########################################################################
## Fit LinearRegressionL1L2TV
# Min: (1 / (2 * n)) * ||Xbeta - y||^2_2
#      + l1 * ||beta||_1
#      + (l2 / 2) * ||beta||^2_2
#      + tv * TV(beta)
l1, l2, tv = alpha * np.array((.33, .33, .33))  # l1, l2, tv penalties
# Total-variation linear operator built from the image shape.
A = nesterov_tv.linear_operator_from_shape(shape)
algo = algorithms.proximal.CONESTA(max_iter=500)
enettv = estimators.LinearRegressionL1L2TV(l1, l2, tv, A, algorithm=algo)
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)