def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name == 'pca':
        # Force the key
        global_pen = tv_ratio = l1_ratio = 0
    if model_name == 'sparse_pca':
        global_pen = tv_ratio = 0
        ll1 = l1_ratio
    if model_name == 'struct_pca':
        ltv = global_pen * tv_ratio
        ll1 = l1_ratio * global_pen * (1 - tv_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
        assert np.allclose(ll1 + ll2 + ltv, global_pen)
    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]
    # A matrices
    Atv = GLOBAL.A
    N_COMP = GLOBAL.N_COMP
    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=N_COMP)
    if model_name == 'sparse_pca':
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP, alpha=ll1)
    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1, l2=ll2, ltv=ltv,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=False)
    model.fit(X_train)
    # Save the projectors
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        V = model.components_.T
    if model_name == 'struct_pca':
        V = model.V
    # Project train & test data
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)
    if model_name == 'struct_pca':
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)
    # Reconstruct train & test data
    # For SparsePCA or PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)
    if model_name == 'struct_pca':
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)
    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')
    print(frobenius_test)
    # Compute explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()
    # Remove predicted values (they are huge)
    del X_train_predict, X_test_predict
    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               evr_train=evr_train,
               evr_test=evr_test)
    output_collector.collect(key, ret)
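# The three penalty weights above always sum back to global_pen. A minimal,
# self-contained sketch of that decomposition (the name
# compute_coefs_from_ratios matches the helper the later mappers call; this
# standalone version only illustrates the same arithmetic):
import numpy as np

def compute_coefs_from_ratios(global_pen, tv_ratio, l1_ratio):
    """Map (global_pen, tv_ratio, l1_ratio) to (ll1, ll2, ltv)."""
    ltv = global_pen * tv_ratio
    ll1 = l1_ratio * global_pen * (1 - tv_ratio)
    ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
    assert np.allclose(ll1 + ll2 + ltv, global_pen)
    return ll1, ll2, ltv

# Example: compute_coefs_from_ratios(1.0, 0.5, 0.1) -> (0.05, 0.45, 0.5)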
ltv = global_pen * tv_ratio
ll1 = l1_ratio * global_pen * (1 - tv_ratio)
ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
assert np.allclose(ll1 + ll2 + ltv, global_pen)

# Compute A and mask
masks = []
INPUT_OBJECT_MASK_FILE_FORMAT = "mask_{o}.npy"
for i in range(3):
    filename = INPUT_OBJECT_MASK_FILE_FORMAT.format(o=i)
    masks.append(np.load(filename))
im_shape = config["im_shape"]
Atv = nesterov_tv.A_from_shape(im_shape)

########################################
snapshot = AlgorithmSnapshot('/neurospin/brainomics/2014_pca_struct/lambda_max/',
                             saving_period=1).save_conesta
mod = pca_tv.PCA_L1_L2_TV(n_components=3,
                          l1=ll1, l2=ll2, ltv=ltv,
                          Atv=Atv,
                          criterion="frobenius",
                          eps=1e-4,
                          max_iter=100,
                          inner_max_iter=int(1e4),
                          output=True,
                          callback=snapshot)
mod.fit(X[:250, :])
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables
    model_name, global_pen, struct_ratio, l1_ratio = key
    if model_name == 'pca':
        global_pen = struct_ratio = l1_ratio = 0
    if model_name == 'sparse_pca':
        global_pen = struct_ratio = 0
        ll1 = l1_ratio
    if model_name == 'struct_pca':
        ltv = global_pen * struct_ratio
        ll1 = l1_ratio * global_pen * (1 - struct_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - struct_ratio)
        assert np.allclose(ll1 + ll2 + ltv, global_pen)
    if model_name == 'graphNet_pca':
        lgn = global_pen * struct_ratio
        ll1 = l1_ratio * global_pen * (1 - struct_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - struct_ratio)
        assert np.allclose(ll1 + ll2 + lgn, global_pen)
    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]
    # A matrices
    A = GLOBAL.A
    N_COMP = GLOBAL.N_COMP
    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=N_COMP)
    if model_name == 'sparse_pca':
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP, alpha=ll1)
    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1, l2=ll2, ltv=ltv,
                                    Atv=A,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e3),
                                    output=False)
    if model_name == 'graphNet_pca':
        A = sparse.vstack(A)
        model = pca_struct.PCAGraphNet(n_components=N_COMP,
                                       l1=ll1, l2=ll2, lgn=lgn,
                                       Agn=A,
                                       criterion="frobenius",
                                       eps=1e-6,
                                       max_iter=500,
                                       output=False)
    model.fit(X_train)
    # Save the projectors
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        V = model.components_.T
    if (model_name == 'struct_pca') or (model_name == 'graphNet_pca'):
        V = model.V
    # Project train & test data
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)
    if (model_name == 'struct_pca') or (model_name == 'graphNet_pca'):
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)
    # Reconstruct train & test data
    # For SparsePCA or PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)
    if (model_name == 'struct_pca') or (model_name == 'graphNet_pca'):
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)
    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')
    print(frobenius_test)
    del X_train_predict, X_test_predict
    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform)
    output_collector.collect(key, ret)
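# In the graphNet_pca branch above, GLOBAL.A arrives as a list of sparse
# blocks that PCAGraphNet expects stacked into a single operator, hence the
# sparse.vstack call. A toy illustration of that stacking (the blocks here
# are stand-ins, not the real gradient operators):
import scipy.sparse as sparse

blocks = [sparse.eye(4), 2 * sparse.eye(4)]
A_stacked = sparse.vstack(blocks)
print(A_stacked.shape)  # (8, 4): two 4x4 blocks stacked row-wise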
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name == 'pca':
        # Force the key
        global_pen = tv_ratio = l1_ratio = 0
    if model_name == 'sparse_pca':
        global_pen = tv_ratio = 0
        ll1 = l1_ratio
    if model_name == 'struct_pca':
        ltv = global_pen * tv_ratio
        ll1 = l1_ratio * global_pen * (1 - tv_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
        assert np.allclose(ll1 + ll2 + ltv, global_pen)
    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    print(X_train.shape)
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]
    # A matrices
    Atv = GLOBAL.Atv
    N_COMP = GLOBAL.N_COMP  # was undefined in this mapper; read from GLOBAL as the other mappers do
    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=N_COMP)
    if model_name == 'sparse_pca':
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP, alpha=ll1)
    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1, l2=ll2, ltv=ltv,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=True)
    t0 = time.clock()
    model.fit(X_train)
    t1 = time.clock()
    _time = t1 - t0
    # Save the projectors
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        V = model.components_.T
    if model_name == 'struct_pca':
        V = model.V
    # Project train & test data
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)
    if model_name == 'struct_pca':
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)
    # Reconstruct train & test data
    # For SparsePCA or PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)
    if model_name == 'struct_pca':
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)
    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')
    print(frobenius_test)
    # Compute explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()
    # Remove predicted values (they are huge)
    del X_train_predict, X_test_predict
    # Compute geometric metrics and norms of components
    TV = parsimony.functions.nesterov.tv.TotalVariation(1, A=Atv)
    l0 = np.zeros((N_COMP,))
    l1 = np.zeros((N_COMP,))
    l2 = np.zeros((N_COMP,))
    tv = np.zeros((N_COMP,))
    for i in range(N_COMP):
        # Norms
        l0[i] = np.linalg.norm(V[:, i], 0)
        l1[i] = np.linalg.norm(V[:, i], 1)
        l2[i] = np.linalg.norm(V[:, i], 2)
        tv[i] = TV.f(V[:, i])
    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               evr_train=evr_train,
               evr_test=evr_test,
               l0=l0, l1=l1, l2=l2, tv=tv,
               time=_time)
    output_collector.collect(key, ret)
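# Quick illustration of the per-component norms collected above: with
# np.linalg.norm, ord=0 counts non-zero entries, which is how component
# sparsity is summarized here (toy vector, not project data):
import numpy as np

v = np.array([0.0, 0.5, 0.0, -0.25])
print(np.linalg.norm(v, 0))  # 2.0  -> number of non-zero loadings
print(np.linalg.norm(v, 1))  # 0.75 -> sum of absolute loadings
print(np.linalg.norm(v, 2))  # ~0.559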
global_pen = 0.01
l1_ratio = 0.5
tv_ratio = 0.5
ll1, ll2, ltv = compute_coefs_from_ratios(global_pen, tv_ratio, l1_ratio)
start_vector = start_vectors.RandomStartVector(seed=24)
MODELS["SparsePCA"] = \
    sklearn.decomposition.SparsePCA(n_components=N_COMP, alpha=1)
MODELS["ElasticNetPCA"] = \
    pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                        l1=ll1, l2=ll2, ltv=1e-6,
                        Atv=Atv,
                        criterion="frobenius",
                        eps=1e-6,
                        max_iter=100,
                        inner_max_iter=int(1e4),
                        output=False,
                        start_vector=start_vector)
MODELS["SPCATV"] = \
    pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                        l1=ll1, l2=ll2, ltv=ltv,
                        Atv=Atv,
                        criterion="frobenius",
                        eps=1e-6,
                        max_iter=100,
                        inner_max_iter=int(1e4),
                        output=False,
                        start_vector=start_vector)
###############################################################################
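# The MODELS dict above is meant to be iterated over; a minimal sketch of how
# it would be consumed (X is assumed to be the already-loaded data matrix):
for name, model in MODELS.items():
    model.fit(X)
    print(name, "fitted")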
# pending
ll1, ll2, ltv = 0.05 * 0.025937425654559931, 1, 0.003
key_pca_enettv = "pca_enettv_%.4f_%.3f_%.3f" % (ll1, ll2, ltv)
## key_pca_enettv = CHOICE
key = key_pca_enettv
print(OUTPUT_DIR.format(key=key))
if not os.path.exists(OUTPUT_DIR.format(key=key)):
    os.makedirs(OUTPUT_DIR.format(key=key))
model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                            l1=ll1, l2=ll2, ltv=ltv,
                            Atv=A,
                            criterion="frobenius",
                            eps=1e-6,
                            max_iter=100,
                            inner_max_iter=inner_max_iter,
                            verbose=True)
t0 = time.clock()
model.fit(X)
model.l1_max(X)
t1 = time.clock()
_time = t1 - t0
print("Time TOT(s)", _time)
# Save results
# model.U, model.d, model.V = m["U"], m["d"], m["V"]
PC, d = model.transform(X)
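# Note on timing: time.clock(), used above and in the mappers, was deprecated
# in Python 3.3 and removed in 3.8. On modern interpreters the recommended
# replacement is time.perf_counter():
import time

t0 = time.perf_counter()
# ... model.fit(X) ...
_time = time.perf_counter() - t0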
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA ::= {"X": [Xtrain, Xtest], "y": [ytrain, ytest]}
    # key: list of parameters
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name == 'pca':
        # Force the key
        global_pen = tv_ratio = l1_ratio = 0
    if model_name == 'struct_pca':
        ll1, ll2, ltv = compute_coefs_from_ratios(global_pen,
                                                  tv_ratio,
                                                  l1_ratio)
        # This should not happen
        if ll1 > GLOBAL.l1_max:
            raise ValueError
    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]
    # A matrices
    Atv = GLOBAL.Atv
    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=GLOBAL.N_COMP)
    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=GLOBAL.N_COMP,
                                    l1=ll1, l2=ll2, ltv=ltv,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=False)
    t0 = time.clock()
    model.fit(X_train)
    t1 = time.clock()
    _time = t1 - t0
    # Save the projectors
    if model_name == 'pca':
        components = model.components_.T
    if model_name == 'struct_pca':
        components = model.V
    # Threshold components
    thresh_components = np.empty(components.shape)
    thresholds = np.empty((GLOBAL.N_COMP, ))
    for k in range(GLOBAL.N_COMP):
        thresh_comp, t = array_utils.arr_threshold_from_norm2_ratio(
            components[:, k], .99)
        thresh_components[:, k] = thresh_comp
        thresholds[k] = t
    # Project train & test data
    if model_name == 'pca':
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)
    if model_name == 'struct_pca':
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)
    # Reconstruct train & test data
    # For PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if model_name == 'pca':
        X_train_predict = np.dot(X_train_transform, components.T)
        X_test_predict = np.dot(X_test_transform, components.T)
    if model_name == 'struct_pca':
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)
    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')
    # Compute geometric metrics and norms of components
    TV = parsimony.functions.nesterov.tv.TotalVariation(1, A=Atv)
    l0 = np.zeros((GLOBAL.N_COMP, ))
    l1 = np.zeros((GLOBAL.N_COMP, ))
    l2 = np.zeros((GLOBAL.N_COMP, ))
    tv = np.zeros((GLOBAL.N_COMP, ))
    recall = np.zeros((GLOBAL.N_COMP, ))
    precision = np.zeros((GLOBAL.N_COMP, ))
    fscore = np.zeros((GLOBAL.N_COMP, ))
    for i in range(GLOBAL.N_COMP):
        # Norms
        l0[i] = np.linalg.norm(components[:, i], 0)
        l1[i] = np.linalg.norm(components[:, i], 1)
        l2[i] = np.linalg.norm(components[:, i], 2)
        tv[i] = TV.f(components[:, i])
    # Compute explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()
    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=components,
               thresh_components=thresh_components,
               thresholds=thresholds,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               X_train_predict=X_train_predict,
               X_test_predict=X_test_predict,
               recall=recall, precision=precision, fscore=fscore,
               evr_train=evr_train, evr_test=evr_test,
               l0=l0, l1=l1, l2=l2, tv=tv,
               time=_time)
    output_collector.collect(key, ret)
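# A sketch of the behaviour assumed for arr_threshold_from_norm2_ratio above:
# zero out the smallest-magnitude entries of a component while keeping a given
# fraction of its l2 norm. The real implementation lives in brainomics'
# array_utils; this standalone version only illustrates the idea:
import numpy as np

def threshold_from_norm2_ratio(v, ratio=.99):
    """Return (thresholded copy of v, threshold t) keeping `ratio` of ||v||_2."""
    v = np.asarray(v, dtype=float)
    order = np.argsort(np.abs(v))[::-1]        # indices, largest |v_i| first
    cum = np.sqrt(np.cumsum(v[order] ** 2))    # running l2 norm of the kept set
    k = np.searchsorted(cum, ratio * np.linalg.norm(v)) + 1
    t = np.abs(v[order])[min(k, v.size) - 1]   # magnitude of the last kept entry
    return np.where(np.abs(v) >= t, v, 0.0), t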
########################################################################
# explained variance
import pca_tv
N_COMP = 5
import parsimony.functions.nesterov.tv as nesterov_tv

Atv = nesterov_tv.A_from_mask(babel_mask.get_data())
X = T_hallu
print("# explained variance #############################################")
# fh = open(os.path.join(OUTPUT_DIR.format(key=key), "pca_enettv_info.txt"), "a")
mod = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                          l1=0.1, l2=0.1, ltv=0.1,
                          Atv=Atv,
                          criterion="frobenius",
                          eps=1e-6,
                          max_iter=100,
                          inner_max_iter=1000)
mod.U, mod.V = projections, components
rsquared = np.zeros(N_COMP)
for j in range(N_COMP):
    mod.n_components = j + 1
    X_predict = mod.predict(X)
    sse = np.sum((X - X_predict) ** 2)
    ssX = np.sum(X ** 2)
    rsquared[j] = 1 - sse / ssX
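# Sanity check for the cumulative R^2 loop above: for plain (unpenalized) PCA
# on centered data, 1 - sse/ssX over the first j components equals the
# cumulative sum of sklearn's explained_variance_ratio_. Toy comparison on
# random data (names here are illustrative only):
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X_toy = rng.randn(100, 20)
X_toy -= X_toy.mean(axis=0)
pca = PCA(n_components=5).fit(X_toy)
print(np.cumsum(pca.explained_variance_ratio_))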