def decomposition_interact_re_button(self, foo):
    """Widget-button callback: rerun the selected decomposition and redraw.

    Slices the EEM stack between the two range selectors, decomposes it
    with the method chosen in the dropdown, and hands the three factor
    matrices to the interactive reconstruction viewer. `foo` is the
    (unused) button instance the widget framework passes to callbacks.

    Note: if the dropdown holds an unrecognized method name, `factors`
    is never bound and a NameError is raised (same as the original).
    """
    lo = self.datlist_cw.index(self.range1.value)
    hi = self.datlist_cw.index(self.range2.value) + 1
    dataset = self.EEMstack_cw[lo:hi]

    method = self.decomposition_method_list.value
    rank = self.rank_display.value
    if method == 'parafac':
        factors = parafac(dataset, rank=rank)
    elif method == 'non_negative_parafac':
        factors = non_negative_parafac(dataset, rank=rank)
    elif method == 'test_function':
        factors = non_negative_parafac(dataset, rank=rank,
                                       fixed_modes=[0, 1], init="random")

    # factors[1] holds the per-mode factor matrices of the CP result.
    I_0, J_0, K_0 = factors[1][0], factors[1][1], factors[1][2]
    decomposition_reconstruction_interact(
        I_0, J_0, K_0,
        self.EEMstack_cw[self.datlist_cw.index(self.data_to_view.value)],
        self.Em_range_cw,
        self.Ex_range_cw,
        self.datlist_cw[lo:hi],
        self.data_to_view.value,
        crange=self.crange_cw.value)
def factorize(self, n_factors, new_mz_len=None, gauss_params=None):
    """Decompose this tensor into `n_factors` components via non-negative PARAFAC.

    Arguments:
        n_factors -- number of CP components to extract.
        new_mz_len -- if not None, interpolate the m/z axis to this length
            before factorizing (ignored for concatenated tensors).
        gauss_params -- optional (rt_sigma, dt_sigma)-style pair forwarded to
            `interpolate` / `self.gauss` for smoothing; semantics defined by
            those helpers.

    Returns:
        list[Factor] -- one Factor per component.
    """

    def _make_factors(nnp, lows, highs):
        # nnp[1] holds the per-mode factor matrices (rt, dt, m/z);
        # column i of each matrix is component i.
        return [
            Factor(tensor_idx=self.tensor_idx,
                   charge_state=self.charge_state,
                   rts=nnp[1][0].T[i],
                   dts=nnp[1][1].T[i],
                   mz_data=nnp[1][2].T[i],
                   factor_idx=i,
                   n_factors=n_factors,
                   lows=lows,
                   highs=highs,
                   abs_mz_low=self.mz_bin_low,
                   n_concatenated=self.n_concatenated)
            for i in range(n_factors)
        ]

    # Concatenated tensors are factorized as-is; interpolation/smoothing
    # options only apply to the single-tensor path below.
    if self.n_concatenated != 1:
        nnp = non_negative_parafac(self.concatenated_grid, n_factors,
                                   init='random')  # , n_iter_max=50)
        return _make_factors(nnp, self.lows, self.highs)

    if new_mz_len is not None:
        # Re-grid the m/z axis (optionally Gaussian-smoothed) before fitting.
        if gauss_params is not None:
            grid, lows, highs = interpolate(self.full_grid_out, new_mz_len,
                                            gauss_params[0], gauss_params[1])
        else:
            grid, lows, highs = interpolate(self.full_grid_out, new_mz_len)
    else:
        lows, highs = self.lows, self.highs
        if gauss_params is not None:
            grid = self.gauss(self.full_grid_out, gauss_params[0],
                              gauss_params[1])
        else:
            grid = self.full_grid_out

    nnp = non_negative_parafac(grid, n_factors, init='random')  # , n_iter_max=50)
    return _make_factors(nnp, lows, highs)
def perform_decomposition(tensor, r, weightFactor=2):
    """Non-negative PARAFAC decomposition of `tensor` at rank `r`.

    The CP result is normalized so every factor column has unit norm,
    then the component weights are folded back into the factor matrix
    for mode `weightFactor`. Returns the list of factor matrices.
    """
    cp = non_negative_parafac(tensor, r, tol=1.0e-10, n_iter_max=6000)
    cp = tl.cp_normalize(cp)
    # Push the per-component weights into the designated mode so the
    # other modes stay unit-normalized.
    cp.factors[weightFactor] = cp.factors[weightFactor] * cp.weights[np.newaxis, :]
    return cp.factors
def tensDecomp(tens, rnk):
    """Non-negative CP decomposition of `tens` at rank `rnk`.

    The rank must be smaller than the size of the innermost mode —
    decomposition should reduce dimensionality, not inflate it. On an
    invalid rank an error message is printed and None is returned.
    """
    n_features = len(tens[0][0])
    if rnk >= n_features:
        print("Error in data: rank is greater than number of features")
        return None
    return non_negative_parafac(tens, rnk)
def factorTensor(tensor, numComps):
    """Factorize `tensor` (which may contain NaNs) into `numComps` components.

    Missing entries are first imputed from a masked non-negative PARAFAC
    fit, and the completed tensor is then refactorized with the HALS
    solver, whose result is returned.
    """
    # Rough fit: NaNs are zero-filled for the solver and excluded from
    # the objective via the finite-entry mask.
    rough = non_negative_parafac(np.nan_to_num(tensor),
                                 rank=numComps,
                                 mask=np.isfinite(tensor),
                                 n_iter_max=5000,
                                 tol=1e-9)
    # Impute: replace the missing entries with the rough reconstruction.
    filled = tensor.copy()
    missing = np.isnan(filled)
    filled[missing] = tl.cp_to_tensor(rough)[missing]
    # Final fit on the completed tensor.
    return non_negative_parafac_hals(filled, numComps, n_iter_max=5000)
def separate_kernel(kernel, max_rank='sqrt'):
    """Approximate a kernel as a non-negative CP decomposition.

    Arguments:
        kernel -- array-like kernel; 1-D kernels are already separable and
            are returned unchanged.
        max_rank -- rank of the decomposition: None uses the smallest
            kernel dimension, 'sqrt' uses ceil(sqrt(largest dimension)),
            and an integer is used as given.

    Returns:
        The kernel itself when 1-D, otherwise a list of transposed factor
        matrices from the non-negative PARAFAC decomposition.
    """
    # Bail out on 1-D kernels *before* touching kernel.shape:
    # min(*shape) / max(*shape) raise TypeError on a 1-tuple.
    if kernel.ndim == 1:
        return kernel
    if max_rank is None:
        max_rank = min(kernel.shape)
    elif max_rank == 'sqrt':
        max_rank = int(ceil(sqrt(max(kernel.shape))))
    # else: an explicit integer rank is honored as given (previously it
    # was silently replaced by min(shape) // 3).
    P = non_negative_parafac(tensor(kernel), rank=max_rank)
    return [D.T for D in P]
    return all_ind  # tail of a function whose definition precedes this chunk


def show_proj(factors, matlab_data, ex1_em0):
    """Plot the weighted profiles of the first three CP components.

    Arguments:
        factors -- CP result; factors[1] holds the factor matrices and
            factors[1][0] the first-mode factors (used here as weights).
        matlab_data -- dict loaded from the amino .mat file; provides the
            "EmAx"/"ExAx" axis vectors.
        ex1_em0 -- 1 to plot against the emission axis, otherwise the
            excitation axis.

    NOTE(review): assumes the second-mode factor reshapes to a 61x201
    excitation/emission grid — confirm against the dataset.
    """
    color = ["r", "g", "b"]
    for i in range(3):
        mat1 = np.array(factors[1][1]).transpose()[i]
        # Component weight taken as the norm of the first-mode factor.
        w = tl.norm(factors[1][0][i])
        sq_mat = np.array(mat1).reshape(61, 201)
        # Locate the grid maximum; the slice through it is what gets plotted.
        m = np.max(sq_mat)
        ind = tl.where(sq_mat == m)
        if (ex1_em0 == 1):
            x = matlab_data["EmAx"][0]
            y = w * tl.transpose(sq_mat[ind[0]])
        else:
            x = matlab_data["ExAx"][0]
            y = w * np.transpose(tl.transpose(sq_mat)[ind[1]])
        plt.grid()
        plt.plot(x, y, color[i])
    plt.show()


if __name__ == "__main__":
    # NOTE(review): hard-coded local path — the script is not portable as-is.
    mat = scipy.io.loadmat('C:/Users/Tatiana/Desktop/amino.mat')
    ex, em = transform_data(mat)
    factors = non_negative_parafac(mat["X"], rank=3, n_iter_max=2000)
    # Draw both projections: excitation (0) and emission (1).
    show_proj(factors, mat, 0)
    show_proj(factors, mat, 1)
    # (inside a loop over CSV rows — the loop header precedes this chunk)
    entry = row.split(sep=',')
    # Each row interleaves the four boxes: columns 4i..4i+3 belong to
    # Box1..Box4 respectively, for 441 spectral positions.
    for i in range(441):
        column = i * 4
        Box1List.append(float(entry[column]))
        Box2List.append(float(entry[column + 1]))
        Box3List.append(float(entry[column + 2]))
        Box4List.append(float(entry[column + 3]))

# Creating AllBoxes, which is a huge list of all SpectraMatrix data for easy tensor creation
AllBoxes = Box1List + Box2List + Box3List + Box4List
theTensor = np.array(AllBoxes).reshape(
    (4, 200, 441))  # The tensor, in the proper shape, that we hope to do CPT on
theCPT = non_negative_parafac(theTensor, rank=4)  # The CPT decomposition

# Reference factor matrices exported from MATLAB for comparison.
matlab5factor1 = pd.read_csv('data files/factor1rank5Matlab.csv', header=None)
matlab5factor2 = pd.read_csv('data files/factor2rank5Matlab.csv', header=None)
matlab5factor3 = pd.read_csv('data files/factor3rank5Matlab.csv', header=None)
matlab8factor1 = pd.read_csv('data files/factor1rank8Matlab.csv', header=None)
matlab8factor2 = pd.read_csv('data files/factor2rank8Matlab.csv', header=None)
matlab8factor3 = pd.read_csv('data files/factor3rank8Matlab.csv', header=None)
matlab5factor2 = th.attachXAxis(matlab5factor2)

# ------- Cool subplots of the MATLAB CPT -------
plt.figure(figsize=(15, 9))
plt.suptitle('Factor 2 After CP Tensor Decomposition', fontsize=22)
    # (inside a loop over graph files — the loop header precedes this chunk)
    print(filename)
    if os.path.isfile(filename):
        # Rebuild a rank-truncated matrix from the stored SVD factors.
        svd = np.load(filename)
        u = svd['u'][:, :rank]
        s = svd['s'][:rank]
        vt = svd['vt'][:rank, :]
        T[graph_idx, :, :] = np.dot(np.dot(u, np.diag(s)), vt)
        del svd
        continue
    else:
        continue
    # NOTE(review): both branches above `continue`, so this increment is
    # unreachable and every matrix overwrites T[0] — confirm intent.
    graph_idx += 1

factors = non_negative_parafac(T, rank=comm, n_iter_max=100, verbose=1,
                               init='random', tol=1e-8)
del T  # free the stacked tensor once factorized

save = True
load = False
# Output path encodes the run parameters (rank, sample index, sample size).
fileName = '/media/garner1/hdd1/gpseq' + '/info_10000G' + '/nnparafac_WOintraChrom' + '_rank' + str(
    rank) + '_sample' + str(ind) + '_size' + str(samples) + '.pkl'
fileObject = open(fileName, 'wb')
if save:
    pkl.dump(factors, fileObject)
fileObject.close()
def analyze_with_tensor_decomposition(mat, x, y, z):
    """Decompose a (user x thread x time) tensor and plot each component.

    Arguments:
        mat -- 3-D array of activity counts; axes are user, thread, time.
        x, y, z -- sizes of the user, thread and time axes respectively.

    Returns:
        Six objects: anomalous-component maps and values for the user,
        time and thread modes, as produced by find_anomalous_component.

    NOTE(review): `rank` is read from an enclosing/global scope, not a
    parameter — confirm it is set before this is called.
    """
    print("Tensor Decomposition starts...")
    tensor = tl.tensor(mat)
    #tensor = tl.tensor(np.arange(24).reshape((3, 4, 2)))
    print("The given tensor:")
    start_analysis_fignmbr=0
    weights, factors = non_negative_parafac(tensor, rank=rank)
    print("Factors: ")
    print(factors)
    print("Slice: ")
    print(factors[0])
    print(list(factors[0][:,0]))
    # Integer axis labels for each mode.
    user=[i for i in range(0,x)]
    print(user)
    thread=[i for i in range(0,y)]
    time=[i for i in range(0,z)]
    # Per-mode component columns collected for the anomaly analysis below.
    userdataset=[]
    threaddataset=[]
    timedataset=[]
    print("Tensor figures drawing..")
    plt.figure(1)
    # Three subplots per component: i%3 selects the mode (0=user, 1=thread,
    # 2=time) and i//3 selects the component column.
    for i in range(start_analysis_fignmbr,rank*3):
        fignmbr=(i%3)+1
        col=int(i/3)
        row=int(i%3)
        print("row: ",row," col: ",col," fignmbr: ",fignmbr)
        # NOTE(review): this rebinding shadows the `y` parameter for the
        # rest of the function.
        y=list(factors[row][:,col])
        #print(y)
        ax=plt.subplot(1,3,fignmbr)
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        if i%3==0:
            plt.plot(user,y,'r.')
            plt.xlabel("User")
            userdataset.append(y)
        if i%3==1:
            plt.plot(thread,y,'c.')
            plt.xlabel("Thread")
            threaddataset.append(y)
        if i%3==2:
            plt.plot(time,y,'g.')
            plt.xlabel("Monthly TIme Bin")
            timedataset.append(y)
        # After the third subplot of a component, save and start a new figure.
        if fignmbr%3==0:
            directory='figures/component ' + str(int((i+1)/3))
            plt.savefig(directory)
            plt.close()
    # These values are placeholders: the actual thresholds are computed
    # inside find_anomalous_component.
    u_t=0.1;
    th_t=0.05
    t_t=0.005
    print("Finding and drawing scree plots in figure anamalous...")
    map_component_userlist,map_component_user_value=find_anomalous_component(userdataset,threaddataset,"Number of User","Number of Thread",u_t,th_t,1)
    map_component_timelist,map_component_time_value=find_anomalous_component(timedataset,userdataset,"Number of Time Bin","Number of User",t_t,u_t,3)
    map_component_threadlist,map_component_thread_value=find_anomalous_component(threaddataset,timedataset,"Number of Thread","Number of Time Bin",th_t,t_t,2)
    print("checking")
    print(map_component_userlist.keys())
    print(map_component_userlist[0])
    print(map_component_user_value[0])
    return map_component_userlist,map_component_user_value,map_component_timelist,map_component_time_value,map_component_threadlist,map_component_thread_value
    # (inside a loop over sample directories — the loop header precedes this chunk)
    filename = os.path.join(path, dirname + '/coords.csv_sparse_graph.npz')
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            # Densify the stored sparse adjacency matrix for the tensor slice.
            mat = scipy.sparse.load_npz(filename).astype(np.float32).todense()
            # sub_threshold_indices = mat < 0.5
            # mat[sub_threshold_indices] = 0
            T[count, :, :] = mat
            del mat
        continue
    else:
        continue
    # NOTE(review): both branches above `continue`, so this increment is
    # unreachable and every matrix overwrites T[0] — confirm intent.
    count += 1

factors = non_negative_parafac(T, rank=rank, verbose=1, init='random', tol=1e-4)
# print(factors[0])
# print([f.shape for f in factors[1]])
# print([tl.norm(factors[1][0][:,ind],2)*tl.norm(factors[1][1][:,ind],2)*tl.norm(factors[1][2][:,ind],2) for ind in range(rank)])
del T  # free the stacked tensor once factorized

import pickle as pkl

save = True
load = False
# Output path encodes the run parameters (rank, sample index, sample size).
fileName = path + '/NNparafac' + '_rank' + str(rank) + '_sample' + str(
    ind) + '_size' + str(samples) + '.pkl'
fileObject = open(fileName, 'wb')
def seq_parafac(input_tensor, max_rank, nb_trial, pred_df, tol=1e-07, mode='non-negative'):
    """Sequential TCA for model selection.

    This method computes TCA with a number of components ranging from 1 to the
    maximum rank indicated, and stores reconstruction error, similarity,
    sparsity for each model obtained. It fits also random forest classifiers
    to predict reward and odor for each trial using trial factors, and it
    stores the associated prediction accuracy.

    Arguments:
        input_tensor {array} -- 3-dimensional activity array
        max_rank {scalar} -- maximum rank for TCA
        nb_trial {scalar} -- number of replication of same TCA
        pred_df {pandas dataframe} -- reward and odor information for each trial

    Keyword Arguments:
        tol {scalar} -- tolerance for optimization convergence
        mode {string} -- version of TCA to compute, classic or non-negative
            (default: {'non-negative'})

    Returns:
        pandas err_df -- reconstruction error for each TCA run
        pandas sim_df -- similarity against best fit for each TCA run
        pandas spa_df -- sparsity for each TCA run
        pandas odor_df -- odor prediction accuracy for each TCA run
        pandas rew_df -- reward prediction accuracy for each TCA run
    """
    # lists used for output dataframes
    rank_err = []
    rank_sim = []
    err_list = []
    sim_list = []
    spa_list = []
    odor_acc = []
    odor_std = []
    rew_acc = []
    rew_std = []
    for rank in np.arange(1, max_rank + 1):
        # in this list we store factors extracted with TCA for each run
        pred_fac = []
        # minimal error initialized at maximum 1, useful to identify best fit model
        min_err = 1
        # index of the best fit model in the factors list
        min_idx = 0
        # we iterate over nb_trial, the number of replicates of TCA to run
        # it allows stability check, i.e by computing sparsity
        for trial in range(nb_trial):
            # verbose to know which replicate is running
            print('Trial', trial)
            # build a list useful for err_df
            rank_err.append(rank)
            if mode == 'non-negative':
                # where TCA is actually computed, here in its non-negative version
                pred_fac.append(
                    non_negative_parafac(input_tensor, rank=rank,
                                         n_iter_max=1000, init='svd',
                                         tol=tol, verbose=1))
            else:
                # where TCA is actually computed, in its classic version
                pred_fac.append(
                    parafac(input_tensor, rank=rank, n_iter_max=5000,
                            init='random', tol=tol))
            # we store all factors in a list, to be able to compute model similarity in the end
            # transform pred_fac from kruskal form (list of factors) to full-tensor form
            pred_tensor = tl.kruskal_to_tensor(pred_fac[trial])
            # compute reconstruction error, L2 distance from predicted to original tensor
            err = rec_err(input_tensor, pred_tensor)
            err_list.append(err)
            # here we compute sparsity, the proportion of almost-zero elements
            # (entries that round to 0 at 2 decimals)
            nb_nonzero = 0
            tot_size = 0
            for i in range(len(pred_fac[trial])):
                nb_nonzero += np.count_nonzero(np.round(pred_fac[trial][i], 2))
                tot_size += pred_fac[trial][i].size
            spa = 1 - nb_nonzero / tot_size
            spa_list.append(spa)
            # we shuffle samples matrix (here trial factors) and labels (odor and reward)
            # using same permutation
            X, y_odor, y_rew = shuffle(pred_fac[trial][2],
                                       pred_df['Odor'].tolist(),
                                       pred_df['Reward'].tolist())
            # initialize random forest classifier
            clf = RandomForestClassifier(n_estimators=50, max_depth=None,
                                         min_samples_split=2,
                                         max_features='sqrt')
            # scale the data before fitting
            X = StandardScaler().fit_transform(X)
            # compute cross validated prediction accuracy for odor and reward
            # NOTE(review): each cross_val_score call re-runs the CV, so each
            # reported std comes from a different run than its mean — confirm
            # this is intended.
            odor_acc.append(cross_val_score(clf, X, y_odor, cv=8).mean())
            odor_std.append(cross_val_score(clf, X, y_odor, cv=8).std())
            rew_acc.append(cross_val_score(clf, X, y_rew, cv=8).mean())
            rew_std.append(cross_val_score(clf, X, y_rew, cv=8).std())
            # we keep track of the model having lowest reconstruction error
            # we will use this model as a reference to compute model similarity
            if err < min_err:
                min_err = err
                min_idx = trial
        # we iterate again over all computed models to calculate similarity
        # versus best fit model
        for trial in range(nb_trial):
            # if the model is the best fit, do nothing
            if trial == min_idx:
                continue
            # build a list useful for sim_df
            rank_sim.append(rank)
            # align factors to compute similarity
            sim_list.append(
                tt.kruskal_align(tt.tensors.KTensor(pred_fac[min_idx]),
                                 tt.tensors.KTensor(pred_fac[trial]),
                                 permute_U=True, permute_V=True))
    # build dataframes to store results
    err_df = pd.DataFrame(data=np.transpose([rank_err, err_list]),
                          columns=['Rank', 'Reconstruction Error'])
    sim_df = pd.DataFrame(data=np.transpose([rank_sim, sim_list]),
                          columns=['Rank', 'Similarity'])
    spa_df = pd.DataFrame(data=np.transpose([rank_err, spa_list]),
                          columns=['Rank', 'Sparsity'])
    odor_df = pd.DataFrame(data=np.transpose([rank_err, odor_acc, odor_std]),
                           columns=[
                               'Rank', 'Accuracy - Odor Prediction',
                               'Std - Odor Prediction'
                           ])
    rew_df = pd.DataFrame(data=np.transpose([rank_err, rew_acc, rew_std]),
                          columns=[
                              'Rank', 'Accuracy - Reward Prediction',
                              'Std - Reward Prediction'
                          ])
    return err_df, sim_df, spa_df, odor_df, rew_df
    break  # tail of a loop whose header precedes this chunk

final_tensor = tl.tensor(final_tensor, dtype='float64')

# Training Tucker and PARAFAC tensor decomposition models
# NOTE(review): `factors` from the Tucker fit is immediately overwritten by
# the PARAFAC fit below — confirm the Tucker result is meant to be discarded.
core, factors = non_negative_tucker(final_tensor, ranks=[120, 20, 220],
                                    n_iter_max=100, init='random',
                                    tol=0.00001, random_state=None,
                                    verbose=True)
factors = non_negative_parafac(final_tensor, rank=70, n_iter_max=100,
                               init='random', tol=0.00001,
                               random_state=None, verbose=True)
reconstructed_tensor = tl.kruskal_to_tensor(factors)
nonzero_mat = np.nonzero(final_tensor)

#Computing RMSE
# NOTE(review): this accumulates *signed* differences, not squared ones;
# confirm the (unseen) continuation squares/aggregates as RMSE requires.
error = 0
for i in range(len(nonzero_mat[0])):
    error += final_tensor[nonzero_mat[0][i]][nonzero_mat[1][i]][
        nonzero_mat[2][i]] - reconstructed_tensor[nonzero_mat[0][i]][
            nonzero_mat[1][i]][nonzero_mat[2][i]]
# factors of the NCP, and transform these factors (and factors weights) into # an instance of the CPTensor class: weights_init, factors_init = initialize_nn_cp(tensor, init='random', rank=10) cp_init = CPTensor((weights_init, factors_init)) ############################################################################## # Non-negative Parafac # ----------------------- # From now on, we can use the same ``cp_init`` tensor as the initial tensor when # we use decomposition functions. Now let us first use the algorithm based on # Multiplicative Update, which can be called as follows: tic = time.time() tensor_mu, errors_mu = non_negative_parafac(tensor, rank=10, init=deepcopy(cp_init), return_errors=True) cp_reconstruction_mu = tl.cp_to_tensor(tensor_mu) time_mu = time.time()-tic ############################################################################## # Here, we also compute the output tensor from the decomposed factors by using # the cp_to_tensor function. The tensor cp_reconstruction_mu is therefore a # low-rank non-negative approximation of the input tensor; looking at the # first few values of both tensors shows that this is indeed # the case but the approximation is quite coarse. print('reconstructed tensor\n', cp_reconstruction_mu[10:12, 10:12, 10:12], '\n') print('input data tensor\n', tensor[10:12, 10:12, 10:12]) ############################################################################## # Non-negative Parafac with HALS
delt1 = 10

# Load each measurement file, strip its first two baseline lines, and stack
# the resulting surfaces into the tensor to decompose.
for i in range(len(names)):
    x, y, z = read_file(default_way + names[i] + '.txt')
    z = erase_first_line(x, y, z)
    z = erase_second_line(x, y, z)
    #fig = plt.figure(figsize=(5, 5))
    #show_data(x, y, z, fig)
    tensor_data.append(z)
    #plt.show()

# obtaining the factors
# NOTE(review): this assignment is dead — the loop below rebinds `rank`.
rank = 12
for rank in range(15, 16, 1):
    factors = decomp.non_negative_parafac(np.array(tensor_data), rank,
                                          n_iter_max=2000, tol=1e-6)
    fig = plt.figure(figsize=(5, 5))
    print(factors[0])
    # Three stacked subplots: one per tensor mode.
    plt.subplot(311)
    for i in range(rank):
        z_f1 = np.transpose(factors[1][0])[i]
        plt.plot(np.linspace(1, len(names), len(names), endpoint=True), z_f1)
    plt.subplot(312)
    for i in range(rank):
        z_f1 = np.transpose(factors[1][1])[i]
        plt.plot(x, z_f1)
    plt.subplot(313)
    for i in range(rank):
mat = scipy.sparse.load_npz(filename).astype(np.float32).todense() # sub_threshold_indices = mat < 0.5 # mat[sub_threshold_indices] = 0 u, s, vh = tl.partial_svd( mat, n_eigenvecs=3 ) # 3 should be enough because the data is x,y,z times beads T[count, :, :] = np.dot(u, np.dot(np.diag(s), vh)) del mat continue else: continue count += 1 factors = non_negative_parafac(T, rank=rank, n_iter_max=10000, verbose=1, init='svd', tol=1e-10) # print(factors[0]) # print([f.shape for f in factors[1]]) # print([tl.norm(factors[1][0][:,ind],2)*tl.norm(factors[1][1][:,ind],2)*tl.norm(factors[1][2][:,ind],2) for ind in range(rank)]) del T import pickle as pkl save = True load = False fileName = path + '/NNparafac' + '_rank' + str(rank) + '_sample' + str( ind) + '_size' + str(samples) + '.pkl' fileObject = open(fileName, 'wb')