import itertools

from numpy import linalg

import tensortools as tt

# Absolute tolerance for float64 comparisons (value assumed; the test suite
# this snippet comes from defines it at module level).
atol_float64 = 1e-8


def test_align():
    # Generate random KTensor.
    I, J, K, R = 15, 16, 17, 4
    U = tt.randn_ktensor((I, J, K), rank=R)
    X = U.full()  # Dense representation of U.

    # Enumerate all permutations of factors and test that
    # kruskal_align appropriately inverts this permutation.
    for prm in itertools.permutations(range(R)):
        V = U.copy()
        V.permute(prm)
        # The similarity score should be ~1. The abs() guards against the
        # assert passing vacuously, since the score never exceeds 1.
        assert abs(tt.kruskal_align(U, V) - 1) < atol_float64
        assert linalg.norm(X - U.full()) < atol_float64
        assert linalg.norm(X - V.full()) < atol_float64

    # Test that the second input to kruskal_align is correctly permuted.
    for prm in itertools.permutations(range(R)):
        V = U.copy()
        V.permute(prm)
        tt.kruskal_align(U, V, permute_V=True)
        for fU, fV in zip(U, V):
            assert linalg.norm(fU - fV) < atol_float64
        assert linalg.norm(X - U.full()) < atol_float64
        assert linalg.norm(X - V.full()) < atol_float64

    # Test that the first input to kruskal_align is correctly permuted.
    for prm in itertools.permutations(range(R)):
        V = U.copy()
        V.permute(prm)
        tt.kruskal_align(V, U, permute_U=True)
        for fU, fV in zip(U, V):
            assert linalg.norm(fU - fV) < atol_float64
        assert linalg.norm(X - U.full()) < atol_float64
        assert linalg.norm(X - V.full()) < atol_float64
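# Minimal usage sketch (not from the original test) of the contract exercised
# above: tt.kruskal_align returns a similarity score in [0, 1], where 1 means
# the two KTensors match up to factor permutation and rescaling, and the
# permute_U/permute_V flags reorder the factors of the corresponding argument
# in place. The shapes, rank, and permutation below are illustrative.
import tensortools as tt

U = tt.randn_ktensor((15, 16, 17), rank=4)
V = U.copy()
V.permute([2, 0, 3, 1])                  # scramble the factor order
print(tt.kruskal_align(U, V))            # ~1.0: same model up to permutation
tt.kruskal_align(U, V, permute_V=True)   # reorders V's factors to match U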
import numpy as np
import pandas as pd
import tensortools as tt


def fit(responses, ranks=[1, 2, 3, 4, 5], N=20):
    results = []
    # Loop over ranks, fitting N replicate models per rank.
    for r in ranks:
        for n in range(N):
            # Run the decomposition and record rank, error, and factors.
            U = tt.ncp_hals(responses, rank=r, verbose=False)
            results.append(
                {'rank': r, 'err': U.obj, 'sim': np.nan, 'factors': U.factors})

    # Collect the results in a dataframe.
    results = pd.DataFrame(results)

    # Find the minimum-error model at each rank.
    U_min = results.loc[results.groupby('rank')['err'].idxmin(), ['factors']]
    U_min.index = ranks
    U_min = U_min.to_dict()['factors']

    # Score every model against the best model of the same rank.
    for i, row in results.iterrows():
        U = U_min[row['rank']]
        V = row['factors']
        # Compute the similarity score.
        results.loc[i, 'sim'] = tt.kruskal_align(U, V)
    return results, U_min
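# Hedged usage sketch for fit() above (not part of the original code):
# `responses` is a synthetic nonnegative tensor invented for illustration;
# ncp_hals fits a nonnegative decomposition, so nonnegative data is the
# typical input, hence np.abs.
import numpy as np
import tensortools as tt

responses = np.abs(tt.randn_ktensor((20, 30, 40), rank=3).full())
results, U_min = fit(responses, ranks=[1, 2, 3], N=5)
print(results.groupby('rank')[['err', 'sim']].mean())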
import matplotlib.pyplot as plt
import numpy as np
import tensortools


def plot_similarity_score(X, ranks=[1, 2, 3, 4, 5, 10, 20, 40, 60], n_runs=5):
    print('This may take a while...')
    rank_similarity_scores = []
    for rank in ranks:
        # Fit n_runs replicate models at this rank.
        U = []
        for n in range(n_runs):
            U_r = tensortools.ncp_bcd(X, rank=rank, verbose=False)
            U.append(U_r)
        # Score consecutive pairs of fits against each other.
        similarity_scores = []
        for n in range(n_runs - 1):
            similarity = tensortools.kruskal_align(U[n].factors,
                                                   U[n + 1].factors,
                                                   permute_U=True,
                                                   permute_V=True)
            similarity_scores.append(similarity)
        rank_similarity_scores.append(similarity_scores)
    rank_similarity_scores = np.array(rank_similarity_scores)

    # Plot the mean similarity score with SEM error bars.
    fig = plt.figure(figsize=(7, 3))
    x = np.arange(len(ranks))
    sem = rank_similarity_scores.std(axis=1) / np.sqrt(
        rank_similarity_scores.shape[1])
    plt.errorbar(x, rank_similarity_scores.mean(axis=1), yerr=sem)
    plt.xticks(ticks=x, labels=ranks)
    plt.xlabel('N factors')
    plt.ylabel('Similarity (mean +/- SEM)')
    return fig
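# Hedged usage sketch for plot_similarity_score() above (not part of the
# original code): the tensor and the shortened rank list are illustrative
# choices to keep the run fast.
import numpy as np
import tensortools

X = np.abs(tensortools.randn_ktensor((20, 30, 25), rank=4).full())
fig = plot_similarity_score(X, ranks=[1, 2, 4, 8], n_runs=3)
fig.savefig('similarity_vs_rank.png')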
import matplotlib.pyplot as plt
import numpy as np
import tensortools


def cp_als_test():
    # Create a synthetic dataset.
    I, J, K, R = 25, 25, 25, 3  # dimensions and rank parameters

    # Create a random tensor consisting of a low-rank component plus noise.
    X = tensortools.randn_ktensor((I, J, K), rank=R).full()
    X += np.random.randn(I, J, K)  # add some random noise

    # Perform the CP tensor decomposition twice.
    U = tensortools.cp_als(X, rank=R, verbose=True)
    V = tensortools.cp_als(X, rank=R, verbose=True)

    # Compare the low-dimensional factors from the two fits.
    fig, _, _ = tensortools.plot_factors(U.factors)
    tensortools.plot_factors(V.factors, fig=fig)

    # Align the two fits and print a similarity score.
    similarity_score = tensortools.kruskal_align(U.factors, V.factors,
                                                 permute_U=True,
                                                 permute_V=True)
    print(similarity_score)

    # Plot the results again to see the alignment.
    fig, ax, po = tensortools.plot_factors(U.factors)
    tensortools.plot_factors(V.factors, fig=fig)
    plt.show()
import matplotlib.pyplot as plt
import tensortools as tt


def tca(X, non_neg=True, R=10, prefix="", max_iter=500, epoc="all",
        effect="all"):
    # Fit the CP tensor decomposition (two times).
    opti_str = 'ncp_bcd_'
    if non_neg:
        U = tt.ncp_bcd(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
        V = tt.ncp_bcd(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
    else:
        U = tt.cp_als(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
        V = tt.cp_als(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
        opti_str = 'cp_als_'

    # Compare the raw (unaligned) factors from the two fits.
    # fig, ax, po = tt.plot_factors(U.factors)
    # tt.plot_factors(V.factors, fig=fig)
    # fig.suptitle("raw models")
    # fig.tight_layout()

    # Align the two fits and print a similarity score.
    sim = tt.kruskal_align(U.factors, V.factors, permute_U=True,
                           permute_V=True)
    print(sim)

    # Plot the results again to see the alignment.
    fig, ax, po = tt.plot_factors(U.factors,
                                  plots=["scatter", "scatter", "line"])
    tt.plot_factors(V.factors, plots=["scatter", "scatter", "line"], fig=fig)
    for x in ax[:, 2]:
        x.set_xticks([11.5, 15.5, 27.5, 31.5])
    ax[-1, 0].set_xlabel("SU #")
    ax[-1, 1].set_xlabel("Trial #")
    ax[-1, 2].set_xlabel("Time (s)")
    ax[-1, 2].set_xticklabels(["S", "+1", "T", "+1"])
    fig.suptitle("aligned models")
    fig.tight_layout()

    # Show, resize, and save the figure.
    plt.show()
    fig.set_size_inches(40, 40)
    fig.set_dpi(300)
    fig.savefig(
        opti_str + prefix + "tca_trial_" + epoc + "_" + effect + "_"
        + str(X.shape[1]) + "_R" + str(R) + ".png",
        dpi=300,
        bbox_inches="tight",
    )
    return (U, V, sim)
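# Hedged usage sketch for tca() above (not part of the original code): the
# tensor is synthetic, and the axis annotations inside tca() assume a
# particular (unit x trial x time) task layout, so the tick labels are only
# meaningful for data shaped like the original experiment.
import numpy as np
import tensortools as tt

X = np.abs(tt.randn_ktensor((30, 40, 36), rank=6).full())
U, V, sim = tca(X, non_neg=True, R=6, prefix="demo_", max_iter=200)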
import matplotlib.pyplot as plt
import numpy as np
import tensortools as tt

# Make a synthetic dataset.
I, J, K, R = 25, 25, 25, 4  # dimensions and rank
X = tt.randn_ktensor((I, J, K), rank=R).full()
X += np.random.randn(I, J, K)  # add noise

# Fit the CP tensor decomposition (two times).
U = tt.cp_als(X, rank=R, verbose=True)
V = tt.cp_als(X, rank=R, verbose=True)

# Compare the low-dimensional factors from the two fits.
fig, _, _ = tt.plot_factors(U.factors)
tt.plot_factors(V.factors, fig=fig)

# Align the two fits and print a similarity score.
sim = tt.kruskal_align(U.factors, V.factors, permute_U=True, permute_V=True)
print(sim)

# Plot the results again to see the alignment.
fig, ax, po = tt.plot_factors(U.factors)
tt.plot_factors(V.factors, fig=fig)

# Show plots.
plt.show()

# =============================================================================
# =============================================================================
# More structured data to visualize the decomposition.
from sklearn.preprocessing import normalize

tensor_size = (3, 100, 25)
import numpy as np
import pandas as pd
import tensorly as tl
import tensortools as tt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from tensorly.decomposition import non_negative_parafac, parafac

# rec_err (normalized reconstruction error) is assumed to be defined
# elsewhere in the original module.


def seq_parafac(input_tensor, max_rank, nb_trial, pred_df, tol=1e-07,
                mode='non-negative'):
    """Sequential TCA for model selection.

    This method computes TCA with a number of components ranging from 1 to
    the maximum rank indicated, and stores the reconstruction error,
    similarity, and sparsity of each model obtained. It also fits random
    forest classifiers to predict the reward and odor of each trial from the
    trial factors, and stores the associated prediction accuracies.

    Arguments:
        input_tensor {array} -- 3-dimensional activity array
        max_rank {scalar} -- maximum rank for TCA
        nb_trial {scalar} -- number of replications of the same TCA
        pred_df {pandas dataframe} -- reward and odor information for each trial

    Keyword Arguments:
        tol {scalar} -- tolerance for optimization convergence
        mode {string} -- version of TCA to compute, classic or non-negative
            (default: {'non-negative'})

    Returns:
        pandas err_df -- reconstruction error for each TCA run
        pandas sim_df -- similarity against best fit for each TCA run
        pandas spa_df -- sparsity for each TCA run
        pandas odor_df -- odor prediction accuracy for each TCA run
        pandas rew_df -- reward prediction accuracy for each TCA run
    """
    # Lists used to build the output dataframes.
    rank_err = []
    rank_sim = []
    err_list = []
    sim_list = []
    spa_list = []
    odor_acc = []
    odor_std = []
    rew_acc = []
    rew_std = []

    for rank in np.arange(1, max_rank + 1):
        # Factors extracted with TCA, stored for each run at this rank so
        # that model similarity can be computed at the end.
        pred_fac = []
        # Minimal error, initialized at the maximum value of 1; used to
        # identify the best-fit model.
        min_err = 1
        # Index of the best-fit model in the factors list.
        min_idx = 0

        # Iterate over nb_trial replicates of the same TCA; running several
        # replicates allows stability checks such as model similarity.
        for trial in range(nb_trial):
            # Verbose, to know which replicate is running.
            print('Trial', trial)
            rank_err.append(rank)

            if mode == 'non-negative':
                # Non-negative version of TCA.
                pred_fac.append(
                    non_negative_parafac(input_tensor, rank=rank,
                                         n_iter_max=1000, init='svd',
                                         tol=tol, verbose=1))
            else:
                # Classic version of TCA.
                pred_fac.append(
                    parafac(input_tensor, rank=rank, n_iter_max=5000,
                            init='random', tol=tol))

            # Transform pred_fac from Kruskal form (list of factors) to
            # full-tensor form.
            pred_tensor = tl.kruskal_to_tensor(pred_fac[trial])

            # Compute the reconstruction error, the L2 distance from the
            # predicted to the original tensor.
            err = rec_err(input_tensor, pred_tensor)
            err_list.append(err)

            # Compute sparsity: the proportion of almost-zero elements.
            nb_nonzero = 0
            tot_size = 0
            for i in range(len(pred_fac[trial])):
                nb_nonzero += np.count_nonzero(np.round(pred_fac[trial][i], 2))
                tot_size += pred_fac[trial][i].size
            spa = 1 - nb_nonzero / tot_size
            spa_list.append(spa)

            # Shuffle the samples matrix (here, the trial factors) and the
            # labels (odor and reward) with the same permutation.
            X, y_odor, y_rew = shuffle(pred_fac[trial][2],
                                       pred_df['Odor'].tolist(),
                                       pred_df['Reward'].tolist())

            # Initialize a random forest classifier.
            clf = RandomForestClassifier(n_estimators=50, max_depth=None,
                                         min_samples_split=2,
                                         max_features='sqrt')

            # Scale the data before fitting.
            X = StandardScaler().fit_transform(X)

            # Compute cross-validated prediction accuracy for odor and
            # reward. The scores are computed once per target so that the
            # mean and std come from the same cross-validation splits.
            odor_scores = cross_val_score(clf, X, y_odor, cv=8)
            odor_acc.append(odor_scores.mean())
            odor_std.append(odor_scores.std())
            rew_scores = cross_val_score(clf, X, y_rew, cv=8)
            rew_acc.append(rew_scores.mean())
            rew_std.append(rew_scores.std())

            # Keep track of the model with the lowest reconstruction error;
            # it serves as the reference when computing model similarity.
            if err < min_err:
                min_err = err
                min_idx = trial

        # Iterate again over all computed models to calculate their
        # similarity against the best-fit model.
        for trial in range(nb_trial):
            # The best-fit model itself is skipped.
            if trial == min_idx:
                continue
            rank_sim.append(rank)
            # Align the factors to compute similarity.
            sim_list.append(
                tt.kruskal_align(tt.tensors.KTensor(pred_fac[min_idx]),
                                 tt.tensors.KTensor(pred_fac[trial]),
                                 permute_U=True, permute_V=True))

    # Build dataframes to store the results.
    err_df = pd.DataFrame(data=np.transpose([rank_err, err_list]),
                          columns=['Rank', 'Reconstruction Error'])
    sim_df = pd.DataFrame(data=np.transpose([rank_sim, sim_list]),
                          columns=['Rank', 'Similarity'])
    spa_df = pd.DataFrame(data=np.transpose([rank_err, spa_list]),
                          columns=['Rank', 'Sparsity'])
    odor_df = pd.DataFrame(data=np.transpose([rank_err, odor_acc, odor_std]),
                           columns=['Rank', 'Accuracy - Odor Prediction',
                                    'Std - Odor Prediction'])
    rew_df = pd.DataFrame(data=np.transpose([rank_err, rew_acc, rew_std]),
                          columns=['Rank', 'Accuracy - Reward Prediction',
                                   'Std - Reward Prediction'])
    return err_df, sim_df, spa_df, odor_df, rew_df
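# Hedged usage sketch for seq_parafac() above (not part of the original
# code): the activity tensor and odor/reward labels are synthetic
# placeholders, and rec_err() is assumed to be defined as in the original
# module. pred_df needs one row per trial (third tensor mode) with 'Odor'
# and 'Reward' columns, since the trial factors pred_fac[trial][2] feed the
# classifiers.
import numpy as np
import pandas as pd

n_trials = 40
acts = np.abs(np.random.randn(15, 50, n_trials))
pred_df = pd.DataFrame({'Odor': np.random.choice(['A', 'B'], n_trials),
                        'Reward': np.random.choice([0, 1], n_trials)})
err_df, sim_df, spa_df, odor_df, rew_df = seq_parafac(
    acts, max_rank=3, nb_trial=4, pred_df=pred_df)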
import os

import matplotlib.pyplot as plt
import numpy as np
import scipy.ndimage as spi
import tensortools as tt
from sklearn.preprocessing import normalize

# lm (a .mat loader), prepareData, _fast_bin, and _fast_occ are assumed to
# be defined or imported elsewhere in the original module.


def run_pipeline(filename):
    data = lm.loadmat(filename)
    session_name = os.path.basename(filename)[0:-4]
    (good_cells, pos_edges, trial_idx, spikelocations, spike_idx,
     location_vec) = prepareData(data)
    n_trials = 30
    n_cells = len(good_cells)
    n_bins = len(pos_edges) - 1

    # Bin spikes into (cell x position x trial) counts.
    counts = np.zeros((n_cells, n_bins, n_trials), dtype=float)
    _fast_bin(counts, trial_idx, spikelocations, spike_idx)

    # Build the occupancy map and interpolate over unvisited bins.
    occupancy = np.zeros((n_bins, n_trials), dtype=float)
    _fast_occ(occupancy, data['trial'] - 1, location_vec)
    for iT in range(n_trials):
        tmp = occupancy[:, iT]
        idx_v = np.flatnonzero(tmp)
        idx_n = np.flatnonzero(tmp == 0)
        tmp[idx_n] = np.interp(idx_n, idx_v, tmp[idx_v])
        occupancy[:, iT] = tmp

    # Normalize counts by occupancy and smooth along the spatial axis.
    spMapN = np.zeros(counts.shape)
    for iC in range(n_cells):
        spMapN[iC, :, :] = np.divide(counts[iC, :, :], occupancy)
    spMapN = spi.gaussian_filter(spMapN, (0, 2, 0))

    # Flatten to (cell x position*trial), normalize, and rebuild the tensor.
    spFlat = np.zeros((n_cells, n_trials * n_bins))
    for iC in range(n_cells):
        spFlat[iC, :] = spMapN[iC, :, :].ravel(order='F')
    # spFlat = spFlat - spFlat.mean(axis=1)[:, np.newaxis]
    spFlat = normalize(spFlat, axis=0, norm='l2')
    for iC in range(n_cells):
        for iT in range(n_trials):
            start = iT * n_bins
            stop = (iT + 1) * n_bins
            bin_idx = np.arange(start, stop)
            spMapN[iC, :, iT] = spFlat[iC, bin_idx]

    # Fit the CP tensor decomposition (two times).
    R = 5
    U = tt.ncp_bcd(spMapN, rank=R, verbose=False)
    V = tt.ncp_bcd(spMapN, rank=R, verbose=False)

    # Align the two fits and print a similarity score.
    sim = tt.kruskal_align(U.factors, V.factors, permute_U=True,
                           permute_V=True)
    # print(sim)

    # Plot the results again to see the alignment.
    fig, ax, po = tt.plot_factors(U.factors)
    tt.plot_factors(V.factors, fig=fig)
    fig.suptitle("aligned models")
    fig.tight_layout()
    fig.savefig('C:\\temp\\try3\\' + session_name + '_tca.png')

    # Plot the trial-by-trial covariance of the flattened spatial maps.
    ff = np.matmul(np.transpose(spFlat), spFlat)
    plt.figure()
    im = plt.imshow(ff)
    plt.colorbar()
    plt.axvline(x=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axvline(x=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.savefig('C:\\temp\\try3\\' + session_name + '_cov.png')
    plt.close('all')
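# Hedged usage note for run_pipeline() above (not part of the original
# code): it only runs where the helper functions noted above are available,
# and the .mat path below is a placeholder.
run_pipeline('C:\\temp\\try3\\example_session.mat')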