Example #1
# Assumed context for this snippet (imports and tolerance not shown on the original page):
import itertools
from numpy import linalg
import tensortools as tt

atol_float64 = 1e-8  # assumed float64 tolerance for the assertions below

def test_align():
    # Generate random KTensor.
    I, J, K, R = 15, 16, 17, 4
    U = tt.randn_ktensor((I, J, K), rank=R)
    X = U.full()  # Dense representation of U.

    # Enumerate all permutations of factors and test that
    # kruskal_align appropriately inverts this permutation.
    for prm in itertools.permutations(range(R)):
        V = U.copy()
        V.permute(prm)
        assert abs(tt.kruskal_align(U, V) - 1) < atol_float64
        assert linalg.norm(X - U.full()) < atol_float64
        assert linalg.norm(X - V.full()) < atol_float64

    # Test that second input to kruskal_align is correctly permuted.
    for prm in itertools.permutations(range(R)):
        V = U.copy()
        V.permute(prm)
        tt.kruskal_align(U, V, permute_V=True)
        for fU, fV in zip(U, V):
            assert linalg.norm(fU - fV) < atol_float64
            assert linalg.norm(X - U.full()) < atol_float64
            assert linalg.norm(X - V.full()) < atol_float64

    # Test that first input to kruskal_align is correctly permuted.
    for prm in itertools.permutations(range(R)):
        V = U.copy()
        V.permute(prm)
        tt.kruskal_align(V, U, permute_U=True)
        for fU, fV in zip(U, V):
            assert linalg.norm(fU - fV) < atol_float64
            assert linalg.norm(X - U.full()) < atol_float64
            assert linalg.norm(X - V.full()) < atol_float64
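
A minimal standalone sketch of the API this test exercises (using the same tensortools functions as above; kruskal_align returns a similarity score, with 1.0 for identical models):

import tensortools as tt

U = tt.randn_ktensor((15, 16, 17), rank=4)
V = U.copy()
score = tt.kruskal_align(U, V)  # similarity of the two KTensors; 1.0 here
assert abs(score - 1) < 1e-8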
Example #2
# Assumed imports for this snippet (not shown on the original page):
import numpy as np
import pandas as pd
import tensortools as tt

def fit(responses, ranks=[1, 2, 3, 4, 5], N=20):
    results = []

    # Loop through ranks and N
    for r in ranks:
        for _ in range(N):
            # Do decomposition and record rank, err and factors
            U = tt.ncp_hals(responses, rank=r, verbose=False)
            results.append(
                {'rank': r, 'err': U.obj, 'sim': np.nan, 'factors': U.factors})

    # Create results dataframe
    results = pd.DataFrame(results)

    # Find U with min error
    U_min = results.loc[results.groupby('rank')['err'].idxmin(), ['factors']]
    U_min.index = ranks
    U_min = U_min.to_dict()['factors']

    for i, row in results.iterrows():
        U = U_min[row['rank']]
        V = row['factors']

        # Compute similarity
        results.loc[i, 'sim'] = tt.kruskal_align(U, V)

    return results, U_min
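
A hypothetical call of fit (the responses tensor below is a synthetic placeholder; ncp_hals expects non-negative data):

import numpy as np

responses = np.random.rand(20, 30, 40)  # synthetic non-negative data tensor
results, U_min = fit(responses, ranks=[1, 2, 3], N=5)
print(results.groupby('rank')[['err', 'sim']].mean())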
Example #3
# Assumed imports for this snippet (not shown on the original page):
import matplotlib.pyplot as plt
import numpy as np
import tensortools

def plot_similarity_score(X, ranks=[1, 2, 3, 4, 5, 10, 20, 40, 60], n_runs=5):
    print('this may take a while...')
    rank_similarity_scores = []
    for rank in ranks:
        U = []
        for n in range(n_runs):
            U_r = tensortools.ncp_bcd(X, rank=rank, verbose=False)
            U.append(U_r)

        similarity_scores = []
        for n in range(n_runs - 1):
            similarity = tensortools.kruskal_align(U[n].factors,
                                                   U[n + 1].factors,
                                                   permute_U=True,
                                                   permute_V=True)
            similarity_scores.append(similarity)

        rank_similarity_scores.append(similarity_scores)

    rank_similarity_scores = np.array(rank_similarity_scores)

    # plot similarity scores
    fig = plt.figure(figsize=(7, 3))
    x = np.arange(len(ranks))
    sem = rank_similarity_scores.std(axis=1) / np.sqrt(
        rank_similarity_scores.shape[1])
    plt.errorbar(x, rank_similarity_scores.mean(axis=1), yerr=sem)
    plt.xticks(ticks=x, labels=ranks)
    plt.xlabel('N factors')
    plt.ylabel('similarity mean+/-SEM')

    return fig
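
A hypothetical call of plot_similarity_score (the data tensor is a synthetic placeholder; ncp_bcd expects non-negative data):

import numpy as np

X = np.random.rand(30, 40, 50)  # synthetic non-negative data tensor
fig = plot_similarity_score(X, ranks=[1, 2, 3], n_runs=3)
fig.savefig('similarity_scores.png', dpi=150)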
Example #4
# Assumed imports for this snippet (not shown on the original page):
import matplotlib.pyplot as plt
import numpy as np
import tensortools

def cp_als_test():
    # Create synthetic dataset.
    I, J, K, R = 25, 25, 25, 3  # dimensions and rank parameters
    # Create a random tensor consisting of a low-rank component and noise.
    X = tensortools.randn_ktensor((I, J, K), rank=R).full()
    X += np.random.randn(I, J, K)  # add some random noise

    # Perform CP tensor decomposition.
    U = tensortools.cp_als(X, rank=R, verbose=True)
    V = tensortools.cp_als(X, rank=R, verbose=True)

    # Compare the low-dimensional factors from the two fits.
    fig, _, _ = tensortools.plot_factors(U.factors)
    tensortools.plot_factors(V.factors, fig=fig)

    # Align the two fits and print a similarity score.
    similarity_score = tensortools.kruskal_align(U.factors,
                                                 V.factors,
                                                 permute_U=True,
                                                 permute_V=True)
    print(similarity_score)

    # Plot the results to see alignment.
    fig, ax, po = tensortools.plot_factors(U.factors)
    tensortools.plot_factors(V.factors, fig=fig)

    plt.show()
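
The test is self-contained: a similarity score near 1.0 means the two independently initialized fits recovered the same factors up to permutation and scaling. A minimal driver, assuming the imports above:

if __name__ == '__main__':
    cp_als_test()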
Example #5
# Assumed imports for this snippet (not shown on the original page):
import matplotlib.pyplot as plt
import numpy as np
import tensortools as tt

def tca(X,
        non_neg=True,
        R=10,
        prefix="",
        max_iter=500,
        epoc="all",
        effect="all"):

    # Fit CP tensor decomposition (two times).
    U = None
    V = None
    opti_str = 'ncp_bcd_'
    if non_neg:
        U = tt.ncp_bcd(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
        V = tt.ncp_bcd(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
    else:
        U = tt.cp_als(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
        V = tt.cp_als(X, rank=R, verbose=True, max_iter=max_iter, tol=1e-6)
        opti_str = 'cp_als_'
    # Compare the low-dimensional factors from the two fits.
    # fig, ax, po = tt.plot_factors(U.factors)
    # tt.plot_factors(V.factors, fig=fig)
    # fig.suptitle("raw models")
    # fig.tight_layout()

    # Align the two fits and print a similarity score.
    sim = tt.kruskal_align(U.factors,
                           V.factors,
                           permute_U=True,
                           permute_V=True)
    print(sim)

    # Plot the results again to see alignment.
    fig, ax, po = tt.plot_factors(U.factors,
                                  plots=["scatter", "scatter", "line"])
    tt.plot_factors(V.factors, plots=["scatter", "scatter", "line"], fig=fig)
    for x in ax[:, 2]:
        x.set_xticks([11.5, 15.5, 27.5, 31.5])

    ax[-1, 0].set_xlabel("SU #")
    ax[-1, 1].set_xlabel("Trial #")
    ax[-1, 2].set_xlabel("Time (s)")
    ax[-1, 2].set_xticklabels(["S", "+1", "T", "+1"])

    fig.suptitle("aligned models")
    fig.tight_layout()

    # Show plots.
    plt.show()
    fig.set_size_inches(40, 40)
    fig.set_dpi(300)
    fig.savefig(
        opti_str + prefix + "tca_trial_" + epoc + "_" + effect + "_" +
        str(X.shape[1]) + "_R" + str(R) + ".png",
        dpi=300,
        bbox_inches="tight",
    )
    return (U, V, sim)
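
A hypothetical invocation of tca on a synthetic tensor (shapes, prefix, and rank are illustrative; data is kept non-negative for ncp_bcd):

X_demo = np.abs(tt.randn_ktensor((40, 35, 30), rank=5).full())
U_demo, V_demo, sim_demo = tca(X_demo, non_neg=True, R=5, prefix="demo_")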
# Make synthetic dataset.
I, J, K, R = 25, 25, 25, 4  # dimensions and rank
X = tt.randn_ktensor((I, J, K), rank=R).full()
X += np.random.randn(I, J, K)  # add noise

# Fit CP tensor decomposition (two times).
U = tt.cp_als(X, rank=R, verbose=True)
V = tt.cp_als(X, rank=R, verbose=True)

# Compare the low-dimensional factors from the two fits.
fig, _, _ = tt.plot_factors(U.factors)
tt.plot_factors(V.factors, fig=fig)

# Align the two fits and print a similarity score.
sim = tt.kruskal_align(U.factors, V.factors, permute_U=True, permute_V=True)
print(sim)

# Plot the results again to see alignment.
fig, ax, po = tt.plot_factors(U.factors)
tt.plot_factors(V.factors, fig=fig)

# Show plots.
plt.show()

# =============================================================================
# =============================================================================
# More structured data to visualize decomposition
from sklearn.preprocessing import normalize

tensor_size = (3, 100, 25)
Example #7
# Assumed context for this snippet (imports and helpers not shown on the original page):
import numpy as np
import pandas as pd
import tensorly as tl
import tensortools as tt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from tensorly.decomposition import non_negative_parafac, parafac
# rec_err (reconstruction error) is a project-specific helper defined elsewhere.

def seq_parafac(input_tensor,
                max_rank,
                nb_trial,
                pred_df,
                tol=1e-07,
                mode='non-negative'):
    """Sequential TCA for model selection
	
	This method computes TCA with a number of components ranging from 1 to the maximum rank indicated,
	and stores reconstruction error, similarity, sparsity for each model obtained. It fits also random
	forest classifiers to predict reward and odor for each trial using trial factors, and it stores
	the associated prediction accuracy.
	
	Arguments:
		input_tensor {array} -- 3-dimensional activity array
		max_rank {scalar} -- maximum rank for TCA
		nb_trial {scalar} -- number of replication of same TCA
		pred_df {pandas dataframe} -- reward and odor information for each trial
	
	Keyword Arguments:
		tol {scalar} -- tolerance for optimization convergence
		mode {string} -- version of TCA to compute, classic or non-negative (default: {'non-negative'})
	
	Returns:
		pandas err_df -- reconstruction error for each TCA run
		pandas sim_df -- similarity against best fit for each TCA run
		pandas spa_df -- sparsity for each TCA run
		pandas odor_df -- odor prediction accuracy for each TCA run
		pandas rew_df -- reward prediction accuracy for each TCA run
	"""

    # lists used for output dataframes
    rank_err = []
    rank_sim = []
    err_list = []
    sim_list = []
    spa_list = []

    odor_acc = []
    odor_std = []
    rew_acc = []
    rew_std = []

    for rank in np.arange(1, max_rank + 1):

        # in this list we store factors extracted with TCA for each run
        pred_fac = []
        # minimal error, initialized at its maximum of 1; used to identify the best-fit model
        min_err = 1
        # index of the best fit model in the factors list
        min_idx = 0

        # we iterate over nb_trial, the number of replicates of TCA to run;
        # replicates make stability checks possible, e.g. computing model similarity
        for trial in range(nb_trial):

            # verbose to know which replicate is running
            print('Trial', trial)

            # build a list useful for err_df
            rank_err.append(rank)

            if mode == 'non-negative':
                # where TCA is actually computed, here in its non-negative version
                pred_fac.append(
                    non_negative_parafac(input_tensor,
                                         rank=rank,
                                         n_iter_max=1000,
                                         init='svd',
                                         tol=tol,
                                         verbose=1))
            else:
                # where TCA is actually computed, in its classic version
                pred_fac.append(
                    parafac(input_tensor,
                            rank=rank,
                            n_iter_max=5000,
                            init='random',
                            tol=tol))
            # we store all factors in a list, to be able to compute model similarity in the end

            # transform pred_fac from kruskal form (list of factors) to full-tensor form
            pred_tensor = tl.kruskal_to_tensor(pred_fac[trial])
            # compute reconstruction error, L2 distance from predicted to original tensor
            err = rec_err(input_tensor, pred_tensor)
            err_list.append(err)

            # here we compute sparsity, the proportion of almost-zero elements
            nb_nonzero = 0
            tot_size = 0

            for i in range(len(pred_fac[trial])):
                nb_nonzero += np.count_nonzero(np.round(pred_fac[trial][i], 2))
                tot_size += pred_fac[trial][i].size

            spa = 1 - nb_nonzero / tot_size
            spa_list.append(spa)

            # we shuffle samples matrix (here trial factors) and labels (odor and reward)
            # using same permutation
            X, y_odor, y_rew = shuffle(pred_fac[trial][2],
                                       pred_df['Odor'].tolist(),
                                       pred_df['Reward'].tolist())

            # initialize random forest classifier
            clf = RandomForestClassifier(n_estimators=50,
                                         max_depth=None,
                                         min_samples_split=2,
                                         max_features='sqrt')

            # scale the data before fitting
            X = StandardScaler().fit_transform(X)

            # compute cross-validated prediction accuracy for odor and reward
            # (score each target once so mean and std come from the same CV runs)
            odor_scores = cross_val_score(clf, X, y_odor, cv=8)
            rew_scores = cross_val_score(clf, X, y_rew, cv=8)
            odor_acc.append(odor_scores.mean())
            odor_std.append(odor_scores.std())
            rew_acc.append(rew_scores.mean())
            rew_std.append(rew_scores.std())

            # we keep track of the model having lowest reconstruction error
            # we will use this model as a reference to compute model similarity
            if err < min_err:
                min_err = err
                min_idx = trial

        # we iterate again over all computed models to calculate similarity
        # versus best fit model
        for trial in range(nb_trial):

            # if the model is the best fit, do nothing
            if trial == min_idx:
                continue

            # build a list useful for sim_df
            rank_sim.append(rank)

            # align factors to compute similarity
            sim_list.append(
                tt.kruskal_align(tt.tensors.KTensor(pred_fac[min_idx]),
                                 tt.tensors.KTensor(pred_fac[trial]),
                                 permute_U=True,
                                 permute_V=True))

    # build dataframes to store results
    err_df = pd.DataFrame(data=np.transpose([rank_err, err_list]),
                          columns=['Rank', 'Reconstruction Error'])
    sim_df = pd.DataFrame(data=np.transpose([rank_sim, sim_list]),
                          columns=['Rank', 'Similarity'])
    spa_df = pd.DataFrame(data=np.transpose([rank_err, spa_list]),
                          columns=['Rank', 'Sparsity'])
    odor_df = pd.DataFrame(data=np.transpose([rank_err, odor_acc, odor_std]),
                           columns=[
                               'Rank', 'Accuracy - Odor Prediction',
                               'Std - Odor Prediction'
                           ])
    rew_df = pd.DataFrame(data=np.transpose([rank_err, rew_acc, rew_std]),
                          columns=[
                              'Rank', 'Accuracy - Reward Prediction',
                              'Std - Reward Prediction'
                          ])

    return err_df, sim_df, spa_df, odor_df, rew_df
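
A hypothetical call of seq_parafac (the activity tensor and trial metadata are synthetic placeholders):

acts = np.abs(np.random.randn(30, 50, 80))  # neurons x time x trials, non-negative
meta = pd.DataFrame({'Odor': np.random.choice(['A', 'B'], 80),
                     'Reward': np.random.choice([0, 1], 80)})
err_df, sim_df, spa_df, odor_df, rew_df = seq_parafac(
    acts, max_rank=3, nb_trial=4, pred_df=meta)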
Example #8
# Assumed context for this snippet (imports and helpers not shown on the original page):
import os
import matplotlib.pyplot as plt
import numpy as np
import scipy.ndimage as spi
import tensortools as tt
from sklearn.preprocessing import normalize
# lm (a .mat loader), prepareData, _fast_bin, and _fast_occ are project-specific
# helpers defined elsewhere.

def run_pipeline(filename):

    data = lm.loadmat(filename)
    session_name = os.path.basename(filename)[0:-4]
    (good_cells, pos_edges, trial_idx, spikelocations, spike_idx,
     location_vec) = prepareData(data)
    n_trials = 30
    n_cells = len(good_cells)
    shape = (n_cells, len(pos_edges) - 1, n_trials)
    counts = np.zeros(shape, dtype=float)
    _fast_bin(counts, trial_idx, spikelocations, spike_idx)

    occupancy = np.zeros((len(pos_edges) - 1, n_trials), dtype=float)
    _fast_occ(occupancy, data['trial'] - 1, location_vec)

    for iT in range(n_trials):
        tmp = occupancy[:, iT]
        idx_v = np.flatnonzero(tmp)
        idx_n = np.flatnonzero(tmp == 0)
        tmp[idx_n] = np.interp(idx_n, idx_v, tmp[idx_v])
        occupancy[:, iT] = tmp

    spMapN = np.zeros(counts.shape)
    for iC in range(n_cells):
        spMapN[iC, :, :] = np.divide(counts[iC, :, :], occupancy)

    spMapN = spi.gaussian_filter(spMapN, (0, 2, 0))

    n_cells = len(good_cells)
    n_bins = len(pos_edges) - 1
    spFlat = np.zeros((n_cells, n_trials * n_bins))

    for iC in range(n_cells):
        spFlat[iC, :] = spMapN[iC, :, :].ravel(order='F')
    #spFlat = spFlat-spFlat.mean(axis=1)[:,np.newaxis]
    spFlat = normalize(spFlat, axis=0, norm='l2')
    for iC in range(n_cells):
        for iT in range(n_trials):
            start = iT * n_bins
            stop = (iT + 1) * n_bins
            bin_idx = np.arange(start, stop)  # avoid shadowing trial_idx from prepareData
            tmp = spFlat[iC, bin_idx]
            spMapN[iC, :, iT] = tmp

    R = 5
    # Fit CP tensor decomposition (two times).
    U = tt.ncp_bcd(spMapN, rank=R, verbose=False)
    V = tt.ncp_bcd(spMapN, rank=R, verbose=False)

    # Align the two fits and print a similarity score.
    sim = tt.kruskal_align(U.factors,
                           V.factors,
                           permute_U=True,
                           permute_V=True)
    #print(sim)

    # Plot the results again to see alignment.
    fig, ax, po = tt.plot_factors(U.factors)
    tt.plot_factors(V.factors, fig=fig)
    fig.suptitle("aligned models")
    fig.tight_layout()
    fig.savefig('C:\\temp\\try3\\' + session_name + '_tca.png')

    ff = np.matmul(np.transpose(spFlat), spFlat)
    plt.figure()
    plt.imshow(ff)
    plt.colorbar()
    plt.axvline(x=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axvline(x=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 20, color='red', ls='--', linewidth=1)
    plt.axhline(y=n_bins * 21, color='green', ls='--', linewidth=1)
    plt.savefig('C:\\temp\\try3\\' + session_name + '_cov.png')
    plt.close('all')
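
A hypothetical driver for run_pipeline (the session files and output directory are illustrative):

import glob

for filename in glob.glob('C:\\temp\\try3\\*.mat'):
    run_pipeline(filename)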