Beispiel #1
0
def plot_tsne(ssvae, test_loader, use_cuda=False):
    """Project the guide's first output for the test set to 2-D with t-SNE and plot it.

    The test images and labels are taken from ``test_loader.dataset``, passed
    through ``ssvae.guide_sample`` and the first returned value (named
    ``z_mu``; presumably the latent mean -- confirm against the model) is
    embedded in two dimensions. The embedding and the labels are then handed
    to ``__plot_tsne_to_visdom``.

    Parameters
    ----------
    ssvae -- Object exposing ``guide_sample(xs, ys, n)`` that returns a pair.
    test_loader -- Loader whose ``dataset`` has ``test_data`` and
        ``test_labels``; ``len(test_loader)`` is forwarded to ``guide_sample``.
    use_cuda -- If truthy, use ``t_sne_bhcuda`` (intermediate files go to
        ``<cwd>/tsne``); otherwise use ``sklearn.manifold.TSNE`` with
        ``random_state=0``.
    """
    dataset = test_loader.dataset
    xs = dataset.test_data.float()
    ys = dataset.test_labels
    # Only the first element of the pair is used for the embedding.
    z_mu, _unused_sigma = ssvae.guide_sample(xs, ys, len(test_loader))

    latent_points = z_mu.data.cpu().numpy()
    labels = ys.cpu().numpy()

    logger.info("calculating T-SNE of z embedding..")
    if not use_cuda:
        from sklearn.manifold import TSNE
        embedding = TSNE(n_components=2, random_state=0).fit_transform(latent_points)
    else:
        import t_sne_bhcuda.bhtsne_cuda as tsne_bhcuda
        files_dir = Path.cwd() / "tsne"
        files_dir.mkdir(parents=True, exist_ok=True)
        rows = tsne_bhcuda.t_sne(latent_points, no_dims=2, files_dir=files_dir, gpu_mem=0.9)
        # Normalise whatever row type the wrapper yields into a plain 2-D array.
        embedding = np.array([list(row) for row in rows])

    __plot_tsne_to_visdom(embedding, labels)
# Run t-SNE on the spikes selected by indices_of_data_for_tsne, reading the
# features stored under channel group 0 of the .kwx file with masking enabled
# (mask_data=True) and the scikit-learn backend disabled (use_scikit=False).
# Every tuning value (perplexity, theta, no_dims, learning_rate, iterations,
# seed, gpu_mem) as well as kwx_file_path, path and indices_of_data_for_tsne
# must already be defined before this statement runs.
tsne = tsne_spikes.t_sne_spikes(kwx_file_path=kwx_file_path, hdf5_dir_to_pca=r'channel_groups/0/features_masks',
                                  mask_data=True, path_to_save_tmp_data=path,
                                  indices_of_spikes_to_tsne=indices_of_data_for_tsne, use_scikit=False,
                                  perplexity=perplexity, theta=theta, no_dims=no_dims, eta=learning_rate,
                                  iterations=iterations, seed=seed, verbose=2, gpu_mem=gpu_mem)


# C++ wrapper t-SNE, timed run on the CPU.
# The settings below are plain module-level names, so they replace any values
# assigned to the same names earlier in the script.
t0 = time.time()
perplexity = 50.0
theta = 0.2
learning_rate = 200.0  # passed to t_sne as eta
iterations = 5000
gpu_mem = 0  # 0 selects the CPU-only path (see the header comment of this section)
# NOTE(review): files_dir is a hard-coded, machine-specific Windows path; the
# run will only work where that directory is valid.
# data_for_tsne must be defined earlier in the script.
t_tsne = tsne_bhcuda.t_sne(data_for_tsne,
                           files_dir=r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\tsne_results',
                           no_dims=2, perplexity=perplexity, eta=learning_rate, theta=theta,
                           iterations=iterations, gpu_mem=gpu_mem, randseed=-1, verbose=3)
# Swap the axes of the returned embedding.
t_tsne = np.transpose(t_tsne)
t1 = time.time()
# up_to_extra_spike (the spike count reported here) is defined elsewhere in the script.
print("C++ t-sne took {} seconds, ({} minutes), for {} spikes".format(t1-t0, (t1-t0)/60, up_to_extra_spike))


# C++ wrapper t-SNE, timed run on the GPU.
# These assignments overwrite the module-level settings used by the CPU run
# above and are consumed by the tsne_bhcuda.t_sne call that follows.
t0 = time.time()
perplexity = 1000.0
theta = 0.2
learning_rate = 200.0  # passed to t_sne as eta
iterations = 2000
gpu_mem = 0.8  # non-zero value, in contrast to gpu_mem = 0 in the CPU run
t_tsne = tsne_bhcuda.t_sne(data_for_tsne,
                           files_dir=r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\tsne_results',
Beispiel #3
0
    #files=filelist.items

# Reduce feature_v3 to 768 principal components before running t-SNE.
# fit_transform() fits the model and projects the data in one pass, so a
# separate fit() call beforehand only repeats the decomposition.
p = PCA(n_components=768)
after_pca = p.fit_transform(feature_v3)
#print(after_pca[0,0:10])
print(after_pca.shape)

#sio.savemat('footwear_afterPCA3.mat',{'footwear_afterPCA':after_pca})

# Embed the PCA-reduced features in 2-D; intermediate files are written to the
# current directory. perplexity, learning_rate, theta, iterations and gpu_mem
# must be defined earlier in the script.
# NOTE(review): seed=samples relies on a variable named `samples` that is not
# defined in the visible part of this script -- confirm the intended value.
t_sne_result = tsne_bhcuda.t_sne(samples=after_pca,
                                 files_dir="./",
                                 no_dims=2,
                                 perplexity=perplexity,
                                 eta=learning_rate,
                                 theta=theta,
                                 iterations=iterations,
                                 seed=samples,
                                 gpu_mem=gpu_mem,
                                 randseed=-1,
                                 verbose=2)
#t_sne_result = np.transpose(t_sne_result)#

# Store the embedding under the key 'Y' in a MATLAB .mat file.
sio.savemat('wholeperson_men_newxy_768.mat', {'Y': t_sne_result})

#np.savetxt('./tsne_xy.txt',t_sne_result)

#fig = plt.figure()
#ax = fig.add_subplot(111)
#ax.scatter(t_sne_result[0], t_sne_result[1])
#fig.show()
def t_sne_spikes(kwx_file_path,
                 hdf5_dir_to_pca=r'channel_groups/1/features_masks',
                 mask_data=False,
                 path_to_save_tmp_data=None,
                 indices_of_spikes_to_tsne=None,
                 use_scikit=False,
                 perplexity=50.0,
                 theta=0.5,
                 iterations=1000,
                 seed=0,
                 gpu_mem=0.8,
                 no_dims=2,
                 eta=200,
                 early_exaggeration=4.0,
                 randseed=-1,
                 verbose=2):
    """
    Embed the PCA features of detected spikes with t-SNE and save the result.

    Uses the PCA (masked or not) results of the spike detection that the phy module does to embed the
    3 X N_channels dimensional spikes into no_dims (usually 2) dimensions for visualization and faster manual
    sorting purposes. The embedding is done using the t-sne algorithm. The GUI of the phy module reads the result
    of the t-sne and plots it, superimposing the sorting information.

    For embedding spikes using other features (like T_timepoints x N_channels for example) use the
    bhtsne_cuda.t_sne() function directly.

    Parameters
    ----------
    kwx_file_path -- The path of the .kwx file resulting from phy's spikedetect (the file that has the PCA and
    mask results for each spike)
    hdf5_dir_to_pca -- The path inside the kwx (hdf5) file where the pca and mask matrices are saved. The data is
    indexed as [:, :, 0] for the PCA features and [:, :, 1] for the masks.
    mask_data -- If True, multiply the PCA features element-wise by the masks before embedding
    path_to_save_tmp_data -- Directory for the t-sne intermediate files (see bhtsne_cuda.py). If it is not set,
    the directory that contains kwx_file_path is used.
    indices_of_spikes_to_tsne -- Row indices selecting a subgroup of spikes to embed. None means all spikes.
    use_scikit -- If True then use the sklearn t-sne implementation (Python, no GPU).
    perplexity -- Defines the amount of samples whose distances are compared to every sample (check sklearn and
    van der Maaten's paper)
    theta -- If > 0 then the algorithm runs the Barnes-Hut approximation (with angle = theta). If = 0 then it
    runs the exact version. Values smaller than 0.5 do not add too much error.
    iterations -- The number of iterations (usually around 1000 should suffice)
    seed -- Forwarded unchanged to TSNE.t_sne as its ``seed`` argument
    gpu_mem -- If > 0 (and <= 1) then the t_sne_bhcuda.exe will run the euclidean distance calculations on the
    GPU (if possible) and will use (gpu_mem * 100) per cent of the available gpu memory to temporarily store
    results. If == 0 then the t_sne_bhcuda.exe will run only on the CPU. It has no effect if use_scikit = True
    no_dims -- Number of dimensions of the t-sne embedding
    eta -- The learning rate
    early_exaggeration -- The amount by which the samples are initially pushed apart
    randseed -- Set the random seed for the initialization of the samples on the no_dims plane.
    verbose -- Define verbosity. 0 = No output, 1 = Basic output, 2 = Full output, 3 = Also save t-sne results in
    interim files after every iteration. Option 3 is used to save all steps of t-sne to explore the way the
    algorithm separates the data (good for movies).

    Returns
    -------
    The array returned by TSNE.t_sne after np.transpose. The same array is saved as 't_sne_results.npy' in the
    directory that contains kwx_file_path, where the GUI of the phy module can read it.
    NOTE(review): if TSNE.t_sne yields N_examples X no_dims, the transposed result is no_dims X N_examples --
    confirm against the consumer of the .npy file.
    """
    # The context manager closes the HDF5 file even if reading the dataset fails.
    with h5.File(kwx_file_path, mode='r') as h5file:
        # Read the whole dataset in one call instead of iterating it row by row.
        pca_and_masks = np.array(h5file[hdf5_dir_to_pca])

    pca_features = np.array(pca_and_masks[:, :, 0])
    if mask_data:
        masked_pca_features = pca_features * pca_and_masks[:, :, 1]
    else:
        masked_pca_features = pca_features

    # No subset requested: use every spike without building a full index copy.
    if indices_of_spikes_to_tsne is None:
        data_for_tsne = masked_pca_features
    else:
        data_for_tsne = masked_pca_features[indices_of_spikes_to_tsne, :]

    # Drop the intermediate arrays before the memory-hungry t-sne run.
    del pca_and_masks
    del pca_features
    del masked_pca_features

    kwx_dir = dirname(kwx_file_path)
    if not path_to_save_tmp_data:
        if verbose:
            print(
                'The C++ t-sne executable will save data (data.dat and results.data) in \n{}\n'
                'You might want to change this behaviour by supplying a path_to_save_tmp_data.\n'
                .format(kwx_dir))
        path_to_save_tmp_data = kwx_dir

    t0 = time.time()
    t_tsne = TSNE.t_sne(data_for_tsne,
                        use_scikit=use_scikit,
                        files_dir=path_to_save_tmp_data,
                        no_dims=no_dims,  # honour the caller's choice instead of a fixed 2
                        perplexity=perplexity,
                        eta=eta,
                        theta=theta,
                        iterations=iterations,
                        seed=seed,
                        early_exaggeration=early_exaggeration,
                        gpu_mem=gpu_mem,
                        randseed=randseed,
                        verbose=verbose)
    t_tsne = np.transpose(t_tsne)
    t1 = time.time()
    if verbose > 1:
        print("CUDA t-sne took {} seconds, ({} minutes)".format(
            t1 - t0, (t1 - t0) / 60))

    np.save(join(kwx_dir, 't_sne_results.npy'), t_tsne)

    return t_tsne
def t_sne_spikes(kwx_file_path, hdf5_dir_to_pca=r'channel_groups/1/features_masks', mask_data=False,
                 path_to_save_tmp_data=None, indices_of_spikes_to_tsne=None, use_scikit=False, perplexity=50.0,
                 theta=0.5, iterations=1000, seed=0, gpu_mem=0.8, no_dims=2, eta=200, early_exaggeration=4.0,
                 randseed=-1, verbose=2):
    """
    Embed the PCA features of detected spikes with t-SNE and save the result.

    Uses the PCA (masked or not) results of the spike detection that the phy module does to embed the
    3 X N_channels dimensional spikes into no_dims (usually 2) dimensions for visualization and faster manual
    sorting purposes. The embedding is done using the t-sne algorithm. The GUI of the phy module reads the result
    of the t-sne and plots it, superimposing the sorting information.

    For embedding spikes using other features (like T_timepoints x N_channels for example) use the
    bhtsne_cuda.t_sne() function directly.

    Parameters
    ----------
    kwx_file_path -- The path of the .kwx file resulting from phy's spikedetect (the file that has the PCA and
    mask results for each spike)
    hdf5_dir_to_pca -- The path inside the kwx (hdf5) file where the pca and mask matrices are saved. The data is
    indexed as [:, :, 0] for the PCA features and [:, :, 1] for the masks.
    mask_data -- If True, multiply the PCA features element-wise by the masks before embedding
    path_to_save_tmp_data -- Directory for the t-sne intermediate files (see bhtsne_cuda.py). If it is not set,
    the directory that contains kwx_file_path is used.
    indices_of_spikes_to_tsne -- Row indices selecting a subgroup of spikes to embed. None means all spikes.
    use_scikit -- If True then use the sklearn t-sne implementation (Python, no GPU).
    perplexity -- Defines the amount of samples whose distances are compared to every sample (check sklearn and
    van der Maaten's paper)
    theta -- If > 0 then the algorithm runs the Barnes-Hut approximation (with angle = theta). If = 0 then it
    runs the exact version. Values smaller than 0.5 do not add too much error.
    iterations -- The number of iterations (usually around 1000 should suffice)
    seed -- Forwarded unchanged to TSNE.t_sne as its ``seed`` argument
    gpu_mem -- If > 0 (and <= 1) then the t_sne_bhcuda.exe will run the euclidean distance calculations on the
    GPU (if possible) and will use (gpu_mem * 100) per cent of the available gpu memory to temporarily store
    results. If == 0 then the t_sne_bhcuda.exe will run only on the CPU. It has no effect if use_scikit = True
    no_dims -- Number of dimensions of the t-sne embedding
    eta -- The learning rate
    early_exaggeration -- The amount by which the samples are initially pushed apart
    randseed -- Set the random seed for the initialization of the samples on the no_dims plane.
    verbose -- Define verbosity. 0 = No output, 1 = Basic output, 2 = Full output, 3 = Also save t-sne results in
    interim files after every iteration. Option 3 is used to save all steps of t-sne to explore the way the
    algorithm separates the data (good for movies).

    Returns
    -------
    The array returned by TSNE.t_sne after np.transpose. The same array is saved as 't_sne_results.npy' in the
    directory that contains kwx_file_path, where the GUI of the phy module can read it.
    NOTE(review): if TSNE.t_sne yields N_examples X no_dims, the transposed result is no_dims X N_examples --
    confirm against the consumer of the .npy file.
    """
    # The context manager closes the HDF5 file even if reading the dataset fails.
    with h5.File(kwx_file_path, mode='r') as h5file:
        # Read the whole dataset in one call instead of iterating it row by row.
        pca_and_masks = np.array(h5file[hdf5_dir_to_pca])

    pca_features = np.array(pca_and_masks[:, :, 0])
    if mask_data:
        masked_pca_features = pca_features * pca_and_masks[:, :, 1]
    else:
        masked_pca_features = pca_features

    # No subset requested: use every spike without building a full index copy.
    if indices_of_spikes_to_tsne is None:
        data_for_tsne = masked_pca_features
    else:
        data_for_tsne = masked_pca_features[indices_of_spikes_to_tsne, :]

    # Drop the intermediate arrays before the memory-hungry t-sne run.
    del pca_and_masks
    del pca_features
    del masked_pca_features

    kwx_dir = dirname(kwx_file_path)
    if not path_to_save_tmp_data:
        if verbose:
            print('The C++ t-sne executable will save data (data.dat and results.data) in \n{}\n'
                  'You might want to change this behaviour by supplying a path_to_save_tmp_data.\n'.
                  format(kwx_dir))
        path_to_save_tmp_data = kwx_dir

    t0 = time.time()
    # no_dims is forwarded from the caller instead of being fixed at 2.
    t_tsne = TSNE.t_sne(data_for_tsne, use_scikit=use_scikit, files_dir=path_to_save_tmp_data,
                        no_dims=no_dims, perplexity=perplexity, eta=eta, theta=theta, iterations=iterations,
                        seed=seed, early_exaggeration=early_exaggeration, gpu_mem=gpu_mem, randseed=randseed,
                        verbose=verbose)
    t_tsne = np.transpose(t_tsne)
    t1 = time.time()
    if verbose > 1:
        print("CUDA t-sne took {} seconds, ({} minutes)".format(t1-t0, (t1-t0)/60))

    np.save(join(kwx_dir, 't_sne_results.npy'), t_tsne)

    return t_tsne