perplexity=perplexity, theta=theta, eta=eta, exageration=exageration, iterations=iterations, random_seed=random_seed, verbose=verbose) spikes_used = np.load(join(files_dir, 'indices_of_spikes_used.npy')) spike_templates = np.load(join(base_folder, 'spike_templates.npy')) spike_templates_clean = spike_templates[spikes_used] cluster_info = tsne_cl.create_cluster_info_from_kilosort_spike_templates(join(base_folder, 'cluster_info.pkl'), spike_templates_clean) labels_dict = pf.generate_labels_dict_from_cluster_info_dataframe(cluster_info=cluster_info) markers = ['.', '*', 'o', '>', '<', '_', ','] labeled_sizes = range(20, 100, 20) pf.plot_tsne(tsne.T, cm=plt.cm.prism, labels_dict=labels_dict, legent_on=False, markers=None, labeled_sizes=None) fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.scatter(tsne[:, 0], tsne[:, 1], tsne[:, 2], zdir='z', s=20, c='b', depthshade=True) pf.make_video_of_tsne_iterations(iterations=3000, video_dir=files_dir, data_file_name='interim_{:0>6}.dat', video_file_name='tsne_video.mp4', figsize=(15, 15), dpi=200, fps=30, labels_dict=labels_dict, cm=plt.cm.prism, label_name='Label', legent_on=False, labeled_sizes=None, markers=None, max_screen=True)
geometry_dir.format(channel_number[code], geometry_descriptions[code]), 'threshold_6_5std.kwik') juxta_cluster_indices_grouped, spike_thresholds_groups = taf.create_juxta_label( kwik_file, spike_thresholds=spike_thresholds, adc_channel_used=adc_channel_used, adc_dtype=adc_dtype, inter_spike_time_distance=inter_spike_time_distance, amp_gain=amp_gain, num_of_raw_data_channels=None, spike_channels=None, verbose=True) # ---------------------------------------------------------------------------------------------------------------------- # PLOTTING pf.plot_tsne(tsne) pf.plot_tsne(tsne, juxta_cluster_indices_grouped, cm=plt.cm.brg, label_name='Peak size in uV', label_array=(spike_thresholds_groups * 1e6).astype(int)) # Show all t-snes for code in geometry_codes: print('---' + code + '---') tsne = load_tsne(code) kwik_file = join( basic_dir, geometry_dir.format(channel_number[code], geometry_descriptions[code]), 'threshold_6_5std.kwik')
pc_and_template_features_sparse = np.concatenate( (pc_features_sparse_flatten, template_features_sparse), axis=1) pc_and_template_features_tsne_selected_indices = TSNE.t_sne( pc_and_template_features_sparse[indices_all], perplexity=100, theta=0.2, files_dir=os.path.join(kilosort_path, 'tsne'), results_filename='tsne_pc_template_features_blob.npy', gpu_mem=0.8, iterations=2000, randseed=1) pf.plot_tsne(np.transpose(pc_and_template_features_tsne_selected_indices), legend_on=False, subtitle='T-sne', cm=plt.cm.coolwarm, sizes=[2, 8]) plt.bar(edges[:-1], hist, width=30) result = plt.hist(diffs, bins=autocor_bin_number) fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(tsne_indices_all[:, 0], tsne_indices_all[:, 1], color='blue') ax.scatter( tsne_indices_all[local_indices_sorted_to_distance[len(middle_bin_size):], 0], tsne_indices_all[local_indices_sorted_to_distance[len(middle_bin_size):],
# Parameters of the t-sne run
perplexity = 100
theta = 0.2
iterations = 2000
gpu_mem = 0.8
eta = 200
early_exaggeration = 4.0
indices_of_spikes_to_tsne = None  # range(spikes_to_do)
seed = 100000
verbose = 2

# Run t-sne on the masked features stored in the kwx file
tsne = tsne_spikes.t_sne_spikes(
    kwx_file_path,
    hdf5_dir_to_pca=r'channel_groups/0/features_masks',
    mask_data=True,
    perplexity=perplexity,
    theta=theta,
    iterations=iterations,
    gpu_mem=gpu_mem,
    seed=seed,
    eta=eta,
    early_exaggeration=early_exaggeration,
    indices_of_spikes_to_tsne=indices_of_spikes_to_tsne,
    verbose=verbose)

# Load t-sne
# NOTE(review): this replaces the result computed just above with a
# previously saved one.
filename = 't_sne_results_100per_200lr_02theta_2000its_100kseed.npy'
tsne = np.load(
    join(
        analysis_folder,
        'klustakwik_cell{}'.format(cell),
        'threshold_6_5std',
        filename))

# Plot the first `seed` columns in blue and the columns from `seed` up to
# 5 * seed in green on the same axes.
# NOTE(review): seed (also passed as the t-sne seed above) doubles here as a
# column count - confirm that this is intended.
fig, ax = pf.plot_tsne(tsne[:, :seed], color='b')
pf.plot_tsne(tsne[:, seed:(5 * seed)], color='g', axes=ax)
juxta_cluster_indices_grouped = {} for g in range(0, num_of_spike_groups): juxta_cluster_indices_temp = np.intersect1d( indices_of_data_for_tsne, indices_of_common_extra_spikes_grouped[g + 1]) juxta_cluster_indices_grouped[g] = [ i for i in np.arange(0, len(indices_of_data_for_tsne)) if len( np.where( juxta_cluster_indices_temp == indices_of_data_for_tsne[i])[0]) ] print(len(juxta_cluster_indices_grouped[g])) pf.plot_tsne(tsne, juxta_cluster_indices_grouped, cm=plt.cm.coolwarm, subtitle='T-sne of 74000 spikes from the 32 channel probe', label_name='Peak size in uV', label_array=(spike_thresholds_groups * 1e6).astype(int), sizes=[2, 15]) #------------------------------------------------------------------ # Clustering # Dbscan def fit_dbscan(data, eps, min_samples, show=True, juxta_cluster_indices_grouped=None, threshold_legend=None):
verbose=verbose) tsne = io.load_tsne_result(tsne_folder) # and plotting results ------------------------------ spike_clusters_clean = spike_clusters[spikes_used] labels_dict = {} clean_templates = np.argwhere(template_marking > 0) total_spikes = 0 for i in np.arange(len(clean_templates)): spikes = np.squeeze(np.argwhere(np.in1d(spike_clusters_clean, clean_templates[i]))) labels_dict[i] = spikes pf.plot_tsne(tsne.T, labels_dict=labels_dict, legent_on=False) # --------------------------------------------------- tsne_folder = r'F:\JoanaPaired\128ch\2015-09-03\Analysis\kilosort\thres4_10_10_Fe16_Pc12\tsne\tsne_uncleaned_466Kspikes' spike_info = preproc.generate_spike_info_from_full_tsne(kilosort_folder, tsne_folder) # CREATE SPIKE_INFO FROM KLUSTAKWIK RESULTS AND RUN THIS IN THE TSNE SPIKESORT GUI import numpy as np from os.path import join import pandas as pd import h5py as h5 import pickle
# and then plot the t-sne color coded with the kilosort templates spike_templates = np.load(join(base_folder, 'spike_templates.npy')) template_marking = np.load(join(base_folder, 'template_marking.npy')) # clean out the spike_templates array (remove the spikes in the noise templates) templates_clean_index = np.argwhere(template_marking) spikes_clean_index = np.squeeze( np.argwhere(np.in1d(spike_templates, templates_clean_index))) spike_templates_clean = spike_templates[spikes_clean_index][:number_of_spikes] spike_templates_clean_sorted = spike_templates_clean[ spike_indices_sorted_by_probe_distance] cluster_info = tsne_cl.create_cluster_info_from_kilosort_spike_templates( join(base_folder, 'cluster_info.pkl'), spike_templates_clean_sorted) labels_dict = pl.generate_labels_dict_from_cluster_info_dataframe( cluster_info=cluster_info) pl.plot_tsne(tsne.T, cm=plt.cm.prism, labels_dict=labels_dict) # LOAD SAVED DATA TO CHECK WHAT HAS BEEN SAVED def _read_unpack(fmt, fh): return unpack(fmt, fh.read(calcsize(fmt))) filename = 'data.dat' with open(join(debug_folder, filename), 'rb') as output_file: theta_, eta_, num_of_spikes_, num_of_dims_, num_of_nns_, iterations_, verbose_, perplexity_ =\ _read_unpack('ddiiiiii', output_file) sorted_distances_ = [ _read_unpack('{}d'.format(num_of_nns_), output_file) for _ in range(num_of_spikes_)
markers = ['o', '*']

# Show clusters
num_of_clusters = len(np.unique(clusters_of_all_extra_spikes))

# For each cluster number 1..num_of_clusters, list the positions of the
# spikes assigned to it. One vectorised comparison per cluster replaces the
# spike by spike Python loop; clusters without spikes still get an empty list.
labels_dict = {
    g: list(np.flatnonzero(np.asarray(clusters_of_all_extra_spikes) == g))
    for g in np.arange(num_of_clusters) + 1
}

# NOTE(review): markers is redefined here but the plot below passes
# markers=None.
markers = ['.', '*', 'o', '>', '<', '_', ',']
fig, ax = pf.plot_tsne(
    tsne.T,
    cm=plt.cm.jet,
    labels_dict=labels_dict,
    legent_on=False,
    markers=None,
    labeled_sizes=[30])

# and add the juxta spikes
ax.scatter(
    tsne[indices_of_common_spikes_in_klusta, 0],
    tsne[indices_of_common_spikes_in_klusta, 1],
    marker='*', c='r', s=20)

# CALCULATE PRECISION AND RECALL OF JUXTA ON T-SNE BLOB
spikes_in_tsne_blob = 1070  # Taken from using the spikesorting gui on the data
number_of_juxta_not_in_blob = 17  # Found by counting on the t-sne plot
number_of_juxta = len(common_spikes)
# True positives: the juxta spikes that are not outside the blob
tp = number_of_juxta - number_of_juxta_not_in_blob
# Parameters of the t-sne run
perplexity = 1000.0
theta = 0.2
learning_rate = 200.0
iterations = 2000
gpu_mem = 0.8

t_tsne = tsne_bhcuda.t_sne(
    data_for_tsne,
    files_dir=r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\tsne_results',
    no_dims=2,
    perplexity=perplexity,
    eta=learning_rate,
    theta=theta,
    iterations=iterations,
    gpu_mem=gpu_mem,
    randseed=-1,
    verbose=3)
# From here on each component of the embedding is addressed as t_tsne[k].
t_tsne = np.transpose(t_tsne)

# t0 is expected to have been set before this section runs.
t1 = time.time()
print("CUDA t-sne took {} seconds, ({} minutes), for {} spikes".format(t1-t0, (t1-t0)/60, up_to_extra_spike))

# 2D plot
pf.plot_tsne(
    t_tsne,
    juxta_cluster_indices_grouped,
    subtitle='T-sne',
    cm=plt.cm.coolwarm,
    label_name='Peak size in uV',
    label_array=(spike_thresholds_groups*1e6).astype(int),
    labeled_sizes=[2, 15])
pf.plot_tsne(
    t_tsne,
    subtitle='T-sne of 129000 spikes from Juxta Paired recordings, not labeled',
    label_name=None,
    label_array=None)

# 3D plot
# The run above asks for no_dims=2. Assuming t_sne returns one column per
# requested dimension (TODO confirm), t_tsne then has only two rows and
# t_tsne[2] would raise an IndexError, so the 3D view is drawn only when a
# third component exists.
if len(t_tsne) >= 3:
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    s = 10
    c = ['r', 'g', 'c', 'm', 'y', 'k', 'w', 'b']
    ax.scatter(t_tsne[0], t_tsne[1], t_tsne[2], s=3)
    for g in np.arange(1, num_of_spike_groups+1):
        group_indices = juxta_cluster_indices_grouped[g]
        # Wrap around the colour list so more than len(c) groups still plot.
        ax.scatter(
            t_tsne[0][group_indices],
            t_tsne[1][group_indices],
            t_tsne[2][group_indices],
            s=s, color=c[(g-1) % len(c)])
else:
    print('Skipping the 3D plot, the t-sne result has {} dimension(s)'.format(len(t_tsne)))
verbose=verbose, indices_of_spikes_to_tsne=range(spikes_used), randseed=randseed) # Load t-sne results tsne = TSNE.load_tsne_result( results_dir, 'result_tsne40K_com46k_p500_it1k_th05_eta200.dat') tsne = np.transpose(tsne) tsne = np.load( join(results_dir, 't_sne_results_s130k_100per_200lr_02theta.npy')) # 2D plot pf.plot_tsne(tsne, labels_dict=spikes_labeled_dict, subtitle='T-sne of first 130k spikes from Synthetic Data', label_name='"Cell" No', cm=plt.cm.jet, markers=['.', '^'], sizes=[3, 20]) pf.plot_tsne(tsne, labels_dict=None, subtitle='T-sne of 86000 spikes from Synthetic Data, not labeled', label_name=None) #-------------------------------------------------------------------------------------- # CHECK QUALITY OF FIT # 1) DBSCAN def fit_dbscan(data, eps, min_samples,
filename=filename_kl_data) # Run t-sne kwx_file_path = join(analysis_folder, 'klustakwik_cell{}'.format(good_cells[0]), r'threshold_6_5std/threshold_6_5std.kwx') perplexity = 100 theta = 0.2 iterations = 2000 gpu_mem = 0.8 eta = 200 early_exaggeration = 4.0 indices_of_spikes_to_tsne = range(34000) seed = 0 verbose = 2 tsne = tsne_spikes.t_sne_spikes( kwx_file_path, hdf5_dir_to_pca=r'channel_groups/0/features_masks', mask_data=True, perplexity=perplexity, theta=theta, iterations=iterations, gpu_mem=gpu_mem, seed=seed, eta=eta, early_exaggeration=early_exaggeration, indices_of_spikes_to_tsne=indices_of_spikes_to_tsne, verbose=verbose) pf.plot_tsne(tsne)