def test_distance_matrix(self):
    for cos in self.cos:
        for tau in self.tau:
            d = pymuvr.distance_matrix(self.observations[:3],
                                        self.observations[3:],
                                        cos,
                                        tau)
            self.assertEqual(d.shape, (3, len(self.observations)-3))
def test_empty_spike_train(self):
    observations = [o[:] for o in self.observations]
    observations[0][0] = []
    for cos in self.cos:
        for tau in self.tau:
            # the distance computation should succeed even when one
            # cell has an empty spike train
            d_rectangular = pymuvr.distance_matrix(observations[:3],
                                                   observations[3:],
                                                   cos,
                                                   tau)
def test_compare_square_and_rectangular(self):
    for cos in self.cos:
        for tau in self.tau:
            d_rectangular = pymuvr.distance_matrix(self.observations,
                                                   self.observations,
                                                   cos,
                                                   tau)
            d_square = pymuvr.square_distance_matrix(self.observations,
                                                     cos,
                                                     tau)
            np.testing.assert_allclose(d_rectangular, d_square, atol=5e-5)
def test_zero_tau(self):
    observations = [[[0.1, 0.2, 0.3]],
                    [[0.1, 0.2, 0.3]],
                    [[0.1, 0.2, 0.4]],
                    [[0.1, 0.2]]]
    target_d = np.array([[0, 0, np.sqrt(2), 1],
                         [0, 0, np.sqrt(2), 1],
                         [np.sqrt(2), np.sqrt(2), 0, 1],
                         [1, 1, 1, 0]])
    d = pymuvr.distance_matrix(observations, observations, 0, 0)
    np.testing.assert_array_equal(d, target_d)
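# Note on the expected values in test_zero_tau above: in the tau -> 0 limit
# (and with cos=0, so there are no cross-cell terms) the squared multiunit
# van Rossum distance reduces to counting the spikes that are not exactly
# coincident between the two trains. For example, [0.1, 0.2, 0.3] vs
# [0.1, 0.2, 0.4] leaves two unmatched spikes (0.3 and 0.4), giving a
# distance of sqrt(2), while [0.1, 0.2, 0.4] vs [0.1, 0.2] leaves one
# unmatched spike (0.4), giving a distance of 1.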
def test_compare_rectangular_with_spykeutils(self):
    for cos in self.cos:
        for tau in self.tau:
            sutils_d = stm.van_rossum_multiunit_dist(self.sutils_units,
                                                     weighting=cos,
                                                     tau=tau*pq.s)
            pymuvr_d = pymuvr.distance_matrix(self.pymuvr_observations,
                                              self.pymuvr_observations,
                                              cos,
                                              tau)
            np.testing.assert_allclose(sutils_d, pymuvr_d, atol=5e-5)
def test_missing_spike(self):
    observations = [[[1., 2.]], [[1.]]]
    d = pymuvr.distance_matrix(observations, observations, self.cos, self.tau)
    np.testing.assert_array_equal(d, np.array([[0, 1], [1, 0]]))
def test_identical_trains(self):
    observations = [[[1., 2.], [1.5]], [[1., 2.], [1.5]]]
    d = pymuvr.distance_matrix(observations, observations, self.cos, self.tau)
    np.testing.assert_array_equal(d, np.zeros_like(d))
def test_empty_spike_trains(self):
    observations = [[[]], [[]]]
    d = pymuvr.distance_matrix(observations, observations, self.cos, self.tau)
    np.testing.assert_array_equal(d, np.zeros_like(d))
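# A minimal usage sketch (not part of the test suite above) of the data layout
# these tests assume: an observation is a list of cells, and each cell is a
# list of spike times. distance_matrix() compares two sets of observations;
# square_distance_matrix() compares one set with itself. The spike times,
# parameter values and helper name below are made up for illustration, and
# pymuvr is assumed to be imported at module level as in the tests.
def _pymuvr_usage_sketch():
    # two observations (trials), each recording the same two cells
    trial_a = [[1.0, 2.3, 5.1],   # spike times of cell 0
               [0.7, 4.2]]        # spike times of cell 1
    trial_b = [[1.1, 2.2],
               [0.9, 4.0, 6.3]]
    cos = 0.1   # cross-cell mixing parameter of the multiunit metric
    tau = 0.5   # time constant of the exponential kernel (same units as the spike times)
    # rectangular matrix: one row per observation in the first set,
    # one column per observation in the second set
    d = pymuvr.distance_matrix([trial_a], [trial_b], cos, tau)              # shape (1, 1)
    # square matrix: all-to-all distances within a single set of observations
    d_square = pymuvr.square_distance_matrix([trial_a, trial_b], cos, tau)  # shape (2, 2)
    return d, d_square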
def run_analysis(self):
    if self.results_arch.load():
        # we have the results already (loaded in memory or on the disk)
        pass
    else:
        # check if the spikes archive to analyse is actually present on disk
        if not os.path.isfile(self.spike_archive_path):
            raise Exception("Spike archive {} not found! aborting analysis.".format(self.spike_archive_path))
        # we actually need to calculate them
        print("Analysing for: {0} from spike archive: {1}".format(self, self.spike_archive_path))
        n_obs = self.n_stim_patterns * self.n_trials
        # load data
        min_clusts_analysed = int(round(self.n_stim_patterns * 1.0))
        max_clusts_analysed = int(round(self.n_stim_patterns * 1.0))
        clusts_step = max(int(round(self.n_stim_patterns * 0.05)), 1)
        # choose training and testing set: trials are picked at random,
        # but every stim pattern is represented equally (i.e., gets the
        # same number of trials) in both sets. Trials are ordered with
        # respect to their stim pattern.
        # (a standalone sketch of this split is given after this method)
        n_tr_obs_per_sp = self.training_size
        n_ts_obs_per_sp = self.n_trials - n_tr_obs_per_sp
        train_idxs = list(itertools.chain(*([x+self.n_trials*sp for x in random.sample(range(self.n_trials), n_tr_obs_per_sp)] for sp in range(self.n_stim_patterns))))
        test_idxs = [x for x in range(n_obs) if x not in train_idxs]
        n_tr_obs = len(train_idxs)
        n_ts_obs = len(test_idxs)
        Ym = self.n_stim_patterns
        Ny = np.array([n_ts_obs_per_sp for each in range(self.n_stim_patterns)])
        Xn = 1  # the output is effectively one-dimensional
        # initialize data structures for storage of results
        ts_decoded_mi_plugin = np.zeros(n_obs)
        ts_decoded_mi_qe = np.zeros(n_obs)
        ts_decoded_mi_pt = np.zeros(n_obs)
        ts_decoded_mi_nsb = np.zeros(n_obs)
        # compute mutual information by using direct clustering on training data (REMOVED)
        # --note: fcluster doesn't work in the border case with
        # n_clusts=n_obs, as it never returns the trivial clustering.
        # Cluster number 0 is never present in a clustering.
        print('counting spikes in output spike trains')
        i_level_array = self.spikes_arch.get_spike_counts(cell_type='mf')
        o_level_array = self.spikes_arch.get_spike_counts(cell_type='grc')
        print('computing mean input and output spike counts')
        i_mean_count = i_level_array.mean()
        o_mean_count = o_level_array.mean()
        print('computing input and output sparsity')
        i_sparseness_hoyer = hoyer_sparseness(i_level_array)
        i_sparseness_activity = activity_sparseness(i_level_array)
        i_sparseness_vinje = vinje_sparseness(i_level_array)
        o_sparseness_hoyer = hoyer_sparseness(o_level_array)
        o_sparseness_activity = activity_sparseness(o_level_array)
        o_sparseness_vinje = vinje_sparseness(o_level_array)
        print('input sparseness: hoyer {:.2f}, vinje {:.2f}, activity {:.2f}'.format(i_sparseness_hoyer, i_sparseness_vinje, i_sparseness_activity))
        print('output sparseness: hoyer {:.2f}, vinje {:.2f}, activity {:.2f}'.format(o_sparseness_hoyer, o_sparseness_vinje, o_sparseness_activity))
        if self.linkage_method_string == 'kmeans':
            spike_counts = o_level_array
            # divide spike count data in training and testing set
            tr_spike_counts = np.array([spike_counts[o] for o in train_idxs])
            ts_spike_counts = np.array([spike_counts[o] for o in test_idxs])
            for n_clusts in range(min_clusts_analysed, max_clusts_analysed+1, clusts_step):
                clustering = KMeans(n_clusters=n_clusts)
                print('performing k-means clustering on training set (training the decoder) for k='+str(n_clusts))
                clustering.fit(tr_spike_counts)
                print('using the decoder trained with k-means clustering to classify data points in testing set')
                decoded_output = clustering.predict(ts_spike_counts)
                # calculate MI
                print('calculating MI')
                Xm = n_clusts
                X_dims = (Xn, Xm)
                X = decoded_output
                s = pe.SortedDiscreteSystem(X, X_dims, Ym, Ny)
                s.calculate_entropies(method='plugin', calc=['HX', 'HXY'])
                ts_decoded_mi_plugin[n_clusts-1] = s.I()
                s.calculate_entropies(method='qe', sampling='naive', calc=['HX', 'HXY'], qe_method='plugin')
                ts_decoded_mi_qe[n_clusts-1] = s.I()
                s.calculate_entropies(method='pt', sampling='naive', calc=['HX', 'HXY'])
                ts_decoded_mi_pt[n_clusts-1] = s.I()
                s.calculate_entropies(method='nsb', sampling='naive', calc=['HX', 'HXY'])
                ts_decoded_mi_nsb[n_clusts-1] = s.I()
        else:
            tr_tree = np.zeros(shape=(n_tr_obs-1, 3))
            import pymuvr
            spikes = self.spikes_arch.get_spikes(cell_type='grc')
            self.spikes_arch.load_attrs()
            tr_spikes = [spikes[o] for o in train_idxs]
            ts_spikes = [spikes[o] for o in test_idxs]
            # compute multineuron distance between each pair of training observations
            print('calculating distances between training observations')
            tr_distances = pymuvr.square_distance_matrix(tr_spikes, self.multineuron_metric_mixing, self.tau)
            # cluster training data
            print('clustering training data')
            tr_tree = linkage(tr_distances, method=self.linkage_method_string)
            # train the decoder and use it to calculate mi on the testing dataset
            print("training the decoder and using it to calculate mi on test data")
            tr_distances_square = np.square(tr_distances)
            for n_clusts in range(min_clusts_analysed, max_clusts_analysed+1):
                # iterate over the number of clusters and, step by
                # step, train the decoder and use it to calculate mi
                tr_clustering = fcluster(tr_tree, t=n_clusts, criterion='maxclust')
                out_alphabet = []
                for c in range(1, n_clusts+1):
                    # every cluster is represented in the output
                    # alphabet by the element which minimizes the sum
                    # of intra-cluster square distances
                    obs_in_c = [ob for ob in range(n_tr_obs) if tr_clustering[ob] == c]
                    sum_of_intracluster_square_distances = tr_distances_square[obs_in_c, :][:, obs_in_c].sum(axis=1)
                    # argmin indexes into obs_in_c, so map it back to the
                    # original training-set index before picking the spike train
                    out_alphabet.append(tr_spikes[obs_in_c[np.argmin(sum_of_intracluster_square_distances)]])
                distances = pymuvr.distance_matrix(ts_spikes, out_alphabet, self.multineuron_metric_mixing, self.tau)
                # each observation in the testing set is decoded by
                # assigning it to the cluster whose representative
                # element it's closest to
                decoded_output = distances.argmin(axis=1)
                # calculate MI
                Xm = n_clusts
                X_dims = (Xn, Xm)
                X = decoded_output
                s = pe.SortedDiscreteSystem(X, X_dims, Ym, Ny)
                s.calculate_entropies(method='qe', sampling='naive', calc=['HX', 'HXY'], qe_method='plugin')
                ts_decoded_mi_qe[n_clusts-1] = s.I()
                s.calculate_entropies(method='pt', sampling='naive', calc=['HX', 'HXY'])
                ts_decoded_mi_pt[n_clusts-1] = s.I()
                s.calculate_entropies(method='nsb', sampling='naive', calc=['HX', 'HXY'])
                ts_decoded_mi_nsb[n_clusts-1] = s.I()
                if n_clusts == self.n_stim_patterns:
                    px_at_same_size_point = s.PX
            # save linkage tree to results archive (only if
            # performing hierarchical clustering)
            self.results_arch.update_result('tr_linkage', data=tr_tree)
        # save analysis results in the archive
        print('updating results archive')
        self.results_arch.update_result('ts_decoded_mi_plugin', data=ts_decoded_mi_plugin)
        self.results_arch.update_result('ts_decoded_mi_qe', data=ts_decoded_mi_qe)
        self.results_arch.update_result('ts_decoded_mi_pt', data=ts_decoded_mi_pt)
        self.results_arch.update_result('ts_decoded_mi_nsb', data=ts_decoded_mi_nsb)
        self.results_arch.update_result('i_mean_count', data=i_mean_count)
        self.results_arch.update_result('o_mean_count', data=o_mean_count)
        self.results_arch.update_result('i_sparseness_hoyer', data=i_sparseness_hoyer)
        self.results_arch.update_result('i_sparseness_activity', data=i_sparseness_activity)
        self.results_arch.update_result('i_sparseness_vinje', data=i_sparseness_vinje)
        self.results_arch.update_result('o_sparseness_hoyer', data=o_sparseness_hoyer)
        self.results_arch.update_result('o_sparseness_activity', data=o_sparseness_activity)
        self.results_arch.update_result('o_sparseness_vinje', data=o_sparseness_vinje)
        # update attributes
        self.results_arch.load()
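# A standalone sketch (the helper name is illustrative, not part of the class
# above) of the balanced train/test split performed at the top of run_analysis:
# for every stimulus pattern, training_size trials are drawn at random into the
# training set and the remaining trials form the test set, so each pattern is
# represented equally in both sets. Observation indices are assumed to be
# ordered pattern-by-pattern, trial-by-trial, and itertools and random are
# assumed to be imported at module level, as run_analysis already uses them.
def _split_train_test(n_stim_patterns, n_trials, training_size):
    train_idxs = list(itertools.chain(
        *([x + n_trials*sp for x in random.sample(range(n_trials), training_size)]
          for sp in range(n_stim_patterns))))
    # everything that was not picked for training goes to the test set
    test_idxs = [x for x in range(n_stim_patterns*n_trials) if x not in train_idxs]
    return train_idxs, test_idxs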