def responsive_units(spike_times, spike_clusters, event_times,
                     pre_time=[0.5, 0], post_time=[0, 0.5], alpha=0.05):
    """
    Determine responsive neurons by doing a Wilcoxon Signed-Rank test between a baseline period
    before a certain task event (e.g. stimulus onset) and a period after the task event.

    Parameters
    ----------
    spike_times : 1D array
        spike times (in seconds)
    spike_clusters : 1D array
        cluster ids corresponding to each entry in `spike_times`
    event_times : 1D array
        times (in seconds) of the events
    pre_time : two-element array
        time (in seconds) preceding the event to get the baseline (e.g. [0.5, 0.2] would be a
        window starting 0.5 seconds before the event and ending at 0.2 seconds before the event)
    post_time : two-element array
        time (in seconds) following the event, given as [start, end] of the response window
    alpha : float
        alpha to use for statistical significance

    Returns
    -------
    significant_units : ndarray
        cluster ids of the units that are significantly modulated after FDR correction
    stats : 1D array
        the statistic of the test that was performed (0 for units that were not tested)
    p_values : ndarray
        the p-values of all the clusters, as returned by the FDR correction
    cluster_ids : ndarray
        cluster ids of the p-values
    """

    # Get spike counts for baseline and event timewindow
    baseline_times = np.column_stack(((event_times - pre_time[0]), (event_times - pre_time[1])))
    baseline_counts, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters,
                                                            baseline_times)
    times = np.column_stack(((event_times + post_time[0]), (event_times + post_time[1])))
    spike_counts, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, times)

    # Do statistics
    n_units = spike_counts.shape[0]
    p_values = np.empty(n_units)
    stats = np.empty(n_units)
    for i in range(n_units):
        if np.array_equal(baseline_counts[i, :], spike_counts[i, :]):
            # The signed-rank test is undefined when every paired difference is zero.
            # Check element-wise equality rather than the sum of the differences: differences
            # that merely cancel out (e.g. +1 and -1) must still be tested.
            p_values[i] = 1
            stats[i] = 0
        else:
            stats[i], p_values[i] = wilcoxon(baseline_counts[i, :], spike_counts[i, :])

    # Perform FDR correction for multiple testing
    sig_units, p_values, _, _ = multipletests(p_values, alpha, method='fdr_bh')
    significant_units = cluster_ids[sig_units]

    return significant_units, stats, p_values, cluster_ids
def roc_single_event(spike_times, spike_clusters, event_times,
                     pre_time=[0.5, 0], post_time=[0, 0.5]):
    """
    Quantify how strongly each neuron responds to a task event as the area under the ROC curve
    separating its spike counts in a baseline window before the event from its spike counts in
    a window after the event. Values above 0.5 indicate higher counts after the event, values
    below 0.5 indicate lower counts after the event.

    Parameters
    ----------
    spike_times : 1D array
        spike times (in seconds)
    spike_clusters : 1D array
        cluster ids corresponding to each entry in `spike_times`
    event_times : 1D array
        times (in seconds) of the events
    pre_time : two-element array
        time (in seconds) preceding the event to get the baseline (e.g. [0.5, 0.2] would be a
        window starting 0.5 seconds before the event and ending at 0.2 seconds before the event)
    post_time : two-element array
        time (in seconds) following the event, given as [start, end] of the response window

    Returns
    -------
    auc_roc : 1D array
        the area under the ROC curve, one value per cluster
    cluster_ids : 1D array
        cluster ids of the AUC values
    """

    # Count spikes in the window preceding every event
    pre_windows = np.column_stack((event_times - pre_time[0], event_times - pre_time[1]))
    baseline_counts, cluster_ids = get_spike_counts_in_bins(
        spike_times, spike_clusters, pre_windows)

    # Count spikes in the window following every event
    post_windows = np.column_stack((event_times + post_time[0], event_times + post_time[1]))
    spike_counts, cluster_ids = get_spike_counts_in_bins(
        spike_times, spike_clusters, post_windows)

    # Class labels are shared by all units: 0 marks a baseline count, 1 a post-event count
    labels = np.concatenate((np.zeros(baseline_counts.shape[1]),
                             np.ones(spike_counts.shape[1])))

    # One AUC per unit, scoring the concatenated baseline and post-event counts
    n_units = spike_counts.shape[0]
    auc_roc = np.empty(n_units)
    for unit in range(n_units):
        scores = np.concatenate((baseline_counts[unit, :], spike_counts[unit, :]))
        auc_roc[unit] = roc_auc_score(labels, scores)

    return auc_roc, cluster_ids
def test_regress(self):
    # Shape checks for `regress` with every regularization option, first without and then
    # with cross-validation. Nothing is checked when no test data is available.
    if self.test_data is None:
        return

    data = self.test_data
    spike_times = data['spike_times']
    spike_clusters = data['spike_clusters']
    event_times = data['event_times']
    event_groups = data['event_groups']
    cv = KFold(n_splits=2)

    # Population activity in a one-second window centred on each event (events x clusters)
    windows = np.column_stack((event_times - 0.5, event_times + 0.5))
    counts, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, windows)
    counts = counts.T

    expected_shape = event_groups.shape
    regularizations = (None, 'L1', 'L2')

    # Test all regularization methods WITHOUT cross-validation
    for method in regularizations:
        pred = regress(counts, event_groups, cross_validation=None,
                       return_training=False, regularization=method)
        self.assertEqual(pred.shape, expected_shape)

    # Test all regularization methods WITH cross-validation
    for method in regularizations:
        pred, pred_training = regress(counts, event_groups, cross_validation=cv,
                                      return_training=True, regularization=method)
        self.assertEqual(pred.shape, expected_shape)
        self.assertEqual(pred_training.shape, expected_shape)
def test_get_spike_counts_in_bins(self):
    # Check that the count matrix is (clusters x events) and that one cluster id is returned
    # per unique cluster. Nothing is checked when no test data is available.
    if self.test_data is None:
        return

    data = self.test_data
    spike_times = data['spike_times']
    spike_clusters = data['spike_clusters']
    event_times = data['event_times']

    # One-second window centred on each event
    windows = np.column_stack((event_times - 0.5, event_times + 0.5))
    counts, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, windows)

    num_clusters = np.size(np.unique(spike_clusters))
    expected_shape = (num_clusters, np.size(event_times))
    self.assertEqual(counts.shape, expected_shape)
    self.assertTrue(np.size(cluster_ids) == num_clusters)
def test_regress(self):
    # Check that `regress`, called with its default options, returns one prediction per
    # event. Nothing is checked when no test data is available.
    if self.test_data is None:
        return

    data = self.test_data
    spike_times = data['spike_times']
    spike_clusters = data['spike_clusters']
    event_times = data['event_times']
    event_groups = data['event_groups']

    # Population activity in a one-second window centred on each event (events x clusters)
    windows = np.column_stack((event_times - 0.5, event_times + 0.5))
    counts, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, windows)
    population = counts.T

    pred = regress(population, event_groups)
    self.assertEqual(pred.shape, event_groups.shape)
def roc_between_two_events(spike_times, spike_clusters, event_times, event_groups,
                           pre_time=0, post_time=0.25):
    """
    Calculate the area under the ROC curve that indicates how well the activity of each neuron
    distinguishes between two events (e.g. movement to the right vs left). A value of 0.5
    indicates the neuron cannot distinguish between the two events. A value of 0 or 1
    indicates maximum distinction.

    Parameters
    ----------
    spike_times : 1D array
        spike times (in seconds)
    spike_clusters : 1D array
        cluster ids corresponding to each entry in `spike_times`
    event_times : 1D array
        times (in seconds) of the events from the two groups
    event_groups : 1D array
        group identities of the events as either 0 or 1
    pre_time : float
        time (in seconds) to precede the event times
    post_time : float
        time (in seconds) to follow the event times

    Returns
    -------
    auc_roc : 1D array
        an array of the area under the ROC curve for every neuron
    cluster_ids : 1D array
        cluster ids of the AUC values
    """

    # Spike counts in the window [event - pre_time, event + post_time] around every event
    window_start = event_times - pre_time
    window_end = event_times + post_time
    spike_counts, cluster_ids = get_spike_counts_in_bins(
        spike_times, spike_clusters, np.column_stack((window_start, window_end)))

    # One AUC per neuron: event group as the label, spike count as the score
    n_units = spike_counts.shape[0]
    auc_roc = np.empty(n_units)
    for unit in range(n_units):
        auc_roc[unit] = roc_auc_score(event_groups, spike_counts[unit, :])

    return auc_roc, cluster_ids
def test_classify(self):
    # Run `classify` with a multinomial naive Bayes classifier (no cross-validation) and
    # check the accuracy and the output shapes. Nothing is checked when no test data is
    # available.
    if self.test_data is None:
        return
    spike_times = self.test_data['spike_times']
    spike_clusters = self.test_data['spike_clusters']
    event_times = self.test_data['event_times']
    event_groups = self.test_data['event_groups']
    clf = MultinomialNB()

    # Population activity in a one-second window centred on each event (events x clusters)
    times = np.column_stack(((event_times - 0.5), (event_times + 0.5)))
    counts, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, times)
    counts = counts.T

    accuracy, pred, prob = classify(counts, event_groups, clf)

    # Compare floats with a tolerance: exact equality depends on rounding, and
    # assertAlmostEqual reports both values when the check fails.
    self.assertAlmostEqual(accuracy, 0.8888888888888888)
    self.assertEqual(pred.shape, event_groups.shape)
    self.assertEqual(prob.shape, event_groups.shape)
def test_classify(self):
    # Run `classify` with a multinomial naive Bayes classifier and 2-fold cross-validation,
    # then check the test accuracy, the training accuracy and the output shapes. Nothing is
    # checked when no test data is available.
    if self.test_data is None:
        return
    spike_times = self.test_data['spike_times']
    spike_clusters = self.test_data['spike_clusters']
    event_times = self.test_data['event_times']
    event_groups = self.test_data['event_groups']
    clf = MultinomialNB()
    cv = KFold(n_splits=2)

    # Population activity in a one-second window centred on each event (events x clusters)
    times = np.column_stack(((event_times - 0.5), (event_times + 0.5)))
    counts, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, times)
    counts = counts.T

    accuracy, pred, prob, acc_training = classify(counts, event_groups, clf,
                                                  cross_validation=cv,
                                                  return_training=True)

    # Compare floats with a tolerance: exact equality depends on rounding, and
    # assertAlmostEqual reports both values when the check fails.
    self.assertAlmostEqual(accuracy, 0.2222222222222222)
    self.assertAlmostEqual(acc_training, 0.9444444444444444)
    self.assertEqual(pred.shape, event_groups.shape)
    self.assertEqual(prob.shape, event_groups.shape)
] # Select spikes and clusters spks_region = spikes[probe].times[np.isin( spikes[probe].clusters, clusters_in_region)] clus_region = spikes[probe].clusters[np.isin( spikes[probe].clusters, clusters_in_region)] # Check if there are enough neurons in this brain region if np.unique(clus_region).shape[0] < MIN_NEURONS: continue # Get population activity for all trials times = np.column_stack( ((trial_times - PRE_TIME), (trial_times + POST_TIME))) population_activity, cluster_ids = get_spike_counts_in_bins( spks_region, clus_region, times) population_activity = population_activity.T # Subtract mean firing rates for all stim types if 'norm' in TARGET: norm_pop = np.empty(population_activity.shape) for s, contrast in enumerate(trials['signed_contrast']): norm_pop[s, :] = ( population_activity[s, :] - np.mean(population_activity[ trials['signed_contrast'] == contrast, :], axis=0)) population_activity = norm_pop # Initialize cross-validation if VALIDATION == 'kfold-interleaved':
def differentiate_units(spike_times, spike_clusters, event_times, event_groups,
                        pre_time=0, post_time=0.5, test='ranksums', alpha=0.05):
    """
    Determine units which significantly differentiate between two task events
    (e.g. stimulus left/right) by performing a statistical test between the spike counts
    elicited by the two events. Default is a Wilcoxon Rank Sum test.

    Parameters
    ----------
    spike_times : 1D array
        spike times (in seconds)
    spike_clusters : 1D array
        cluster ids corresponding to each entry in `spike_times`
    event_times : 1D array
        times (in seconds) of the events from the two groups
    event_groups : 1D array
        group identities of the events as either 0 or 1
    pre_time : float
        time (in seconds) to precede the event times
    post_time : float
        time (in seconds) to follow the event times
    test : string
        which statistical test to use, options are:
            'ranksums'      Wilcoxon Rank Sums test
            'signrank'      Wilcoxon Signed Rank test (for paired observations)
            'ttest'         independent samples t-test
            'paired_ttest'  paired t-test
    alpha : float
        alpha to use for statistical significance

    Returns
    -------
    significant_units : 1D array
        cluster ids of the units that significantly differentiate after FDR correction
    stats : 1D array
        the statistic of the test that was performed (0 for units that were not tested)
    p_values : 1D array
        the p-values of all the clusters, as returned by the FDR correction
    cluster_ids : ndarray
        cluster ids of the p-values

    Raises
    ------
    AssertionError
        if `test` is not one of the supported options, or if a paired test is requested
        while the two groups contain a different number of events
    """

    # Check input
    assert test in ['ranksums', 'signrank', 'ttest', 'paired_ttest'], \
        "test must be one of 'ranksums', 'signrank', 'ttest' or 'paired_ttest'"
    paired = test in ('signrank', 'paired_ttest')
    if paired:
        assert np.sum(event_groups == 0) == np.sum(event_groups == 1), \
            'For paired tests the number of events in both groups needs to be the same'

    # Select the test function once instead of on every loop iteration
    if test == 'ranksums':
        stat_func = ranksums
    elif test == 'signrank':
        stat_func = wilcoxon
    elif test == 'ttest':
        stat_func = ttest_ind
    elif test == 'paired_ttest':
        stat_func = ttest_rel
    else:
        # Only reachable when assertions are disabled (python -O)
        raise ValueError('Unknown test: %s' % test)

    # Get spike counts for the two events
    times_1 = np.column_stack(((event_times[event_groups == 0] - pre_time),
                               (event_times[event_groups == 0] + post_time)))
    counts_1, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, times_1)
    times_2 = np.column_stack(((event_times[event_groups == 1] - pre_time),
                               (event_times[event_groups == 1] + post_time)))
    counts_2, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, times_2)

    # Do statistics
    n_units = len(cluster_ids)
    p_values = np.empty(n_units)
    stats = np.empty(n_units)
    for i in range(n_units):
        group_1 = counts_1[i, :]
        group_2 = counts_2[i, :]
        no_spikes = (np.sum(group_1) == 0) and (np.sum(group_2) == 0)
        # Paired tests are undefined when every paired difference is zero, which also
        # happens for identical non-zero counts that the `no_spikes` check does not catch.
        no_difference = paired and np.array_equal(group_1, group_2)
        if no_spikes or no_difference:
            p_values[i] = 1
            stats[i] = 0
        else:
            stats[i], p_values[i] = stat_func(group_1, group_2)

    # Perform FDR correction for multiple testing
    sig_units, p_values, _, _ = multipletests(p_values, alpha, method='fdr_bh')
    significant_units = cluster_ids[sig_units]

    return significant_units, stats, p_values, cluster_ids
def responsive_units(spike_times, spike_clusters, event_times,
                     pre_time=[0.5, 0], post_time=[0, 0.5], alpha=0.05, use_fr=False):
    """
    Find neurons that respond to a task event (e.g. stimulus onset) by comparing, per
    cluster, the activity in a baseline window before the event with the activity in a window
    after the event. The comparison is delegated to `compute_comparison_statistics` with
    test='signrank'.

    Parameters
    ----------
    spike_times : 1D array
        spike times (in seconds)
    spike_clusters : 1D array
        cluster ids corresponding to each entry in `spike_times`
    event_times : 1D array
        times (in seconds) of the events
    pre_time : two-element array
        time (in seconds) preceding the event to get the baseline (e.g. [0.5, 0.2] would be a
        window starting 0.5 seconds before the event and ending at 0.2 seconds before the event)
    post_time : two-element array
        time (in seconds) following the event, given as [start, end] of the response window
    alpha : float
        alpha to use for statistical significance
    use_fr : bool
        whether to use the firing rate (count divided by window length) instead of the
        total spike count

    Returns
    -------
    significant_units : ndarray
        cluster ids selected by the comparison as significantly modulated
    stats : 1D array
        the statistic of the test that was performed
    p_values : ndarray
        the p-values of all the clusters
    cluster_ids : ndarray
        cluster ids of the p-values
    """

    # Spike counts in the baseline window preceding every event
    pre_start = event_times - pre_time[0]
    pre_end = event_times - pre_time[1]
    baseline_counts, cluster_ids = get_spike_counts_in_bins(
        spike_times, spike_clusters, np.column_stack((pre_start, pre_end)))

    # Spike counts in the response window following every event
    post_start = event_times + post_time[0]
    post_end = event_times + post_time[1]
    spike_counts, cluster_ids = get_spike_counts_in_bins(
        spike_times, spike_clusters, np.column_stack((post_start, post_end)))

    if use_fr:
        # Normalise by window duration so windows of different length are comparable
        baseline_duration = pre_time[0] - pre_time[1]
        response_duration = post_time[1] - post_time[0]
        baseline_counts = baseline_counts / baseline_duration
        spike_counts = spike_counts / response_duration

    # Do statistics
    sig_units, stats, p_values = compute_comparison_statistics(
        baseline_counts, spike_counts, test='signrank', alpha=alpha)

    return cluster_ids[sig_units], stats, p_values, cluster_ids
if INCL_NEURONS == 'pass-QC': clusters_pass = np.where(clusters[PROBE]['metrics']['label'] == 1)[0] elif INCL_NEURONS == 'all': clusters_pass = np.arange(clusters[PROBE]['metrics'].shape[0]) # Select spikes and clusters spike_times = spikes[PROBE].times[np.isin(spikes[PROBE].clusters, clusters_pass)] spike_clusters = spikes[PROBE].clusters[np.isin(spikes[PROBE].clusters, clusters_pass)] # Decode prior from model fit times = np.column_stack( ((trials.goCue_times - PRE_TIME), (trials.goCue_times + POST_TIME))) pop_act_all, cluster_ids = get_spike_counts_in_bins(spikes[PROBE].times, spikes[PROBE].clusters, times) pop_act_all = pop_act_all.T pop_act_pass, cluster_ids = get_spike_counts_in_bins(spike_times, spike_clusters, times) pop_act_pass = pop_act_pass.T if VALIDATION == 'kfold-interleaved': cv = KFold(n_splits=NUM_SPLITS, shuffle=True) elif VALIDATION == 'kfold': cv = KFold(n_splits=NUM_SPLITS, shuffle=False) pred_all = regress(pop_act_all, priors, cross_validation=cv, regularization='L1') r_all = pearsonr(priors, pred_all)[0] pred_pass = regress(pop_act_pass,