def test_concatenate_firings():
    """Compare concatenate_firings with a NumPy reference, with and without label increments.

    Two random firings arrays are written to disk and concatenated by the
    processor using known time offsets.  The expected results are built
    directly with NumPy:

    * time offsets are added to row 1 of every file in both modes;
    * with ``increment_labels='true'`` row 2 of the second file is also
      shifted by the largest value in row 2 of the first file.

    Returns
    -------
    bool
        True when all assertions pass.
    """
    M, N1, N2 = 4, 2000, 30000
    test_offset_str = '300000,123456789'
    test_offset = [300000, 123456789]

    fir1 = np.around(np.random.rand(M, N1), decimals=3)
    mlpy.writemda64(fir1, 'tmp.fir1.mda')
    fir2 = np.around(np.random.rand(M, N2), decimals=3)
    mlpy.writemda64(fir2, 'tmp.fir2.mda')

    # Copy before shifting: plain assignment would alias the input arrays.
    fir1_shifted = fir1.copy()
    fir2_shifted = fir2.copy()
    fir1_shifted[1, :] += test_offset[0]
    fir2_shifted[1, :] += test_offset[1]

    # Expected output without label increments: only the times move.
    fir12 = np.around(np.append(fir1_shifted, fir2_shifted, axis=1),
                      decimals=3)

    # Expected output with label increments: the second file's labels are
    # raised by the maximum label seen so far.
    fir2_relabelled = fir2_shifted.copy()
    fir2_relabelled[2, :] += np.max(fir1_shifted[2, :])
    fir12_incr = np.around(np.append(fir1_shifted, fir2_relabelled, axis=1),
                           decimals=3)

    concatenate_firings(firings_list=['tmp.fir1.mda', 'tmp.fir2.mda'],
                        firings_out='tmp.test_fir12.mda',
                        time_offsets=test_offset_str,
                        increment_labels='false')
    concatenate_firings(firings_list=['tmp.fir1.mda', 'tmp.fir2.mda'],
                        firings_out='tmp.test_fir12_incr.mda',
                        time_offsets=test_offset_str,
                        increment_labels='true')

    test_fir12 = np.around(mlpy.readmda('tmp.test_fir12.mda'), decimals=3)
    test_fir12_incr = np.around(mlpy.readmda('tmp.test_fir12_incr.mda'),
                                decimals=3)

    np.testing.assert_array_almost_equal(fir12, test_fir12, decimal=3)
    np.testing.assert_array_almost_equal(fir12_incr, test_fir12_incr,
                                         decimal=3)
    return True
def test_bandpass_filter():
    """Smoke test: bandpass_filter reports success and keeps the array shape.

    A random 12 x 30000 timeseries is written to 'tmp.mda', filtered into
    'tmp2.mda', and both files are read back to compare shapes.

    Returns
    -------
    bool
        True when all assertions pass.
    """
    num_channels, num_timepoints = 12, 30000
    signal = np.random.rand(num_channels, num_timepoints)
    writemda32(signal, 'tmp.mda')

    succeeded = bandpass_filter(timeseries="tmp.mda",
                                timeseries_out="tmp2.mda")
    assert succeeded

    unfiltered = readmda('tmp.mda')
    filtered = readmda('tmp2.mda')
    # Only shapes are compared; the filtered values are not checked here.
    assert unfiltered.shape == filtered.shape
    assert signal.shape == filtered.shape
    return True
def test_normalize_channels():
    """Check normalize_channels against a per-channel z-score computed with NumPy.

    The reference subtracts each channel's mean and divides by its sample
    standard deviation (ddof=1); the processor output must agree to five
    decimals.

    Returns
    -------
    bool
        True when all assertions pass.
    """
    num_channels, num_timepoints = 4, 1000
    signal = np.random.rand(num_channels, num_timepoints)
    writemda32(signal, 'tmp.mda')

    succeeded = normalize_channels(timeseries="tmp.mda",
                                   timeseries_out="tmp2.mda")
    assert succeeded

    raw = readmda('tmp.mda')
    normalized = readmda('tmp2.mda')

    # Column vectors broadcast across the time axis.
    channel_mean = np.reshape(np.mean(raw, axis=1), (num_channels, 1))
    channel_stdev = np.reshape(np.sqrt(np.var(raw, axis=1, ddof=1)),
                               (num_channels, 1))
    expected = (raw - channel_mean) / channel_stdev

    np.testing.assert_array_almost_equal(expected, normalized, decimal=5)
    return True
def concat_and_increment(firings_list, time_offsets, increment_labels='true'):
    """Concatenate firings files along the event axis, shifting times and optionally labels.

    Parameters
    ----------
    firings_list : sequence
        Paths of firings mda files, read with ``readmda``.
    time_offsets : sequence
        One offset per firings file, added to row 1 (timestamps).
    increment_labels : str
        When equal to ``'true'``, row 2 (labels) of each file is shifted by
        the largest label accumulated so far.

    Returns
    -------
    numpy.ndarray or None
        The concatenated firings (a 3 x 0 array when ``firings_list`` is
        empty), or None after printing a message when the two input lengths
        differ.
    """
    if len(firings_list) != len(time_offsets):
        print('Mismatch between number of firings files and number of offsets')
        return None

    concatenated_firings = np.zeros((3, 0))  # result for an empty list
    for path, offset in zip(firings_list, time_offsets):
        to_append = readmda(path)
        to_append[1, :] += offset

        if concatenated_firings.size == 0:
            # Nothing accumulated yet (first file, or all earlier files were
            # empty): take this file as-is.  Appending it as well would
            # duplicate its events, and there is no previous label maximum.
            concatenated_firings = to_append
            continue

        if increment_labels == 'true':
            # Add the Kmax from the previously accumulated firings.
            to_append[2, :] += np.max(concatenated_firings[2, :])
        concatenated_firings = np.append(concatenated_firings, to_append,
                                         axis=1)
    return concatenated_firings
def test_synthesize_random_firings():
    """Check that synthesize_random_firings writes 3 rows and labels up to K.

    Returns
    -------
    bool
        True when all assertions pass.
    """
    num_units = 10
    synthesize_random_firings(K=num_units, firings_out='tmp.firings.mda')

    synthesized = readmda('tmp.firings.mda')
    unit_labels = synthesized[2, :]  # third row holds the labels

    assert max(unit_labels) == num_units
    assert synthesized.shape[0] == 3
    return True
def test_extract_timeseries():
    """Extract channels 1 and 3 from a raw float64 file and compare with the source rows.

    The data are written as raw binary (transposed so that each timepoint's
    channels are contiguous) and read back through extract_timeseries using
    the dtype / channel-count arguments.

    Returns
    -------
    bool
        True when all assertions pass.
    """
    num_channels, num_timepoints = 4, 10000
    data = np.random.rand(num_channels, num_timepoints)
    data.astype('float64').transpose().tofile('tmp.dat')

    succeeded = extract_timeseries(timeseries="tmp.dat",
                                   timeseries_out="tmp2.mda",
                                   channels="1,3",
                                   t1=-1,
                                   t2=-1,
                                   timeseries_num_channels=num_channels,
                                   timeseries_dtype='float64')
    writemda64(data, 'tmp.mda')
    assert succeeded

    A = readmda('tmp.mda')  # read back but not used in the checks below
    extracted = readmda('tmp2.mda')

    assert extracted.shape[0] == 2
    assert extracted.shape[1] == num_timepoints
    # Channels "1,3" are one-based, i.e. rows 0 and 2 of the source array.
    assert np.array_equal(data[[0, 2], :], extracted)
    return True
def apply_label_map(*, firings, label_map, firings_out):
    """
    Apply a label map to a given firings, including masking and merging

    Parameters
    ----------
    firings : INPUT
        Path of input firings mda file
    label_map : INPUT
        Path of input label map mda file [base 1, mapping to zero removes from firings]
    firings_out : OUTPUT
        Path of output firings mda file with the label map applied
    """
    events = readmda(firings)

    # Each row of the map is a pair: column 1 is replaced by column 0.
    pairs = np.reshape(readmda(label_map), (-1, 2))
    pairs = pairs[np.argsort(pairs[:, 0])]  # assure input is sorted

    # Propagate merge pairs to the lowest label number: any pair whose
    # target (column 0) is itself remapped by row `row` inherits that row's
    # target.  Relies on the sort above.
    for row in range(pairs.shape[0]):
        remapped_label = pairs[row, 1]
        pairs[pairs[:, 0] == remapped_label, 0] = pairs[row, 0]

    # Apply the label map to the events, one pair at a time.
    # (Plain equality masks are used rather than np.isin, which is missing
    # from older NumPy versions.)
    for new_label, old_label in pairs:
        events[2, events[2, :] == old_label] = new_label

    # Mask out all events whose label was mapped to zero.
    keep = events[2, :] != 0
    events = events[:, keep]

    # Write remapped firings.
    return writemda64(events, firings_out)
def get_dmatrix_templates(timeseries_list, firings_list):
    """Compute distance matrices and templates between each pair of adjacent segments.

    Parameters
    ----------
    timeseries_list : list
        Paths of the timeseries segments; the first is opened with
        ``DiskReadMda`` to obtain the channel count.
    firings_list : list
        Paths of the firings file for each segment, read with ``readmda``.

    Returns
    -------
    tuple
        ``(dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes)``.  The three
        matrices have shape (Kmax, Kmax, num_segments - 1) and are filled
        with -1 where nothing was computed; ``templates`` has shape
        (M, clip_size, Kmax, 2 * (num_segments - 1)); ``Kmaxes`` lists the
        largest label of each segment (0 for a segment without events).
        When no segment has a positive label the arrays are returned with a
        zero-length cluster axis instead of being left undefined.
    """
    X = DiskReadMda(timeseries_list[0])
    M = X.N1()
    clip_size = 50
    num_segments = len(timeseries_list)
    num_pairs = num_segments - 1

    firings_arrays = [readmda(firings_list[j]) for j in range(num_segments)]

    Kmaxes = []
    for j, F in enumerate(firings_arrays):
        print(str(len(F[1, :])) + ' clustered events in segment ' + str(j))
        labels = F[2, :]
        Kmaxes.append(np.max(labels) if len(labels) else 0)

    # Always allocate the outputs so the return statement is well defined
    # even when every segment is empty.
    use_max = max(int(max(Kmaxes)), 0)
    dmatrix = np.ones((use_max, use_max, num_pairs)) * (-1)
    k1_dmatrix = np.ones((use_max, use_max, num_pairs)) * (-1)
    k2_dmatrix = np.ones((use_max, use_max, num_pairs)) * (-1)
    templates = np.zeros((M, clip_size, use_max, 2 * num_pairs))

    for j in range(num_pairs):
        print('Computing dmatrix between segments %d and %d' % (j, j + 1))
        if np.size(firings_arrays[j]) == 0 or np.size(
                firings_arrays[j + 1]) == 0:
            # Nothing to compare; leave the -1 / zero placeholders.
            continue
        (dmatrix0, k1_dmatrix0, k2_dmatrix0, templates1,
         templates2) = compute_dmatrix(timeseries_list[j],
                                       timeseries_list[j + 1],
                                       firings_arrays[j],
                                       firings_arrays[j + 1],
                                       clip_size=clip_size)
        n1, n2 = dmatrix0.shape[0], dmatrix0.shape[1]
        dmatrix[0:n1, 0:n2, j] = dmatrix0
        k1_dmatrix[0:n1, 0:n2, j] = k1_dmatrix0
        k2_dmatrix[0:n1, 0:n2, j] = k2_dmatrix0
        templates[:, :, 0:n1, j * 2] = templates1
        templates[:, :, 0:n2, j * 2 + 1] = templates2

    return (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes)
def concatenate_firings(*,
                        firings_list,
                        firings_out,
                        time_offsets,
                        increment_labels='false'):
    """
    Combine a list of firings files to form a single firings file

    Parameters
    ----------
    firings_list : INPUT
        A list of paths of firings mda files to be concatenated
    firings_out : OUTPUT
        Path of the concatenated firings mda file
    time_offsets : string
        An array of time offsets for each firings file. Expect one offset for each firings file.
        Given as a comma-separated string; when empty, all offsets are zero.
    increment_labels : string
        When 'true', the labels (third row) of each file are increased by the
        largest label of the firings concatenated so far.
    """
    if time_offsets:
        # np.float64 rather than the np.float_ alias removed in NumPy 2.0.
        offsets = np.fromstring(time_offsets, dtype=np.float64, sep=',')
    else:
        offsets = np.zeros(len(firings_list))

    if len(firings_list) != len(offsets):
        print('Mismatch between number of firings files and number of offsets')
        return False

    concatenated_firings = np.zeros((3, 0))  # result for an empty list
    for path, offset in zip(firings_list, offsets):
        to_append = mlpy.readmda(path)
        to_append[1, :] += offset

        if concatenated_firings.size == 0:
            # First file, or every earlier file was empty: there is no
            # previous label maximum to add, so start from this file.
            concatenated_firings = to_append
            continue

        if increment_labels == 'true':
            # Add the Kmax from the previously concatenated firings.
            to_append[2, :] += np.max(concatenated_firings[2, :])
        concatenated_firings = np.append(concatenated_firings, to_append,
                                         axis=1)

    mlpy.writemda64(concatenated_firings, firings_out)
    return True
def join_segments(*, timeseries_list, firings_list, dmatrix_out,
                  templates_out):
    """
    Join the results of spike sorting on a sequence of time segments to form a single firings file

    Parameters
    ----------
    timeseries_list : INPUT
        A list of paths of adjacent preprocessed timeseries segment files
    firings_list : INPUT
        A list of paths to corresponding firings files
    dmatrix_out : OUTPUT
        dmatrix for debugging
    templates_out : OUTPUT
        templates for debugging
    """
    first_segment = DiskReadMda(timeseries_list[0])
    num_channels = first_segment.N1()
    clip_size = 100
    num_segments = len(timeseries_list)
    num_pairs = num_segments - 1

    firings_arrays = [readmda(firings_list[seg]) for seg in range(num_segments)]

    # Largest label over all segments sizes the cluster axes below.
    Kmax = 0
    for segment_firings in firings_arrays:
        Kmax = int(max(Kmax, np.max(segment_firings[2, :])))

    # -1 marks entries for which no distance was computed.
    dmatrix = np.ones((Kmax, Kmax, num_pairs)) * (-1)
    templates = np.zeros((num_channels, clip_size, Kmax, 2 * num_pairs))

    for seg in range(num_pairs):
        nxt = seg + 1
        print('Computing dmatrix between segments %d and %d' % (seg, nxt))
        pair_result = compute_dmatrix(timeseries_list[seg],
                                      timeseries_list[nxt],
                                      firings_arrays[seg],
                                      firings_arrays[nxt],
                                      clip_size=clip_size)
        (pair_dmatrix, templates_a, templates_b) = pair_result
        rows, cols = pair_dmatrix.shape[0], pair_dmatrix.shape[1]
        dmatrix[0:rows, 0:cols, seg] = pair_dmatrix
        # Two template slots per pair: even index for the first segment,
        # odd index for the second.
        templates[:, :, 0:rows, seg * 2] = templates_a
        templates[:, :, 0:cols, seg * 2 + 1] = templates_b

    writemda64(templates, templates_out)
    return writemda64(dmatrix, dmatrix_out)
def compute_accuracies(*, confusion_matrix, output, output_format='json'):
    """
    Compute accuracies from a confusion matrix (see ms3.confusion_matrix). The first dimension (rows) of the confusion matrix should correspond to ground truth.

    Parameters
    ----------
    confusion_matrix : INPUT
        The path of the confusion matrix in .mda format. The first dimension (rows) should correspond to ground truth. The final row and final column correspond to unclassified events.
        An in-memory array is also accepted.
    output : OUTPUT
        The output file
    output_format : string
        For now this should always be 'json'
    """
    if isinstance(confusion_matrix, str):
        CM = readmda(confusion_matrix)
    else:
        CM = np.asarray(confusion_matrix)

    # The final row and column hold unclassified events.
    K1 = CM.shape[0] - 1
    K2 = CM.shape[1] - 1
    if (K1 < 0) or (K2 < 0):
        print('Error: not enough rows or columns in confusion matrix')
        return False

    # Do not permit zeros in the denominators.
    row_sums = np.maximum(1, np.sum(CM, axis=1))
    col_sums = np.maximum(1, np.sum(CM, axis=0))

    accuracies = np.zeros(K1)
    if K2 > 0:  # with no sorted units every accuracy stays at zero
        for k in range(K1):
            row = CM[k, :]
            # matches / (events in either unit) for every candidate column
            ratios = row / (col_sums + row_sums[k] - row)
            accuracies[k] = np.max(ratios[0:K2])
    accuracies_sorted = np.sort(accuracies)[::-1]

    obj = {
        'accuracies': accuracies.tolist(),
        'accuracies_sorted': accuracies_sorted.tolist()
    }
    thresholds = (('count99', 0.99), ('count95', 0.95), ('count90', 0.90),
                  ('count85', 0.85), ('count80', 0.80), ('count75', 0.75),
                  ('count70', 0.70), ('count60', 0.60), ('count50', 0.50))
    for key, threshold in thresholds:
        obj[key] = int(np.count_nonzero(accuracies >= threshold))

    with open(output, 'w') as outfile:
        json.dump(obj, outfile, indent=4, sort_keys=True)
    return True
def test_compute_templates():
    """Check that compute_templates succeeds and writes an (M, T, K) array.

    Random firings with timestamps in 1..N and labels in 1..K are used; only
    the output shape is verified.

    Returns
    -------
    bool
        True when all assertions pass.
    """
    num_channels, num_timepoints = 5, 1000
    num_units, clip_size, num_events = 6, 50, 100

    signal = np.random.rand(num_channels, num_timepoints)
    writemda32(signal, 'tmp.mda')

    events = np.zeros((3, num_events))
    events[1, :] = 1 + np.random.randint(num_timepoints,
                                         size=(1, num_events))
    events[2, :] = 1 + np.random.randint(num_units, size=(1, num_events))
    writemda64(events, 'tmp2.mda')

    succeeded = compute_templates(timeseries='tmp.mda',
                                  firings='tmp2.mda',
                                  templates_out='tmp3.mda',
                                  clip_size=clip_size)
    assert succeeded

    written = readmda('tmp3.mda')
    assert written.shape == (num_channels, clip_size, num_units)
    return True
def compute_templates_helper(*, timeseries, firings, clip_size=100):
    """Average the clips of every label to obtain one template per cluster.

    Parameters
    ----------
    timeseries : str
        Path of the timeseries, opened with ``DiskReadMda`` and streamed in
        chunks by ``TimeseriesChunkReader``.
    firings : str
        Path of the firings mda file; row 1 holds timestamps and row 2 the
        labels.
    clip_size : int
        Number of timepoints per clip (T).

    Returns
    -------
    numpy.ndarray or None
        Array of shape (M, T, K) where K is the largest label; a label
        without events keeps an all-zero template.  An (M, T, 0) array is
        returned when there are no positive labels.  None is returned when
        the chunk reader reports failure.
    """
    X = DiskReadMda(timeseries)
    M = X.N1()
    F = readmda(firings)
    T = clip_size
    times = F[1, :]
    labels = F[2, :].astype(int)

    K = int(np.max(labels)) if labels.size else 0
    if K <= 0:
        # No events to average.
        return np.zeros((M, T, 0))

    # Accumulators are local and shared with the kernel through the closure;
    # they are only mutated in place, so no module-level state is needed.
    sums = np.zeros((M, T, K))
    counts = np.zeros(K)

    def _kernel(chunk, info):
        # Events whose timestamp falls inside the chunk's [t1, t2] range.
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        # Convert to chunk-relative sample indices (t1a is supplied by the
        # chunk reader).
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        labels0 = labels[inds]

        clips0 = np.zeros((M, clip_size, len(inds)),
                          dtype=np.float32,
                          order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)

        for k in range(1, K + 1):
            inds_kk = np.where(labels0 == k)[0]
            sums[:, :, k - 1] += np.sum(clips0[:, :, inds_kk], axis=2)
            counts[k - 1] += len(inds_kk)
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=40, overlap_size=clip_size * 2)
    if not TCR.run(timeseries, _kernel):
        return None

    templates = np.zeros((M, T, K))
    for k in range(K):
        if counts[k]:
            templates[:, :, k] = sums[:, :, k] / counts[k]
    return templates
def test_extract_clips():
    """Check the shape of extract_clips output and the contents of one clip.

    Timestamps are drawn at least 200 samples away from either end so every
    clip lies fully inside the timeseries.

    Returns
    -------
    bool
        True when all assertions pass.
    """
    M, T, L, N = 5, 100, 100, 1000
    X = np.random.rand(M, N).astype(np.float32)
    writemda32(X, 'tmp.mda')

    F = np.zeros((2, L))
    F[1, :] = 200 + np.random.randint(N - 400, size=(1, L))
    writemda64(F, 'tmp2.mda')

    ret = extract_clips(timeseries='tmp.mda',
                        firings='tmp2.mda',
                        clips_out='tmp3.mda',
                        clip_size=T)
    assert (ret)

    clips0 = readmda('tmp3.mda')
    assert (clips0.shape == (M, T, L))

    # Spot-check one clip against the source samples.  The comparison result
    # used to be discarded, so the clip contents were never verified.
    # NOTE(review): assumes zero-based timestamps with the event placed at
    # offset `a` inside the clip -- confirm against the clip extractor.
    t0 = int(F[1, 10])
    a = int(np.floor((T + 1) / 2 - 1))
    np.testing.assert_array_almost_equal(clips0[:, :, 10],
                                         X[:, t0 - a:t0 - a + T],
                                         decimal=4)
    return True
def extract_clips(*, timeseries, firings, clips_out, clip_size=100):
    """
    Extract clips corresponding to spike events

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries mda file (MxN) from which to draw the event clips (snippets)
    firings : INPUT
        Path of firings mda file (RxL) where R>=2 and L is the number of events. Second row are timestamps.
    clips_out : OUTPUT
        Path of clips mda file (MxTxL). T=clip_size
    clip_size : int
        (Optional) clip size, aka snippet size, aka number of timepoints in a single clip
    """
    # The second row of the firings array holds the event timestamps.
    event_times = readmda(firings)[1, :]
    event_clips = extract_clips_helper(timeseries=timeseries,
                                       times=event_times,
                                       clip_size=clip_size)
    return writemda32(event_clips, clips_out)
def handle_drift_in_segment(*, timeseries, firings, firings_out):
    """
    Handle drift in segment.

    Clusters whose templates are highly correlated are treated as merge
    candidates.  For each candidate pair a subset of events is projected
    onto the line joining the two cluster centroids in PCA space, and the
    pair is merged when ``test_for_merge`` accepts the projection.

    Parameters
    ----------
    timeseries : INPUT
        Path to preprocessed timeseries from which the events are extracted from (MxN)
    firings : INPUT
        Path of input firings mda file
    firings_out : OUTPUT
        Path of output drift-adjusted firings mda file
    """
    subcluster_size = 500  # Size of subclusters for comparison of merge candidate pairs
    corr_comp_thresh = 0.95  # Minimum correlation in templates to consider as merge candidate
    clip_size = 50
    n_pca_dim = 10

    ## Compute the templates
    templates = compute_templates_helper(timeseries=timeseries,
                                         firings=firings,
                                         clip_size=clip_size)
    # (Chan x Clipsize x Clust) -> (Clust x Chan x Clipsize)
    templates = np.transpose(templates, (2, 0, 1))
    firings = mlpy.readmda(firings)
    print('templates', templates.shape)

    ## Determine the merge candidate pairs based on correlation
    num_clusters = templates.shape[0]
    # One flattened template per row: (Clust x Chan*Clipsize)
    flat_templates = np.reshape(templates, (num_clusters, -1))
    # All pairwise cluster comparisons, e.g. [[0, 1], [0, 2], [1, 2]]
    pairwise_idxs = np.array(list(it.combinations(range(num_clusters), 2)),
                             dtype=int).reshape(-1, 2)
    pairwise_corrcoef = np.zeros(pairwise_idxs.shape[0])
    for row, (k1, k2) in enumerate(pairwise_idxs):
        # Correlate the two templates, i.e. rows of the flattened matrix.
        # Indexing columns would compare single samples across clusters.
        pairwise_corrcoef[row] = np.corrcoef(flat_templates[k1, :],
                                             flat_templates[k2, :])[1, 0]
    pairs_for_eval = pairwise_idxs[pairwise_corrcoef >= corr_comp_thresh]

    ## Loop through the pairs above the correlation threshold
    accepted_pairs = []
    for pair in pairs_for_eval:
        pair_labels = pair + 1  # cluster indices are base 0, labels base 1

        ## Extract out the times and labels corresponding to the pair
        firings_subset = firings[:, np.isin(firings[2, :], pair_labels)]
        test_labels = firings_subset[2, :]
        test_eventtimes = firings_subset[1, :]
        # There's no strict guarantee the firing times will be sorted.
        sort_indices = np.argsort(test_eventtimes)
        test_labels = test_labels[sort_indices]
        test_eventtimes = test_eventtimes[sort_indices]

        ## Find the subcluster times and labels
        subcluster_event_indices = find_random_paired_events(
            test_eventtimes, test_labels, subcluster_size)
        subcluster_times = test_eventtimes[subcluster_event_indices]
        subcluster_labels = test_labels[subcluster_event_indices]

        ## Extract the clips for the subcluster
        subcluster_clips = extract_clips_helper(timeseries=timeseries,
                                                times=subcluster_times,
                                                clip_size=clip_size)

        ## PCA features of the clips (number dim = n_pca_dim)
        # NOTE(review): extract_clips documents the clips as MxTxL, so the
        # event axis is moved first to give PCA one row per event; the
        # boolean label masks below index those rows.  Confirm against
        # extract_clips_helper.
        num_events = subcluster_clips.shape[-1]
        clip_matrix = np.reshape(np.moveaxis(subcluster_clips, -1, 0),
                                 (num_events, -1))
        dimenReduc = PCA(n_components=n_pca_dim, whiten=True)
        clip_features = dimenReduc.fit_transform(clip_matrix)

        # Use label data to separate clips into two groups
        A_indices = np.isin(subcluster_labels, pair_labels[0])
        B_indices = np.isin(subcluster_labels, pair_labels[1])
        centroidA = np.mean(clip_features[A_indices, :], axis=0)
        centroidB = np.mean(clip_features[B_indices, :], axis=0)

        # Project every clip onto the line connecting the two centroids
        V = centroidA - centroidB
        clip_1d_projs = np.einsum('ij,j->i', clip_features, V)

        # If the clusters are to be merged, add to the merge list
        if test_for_merge(clip_1d_projs, A_indices, B_indices):
            accepted_pairs.append(pair_labels)

    pairs_to_merge = np.array(accepted_pairs, dtype=float).reshape(-1, 2)
    pairs_to_merge = pairs_to_merge[np.argsort(
        pairs_to_merge[:, 0])]  # Assure that input is sorted

    # Propagate merge pairs to lowest label number (input should be sorted)
    for idx, label in enumerate(pairs_to_merge[:, 1]):
        pairs_to_merge[np.isin(pairs_to_merge[:, 0], label),
                       0] = pairs_to_merge[idx, 0]

    # Merge firing labels (already base 1 corrected)
    for merge_pair in range(pairs_to_merge.shape[0]):
        firings[2, np.isin(firings[2, :], pairs_to_merge[
            merge_pair, 1])] = pairs_to_merge[merge_pair, 0]

    # Write merged firings; return the writer's result like the sibling
    # processors do instead of an implicit None.
    return mlpy.writemda64(firings, firings_out)
def extract_timeseries(*,
                       timeseries,
                       channels_array='',
                       timeseries_out,
                       channels='',
                       t1=-1,
                       t2=-1,
                       timeseries_dtype='',
                       timeseries_num_channels=0):
    """
    Extract a chunk of a timeseries dataset and possibly a subset of channels

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in either .mda or raw binary format. If raw binary, then you must supply dtype and num_channels.
    channels_array : INPUT
        Path of array of channel numbers (positive integers). Either use this or the channels parameter, not both.

    timeseries_out : OUTPUT
        Path of output timeseries in .mda format

    channels : string
        Comma-separated list of channels to extract. Either use this or the channels_array input, not both.
    t1 : integer
        Integer start timepoint (zero-based indexing). If -1 will set to zero.
    t2 : integer
        Integer end timepoint (zero-based indexing). If -1 will set to N-1.
    timeseries_dtype : string
        Only supply this if timeseries is in raw binary format. Choices are int16, uint16, int32, float32, etc.
    timeseries_num_channels : integer
        Only supply this if timeseries is in raw binary format. Integer representing number of channels. Number of timepoints will be deduced
    """
    if channels:
        _channels = np.fromstring(channels, dtype=int, sep=',')
    elif channels_array:
        # Channel numbers are used as indices below, so force integers
        # whatever dtype the file was stored with.
        _channels = readmda(channels_array).ravel().astype(int)
    else:
        _channels = np.empty(0, dtype=int)

    header0 = None
    if timeseries_dtype:
        # Raw binary input: build the header from the file size.
        num_channels = int(timeseries_num_channels)
        if num_channels <= 0:
            print(
                "timeseries_num_channels must be a positive integer when timeseries_dtype is given"
            )
            return False
        size_bytes = os.path.getsize(timeseries)
        num_bytes_per_entry = get_num_bytes_per_entry_from_dt(timeseries_dtype)
        if t2 >= 0:
            num_entries = (int(t2) + 1) * num_channels
        else:
            num_entries, leftover_bytes = divmod(size_bytes,
                                                 num_bytes_per_entry)
            if leftover_bytes or (num_entries % num_channels != 0):
                print(
                    "File size (%ld) is not divisible by number of channels (%g) for dtype=%s"
                    % (size_bytes, timeseries_num_channels, timeseries_dtype))
                return False
        # Integer division keeps the header dimensions integral.
        num_timepoints = int(num_entries // num_channels)
        header0 = MdaHeader(timeseries_dtype, [num_channels, num_timepoints])

    X = DiskReadMda(timeseries, header0)
    M, N = X.N1(), X.N2()
    if _channels.size == 0:
        _channels = np.array(1 + np.arange(M))  # default to all channels
    M2 = _channels.size

    if t1 < 0:
        t1 = 0
    if t2 < 0:
        t2 = N - 1
    N2 = t2 - t1 + 1

    _writer = DiskWriteMda(timeseries_out, [M2, N2], dt=X.dt())
    row_indices = (_channels - 1).tolist()  # one-based -> zero-based

    def _kernel(chunk, info):
        chunk = chunk[row_indices, ]
        return _writer.writeChunk(chunk, i1=0, i2=info.t1)

    chunk_size_mb = 100
    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb,
                                overlap_size=0,
                                t1=t1,
                                t2=t2)
    return TCR.run(X, _kernel)
def get_dmatrix_templates(timeseries_list, firings_list):
    """Compute distance matrices and templates between every pair of segments.

    Segment pairs are ordered with neighbouring segments first, followed by
    the non-neighbouring pairs.

    Parameters
    ----------
    timeseries_list : list
        Paths of the timeseries segments; the first is opened with
        ``DiskReadMda`` to obtain the channel count.
    firings_list : list
        Paths of the firings file for each segment, read with ``readmda``.

    Returns
    -------
    tuple
        ``(dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes,
        segment_combos)``.  The three matrices have shape
        (Kmax, Kmax, num_combos) and are filled with -1 where nothing was
        computed; ``templates`` has shape
        (M, clip_size, Kmax, 2 * num_combos); ``Kmaxes`` lists the largest
        label of each segment (0 for a segment without events);
        ``segment_combos`` is the (num_combos, 2) array of segment index
        pairs in the order used for the last axis.  When no segment has a
        positive label the arrays are returned with a zero-length cluster
        axis instead of being left undefined.
    """
    X = DiskReadMda(timeseries_list[0])
    M = X.N1()
    clip_size = 50
    num_segments = len(timeseries_list)

    # All possible segment combinations; the reshape keeps the array 2-D
    # even when there are fewer than two segments.
    all_combos = np.array(list(it.combinations(range(num_segments), 2)),
                          dtype=int).reshape(-1, 2)
    # Order segment combinations such that neighbors are first, then
    # non-neighbors.
    gaps = all_combos[:, 1] - all_combos[:, 0]
    segment_combos = np.append(all_combos[gaps == 1],
                               all_combos[gaps > 1],
                               axis=0)
    num_combos = segment_combos.shape[0]

    firings_arrays = [readmda(firings_list[j]) for j in range(num_segments)]

    Kmaxes = []
    for j, F in enumerate(firings_arrays):
        print(str(len(F[1, :])) + ' clustered events in segment ' + str(j))
        labels = F[2, :]
        Kmaxes.append(np.max(labels) if len(labels) else 0)

    # Always allocate the outputs so the return statement is well defined
    # even when every segment is empty.
    use_max = max(int(max(Kmaxes)), 0)
    dmatrix = np.ones((use_max, use_max, num_combos)) * (-1)
    k1_dmatrix = np.ones((use_max, use_max, num_combos)) * (-1)
    k2_dmatrix = np.ones((use_max, use_max, num_combos)) * (-1)
    templates = np.zeros((M, clip_size, use_max, 2 * num_combos))

    for n in range(num_combos):
        # n indexes the third dimension of the output matrices.
        j1 = segment_combos[n, 0]
        j2 = segment_combos[n, 1]
        print('Computing dmatrix between segments %d and %d' % (j1, j2))
        if np.size(firings_arrays[j1]) == 0 or np.size(
                firings_arrays[j2]) == 0:
            # Nothing to compare; leave the -1 / zero placeholders.
            continue
        (dmatrix0, k1_dmatrix0, k2_dmatrix0, templates1,
         templates2) = compute_dmatrix(timeseries_list[j1],
                                       timeseries_list[j2],
                                       firings_arrays[j1],
                                       firings_arrays[j2],
                                       clip_size=clip_size)
        n1, n2 = dmatrix0.shape[0], dmatrix0.shape[1]
        dmatrix[0:n1, 0:n2, n] = dmatrix0
        k1_dmatrix[0:n1, 0:n2, n] = k1_dmatrix0
        k2_dmatrix[0:n1, 0:n2, n] = k2_dmatrix0
        templates[:, :, 0:n1, n * 2] = templates1
        templates[:, :, 0:n2, n * 2 + 1] = templates2

    return (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes,
            segment_combos)
def extract_subfirings(*,
                       firings,
                       t1='',
                       t2='',
                       channels='',
                       channels_array='',
                       timeseries='',
                       firings_out):
    """
    Extract a firings subset based on times and/or channels.
    If a time subset is extracted, the firings are adjusted to t_new = t_original - t1
    If channel(s) are extracted with a timeseries, only clusters with largest amplitude on the given channel (as determined by the average waveform in the time range) will be extracted
    First developed for use with extract_timeseries in inspecting very large datasets

    Parameters
    ----------
    firings : INPUT
        A path of a firings file from which a subset is extracted
    t1 : INPUT
        Start time for extracted firings
    t2 : INPUT
        End time for extracted firings; use -1 OR no value for end of timeseries
    channels : INPUT
        A string of channels from which clusters with maximal energy (based on template) will be extracted
    channels_array : INPUT
        An array of channels from which clusters with maximal energy (based on template) will be extracted
    timeseries : INPUT
        A path of a timeseries file from which templates will be calculated if a subset of channels is given
    firings_out : OUTPUT
        The extracted subfirings path
    """
    firings = readmda(firings)

    # Resolve the requested channels into a flat array (empty = no filter).
    if channels:
        _channels = np.fromstring(channels, dtype=int, sep=',')
    elif isinstance(channels_array, str):
        # A non-empty string is treated as the path of an mda array.
        if channels_array:
            _channels = readmda(channels_array).ravel()
        else:
            _channels = np.empty(0)
    elif channels_array is None:
        _channels = np.empty(0)
    else:
        _channels = np.asarray(channels_array).ravel()

    # Time filter.  An end time is honoured even when the start time is 0
    # or missing.
    use_t1 = bool(t1)
    use_t2 = bool(t2) and t2 > 0
    if use_t1 or use_t2:
        print('Time extraction...')
        event_times = firings[1, :]
        t_valid = np.ones(event_times.shape, dtype=bool)
        if use_t1:
            t_valid &= (t1 < event_times)
        if use_t2:
            t_valid &= (event_times < t2)
        firings = firings[:, t_valid]
    else:
        print('Using full time chunk')

    # Channel filter.  Array sizes are tested explicitly because the truth
    # value of a multi-element array is ambiguous.
    if _channels.size and timeseries:
        print('Channels extraction...')
        # Templates with clip_size=1 give one amplitude per channel and
        # cluster (M x 1 x K).
        # NOTE(review): the compute_templates_helper visible in this file is
        # keyword-only and reads `firings` from a path; here the filtered
        # in-memory array is passed -- confirm the helper accepts that.
        amps = compute_templates_helper(timeseries=timeseries,
                                        firings=firings,
                                        clip_size=1)
        num_labels = int(np.max(firings[2, :])) if firings.shape[1] else 0
        # Channel (base 1) with the largest amplitude for each cluster;
        # 0 when the cluster's template is all zeros.
        main_chan = np.zeros(num_labels)
        for k in range(num_labels):
            if np.max(amps[:, :, k]):
                main_chan[k] = np.argmax(amps[:, :, k]) + 1
        # Labels (base 1) whose main channel was requested.
        labels_valid = np.flatnonzero(np.isin(main_chan, _channels)) + 1
        k_valid = np.isin(firings[2, :], labels_valid)
        firings = firings[:, k_valid]
    else:
        print('Using all channels')

    if use_t1:
        firings[1, :] -= t1  # adjust t1 to 0

    return writemda64(firings, firings_out)
def synthesize_timeseries(*,
                          firings='',
                          waveforms='',
                          timeseries_out=None,
                          noise_level=1,
                          samplerate=30000,
                          duration=60,
                          waveform_upsamplefac=1,
                          amplitudes_row=0):
    """
    Synthesize an electrophysiology timeseries from a set of ground-truth firing events and waveforms

    Parameters
    ----------
    firings : INPUT
        (Optional) The path of firing events file in .mda format. RxL where R>=3 and L is the number of events. Second row is the timestamps, third row is the integer labels.
        An in-memory array is also accepted.
    waveforms : INPUT
        (Optional) The path of (possibly upsampled) waveforms file in .mda format. Mx(T*waveform_upsample_factor)*K, where M is the number of channels, T is the clip size, and K is the number of units.
        An in-memory array is also accepted.

    timeseries_out : OUTPUT
        The output path for the new timeseries. MxN
        When not given, the synthesized array is returned instead of written.

    noise_level : double
        (Optional) Standard deviation of the simulated background noise added to the timeseries
    samplerate : double
        (Optional) Sample rate for the synthetic dataset in Hz
    duration : double
        (Optional) Duration of the synthetic dataset in seconds. The number of timepoints will be duration*samplerate
    waveform_upsamplefac : int
        (Optional) The upsampling factor corresponding to the input waveforms. (avoids digitization artifacts)
    amplitudes_row : int
        (Optional) If positive, this is the row in the firings arrays where the amplitude scale factors are found. Otherwise, use all 1's
    """
    num_timepoints = np.int64(samplerate * duration)
    waveform_upsamplefac = int(waveform_upsamplefac)

    if isinstance(waveforms, str):
        if waveforms:
            W = readmda(waveforms)
        else:
            W = np.zeros((4, 100 * waveform_upsamplefac, 0))
    else:
        W = waveforms

    if isinstance(firings, str):
        if firings:
            F = readmda(firings)
        else:
            F = np.zeros((3, 0))
    else:
        F = firings

    times = F[1, :]
    labels = F[2, :].astype('int')

    M, TT = W.shape[0], W.shape[1]
    T = int(TT / waveform_upsamplefac)
    Tmid = int(np.ceil((T + 1) / 2 - 1))  # position of the event in a clip

    N = int(num_timepoints)
    if N == 0:
        # No duration given: make the series just long enough for the last
        # event.  The length must be an integer for the array allocation.
        if times.size == 0:
            N = T
        else:
            N = int(np.ceil(np.max(times))) + T

    X = np.random.randn(M, N) * noise_level

    for j in range(times.size):
        t0 = times[j]
        k0 = labels[j]
        amp0 = 1
        if amplitudes_row > 0:
            amp0 = F[amplitudes_row - 1, j]
        # Labels are one-based: label k uses waveform slice k - 1.
        waveform0 = W[:, :, k0 - 1]
        # The fractional part of the timestamp selects the phase of the
        # upsampled waveform.
        frac_offset = int(np.floor((t0 - np.floor(t0)) * waveform_upsamplefac))
        tstart = np.int64(np.floor(t0)) - Tmid
        # Events whose clip would cross either end are skipped.
        if (0 <= tstart) and (tstart + T <= N):
            X[:, tstart:tstart + T] = X[:, tstart:tstart + T] + waveform0[
                :, frac_offset::waveform_upsamplefac] * amp0

    if timeseries_out:
        return writemda32(X, timeseries_out)
    else:
        return (X)
def synthesize_drifting_timeseries(*,
                                   firings,
                                   waveforms,
                                   timeseries_out=None,
                                   noise_level=1,
                                   samplerate=30000,
                                   duration=60,
                                   waveform_upsamplefac=1,
                                   amplitudes_row=0,
                                   num_interp_nodes=2):
    """
    Synthesize a electrophysiology timeseries from a set of ground-truth firing events and waveforms, and simulating drift (linear for now)

    Parameters
    ----------
    firings : INPUT
        (Optional) The path of firing events file in .mda format. RxL where R>=3 and L is the number of events. Second row is the timestamps, third row is the integer labels
    waveforms : INPUT
        (Optional) The path of (possibly upsampled) waveforms file in .mda format. Mx(T*waveform_upsample_factor)*(2K), where M is the number of channels, T is the clip size, and K is the number of units. Each unit has a contiguous pair of waveforms (interpolates from first to second across the timeseries)

    timeseries_out : OUTPUT
        The output path for the new timeseries. MxN

    noise_level : double
        (Optional) Standard deviation of the simulated background noise added to the timeseries
    samplerate : double
        (Optional) Sample rate for the synthetic dataset in Hz
    duration : double
        (Optional) Duration of the synthetic dataset in seconds. The number of timepoints will be duration*samplerate
    waveform_upsamplefac : int
        (Optional) The upsampling factor corresponding to the input waveforms. (avoids digitization artifacts)
    amplitudes_row : int
        (Optional) If positive, this is the row in the firings arrays where the amplitude scale factors are found. Otherwise, use all 1's
    num_interp_nodes : int
        (Optional) For drift, the number of timepoints where we specify the waveform (Default 2)
    """
    F = readmda(firings) if type(firings) == str else firings

    if amplitudes_row == 0:
        # No amplitude row supplied: append a row of ones and use it.
        unit_amplitudes = np.ones((1, F.shape[1]))
        F = np.concatenate((F, unit_amplitudes))
        amplitudes_row = F.shape[0]
    amp_index = amplitudes_row - 1

    times_normalized = F[1, :] / (duration * samplerate
                                  )  # normalized between 0 and 1
    labels = F[2, :]
    amps = F[amp_index, :]

    # Duplicate every event once per interpolation node; the copies of an
    # event sit in adjacent columns.
    F = np.kron(F, [1] * num_interp_nodes)

    for node in range(num_interp_nodes):
        node_columns = slice(node, None, num_interp_nodes)
        # Scale the amplitude of this node's copy by its time basis function.
        F[amp_index, node_columns] = amps * time_basis_func(
            node, num_interp_nodes, times_normalized)
        # Give every (unit, node) combination its own label; remember that
        # labels are 1-indexed.
        F[2, node_columns] = (labels - 1) * num_interp_nodes + node + 1

    return synthesize_timeseries(firings=F,
                                 waveforms=waveforms,
                                 timeseries_out=timeseries_out,
                                 noise_level=noise_level,
                                 samplerate=samplerate,
                                 duration=duration,
                                 waveform_upsamplefac=waveform_upsamplefac,
                                 amplitudes_row=amplitudes_row)