def branch_cluster(features, *, branch_depth=2, npca=10):
    """
    Cluster the columns of a feature array, then recursively re-cluster each
    sufficiently large cluster and merge the results into one labeling.

    Parameters
    ----------
    features : ndarray
        Feature array. Events are indexed along the second axis
        (sub-clusters are selected with ``features[:, inds]``).
    branch_depth : int
        (Optional) Number of clustering levels still allowed. With a value
        of 1 or less the labels of the first clustering pass are returned
        as they are.
    npca : int
        (Optional) Forwarded unchanged to ``cluster``.

    Returns
    -------
    ndarray
        One int64 label per event. When ``features`` has no elements an
        empty array (``np.array([])``) is returned instead.

    Raises
    ------
    Exception
        If ``cluster`` yields a negative label. The features are written
        to a fixed file under /tmp first so the failure can be inspected.
    """
    if features.size == 0:
        return np.array([])

    # Clusters with this many events or fewer are never split further.
    split_threshold = 20

    top_labels = cluster(features, npca=npca).ravel().astype('int64')
    if np.min(top_labels) < 0:
        debug_path = '/tmp/isosplit5-debug-features.mda'
        mdaio.writemda32(features, debug_path)
        raise Exception(
            'Unexpected error in isosplit5. Features written to {}'.format(
                debug_path))

    num_top = int(np.max(top_labels))
    if num_top <= 1 or branch_depth <= 1:
        return top_labels

    merged = np.zeros(top_labels.shape, dtype='int64')
    next_offset = 0  # highest label handed out so far
    for label in range(1, num_top + 1):
        members = np.where(top_labels == label)[0]
        if len(members) <= split_threshold:
            # Too small to split: the whole cluster gets one new label.
            merged[members] = next_offset + 1
            next_offset += 1
            continue
        sub_labels = branch_cluster(features[:, members],
                                    branch_depth=branch_depth - 1,
                                    npca=npca)
        # Shift the sub-cluster labels past everything assigned so far.
        merged[members] = next_offset + sub_labels
        next_offset += int(np.max(sub_labels))
    return merged
def bandpass_filter(*, timeseries, timeseries_out, samplerate, freq_min,
                    freq_max, freq_wid=1000, padding=3000,
                    chunk_size=3000 * 10, num_processes=os.cpu_count()):
    """
    Apply a bandpass filter to a multi-channel timeseries

    The input is split into chunks which are handed to a pool of worker
    processes running filter_chunk. The worker pool is shut down before
    this function returns.

    Parameters
    ----------
    timeseries : INPUT
        MxN raw timeseries array (M = #channels, N = #timepoints)

    timeseries_out : OUTPUT
        Filtered output (MxN array)

    samplerate : float
        The sampling rate in Hz
    freq_min : float
        The lower endpoint of the frequency band (Hz)
    freq_max : float
        The upper endpoint of the frequency band (Hz)
    freq_wid : float
        (Optional) The width of the roll-off (Hz)
    padding : int
        (Optional) Padding size made available to the per-chunk workers
        through the shared options (presumably extra timepoints read
        around each chunk -- confirm in filter_chunk)
    chunk_size : int
        (Optional) Number of timepoints per chunk; the input is processed
        as ceil(N / chunk_size) chunks
    num_processes : int
        (Optional) Number of worker processes. The default is
        os.cpu_count(), evaluated once when the function is defined.

    Returns
    -------
    bool
        Always True.
    """
    X = mdaio.DiskReadMda(timeseries)
    M = X.N1()  # Number of channels
    N = X.N2()  # Number of timepoints

    num_chunks = int(np.ceil(N / chunk_size))
    print('Chunk size: {}, Padding: {}, Num chunks: {}, Num processes: {}'.
          format(chunk_size, padding, num_chunks, num_processes))

    opts = {
        "timeseries": timeseries,
        "timeseries_out": timeseries_out,
        "samplerate": samplerate,
        "freq_min": freq_min,
        "freq_max": freq_max,
        "freq_wid": freq_wid,
        "chunk_size": chunk_size,
        "padding": padding,
        "num_processes": num_processes,
        "num_chunks": num_chunks
    }

    # The module-level globals are assigned BEFORE the pool is created.
    # NOTE(review): presumably filter_chunk reads them in the worker
    # processes, which only works if they exist when the workers start.
    global g_shared_data
    g_shared_data = SharedChunkInfo(num_chunks)
    global g_opts
    g_opts = opts

    # Start the output file with zero timepoints.
    mdaio.writemda32(np.zeros([M, 0]), timeseries_out)

    # Use the pool as a context manager so the worker processes are always
    # released (previously the pool was never closed). map() blocks until
    # every chunk is done, so no work is lost when the pool is torn down.
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(filter_chunk, range(num_chunks), chunksize=1)
    return True
def test_compute_templates():
    """
    Smoke test for compute_templates.

    Writes a random timeseries and random firings to 'tmp.mda' and
    'tmp2.mda' in the current directory, runs compute_templates into
    'tmp3.mda', and checks that the call reports success and that the
    stored templates have shape (channels, clip size, clusters).

    Returns
    -------
    bool
        True when both assertions pass.
    """
    num_channels, num_timepoints, num_clusters = 5, 1000, 6
    clip_size, num_events = 50, 100

    timeseries = np.random.rand(num_channels, num_timepoints)
    mdaio.writemda32(timeseries, 'tmp.mda')

    # Row 1 holds timestamps in 1..N, row 2 holds labels in 1..K.
    firings = np.zeros((3, num_events))
    firings[1, :] = 1 + np.random.randint(num_timepoints, size=(1, num_events))
    firings[2, :] = 1 + np.random.randint(num_clusters, size=(1, num_events))
    mdaio.writemda64(firings, 'tmp2.mda')

    ok = compute_templates(timeseries='tmp.mda',
                           firings='tmp2.mda',
                           templates_out='tmp3.mda',
                           clip_size=clip_size)
    assert ok

    templates = mdaio.readmda('tmp3.mda')
    assert templates.shape == (num_channels, clip_size, num_clusters)
    return True
def compute_templates(*, timeseries, firings, templates_out, clip_size=100):
    """
    Compute templates (average waveforms) for clusters defined by the
    labeled events in firings, and write them to an mda file.

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries mda file (MxN) from which to draw the event clips
        (snippets) for computing the templates. M is number of channels, N is
        number of timepoints.
    firings : INPUT
        Path of firings mda file (RxL) where R>=3 and L is the number of
        events. Second row are timestamps, third row are integer labels.

    templates_out : OUTPUT
        Path of output mda file (MxTxK). T=clip_size, K=maximum cluster
        label. Note that empty clusters will correspond to a template of all
        zeros.

    clip_size : int
        (Optional) clip size, aka snippet size, number of timepoints in a
        single template

    Returns
    -------
    The value returned by mdaio.writemda32 for the output file.
    """
    # The averaging itself lives in the helper; this wrapper only adds the
    # file output step.
    averaged = compute_templates_helper(
        timeseries=timeseries,
        firings=firings,
        clip_size=clip_size,
    )
    write_result = mdaio.writemda32(averaged, templates_out)
    return write_result
def synthesize_timeseries(*, firings='', waveforms='', timeseries_out,
                          noise_level=1, samplerate=30000, duration=60,
                          waveform_upsamplefac, amplitudes_row=0):
    """
    Synthesize an electrophysiology timeseries from a set of ground-truth
    firing events and waveforms

    Gaussian noise is generated first; then, for every event, the waveform
    of the event's unit is subsampled at the event's fractional time offset
    and added to the noise. Events whose clip would extend past either end
    of the timeseries are skipped.

    Parameters
    ----------
    firings : INPUT
        (Optional) The path of firing events file in .mda format. RxL where
        R>=3 and L is the number of events. Second row is the timestamps,
        third row is the integer labels. An in-memory array is accepted as
        well; an empty string means no events.
    waveforms : INPUT
        (Optional) The path of (possibly upsampled) waveforms file in .mda
        format. Mx(T*waveform_upsamplefac)xK, where M is the number of
        channels, T is the clip size, and K is the number of units. An
        in-memory array is accepted as well.

    timeseries_out : OUTPUT
        The output path for the new timeseries. MxN. If empty, nothing is
        written and the array itself is returned.

    noise_level : double
        (Optional) Standard deviation of the simulated background noise added
        to the timeseries
    samplerate : double
        (Optional) Sample rate for the synthetic dataset in Hz
    duration : double
        (Optional) Duration of the synthetic dataset in seconds. The number
        of timepoints will be duration*samplerate. If that product is 0, the
        length is derived from the latest event time plus one clip length.
    waveform_upsamplefac : int
        (Optional) The upsampling factor corresponding to the input
        waveforms. (avoids digitization artifacts)
        NOTE: the Python signature gives this argument no default, so it
        must always be passed when calling the function directly.
    amplitudes_row : int
        (Optional) If positive, this is the (1-based) row in the firings
        array where the amplitude scale factors are found.
        Otherwise, use all 1's

    Returns
    -------
    The value returned by mdaio.writemda32 when timeseries_out is non-empty,
    otherwise the synthesized MxN array.
    """
    num_timepoints = np.int64(samplerate * duration)
    waveform_upsamplefac = int(waveform_upsamplefac)

    if isinstance(waveforms, str):
        if waveforms:
            W = mdaio.readmda(waveforms)
        else:
            W = np.zeros((4, 100 * waveform_upsamplefac, 0))
    else:
        W = waveforms

    if isinstance(firings, str):
        if firings:
            F = mdaio.readmda(firings)
        else:
            F = np.zeros((3, 0))
    else:
        F = firings

    times = F[1, :]
    labels = F[2, :].astype('int')

    M, TT = W.shape[0], W.shape[1]
    T = int(TT / waveform_upsamplefac)  # clip size at the output rate
    Tmid = int(np.ceil((T + 1) / 2 - 1))  # index of the clip centre

    N = num_timepoints
    if N == 0:
        if times.size == 0:
            N = T
        else:
            # Timestamps are floats; the array dimension must be an integer
            # (the previous float value made np.random.randn raise).
            N = int(np.ceil(np.max(times))) + T

    X = np.random.randn(M, N) * noise_level

    for j in range(times.size):
        t0 = times[j]
        k0 = labels[j]
        amp0 = F[amplitudes_row - 1, j] if amplitudes_row > 0 else 1

        # Labels are 1-based: unit k0 uses waveform index k0 - 1. (The
        # earlier lookup table was shifted by one, so label 1 picked up the
        # last waveform.)
        # NOTE(review): labels are assumed to lie in 1..K; a label of 0
        # would wrap around to the last waveform -- confirm with callers.
        waveform0 = W[:, :, k0 - 1]

        t0_floor = np.floor(t0)
        # Sub-sample position of the event, in units of the upsampled grid.
        frac_offset = int(np.floor((t0 - t0_floor) * waveform_upsamplefac))
        tstart = np.int64(t0_floor) - Tmid
        if 0 <= tstart and tstart + T <= N:
            X[:, tstart:tstart + T] += (
                waveform0[:, frac_offset::waveform_upsamplefac] * amp0)

    if timeseries_out:
        return mdaio.writemda32(X, timeseries_out)
    return X
def synthesize_random_waveforms(*, waveforms_out=None, geometry_out=None,
                                M=5, T=500, K=20, upsamplefac=13,
                                timeshift_factor=3,
                                average_peak_amplitude=10):
    """
    Synthesize random waveforms for use in creating a synthetic timeseries
    dataset

    Parameters
    ----------
    waveforms_out : OUTPUT
        Path to waveforms mda file. Mx(T*upsamplefac)xK. When empty, nothing
        is written and the arrays are returned instead.
    geometry_out : OUTPUT
        (Optional) Path to geometry csv file. Only written when
        waveforms_out is also given.

    M : int
        (Optional) Number of channels
    T : int
        (Optional) Number of timepoints for a waveform, before upsampling
    K : int
        (Optional) Number of waveforms to synthesize
    timeshift_factor : int
        (Optional) Controls amount of timeshift between waveforms on
        different channels for each template
    upsamplefac : int
        (Optional) used for upsampling the waveforms to avoid discretization
        artifacts
    average_peak_amplitude : float
        (Optional) used to scale the peak spike amplitude

    Returns
    -------
    True when waveforms_out is given, otherwise the tuple
    (waveforms, geometry).
    """
    # Four-element mean / spread tables for the durations and amplitudes
    # passed to synthesize_single_waveform.
    mean_durations = np.array([200, 10, 30, 200])
    mean_amps = np.array([0.5, 10, -1, 0])
    durations_sd = np.array([10, 4, 6, 20])
    amps_sd = np.array([0.2, 3, 0.5, 0])
    amp_scale = np.array([0.5, 1])  # range of the random per-channel gain

    # Amplitude falls off as 1 / (coef1 + dist * coef2).
    geom_spread_coef1 = 0.2
    geom_spread_coef2 = 1

    # Default geometry: channels at x = 1..M, all with y = 0.
    geometry = np.zeros((2, M))
    geometry[0, :] = np.arange(1, M + 1)

    neuron_locations = get_default_neuron_locations(M, K, geometry)

    num_samples = T * upsamplefac
    WW = np.zeros((M, num_samples, K))
    for unit in range(K):
        for chan in range(M):
            offset = neuron_locations[:, unit] - geometry[:, chan]
            dist = np.sqrt(np.sum(offset**2))

            # The order of the random draws below is kept fixed.
            jittered = mean_durations + np.random.randn(1, 4) * durations_sd
            durations0 = np.maximum(np.ones(mean_durations.shape),
                                    jittered) * upsamplefac
            amps0 = mean_amps + np.random.randn(1, 4) * amps_sd

            wf = synthesize_single_waveform(N=num_samples,
                                            durations=durations0,
                                            amps=amps0)
            # Circular shift that grows with the unit-to-channel distance.
            wf = np.roll(wf, int(timeshift_factor * dist * upsamplefac))
            wf = wf * np.random.uniform(amp_scale[0], amp_scale[1])
            WW[chan, :, unit] = wf / (geom_spread_coef1 +
                                      dist * geom_spread_coef2)

    # Rescale so the mean per-unit peak equals average_peak_amplitude.
    peaks = np.max(np.abs(WW), axis=(0, 1))
    WW = WW / np.mean(peaks) * average_peak_amplitude

    if not waveforms_out:
        return (WW, geometry)

    mdaio.writemda32(WW, waveforms_out)
    if geometry_out:
        np.savetxt(geometry_out,
                   geometry.transpose(),
                   delimiter=",",
                   fmt="%g")
    return True
def whiten(*, timeseries, timeseries_out, chunk_size=30000 * 10,
           num_processes=os.cpu_count()):
    """
    Whiten a multi-channel timeseries

    A channel covariance estimate is accumulated from a subset of the
    chunks, its inverse matrix square root is taken via SVD, and that
    whitening matrix is handed to whiten_chunk for every chunk. Both
    stages run in worker pools, which are shut down before this function
    returns.

    Parameters
    ----------
    timeseries : INPUT
        MxN raw timeseries array (M = #channels, N = #timepoints)

    timeseries_out : OUTPUT
        Whitened output (MxN array)

    chunk_size : int
        (Optional) Number of timepoints per chunk; the input is processed
        as ceil(N / chunk_size) chunks
    num_processes : int
        (Optional) Number of worker processes. The default is
        os.cpu_count(), evaluated once when the function is defined.

    Returns
    -------
    bool
        Always True.
    """
    global g_opts, g_shared_data

    X = mdaio.DiskReadMda(timeseries)
    M = X.N1()  # Number of channels
    N = X.N2()  # Number of timepoints

    num_chunks_for_computing_cov_matrix = 10

    num_chunks = int(np.ceil(N / chunk_size))
    print('Chunk size: {}, Num chunks: {}, Num processes: {}'.format(
        chunk_size, num_chunks, num_processes))

    opts = {
        "timeseries": timeseries,
        "timeseries_out": timeseries_out,
        "chunk_size": chunk_size,
        "num_processes": num_processes,
        "num_chunks": num_chunks
    }
    # Assigned before the first pool is created.
    # NOTE(review): presumably the worker functions read g_opts -- confirm.
    g_opts = opts

    # Sample roughly num_chunks_for_computing_cov_matrix evenly spaced
    # chunks for the covariance estimate.
    step = int(
        np.maximum(
            1, np.floor(num_chunks / num_chunks_for_computing_cov_matrix)))
    # Context manager releases the workers; previously this pool was
    # overwritten further down without ever being closed.
    with multiprocessing.Pool(processes=num_processes) as pool:
        AAt_matrices = pool.map(compute_AAt_matrix_for_chunk,
                                range(0, num_chunks, step),
                                chunksize=1)

    AAt = np.zeros((M, M), dtype='float64')
    for M0 in AAt_matrices:
        # important: need to fix the denominator here to account for
        # possible smaller chunk (every sampled chunk is counted as
        # chunk_size timepoints).
        AAt += M0 / (len(AAt_matrices) * chunk_size)

    # W = U * diag(1/sqrt(S)) * Ut
    U, S, Ut = np.linalg.svd(AAt, full_matrices=True)
    W = (U @ np.diag(1 / np.sqrt(S))) @ Ut

    # A second pool is created only after g_shared_data has been assigned.
    # NOTE(review): presumably whiten_chunk reads it in the workers.
    g_shared_data = SharedChunkInfo(num_chunks)
    # Start the output file with zero timepoints.
    mdaio.writemda32(np.zeros([M, 0]), timeseries_out)

    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.starmap(whiten_chunk,
                     [(num, W) for num in range(num_chunks)],
                     chunksize=1)
    return True