Esempio n. 1
0
def branch_cluster(features, *, branch_depth=2, npca=10):
    """Cluster the columns of ``features`` and recursively re-cluster the big clusters.

    The columns of ``features`` are first clustered with ``cluster``. Every
    resulting cluster holding more than 20 columns is then clustered again
    on its own (up to ``branch_depth`` levels in total), and the sub-cluster
    labels are renumbered so that the final labels of different top-level
    clusters never overlap.

    Parameters
    ----------
    features : array
        2-D array whose columns are the items to cluster (columns are
        selected with ``features[:, inds]``).
    branch_depth : int
        Number of clustering levels; a value <= 1 returns the first-level
        labels unchanged.
    npca : int
        Forwarded unchanged to ``cluster``.

    Returns
    -------
    array
        1-D int64 label array with one entry per column, or an empty array
        when ``features`` has no elements.

    Raises
    ------
    Exception
        If ``cluster`` returns a negative label; the features are first
        dumped to a debug file whose path is included in the message.
    """
    if features.size == 0:
        return np.array([])

    # Clusters at or below this size are kept as-is instead of being re-split.
    split_threshold = 20

    top_labels = cluster(features, npca=npca).ravel().astype('int64')
    if np.min(top_labels) < 0:
        # Preserve the offending input so the failure can be reproduced.
        tmp_fname = '/tmp/isosplit5-debug-features.mda'
        mdaio.writemda32(features, tmp_fname)
        raise Exception(
            'Unexpected error in isosplit5. Features written to {}'.format(
                tmp_fname))

    num_top = int(np.max(top_labels))
    if branch_depth <= 1 or num_top <= 1:
        return top_labels

    merged = np.zeros(top_labels.shape, dtype='int64')
    next_offset = 0  # highest label handed out so far
    for label in range(1, num_top + 1):
        members = np.where(top_labels == label)[0]
        if len(members) <= split_threshold:
            # Too small to split: the whole cluster gets one fresh label.
            next_offset += 1
            merged[members] = next_offset
            continue
        sub_labels = branch_cluster(features[:, members],
                                    branch_depth=branch_depth - 1,
                                    npca=npca)
        # Shift the sub-labels past everything assigned so far.
        merged[members] = next_offset + sub_labels
        next_offset += int(np.max(sub_labels))
    return merged
Esempio n. 2
0
def bandpass_filter(*,
                    timeseries,
                    timeseries_out,
                    samplerate,
                    freq_min,
                    freq_max,
                    freq_wid=1000,
                    padding=3000,
                    chunk_size=3000 * 10,
                    num_processes=os.cpu_count()):
    """
    Apply a bandpass filter to a multi-channel timeseries

    Parameters
    ----------
    timeseries : INPUT
        MxN raw timeseries array (M = #channels, N = #timepoints)
        
    timeseries_out : OUTPUT
        Filtered output (MxN array)
        
    samplerate : float
        The sampling rate in Hz
    freq_min : float
        The lower endpoint of the frequency band (Hz)
    freq_max : float
        The upper endpoint of the frequency band (Hz)
    freq_wid : float
        The optional width of the roll-off (Hz)

    Notes
    -----
    Additional keyword arguments (listed here rather than under Parameters
    so the INPUT/OUTPUT parameter list is left exactly as it was):

    * ``padding`` -- forwarded to the per-chunk filter through the options
      dict; presumably the number of extra timepoints read around each
      chunk (TODO confirm against ``filter_chunk``).
    * ``chunk_size`` -- number of timepoints per chunk; the input is split
      into ``ceil(N / chunk_size)`` chunks.
    * ``num_processes`` -- size of the worker pool.

    Returns ``True`` once every chunk has been processed.
    """
    global g_shared_data, g_opts

    X = mdaio.DiskReadMda(timeseries)
    M = X.N1()  # Number of channels
    N = X.N2()  # Number of timepoints

    num_chunks = int(np.ceil(N / chunk_size))
    print('Chunk size: {}, Padding: {}, Num chunks: {}, Num processes: {}'.
          format(chunk_size, padding, num_chunks, num_processes))

    # Module-level state, presumably read by filter_chunk in the workers.
    # It must be assigned BEFORE the pool is created so that the worker
    # processes start with these values already in place.
    g_shared_data = SharedChunkInfo(num_chunks)
    g_opts = {
        "timeseries": timeseries,
        "timeseries_out": timeseries_out,
        "samplerate": samplerate,
        "freq_min": freq_min,
        "freq_max": freq_max,
        "freq_wid": freq_wid,
        "chunk_size": chunk_size,
        "padding": padding,
        "num_processes": num_processes,
        "num_chunks": num_chunks
    }

    # Start the output file with the right channel count and no timepoints.
    mdaio.writemda32(np.zeros([M, 0]), timeseries_out)

    # The context manager shuts the workers down when the block exits, so
    # they no longer linger after the call. pool.map blocks until every
    # chunk is done, so nothing is cut short.
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(filter_chunk, range(num_chunks), chunksize=1)
    return True
Esempio n. 3
0
def test_compute_templates():
    """Smoke-test compute_templates on random data.

    Writes a random MxN timeseries and a random firings array (row 1 =
    timestamps, row 2 = labels in 1..K) to a private temporary directory,
    runs ``compute_templates`` and checks that the result has shape
    (M, T, K). The temporary directory is removed afterwards, so nothing is
    left behind in the working directory and concurrent runs cannot clobber
    each other's files.

    Returns
    -------
    bool
        ``True`` when all checks pass (an ``AssertionError`` is raised
        otherwise).
    """
    import os
    import tempfile

    M, N, K, T, L = 5, 1000, 6, 50, 100
    X = np.random.rand(M, N)
    F = np.zeros((3, L))
    F[1, :] = 1 + np.random.randint(N, size=(1, L))
    F[2, :] = 1 + np.random.randint(K, size=(1, L))

    with tempfile.TemporaryDirectory() as tmpdir:
        timeseries_path = os.path.join(tmpdir, 'tmp.mda')
        firings_path = os.path.join(tmpdir, 'tmp2.mda')
        templates_path = os.path.join(tmpdir, 'tmp3.mda')

        mdaio.writemda32(X, timeseries_path)
        mdaio.writemda64(F, firings_path)
        ret = compute_templates(timeseries=timeseries_path,
                                firings=firings_path,
                                templates_out=templates_path,
                                clip_size=T)
        assert ret
        # Read and check while the files still exist.
        templates0 = mdaio.readmda(templates_path)
        assert templates0.shape == (M, T, K)
    return True
Esempio n. 4
0
def compute_templates(*, timeseries, firings, templates_out, clip_size=100):
    """
    Compute templates (average waveforms) for the clusters defined by the labeled events in firings, and write them to disk.

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries mda file (MxN) from which to draw the event clips (snippets) for computing the templates. M is number of channels, N is number of timepoints.
    firings : INPUT
        Path of firings mda file (RxL) where R>=3 and L is the number of events. Second row are timestamps, third row are integer labels.

    templates_out : OUTPUT
        Path of output mda file (MxTxK). T=clip_size, K=maximum cluster label. Note that empty clusters will correspond to a template of all zeros.

    clip_size : int
        (Optional) clip size, aka snippet size, number of timepoints in a single template

    Notes
    -----
    The computation itself is delegated to ``compute_templates_helper``;
    this function only writes the helper's result with
    ``mdaio.writemda32`` and returns whatever that call returns.
    """
    averaged_clips = compute_templates_helper(timeseries=timeseries,
                                              firings=firings,
                                              clip_size=clip_size)
    write_result = mdaio.writemda32(averaged_clips, templates_out)
    return write_result
def synthesize_timeseries(*,
                          firings='',
                          waveforms='',
                          timeseries_out,
                          noise_level=1,
                          samplerate=30000,
                          duration=60,
                          waveform_upsamplefac,
                          amplitudes_row=0):
    """
    Synthesize an electrophysiology timeseries from a set of ground-truth firing events and waveforms

    Parameters
    ----------
    firings : INPUT
        (Optional) The path of firing events file in .mda format, or an already-loaded array. RxL where R>=3 and L is the number of events. Second row is the timestamps, third row is the integer labels.
    waveforms : INPUT
        (Optional) The path of (possibly upsampled) waveforms file in .mda format, or an already-loaded array. Mx(T*waveform_upsamplefac)xK, where M is the number of channels, T is the clip size, and K is the number of units.
    
    timeseries_out : OUTPUT
        The output path for the new timeseries. MxN. If empty/None the MxN array is returned instead of being written.

    noise_level : double
        (Optional) Standard deviation of the simulated background noise added to the timeseries
    samplerate : double
        (Optional) Sample rate for the synthetic dataset in Hz
    duration : double
        (Optional) Duration of the synthetic dataset in seconds. The number of timepoints will be duration*samplerate. If that is 0, the length is derived from the last event time instead.
    waveform_upsamplefac : int
        The upsampling factor corresponding to the input waveforms. (avoids digitization artifacts) Must be passed explicitly: the signature gives it no default.
    amplitudes_row : int
        (Optional) If positive, this is the (1-based) row in the firings arrays where the amplitude scale factors are found. Otherwise, use all 1's

    Returns
    -------
    The result of ``mdaio.writemda32`` when ``timeseries_out`` is given,
    otherwise the synthesized MxN array.
    """
    num_timepoints = int(samplerate * duration)
    waveform_upsamplefac = int(waveform_upsamplefac)

    # Both inputs may be given either as a path (str) or as a loaded array.
    if isinstance(waveforms, str):
        if waveforms:
            W = mdaio.readmda(waveforms)
        else:
            W = np.zeros((4, 100 * waveform_upsamplefac, 0))
    else:
        W = waveforms

    if isinstance(firings, str):
        if firings:
            F = mdaio.readmda(firings)
        else:
            F = np.zeros((3, 0))
    else:
        F = firings

    times = F[1, :]
    labels = F[2, :].astype('int')

    M, TT, K = W.shape[0], W.shape[1], W.shape[2]
    T = TT // waveform_upsamplefac  # clip size at the output sample rate
    Tmid = int(np.ceil((T + 1) / 2 - 1))  # offset of the event time within a clip

    N = num_timepoints
    if N == 0:
        if times.size == 0:
            N = T
        else:
            # Timestamps are usually stored as floats; the array dimension
            # must be an int or np.random.randn raises.
            N = int(np.floor(np.max(times))) + T

    X = np.random.randn(M, N) * noise_level

    # waveform_list[k - 1] is the waveform of unit k (labels are 1-based).
    # The previous loop ran k over 0..K-1 while still indexing W with
    # k - 1, which rotated the list by one so every label picked up the
    # wrong unit's waveform (label 1 got the last unit's).
    waveform_list = [W[:, :, k] for k in range(K)]

    for j in range(times.size):
        t0 = times[j]
        k0 = labels[j]
        amp0 = 1
        if amplitudes_row > 0:
            amp0 = F[amplitudes_row - 1, j]
        waveform0 = waveform_list[k0 - 1]
        # The fractional part of the timestamp selects which phase of the
        # upsampled waveform is sampled.
        frac_offset = int(np.floor((t0 - np.floor(t0)) * waveform_upsamplefac))
        tstart = int(np.floor(t0)) - Tmid
        # Events whose clip would run off either end are skipped.
        if (0 <= tstart) and (tstart + T <= N):
            X[:, tstart:tstart + T] += (
                waveform0[:, frac_offset::waveform_upsamplefac] * amp0)

    if timeseries_out:
        return mdaio.writemda32(X, timeseries_out)
    return X
Esempio n. 6
0
def synthesize_random_waveforms(*,
                                waveforms_out=None,
                                geometry_out=None,
                                M=5,
                                T=500,
                                K=20,
                                upsamplefac=13,
                                timeshift_factor=3,
                                average_peak_amplitude=10):
    """
    Synthesize random waveforms for use in creating a synthetic timeseries dataset

    Parameters
    ----------
    waveforms_out : OUTPUT
        Path to waveforms mda file. Mx(T*upsamplefac)xK
    geometry_out : OUTPUT
        (Optional) Path to geometry csv file
    M : int
        (Optional) Number of channels
    T : int
        (Optional) Number of timepoints for a waveform, before upsampling
    K : int
        (Optional) Number of waveforms to synthesize
    timeshift_factor : int
        (Optional) Controls amount of timeshift between waveforms on different channels for each template
    upsamplefac : int
        (Optional) used for upsampling the waveforms to avoid discretization artifacts
    average_peak_amplitude : float
        (Optional) used to scale the peak spike amplitude 

    Returns
    -------
    ``True`` when ``waveforms_out`` is given (the waveforms, and the
    geometry if ``geometry_out`` is also given, are written to disk);
    otherwise the tuple ``(WW, geometry)`` with the waveform array of shape
    (M, T*upsamplefac, K) and the 2xM geometry array.
    """
    # Mean / standard deviation of the four duration and amplitude values
    # handed to synthesize_single_waveform (presumably one per waveform
    # segment -- TODO confirm against that helper).
    mean_durations = np.array([200, 10, 30, 200])
    mean_amps = np.array([0.5, 10, -1, 0])
    durations_sd = np.array([10, 4, 6, 20])
    amps_sd = np.array([0.2, 3, 0.5, 0])
    amp_factor_lo, amp_factor_hi = np.array([0.5, 1])
    geom_spread_coef1 = 0.2
    geom_spread_coef2 = 1

    # Default geometry: channels on a line at x = 1..M, y = 0.
    geometry = np.zeros((2, M))
    geometry[0, :] = np.arange(1, M + 1)

    neuron_locations = get_default_neuron_locations(M, K, geometry)

    num_samples = T * upsamplefac
    WW = np.zeros((M, num_samples, K))

    for unit in range(K):
        for chan in range(M):
            offset = neuron_locations[:, unit] - geometry[:, chan]
            dist = np.sqrt(np.sum(offset**2))

            # Durations are clamped to at least 1 before upsampling.
            jittered = mean_durations + np.random.randn(1, 4) * durations_sd
            durations0 = np.maximum(np.ones(mean_durations.shape),
                                    jittered) * upsamplefac
            amps0 = mean_amps + np.random.randn(1, 4) * amps_sd

            shape0 = synthesize_single_waveform(N=num_samples,
                                                durations=durations0,
                                                amps=amps0)
            # Shift in time proportionally to the neuron-channel distance.
            shape0 = np.roll(shape0,
                             int(timeshift_factor * dist * upsamplefac))
            shape0 = shape0 * np.random.uniform(amp_factor_lo, amp_factor_hi)
            # Attenuate with distance from the neuron.
            attenuation = geom_spread_coef1 + dist * geom_spread_coef2
            WW[chan, :, unit] = shape0 / attenuation

    # Rescale so the mean per-unit peak equals average_peak_amplitude.
    peaks = np.max(np.abs(WW), axis=(0, 1))
    WW = WW / np.mean(peaks) * average_peak_amplitude

    if not waveforms_out:
        return (WW, geometry)

    mdaio.writemda32(WW, waveforms_out)
    if geometry_out:
        np.savetxt(geometry_out,
                   geometry.transpose(),
                   delimiter=",",
                   fmt="%g")
    return True
Esempio n. 7
0
def whiten(*,
           timeseries,
           timeseries_out,
           chunk_size=30000 * 10,
           num_processes=os.cpu_count()):
    """
    Whiten a multi-channel timeseries

    Parameters
    ----------
    timeseries : INPUT
        MxN raw timeseries array (M = #channels, N = #timepoints)
        
    timeseries_out : OUTPUT
        Whitened output (MxN array)

    Notes
    -----
    Additional keyword arguments (listed here rather than under Parameters
    so the INPUT/OUTPUT parameter list is left exactly as it was):

    * ``chunk_size`` -- number of timepoints per chunk; the input is split
      into ``ceil(N / chunk_size)`` chunks.
    * ``num_processes`` -- size of the worker pools.

    The work is done in two passes: an MxM matrix is first estimated from
    a subsample of (about 10) chunks, then a whitening matrix derived from
    it is applied to every chunk. Returns ``True`` when finished.
    """
    global g_opts, g_shared_data

    X = mdaio.DiskReadMda(timeseries)
    M = X.N1()  # Number of channels
    N = X.N2()  # Number of timepoints

    num_chunks_for_computing_cov_matrix = 10

    num_chunks = int(np.ceil(N / chunk_size))
    print('Chunk size: {}, Num chunks: {}, Num processes: {}'.format(
        chunk_size, num_chunks, num_processes))

    # Module-level options, presumably read by the per-chunk helpers in the
    # worker processes; must be set before the pools are created.
    g_opts = {
        "timeseries": timeseries,
        "timeseries_out": timeseries_out,
        "chunk_size": chunk_size,
        "num_processes": num_processes,
        "num_chunks": num_chunks
    }

    # Pass 1: evaluate the per-chunk matrices on every `step`-th chunk only.
    step = int(
        np.maximum(1,
                   np.floor(num_chunks / num_chunks_for_computing_cov_matrix)))
    # Context manager: the workers are shut down when the block exits, so
    # they are not leaked. pool.map blocks until every task is done.
    with multiprocessing.Pool(processes=num_processes) as pool:
        AAt_matrices = pool.map(compute_AAt_matrix_for_chunk,
                                range(0, num_chunks, step),
                                chunksize=1)

    AAt = np.zeros((M, M), dtype='float64')
    for M0 in AAt_matrices:
        # NOTE: this denominator assumes every sampled chunk holds exactly
        # chunk_size timepoints; it still needs to be fixed to account for
        # a possibly smaller (final) chunk.
        AAt += M0 / (len(AAt_matrices) * chunk_size)

    U, S, Ut = np.linalg.svd(AAt, full_matrices=True)

    # W = U diag(S^-1/2) Ut, i.e. the inverse square root of AAt when AAt
    # is symmetric positive definite.
    W = (U @ np.diag(1 / np.sqrt(S))) @ Ut

    g_shared_data = SharedChunkInfo(num_chunks)
    # Start the output file with the right channel count and no timepoints.
    mdaio.writemda32(np.zeros([M, 0]), timeseries_out)

    # Pass 2: a fresh pool is created on purpose (not reused from pass 1)
    # so that its workers start after g_shared_data has been assigned.
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.starmap(whiten_chunk, [(num, W) for num in range(0, num_chunks)],
                     chunksize=1)

    return True