Python readmda 예제들, mlpy.readmda Python 예제들

예제 #1

0

파일 보기

def test_concatenate_firings():
    """Exercise ``concatenate_firings`` with and without label incrementing.

    Two random firings arrays are written to disk, concatenated through
    ``concatenate_firings`` in both ``increment_labels`` modes, read back, and
    compared (to 3 decimals) with arrays built directly in memory.

    Returns
    -------
    bool
        ``True`` when both comparisons pass (a failure raises instead).
    """
    M, N1, N2 = 4, 2000, 30000
    test_offset_str = '300000,123456789'
    test_offset = [300000, 123456789]
    fir1 = np.around(np.random.rand(M, N1), decimals=3)
    mlpy.writemda64(fir1, 'tmp.fir1.mda')
    fir2 = np.around(np.random.rand(M, N2), decimals=3)
    mlpy.writemda64(fir2, 'tmp.fir2.mda')

    # Expected result without any offset applied to the in-memory arrays.
    # NOTE(review): `fir12` carries no time offsets although `time_offsets` is
    # passed in both calls below -- confirm the expected values against the
    # implementation under test.
    fir12 = np.around(np.append(fir1, fir2, axis=1), decimals=3)

    # Work on copies: a plain assignment would alias fir1/fir2, and the
    # in-place `+=` below would then silently modify the originals as well.
    fir1_incr = fir1.copy()
    fir2_incr = fir2.copy()
    fir1_incr[1, :] += test_offset[0]
    fir2_incr[1, :] += test_offset[1]
    fir12_incr = np.append(fir1_incr, fir2_incr, axis=1)

    concatenate_firings(firings_list=['tmp.fir1.mda', 'tmp.fir2.mda'],
                        firings_out='tmp.test_fir12.mda',
                        time_offsets=test_offset_str,
                        increment_labels='false')
    concatenate_firings(firings_list=['tmp.fir1.mda', 'tmp.fir2.mda'],
                        firings_out='tmp.test_fir12_incr.mda',
                        time_offsets=test_offset_str,
                        increment_labels='true')

    test_fir12 = np.around(mlpy.readmda('tmp.test_fir12.mda'), decimals=3)
    test_fir12_incr = np.around(mlpy.readmda('tmp.test_fir12_incr.mda'),
                                decimals=3)
    np.testing.assert_array_almost_equal(fir12, test_fir12, decimal=3)
    np.testing.assert_array_almost_equal(fir12_incr,
                                         test_fir12_incr,
                                         decimal=3)
    return True

예제 #2

0

파일 보기

파일: p_bandpass_filter.py 프로젝트: tjd2002/mountainsort

def test_bandpass_filter():
    """Smoke-test ``bandpass_filter``: the filtered file keeps the input shape.

    Returns
    -------
    bool
        ``True`` when every assertion passes.
    """
    num_channels, num_timepoints = 12, 30000
    signal = np.random.rand(num_channels, num_timepoints)
    writemda32(signal, 'tmp.mda')

    succeeded = bandpass_filter(timeseries="tmp.mda",
                                timeseries_out="tmp2.mda")
    assert succeeded

    unfiltered = readmda('tmp.mda')
    filtered = readmda('tmp2.mda')
    # Only shapes are compared here; sample values are not checked.
    assert unfiltered.shape == filtered.shape
    assert signal.shape == filtered.shape
    return True

예제 #3

0

파일 보기

def test_normalize_channels():
    """Check ``normalize_channels`` against a per-channel z-score computed here.

    The reference subtracts each row's mean and divides by the row's sample
    standard deviation (``ddof=1``), then compares to 5 decimals.

    Returns
    -------
    bool
        ``True`` when the comparison passes.
    """
    num_channels, num_timepoints = 4, 1000
    raw = np.random.rand(num_channels, num_timepoints)
    writemda32(raw, 'tmp.mda')

    succeeded = normalize_channels(timeseries="tmp.mda",
                                   timeseries_out="tmp2.mda")
    assert succeeded

    stored = readmda('tmp.mda')
    normalized = readmda('tmp2.mda')

    # Column vectors broadcast across the time axis.
    channel_mean = np.reshape(np.mean(stored, axis=1), (num_channels, 1))
    channel_stdev = np.reshape(np.sqrt(np.var(stored, axis=1, ddof=1)),
                               (num_channels, 1))
    expected = (stored - channel_mean) / channel_stdev

    np.testing.assert_array_almost_equal(expected, normalized, decimal=5)
    return True

예제 #4

0

파일 보기

파일: p_anneal_segments.py 프로젝트: mari-sosa/msdrift

def concat_and_increment(firings_list, time_offsets, increment_labels='true'):
    """Concatenate firings files along the event axis, shifting times and labels.

    Parameters
    ----------
    firings_list : sequence
        Paths of firings mda files, read in order with ``readmda``.
    time_offsets : sequence
        One offset per firings file; added to row 1 of that file's firings.
    increment_labels : str
        When ``'true'``, row 2 of each later file is shifted by the largest
        value found in row 2 of everything concatenated so far.

    Returns
    -------
    numpy.ndarray or None
        The concatenated firings, a ``(3, 0)`` array for an empty list, or
        ``None`` (after printing a message) when the two inputs differ in
        length.
    """
    if len(firings_list) != len(time_offsets):
        print('Mismatch between number of firings files and number of offsets')
        return None

    concatenated_firings = np.zeros((3, 0))  # result for an empty list
    for idx, firings in enumerate(firings_list):
        to_append = readmda(firings)
        to_append[1, :] += time_offsets[idx]

        # Nothing accumulated yet (first file, or every earlier file was
        # empty): start from this segment. Appending here as well would
        # duplicate its events.
        if idx == 0 or concatenated_firings.size == 0:
            concatenated_firings = to_append
            continue

        if increment_labels == 'true':
            # Shift labels past the largest label seen so far.
            to_append[2, :] += np.max(concatenated_firings[2, :])
        concatenated_firings = np.append(concatenated_firings,
                                         to_append,
                                         axis=1)
    return concatenated_firings

예제 #5

0

파일 보기

def test_synthesize_random_firings():
    """Check that synthesized firings have 3 rows and a maximum label of K.

    Returns
    -------
    bool
        ``True`` when every assertion passes.
    """
    num_units = 10
    synthesize_random_firings(K=num_units, firings_out='tmp.firings.mda')
    generated = readmda('tmp.firings.mda')
    unit_labels = generated[2, :]  # row 2 is compared against K below
    assert max(unit_labels) == num_units
    assert generated.shape[0] == 3
    return True

예제 #6

0

파일 보기

파일: p_extract_timeseries.py 프로젝트: tjd2002/mountainsort

def test_extract_timeseries():
    """Extract channels 1 and 3 from a raw float64 file and compare with the source.

    Returns
    -------
    bool
        ``True`` when every assertion passes.
    """
    num_channels, num_timepoints = 4, 10000
    data = np.random.rand(num_channels, num_timepoints)
    # Raw binary input: transposed first, so each timepoint's channel values
    # are contiguous in the file.
    data.astype('float64').transpose().tofile('tmp.dat')

    succeeded = extract_timeseries(timeseries="tmp.dat",
                                   timeseries_out="tmp2.mda",
                                   channels="1,3",
                                   t1=-1,
                                   t2=-1,
                                   timeseries_num_channels=num_channels,
                                   timeseries_dtype='float64')
    writemda64(data, 'tmp.mda')
    assert succeeded

    readmda('tmp.mda')  # read back only; its contents are not compared
    extracted = readmda('tmp2.mda')
    assert extracted.shape[0] == 2
    assert extracted.shape[1] == num_timepoints
    # The request "1,3" is expected to yield source rows 0 and 2.
    assert np.array_equal(data[[0, 2], :], extracted)
    return True

예제 #7

0

파일 보기

파일: p_apply_label_map.py 프로젝트: tjd2002/mountainsort

def apply_label_map(*, firings, label_map, firings_out):
    """
    Apply a label map to a given firings, including masking and merging

    Parameters
    ----------
    firings : INPUT
        Path of input firings mda file
    label_map : INPUT
        Path of input label map mda file [base 1, mapping to zero removes from firings]
    firings_out : OUTPUT
        ...
    """
    events = readmda(firings)
    # Each row of the map is a pair: column 0 is the label written into the
    # firings, column 1 is the label it replaces.
    pairs = np.reshape(readmda(label_map), (-1, 2))
    pairs = pairs[np.argsort(pairs[:, 0])]  # the passes below assume sorted rows

    # Propagate merge pairs to the lowest label number: any row whose target
    # equals this row's source is redirected to this row's target.
    # (Plain equality is used rather than np.isin, which older numpy lacks.)
    for row_idx, replaced_label in enumerate(pairs[:, 1]):
        pairs[pairs[:, 0] == replaced_label, 0] = pairs[row_idx, 0]

    # Apply the map to the label row (row 2) of the firings.
    for row_idx in range(pairs.shape[0]):
        matches = events[2, :] == pairs[row_idx, 1]
        events[2, matches] = pairs[row_idx, 0]

    # Drop every event whose label was mapped to zero.
    events = events[:, events[2, :] != 0]

    return writemda64(events, firings_out)

예제 #8

0

파일 보기

파일: p_anneal_segments.py 프로젝트: mari-sosa/msdrift

def get_dmatrix_templates(timeseries_list, firings_list):
    """Run ``compute_dmatrix`` on each pair of adjacent segments and stack the results.

    Parameters
    ----------
    timeseries_list : sequence
        Paths of the per-segment timeseries; the first one is opened to read
        the channel count.
    firings_list : sequence
        Paths of the per-segment firings files, same order as
        ``timeseries_list``.

    Returns
    -------
    tuple
        ``(dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes)``. The three
        matrices have shape ``(Kmax, Kmax, num_segments - 1)`` and start at
        -1; ``templates`` has shape
        ``(M, clip_size, Kmax, 2 * (num_segments - 1))`` and starts at 0.
        Slots for pairs where either segment has no events keep those
        initial values. ``Kmaxes`` lists the largest label per segment (0
        for an empty segment).
    """
    X = DiskReadMda(timeseries_list[0])
    M = X.N1()
    clip_size = 50
    num_segments = len(timeseries_list)
    num_pairs = num_segments - 1
    firings_arrays = [readmda(firings_list[j]) for j in range(num_segments)]

    Kmaxes = []
    for j, F in enumerate(firings_arrays):
        print(str(len(F[1, :])) + ' clustered events in segment ' + str(j))
        labels = F[2, :]
        Kmaxes.append(np.max(labels) if len(labels) else 0)

    # Always allocate the outputs. Previously they were only created when
    # some segment had a positive label, so the final return raised
    # UnboundLocalError for all-empty input.
    largest = max(Kmaxes)
    use_max = int(largest) if largest > 0 else 0
    dmatrix = np.full((use_max, use_max, num_pairs), -1.0)
    k1_dmatrix = np.full((use_max, use_max, num_pairs), -1.0)
    k2_dmatrix = np.full((use_max, use_max, num_pairs), -1.0)
    templates = np.zeros((M, clip_size, use_max, 2 * num_pairs))

    for j in range(num_pairs):
        print('Computing dmatrix between segments %d and %d' % (j, j + 1))
        if np.size(firings_arrays[j]) == 0 or np.size(
                firings_arrays[j + 1]) == 0:
            # Nothing to compare; leave this slot at its initial values.
            continue
        (dmatrix0, k1_dmatrix0, k2_dmatrix0, templates1,
         templates2) = compute_dmatrix(timeseries_list[j],
                                       timeseries_list[j + 1],
                                       firings_arrays[j],
                                       firings_arrays[j + 1],
                                       clip_size=clip_size)
        n1, n2 = dmatrix0.shape[0], dmatrix0.shape[1]
        dmatrix[0:n1, 0:n2, j] = dmatrix0
        k1_dmatrix[0:n1, 0:n2, j] = k1_dmatrix0
        k2_dmatrix[0:n1, 0:n2, j] = k2_dmatrix0
        templates[:, :, 0:n1, j * 2] = templates1
        templates[:, :, 0:n2, j * 2 + 1] = templates2
    return (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes)

예제 #9

0

파일 보기

def concatenate_firings(*,
                        firings_list,
                        firings_out,
                        time_offsets,
                        increment_labels='false'):
    """
    Combine a list of firings files to form a single firings file

    Parameters
    ----------
    firings_list : INPUT
        A list of paths of firings mda files to be concatenated
    firings_out : OUTPUT
        ...

    time_offsets : string
        An array of time offsets for each firings file. Expect one offset for each firings file.
        ...
    increment_labels : string
        ...
    """
    # Returns True after writing the output, or False (after printing a
    # message) when the number of offsets does not match the number of files.
    if time_offsets:
        # np.float64 rather than the np.float_ alias, which NumPy 2.0 removed.
        time_offsets = np.fromstring(time_offsets, dtype=np.float64, sep=',')
    else:
        time_offsets = np.zeros(len(firings_list))

    if len(firings_list) != len(time_offsets):
        print('Mismatch between number of firings files and number of offsets')
        return False

    concatenated_firings = np.zeros((3, 0))  # result for an empty list
    for idx, firings in enumerate(firings_list):
        to_append = mlpy.readmda(firings)
        to_append[1, :] += time_offsets[idx]

        # Nothing accumulated yet (first file, or every earlier file was
        # empty): start from this segment. This also avoids taking the max
        # of an empty label row below.
        if idx == 0 or concatenated_firings.size == 0:
            concatenated_firings = to_append
            continue

        if increment_labels == 'true':
            # Shift labels past the largest label seen so far.
            to_append[2, :] += np.max(concatenated_firings[2, :])
        concatenated_firings = np.append(concatenated_firings,
                                         to_append,
                                         axis=1)

    mlpy.writemda64(concatenated_firings, firings_out)
    return True

예제 #10

0

파일 보기

def join_segments(*, timeseries_list, firings_list, dmatrix_out,
                  templates_out):
    """
    Join the results of spike sorting on a sequence of time segments to form a single firings file

    Parameters
    ----------
    timeseries_list : INPUT
        A list of paths of adjacent preprocessed timeseries segment files
    firings_list : INPUT
        A list of paths to corresponding firings files

    dmatrix_out : OUTPUT
        dmatrix for debugging
    templates_out : OUTPUT
        templates for debugging

    """
    reader = DiskReadMda(timeseries_list[0])
    num_channels = reader.N1()
    clip_size = 100
    num_segments = len(timeseries_list)
    num_pairs = num_segments - 1

    firings_arrays = [
        readmda(firings_list[seg]) for seg in range(num_segments)
    ]

    # Largest value in row 2 across all segments; it sizes the arrays below.
    Kmax = 0
    for seg_firings in firings_arrays:
        Kmax = int(max(Kmax, np.max(seg_firings[2, :])))

    # Slots that are never written keep -1 (dmatrix) or 0 (templates).
    dmatrix = np.full((Kmax, Kmax, num_pairs), -1.0)
    templates = np.zeros((num_channels, clip_size, Kmax, 2 * num_pairs))

    for seg in range(num_pairs):
        nxt = seg + 1
        print('Computing dmatrix between segments %d and %d' % (seg, nxt))
        pair_result = compute_dmatrix(timeseries_list[seg],
                                      timeseries_list[nxt],
                                      firings_arrays[seg],
                                      firings_arrays[nxt],
                                      clip_size=clip_size)
        (pair_dmatrix, templates_first, templates_second) = pair_result
        rows = pair_dmatrix.shape[0]
        cols = pair_dmatrix.shape[1]
        dmatrix[0:rows, 0:cols, seg] = pair_dmatrix
        templates[:, :, 0:rows, seg * 2] = templates_first
        templates[:, :, 0:cols, seg * 2 + 1] = templates_second

    writemda64(templates, templates_out)
    return writemda64(dmatrix, dmatrix_out)

예제 #11

0

파일 보기

def compute_accuracies(*, confusion_matrix, output, output_format='json'):
    """
    Compute accuracies from a confusion matrix (see ms3.confusion_matrix). The first dimension (rows) of the confusion matrix should correspond to ground truth.

    Parameters
    ----------
    confusion_matrix : INPUT
        The path of the confusion matrix in .mda format. The first dimension (rows) should correspond to ground truth. The final row and final column correspond to unclassified events.
    output : OUTPUT
        The output file

    output_format : string
        For now this should always be 'json'
    """
    # An in-memory array is accepted as well as a path.
    if isinstance(confusion_matrix, str):
        CM = readmda(confusion_matrix)
    else:
        CM = confusion_matrix

    # The last row and last column are the "unclassified" bins.
    K1 = CM.shape[0] - 1
    K2 = CM.shape[1] - 1
    if K1 < 0 or K2 < 0:
        print('Error: not enough rows or columns in confusion matrix')
        return False

    # Clamp the sums to at least 1 so the denominators are never zero.
    row_sums = np.maximum(1, np.sum(CM, axis=1))
    col_sums = np.maximum(1, np.sum(CM, axis=0))

    accuracies = np.zeros(K1)
    if K2 > 0:  # with no classified column every accuracy stays 0
        for k1 in range(K1):
            row = CM[k1, :]
            # matches / (row total + column total - matches), per column
            scores = row / (col_sums + row_sums[k1] - row)
            accuracies[k1] = np.max(scores[0:K2])

    accuracies_sorted = np.sort(accuracies)[::-1]
    obj = {
        'accuracies': accuracies.tolist(),
        'accuracies_sorted': accuracies_sorted.tolist(),
    }
    # Number of rows whose accuracy reaches each threshold.
    thresholds = (('count99', 0.99), ('count95', 0.95), ('count90', 0.90),
                  ('count85', 0.85), ('count80', 0.80), ('count75', 0.75),
                  ('count70', 0.70), ('count60', 0.60), ('count50', 0.50))
    for key, threshold in thresholds:
        obj[key] = int(np.count_nonzero(accuracies >= threshold))

    with open(output, 'w') as outfile:
        json.dump(obj, outfile, indent=4, sort_keys=True)
    return True

예제 #12

0

파일 보기

def test_compute_templates():
    """Check that ``compute_templates`` writes an array of shape (M, T, K).

    Returns
    -------
    bool
        ``True`` when every assertion passes.
    """
    num_channels, num_timepoints = 5, 1000
    num_labels, clip_len, num_events = 6, 50, 100

    signal = np.random.rand(num_channels, num_timepoints)
    writemda32(signal, 'tmp.mda')

    # Random firings: row 1 draws from 1..N and row 2 from 1..K.
    events = np.zeros((3, num_events))
    events[1, :] = 1 + np.random.randint(num_timepoints, size=(1, num_events))
    events[2, :] = 1 + np.random.randint(num_labels, size=(1, num_events))
    writemda64(events, 'tmp2.mda')

    succeeded = compute_templates(timeseries='tmp.mda',
                                  firings='tmp2.mda',
                                  templates_out='tmp3.mda',
                                  clip_size=clip_len)
    assert succeeded

    computed = readmda('tmp3.mda')
    assert computed.shape == (num_channels, clip_len, num_labels)
    return True

예제 #13

0

파일 보기

def compute_templates_helper(*, timeseries, firings, clip_size=100):
    """Average the clips of each label to form one template per label.

    Parameters
    ----------
    timeseries : str
        Path of the timeseries, opened with ``DiskReadMda`` and streamed in
        chunks through ``TimeseriesChunkReader``.
    firings : str
        Path of the firings file; row 1 is used as event times and row 2
        (cast to int) as labels.
    clip_size : int
        Number of timepoints per clip.

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(M, clip_size, K)`` where ``K`` is the largest
        label; labels with no events keep an all-zero template. Returns an
        ``(M, clip_size, 0)`` array when there are no events, and ``None``
        when the chunk reader reports failure.
    """
    X = DiskReadMda(timeseries)
    M = X.N1()
    F = readmda(firings)
    T = clip_size
    times = F[1, :]
    labels = F[2, :].astype(int)

    if labels.size == 0:
        # No events: there is no largest label to size the output with.
        return np.zeros((M, T, 0))

    K = int(np.max(labels))
    # Running per-label sums and event counts. They live in this call's
    # scope and are updated in place by the kernel, so concurrent or nested
    # calls cannot interfere with each other.
    sums = np.zeros((M, T, K))
    counts = np.zeros(K)

    def _kernel(chunk, info):
        # Events whose time falls within this chunk's [t1, t2] range.
        inds = np.where((info.t1 <= times) & (times <= info.t2))[0]
        # Shift the times into chunk-local coordinates.
        times0 = (times[inds] - info.t1 + info.t1a).astype(np.int32)
        labels0 = labels[inds]

        clips0 = np.zeros((M, clip_size, len(inds)),
                          dtype=np.float32,
                          order='F')
        cpp.extract_clips(clips0, chunk, times0, clip_size)

        for k in range(1, K + 1):
            inds_kk = np.where(labels0 == k)[0]
            sums[:, :, k - 1] += np.sum(clips0[:, :, inds_kk], axis=2)
            counts[k - 1] += len(inds_kk)
        return True

    TCR = TimeseriesChunkReader(chunk_size_mb=40, overlap_size=clip_size * 2)
    if not TCR.run(timeseries, _kernel):
        return None

    templates = np.zeros((M, T, K))
    for k in range(K):
        if counts[k]:
            templates[:, :, k] = sums[:, :, k] / counts[k]
    return templates

예제 #14

0

파일 보기

파일: p_extract_clips.py 프로젝트: tjd2002/mountainsort

def test_extract_clips():
    """Check ``extract_clips`` output shape and the contents of one clip.

    Returns
    -------
    bool
        ``True`` when every assertion passes.
    """
    M, T, L, N = 5, 100, 100, 1000
    X = np.random.rand(M, N).astype(np.float32)
    writemda32(X, 'tmp.mda')
    F = np.zeros((2, L))
    # Event times are kept at least 200 samples away from both ends.
    F[1, :] = 200 + np.random.randint(N - 400, size=(1, L))
    writemda64(F, 'tmp2.mda')
    ret = extract_clips(timeseries='tmp.mda',
                        firings='tmp2.mda',
                        clips_out='tmp3.mda',
                        clip_size=T)
    assert ret
    clips0 = readmda('tmp3.mda')
    assert clips0.shape == (M, T, L)

    # Compare clip 10 with the matching window of the source signal. This
    # comparison used to be evaluated and thrown away, so nothing was
    # actually verified.
    t0 = int(F[1, 10])
    a = int(np.floor((T + 1) / 2 - 1))  # samples taken before the event time
    assert np.array_equal(clips0[:, :, 10], X[:, t0 - a:t0 - a + T])
    return True

예제 #15

0

파일 보기

파일: p_extract_clips.py 프로젝트: tjd2002/mountainsort

def extract_clips(*, timeseries, firings, clips_out, clip_size=100):
    """
    Extract clips corresponding to spike events

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries mda file (MxN) from which to draw the event clips (snippets)
    firings : INPUT
        Path of firings mda file (RxL) where R>=2 and L is the number of events. Second row are timestamps.

    clips_out : OUTPUT
        Path of clips mda file (MxTxL). T=clip_size

    clip_size : int
        (Optional) clip size, aka snippet size, aka number of timepoints in a single clip
    """
    # Row index 1 of the firings array holds the event timestamps.
    event_times = readmda(firings)[1, :]
    extracted = extract_clips_helper(timeseries=timeseries,
                                     times=event_times,
                                     clip_size=clip_size)
    # The result of the write is handed back to the caller.
    return writemda32(extracted, clips_out)

예제 #16

0

파일 보기

def handle_drift_in_segment(*, timeseries, firings, firings_out):
    """
    Handle drift in segment.

    Parameters
    ----------
    timeseries : INPUT
        Path to preprocessed timeseries from which the events are extracted from (MxN)
    firings : INPUT
        Path of input firings mda file
    firings_out : OUTPUT
        Path of output drift-adjusted firings mda file
        ...
    """
    subcluster_size = 500  # Size of subclusters for comparison of merge candidate pairs
    corr_comp_thresh = 0.95  # Minimum template correlation to consider a pair as merge candidate
    clip_size = 50
    n_pca_dim = 10

    ## Compute the templates and reorder them to Clust x Chan x Clipsize
    templates = compute_templates_helper(timeseries=timeseries,
                                         firings=firings,
                                         clip_size=clip_size)
    templates = np.swapaxes(templates, 0, 1)
    templates = np.swapaxes(templates, 2, 0)
    firings = mlpy.readmda(firings)
    print('templates', templates.shape)

    ## Determine the merge candidate pairs based on template correlation
    num_clusters = templates.shape[0]
    # One flattened template per row (clust x flat).
    subflat_templates = np.reshape(templates, (num_clusters, -1))
    # All pairwise cluster comparisons, e.g. [0 1 2] -> [[0,1],[0,2],[1,2]]
    pairwise_idxs = np.array(list(it.combinations(range(num_clusters), 2)),
                             dtype=int).reshape(-1, 2)
    pairwise_corrcoef = np.zeros(pairwise_idxs.shape[0])
    for row in range(pairwise_idxs.shape[0]):
        # Clusters are along axis 0 of subflat_templates, so each template is
        # a ROW. Indexing columns here would correlate unrelated vectors.
        pairwise_corrcoef[row] = np.corrcoef(
            subflat_templates[pairwise_idxs[row, 0], :],
            subflat_templates[pairwise_idxs[row, 1], :])[1, 0]
    # Keep only the pairs whose correlation reaches the threshold
    pairs_for_eval = pairwise_idxs[pairwise_corrcoef >= corr_comp_thresh]
    pairs_to_merge = np.array([])  # flat holder for the pairs to merge

    ## Loop through the candidate pairs
    for pair_to_test in range(pairs_for_eval.shape[0]):
        # Cluster indices are base 0 here while firings labels are base 1
        pair_labels = pairs_for_eval[pair_to_test, :] + 1

        ## Extract the times and labels of the events belonging to the pair
        firings_subset = firings[:, np.isin(firings[2, :], pair_labels)]
        test_labels = firings_subset[2, :]
        test_eventtimes = firings_subset[1, :]
        # Firing times are not guaranteed to be sorted, so sort for safety
        sort_indices = np.argsort(test_eventtimes)
        test_labels = test_labels[sort_indices]
        test_eventtimes = test_eventtimes[sort_indices]

        ## Find the subcluster times and labels
        subcluster_event_indices = find_random_paired_events(
            test_eventtimes, test_labels, subcluster_size)
        subcluster_times = test_eventtimes[subcluster_event_indices]
        subcluster_labels = test_labels[subcluster_event_indices]

        ## Extract the clips for the subcluster
        subcluster_clips = extract_clips_helper(timeseries=timeseries,
                                                times=subcluster_times,
                                                clip_size=clip_size)

        ## PCA to extract features of the clips (number of dims = n_pca_dim)
        # NOTE(review): this reshape keeps axis 0 of the clips array as the
        # PCA sample axis, while the masks below select rows of
        # clip_features per event -- confirm that extract_clips_helper
        # returns events along axis 0.
        subcluster_clips = np.reshape(subcluster_clips,
                                      (subcluster_clips.shape[0], -1))
        dimenReduc = PCA(n_components=n_pca_dim, whiten=True)
        clip_features = dimenReduc.fit_transform(subcluster_clips)

        # Use the labels to separate the clips into the two clusters
        A_indices = np.isin(subcluster_labels, pair_labels[0])
        B_indices = np.isin(subcluster_labels, pair_labels[1])
        clip_features_A = clip_features[A_indices, :]
        clip_features_B = clip_features[B_indices, :]

        # Centroid of each cluster in feature space
        centroidA = np.mean(clip_features_A, axis=0)
        centroidB = np.mean(clip_features_B, axis=0)

        # Project every clip onto the line connecting the two centroids
        V = centroidA - centroidB
        V = np.tile(V, (clip_features.shape[0], 1))
        clip_1d_projs = np.einsum('ij,ij->i', clip_features, V)

        # If the clusters are to be merged, record the (base 1) label pair
        if test_for_merge(clip_1d_projs, A_indices, B_indices):
            pairs_to_merge = np.append(pairs_to_merge, pair_labels)

    pairs_to_merge = np.reshape(pairs_to_merge, (-1, 2))
    # The propagation below assumes rows sorted by their first column
    pairs_to_merge = pairs_to_merge[np.argsort(pairs_to_merge[:, 0])]

    # Propagate merge pairs to the lowest label number
    for idx, label in enumerate(pairs_to_merge[:, 1]):
        pairs_to_merge[np.isin(pairs_to_merge[:, 0], label),
                       0] = pairs_to_merge[idx, 0]

    # Merge the firing labels (pairs are already base 1)
    for merge_pair in range(pairs_to_merge.shape[0]):
        firings[2, np.isin(firings[2, :], pairs_to_merge[merge_pair, 1])] = \
            pairs_to_merge[merge_pair, 0]

    # Write merged firings
    mlpy.writemda64(firings, firings_out)

예제 #17

0

파일 보기

파일: p_extract_timeseries.py 프로젝트: tjd2002/mountainsort

def extract_timeseries(*,
                       timeseries,
                       channels_array='',
                       timeseries_out,
                       channels='',
                       t1=-1,
                       t2=-1,
                       timeseries_dtype='',
                       timeseries_num_channels=0):
    """
    Extract a chunk of a timeseries dataset and possibly a subset of channels

    Parameters
    ----------
    timeseries : INPUT
        Path of timeseries, MxN where M is number of channels and N number of timepoints, in either .mda or raw binary format. If raw binary, then you must supply dtype and num_channels.
    channels_array : INPUT 
        Path of array of channel numbers (positive integers). Either use this or the channels parameter, not both.
        
    timeseries_out : OUTPUT
        Path of output timeseries in .mda format    
        
    channels : string
        Comma-separated list of channels to extract. Either use this or the channels_array input, not both.
    t1 : integer
        Integer start timepoint (zero-based indexing). If -1 will set to zero.
    t2 : integer
        Integer end timepoint (zero-based indexing). If -1 will set to N-1.
    timeseries_dtype : string
        Only supply this if timeseries is in raw binary format. Choices are int16, uint16, int32, float32, etc.
    timeseries_num_channels : integer
        Only supply this if timeseries is in raw binary format. Integer representing number of channels. Number of timepoints will be deduced
    """
    # Returns False (after printing a message) on invalid raw-binary
    # arguments; otherwise returns the result of the chunk reader's run.
    if channels:
        _channels = np.fromstring(channels, dtype=int, sep=',')
    elif channels_array:
        _channels = readmda(channels_array).ravel()
    else:
        _channels = np.empty(0)

    header0 = None
    if timeseries_dtype:
        # Raw binary input: build the header from the file size.
        if timeseries_num_channels <= 0:
            print(
                "timeseries_num_channels must be a positive integer when timeseries_dtype is given"
            )
            return False
        size_bytes = os.path.getsize(timeseries)
        num_bytes_per_entry = get_num_bytes_per_entry_from_dt(timeseries_dtype)
        if t2 >= 0:
            num_timepoints = t2 + 1
        else:
            bytes_per_timepoint = num_bytes_per_entry * timeseries_num_channels
            if size_bytes % bytes_per_timepoint != 0:
                print(
                    "File size (%ld) is not divisible by number of channels (%g) for dtype=%s"
                    % (size_bytes, timeseries_num_channels, timeseries_dtype))
                return False
            # Integer division keeps the header dimensions integral.
            num_timepoints = size_bytes // bytes_per_timepoint
        header0 = MdaHeader(timeseries_dtype,
                            [timeseries_num_channels, num_timepoints])

    X = DiskReadMda(timeseries, header0)
    M, N = X.N1(), X.N2()
    if _channels.size == 0:
        _channels = np.arange(1, M + 1)  # default: every channel
    M2 = _channels.size

    if t1 < 0:
        t1 = 0
    if t2 < 0:
        t2 = N - 1

    N2 = t2 - t1 + 1

    _writer = DiskWriteMda(timeseries_out, [M2, N2], dt=X.dt())

    # Channel numbers are 1-based; convert once to 0-based integer row
    # indices (values read from an mda array may not be integer-typed).
    channel_rows = (_channels - 1).astype(int).tolist()

    def _kernel(chunk, info):
        chunk = chunk[channel_rows, ]
        # NOTE(review): the write offset is info.t1 -- confirm it is relative
        # to the requested start when t1 > 0.
        return _writer.writeChunk(chunk, i1=0, i2=info.t1)

    chunk_size_mb = 100
    TCR = TimeseriesChunkReader(chunk_size_mb=chunk_size_mb,
                                overlap_size=0,
                                t1=t1,
                                t2=t2)
    return TCR.run(X, _kernel)

예제 #18

0

파일 보기

파일: p_anneal_segments.py 프로젝트: mari-sosa/msdrift

def get_dmatrix_templates(timeseries_list, firings_list):
    """Run ``compute_dmatrix`` on every pair of segments and stack the results.

    Pairs are ordered with neighbouring segments first, then all
    non-neighbouring pairs.

    Parameters
    ----------
    timeseries_list : sequence
        Paths of the per-segment timeseries; the first one is opened to read
        the channel count.
    firings_list : sequence
        Paths of the per-segment firings files, same order as
        ``timeseries_list``.

    Returns
    -------
    tuple
        ``(dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes,
        segment_combos)``. The three matrices have shape
        ``(Kmax, Kmax, num_combos)`` and start at -1; ``templates`` has shape
        ``(M, clip_size, Kmax, 2 * num_combos)`` and starts at 0. Slots for
        pairs where either segment has no events keep those initial values.
        ``Kmaxes`` lists the largest label per segment (0 for an empty
        segment); row ``n`` of ``segment_combos`` names the segment pair
        stored in slot ``n``.
    """
    X = DiskReadMda(timeseries_list[0])
    M = X.N1()
    clip_size = 50
    num_segments = len(timeseries_list)

    # All segment pairs (j1 < j2). The reshape keeps the array 2-D even when
    # there are fewer than two segments.
    segment_combos = np.array(list(it.combinations(range(num_segments), 2)),
                              dtype=int).reshape(-1, 2)
    # Order the pairs so that neighbours come first, then non-neighbours.
    gaps = segment_combos[:, 1] - segment_combos[:, 0]
    segment_combos = np.append(segment_combos[gaps == 1],
                               segment_combos[gaps > 1],
                               axis=0)
    num_combos = segment_combos.shape[0]

    firings_arrays = [readmda(firings_list[j]) for j in range(num_segments)]
    Kmaxes = []
    for j, F in enumerate(firings_arrays):
        print(str(len(F[1, :])) + ' clustered events in segment ' + str(j))
        labels = F[2, :]
        Kmaxes.append(np.max(labels) if len(labels) else 0)

    # Always allocate the outputs. Previously they were only created when
    # some segment had a positive label, so the final return raised
    # UnboundLocalError for all-empty input.
    largest = max(Kmaxes)
    use_max = int(largest) if largest > 0 else 0
    dmatrix = np.full((use_max, use_max, num_combos), -1.0)
    k1_dmatrix = np.full((use_max, use_max, num_combos), -1.0)
    k2_dmatrix = np.full((use_max, use_max, num_combos), -1.0)
    templates = np.zeros((M, clip_size, use_max, 2 * num_combos))

    for n in range(num_combos):
        # n indexes the third dimension of the output arrays
        j1 = segment_combos[n, 0]
        j2 = segment_combos[n, 1]
        print('Computing dmatrix between segments %d and %d' % (j1, j2))
        if np.size(firings_arrays[j1]) == 0 or np.size(
                firings_arrays[j2]) == 0:
            # Nothing to compare; leave this slot at its initial values.
            continue
        (dmatrix0, k1_dmatrix0, k2_dmatrix0, templates1,
         templates2) = compute_dmatrix(timeseries_list[j1],
                                       timeseries_list[j2],
                                       firings_arrays[j1],
                                       firings_arrays[j2],
                                       clip_size=clip_size)
        n1, n2 = dmatrix0.shape[0], dmatrix0.shape[1]
        dmatrix[0:n1, 0:n2, n] = dmatrix0
        k1_dmatrix[0:n1, 0:n2, n] = k1_dmatrix0
        k2_dmatrix[0:n1, 0:n2, n] = k2_dmatrix0
        templates[:, :, 0:n1, n * 2] = templates1
        templates[:, :, 0:n2, n * 2 + 1] = templates2
    return (dmatrix, k1_dmatrix, k2_dmatrix, templates, Kmaxes, segment_combos)

예제 #19

0

파일 보기

파일: p_extract_subfirings.py 프로젝트: mari-sosa/msdrift

def extract_subfirings(*, firings, t1='', t2='', channels='', channels_array='', timeseries='', firings_out):
    """
    Extract a firings subset based on times and/or channels.

    If a time subset is extracted with a start time, the event times are
    shifted so that t_new = t_original - t1.
    If channel(s) are given together with a timeseries, only clusters whose
    largest template value falls on one of the given channels are kept.
    First developed for use with extract_timeseries in inspecting very large
    datasets.

    Parameters
    ----------
    firings : INPUT
        A path of a firings file from which a subset is extracted
    t1 : INPUT
        Start time for extracted firings (exclusive bound); empty or 0 means
        no lower bound and no time shift
    t2 : INPUT
        End time for extracted firings (exclusive bound); use -1 OR no value
        for end of timeseries
    channels : INPUT
        A comma-separated string of channels from which clusters with maximal
        energy (based on template) will be extracted
    channels_array : INPUT
        An array of channels from which clusters with maximal energy (based on
        template) will be extracted; only used when `channels` is empty
    timeseries : INPUT
        A path of a timeseries file from which templates will be calculated if
        a subset of channels is given
    firings_out : OUTPUT
        The extracted subfirings path

    Returns
    -------
    Whatever writemda64 returns for the written subfirings file.
    """

    def _as_number(value, default):
        # Blank strings / None mean "not provided"; numeric strings are parsed.
        if value is None:
            return default
        if isinstance(value, str):
            value = value.strip()
            return float(value) if value else default
        return value

    def _as_channels(value):
        # Normalise a comma-separated string or array-like into a 1-D array.
        if value is None:
            return np.empty(0, dtype=int)
        if isinstance(value, str):
            if not value.strip():
                return np.empty(0, dtype=int)
            return np.fromstring(value, dtype=int, sep=',')
        return np.asarray(value).ravel()

    firings = readmda(firings)

    # `channels` takes precedence over `channels_array`, as before.
    _channels = _as_channels(channels)
    if _channels.size == 0:
        _channels = _as_channels(channels_array)

    t_start = _as_number(t1, 0)
    t_end = _as_number(t2, -1)
    has_start = bool(t_start)
    has_end = t_end > 0

    if has_start or has_end:
        print('Time extraction...')
        # Both bounds are strict; the end bound is honoured even without t1.
        t_valid = np.ones(firings.shape[1], dtype=bool)
        if has_start:
            t_valid &= firings[1, :] > t_start
        if has_end:
            t_valid &= firings[1, :] < t_end
        firings = firings[:, t_valid]
    else:
        print('Using full time chunk')

    # Explicit size check: the truth value of a numpy array is ambiguous.
    if _channels.size > 0 and timeseries:
        print('Channels extraction...')
        if firings.shape[1] > 0:
            # Labels are stored as floats in the firings array; cast so the
            # value can be used as an array size and loop bound.
            num_labels = int(np.max(firings[2, :]))
            # clip_size=1 keeps a single sample per channel (M x 1 x K);
            # presumably all zeros for labels with no events -- TODO confirm.
            amps = compute_templates_helper(timeseries, firings, clip_size=1)
            # 1-based main channel per label; 0 marks "no template found".
            main_chan = np.zeros(num_labels)
            for k in range(num_labels):
                if np.max(amps[:, :, k]):
                    main_chan[k] = np.argmax(amps[:, :, k]) + 1
            # +1 converts array positions back to 1-based cluster labels.
            labels_valid = np.flatnonzero(np.isin(main_chan, _channels)) + 1
            k_valid = np.isin(firings[2, :], labels_valid)
            firings = firings[:, k_valid]
    else:
        print('Using all channels')

    if has_start:
        # Re-reference event times so the extracted chunk starts at 0.
        firings[1, :] = firings[1, :] - t_start

    return writemda64(firings, firings_out)

예제 #20

0

파일 보기

def synthesize_timeseries(*,
                          firings='',
                          waveforms='',
                          timeseries_out=None,
                          noise_level=1,
                          samplerate=30000,
                          duration=60,
                          waveform_upsamplefac=1,
                          amplitudes_row=0):
    """
    Synthesize an electrophysiology timeseries from a set of ground-truth firing events and waveforms

    Parameters
    ----------
    firings : INPUT
        (Optional) The path of firing events file in .mda format (or an
        already-loaded array). RxL where R>=3 and L is the number of events.
        Second row is the timestamps, third row is the integer labels
        (1-indexed).
    waveforms : INPUT
        (Optional) The path of (possibly upsampled) waveforms file in .mda
        format (or an already-loaded array). Mx(T*waveform_upsamplefac)xK,
        where M is the number of channels, T is the clip size, and K is the
        number of units.

    timeseries_out : OUTPUT
        The output path for the new timeseries. MxN. If empty/None the array
        is returned instead of being written.

    noise_level : double
        (Optional) Standard deviation of the simulated background noise added to the timeseries
    samplerate : double
        (Optional) Sample rate for the synthetic dataset in Hz
    duration : double
        (Optional) Duration of the synthetic dataset in seconds. The number of
        timepoints will be duration*samplerate; if that is 0, the length is
        derived from the last event time plus the clip size.
    waveform_upsamplefac : int
        (Optional) The upsampling factor corresponding to the input waveforms. (avoids digitization artifacts)
    amplitudes_row : int
        (Optional) If positive, this is the row in the firings arrays where the amplitude scale factors are found. Otherwise, use all 1's

    Returns
    -------
    The result of writemda32 when timeseries_out is given, otherwise the
    synthesized MxN array.
    """
    num_timepoints = np.int64(samplerate * duration)
    waveform_upsamplefac = int(waveform_upsamplefac)

    if isinstance(waveforms, str):
        if waveforms:
            W = readmda(waveforms)
        else:
            # No waveforms given: empty set of units on a default geometry.
            W = np.zeros((4, 100 * waveform_upsamplefac, 0))
    else:
        W = waveforms

    if isinstance(firings, str):
        if firings:
            F = readmda(firings)
        else:
            F = np.zeros((3, 0))
    else:
        F = firings

    times = F[1, :]
    labels = F[2, :].astype('int')

    M, TT = W.shape[0], W.shape[1]
    T = TT // waveform_upsamplefac
    Tmid = int(np.ceil((T + 1) / 2 - 1))

    N = int(num_timepoints)
    if N == 0:
        if times.size == 0:
            N = T
        else:
            # Timestamps may be fractional; the array length must be an int.
            N = int(np.ceil(np.max(times))) + T

    X = np.random.randn(M, N) * noise_level

    for j in range(times.size):
        t0 = times[j]
        k0 = labels[j]
        amp0 = 1
        if amplitudes_row > 0:
            amp0 = F[amplitudes_row - 1, j]
        # Labels are 1-indexed: label k uses waveform slice k - 1.
        waveform0 = W[:, :, k0 - 1]
        # Choose the upsampled phase matching the fractional part of t0.
        frac_offset = int(np.floor((t0 - np.floor(t0)) * waveform_upsamplefac))
        tstart = np.int64(np.floor(t0)) - Tmid
        # Events whose clip would run off either end are skipped.
        if (0 <= tstart) and (tstart + T <= N):
            X[:, tstart:tstart + T] += (
                waveform0[:, frac_offset::waveform_upsamplefac] * amp0)

    if timeseries_out:
        return writemda32(X, timeseries_out)
    return X

예제 #21

0

파일 보기

파일: p_synthesize_drifting_timeseries.py 프로젝트: tjd2002/mountainsort

def synthesize_drifting_timeseries(*,
                                   firings,
                                   waveforms,
                                   timeseries_out=None,
                                   noise_level=1,
                                   samplerate=30000,
                                   duration=60,
                                   waveform_upsamplefac=1,
                                   amplitudes_row=0,
                                   num_interp_nodes=2):
    """
    Synthesize an electrophysiology timeseries with simulated waveform drift.

    Every firing event is expanded into num_interp_nodes copies. Copy j of an
    event with label k is relabelled (k - 1) * num_interp_nodes + j + 1 and
    its amplitude is scaled by time_basis_func(j, num_interp_nodes, t), where
    t is the event time divided by duration * samplerate. The expanded
    firings are then passed to synthesize_timeseries.

    Parameters
    ----------
    firings : INPUT
        The path of a firing events file in .mda format, or an array. RxL
        where R>=3 and L is the number of events. Second row is the
        timestamps, third row is the integer labels (1-indexed).
    waveforms : INPUT
        The path of a (possibly upsampled) waveforms file in .mda format,
        forwarded unchanged to synthesize_timeseries. Per the relabelling
        above it should hold num_interp_nodes contiguous waveforms per unit.

    timeseries_out : OUTPUT
        The output path for the new timeseries. MxN

    noise_level : double
        (Optional) Standard deviation of the simulated background noise
    samplerate : double
        (Optional) Sample rate for the synthetic dataset in Hz
    duration : double
        (Optional) Duration of the synthetic dataset in seconds
    waveform_upsamplefac : int
        (Optional) The upsampling factor corresponding to the input waveforms
    amplitudes_row : int
        (Optional) If positive, the 1-based row of the firings array holding
        amplitude scale factors. If 0, a row of ones is appended and used.
    num_interp_nodes : int
        (Optional) Number of waveform copies generated per event (Default 2)
    """
    events = readmda(firings) if type(firings) is str else firings

    if amplitudes_row == 0:
        # No amplitudes supplied: append a row of ones and point at it.
        unit_amplitudes = np.ones((1, events.shape[1]))
        events = np.concatenate((events, unit_amplitudes))
        amplitudes_row = events.shape[0]
    amp_index = amplitudes_row - 1

    # Per-event quantities captured before the events are duplicated.
    event_times = events[1, :]
    base_labels = events[2, :]
    base_amps = events[amp_index, :]
    # Event time as a fraction of the total number of samples.
    times_fraction = event_times / (duration * samplerate)

    # Repeat each event column num_interp_nodes times, copies adjacent.
    events = np.kron(events, [1] * num_interp_nodes)

    for node in range(num_interp_nodes):
        node_columns = slice(node, None, num_interp_nodes)
        node_weight = time_basis_func(node, num_interp_nodes, times_fraction)
        events[amp_index, node_columns] = base_amps * node_weight
        # Labels are 1-indexed, hence the -1 / +1 around the remapping.
        events[2, node_columns] = (base_labels - 1) * num_interp_nodes + node + 1

    return synthesize_timeseries(firings=events,
                                 waveforms=waveforms,
                                 timeseries_out=timeseries_out,
                                 noise_level=noise_level,
                                 samplerate=samplerate,
                                 duration=duration,
                                 waveform_upsamplefac=waveform_upsamplefac,
                                 amplitudes_row=amplitudes_row)