Example #1
def SNEMIPointCloud(prefix):
    filename = 'raw_data/segmentations/{}/seg.h5'.format(prefix)

    # open this h5 file
    with h5py.File(filename, 'r') as hf:
        # use np.array to decompress
        data = np.array(hf[list(hf.keys())[0]])

    # verify the resolutions match
    zres, yres, xres = dataIO.GridSize(prefix)
    assert (zres == data.shape[OR_Z] and yres == data.shape[OR_Y]
            and xres == data.shape[OR_X])

    # get all of the non-zero points in a list
    point_clouds = ExtractSNEMIPointClouds(data)

    for label, point_cloud in enumerate(point_clouds):
        # skip over missing elements from these slices
        if not len(point_cloud): continue

        # write the point cloud to file
        output_filename = 'segmentations/{}/{:06d}.pts'.format(prefix, label)
        with open(output_filename, 'wb') as fd:
            npoints = len(point_cloud)
            fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
            fd.write(struct.pack('%sq' % npoints, *point_cloud))
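
A note on the file layout: every writer in these examples emits the same binary format, a 32-byte header of four signed 64-bit integers (zres, yres, xres, npoints) followed by npoints packed 64-bit linear indices. A minimal reader for that layout, mirroring the struct calls above (the name ReadPts is illustrative, not from the repo):

import struct

def ReadPts(filename):
    with open(filename, 'rb') as fd:
        # the header stores the grid size and the point count
        zres, yres, xres, npoints = struct.unpack('qqqq', fd.read(32))
        # each point is a signed 64-bit linear index into the grid
        points = list(struct.unpack('%sq' % npoints, fd.read(8 * npoints)))
    return (zres, yres, xres), points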
Example #2
def H5Section2PointCloud(prefix, filename, section_index, section_width):
    # get grid size as filler for saved files
    zres, yres, xres = dataIO.GridSize(prefix)

    # open this file and read data
    with h5py.File(filename, 'r') as hf:
        data = np.array(hf['main'])

    # later sections start at a non-zero z index
    z_start = section_width * section_index
    point_clouds = SectionExtractPointCloud(data, z_start)

    for label, point_cloud in enumerate(point_clouds):
        # skip over missing elements from these slices
        if not len(point_cloud): continue

        # write the point cloud to file
        output_directory = 'original_data/segmentations/{}/sections'.format(prefix)
        if not os.path.isdir(output_directory):
            os.mkdir(output_directory)
        output_filename = 'original_data/segmentations/{}/sections/section-{:03d}-label-{:06d}.pts'.format(
            prefix, section_index, label)
        with open(output_filename, 'wb') as fd:
            npoints = len(point_cloud)
            fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
            fd.write(struct.pack('%sq' % npoints, *point_cloud))
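
section_index and section_width tile the volume along z, so the starting slice is simply their product. A sketch of how a driver might sweep every section, ceil-dividing so a ragged final section is still covered (the driver name is illustrative, and zres is assumed to come from dataIO.GridSize as above):

def ProcessAllSections(prefix, filename, section_width):
    zres, _, _ = dataIO.GridSize(prefix)
    # ceil-divide so a final partial section is still processed
    nsections = (zres + section_width - 1) // section_width
    for section_index in range(nsections):
        H5Section2PointCloud(prefix, filename, section_index, section_width)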
Example #3
def CorrectIsthmusEndpoints(prefix, method):
    # go through every label for this method and dataset
    directory = '{}/{}'.format(method, prefix)

    labels = []
    for filename in sorted(os.listdir(directory)):
        if 'endpoints' in filename: continue
        labels.append(int(filename[:-4]))
    
    for label in labels:
        filename = '{}/{}/{:06d}.pts'.format(method, prefix, label)
        print(filename)

        with open(filename, 'rb') as fd:
            # the grid size is read straight from the file header
            zres, yres, xres, npoints = struct.unpack('qqqq', fd.read(32))
            points = list(struct.unpack('%sq' % npoints, fd.read(8 * npoints)))
        
        # negative values act as flags in this format; recover the raw index
        for ip in range(npoints):
            if points[ip] < 0:
                points[ip] = -points[ip]

        endpoints = CreateEndpoints(set(points), zres, yres, xres)

        output_filename = '{}/{}/{:06d}-endpoints.pts'.format(method, prefix, label)

        with open(output_filename, 'wb') as fd:
            nendpoints = len(endpoints)
            fd.write(struct.pack('qqqq', zres, yres, xres, nendpoints))
            fd.write(struct.pack('%sq' % nendpoints, *endpoints))
Example #4
def JWRSomae(label):
    prefix = 'JWR'

    # get the grid size to convert to linear coordinates
    zres, yres, xres = dataIO.GridSize(prefix)

    # get the original filename
    filename = 'raw_data/somae/JWR/cell{:03d}_d.txt'.format(label)

    with open(filename, 'r') as fd:
        # strip the newline and split the comma-separated coordinate
        line = fd.readline().strip().split(',')

        ix = int(line[0])
        iy = int(line[1])
        iz = int(line[2])

    # convert to linear index
    iv = iz * yres * xres + iy * xres + ix

    output_filename = 'somae/JWR/{:06d}.pts'.format(label)
    with open(output_filename, 'wb') as fd:
        npoints = 1
        fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
        fd.write(struct.pack('q', iv))
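
The linear index iv = iz * yres * xres + iy * xres + ix recurs throughout these examples; integer division and modulo invert it. A round-trip sketch (helper names illustrative):

def EncodeIndex(iz, iy, ix, yres, xres):
    # row-major order: z varies slowest, x fastest
    return iz * yres * xres + iy * xres + ix

def DecodeIndex(iv, yres, xres):
    iz = iv // (yres * xres)
    iy = (iv - iz * yres * xres) // xres
    ix = iv % xres
    return iz, iy, ix

assert DecodeIndex(EncodeIndex(3, 5, 7, 512, 512), 512, 512) == (3, 5, 7)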
Example #5
def SNEMISynapses(prefix):
    # read in the synapse h5 file
    with h5py.File('raw_data/synapses/{}/synapses.h5'.format(prefix), 'r') as hf:
        syn_data = np.array(hf[list(hf.keys())[0]])

    # read in all of the synapses 
    syn_per_seg = dataIO.ReadAllPoints(prefix, 'synapses')

    # get the grid size
    zres, yres, xres = dataIO.GridSize(prefix)

    # make sure that every location actually falls on a synapse
    for segment in syn_per_seg:
        synapses = syn_per_seg[segment]

        for synapse in synapses:
            # make sure this location is a synapse
            iz = synapse // (yres * xres)
            iy = (synapse - iz * yres * xres) // xres
            ix = synapse % xres

            # make sure this is a non-zero location
            assert (syn_data[iz,iy,ix])

    # read in the segmentation h5 file
    with h5py.File('raw_data/segmentations/{}/seg.h5'.format(prefix), 'r') as hf:
        seg_data = np.array(hf[list(hf.keys())[0]])

    syn_seg_pairs = set()
    syn_seg_found = set()

    # make sure every (synapse, segment) pair occurs once and only once
    for iz in range(zres):
        for iy in range(yres):
            for ix in range(xres):
                synapse = syn_data[iz,iy,ix]
                segment = seg_data[iz,iy,ix]

                # skip background data
                if not synapse or not segment: continue

                syn_seg_pairs.add((synapse, segment))

                # see if this point is in the list of synapses
                iv = iz * yres * xres + iy * xres + ix

                if segment in syn_per_seg and iv in syn_per_seg[segment]:
                    # make sure there is only one element for this pair
                    assert (not (synapse, segment) in syn_seg_found)

                    # add this pair to the list of found locations
                    syn_seg_found.add((synapse, segment))

    for (synapse, segment) in syn_seg_pairs:
        assert ((synapse, segment) in syn_seg_found)
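
The triple loop above visits every voxel in pure Python, which is slow for large volumes. A hedged NumPy sketch that collects the same (synapse, segment) pairs in one vectorized pass (equivalent logic, not the repo's code):

import numpy as np

def NonzeroPairs(syn_data, seg_data):
    # keep only voxels where both the synapse and segment labels are non-zero
    mask = (syn_data != 0) & (seg_data != 0)
    return set(zip(syn_data[mask].tolist(), seg_data[mask].tolist()))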
Example #6
def CreateVolumetricSomae(prefix, label, subset):
    # subset selects the point-cloud directory to read (assumed parameter,
    # e.g. 'surfaces'; the snippet used it without defining it)
    somae_filename = 'raw_data/somae/{}/cell{:03d}_d.h5'.format(prefix, label)
    if not os.path.exists(somae_filename): return

    segment_filename = 'surfaces/{}/{:06d}.pts'.format(prefix, label)
    if not os.path.exists(segment_filename): return

    if prefix == 'JWR':
        downz, downy, downx = 4, 4, 4
    elif prefix == 'Zebrafinch':
        downz, downy, downx = 8, 8, 8
    else:
        # assume somae volumes for other datasets are at full resolution
        downz, downy, downx = 1, 1, 1

    # get the grid size
    zres, yres, xres = dataIO.GridSize(prefix)

    with h5py.File(somae_filename, 'r') as hf:
        somae = np.array(hf['main'])

    # read the point cloud for the requested subset
    point_cloud = dataIO.ReadPoints(prefix, label, subset)
    somae_point_cloud = set()

    low_zres, low_yres, low_xres = somae.shape

    for index in point_cloud:
        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres

        zdown = iz // downz
        ydown = iy // downy
        xdown = ix // downx

        # clamp to the bounds of the low-resolution somae volume
        zdown = min(zdown, low_zres - 1)
        ydown = min(ydown, low_yres - 1)
        xdown = min(xdown, low_xres - 1)

        if somae[zdown, ydown, xdown]:
            somae_point_cloud.add(index)

    somae_point_cloud = list(somae_point_cloud)

    output_filename = 'volumetric_somae/{}/{}/{:06d}.pts'.format(subset, prefix, label)
    with open(output_filename, 'wb') as fd:
        nsegment_points = len(somae_point_cloud)
        fd.write(struct.pack('qqqq', zres, yres, xres, nsegment_points))
        fd.write(struct.pack('%sq' % nsegment_points, *somae_point_cloud))
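
Decoding, downsampling, and clamping one index at a time is another spot where NumPy can process the whole point cloud at once; a hedged vectorized sketch of the same lookup (illustrative, not the repo's code):

import numpy as np

def SomaeFilter(point_cloud, somae, yres, xres, downz, downy, downx):
    iv = np.asarray(point_cloud, dtype=np.int64)
    # decode every linear index in one shot
    iz, remainder = np.divmod(iv, yres * xres)
    iy, ix = np.divmod(remainder, xres)
    # downsample and clamp to the low-resolution somae grid
    zdown = np.minimum(iz // downz, somae.shape[0] - 1)
    ydown = np.minimum(iy // downy, somae.shape[1] - 1)
    xdown = np.minimum(ix // downx, somae.shape[2] - 1)
    # keep only the points that land on a somae voxel
    return iv[somae[zdown, ydown, xdown] != 0]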
Example #7
def Segment2Surface(prefix, label):
    if not os.path.exists('segmentations/{}/{:06d}.pts'.format(prefix, label)):
        return

    # get the grid size for this prefix
    zres, yres, xres = dataIO.GridSize(prefix)

    point_cloud = set(dataIO.ReadPoints(prefix, label, 'segmentations'))

    surface_points = FindSurface(point_cloud, zres, yres, xres)

    surface_filename = 'surfaces/{}/{:06d}.pts'.format(prefix, label)

    with open(surface_filename, 'wb') as fd:
        nsurface_points = len(surface_points)
        fd.write(struct.pack('qqqq', zres, yres, xres, nsurface_points))
        fd.write(struct.pack('%sq' % nsurface_points, *surface_points))
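
FindSurface is defined elsewhere in the repo. One plausible definition keeps a voxel whenever any of its six face neighbors lies outside the segment; a hedged sketch under that assumption (not the repo's actual implementation):

def FindSurfaceSketch(point_cloud, zres, yres, xres):
    # point_cloud is a set of linear indices, as passed in Segment2Surface
    surface_points = []
    for iv in point_cloud:
        iz = iv // (yres * xres)
        iy = (iv - iz * yres * xres) // xres
        ix = iv % xres
        for dz, dy, dx in ((1, 0, 0), (-1, 0, 0), (0, 1, 0),
                           (0, -1, 0), (0, 0, 1), (0, 0, -1)):
            nz, ny, nx = iz + dz, iy + dy, ix + dx
            # a missing or out-of-bounds neighbor makes this a surface voxel
            if (not (0 <= nz < zres and 0 <= ny < yres and 0 <= nx < xres)
                    or nz * yres * xres + ny * xres + nx not in point_cloud):
                surface_points.append(iv)
                break
    return surface_points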
Example #8
def CombineSectionPointClouds(prefix):
    # get the grid size for this prefix
    zres, yres, xres = dataIO.GridSize(prefix)

    sub_directory = 'original_data/segmentations/{}/sections'.format(prefix)

    # get the maximum label from the filenames
    max_label = max(
        int(filename.split('-')[-1][:-4])
        for filename in os.listdir(sub_directory)) + 1

    # go through every label
    for label in range(max_label):
        filenames = sorted(
            glob.glob('{}/*-label-{:06d}.pts'.format(sub_directory, label)))
        if not len(filenames): continue

        # write the point cloud to this final file
        output_filename = 'original_data/segmentations/{}/{:06d}.pts'.format(
            prefix, label)
        with open(output_filename, 'wb') as wfd:
            # write a placeholder point count now and overwrite it at the end
            npoints = 0
            wfd.write(struct.pack('qqqq', zres, yres, xres, npoints))
            for filename in filenames:
                with open(filename, 'rb') as rfd:
                    zres, yres, xres, section_npoints = struct.unpack(
                        'qqqq', rfd.read(32))
                    point_cloud = struct.unpack('%sq' % section_npoints,
                                                rfd.read(8 * section_npoints))
                    wfd.write(
                        struct.pack('%sq' % section_npoints, *point_cloud))
                    npoints += section_npoints

            # rewind and overwrite the header with the true point count
            wfd.seek(0)
            wfd.write(struct.pack('qqqq', zres, yres, xres, npoints))
Example #9
def JWRPointCloud(label):
    filename = 'raw_data/segmentations/JWR/cell{:03d}_d.h5'.format(label)

    # open this binary file
    with h5py.File(filename, 'r') as hf:
        # use np.array to decompress
        data = np.array(hf[list(hf.keys())[0]])

    # verify the resolutions match
    zres, yres, xres = dataIO.GridSize('JWR')
    assert (zres == data.shape[OR_Z] and yres == data.shape[OR_Y]
            and xres == data.shape[OR_X])

    # get all of the non-zero points in a list
    point_cloud = ExtractJWRPointCloud(data)

    # write the point cloud to file
    output_filename = 'original_data/segmentations/JWR/{:06d}.pts'.format(
        label)
    with open(output_filename, 'wb') as fd:
        npoints = len(point_cloud)
        fd.write(struct.pack('qqqq', zres, yres, xres, npoints))
        fd.write(struct.pack('%sq' % npoints, *point_cloud))
Example #10
def Fib25Synapses(start_index):
    prefix = 'Fib25'

    # get the grid size to convert to linear coordinates
    zres, yres, xres = dataIO.GridSize(prefix)

    # get the labels for this dataset
    labels = [
        int(label[:-4])
        for label in sorted(os.listdir('segmentations/{}'.format(prefix)))
    ]

    synapses_per_segment = {}
    for label in labels:
        synapses_per_segment[label] = []

    pre_filename = 'raw_data/synapses/Fib25/synapse_gid_ffn_pre_v1.txt'
    with open(pre_filename, 'r') as fd:
        for line in fd:
            # remove the new line and separate parts
            line = line.strip().split()

            # get the id and location
            segment = int(line[0]) + 1
            iz = int(line[1])
            iy = int(line[2])
            ix = int(line[3])

            # verify input
            assert 0 <= ix < xres
            assert 0 <= iy < yres
            assert 0 <= iz < zres

            # some ids in the synapse file have no extracted segmentation; skip them
            if segment not in synapses_per_segment: continue

            iv = iz * yres * xres + iy * xres + ix
            synapses_per_segment[segment].append(iv)

    post_filename = 'raw_data/synapses/Fib25/synapse_gid_ffn_post_v1.txt'
    with open(post_filename, 'r') as fd:
        for line in fd:
            # remove the new line and separate parts
            line = line.strip().split()

            # get the id and location
            segment = int(line[1]) + 1
            iz = int(line[2])
            iy = int(line[3])
            ix = int(line[4])

            # verify input
            assert 0 <= ix < xres
            assert 0 <= iy < yres
            assert 0 <= iz < zres

            # some ids in the synapse file have no extracted segmentation; skip them
            if segment not in synapses_per_segment: continue

            iv = iz * yres * xres + iy * xres + ix
            synapses_per_segment[segment].append(iv)

    # save all synapses for each label
    for segment in synapses_per_segment:
        output_filename = 'synapses/Fib25/{:06d}.pts'.format(segment)
        if os.path.exists(output_filename): continue

        if segment < start_index: continue

        start_time = time.time()

        # make sure all the synapses fall near the segment
        surface_point_cloud = dataIO.ReadPoints(prefix, segment, 'surfaces')
        npoints = len(surface_point_cloud)

        np_point_cloud = np.zeros((npoints, 3), dtype=np.int32)
        for index, iv in enumerate(surface_point_cloud):
            iz = iv // (yres * xres)
            iy = (iv - iz * yres * xres) // xres
            ix = iv % xres

            np_point_cloud[index, :] = (ix, iy, iz)

        synapses = []

        # accumulate absolute distances (the values are not squared)
        total_distance = 0.0

        for synapse in synapses_per_segment[segment]:
            iz = synapse // (yres * xres)
            iy = (synapse - iz * yres * xres) // xres
            ix = synapse % xres

            # create a 2D vector for this point
            vec = np.zeros((1, 3), dtype=np.int32)
            vec[0, :] = (ix, iy, iz)

            closest_point = surface_point_cloud[scipy.spatial.distance.cdist(
                np_point_cloud, vec).argmin()]

            point_iz = closest_point // (yres * xres)
            point_iy = (closest_point - point_iz * yres * xres) // xres
            point_ix = closest_point % xres

            distance = math.sqrt((ix - point_ix) * (ix - point_ix) +
                                 (iy - point_iy) * (iy - point_iy) +
                                 (iz - point_iz) * (iz - point_iz))
            # skip over clearly wrong synapses
            if distance > 30: continue
            total_distance += distance

            synapses.append(closest_point)

        if not len(synapses): continue
        with open(output_filename, 'wb') as fd:
            nsynapses = len(synapses)

            fd.write(struct.pack('qqqq', zres, yres, xres, nsynapses))
            fd.write(struct.pack('%sq' % nsynapses, *synapses))

        print('Mean Distance {:0.2f} for label {} in {:0.2f} seconds'.format(
            total_distance / len(synapses), segment,
            time.time() - start_time))
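
Calling scipy.spatial.distance.cdist once per synapse rescans the entire surface cloud each time. A k-d tree answers all nearest-surface queries in one call; a hedged sketch of that swap (the same pattern applies to JWRandZebrafinchSynapses below):

from scipy.spatial import cKDTree

def ClosestSurfacePoints(np_point_cloud, query_points):
    # build the tree once over the (npoints, 3) surface array
    tree = cKDTree(np_point_cloud)
    # one vectorized query returns distances and row indices into the surface
    distances, indices = tree.query(query_points)
    return distances, indices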
Example #11
def JWRandZebrafinchSynapses(prefix, label):
    # skip if no surface filename
    surface_filename = 'surfaces/{}/{:06d}.pts'.format(prefix, label)
    if not os.path.exists(surface_filename): return

    # get the grid size to convert to linear coordinates
    zres, yres, xres = dataIO.GridSize(prefix)

    # JWR has a downsampled segmentation
    if prefix == 'JWR':
        downsample_rate = (1, 8, 8)
        xyz_coordinates = True
    else:
        downsample_rate = (1, 1, 1)
        xyz_coordinates = False

    start_time = time.time()
    # get the original filename
    if prefix == 'JWR':
        filename = 'raw_data/synapses/JWR/cell{:03d}_d.txt'.format(label)
    else:
        filename = 'raw_data/synapses/Zebrafinch/syn_{:04d}.txt'.format(label)

    if not os.path.exists(filename): return

    # read the segmentation points for this label and convert to numpy array
    surface_point_cloud = dataIO.ReadPoints(prefix, label, 'surfaces')
    npoints = len(surface_point_cloud)

    np_point_cloud = np.zeros((npoints, 3), dtype=np.int32)
    for index, iv in enumerate(surface_point_cloud):
        iz = iv // (yres * xres)
        iy = (iv - iz * yres * xres) // xres
        ix = iv % xres

        np_point_cloud[index, :] = (ix, iy, iz)

    synapses = []

    # accumulate absolute distances (the values are not squared)
    total_distance = 0.0
    with open(filename, 'r') as fd:
        for line in fd:
            # remove the new line and separate parts
            line = line.strip().split()

            if xyz_coordinates:
                ix = int(line[0]) // downsample_rate[OR_X]
                iy = int(line[1]) // downsample_rate[OR_Y]
                iz = int(line[2]) // downsample_rate[OR_Z]
            else:
                iz = int(line[0]) // downsample_rate[OR_Z]
                iy = int(line[1]) // downsample_rate[OR_Y]
                ix = int(line[2]) // downsample_rate[OR_X]

            # create a 2D vector for this point
            vec = np.zeros((1, 3), dtype=np.int32)
            vec[0, :] = (ix, iy, iz)

            closest_point = surface_point_cloud[scipy.spatial.distance.cdist(
                np_point_cloud, vec).argmin()]

            point_iz = closest_point // (yres * xres)
            point_iy = (closest_point - point_iz * yres * xres) // xres
            point_ix = closest_point % xres

            distance = math.sqrt((ix - point_ix) * (ix - point_ix) +
                                 (iy - point_iy) * (iy - point_iy) +
                                 (iz - point_iz) * (iz - point_iz))
            # skip over clearly wrong synapses
            if distance > 30: continue
            total_distance += distance

            synapses.append(closest_point)

    # skip labels with no synapses close enough to the surface
    if not len(synapses): return

    # assumed output path, mirroring the Fib25 convention above
    output_filename = 'synapses/{}/{:06d}.pts'.format(prefix, label)
    with open(output_filename, 'wb') as fd:
        nsynapses = len(synapses)

        fd.write(struct.pack('qqqq', zres, yres, xres, nsynapses))
        fd.write(struct.pack('%sq' % nsynapses, *synapses))

    print('Mean Distance {:0.2f} for label {} in {:0.2f} seconds'.format(
        total_distance / len(synapses), label,
        time.time() - start_time))
Example #12
def SynapseEvaluate(prefix, method, label):
    # go through every label for this method and dataset
    synapse_filename = 'synapses/{}/{:06d}.pts'.format(prefix, label)
    if not os.path.exists(synapse_filename): return
    endpoint_filename = '{}/{}/{:06d}.pts'.format(method, prefix, label)
    if not os.path.exists(endpoint_filename): return
    nri_filename = 'nris/{}/{}-{:06d}.txt'.format(prefix, method.replace('/', '-'), label)
    if os.path.exists(nri_filename): return

    # get the grid size
    zres, yres, xres = dataIO.GridSize(prefix)
    resolution = dataIO.Resolution(prefix)

    max_distance = 800

    # read the true synapse locations
    synapses = dataIO.ReadPoints(prefix, label, 'synapses')

    # read the predicted locations
    predictions = ReadPredictions(prefix, method, label)

    ngt_pts = len(synapses)
    npr_pts = len(predictions)

    gt_pts = np.zeros((ngt_pts, 3), dtype=np.int64)
    pr_pts = np.zeros((npr_pts, 3), dtype=np.int64)

    for pt in range(ngt_pts):
        # get x, y, z locations
        index = synapses[pt]

        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres 

        # coordinates are (x, y, z)
        gt_pts[pt,0] = resolution[OR_X] * ix
        gt_pts[pt,1] = resolution[OR_Y] * iy
        gt_pts[pt,2] = resolution[OR_Z] * iz
    
    for pt in range(npr_pts):
        # get x, y, z locations
        index = predictions[pt]

        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres

        # coordinates are (x, y, z)
        pr_pts[pt,0] = resolution[OR_X] * ix
        pr_pts[pt,1] = resolution[OR_Y] * iy
        pr_pts[pt,2] = resolution[OR_Z] * iz

    cost_matrix = scipy.spatial.distance.cdist(gt_pts, pr_pts)
    matching = scipy.optimize.linear_sum_assignment(cost_matrix)

    valid_matches = set()
    for match in zip(matching[0], matching[1]):
        # valid pairs must be within max_distance in nanometers
        if cost_matrix[match[0], match[1]] > max_distance: continue

        valid_matches.add(match)

    ncorrect_synapses = len(valid_matches)
    nadded_synapses = npr_pts - len(valid_matches)
    nmissed_synapses = ngt_pts - len(valid_matches)

    # the number of true positives is the number of paths between the valid locations
    true_positives = ncorrect_synapses * (ncorrect_synapses - 1) // 2
    # the number of false positives is every pair of paths between true and added synapses
    false_positives = ncorrect_synapses * nadded_synapses
    # the number of false negatives is every path between a matched synapse and a missed one
    false_negatives = ncorrect_synapses * nmissed_synapses

    if true_positives == 0:
        nri = 0
    else:
        precision = true_positives / float(true_positives + false_positives)
        recall = true_positives / float(true_positives + false_negatives)

        nri = 2 * (precision * recall) / (precision + recall)
    
    with open(nri_filename, 'w') as fd:
        fd.write('{} {} {}\n'.format(true_positives, false_positives, false_negatives))
        fd.write('{}\n'.format(nri))

    return nri
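
With c correct, a added, and m missed synapses, the counts above are TP = c(c-1)/2, FP = c*a, FN = c*m, and NRI is the F1 score over those path counts. A tiny self-contained check of the arithmetic (function name and numbers illustrative):

def NRIFromCounts(ncorrect, nadded, nmissed):
    true_positives = ncorrect * (ncorrect - 1) // 2
    false_positives = ncorrect * nadded
    false_negatives = ncorrect * nmissed
    if not true_positives: return 0.0
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    return 2 * precision * recall / (precision + recall)

# 4 correct, 1 added, 1 missed: TP = 6, FP = 4, FN = 4, so NRI = 0.6
assert abs(NRIFromCounts(4, 1, 1) - 0.6) < 1e-9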
Example #13
def EvaluateWidths(prefix, label):
    start_time = time.time()

    # get the resolution, surface voxels, and radii for this prefix label pair
    resolution = dataIO.Resolution(prefix)
    zres, yres, xres = dataIO.GridSize(prefix)

    # surface information
    surface_point_cloud = np.array(dataIO.ReadPoints(prefix, label,
                                                     'surfaces'),
                                   dtype=np.int64)
    npoints = len(surface_point_cloud)

    np_point_cloud = np.zeros((npoints, 3), dtype=np.int32)
    for index, iv in enumerate(surface_point_cloud):
        iz = iv // (yres * xres)
        iy = (iv - iz * yres * xres) // xres
        ix = iv % xres

        np_point_cloud[index, :] = (resolution[OR_X] * ix,
                                    resolution[OR_Y] * iy,
                                    resolution[OR_Z] * iz)

    widths = dataIO.ReadWidths(prefix, label)
    skeletons = dataIO.ReadPoints(prefix, label, 'connectomes')

    # keep track of the error over time
    mean_absolute_error = 0.0

    count = 0
    for index in skeletons:
        # randomly sample roughly 20 percent of the skeleton points
        if random.random() < 0.80: continue

        # some of the soma locations will not be in the widths
        if not index in widths: continue

        iz = index // (yres * xres)
        iy = (index - iz * yres * xres) // xres
        ix = index % xres

        # create a 2D vector for this point
        vec = np.zeros((1, 3), dtype=np.int32)
        vec[0, :] = (resolution[OR_X] * ix, resolution[OR_Y] * iy,
                     resolution[OR_Z] * iz)

        # get the radius at this index
        radius = widths[index]

        minimum_distance = scipy.spatial.distance.cdist(np_point_cloud,
                                                        vec).min()

        error = abs(radius - minimum_distance)

        mean_absolute_error += error
        count += 1

    # guard against the rare case where no points were sampled
    if count:
        print('Mean Absolute Error: {:0.2f} nanometers'.format(
            mean_absolute_error / count))

    print(time.time() - start_time)

    output_filename = 'width-errors/{}-{:06d}.txt'.format(prefix, label)
    with open(output_filename, 'w') as fd:
        fd.write('{} {}\n'.format(mean_absolute_error, count))
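
Because EvaluateWidths keeps a random ~20 percent sample of skeleton points, the reported error varies from run to run; seeding the generator makes it repeatable (a usage note, prefix and label values illustrative):

import random

random.seed(0)  # fix the skeleton sample so repeated runs agree
EvaluateWidths('JWR', 1)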