Example #1
def CalculatePerBlockStatistics(data, iz, iy, ix):
    # start timing statistics
    total_time = time.time()

    # create the output directory if it does not exist
    statistics_directory = '{}/statistics'.format(data.TempDirectory())
    os.makedirs(statistics_directory, exist_ok=True)

    # calculate raw block statistics
    raw_seg = data.ReadRawSegmentationBlock(iz, iy, ix)
    raw_n_non_zero, raw_nlabels, raw_voxel_counts = BlockStatistics(raw_seg)
    del raw_seg
    # calculate filled block statistics
    seg = data.ReadSegmentationBlock(iz, iy, ix)
    filled_n_non_zero, filled_nlabels, filled_voxel_counts = BlockStatistics(
        seg)
    del seg

    assert (filled_nlabels == raw_nlabels)

    nfilled_voxels = filled_n_non_zero - raw_n_non_zero

    # create a dictionary for saving
    statistics = {}

    statistics['nlabels'] = raw_nlabels
    statistics['raw_n_non_zero'] = raw_n_non_zero
    statistics['raw_voxel_counts'] = raw_voxel_counts
    statistics['filled_n_non_zero'] = filled_n_non_zero
    statistics['filled_voxel_counts'] = filled_voxel_counts

    statistics_filename = '{}/{:04d}z-{:04d}y-{:04d}x.pickle'.format(
        statistics_directory, iz, iy, ix)
    PickleData(statistics, statistics_filename)

    total_time = time.time() - total_time

    print('Total Time: {:0.2f} seconds.'.format(total_time))
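
BlockStatistics itself is not part of this listing. A minimal sketch of what such a helper could compute from a labeled numpy volume (the name BlockStatisticsSketch and the exact return convention are assumptions):

import numpy as np

def BlockStatisticsSketch(seg):
    # per-label voxel counts over the whole block
    labels, counts = np.unique(seg, return_counts=True)
    voxel_counts = dict(zip(labels.tolist(), counts.tolist()))
    # separate out the background (label 0)
    n_background = voxel_counts.pop(0, 0)
    # number of foreground voxels and number of distinct labels
    n_non_zero = seg.size - n_background
    nlabels = len(voxel_counts)
    return n_non_zero, nlabels, voxel_counts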
Example #2
def CalculateSomataStatistics(meta_filename):
    data = ReadMetaData(meta_filename)

    somata_statistics = {}

    # iterate over all blocks
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                print('{} {:04d}z-{:04d}y-{:04d}x'.format(
                    meta_filename, iz, iy, ix))
                # some datasets have no somata (default value)
                upsampled_non_zero_voxels = 0

                if data.SomataDownsampleRate():
                    somata = data.ReadSomataBlock(iz, iy, ix)

                    # get the number of non zero voxels
                    non_zero_voxels = np.count_nonzero(somata)

                    # the upsample factor is the number of voxels at full resolution
                    # that correspond to one voxel at the downsampled resolution
                    upsample_factor = data.SomataDownsampleRate()**3

                    # the number of masked voxels at full resolution
                    upsampled_non_zero_voxels = upsample_factor * non_zero_voxels

                somata_statistics[(iz, iy, ix)] = upsampled_non_zero_voxels

    statistics_directory = '{}/statistics'.format(data.TempDirectory())
    os.makedirs(statistics_directory, exist_ok=True)

    statistics_filename = '{}/somata-statistics.pickle'.format(
        statistics_directory)
    PickleData(somata_statistics, statistics_filename)
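
The cubed downsample rate reflects isotropic downsampling: one low-resolution voxel covers rate**3 full-resolution voxels. A small worked example with an assumed rate of 8:

downsample_rate = 8                       # hypothetical isotropic downsample rate
non_zero_voxels = 1000                    # somata voxels counted at low resolution
upsample_factor = downsample_rate ** 3    # 512 full-resolution voxels per low-resolution voxel
print(upsample_factor * non_zero_voxels)  # 512000 voxels at full resolution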
Example #3
def EvaluateGeodesicDistances(data, label):
    # get the resolution of this data
    resolution = data.Resolution()

    # get the distance attributes filename
    distances_directory = '{}/distances'.format(data.SkeletonOutputDirectory())
    distance_filename = '{}/{:016d}.pts'.format(distances_directory, label)

    # skip over labels not processed
    if not os.path.exists(distance_filename): return

    # read the distance attributes
    distances, input_label = ReadAttributePtsFile(data, distance_filename)
    assert (input_label == label)

    # get the synapses filename
    synapses_filename = '{}/synapses/{:016d}.pts'.format(data.TempDirectory(), label)
    if not os.path.exists(synapses_filename): return

    synapses, _ = ReadPtsFile(data, synapses_filename)
    synapses = synapses[label]

    # get the somata surface filename
    somata_surface_filename = '{}/somata_surfaces/{:016d}.pts'.format(data.TempDirectory(), label)
    if not os.path.exists(somata_surface_filename): return

    somata_surfaces, _ = ReadPtsFile(data, somata_surface_filename)
    somata_surface = somata_surfaces[label]
    npoints = len(somata_surface)

    # convert the somata surface into a numpy point cloud
    np_point_cloud = np.zeros((npoints, 3), dtype=np.float32)
    for index, iv in enumerate(somata_surface):
        # convert the index into indices
        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # set the point cloud value according to the resolution
        np_point_cloud[index,:] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

    # create empty dictionary for all results
    results = {}

    # keep track of all errors for this label
    results['diffs'] = []
    results['euclidean'] = 0
    results['geodesic'] = 0

    # if there are no surface points there is nothing to evaluate
    if not npoints: return

    for iv in synapses:
        # get the estimated distance at this synapse point
        distance = distances[iv]

        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # convert the coordinates into a 2d vector with the resolutions
        vec = np.zeros((1, 3), dtype=np.float32)
        vec[0,:] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

        # get the min distance from this point to the surface (euclidean distance)
        euclidean_distance = scipy.spatial.distance.cdist(np_point_cloud, vec).min()

        # geodesic distances could be less than euclidean only when the synapse is on the cell body
        # surface but downsampling causes a disconnect between the assumed surface and the cell
        # body surface. skip these trivial points
        if (distance < euclidean_distance): continue

        results['diffs'].append(abs(distance - euclidean_distance))
        results['euclidean'] += euclidean_distance
        results['geodesic'] += distance

    # skip over labels with too few valid synapse points
    if len(results['diffs']) < 2: return

    # output the differences, euclidean, and geodesic distances
    tmp_distances_directory = '{}/results/distances'.format(data.TempDirectory())
    os.makedirs(tmp_distances_directory, exist_ok=True)

    output_filename = '{}/{:016d}.pickle'.format(tmp_distances_directory, label)
    PickleData(results, output_filename)
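
The euclidean distance above comes from a brute-force nearest-point query: scipy.spatial.distance.cdist forms the (npoints, 1) matrix of distances to the query and .min() selects the closest surface point. A self-contained illustration with made-up coordinates:

import numpy as np
import scipy.spatial

# three surface points and one query point, in physical coordinates
surface = np.array([[0, 0, 0], [10, 0, 0], [0, 10, 0]], dtype=np.float32)
query = np.array([[1, 1, 0]], dtype=np.float32)

# distance from every surface point to the query; the minimum is the
# euclidean distance from the query to the surface
print(scipy.spatial.distance.cdist(surface, query).min())  # ~1.414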
Example #4
def EvaluateWidths(data, label):
    # get the resolution of this data
    resolution = data.Resolution()

    # get the width attributes filename
    widths_directory = '{}/widths'.format(data.SkeletonOutputDirectory())
    width_filename = '{}/{:016d}.pts'.format(widths_directory, label)

    # skip over labels not processed
    if not os.path.exists(width_filename): return

    # read the width attributes
    widths, input_label = ReadAttributePtsFile(data, width_filename)
    assert (input_label == label)

    # get the surface filename
    surfaces_filename = '{}/{:016d}.pts'.format(data.SurfacesDirectory(), label)

    # read the surfaces, ignore local coordinates
    surfaces, _ = ReadPtsFile(data, surfaces_filename)
    surface = surfaces[label]
    npoints = len(surface)

    # convert the surface into a numpy point cloud
    np_point_cloud = np.zeros((npoints, 3), dtype=np.float32)
    for index, iv in enumerate(surface):
        # convert the index into indices
        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # set the point cloud value according to the resolutions
        np_point_cloud[index,:] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

    # create empty dictionary for all results
    results = {}

    # keep track of all errors for this label
    results['errors'] = []
    results['estimates'] = 0
    results['ground_truths'] = 0

    # iterate over all skeleton points
    for iv in widths.keys():
        # get the estimated width at this location
        width = widths[iv]

        iz, iy, ix = data.GlobalIndexToIndices(iv)

        # convert the coordinates into a 2d vector with the resolutions
        vec = np.zeros((1, 3), dtype=np.float32)
        vec[0,:] = (resolution[OR_Z] * iz, resolution[OR_Y] * iy, resolution[OR_X] * ix)

        # get the min distance from this point to the surface (true width)
        min_distance = scipy.spatial.distance.cdist(np_point_cloud, vec).min()

        results['errors'].append(abs(width - min_distance))
        results['estimates'] += width
        results['ground_truths'] += min_distance

    # skip over vacuous skeletons
    if len(results['errors']) < 2: return

    # output the errors, estimates, and ground truths to a pickled file
    tmp_widths_directory = '{}/results/widths'.format(data.TempDirectory())
    os.makedirs(tmp_widths_directory, exist_ok=True)

    output_filename = '{}/{:016d}.pickle'.format(tmp_widths_directory, label)
    PickleData(results, output_filename)
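
Calling cdist once per skeleton point costs O(npoints) per query; for large surfaces, a KD-tree returns the same minimum distance without forming the full distance matrix. A sketch of that alternative (a suggestion, not what this codebase does):

import numpy as np
from scipy.spatial import cKDTree

np_point_cloud = np.random.rand(100000, 3).astype(np.float32)
tree = cKDTree(np_point_cloud)  # build once per label

# nearest-neighbor distance for a single query point
min_distance, _ = tree.query([0.5, 0.5, 0.5])
print(min_distance)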
Example #5
def CombineStatistics(data):
    # start timing statistics
    total_time = time.time()

    # the statistics directory must already exist, containing the per-block results
    statistics_directory = '{}/statistics'.format(data.TempDirectory())

    label_volumes_with_holes = {}
    label_volumes_filled = {}
    label_volumes = {}
    neuronal_volume_with_holes = 0
    neuronal_volume = 0

    # read the pickle file generated for each block
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                statistics_filename = '{}/{:04d}z-{:04d}y-{:04d}x.pickle'.format(
                    statistics_directory, iz, iy, ix)
                statistics = ReadPickledData(statistics_filename)

                for label in statistics['raw_voxel_counts'].keys():
                    if not label in label_volumes_with_holes:
                        label_volumes_with_holes[label] = 0
                        label_volumes[label] = 0

                    label_volumes_with_holes[label] += statistics[
                        'raw_voxel_counts'][label]
                    label_volumes[label] += statistics['filled_voxel_counts'][
                        label]

                neuronal_volume_with_holes += statistics['raw_n_non_zero']
                neuronal_volume += statistics['filled_n_non_zero']

    labels = label_volumes.keys()
    for label in labels:
        label_volume = label_volumes[label]
        label_volume_filled = label_volume - label_volumes_with_holes[label]

        print('Label {}:'.format(label))
        print('  Volume:        {:14d}'.format(label_volume))
        print('  Filled Volume: {:14d}   ({:5.2f}%)\n'.format(
            label_volume_filled, 100 * label_volume_filled / label_volume))

        # add to the dictionary of filled volumes
        label_volumes_filled[label] = label_volume_filled

    # calculate the total volume of holes that were filled
    neuronal_volume_filled = neuronal_volume - neuronal_volume_with_holes
    total_volume = data.NVoxels()

    print('Volume Size:     {:14d}'.format(total_volume))
    print('  Neuron Volume: {:14d}   ({:5.2f}%)'.format(
        neuronal_volume, 100 * neuronal_volume / total_volume))
    print('  Filled Volume: {:14d}   ({:5.2f}%)'.format(
        neuronal_volume_filled,
        100 * neuronal_volume_filled / neuronal_volume))

    # output the aggregated data to a pickle file
    statistics = {}

    statistics['label_volumes'] = label_volumes
    statistics['label_volumes_with_holes'] = label_volumes_with_holes
    statistics['label_volumes_filled'] = label_volumes_filled

    statistics['neuronal_volume'] = neuronal_volume
    statistics['neuronal_volume_with_holes'] = neuronal_volume_with_holes
    statistics['neuronal_volume_filled'] = neuronal_volume_filled

    statistics_filename = '{}/combined-statistics.pickle'.format(
        statistics_directory)
    PickleData(statistics, statistics_filename)

    total_time = time.time() - total_time

    print('Total Time: {:0.2f} seconds.'.format(total_time))
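
PickleData and ReadPickledData appear throughout these examples but are not shown; presumably they are thin wrappers around the standard pickle module, roughly like the following sketch (the real helpers may differ):

import pickle

def PickleDataSketch(data, filename):
    # serialize any picklable object to disk
    with open(filename, 'wb') as fd:
        pickle.dump(data, fd, protocol=pickle.HIGHEST_PROTOCOL)

def ReadPickledDataSketch(filename):
    # read back an object written by PickleDataSketch
    with open(filename, 'rb') as fd:
        return pickle.load(fd)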
Example #6
def FindPerBlockConnectedComponents(data, iz, iy, ix):

    # start timing statistics
    total_time = time.time()

    # get the number of blocks in each dimension
    nblocks = data.NBlocks()
    block_volume = data.BlockVolume()

    # get the index for this block
    block_index = data.IndexFromIndices(iz, iy, ix)

    # get the index for the background volumes
    background_start_label = -1 - (block_index * block_volume)

    # read in this volume
    read_time = time.time()
    seg = data.ReadRawSegmentationBlock(iz, iy, ix)
    read_time = time.time() - read_time

    # make sure the block is not larger than specified in the param file
    assert (seg.shape[OR_Z] <= data.BlockZLength())
    assert (seg.shape[OR_Y] <= data.BlockYLength())
    assert (seg.shape[OR_X] <= data.BlockXLength())

    # pad the block with zeroes at the ends
    if (seg.shape[OR_Z] < data.BlockZLength()
            or seg.shape[OR_Y] < data.BlockYLength()
            or seg.shape[OR_X] < data.BlockXLength()):
        # make sure that the block is on one of the far edges
        assert (iz == data.EndZ() - 1 or iy == data.EndY() - 1
                or ix == data.EndX() - 1)

        zpadding = data.BlockZLength() - seg.shape[OR_Z]
        ypadding = data.BlockYLength() - seg.shape[OR_Y]
        xpadding = data.BlockXLength() - seg.shape[OR_X]

        # padding only goes at the far edges of the block
        seg = np.pad(seg, ((0, zpadding), (0, ypadding), (0, xpadding)),
                     'constant',
                     constant_values=0)

        # make sure the padded block matches the size specified in the param file
        assert (seg.shape[OR_Z] == data.BlockZLength())
        assert (seg.shape[OR_Y] == data.BlockYLength())
        assert (seg.shape[OR_X] == data.BlockXLength())

    # call connected components algorithm for this block
    components_time = time.time()

    components = ComputeConnected6Components(seg, background_start_label)

    # delete original segmentation
    del seg

    # save the components file to disk
    tmp_directory = data.TempBlockDirectory(iz, iy, ix)

    # create the folder if it does not exist
    os.makedirs(tmp_directory, exist_ok=True)

    # write the components and all walls to file
    WriteH5File(components, '{}/components.h5'.format(tmp_directory))
    WriteH5File(components[0, :, :],
                '{}/z-min-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[-1, :, :],
                '{}/z-max-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, 0, :],
                '{}/y-min-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, -1, :],
                '{}/y-max-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, :, 0],
                '{}/x-min-hole-filling.h5'.format(tmp_directory))
    WriteH5File(components[:, :, -1],
                '{}/x-max-hole-filling.h5'.format(tmp_directory))

    components_time = time.time() - components_time

    # find the set of adjacent labels, both inside the volume and the ones connected at the local border
    adjacency_set_time = time.time()
    neighbor_label_set = FindAdjacentLabelSetLocal(components)
    adjacency_set_time = time.time() - adjacency_set_time

    # create a dictionary of labels from the set
    background_associated_labels_time = time.time()
    neighbor_label_dict = Set2Dictionary(neighbor_label_set)

    # to start, none of the background components are determined
    undetermined_label_set = set(neighbor_label_dict.keys())
    # dictionary associated background components to labels
    associated_label_dict = Dict.empty(key_type=types.int64,
                                       value_type=types.int64)
    associated_label_dict, undetermined_label_set, holes, non_holes = FindBackgroundComponentsAssociatedLabels(
        neighbor_label_dict, undetermined_label_set, associated_label_dict)
    background_associated_labels_time = (time.time() -
                                         background_associated_labels_time)

    # remove border elements, and components already determined to be holes
    # or non-holes, from the neighbor label set
    neighbor_label_set_reduced = PruneNeighborLabelSet(neighbor_label_set,
                                                       holes, non_holes)
    neighbor_label_dict_reduced = Set2Dictionary(neighbor_label_set_reduced)

    # delete the temporary generated set and dictionary
    del neighbor_label_set, neighbor_label_dict

    # write the relevant files to disk
    write_time = time.time()
    PickleNumbaData(
        associated_label_dict,
        '{}/associated-label-set-local.pickle'.format(tmp_directory))
    PickleData(undetermined_label_set,
               '{}/undetermined-label-set-local.pickle'.format(tmp_directory))
    PickleData(
        neighbor_label_dict_reduced,
        '{}/neighbor-label-dictionary-reduced.pickle'.format(tmp_directory))
    write_time = time.time() - write_time

    total_time = time.time() - total_time

    print('Read Time: {:0.2f} seconds.'.format(read_time))
    print('Components Time: {:0.2f} seconds.'.format(components_time))
    print('Adjacency Set Time: {:0.2f} seconds.'.format(adjacency_set_time))
    print('Background Components Associated Labels: {:0.2f} seconds.'.format(
        background_associated_labels_time))
    print('Write Time: {:0.2f} seconds.'.format(write_time))
    print('Total Time: {:0.2f} seconds.'.format(total_time))

    # generate statistics for the holes
    # does not count towards total computation time
    labels, counts = np.unique(components, return_counts=True)

    hole_sizes = {}

    for iv, label in enumerate(labels):
        # skip the actual neurons in the volume
        if label > 0: continue
        hole_sizes[label] = counts[iv]

    # save the output file
    PickleData(hole_sizes, '{}/hole-sizes.pickle'.format(tmp_directory))

    # delete the components (no longer needed)
    del components

    # output timing statistics
    timing_directory = '{}/connected-components'.format(data.TimingDirectory())
    os.makedirs(timing_directory, exist_ok=True)
    timing_filename = '{}/{:04d}z-{:04d}y-{:04d}x.txt'.format(
        timing_directory, iz, iy, ix)
    with open(timing_filename, 'w') as fd:
        fd.write('Read Time: {:0.2f} seconds.\n'.format(read_time))
        fd.write('Components Time: {:0.2f} seconds.\n'.format(components_time))
        fd.write('Adjacency Set Time: {:0.2f} seconds.\n'.format(
            adjacency_set_time))
        fd.write('Background Components Associated Labels: {:0.2f} seconds.\n'.
                 format(background_associated_labels_time))
        fd.write('Write Time: {:0.2f} seconds.\n'.format(write_time))
        fd.write('Total Time: {:0.2f} seconds.\n'.format(total_time))
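
ComputeConnected6Components is external to this listing. One way such a labeling could work, using scipy.ndimage with face-only (6-) connectivity and the negative background_start_label convention used above; a hedged sketch, not the codebase's implementation:

import numpy as np
from scipy import ndimage

def ComputeConnected6ComponentsSketch(seg, background_start_label):
    # face-only (6-connected) structuring element in 3D
    structure = ndimage.generate_binary_structure(3, 1)
    # label connected components of the background (zero voxels)
    background, _ = ndimage.label(seg == 0, structure=structure)
    # keep foreground labels; assign background components negative labels
    # counting down from background_start_label
    components = seg.astype(np.int64)
    mask = background > 0
    components[mask] = background_start_label - (background[mask] - 1)
    return components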
Example #7
def ConnectLabelsAcrossBlocks(data, iz, iy, ix):
    # start timing statistics
    total_time = time.time()

    # find all of the adjacent components across the boundaries
    adjacency_set_time = time.time()

    # create an empty list of adjacency sets
    neighbor_label_set_global = set()
    # add a placeholder tuple so numba can infer the set's type fingerprint
    neighbor_label_set_global.add((BORDER_CONTACT, BORDER_CONTACT))

    # get the temporary directory for this dataset
    tmp_directory = data.TempBlockDirectory(iz, iy, ix)

    # this block occurs at the minimum in the z direction
    if iz == data.StartZ():
        neighbor_label_set_global = ConnectBlockToGlobalBorder(
            neighbor_label_set_global, tmp_directory, 'z', 'min')

    # this block occurs at the minimum of the y direction
    if iy == data.StartY():
        neighbor_label_set_global = ConnectBlockToGlobalBorder(
            neighbor_label_set_global, tmp_directory, 'y', 'min')

    # this block occurs at the minimum of the x direction
    if ix == data.StartX():
        neighbor_label_set_global = ConnectBlockToGlobalBorder(
            neighbor_label_set_global, tmp_directory, 'x', 'min')

    # this block occurs at the maximum in the z direction
    if iz == data.EndZ() - 1:
        neighbor_label_set_global = ConnectBlockToGlobalBorder(
            neighbor_label_set_global, tmp_directory, 'z', 'max')
    # this block has a neighbor in the positive z direction
    else:
        neighbor_label_set_global = ConnectBlocks(data,
                                                  neighbor_label_set_global,
                                                  iz, iy, ix, 'z')

    # this block occurs at the maximum of the y direction
    if iy == data.EndY() - 1:
        neighbor_label_set_global = ConnectBlockToGlobalBorder(
            neighbor_label_set_global, tmp_directory, 'y', 'max')
    # this block has a neighbor in the positive y direction
    else:
        neighbor_label_set_global = ConnectBlocks(data,
                                                  neighbor_label_set_global,
                                                  iz, iy, ix, 'y')

    # this block occurs at the maximum of the x direction
    if ix == data.EndX() - 1:
        neighbor_label_set_global = ConnectBlockToGlobalBorder(
            neighbor_label_set_global, tmp_directory, 'x', 'max')
    # this block has a neighbor in the positive x direction
    else:
        neighbor_label_set_global = ConnectBlocks(data,
                                                  neighbor_label_set_global,
                                                  iz, iy, ix, 'x')

    # remove the placeholder tuple from the set
    neighbor_label_set_global.remove((BORDER_CONTACT, BORDER_CONTACT))

    adjacency_set_time = time.time() - adjacency_set_time

    # write the relevant files to disk
    write_time = time.time()
    PickleData(neighbor_label_set_global,
               '{}/neighbor-label-set-global.pickle'.format(tmp_directory))
    write_time = time.time() - write_time

    total_time = time.time() - total_time

    print('Adjacency Set Time: {:0.2f} seconds.'.format(adjacency_set_time))
    print('Write Time: {:0.2f} seconds.'.format(write_time))
    print('Total Time: {:0.2f} seconds.'.format(total_time))

    # output timing statistics
    timing_directory = '{}/connect-labels-across-blocks'.format(
        data.TimingDirectory())
    os.makedirs(timing_directory, exist_ok=True)
    timing_filename = '{}/{:04d}z-{:04d}y-{:04d}x.txt'.format(
        timing_directory, iz, iy, ix)
    with open(timing_filename, 'w') as fd:
        fd.write('Adjacency Set Time: {:0.2f} seconds.\n'.format(
            adjacency_set_time))
        fd.write('Write Time: {:0.2f} seconds.\n'.format(write_time))
        fd.write('Total Time: {:0.2f} seconds.\n'.format(total_time))
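
ConnectBlocks and ConnectBlockToGlobalBorder are also not shown. The core of cross-block matching is pairing the components on this block's max wall with those on the adjacent block's min wall, voxel by voxel; a minimal sketch under that assumption (the function name and in-memory walls are hypothetical):

import numpy as np

def ConnectWallsSketch(neighbor_label_set, wall_max, wall_min):
    # wall_max: 2D component slice at this block's far face
    # wall_min: matching 2D slice at the adjacent block's near face
    assert wall_max.shape == wall_min.shape
    for a, b in zip(wall_max.ravel(), wall_min.ravel()):
        # record every pair of labels that touch across the shared face
        neighbor_label_set.add((int(a), int(b)))
    return neighbor_label_set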