import os
import statistics
import time

# numba typed containers used by the hole-filling routines below
from numba import types
from numba.typed import Dict

# NOTE: the I/O helpers (ReadMetaData, ReadPickledData, PickleData, PickleNumbaData,
# ReadH5File, WriteH5File, ReadPtsFile) and the hole-filling primitives (Set2Dictionary,
# FindBackgroundComponentsAssociatedLabels, AssignBackgroundAssociatedLabels) are
# assumed to be defined or imported elsewhere in this package.



def CombineEvaluatedWidths(data):
    errors = []
    estimates = 0
    ground_truths = 0

    # get the output filename
    evaluation_directory = data.EvaluationDirectory()
    if not os.path.exists(evaluation_directory):
        os.makedirs(evaluation_directory, exist_ok=True)
    output_filename = '{}/widths-results.txt'.format(evaluation_directory)

    fd = open(output_filename, 'w')

    # for each label, read in the surface and the widths generated
    for label in range(1, data.NLabels()):
        # read the generated widths
        tmp_widths_directory = '{}/results/widths'.format(data.TempDirectory())
        widths_filename = '{}/{:016d}.pickle'.format(tmp_widths_directory, label)

        # skip over files that do not exist
        if not os.path.exists(widths_filename):
            continue

        results = ReadPickledData(widths_filename)

        # output the results and update the average error
        print ('Label: {}'.format(label))
        print ('  Mean Absolute Error: {:0.4f} (\u00B1{:0.2f}) nanometers'.format(statistics.mean(results['errors']), statistics.stdev(results['errors'])))
        print ('  Estimated Widths: {:0.4f}'.format(results['estimates']))
        print ('  Ground Truth Widths: {:0.4f}'.format(results['ground_truths']))
        print ('  Percent Different: {:0.2f}%'.format(100.0 * (results['estimates'] - results['ground_truths']) / results['ground_truths']))

        fd.write ('Label: {}\n'.format(label))
        fd.write ('  Mean Absolute Error: {:0.4f} (\u00B1{:0.2f}) nanometers\n'.format(statistics.mean(results['errors']), statistics.stdev(results['errors'])))
        fd.write ('  Estimated Widths: {:0.4f}\n'.format(results['estimates']))
        fd.write ('  Ground Truth Widths: {:0.4f}\n'.format(results['ground_truths']))
        fd.write ('  Percent Different: {:0.2f}%\n'.format(100.0 * (results['estimates'] - results['ground_truths']) / results['ground_truths']))

        # update global information
        errors += results['errors']
        estimates += results['estimates']
        ground_truths += results['ground_truths']

    print ('Total Volume')
    print ('  Mean Absolute Error: {:0.4f} (\u00B1{:0.2f}) nanometers'.format(statistics.mean(errors), statistics.stdev(errors)))
    print ('  Estimated Widths: {:0.4f}'.format(estimates))
    print ('  Ground Truth Widths: {:0.4f}'.format(ground_truths))
    print ('  Percent Different: {:0.2f}%'.format(100.0 * (estimates - ground_truths) / ground_truths))

    fd.write ('Total Volume\n')
    fd.write ('  Mean Absolute Error: {:0.4f} (\u00B1{:0.2f}) nanometers\n'.format(statistics.mean(errors), statistics.stdev(errors)))
    fd.write ('  Estimated Widths: {:0.4f}\n'.format(estimates))
    fd.write ('  Ground Truth Widths: {:0.4f}\n'.format(ground_truths))
    fd.write ('  Percent Different: {:0.2f}%\n'.format(100.0 * (estimates - ground_truths) / ground_truths))

    # close the output file
    fd.close()
def CombineGeodesicDistances(data):
    diffs = []
    euclideans = 0
    geodesics = 0

    # get the output filename
    evaluation_directory = data.EvaluationDirectory()
    if not os.path.exists(evaluation_directory):
        os.makedirs(evaluation_directory, exist_ok=True)
    output_filename = '{}/distance-results.txt'.format(evaluation_directory)

    fd = open(output_filename, 'w')

    for label in range(1, data.NLabels()):
        # read the generated distances
        tmp_distances_directory = '{}/results/distances'.format(data.TempDirectory())
        distances_filename = '{}/{:016d}.pickle'.format(tmp_distances_directory, label)

        # skip over files that do not exist
        if not os.path.exists(distances_filename):
            continue

        results = ReadPickledData(distances_filename)

        # output the results and update the average distances
        print ('Label: {}'.format(label))
        print ('  Mean Absolute Difference: {:0.4f} (\u00B1{:0.2f}) nanometers'.format(statistics.mean(results['diffs']), statistics.stdev(results['diffs'])))
        print ('  Euclidean Distances: {:0.4f}'.format(results['euclidean']))
        print ('  Geodesic Distances: {:0.4f}'.format(results['geodesic']))
        print ('  Difference: {:0.4f}%'.format(100.0 * (results['geodesic'] - results['euclidean']) / results['euclidean']))

        fd.write ('Label: {}\n'.format(label))
        fd.write ('  Mean Absolute Difference: {:0.4f} (\u00B1{:0.2f}) nanometers\n'.format(statistics.mean(results['diffs']), statistics.stdev(results['diffs'])))
        fd.write ('  Euclidean Distances: {:0.4f}\n'.format(results['euclidean']))
        fd.write ('  Geodesic Distances: {:0.4f}\n'.format(results['geodesic']))
        fd.write ('  Difference: {:0.4f}%\n'.format(100.0 * (results['geodesic'] - results['euclidean']) / results['euclidean']))

        # update global information
        diffs += results['diffs']
        euclideans += results['euclidean']
        geodesics += results['geodesic']

    print ('Total Volume')
    print ('  Mean Absolute Difference: {:0.4f} (\u00B1{:0.2f}) nanometers'.format(statistics.mean(diffs), statistics.stdev(diffs)))
    print ('  Euclidean Distances: {:0.4f}'.format(euclideans))
    print ('  Geodesic Distances: {:0.4f}'.format(geodesics))
    print ('  Difference: {:0.4f}%'.format(100.0 * (geodesics - euclideans) / euclideans))

    fd.write ('Total Volume\n')
    fd.write ('  Mean Absolute Difference: {:0.4f} (\u00B1{:0.2f}) nanometers\n'.format(statistics.mean(diffs), statistics.stdev(diffs)))
    fd.write ('  Euclidean Distances: {:0.4f}\n'.format(euclideans))
    fd.write ('  Geodesic Distances: {:0.4f}\n'.format(geodesics))
    fd.write ('  Difference: {:0.4f}%\n'.format(100.0 * (geodesics - euclideans) / euclideans))

    # close the output file
    fd.close()
def RemoveHoles(data, iz, iy, ix):
    # start timing statistics
    total_time = time.time()

    # read in the associated labels and the connected components
    read_time = time.time()
    components = ReadH5File('{}/components.h5'.format(data.TempBlockDirectory(iz, iy, ix)))

    # need to first create separate empty numba Dict
    associated_label_dict = Dict.empty(key_type=types.int64, value_type=types.int64)
    associated_label_dict.update(ReadPickledData('{}/hole-filling-associated-labels.pickle'.format(data.TempDirectory())))
    read_time = time.time() - read_time

    # remove all the holes with the associated labels dictionary
    hole_fill_time = time.time()
    components = AssignBackgroundAssociatedLabels(components, associated_label_dict)
    hole_fill_time = time.time() - hole_fill_time

    # write the updated components to disk
    write_time = time.time()
    output_directory = data.HoleFillingOutputDirectory()
    output_filename = '{}/{:04d}z-{:04d}y-{:04d}x.h5'.format(output_directory, iz, iy, ix)
    WriteH5File(components, output_filename)
    write_time = time.time() - write_time

    total_time = time.time() - total_time

    print('Read Time: {:0.2f} seconds.'.format(read_time))
    print('Hole Fill Time: {:0.2f} seconds.'.format(hole_fill_time))
    print('Write Time: {:0.2f} seconds.'.format(write_time))
    print('Total Time: {:0.2f} seconds.'.format(total_time))

    # output timing statistics
    timing_directory = '{}/fill-holes'.format(data.TimingDirectory())
    if not os.path.exists(timing_directory):
        os.makedirs(timing_directory, exist_ok=True)
    timing_filename = '{}/{:04d}z-{:04d}y-{:04d}x.txt'.format(timing_directory, iz, iy, ix)
    with open(timing_filename, 'w') as fd:
        fd.write('Read Time: {:0.2f} seconds.\n'.format(read_time))
        fd.write('Hole Fill Time: {:0.2f} seconds.\n'.format(hole_fill_time))
        fd.write('Write Time: {:0.2f} seconds.\n'.format(write_time))
        fd.write('Total Time: {:0.2f} seconds.\n'.format(total_time))
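# Usage sketch (hypothetical; 'example.meta' is a placeholder filename): RemoveHoles
# operates on one block at a time, indexed by the same (iz, iy, ix) block coordinates
# that name the temporary block directories elsewhere in this file. It expects the
# global associated-labels pickle written by CombineAssociatedLabels to already exist.
#
#     data = ReadMetaData('example.meta')
#     RemoveHoles(data, 0, 0, 0)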
def EvaluateHoleFilling(meta_filename):
    data = ReadMetaData(meta_filename)

    # make sure a results folder is specified
    assert (not data.EvaluationDirectory() == None)

    hole_sizes = {}
    neighbor_label_dicts = {}

    # read in the hole sizes from each block
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                tmp_block_directory = data.TempBlockDirectory(iz, iy, ix)

                # read the saved hole sizes for this block
                hole_sizes_filename = '{}/hole-sizes.pickle'.format(tmp_block_directory)
                holes_sizes_per_block = ReadPickledData(hole_sizes_filename)

                for label in holes_sizes_per_block:
                    hole_sizes[label] = holes_sizes_per_block[label]

                # any value already determined in the local step must have no neighbors
                associated_label_dict = ReadPickledData('{}/associated-label-set-local.pickle'.format(tmp_block_directory))
                for label in associated_label_dict:
                    neighbor_label_dicts[label] = []

    # read in the neighbor label dictionary that maps values to its neighbors
    tmp_directory = data.TempDirectory()
    neighbor_label_filename = '{}/hole-filling-neighbor-label-dict-global.pickle'.format(tmp_directory)
    neighbor_label_dict_global = ReadPickledData(neighbor_label_filename)
    associated_label_filename = '{}/hole-filling-associated-labels.pickle'.format(tmp_directory)
    associated_label_dict = ReadPickledData(associated_label_filename)

    # make sure that the keys are identical for hole sizes and associated labels (sanity check)
    assert (sorted(hole_sizes.keys()) == sorted(associated_label_dict.keys()))

    # make sure no query component in the global dictionary occurs in the local dictionary
    for label in neighbor_label_dict_global.keys():
        assert (not label in neighbor_label_dicts)

    # create a unified neighbor labels dictionary that combines local and global information
    neighbor_label_dicts.update(neighbor_label_dict_global)

    # make sure that the keys are identical for hole sizes and the neighbor label dicts
    assert (sorted(hole_sizes.keys()) == sorted(neighbor_label_dicts.keys()))

    # union find data structure to link together holes across blocks
    class UnionFindElement:
        def __init__(self, label):
            self.label = label
            self.parent = self
            self.rank = 0

    # find the root of this element, compressing paths along the way
    def Find(element):
        if not element.parent == element:
            element.parent = Find(element.parent)
        return element.parent

    # merge the trees containing these two elements, by rank
    def Union(element_one, element_two):
        root_one = Find(element_one)
        root_two = Find(element_two)

        if root_one == root_two:
            return

        if root_one.rank < root_two.rank:
            root_one.parent = root_two
        elif root_one.rank > root_two.rank:
            root_two.parent = root_one
        else:
            root_two.parent = root_one
            root_one.rank = root_one.rank + 1

    union_find_elements = {}
    for label in neighbor_label_dicts.keys():
        # skip over elements that remain background
        if not associated_label_dict[label]:
            continue

        union_find_elements[label] = UnionFindElement(label)

    for label in neighbor_label_dicts.keys():
        # skip over elements that remain background
        if not associated_label_dict[label]:
            continue

        for neighbor_label in neighbor_label_dicts[label]:
            # skip over the actual neuron label
            if neighbor_label > 0:
                continue

            # merge these two labels together
            Union(union_find_elements[label], union_find_elements[neighbor_label])

    root_holes_sizes = {}

    # go through all labels in the union find data structure and update the hole size for the parent
    for label in union_find_elements.keys():
        root_label = Find(union_find_elements[label])

        # create this hole if it does not already exist
        if not root_label.label in root_holes_sizes:
            root_holes_sizes[root_label.label] = 0
        root_holes_sizes[root_label.label] += hole_sizes[label]

    # read in the statistics data to find total volume size
    statistics_directory = '{}/statistics'.format(data.TempDirectory())
    statistics_filename = '{}/combined-statistics.pickle'.format(statistics_directory)
    volume_statistics = ReadPickledData(statistics_filename)
    total_volume = volume_statistics['neuronal_volume']

    holes = []
    small_holes = 0

    for root_label in root_holes_sizes.keys():
        # holes smaller than five voxels count as small
        if root_holes_sizes[root_label] < 5:
            small_holes += 1
        holes.append(root_holes_sizes[root_label])

    # get statistics on the number of holes
    nholes = len(holes)
    total_hole_volume = sum(holes)

    print ('Percent Small: {}'.format(100.0 * small_holes / nholes))
    print ('No. Holes: {}'.format(nholes))
    print ('Total Volume: {} ({:0.2f}%)'.format(total_hole_volume, 100.0 * total_hole_volume / total_volume))
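# Worked example (hypothetical numbers, for illustration only): suppose hole
# fragments -2 (40 voxels, found in block A) and -3 (25 voxels, found in block B)
# neighbor each other across a block boundary. Union links them under a single
# root, so root_holes_sizes records one 65-voxel hole rather than two separate
# holes of 40 and 25 voxels.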
def EvaluateSkeletons(meta_filename):
    data = ReadMetaData(meta_filename)

    # make sure a results folder is specified
    assert (not data.EvaluationDirectory() == None)

    # read in statistics about this data set
    statistics_directory = '{}/statistics'.format(data.TempDirectory())
    statistics_filename = '{}/combined-statistics.pickle'.format(statistics_directory)
    statistics = ReadPickledData(statistics_filename)

    label_volumes = statistics['label_volumes']

    total_volume = 0
    total_thinned_skeleton_length = 0
    total_refined_skeleton_length = 0
    nlabels = 0

    # get the output filename
    evaluation_directory = data.EvaluationDirectory()
    if not os.path.exists(evaluation_directory):
        os.makedirs(evaluation_directory, exist_ok=True)
    output_filename = '{}/skeleton-results.txt'.format(evaluation_directory)

    fd = open(output_filename, 'w')

    for label in sorted(label_volumes):
        # read pre-refinement skeleton
        thinning_filename = '{}/skeletons/{:016d}.pts'.format(data.TempDirectory(), label)

        # skip files that do not exist (no synapses, e.g.)
        if not os.path.exists(thinning_filename):
            continue

        thinned_skeletons_global_pts, _ = ReadPtsFile(data, thinning_filename)
        thinned_skeletons = thinned_skeletons_global_pts[label]

        refined_filename = '{}/skeletons/{:016d}.pts'.format(data.SkeletonOutputDirectory(), label)
        refined_skeletons_global_pts, _ = ReadPtsFile(data, refined_filename)
        refined_skeletons = refined_skeletons_global_pts[label]

        # get the volume and total remaining voxels
        volume = label_volumes[label]
        thinned_skeleton_length = len(thinned_skeletons)
        refined_skeleton_length = len(refined_skeletons)

        # update the variables that aggregate all labels
        total_volume += volume
        total_thinned_skeleton_length += thinned_skeleton_length
        total_refined_skeleton_length += refined_skeleton_length

        # calculate the percent and reduction of total voxels remaining
        thinned_remaining_percent = 100 * thinned_skeleton_length / volume
        thinning_reduction_factor = volume / thinned_skeleton_length
        refined_remaining_percent = 100 * refined_skeleton_length / thinned_skeleton_length
        refinement_reduction_factor = thinned_skeleton_length / refined_skeleton_length

        # calculate the total percent/reduction after all steps
        total_skeleton_percent = 100 * refined_skeleton_length / volume
        total_skeleton_reduction = volume / refined_skeleton_length

        print ('Label: {}'.format(label))
        print ('  Input Volume:         {:10d}'.format(volume))
        print ('  Topological Thinning: {:10d} ({:05.2f}%) {:10.2f}x'.format(thinned_skeleton_length, thinned_remaining_percent, thinning_reduction_factor))
        print ('  Skeleton Refinement:  {:10d} ({:05.2f}%) {:10.2f}x'.format(refined_skeleton_length, refined_remaining_percent, refinement_reduction_factor))
        print ('  Total:                           ({:05.2f}%) {:10.2f}x'.format(total_skeleton_percent, total_skeleton_reduction))

        fd.write ('Label: {}\n'.format(label))
        fd.write ('  Input Volume:         {:10d}\n'.format(volume))
        fd.write ('  Topological Thinning: {:10d} ({:05.2f}%) {:10.2f}x\n'.format(thinned_skeleton_length, thinned_remaining_percent, thinning_reduction_factor))
        fd.write ('  Skeleton Refinement:  {:10d} ({:05.2f}%) {:10.2f}x\n'.format(refined_skeleton_length, refined_remaining_percent, refinement_reduction_factor))
        fd.write ('  Total:                           ({:05.2f}%) {:10.2f}x\n'.format(total_skeleton_percent, total_skeleton_reduction))

        nlabels += 1

    # calculate the percent and reduction of total voxels remaining
    thinned_remaining_percent = 100 * total_thinned_skeleton_length / total_volume
    thinning_reduction_factor = total_volume / total_thinned_skeleton_length
    refined_remaining_percent = 100 * total_refined_skeleton_length / total_thinned_skeleton_length
    refinement_reduction_factor = total_thinned_skeleton_length / total_refined_skeleton_length

    # calculate the total percent/reduction after all steps
    total_skeleton_percent = 100 * total_refined_skeleton_length / total_volume
    total_skeleton_reduction = total_volume / total_refined_skeleton_length

    print ('Input Volume:         {:10d}'.format(total_volume))
    print ('Topological Thinning: {:10d} ({:05.2f}%) {:10.2f}x'.format(total_thinned_skeleton_length, thinned_remaining_percent, thinning_reduction_factor))
    print ('Skeleton Refinement:  {:10d} ({:05.2f}%) {:10.2f}x'.format(total_refined_skeleton_length, refined_remaining_percent, refinement_reduction_factor))
    print ('Total:                           ({:05.2f}%) {:10.2f}x'.format(total_skeleton_percent, total_skeleton_reduction))
    print ('Average Skeleton:     {:0.0f}'.format(total_refined_skeleton_length / nlabels))

    fd.write ('Input Volume:         {:10d}\n'.format(total_volume))
    fd.write ('Topological Thinning: {:10d} ({:05.2f}%) {:10.2f}x\n'.format(total_thinned_skeleton_length, thinned_remaining_percent, thinning_reduction_factor))
    fd.write ('Skeleton Refinement:  {:10d} ({:05.2f}%) {:10.2f}x\n'.format(total_refined_skeleton_length, refined_remaining_percent, refinement_reduction_factor))
    fd.write ('Total:                           ({:05.2f}%) {:10.2f}x\n'.format(total_skeleton_percent, total_skeleton_reduction))
    fd.write ('Average Skeleton:     {:0.0f}\n'.format(total_refined_skeleton_length / nlabels))

    # close the file
    fd.close()
def CombineStatistics(data):
    # start timing statistics
    total_time = time.time()

    # the statistics directory must already exist for previous results
    statistics_directory = '{}/statistics'.format(data.TempDirectory())

    label_volumes_with_holes = {}
    label_volumes_filled = {}
    label_volumes = {}
    neuronal_volume_with_holes = 0
    neuronal_volume = 0

    # read the pickle file generated for each block
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                statistics_filename = '{}/{:04d}z-{:04d}y-{:04d}x.pickle'.format(statistics_directory, iz, iy, ix)
                statistics = ReadPickledData(statistics_filename)

                for label in statistics['raw_voxel_counts'].keys():
                    if not label in label_volumes_with_holes:
                        label_volumes_with_holes[label] = 0
                        label_volumes[label] = 0

                    label_volumes_with_holes[label] += statistics['raw_voxel_counts'][label]
                    label_volumes[label] += statistics['filled_voxel_counts'][label]

                neuronal_volume_with_holes += statistics['raw_n_non_zero']
                neuronal_volume += statistics['filled_n_non_zero']

    labels = label_volumes.keys()

    for label in labels:
        label_volume = label_volumes[label]
        label_volume_filled = label_volume - label_volumes_with_holes[label]

        print('Label {}:'.format(label))
        print('  Volume:        {:14d}'.format(label_volume))
        print('  Filled Volume: {:14d} ({:5.2f}%)\n'.format(label_volume_filled, 100 * label_volume_filled / label_volume))

        # add to dictionary of filled volumes
        label_volumes_filled[label] = label_volume_filled

    # calculate what percent of the total volume of holes were filled
    neuronal_volume_filled = neuronal_volume - neuronal_volume_with_holes

    total_volume = data.NVoxels()

    print('Volume Size:     {:14d}'.format(total_volume))
    print('  Neuron Volume: {:14d} ({:5.2f}%)'.format(neuronal_volume, 100 * neuronal_volume / total_volume))
    print('  Filled Volume: {:14d} ({:5.2f}%)'.format(neuronal_volume_filled, 100 * neuronal_volume_filled / neuronal_volume))

    # output the aggregated data to a pickle file
    statistics = {}
    statistics['label_volumes'] = label_volumes
    statistics['label_volumes_with_holes'] = label_volumes_with_holes
    statistics['label_volumes_filled'] = label_volumes_filled
    statistics['neuronal_volume'] = neuronal_volume
    statistics['neuronal_volume_with_holes'] = neuronal_volume_with_holes
    statistics['neuronal_volumes_filled'] = neuronal_volume_filled

    statistics_filename = '{}/combined-statistics.pickle'.format(statistics_directory)
    PickleData(statistics, statistics_filename)

    total_time = time.time() - total_time
    print('Total Time: {:0.2f} seconds.'.format(total_time))
def CombineAssociatedLabels(data):
    # start timing statistics
    total_time = time.time()

    # create empty sets/dicts
    neighbor_label_set_global = set()
    associated_label_dict_global = Dict.empty(key_type=types.int64, value_type=types.int64)
    undetermined_label_set_global = set()
    neighbor_label_dict_local = dict()

    read_time = time.time()

    # iterate over all blocks and read in global/local dicts
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                # get the location for the temporary directory
                tmp_directory = data.TempBlockDirectory(iz, iy, ix)

                # read the four sets/dicts for this one block
                block_neighbor_label_set_global = ReadPickledData('{}/neighbor-label-set-global.pickle'.format(tmp_directory))
                block_associated_label_dict = ReadPickledData('{}/associated-label-set-local.pickle'.format(tmp_directory))
                block_undetermined_label_set = ReadPickledData('{}/undetermined-label-set-local.pickle'.format(tmp_directory))
                block_neighbor_label_dict_local = ReadPickledData('{}/neighbor-label-dictionary-reduced.pickle'.format(tmp_directory))

                # combine the local datasets with the global ones
                neighbor_label_set_global = neighbor_label_set_global.union(block_neighbor_label_set_global)
                associated_label_dict_global.update(block_associated_label_dict)
                undetermined_label_set_global = undetermined_label_set_global.union(block_undetermined_label_set)
                neighbor_label_dict_local.update(block_neighbor_label_dict_local)

                # free memory
                del block_neighbor_label_set_global, block_associated_label_dict, block_undetermined_label_set, block_neighbor_label_dict_local

    read_time = time.time() - read_time

    background_associated_labels_time = time.time()

    # create a neighbor label dict building on the reduced labels read in for each block
    neighbor_label_dict_global = Set2Dictionary(neighbor_label_set_global, label_dict=neighbor_label_dict_local)

    # find groupings of negative neighbors surrounded by a single positive label
    associated_label_dict, undetermined_label_set, holes, non_holes = FindBackgroundComponentsAssociatedLabels(neighbor_label_dict_global, undetermined_label_set_global, associated_label_dict_global)

    # set all of the undetermined values to non-holes
    for label in undetermined_label_set:
        associated_label_dict[label] = 0

    background_associated_labels_time = time.time() - background_associated_labels_time

    # write the associated labels to disk
    write_time = time.time()

    # write only one associated labels dictionary for all blocks
    tmp_directory = data.TempDirectory()
    PickleNumbaData(associated_label_dict, '{}/hole-filling-associated-labels.pickle'.format(tmp_directory))

    # save the neighbor label dict global which has linked background components across all blocks
    PickleNumbaData(neighbor_label_dict_global, '{}/hole-filling-neighbor-label-dict-global.pickle'.format(tmp_directory))

    write_time = time.time() - write_time

    total_time = time.time() - total_time

    print('Read Time: {:0.2f} seconds.'.format(read_time))
    print('Background Components Associated Labels: {:0.2f} seconds.'.format(background_associated_labels_time))
    print('Write Time: {:0.2f} seconds.'.format(write_time))
    print('Total Time: {:0.2f} seconds.'.format(total_time))

    # output timing statistics
    timing_directory = data.TimingDirectory()
    if not os.path.exists(timing_directory):
        os.makedirs(timing_directory, exist_ok=True)
    timing_filename = '{}/combine-associated-labels.txt'.format(timing_directory)
    with open(timing_filename, 'w') as fd:
        fd.write('Read Time: {:0.2f} seconds.\n'.format(read_time))
        fd.write('Background Components Associated Labels: {:0.2f} seconds.\n'.format(background_associated_labels_time))
        fd.write('Write Time: {:0.2f} seconds.\n'.format(write_time))
        fd.write('Total Time: {:0.2f} seconds.\n'.format(total_time))
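

# Hypothetical invocation-order sketch (illustration only), inferred from the files
# each routine above reads and writes: CombineAssociatedLabels must run before
# RemoveHoles (which consumes hole-filling-associated-labels.pickle), the per-block
# statistics pickles are assumed to be produced by an earlier step elsewhere in the
# pipeline, and CombineStatistics must run before EvaluateHoleFilling (which consumes
# combined-statistics.pickle). The function name and meta filename are placeholders.
def HoleFillingPipelineSketch(meta_filename):
    data = ReadMetaData(meta_filename)

    # merge per-block label sets into the global associated-labels dictionary
    CombineAssociatedLabels(data)

    # fill holes block by block using the global dictionary
    for iz in range(data.StartZ(), data.EndZ()):
        for iy in range(data.StartY(), data.EndY()):
            for ix in range(data.StartX(), data.EndX()):
                RemoveHoles(data, iz, iy, ix)

    # aggregate per-block voxel statistics, then evaluate the filled holes
    CombineStatistics(data)
    EvaluateHoleFilling(meta_filename)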