def load_data(filepath, skeys=None, recursive_search=False, logger=None):

    if logger is not None:
        logger.logging('Loading data from \n{}', filepath)
    else:
        print 'Loading data from \n{}'.format(filepath)

    data = hp()
    data.data_from_file(filepath=filepath, skeys=skeys,
                        recursive_search=recursive_search, nodata=True)

    return data
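
# Illustrative usage sketch (not executed; file name and sub-key are hypothetical):
# load_data is called elsewhere in this module with a concatenated folder + file
# name and, optionally, a list of sub-key lists to restrict what is loaded.
#
#   feats = load_data(
#       '/path/to/intermed/' + 'features.h5',
#       skeys=[['predict', 'truepaths']],
#       logger=None)
#   print feats.datastructure2string(maxdepth=3)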
def rf_combine_sources(features, pathlist):
    """
    features:
    ---------

    What we have:

    [somesource_0]
        [features]: [f_00, f_10, ..., f_n0]    # with n being the number of paths
    ...
    [somesource_N]:
        [features]: [f_0N, f_1N, ..., f_nN]

    What we want to have:

    [features]: [f_00, ..., f_n0, f_01, ..., f_n1, ..., f_0N, ..., f_nN]

    pathlist:
    ---------

    What we have:

    [somesource_0]: [kl_00, kl_10, ..., kl_n0]    # with n being the number of paths
    ...
    [somesource_N]: [kl_0N, kl_1N, ..., kl_nN]

    What we want to have:

    [somesource_0 + kl_00, ..., somesource_N + kl_nN]

    :return:
    """

    outfeatures = hp()
    newpathlist = []

    # print 'Starting rf_combine_sources_new\n'

    for d, k, v, kl in pathlist.data_iterator(leaves_only=True):
        # print kl
        newpathlist += [kl + list(x) for x in pathlist[kl]]

        for d2, k2, v2, kl2 in features[kl].data_iterator(leaves_only=True):
            if outfeatures.inkeys(kl2):
                outfeatures[kl2] \
                    = np.concatenate((outfeatures[kl2], np.array(v2)), axis=0)
            else:
                outfeatures[kl2] = np.array(v2)

    return outfeatures, newpathlist
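
# Minimal sketch of the combine step above, using plain dicts and numpy instead
# of the project's hp/Rdict containers (an assumption for illustration only,
# the helper name is my own): feature arrays from each source are stacked along
# axis 0 and each path key list is prefixed with its source key so the paths
# stay identifiable after merging.
def _sketch_combine_sources(features_by_source, paths_by_source):
    import numpy as np
    combined = {}
    new_pathlist = []
    for source, path_keys in paths_by_source.iteritems():
        # Prefix every path key list with the source it came from
        new_pathlist += [[source] + list(k) for k in path_keys]
        for featname, arr in features_by_source[source].iteritems():
            if featname in combined:
                combined[featname] = np.concatenate(
                    (combined[featname], np.array(arr)), axis=0)
            else:
                combined[featname] = np.array(arr)
    return combined, new_pathlist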
# seg_skey = ['x', '1', 'beta_0.5']
#
# gt_path = seg_path
# gt_file = 'cremi.splA.train.gtlarge.crop.crop_x10_110_y200_712_z200_712.split_x.h5'
# gt_skey = ['x', '1', 'neuron_ids']
#
# paths_path = '/mnt/localdata02/jhennies/neuraldata/results/cremi_2016/170124_neurobioseg_x_cropped_avoid_duplicates_develop/intermed/'
# paths_file = 'cremi.splA.train.paths.crop.crop_x10_110_y200_712_z200_712.split_x.h5'
# pathlist_file = 'cremi.splA.train.pathlist.crop.crop_x10_110_y200_712_z200_712.split_x.pkl'
# paths_skey = ['predict', 'truepaths', 'x', '1', 'beta_0.5']

# crop = np.s_[0:10, 0:100, 0:100]

# Load path
paths = hp(filepath=paths_path + paths_file, nodata=True,
           skeys=[paths_skey])[paths_skey]

print paths.keys()

if label == 'random':
    paths_list = []
    for d, k, v, kl in paths.data_iterator(leaves_only=True):
        paths_list.append(kl)

    import random
    random.seed()
    chosen_path = random.choice(paths_list)
    label = chosen_path[0]
    pathid = chosen_path[1]
    # label = paths.keys()[1]

print 'Selected label = {}'.format(label)
print 'Selected pathid = {}'.format(pathid)
def compute_paths(yparams):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = Rdict(all_params['compute_paths'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = hp()

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            'Performing experiment {}\n==============================\n',
            exp_lbl)

        first = zeroth_defaults.dcp()
        if experiment is not None:
            first.merge(experiment)
        if 'default' in first:
            first_defaults = first.pop('default')
        else:
            first_defaults = hp()

        statistics = Rdict()

        for exp_class_lbl in ['truepaths', 'falsepaths']:

            # Final layer
            # -----------
            # The true or false paths for the current experiment are computed here, respectively
            yparams.logging(
                'Computing {}...\n------------------------------\n',
                exp_class_lbl)

            final = first_defaults.dcp()
            final.merge(first[exp_class_lbl])

            exp_sources = final['sources']
            exp_params = final['params']
            exp_target = final['target']

            # Load the necessary images
            data = hp()
            for datakey, content in exp_sources.iteritems():
                data[datakey] = load_images(
                    all_params[content[0]] + all_params[content[1]],
                    skeys=content[2]['skeys'],
                    recursive_search=False,
                    logger=yparams)

            yparams.logging('\nInitial datastructure: \n\n{}',
                            data.datastructure2string(maxdepth=4))
            yparams.logging('experiment_params: \n{}', exp_params)

            # Compute the paths
            # -----------------
            paths = hp()
            for_class = False
            if exp_class_lbl == 'truepaths':
                for_class = True
            paths[exp_lbl][exp_class_lbl], statistics[exp_lbl][exp_class_lbl] = \
                libhp.compute_paths_for_class(
                    data, 'segm', 'conts', 'dt', 'gt', exp_params,
                    for_class=for_class, ignore=[],
                    debug=all_params['debug'],
                    logger=yparams)

            yparams.logging('\nPaths datastructure after running {}: \n\n{}',
                            exp_class_lbl, paths.datastructure2string())

            def val(x):
                return x
            yparams.logging(
                '\nStatistics after {}: \n\n{}', exp_class_lbl,
                simplify_statistics(
                    statistics[exp_lbl]).datastructure2string(function=val))

            # Save the result to disk
            # -----------------------
            targetfile = all_params[exp_target[0]] + all_params[exp_target[1]]
            paths.write(filepath=targetfile)

        def val(x):
            return x
        yparams.logging(
            '\nStatistics after full experiment: \n\n{}',
            simplify_statistics(
                statistics[exp_lbl]).datastructure2string(function=val))
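
# Minimal sketch (plain dicts, helper name is my own) of the layered parameter
# pattern used in compute_paths and the functions below: a 'default' entry is
# popped from each level, deep-copied, and then overridden by the entries of
# the selected experiment. A shallow dict update stands in here for the
# project's recursive merge.
def _sketch_merge_with_defaults(level):
    import copy
    level = dict(level)
    defaults = level.pop('default', {})
    merged = {}
    for exp_lbl, overrides in level.iteritems():
        cfg = copy.deepcopy(defaults)
        cfg.update(overrides or {})   # experiment entries win over the defaults
        merged[exp_lbl] = cfg
    return merged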
def random_forest(yparams, debug=False):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = Rdict(all_params['random_forest'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = hp()

    # pathlist = ipl()
    featlistfile = zeroth_defaults['targets', 'featlist']
    featlistfile = all_params[featlistfile[0]] + all_params[featlistfile[1]]
    classifier_file = zeroth_defaults['targets', 'classifier']
    classifier_file = all_params[classifier_file[0]] + all_params[classifier_file[1]]

    # yparams.logging('\nDatastructure of pathlistin:\n\n{}', pathlistin.datastructure2string())

    feature_space_lists = dict()
    classifiers = dict()

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            '\n\nPerforming experiment {}\n==============================',
            exp_lbl)

        final = zeroth_defaults.dcp()
        final.merge(experiment)

        exp_sources = final['sources']
        exp_params = final['params']
        exp_targets = final['targets']

        exp_source_kl = [exp_lbl]
        if len(exp_sources['train']) == 4:
            exp_source_kl = exp_sources['train'][3]
        exp_predict_kl = ['predict']
        if len(exp_sources['predict']) == 4:
            exp_predict_kl = exp_sources['predict'][3]
        if type(exp_source_kl) is str:
            exp_source_kl = [exp_source_kl]
        if type(exp_predict_kl) is str:
            exp_predict_kl = [exp_predict_kl]

        # Loading of the training pathlist(s)
        # -----------------------------------
        # Get the pathlist stored in features_of_paths
        pathlist_source = exp_sources.pop('train_pl')
        # Check for list or single file
        if type(pathlist_source) is Rdict:
            pathlistin_train = Rdict()
            for key, val in pathlist_source.iteritems():
                pathlistfile = all_params[val[0]] + all_params[val[1]]
                with open(pathlistfile, 'r') as f:
                    pathlistin_train[key] = Rdict(pickle.load(f))
                if 'skeys' in val[2]:
                    pathlistin_train[key] = pathlistin_train[key].subset(
                        *val[2]['skeys'])
        else:
            pathlistfile = all_params[pathlist_source[0]] \
                           + all_params[pathlist_source[1]]
            with open(pathlistfile, 'r') as f:
                pathlistin_train = Rdict(pickle.load(f))
            if 'skeys' in pathlist_source[2]:
                pathlistin_train = pathlistin_train.subset(
                    *pathlist_source[2]['skeys'])
        yparams.logging('pathlistin_train.datastructure: \n{}\n',
                        pathlistin_train.datastructure2string(maxdepth=4))
        pathlistout = hp()

        # Loading of the prediction pathlist
        pathlist_source = exp_sources.pop('predict_pl')
        pathlistfile = all_params[pathlist_source[0]] \
                       + all_params[pathlist_source[1]]
        with open(pathlistfile, 'r') as f:
            pathlistin_predict = Rdict(pickle.load(f))
        if 'skeys' in pathlist_source[2]:
            pathlistin_predict = pathlistin_predict.subset(
                *pathlist_source[2]['skeys'])
        yparams.logging('pathlistin_predict.datastructure: \n{}\n',
                        pathlistin_predict.datastructure2string(maxdepth=4))

        # Load training data
        # ------------------
        if 'train' in exp_sources.keys():
            truesource = exp_sources['train']
            falsesource = exp_sources['train']
        else:
            truesource = exp_sources['traintrue']
            falsesource = exp_sources['trainfalse']

        # Check for list or single file
        if type(truesource) is Rdict:
            truetrainfeats = hp()
            for key, val in truesource.iteritems():
                truetrainfeats[key] = load_data(
                    all_params[val[0]] + all_params[val[1]],
                    logger=yparams, **val[2]).subset('truepaths', search=True)
        else:
            truetrainfeats = load_data(
                all_params[truesource[0]] + all_params[truesource[1]],
                logger=yparams, **truesource[2]).subset('truepaths', search=True)
        if type(falsesource) is Rdict:
            falsetrainfeats = hp()
            for key, val in falsesource.iteritems():
                falsetrainfeats[key] = load_data(
                    all_params[val[0]] + all_params[val[1]],
                    logger=yparams, **val[2]).subset('falsepaths', search=True)
        else:
            falsetrainfeats = load_data(
                all_params[falsesource[0]] + all_params[falsesource[1]],
                logger=yparams, **falsesource[2]).subset('falsepaths', search=True)
        # ------------------

        yparams.logging('\ntruetrainfeats.datastructure: \n{}\n',
                        truetrainfeats.datastructure2string(maxdepth=4))
        yparams.logging('\nfalsetrainfeats.datastructure: \n{}\n',
                        falsetrainfeats.datastructure2string(maxdepth=4))

        # Load prediction data
        predictsource = exp_sources['predict']
        predictfeats = load_data(
            all_params[predictsource[0]] + all_params[predictsource[1]],
            logger=yparams, **predictsource[2])
        yparams.logging('\npredictfeats.datastructure: \n{}\n',
                        predictfeats.datastructure2string(maxdepth=4))

        # # Load the data into memory
        # truetrainfeats.populate()
        # falsetrainfeats.populate()
        # predictfeats.populate()

        # Concatenate the different sources
        # 1. Of training data
        plo_true_train = hp()
        plo_false_train = hp()
        # truetrainfeats, plo_true['truepaths'] = libip.rf_combine_sources_new(
        #     truetrainfeats[exp_source_kl]['truepaths'].dcp(),
        #     pathlistin[exp_source_kl]['truepaths'].dcp()
        # )
        truetrainfeats, plo_true_train['train', 'truepaths'] = libhp.rf_combine_sources(
            truetrainfeats,
            pathlistin_train.subset('truepaths', search=True))
        falsetrainfeats, plo_false_train['train', 'falsepaths'] = libhp.rf_combine_sources(
            falsetrainfeats,
            pathlistin_train.subset('falsepaths', search=True))
        pathlistout[exp_source_kl] = plo_true_train + plo_false_train

        # 2. Of prediction data
        ipf_true = hp()
        plo_true_predict = hp()
        ipf_true['truepaths'], plo_true_predict['predict', 'truepaths'] = libhp.rf_combine_sources(
            predictfeats.subset('truepaths', search=True),
            pathlistin_predict.subset('truepaths', search=True))
        ipf_false = hp()
        plo_false_predict = hp()
        ipf_false['falsepaths'], plo_false_predict['predict', 'falsepaths'] = libhp.rf_combine_sources(
            predictfeats.subset('falsepaths', search=True),
            pathlistin_predict.subset('falsepaths', search=True))
        inpredictfeats = ipf_true + ipf_false
        pathlistout[exp_source_kl, 'predict'] = plo_true_predict + plo_false_predict

        # Note:
        # Due to the feature input being a dictionary organized by the feature images where
        # the feature values come from
        #
        #   [source]
        #       'truepaths'|'falsepaths'
        #           [featureims]
        #               'Sum': [s1, ..., sN]
        #               'Variance': [v1, ..., vN]
        #               ...
        #           [Pathlength]: [l1, ..., lN]
        #
        # the exact order in which items are iterated over by data_iterator() is not known.
        #
        # Solution:
        # Iterate over it once and store the keylist in an array (which conserves the order).
        # When accumulating the features for each of the four corresponding subsets, namely
        # the training and testing set with true and false paths each, i.e.
        # ['0'|'1']['truefeats'|'falsefeats'],
        # the keylist is used, thus maintaining the correct order in every subset.
        #
        # And that is what is happening here:
        #
        # 1. Get the keylist of a full feature list, e.g. one of true paths
        # example_kl = None
        # for d2, k2, v2, kl2 in truetrainfeats.data_iterator():
        #     if k2 == 'truepaths':
        #         example_kl = kl2
        #         break
        #
        # 2. Get the keylist order of the feature space
        # TODO: Write this to file
        feature_space_list = []
        for d2, k2, v2, kl2 in truetrainfeats.data_iterator():
            if type(v2) is not type(truetrainfeats):
                feature_space_list.append(kl2)

        feature_space_lists[exp_lbl] = feature_space_list

        intrain = hp()
        intrain['true'] = libhp.rf_make_feature_array_with_keylist(
            truetrainfeats, feature_space_list)
        yparams.logging(
            "Computed feature array for train['true'] with shape {}",
            intrain['true'].shape)
        intrain['false'] = libhp.rf_make_feature_array_with_keylist(
            falsetrainfeats, feature_space_list)
        yparams.logging(
            "Computed feature array for train['false'] with shape {}",
            intrain['false'].shape)

        inpredict = hp()
        inpredict['true'] = libhp.rf_make_feature_array_with_keylist(
            inpredictfeats['truepaths'], feature_space_list)
        yparams.logging(
            "Computed feature array for predict['true'] with shape {}",
            inpredict['true'].shape)
        inpredict['false'] = libhp.rf_make_feature_array_with_keylist(
            inpredictfeats['falsepaths'], feature_space_list)
        yparams.logging(
            "Computed feature array for predict['false'] with shape {}",
            inpredict['false'].shape)

        # Classify
        result = hp()
        result[exp_lbl], classifiers[exp_lbl] = libhp.random_forest(
            intrain, inpredict, debug=debug,
            balance=exp_params['balance_classes'], logger=yparams)

        # Evaluate
        new_eval = hp()
        # print [x[0] for x in result[kl]]
        # print [x[1] for x in result[kl]]
        new_eval[exp_lbl] = libhp.new_eval([x[0] for x in result[exp_lbl]],
                                           [x[1] for x in result[exp_lbl]])

        yparams.logging('+++ RESULTS +++')
        yparams.logging("[kl]")
        # for i in result[kl]:
        #     yparams.logging('{}', i)
        for key, value in new_eval[exp_lbl].iteritems():
            yparams.logging('{} = {}', key, value)

    with open(featlistfile, 'wb') as f:
        pickle.dump(feature_space_lists, f)

    # Store the classifiers
    with open(classifier_file, 'wb') as f:
        pickle.dump(classifiers, f)
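
# Minimal sketch (plain dicts and numpy, helper name is my own) of the keylist
# trick described in the comment block above: the key paths of the nested
# feature container are recorded once, and every feature matrix is then
# assembled in that recorded order, so training and prediction matrices line
# up column by column.
def _sketch_make_feature_array(nested_feats, keylist=None):
    import numpy as np

    def _walk(node, prefix=()):
        # Record the key path of every leaf in whatever order the dict yields
        for k in node.keys():
            v = node[k]
            if isinstance(v, dict):
                for item in _walk(v, prefix + (k,)):
                    yield item
            else:
                yield prefix + (k,)

    if keylist is None:
        keylist = list(_walk(nested_feats))      # fix the column order once
    columns = []
    for key_path in keylist:
        v = nested_feats
        for k in key_path:
            v = v[k]
        columns.append(np.asarray(v, dtype=np.float32))
    # Rows correspond to paths, columns to features, in keylist order
    return np.stack(columns, axis=1), keylist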
def compute_paths_for_class(indata, labelskey, pathendkey, disttransfkey, gtkey,
                            params, for_class=True, ignore=[], logger=None,
                            debug=False):

    def shortest_paths(penaltypower, bounds, lbl, keylist_lblim, gt, disttransf,
                       pathends, for_class=True, correspondence={},
                       avoid_duplicates=True, max_paths_per_object=[],
                       max_paths_per_object_seed=[], yield_in_bounds=False,
                       return_pathim=True, minimum_alternative_label_count=0,
                       logger=None):
        """
        :param penaltypower:
        :param bounds:
        :param lbl:
        :param keylist_lblim: Needed for correspondence table
        :param disttransf:
        :param pathends:
        :param for_class:
            True: paths are computed for when endpoints are in the same ground truth object
            False: paths are computed for when endpoints are in different ground truth objects
        :param correspondence:
        :param avoid_duplicates:
        :param max_paths_per_object:
        :param max_paths_per_object_seed:
        :param yield_in_bounds:
        :param return_pathim:
        :param minimum_alternative_label_count:
            Paths of merges (for_class=False) are removed if too few pixels of the merged
            object are found
        :param logger:
        :return:
        """

        # Pick up some statistics along the way
        stats_excluded_paths = 0
        statistics = Rdict()

        # Determine the endpoints of the current object
        indices = np.where(pathends)
        coords = zip(indices[0], indices[1], indices[2])

        # Make pairwise list of coordinates serving as source and target
        # First determine all pairings
        all_pairs = []
        for i in xrange(0, len(coords) - 1):
            for j in xrange(i + 1, len(coords)):
                all_pairs.append((coords[i], coords[j]))
        # And only use those that satisfy certain criteria:
        # a) Are in either the same gt object (for_class=True)
        #    or in different gt objects (for_class=False)
        # b) Are not in the correspondence list
        pairs = []
        label_pairs = []
        # if avoid_duplicates:
        new_correspondence = {}
        for pair in all_pairs:
            # Determine whether the endpoints are in different gt objects
            if (gt[pair[0]] == gt[pair[1]]) == for_class:
                # Check correspondence list if pairings were already computed in a different image
                labelpair = tuple(sorted([gt[pair[0]], gt[pair[1]]]))
                if avoid_duplicates:
                    if labelpair not in correspondence.keys():
                        pairs.append(pair)
                        label_pairs.append(labelpair)
                        # new_correspondence[labelpair] = [keylist_lblim, lbl]
                        if logger is not None:
                            logger.logging('Found pairing: {}', labelpair)
                    else:
                        if logger is not None:
                            logger.logging(
                                'Pairing already in correspondence table: {}',
                                labelpair)
                else:
                    pairs.append(pair)
                    if logger is not None:
                        logger.logging('Found pairing: {}', labelpair)
        # if avoid_duplicates:
        #     correspondence.update(new_correspondence)

        # Select a certain number of pairs if number is too high
        if max_paths_per_object:
            if len(pairs) > max_paths_per_object:
                if logger is not None:
                    logger.logging('Reducing number of pairs to {}',
                                   max_paths_per_object)
                if max_paths_per_object_seed:
                    random.seed(max_paths_per_object_seed)
                else:
                    random.seed()
                pairs = random.sample(pairs, max_paths_per_object)
                if logger is not None:
                    logger.logging('Modified pairs list: {}', pairs)

        # If pairs are found that satisfy all conditions
        if pairs:

            if logger is not None:
                logger.logging('Found {} pairings which satisfy all criteria',
                               len(pairs))
            else:
                print 'Found {} pairings which satisfy all criteria'.format(
                    len(pairs))

            # Pre-processing of the distance transform
            # a) Invert: the lowest values (i.e. the lowest penalty for the shortest path
            #    detection) should be at the center of the current process
            disttransf = lib.invert_image(disttransf)
            #
            # b) Set all values outside the process to infinity
            disttransf = lib.filter_values(disttransf, np.amax(disttransf),
                                           type='eq', setto=np.inf)
            #
            # c) Increase the value difference between pixels near the boundaries and pixels
            #    central within the processes. This increases the likelihood of the paths to
            #    follow the center of processes, thus avoiding short-cuts
            disttransf = lib.power(disttransf, penaltypower)

            # Compute the shortest paths according to the pairs list
            ps_computed, ps_in_bounds = lib.shortest_paths(
                disttransf, pairs, bounds=bounds, logger=logger,
                return_pathim=return_pathim, yield_in_bounds=yield_in_bounds)

            # Criteria for keeping paths which can only be evaluated after path computation
            if for_class:
                # A path without merge must not switch labels on the way!
                ps = []
                for i in xrange(0, len(ps_computed)):
                    if len(np.unique(
                            gt[ps_in_bounds[i][:, 0],
                               ps_in_bounds[i][:, 1],
                               ps_in_bounds[i][:, 2]])) == 1:
                        ps.append(ps_computed[i])
                        if logger is not None:
                            logger.logging('Path label = True')
                        # Add entry to correspondence table
                        if avoid_duplicates:
                            new_correspondence[label_pairs[i]] = [keylist_lblim, lbl]
                    else:
                        # The path switched objects multiple times on the way and is not added to the list
                        if logger is not None:
                            logger.logging(
                                'Path starting and ending in label = {} had multiple labels and was excluded',
                                gt[tuple(ps_in_bounds[i][0])])
                        stats_excluded_paths += 1
            else:
                ps = []
                for i in xrange(0, len(ps_computed)):
                    un, counts = np.unique(
                        gt[ps_in_bounds[i][:, 0],
                           ps_in_bounds[i][:, 1],
                           ps_in_bounds[i][:, 2]],
                        return_counts=True)
                    # At least two of the entries in counts have to be larger than the threshold
                    c = 0
                    for count in counts:
                        if count >= minimum_alternative_label_count:
                            c += 1
                            if c > 1:
                                break
                    if c > 1:
                        ps.append(ps_computed[i])
                        # Add entry to correspondence table
                        if avoid_duplicates:
                            new_correspondence[label_pairs[i]] = [keylist_lblim, lbl]
                    else:
                        if logger is not None:
                            logger.logging(
                                'Path starting in label {} and ending in {} only crossed one of the labels for {} voxels',
                                gt[tuple(ps_in_bounds[i][0])],
                                gt[tuple(ps_in_bounds[i][-1])],
                                np.min(counts))

            statistics['excluded_paths'] = stats_excluded_paths
            statistics['kept_paths'] = len(ps)
            return ps, new_correspondence, statistics

        else:

            statistics['excluded_paths'] = 0
            statistics['kept_paths'] = 0
            return [], new_correspondence, statistics

    def shortest_paths_wrapper(labelim, gt_im, dt_im, bc_im, lbl, kl, k, params,
                               for_class=True, correspondence={}, logger=None):

        print 'Wrapper called...'

        # Create an image that contains only the one object
        lblim = np.zeros(labelim.shape, dtype=np.uint16)
        lblim[labelim == lbl] = lbl

        # Get the region of the one object
        bounds = lib.find_bounding_rect(lblim, s_=True)

        # Crop the label image
        lblim = lib.crop_bounding_rect(lblim, bounds)

        # Crop the gt as well
        gt_im = lib.crop_bounding_rect(gt_im, bounds=bounds)

        # Crop and mask the distance transform
        dt_im = lib.crop_bounding_rect(dt_im, bounds=bounds)
        dt_im[lblim == 0] = 0

        # Crop and mask border contacts
        bc_im = lib.crop_bounding_rect(bc_im, bounds=bounds)
        bc_im[lblim == 0] = 0

        # Done: Check for correctness

        # Compute all paths within this object which start and end in different
        # gt-objects
        # Supply the correspondence table to this function and only compute a path
        # if the respective correspondence is not found
        return shortest_paths(
            params['penaltypower'], bounds, lbl, kl + [k], gt_im, dt_im, bc_im,
            for_class=for_class, correspondence=correspondence,
            avoid_duplicates=params['avoid_duplicates'],
            max_paths_per_object=params['max_paths_per_object'],
            max_paths_per_object_seed=params['max_paths_per_object_seed'],
            yield_in_bounds=True, return_pathim=False,
            minimum_alternative_label_count=params['minimum_alternative_label_count'],
            logger=logger)

    correspondence_table = {}
    # correspondence_table (type=dict) should have the form:
    # {tuple(labels_in_gt_i): [kl_labelsimage_i, label_i]}

    paths = hp()
    statistics = Rdict()

    if params['order_of_betas'] is not None:
        key_lists = []
        for i in params['order_of_betas']:
            key_lists += indata[labelskey].find_key_lists(i)
    else:
        key_lists = []
        for d, k, v, kl in indata[labelskey].data_iterator(leaves_only=True):
            key_lists.append(kl)

    # Iterate over segmentations
    # for d, k, v, kl in indata[labelskey].data_iterator(leaves_only=True, yield_short_kl=True):
    for i in key_lists:

        k = i[-1]
        kl = i[0:-1]

        if logger is not None:
            logger.logging('====================')
            logger.logging('Working on image {}', k)
            logger.logging('correspondence_table = {}', correspondence_table)
        else:
            print '===================='
            print 'Working on image {}'.format(k)
            print 'correspondence_table = {}'.format(correspondence_table)

        # Load the current segmentation image
        labelim = np.array(indata[labelskey][kl][k])
        # indata[labelskey][kl].populate(k)

        # TODO: Parallelize here
        # Iterate over all labels of that image (including cropping for speed-up)
        # Determine a list of present labels
        label_list = np.unique(labelim)
        label_list = filter(lambda x: x != 0, label_list)

        if params['parallelize']:

            logger.logging('Starting thread pool with {} threads',
                           params['max_threads'])
            with futures.ThreadPoolExecutor(params['max_threads']) as do_stuff:
                tasks = Rdict()
                for lbl in label_list:
                    tasks[lbl] = do_stuff.submit(
                        shortest_paths_wrapper, labelim,
                        np.array(indata[gtkey][kl][indata[gtkey][kl].keys()[0]]),
                        np.array(indata[disttransfkey][kl][k]['disttransf', 'raw']),
                        np.array(indata[pathendkey][kl][k]['contacts']),
                        lbl, kl, k, params,
                        for_class=for_class,
                        correspondence=correspondence_table,
                        logger=logger)

            for lbl in label_list:
                newpaths, new_correspondence_table, new_statistics = tasks[lbl].result()
                correspondence_table.update(new_correspondence_table)
                statistics[kl + [k] + [lbl]] = new_statistics

                # If new paths were detected
                if newpaths:
                    # Store them
                    # paths.merge(newpaths)
                    pskeys = range(0, len(newpaths))
                    paths[kl + [k] + [lbl]] = hp(data=dict(zip(pskeys, newpaths)))

                    if logger is not None:
                        logger.logging(
                            'Found {} paths in image {} at label {}',
                            len(newpaths), k, lbl)
                        logger.logging('-------------------')
                    else:
                        print 'Found {} paths in image {} at label {}'.format(
                            len(newpaths), k, lbl)
                        print '-------------------'

        else:

            # Iterate over these labels
            for lbl in label_list:

                newpaths, new_correspondence_table, new_statistics = shortest_paths_wrapper(
                    labelim,
                    np.array(indata[gtkey][kl][indata[gtkey][kl].keys()[0]]),
                    np.array(indata[disttransfkey][kl][k]['disttransf', 'raw']),
                    np.array(indata[pathendkey][kl][k]['contacts']),
                    lbl, kl, k, params,
                    for_class=for_class,
                    correspondence=correspondence_table,
                    logger=logger)

                correspondence_table.update(new_correspondence_table)
                statistics[kl + [k] + [lbl]] = new_statistics

                # If new paths were detected
                if newpaths:
                    # Store them
                    # paths.merge(newpaths)
                    pskeys = range(0, len(newpaths))
                    paths[kl + [k] + [lbl]] = hp(data=dict(zip(pskeys, newpaths)))

                    if logger is not None:
                        logger.logging(
                            'Found {} paths in image {} at label {}',
                            len(newpaths), k, lbl)
                        logger.logging('-------------------')
                    else:
                        print 'Found {} paths in image {} at label {}'.format(
                            len(newpaths), k, lbl)
                        print '-------------------'

        # # Unload the current segmentation image
        # indata[labelskey][kl].unpopulate()

    return paths, statistics
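
# Minimal sketch (numpy indexing + itertools, helper name is my own) of the
# endpoint pairing filter used inside shortest_paths above: a pair of endpoints
# is kept if its two ground-truth labels are equal (for_class=True) or
# different (for_class=False), and label pairs already recorded in the
# correspondence table are skipped so the same object pair is not processed twice.
def _sketch_select_endpoint_pairs(coords, gt, for_class=True, correspondence=None):
    import itertools
    if correspondence is None:
        correspondence = {}
    pairs, label_pairs = [], []
    for p0, p1 in itertools.combinations(coords, 2):
        same_object = gt[tuple(p0)] == gt[tuple(p1)]
        if same_object == for_class:
            labelpair = tuple(sorted((gt[tuple(p0)], gt[tuple(p1)])))
            if labelpair not in correspondence:
                pairs.append((p0, p1))
                label_pairs.append(labelpair)
    return pairs, label_pairs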
def extract_region_features(feat, im, ignore_label, featlist):
    return hp(
        vigra.analysis.extractRegionFeatures(
            feat, im, ignoreLabel=ignore_label, features=featlist))
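
# Illustrative usage sketch based on how this module calls it (array names and
# feature selection are hypothetical): the feature image is passed as float32,
# the working label image as an integer array with label 0 ignored, and the
# result is wrapped in an hp container for per-feature access.
#
#   acc = vigra.analysis.extractRegionFeatures(
#       feature_image.astype(np.float32), label_image,
#       ignoreLabel=0, features=['Sum', 'Variance'])
#   feats = hp(data=acc).subset('Sum', 'Variance')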
def get_features(paths, shp, featureimages, featurelist, max_paths_per_label,
                 logger=None, anisotropy=[1, 1, 1], return_pathlist=False,
                 parallelized=False, max_threads=5):
    """
    :param paths:
    :param featureimages:
    :param featurelist:
    :param max_paths_per_label:
    :param ipl:
    :param anisotropy:
    :param return_pathlist: When True a list of the path keys is returned in the same order
        as their features are stored -> Can be used for back-translation of the path
        classification to the respective object the path is in.
        It is basically a concatenation of the key list as yielded by the simultaneous iterator.
    :return:
    """

    newfeats = hp()

    # The path lengths only have to be computed once without using the vigra region features
    def compute_path_lengths(paths, anisotropy):
        path_lengths = []
        # for d, k, v, kl in paths.data_iterator():
        #     if type(v) is not type(paths):
        for path in paths:
            path_lengths.append(lib.compute_path_length(np.array(path), anisotropy))
        return np.array(path_lengths)

    # And only do it when desired
    pathlength = False
    try:
        featurelist.remove('Pathlength')
    except ValueError:
        # Means that 'Pathlength' was not in the list
        pass
    else:
        # 'Pathlength' was in the list and is now successfully removed
        pathlength = True

    if max_paths_per_label is not None:
        keylist = range(0, max_paths_per_label - 1)
        keylist = [str(x) for x in keylist]
    else:
        keylist = None

    if return_pathlist:
        pathlist = []

    # Iterate over all paths, yielding a list of one path per label object until no paths are left
    for i, keys, vals in paths.simultaneous_iterator(
            max_count_per_item=max_paths_per_label,
            keylist=keylist):
        # i is the iteration number
        # keys are respective labels and ids of the paths
        # vals are the coordinates of the path positions

        if return_pathlist:
            pathlist += keys

        if logger is not None:
            logger.logging('Working in iteration = {}', i)
            logger.logging('Keys: {}', keys)

        if not keys:
            continue

        # Create a working image
        image = np.zeros(shp, dtype=np.uint32)
        # And fill it with one path per label object
        c = 1
        for curk, curv in (dict(zip(keys, vals))).iteritems():

            curv = np.array(curv)
            if pathlength:
                if not newfeats.inkeys(['Pathlength']):
                    newfeats['Pathlength'] = np.array(
                        [lib.compute_path_length(curv, anisotropy)])
                else:
                    newfeats['Pathlength'] = np.concatenate(
                        (newfeats['Pathlength'],
                         [lib.compute_path_length(curv, anisotropy)]))
            curv = lib.swapaxes(curv, 0, 1)
            lib.positions2value(image, curv, c)

            c += 1

        # TODO: If this loop iterated over the parameter list it would be more broadly applicable
        if not parallelized:

            for d, k, v, kl in featureimages.data_iterator():

                if type(v) is not hp:

                    # Extract the region features of the working image
                    newnewfeats = hp(
                        data=vigra.analysis.extractRegionFeatures(
                            np.array(v).astype(np.float32),
                            image, ignoreLabel=0,
                            features=featurelist))
                    # Pick out the features that we asked for
                    newnewfeats = newnewfeats.subset(*featurelist)

                    # Done: Extract feature 'Count' manually due to anisotropy

                    # Append to the recently computed list of features
                    for nk, nv in newnewfeats.iteritems():
                        nv = nv[1:]
                        if newfeats.inkeys(kl + [nk]):
                            try:
                                newfeats[kl + [nk]] = np.concatenate(
                                    (newfeats[kl + [nk]], nv))
                            except ValueError:
                                pass
                        else:
                            newfeats[kl + [nk]] = nv

        elif parallelized:

            def extract_region_features(feat, im, ignore_label, featlist):
                return hp(
                    vigra.analysis.extractRegionFeatures(
                        feat, im, ignoreLabel=ignore_label, features=featlist))

            logger.logging('Starting thread pool with a max of {} threads',
                           max_threads)
            with futures.ThreadPoolExecutor(max_threads) as do_stuff:

                keys = []
                vals = []
                tasks = Rdict()
                for d, k, v, kl in featureimages.data_iterator(leaves_only=True):
                    # tasks[kl] = do_stuff.submit(
                    #     hp(vigra.analysis.extractRegionFeatures(
                    #         np.array(v).astype(np.float32), image, ignoreLabel=0,
                    #         features=featurelist
                    #     ))
                    # )
                    tasks[kl] = do_stuff.submit(
                        extract_region_features,
                        np.array(v).astype(np.float32), image, 0, featurelist)
                    keys.append(kl)

            for kl in keys:
                newnewfeats = tasks[kl].result()
                newnewfeats = newnewfeats.subset(*featurelist)
                for nk, nv in newnewfeats.iteritems():
                    nv = nv[1:]
                    if newfeats.inkeys(kl + [nk]):
                        try:
                            newfeats[kl + [nk]] = np.concatenate(
                                (newfeats[kl + [nk]], nv))
                        except ValueError:
                            pass
                    else:
                        newfeats[kl + [nk]] = nv

    if return_pathlist:
        return newfeats, pathlist
    else:
        return newfeats
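
# Minimal sketch (numpy only, helper name is my own) of an anisotropy-weighted
# path length as presumably computed by lib.compute_path_length above: each
# voxel coordinate is scaled by the voxel size per axis, and the Euclidean
# distances between consecutive points are summed.
def _sketch_path_length(path_coords, anisotropy=(1, 1, 1)):
    import numpy as np
    scaled = np.asarray(path_coords, dtype=np.float64) \
        * np.asarray(anisotropy, dtype=np.float64)
    steps = np.diff(scaled, axis=0)
    return float(np.sqrt((steps ** 2).sum(axis=1)).sum())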
def remove_small_objects(yparams):
    """
    :param ipl: A Hdf5ImageProcessingLib instance containing labelimages named as specified in
        ipl.get_params()['labelsname']

        ipl.get_params()

            remove_small_objects
                bysize
                relabel

            largeobjname

            labelsname

    :param key: the source key for calculation
    """

    params = yparams.get_params()
    thisparams = params['remove_small_objects']

    # Make dictionary of all sources
    all_data = hp()
    for i in xrange(0, len(thisparams['sources'])):
        # Optional keyword arguments for load_images
        kwargs = {}
        if len(thisparams['sources'][i]) > 2:
            kwargs = thisparams['sources'][i][2]
        all_data[i] = load_images(
            params[thisparams['sources'][i][0]] + params[thisparams['sources'][i][1]],
            logger=yparams, **kwargs)

    # Process all data items
    def filtering_wrapper(d, k, v, kl):

        yparams.logging(
            '===============================\nWorking on image: {}', kl)

        targetfile = params[thisparams['targets'][kl[0]][0]] \
                     + params[thisparams['targets'][kl[0]][1]]

        parallelize_filtering = False
        if thisparams['filtering_threads'] > 1:
            parallelize_filtering = True

        result = hp()
        result[kl[1:]] = libhp.remove_small_objects_relabel(
            np.array(v), thisparams['bysize'],
            relabel=thisparams['relabel'],
            consecutive_labels=thisparams['consecutive_labels'],
            parallelize=parallelize_filtering,
            max_threads=thisparams['filtering_threads'],
            logger=yparams)

        # Write the result to file
        result.write(filepath=targetfile)

        return result[kl[1:]]

    if thisparams['image_threads'] > 1:
        with futures.ThreadPoolExecutor(thisparams['image_threads']) as filter_small:
            tasks = hp()
            for d, k, v, kl in all_data.data_iterator(leaves_only=True):
                tasks[kl] = filter_small.submit(filtering_wrapper, d, k, v, kl)
        # for d, k, v, kl in tasks.data_iterator(leaves_only=True):
        #     result = v.result()
    else:
        for d, k, v, kl in all_data.data_iterator(leaves_only=True):
            filtering_wrapper(d, k, v, kl)

    # Close the source files
    for k, v in all_data.iteritems():
        v.close()
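
# Minimal sketch (numpy only, helper name is my own) of what
# libhp.remove_small_objects_relabel presumably does for the 'bysize' and
# 'relabel' options: drop labels whose voxel count falls below a size
# threshold and optionally relabel the surviving objects consecutively.
def _sketch_remove_small_objects(labelim, bysize, relabel=True):
    import numpy as np
    labels, counts = np.unique(labelim, return_counts=True)
    keep = set(labels[(counts >= bysize) & (labels != 0)])
    out = np.where(np.isin(labelim, list(keep)), labelim, 0)
    if relabel:
        # Map the kept labels to 1..N, keeping 0 as background
        mapping = {old: new for new, old in enumerate(sorted(keep), start=1)}
        relabeled = np.zeros_like(out)
        for old, new in mapping.iteritems():
            relabeled[out == old] = new
        out = relabeled
    return out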
def features_of_paths(yparams):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = Rdict(all_params['features_of_paths'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = hp()

    pathlist = hp()
    pathlistfile = zeroth_defaults['targets', 'pathlist']
    pathlistfile = all_params[pathlistfile[0]] + all_params[pathlistfile[1]]

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            '\n\nPerforming experiment {}\n==============================',
            exp_lbl)

        final = zeroth_defaults.dcp()
        final.merge(experiment)

        exp_sources = final['sources']
        exp_params = final['params']
        exp_targets = final['targets']

        def val(x):
            return x
        yparams.logging('exp_sources = \n{}',
                        exp_sources.datastructure2string(function=val))
        yparams.logging('exp_params = \n{}',
                        exp_params.datastructure2string(function=val))
        yparams.logging('exp_targets = \n{}',
                        exp_targets.datastructure2string(function=val))

        # Load feature images
        # -------------------
        featureims = hp()
        for k, v in exp_sources['featureims'].iteritems():
            skeys = None
            if 'skeys' in v[2]:
                skeys = v[2]['skeys']
            featureims[k] = load_images(all_params[v[0]] + all_params[v[1]],
                                        skeys=skeys, logger=yparams)
        yparams.logging('\nFeatureims datastructure: \n\n{}',
                        featureims.datastructure2string(maxdepth=4))

        for exp_class_lbl, exp_class_src in exp_sources['paths'].iteritems():

            yparams.logging('\nWorking on {}\n------------------------------',
                            exp_class_lbl)

            # Load paths
            # ----------
            skeys = None
            if 'skeys' in exp_class_src[2]:
                skeys = exp_class_src[2]['skeys']
            paths = load_images(all_params[exp_class_src[0]] + all_params[exp_class_src[1]],
                                skeys=skeys, logger=yparams)
            yparams.logging('\nPaths datastructure: \n\n{}',
                            paths.datastructure2string(maxdepth=4))

            # Iterate over the paths
            for d, k, v, kl in paths[exp_class_src[2]['skeys'][0]].data_iterator(
                    leaves_only=True, yield_short_kl=True, maxdepth=3):

                yparams.logging(
                    '\nPath keylist: {}\n..............................',
                    kl + [k])

                segm_kl = kl + [k]
                imgs_kl = kl
                yparams.logging('segm_kl = {}', segm_kl)
                yparams.logging('imgs_kl = {}', imgs_kl)

                # Build an input featureims dict for the path computation
                infeatims = hp()
                sourcelist = exp_sources['featureims'].dcp()
                if 'segmentation' in sourcelist:
                    infeatims['segmentation'] = featureims['segmentation'][segm_kl]
                    sourcelist.pop('segmentation')
                for source in sourcelist:
                    # TODO: This is not nice... Here I try to remove a redundant key
                    infeatims[source] = featureims[source][imgs_kl][
                        featureims[source][imgs_kl].keys()[0]]
                # infeatims.populate()

                # Build an input dict for true paths
                inpaths = v.dcp()
                # inpaths.populate()

                # Get the necessary image shape
                for d2, k2, v2, kl2 in infeatims.data_iterator(leaves_only=True):
                    im_shp = v2.shape
                    break

                features = hp()
                # import time
                # start = time.time()
                # print 'Starting get_features'
                features[exp_lbl][[exp_class_lbl] + kl + [k]], \
                    pathlist[exp_lbl][[exp_class_lbl] + kl + [k]] = libhp.get_features(
                        inpaths, np.array(im_shp)[0:3], infeatims,
                        list(exp_params['features']),
                        exp_params['max_paths_per_label'],
                        logger=yparams,
                        anisotropy=exp_params['anisotropy'],
                        return_pathlist=True,
                        parallelized=exp_params['parallelize'],
                        max_threads=exp_params['max_threads'])
                # print 'Stopping get_features'
                # stop = time.time()
                # print stop-start

                yparams.logging('\nFeatures datastructure: \n\n{}',
                                features.datastructure2string(maxdepth=4))

                # Write the result to file
                features.write(
                    filepath=all_params[exp_targets['features'][0]]
                             + all_params[exp_targets['features'][1]])

    with open(pathlistfile, 'wb') as f:
        pickle.dump(pathlist, f)