def experiment_parser(yparams, function, name):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = Rdict(all_params[name])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = Rdict()

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            '\n\nPerforming experiment {}\n==============================',
            exp_lbl)

        final = zeroth_defaults.dcp()
        final.merge(experiment)

        function(final, yparams)
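
# A minimal standalone sketch of the defaults/experiment layering used above, assuming
# Rdict behaves like a nested dict whose dcp() is a deep copy and whose merge() is a
# recursive update; plain dicts and a hypothetical merge_nested() helper stand in here.
import copy


def merge_nested(base, overrides):
    # Recursively update 'base' with 'overrides' (illustrative stand-in for Rdict.merge).
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            merge_nested(base[key], value)
        else:
            base[key] = value
    return base


config = {
    'default': {'params': {'beta': 0.5}, 'sources': {}},
    'exp_a': {'params': {'beta': 0.7}},
}
defaults = config.pop('default')
for exp_lbl, experiment in config.items():
    final = merge_nested(copy.deepcopy(defaults), experiment or {})
    print(final['params']['beta'])  # 0.7: the experiment value overrides the default
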
Example #2
def compute_paths(yparams):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = Rdict(all_params['compute_paths'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = hp()

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            'Performing experiment {}\n==============================\n',
            exp_lbl)

        first = zeroth_defaults.dcp()
        if experiment is not None:
            first.merge(experiment)
        if 'default' in first:
            first_defaults = first.pop('default')
        else:
            first_defaults = hp()

        statistics = Rdict()

        for exp_class_lbl in ['truepaths', 'falsepaths']:

            # Final layer
            # -----------
            # The true or false paths, respectively, are computed for the current experiment
            yparams.logging(
                'Computing {}...\n------------------------------\n',
                exp_class_lbl)
            final = first_defaults.dcp()
            final.merge(first[exp_class_lbl])

            exp_sources = final['sources']
            exp_params = final['params']
            exp_target = final['target']

            # Load the necessary images
            data = hp()
            for datakey, content in exp_sources.iteritems():
                data[datakey] = load_images(all_params[content[0]] +
                                            all_params[content[1]],
                                            skeys=content[2]['skeys'],
                                            recursive_search=False,
                                            logger=yparams)

            yparams.logging('\nInitial datastructure: \n\n{}',
                            data.datastructure2string(maxdepth=4))
            yparams.logging('experiment_params: \n{}', exp_params)

            # Compute the paths
            # -----------------
            paths = hp()

            for_class = False
            if exp_class_lbl == 'truepaths':
                for_class = True
            paths[exp_lbl][exp_class_lbl], statistics[exp_lbl][
                exp_class_lbl] = libhp.compute_paths_for_class(
                    data,
                    'segm',
                    'conts',
                    'dt',
                    'gt',
                    exp_params,
                    for_class=for_class,
                    ignore=[],
                    debug=all_params['debug'],
                    logger=yparams)

            yparams.logging('\nPaths datastructure after running {}: \n\n{}',
                            exp_class_lbl, paths.datastructure2string())

            def val(x):
                return x

            yparams.logging(
                '\nStatistics after {}: \n\n{}', exp_class_lbl,
                simplify_statistics(
                    statistics[exp_lbl]).datastructure2string(function=val))

            # Save the result to disk
            # -----------------------
            targetfile = all_params[exp_target[0]] + all_params[exp_target[1]]
            paths.write(filepath=targetfile)

        def val(x):
            return x

        yparams.logging(
            '\nStatistics after full experiment: \n\n{}',
            simplify_statistics(
                statistics[exp_lbl]).datastructure2string(function=val))
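
# A small sketch of how a 'sources' entry above appears to resolve into a file path plus
# subset keys, following the all_params[content[0]] + all_params[content[1]] pattern used
# in compute_paths; the keys and file names below are hypothetical placeholders, not
# values from the repository.
all_params_example = {
    'intermedfolder': '/data/experiment/',
    'segmfile': 'segmentation.h5',
}
source_entry = ['intermedfolder', 'segmfile', {'skeys': ['beta_0.5']}]
filepath = all_params_example[source_entry[0]] + all_params_example[source_entry[1]]
subset_keys = source_entry[2]['skeys']
print(filepath, subset_keys)  # /data/experiment/segmentation.h5 ['beta_0.5']
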
Example #3
def random_forest(yparams, debug=False):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = Rdict(all_params['random_forest'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = hp()

    # pathlist = ipl()
    featlistfile = zeroth_defaults['targets', 'featlist']
    featlistfile = all_params[featlistfile[0]] + all_params[featlistfile[1]]

    classifier_file = zeroth_defaults['targets', 'classifier']
    classifier_file = all_params[classifier_file[0]] + all_params[
        classifier_file[1]]

    # yparams.logging('\nDatastructure of pathlistin:\n\n{}', pathlistin.datastructure2string())

    feature_space_lists = dict()
    classifiers = dict()

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            '\n\nPerforming experiment {}\n==============================',
            exp_lbl)

        final = zeroth_defaults.dcp()
        final.merge(experiment)

        exp_sources = final['sources']
        exp_params = final['params']
        exp_targets = final['targets']
        exp_source_kl = [exp_lbl]
        if len(exp_sources['train']) == 4:
            exp_source_kl = exp_sources['train'][3]
        exp_predict_kl = ['predict']
        if len(exp_sources['predict']) == 4:
            exp_predict_kl = exp_sources['predict'][3]
        if type(exp_source_kl) is str:
            exp_source_kl = [exp_source_kl]
        if type(exp_predict_kl) is str:
            exp_predict_kl = [exp_predict_kl]

        # Loading of the training pathlist(s)
        # --------------------------
        # Get the pathlist stored in features_of_paths
        pathlist_source = exp_sources.pop('train_pl')

        # Check for list or single file
        if type(pathlist_source) is Rdict:
            pathlistin_train = Rdict()
            for key, val in pathlist_source.iteritems():
                pathlistfile = all_params[val[0]] + all_params[val[1]]
                with open(pathlistfile, 'r') as f:
                    pathlistin_train[key] = Rdict(pickle.load(f))
                if 'skeys' in val[2]:
                    pathlistin_train[key] = pathlistin_train[key].subset(
                        *val[2]['skeys'])
        else:
            pathlistfile = all_params[pathlist_source[0]] \
                           + all_params[pathlist_source[1]]
            with open(pathlistfile, 'r') as f:
                pathlistin_train = Rdict(pickle.load(f))
            if 'skeys' in pathlist_source[2]:
                pathlistin_train = pathlistin_train.subset(
                    *pathlist_source[2]['skeys'])
        yparams.logging('pathlistin_train.datastructure: \n{}\n',
                        pathlistin_train.datastructure2string(maxdepth=4))
        pathlistout = hp()

        # Loading of the prediction pathlist
        pathlist_source = exp_sources.pop('predict_pl')

        pathlistfile = all_params[pathlist_source[0]] \
                       + all_params[pathlist_source[1]]
        with open(pathlistfile, 'r') as f:
            pathlistin_predict = Rdict(pickle.load(f))
        if 'skeys' in pathlist_source[2]:
            pathlistin_predict = pathlistin_predict.subset(
                *pathlist_source[2]['skeys'])
        yparams.logging('pathlistin_predict.datastructure: \n{}\n',
                        pathlistin_predict.datastructure2string(maxdepth=4))

        # Load training data
        # ------------------

        if 'train' in exp_sources.keys():
            truesource = exp_sources['train']
            falsesource = exp_sources['train']
        else:
            truesource = exp_sources['traintrue']
            falsesource = exp_sources['trainfalse']

        # Check for list or single file
        if type(truesource) is Rdict:
            truetrainfeats = hp()
            for key, val in truesource.iteritems():
                truetrainfeats[key] = load_data(all_params[val[0]] +
                                                all_params[val[1]],
                                                logger=yparams,
                                                **val[2]).subset('truepaths',
                                                                 search=True)
        else:
            truetrainfeats = load_data(all_params[truesource[0]] +
                                       all_params[truesource[1]],
                                       logger=yparams,
                                       **truesource[2]).subset('truepaths',
                                                               search=True)
        if type(falsesource) is Rdict:
            falsetrainfeats = hp()
            for key, val in falsesource.iteritems():
                falsetrainfeats[key] = load_data(all_params[val[0]] +
                                                 all_params[val[1]],
                                                 logger=yparams,
                                                 **val[2]).subset('falsepaths',
                                                                  search=True)
        else:
            falsetrainfeats = load_data(all_params[falsesource[0]] +
                                        all_params[falsesource[1]],
                                        logger=yparams,
                                        **falsesource[2]).subset('falsepaths',
                                                                 search=True)

        # ------------------

        yparams.logging('\ntruetrainfeats.datastructure: \n{}\n',
                        truetrainfeats.datastructure2string(maxdepth=4))
        yparams.logging('\nfalsetrainfeats.datastructure: \n{}\n',
                        falsetrainfeats.datastructure2string(maxdepth=4))

        # Load prediction data
        predictsource = exp_sources['predict']
        predictfeats = load_data(all_params[predictsource[0]] +
                                 all_params[predictsource[1]],
                                 logger=yparams,
                                 **predictsource[2])
        yparams.logging('\npredictfeats.datastructure: \n{}\n',
                        predictfeats.datastructure2string(maxdepth=4))

        # # Load the data into memory
        # truetrainfeats.populate()
        # falsetrainfeats.populate()
        # predictfeats.populate()

        # Concatenate the different sources
        # 1. Of training data
        plo_true_train = hp()
        plo_false_train = hp()
        # truetrainfeats, plo_true['truepaths'] = libip.rf_combine_sources_new(
        #     truetrainfeats[exp_source_kl]['truepaths'].dcp(),
        #     pathlistin[exp_source_kl]['truepaths'].dcp()
        # )
        truetrainfeats, plo_true_train['train',
                                       'truepaths'] = libhp.rf_combine_sources(
                                           truetrainfeats,
                                           pathlistin_train.subset(
                                               'truepaths', search=True))
        falsetrainfeats, plo_false_train[
            'train', 'falsepaths'] = libhp.rf_combine_sources(
                falsetrainfeats,
                pathlistin_train.subset('falsepaths', search=True))
        pathlistout[exp_source_kl] = plo_true_train + plo_false_train
        # 2. Of prediction data
        ipf_true = hp()
        plo_true_predict = hp()
        ipf_true['truepaths'], plo_true_predict[
            'predict', 'truepaths'] = libhp.rf_combine_sources(
                predictfeats.subset('truepaths', search=True),
                pathlistin_predict.subset('truepaths', search=True))
        ipf_false = hp()
        plo_false_predict = hp()
        ipf_false['falsepaths'], plo_false_predict[
            'predict', 'falsepaths'] = libhp.rf_combine_sources(
                predictfeats.subset('falsepaths', search=True),
                pathlistin_predict.subset('falsepaths', search=True))
        inpredictfeats = ipf_true + ipf_false
        pathlistout[exp_source_kl,
                    'predict'] = plo_true_predict + plo_false_predict

        # Note:
        #   Due to the feature input being a dictionary organized by the feature images where
        #   the feature values come from
        #
        #       [source]
        #           'truepaths'|'falsepaths'
        #               [featureims]
        #                   'Sum':      [s1, ..., sN]
        #                   'Variance': [v1, ..., vN]
        #                   ...
        #               [Pathlength]:   [l1, ..., lN]
        #
        #   the exact order in which items are iterated over by data_iterator() is not known.
        #
        # Solution:
        #   Iterate over it once and store the keylist in an array (which conserves the order)
        #   When accumulating the features for each of the four corresponding subsets, namely
        #   training and testing set with true and false paths each, i.e.
        #   ['0'|'1']['truefeats'|'falsefeats'],
        #   the keylist is used, thus maintaining the correct order in every subset.
        #
        # And that is what is happening here:
        # #   1. Get the keylist of a full feature list, e.g. one of true paths
        # example_kl = None
        # for d2, k2, v2, kl2 in truetrainfeats.data_iterator():
        #     if k2 == 'truepaths':
        #         example_kl = kl2
        #         break
        # 2. Get the keylist order of the feature space
        # TODO: Write this to file
        feature_space_list = []
        for d2, k2, v2, kl2 in truetrainfeats.data_iterator():
            if type(v2) is not type(truetrainfeats):
                feature_space_list.append(kl2)

        feature_space_lists[exp_lbl] = feature_space_list

        intrain = hp()
        intrain['true'] = libhp.rf_make_feature_array_with_keylist(
            truetrainfeats, feature_space_list)
        yparams.logging(
            "Computed feature array for train['true'] with shape {}",
            intrain['true'].shape)
        intrain['false'] = libhp.rf_make_feature_array_with_keylist(
            falsetrainfeats, feature_space_list)
        yparams.logging(
            "Computed feature array for train['false'] with shape {}",
            intrain['false'].shape)

        inpredict = hp()
        inpredict['true'] = libhp.rf_make_feature_array_with_keylist(
            inpredictfeats['truepaths'], feature_space_list)
        yparams.logging(
            "Computed feature array for predict['true'] with shape {}",
            inpredict['true'].shape)
        inpredict['false'] = libhp.rf_make_feature_array_with_keylist(
            inpredictfeats['falsepaths'], feature_space_list)
        yparams.logging(
            "Computed feature array for predict['false'] with shape {}",
            inpredict['false'].shape)

        # Classify
        result = hp()
        result[exp_lbl], classifiers[exp_lbl] = libhp.random_forest(
            intrain,
            inpredict,
            debug=debug,
            balance=exp_params['balance_classes'],
            logger=yparams)

        # Evaluate
        new_eval = hp()
        # print [x[0] for x in result[kl]]
        # print [x[1] for x in result[kl]]
        new_eval[exp_lbl] = libhp.new_eval([x[0] for x in result[exp_lbl]],
                                           [x[1] for x in result[exp_lbl]])

        yparams.logging('+++ RESULTS +++')
        yparams.logging("[kl]")
        # for i in result[kl]:
        #     yparams.logging('{}', i)
        for key, value in new_eval[exp_lbl].iteritems():
            yparams.logging('{} = {}', key, value)

    with open(featlistfile, 'wb') as f:
        pickle.dump(feature_space_lists, f)

    # Store the classifiers
    with open(classifier_file, 'wb') as f:
        pickle.dump(classifiers, f)
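
# A minimal standalone sketch of the keylist technique explained in the Note/Solution
# comment inside random_forest above: record the nested key paths once, then always
# flatten feature dicts in that fixed order so array columns line up across subsets.
# Plain dicts and numpy stand in for the project-specific containers; the helpers below
# are illustrative assumptions, not the repository's rf_make_feature_array_with_keylist.
import numpy as np


def collect_key_lists(tree, prefix=()):
    # Walk the nested dict once and remember the order of the leaf key paths.
    key_lists = []
    for key, value in tree.items():
        if isinstance(value, dict):
            key_lists += collect_key_lists(value, prefix + (key,))
        else:
            key_lists.append(prefix + (key,))
    return key_lists


def feature_array_with_keylist(tree, key_lists):
    # Stack the per-path feature vectors as columns, in the recorded key order.
    columns = []
    for key_list in key_lists:
        node = tree
        for key in key_list:
            node = node[key]
        columns.append(np.asarray(node, dtype=float))
    return np.stack(columns, axis=1)


feats = {'raw': {'Sum': [1.0, 2.0], 'Variance': [0.1, 0.2]}, 'Pathlength': [5.0, 7.0]}
order = collect_key_lists(feats)
print(feature_array_with_keylist(feats, order).shape)  # (2, 3): two paths, three features
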
Example #4
    def shortest_paths(penaltypower,
                       bounds,
                       lbl,
                       keylist_lblim,
                       gt,
                       disttransf,
                       pathends,
                       for_class=True,
                       correspondence={},
                       avoid_duplicates=True,
                       max_paths_per_object=[],
                       max_paths_per_object_seed=[],
                       yield_in_bounds=False,
                       return_pathim=True,
                       minimum_alternative_label_count=0,
                       logger=None):
        """
        :param penaltypower:
        :param bounds:
        :param lbl:
        :param keylist_lblim: Needed for correspondence table
        :param disttransf:
        :param pathends:
        :param for_class:
            True: paths are computed when endpoints are in the same ground truth object
            False: paths are computed when endpoints are in different ground truth objects
        :param correspondence:
        :param avoid_duplicates:
        :param max_paths_per_object:
        :param max_paths_per_object_seed:
        :param yield_in_bounds:
        :param return_pathim:
        :param minimum_alternative_label_count: Paths of merges (for_class=False) are removed if
            too few pixels of the merged object are found
        :param logger:
        :return:
        """

        # Pick up some statistics along the way
        stats_excluded_paths = 0
        statistics = Rdict()

        # Determine the endpoints of the current object
        indices = np.where(pathends)
        coords = zip(indices[0], indices[1], indices[2])

        # Make pairwise list of coordinates serving as source and target
        # First determine all pairings
        all_pairs = []
        for i in xrange(0, len(coords) - 1):
            for j in xrange(i + 1, len(coords)):
                all_pairs.append((coords[i], coords[j]))
        # And only use those that satisfy certain criteria:
        # a) Are in either the same gt object (for_class=True)
        #    or in different gt objects (for_class=False)
        # b) Are not in the correspondence list
        pairs = []
        label_pairs = []
        # if avoid_duplicates:
        new_correspondence = {}
        for pair in all_pairs:
            # Determine whether the endpoints are in different gt objects
            if (gt[pair[0]] == gt[pair[1]]) == for_class:
                # Check correspondence list if pairings were already computed in different image
                labelpair = tuple(sorted([gt[pair[0]], gt[pair[1]]]))
                if avoid_duplicates:
                    if labelpair not in correspondence.keys():
                        pairs.append(pair)
                        label_pairs.append(labelpair)
                        # new_correspondence[labelpair] = [keylist_lblim, lbl]
                        if logger is not None:
                            logger.logging('Found pairing: {}', labelpair)
                    else:
                        if logger is not None:
                            logger.logging(
                                'Pairing already in correspondence table: {}',
                                labelpair)
                else:
                    pairs.append(pair)
                    if logger is not None:
                        logger.logging('Found pairing: {}', labelpair)
        # if avoid_duplicates:
        #     correspondence.update(new_correspondence)

        # Select a certain number of pairs if number is too high
        if max_paths_per_object:
            if len(pairs) > max_paths_per_object:
                if logger is not None:
                    logger.logging('Reducing number of pairs to {}',
                                   max_paths_per_object)
                if max_paths_per_object_seed:
                    random.seed(max_paths_per_object_seed)
                else:
                    random.seed()
                pairs = random.sample(pairs, max_paths_per_object)
                if logger is not None:
                    logger.logging('Modified pairs list: {}', pairs)

        # If pairs are found that satisfy all conditions
        if pairs:

            if logger is not None:
                logger.logging('Found {} pairings which satisfy all criteria',
                               len(pairs))
            else:
                print 'Found {} pairings which satisfy all criteria'.format(
                    len(pairs))

            # Pre-processing of the distance transform
            # a) Invert: the lowest values (i.e. the lowest penalty for the shortest path
            #    detection) should be at the center of the current process
            disttransf = lib.invert_image(disttransf)
            #
            # b) Set all values outside the process to infinity
            disttransf = lib.filter_values(disttransf,
                                           np.amax(disttransf),
                                           type='eq',
                                           setto=np.inf)
            #
            # c) Increase the value difference between pixels near the boundaries and pixels
            #    central within the processes. This increases the likelihood of the paths to
            #    follow the center of processes, thus avoiding short-cuts
            disttransf = lib.power(disttransf, penaltypower)

            # Compute the shortest paths according to the pairs list
            ps_computed, ps_in_bounds = lib.shortest_paths(
                disttransf,
                pairs,
                bounds=bounds,
                logger=logger,
                return_pathim=return_pathim,
                yield_in_bounds=yield_in_bounds)

            # Criteria for keeping paths which can only be computed after path computation
            if for_class:
                # A path without merge must not switch labels on the way!
                ps = []
                for i in xrange(0, len(ps_computed)):
                    if len(
                            np.unique(gt[ps_in_bounds[i][:, 0],
                                         ps_in_bounds[i][:, 1],
                                         ps_in_bounds[i][:, 2]])) == 1:
                        ps.append(ps_computed[i])
                        if logger is not None:
                            logger.logging('Path label = True')

                        # Add entry to correspondence table
                        if avoid_duplicates:
                            new_correspondence[label_pairs[i]] = [
                                keylist_lblim, lbl
                            ]

                    else:
                        # The path crossed more than one ground truth object on the way and is not added to the list
                        if logger is not None:
                            logger.logging(
                                'Path starting and ending in label = {} had multiple labels and was excluded',
                                gt[tuple(ps_in_bounds[i][0])])

                        stats_excluded_paths += 1
            else:
                ps = []
                for i in xrange(0, len(ps_computed)):
                    un, counts = np.unique(gt[ps_in_bounds[i][:, 0],
                                              ps_in_bounds[i][:, 1],
                                              ps_in_bounds[i][:, 2]],
                                           return_counts=True)
                    # At least two of the entries in counts have to be larger than the threshold
                    c = 0
                    for count in counts:
                        if count >= minimum_alternative_label_count:
                            c += 1
                        if c > 1:
                            break
                    if c > 1:
                        ps.append(ps_computed[i])

                        # Add entry to correspondence table
                        if avoid_duplicates:
                            new_correspondence[label_pairs[i]] = [
                                keylist_lblim, lbl
                            ]

                    else:
                        if logger is not None:
                            logger.logging(
                                'Path starting in label {} and ending in {} only crossed one of the labels for {} voxels',
                                gt[tuple(ps_in_bounds[i][0])],
                                gt[tuple(ps_in_bounds[i][-1])], np.min(counts))

            statistics['excluded_paths'] = stats_excluded_paths
            statistics['kept_paths'] = len(ps)
            return ps, new_correspondence, statistics

        else:
            statistics['excluded_paths'] = 0
            statistics['kept_paths'] = 0
            return [], new_correspondence, statistics
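
# A minimal numpy sketch of the distance-transform preprocessing performed above before
# the shortest-path search (invert, push everything outside the object to infinite cost,
# raise to a penalty power). It assumes lib.invert_image subtracts the image from its
# maximum and lib.filter_values replaces matching values; these are assumptions about
# the helpers, not their actual source.
import numpy as np


def prepare_penalty_image(disttransf, penaltypower):
    dt = np.amax(disttransf) - disttransf  # a) invert: lowest cost at the object center
    dt[dt == np.amax(dt)] = np.inf         # b) outside the object: infinite cost
    return np.power(dt, penaltypower)      # c) sharpen the center/boundary contrast


dt_example = np.array([[0., 0., 0.],
                       [0., 2., 0.],
                       [0., 1., 0.]])
print(prepare_penalty_image(dt_example, 2))
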
Example #5
def compute_paths_for_class(indata,
                            labelskey,
                            pathendkey,
                            disttransfkey,
                            gtkey,
                            params,
                            for_class=True,
                            ignore=[],
                            logger=None,
                            debug=False):
    def shortest_paths(penaltypower,
                       bounds,
                       lbl,
                       keylist_lblim,
                       gt,
                       disttransf,
                       pathends,
                       for_class=True,
                       correspondence={},
                       avoid_duplicates=True,
                       max_paths_per_object=[],
                       max_paths_per_object_seed=[],
                       yield_in_bounds=False,
                       return_pathim=True,
                       minimum_alternative_label_count=0,
                       logger=None):
        """
        :param penaltypower:
        :param bounds:
        :param lbl:
        :param keylist_lblim: Needed for correspondence table
        :param disttransf:
        :param pathends:
        :param for_class:
            True: paths are computed when endpoints are in the same ground truth object
            False: paths are computed when endpoints are in different ground truth objects
        :param correspondence:
        :param avoid_duplicates:
        :param max_paths_per_object:
        :param max_paths_per_object_seed:
        :param yield_in_bounds:
        :param return_pathim:
        :param minimum_alternative_label_count: Paths of merges (for_class=False) are removed if
            too few pixels of the merged object are found
        :param logger:
        :return:
        """

        # Pick up some statistics along the way
        stats_excluded_paths = 0
        statistics = Rdict()

        # Determine the endpoints of the current object
        indices = np.where(pathends)
        coords = zip(indices[0], indices[1], indices[2])

        # Make pairwise list of coordinates serving as source and target
        # First determine all pairings
        all_pairs = []
        for i in xrange(0, len(coords) - 1):
            for j in xrange(i + 1, len(coords)):
                all_pairs.append((coords[i], coords[j]))
        # And only use those that satisfy certain criteria:
        # a) Are in either the same gt object (for_class=True)
        #    or in different gt objects (for_class=False)
        # b) Are not in the correspondence list
        pairs = []
        label_pairs = []
        # if avoid_duplicates:
        new_correspondence = {}
        for pair in all_pairs:
            # Determine whether the endpoints are in different gt objects
            if (gt[pair[0]] == gt[pair[1]]) == for_class:
                # Check correspondence list if pairings were already computed in different image
                labelpair = tuple(sorted([gt[pair[0]], gt[pair[1]]]))
                if avoid_duplicates:
                    if labelpair not in correspondence.keys():
                        pairs.append(pair)
                        label_pairs.append(labelpair)
                        # new_correspondence[labelpair] = [keylist_lblim, lbl]
                        if logger is not None:
                            logger.logging('Found pairing: {}', labelpair)
                    else:
                        if logger is not None:
                            logger.logging(
                                'Pairing already in correspondence table: {}',
                                labelpair)
                else:
                    pairs.append(pair)
                    if logger is not None:
                        logger.logging('Found pairing: {}', labelpair)
        # if avoid_duplicates:
        #     correspondence.update(new_correspondence)

        # Select a certain number of pairs if number is too high
        if max_paths_per_object:
            if len(pairs) > max_paths_per_object:
                if logger is not None:
                    logger.logging('Reducing number of pairs to {}',
                                   max_paths_per_object)
                if max_paths_per_object_seed:
                    random.seed(max_paths_per_object_seed)
                else:
                    random.seed()
                pairs = random.sample(pairs, max_paths_per_object)
                if logger is not None:
                    logger.logging('Modified pairs list: {}', pairs)

        # If pairs are found that satisfy all conditions
        if pairs:

            if logger is not None:
                logger.logging('Found {} pairings which satisfy all criteria',
                               len(pairs))
            else:
                print 'Found {} pairings which satisfy all criteria'.format(
                    len(pairs))

            # Pre-processing of the distance transform
            # a) Invert: the lowest values (i.e. the lowest penalty for the shortest path
            #    detection) should be at the center of the current process
            disttransf = lib.invert_image(disttransf)
            #
            # b) Set all values outside the process to infinity
            disttransf = lib.filter_values(disttransf,
                                           np.amax(disttransf),
                                           type='eq',
                                           setto=np.inf)
            #
            # c) Increase the value difference between pixels near the boundaries and pixels
            #    central within the processes. This increases the likelihood of the paths to
            #    follow the center of processes, thus avoiding short-cuts
            disttransf = lib.power(disttransf, penaltypower)

            # Compute the shortest paths according to the pairs list
            ps_computed, ps_in_bounds = lib.shortest_paths(
                disttransf,
                pairs,
                bounds=bounds,
                logger=logger,
                return_pathim=return_pathim,
                yield_in_bounds=yield_in_bounds)

            # Criteria for keeping paths which can only be computed after path computation
            if for_class:
                # A path without merge must not switch labels on the way!
                ps = []
                for i in xrange(0, len(ps_computed)):
                    if len(
                            np.unique(gt[ps_in_bounds[i][:, 0],
                                         ps_in_bounds[i][:, 1],
                                         ps_in_bounds[i][:, 2]])) == 1:
                        ps.append(ps_computed[i])
                        if logger is not None:
                            logger.logging('Path label = True')

                        # Add entry to correspondence table
                        if avoid_duplicates:
                            new_correspondence[label_pairs[i]] = [
                                keylist_lblim, lbl
                            ]

                    else:
                        # The path crossed more than one ground truth object on the way and is not added to the list
                        if logger is not None:
                            logger.logging(
                                'Path starting and ending in label = {} had multiple labels and was excluded',
                                gt[tuple(ps_in_bounds[i][0])])

                        stats_excluded_paths += 1
            else:
                ps = []
                for i in xrange(0, len(ps_computed)):
                    un, counts = np.unique(gt[ps_in_bounds[i][:, 0],
                                              ps_in_bounds[i][:, 1],
                                              ps_in_bounds[i][:, 2]],
                                           return_counts=True)
                    # At least two of the entries in counts have to be larger than the threshold
                    c = 0
                    for count in counts:
                        if count >= minimum_alternative_label_count:
                            c += 1
                        if c > 1:
                            break
                    if c > 1:
                        ps.append(ps_computed[i])

                        # Add entry to correspondence table
                        if avoid_duplicates:
                            new_correspondence[label_pairs[i]] = [
                                keylist_lblim, lbl
                            ]

                    else:
                        if logger is not None:
                            logger.logging(
                                'Path starting in label {} and ending in {} only crossed one of the labels for {} voxels',
                                gt[tuple(ps_in_bounds[i][0])],
                                gt[tuple(ps_in_bounds[i][-1])], np.min(counts))

            statistics['excluded_paths'] = stats_excluded_paths
            statistics['kept_paths'] = len(ps)
            return ps, new_correspondence, statistics

        else:
            statistics['excluded_paths'] = 0
            statistics['kept_paths'] = 0
            return [], new_correspondence, statistics

    def shortest_paths_wrapper(labelim,
                               gt_im,
                               dt_im,
                               bc_im,
                               lbl,
                               kl,
                               k,
                               params,
                               for_class=True,
                               correspondence={},
                               logger=None):

        print 'Wrapper called...'

        # Create an image that contains only the one object
        lblim = np.zeros(labelim.shape, dtype=np.uint16)
        lblim[labelim == lbl] = lbl

        # Get the region of the one object
        bounds = lib.find_bounding_rect(lblim, s_=True)

        # Crop the label image
        lblim = lib.crop_bounding_rect(lblim, bounds)

        # Crop the gt as well
        gt_im = lib.crop_bounding_rect(gt_im, bounds=bounds)
        # Crop and mask the distance transform
        dt_im = lib.crop_bounding_rect(dt_im, bounds=bounds)
        dt_im[lblim == 0] = 0
        # Crop and mask border contacts
        bc_im = lib.crop_bounding_rect(bc_im, bounds=bounds)
        bc_im[lblim == 0] = 0
        # Done: Check for correctness

        # Compute all paths within this object which start and end in the same
        #     gt-object (for_class=True) or in different gt-objects (for_class=False)
        # Supply the correspondence table to this function and only compute a path
        #     if the respective correspondence is not found
        return shortest_paths(
            params['penaltypower'],
            bounds,
            lbl,
            kl + [k],
            gt_im,
            dt_im,
            bc_im,
            for_class=for_class,
            correspondence=correspondence,
            avoid_duplicates=params['avoid_duplicates'],
            max_paths_per_object=params['max_paths_per_object'],
            max_paths_per_object_seed=params['max_paths_per_object_seed'],
            yield_in_bounds=True,
            return_pathim=False,
            minimum_alternative_label_count=params[
                'minimum_alternative_label_count'],
            logger=logger)

    correspondence_table = {}
    # correspondence_table (type=dict) should have the form:
    # {tuple(labels_in_gt_i): [kl_labelsimage_i, label_i]}
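    # e.g. {(12, 57): [['z_train', '0', 'beta_0.5'], 103]}  (hypothetical example values)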

    paths = hp()
    statistics = Rdict()

    if params['order_of_betas'] is not None:

        key_lists = []
        for i in params['order_of_betas']:
            key_lists += indata[labelskey].find_key_lists(i)

    else:

        key_lists = []
        for d, k, v, kl in indata[labelskey].data_iterator(leaves_only=True):
            key_lists.append(kl)

    # Iterate over segmentations
    # for d, k, v, kl in indata[labelskey].data_iterator(leaves_only=True, yield_short_kl=True):
    for i in key_lists:

        k = i[-1]
        kl = i[0:-1]

        if logger is not None:
            logger.logging('====================')
            logger.logging('Working on image {}', k)
            logger.logging('correspondence_table = {}', correspondence_table)
        else:
            print '===================='
            print 'Working on image {}'.format(k)
            print 'correspondence_table = {}'.format(correspondence_table)

        # Load the current segmentation image
        labelim = np.array(indata[labelskey][kl][k])
        # indata[labelskey][kl].populate(k)

        # TODO: Parallelize here
        # Iterate over all labels of that image (including cropping for speed-up)

        # Determine a list of present labels
        label_list = np.unique(labelim)
        label_list = filter(lambda x: x != 0, label_list)

        if params['parallelize']:

            if logger is not None:
                logger.logging('Starting thread pool with {} threads',
                               params['max_threads'])
            with futures.ThreadPoolExecutor(params['max_threads']) as do_stuff:
                tasks = Rdict()

                for lbl in label_list:

                    tasks[lbl] = do_stuff.submit(
                        shortest_paths_wrapper,
                        labelim,
                        np.array(
                            indata[gtkey][kl][indata[gtkey][kl].keys()[0]]),
                        np.array(indata[disttransfkey][kl][k]['disttransf',
                                                              'raw']),
                        np.array(indata[pathendkey][kl][k]['contacts']),
                        lbl,
                        kl,
                        k,
                        params,
                        for_class=for_class,
                        correspondence=correspondence_table,
                        logger=logger)

            for lbl in label_list:

                newpaths, new_correspondence_table, new_statistics = tasks[
                    lbl].result()

                correspondence_table.update(new_correspondence_table)

                statistics[kl + [k] + [lbl]] = new_statistics

                # If new paths were detected
                if newpaths:
                    # Store them
                    # paths.merge(newpaths)

                    pskeys = range(0, len(newpaths))
                    paths[kl + [k] +
                          [lbl]] = hp(data=dict(zip(pskeys, newpaths)))

                    if logger is not None:
                        logger.logging(
                            'Found {} paths in image {} at label {}',
                            len(newpaths), k, lbl)
                        logger.logging('-------------------')
                    else:
                        print 'Found {} paths in image {} at label {}'.format(
                            len(newpaths), k, lbl)
                        print '-------------------'

        else:

            # Iterate over these labels
            for lbl in label_list:

                newpaths, new_correspondence_table, new_statistics = shortest_paths_wrapper(
                    labelim,
                    np.array(indata[gtkey][kl][indata[gtkey][kl].keys()[0]]),
                    np.array(indata[disttransfkey][kl][k]['disttransf',
                                                          'raw']),
                    np.array(indata[pathendkey][kl][k]['contacts']),
                    lbl,
                    kl,
                    k,
                    params,
                    for_class=for_class,
                    correspondence=correspondence_table,
                    logger=logger)

                correspondence_table.update(new_correspondence_table)

                statistics[kl + [k] + [lbl]] = new_statistics

                # If new paths were detected
                if newpaths:
                    # Store them
                    # paths.merge(newpaths)

                    pskeys = range(0, len(newpaths))
                    paths[kl + [k] +
                          [lbl]] = hp(data=dict(zip(pskeys, newpaths)))

                    if logger is not None:
                        logger.logging(
                            'Found {} paths in image {} at label {}',
                            len(newpaths), k, lbl)
                        logger.logging('-------------------')
                    else:
                        print 'Found {} paths in image {} at label {}'.format(
                            len(newpaths), k, lbl)
                        print '-------------------'

        # # Unload the current segmentation image
        # indata[labelskey][kl].unpopulate()

    return paths, statistics
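
# A minimal sketch of the per-label thread pool used above, assuming the standard
# concurrent.futures API (the code above uses the same interface, in Python 2 via the
# 'futures' backport); work_on_label is a dummy stand-in for shortest_paths_wrapper.
from concurrent import futures


def work_on_label(lbl):
    return 'paths for label {}'.format(lbl)


label_list = [3, 7, 12]
tasks = {}
with futures.ThreadPoolExecutor(max_workers=4) as pool:
    for lbl in label_list:
        tasks[lbl] = pool.submit(work_on_label, lbl)

# As above, the results are only collected once the pool has finished.
for lbl in label_list:
    print(tasks[lbl].result())
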
Example #6
def get_features(paths,
                 shp,
                 featureimages,
                 featurelist,
                 max_paths_per_label,
                 logger=None,
                 anisotropy=[1, 1, 1],
                 return_pathlist=False,
                 parallelized=False,
                 max_threads=5):
    """
    :param paths:
    :param featureimages:
    :param featurelist:
    :param max_paths_per_label:
    :param ipl:
    :param anisotropy:
    :param return_pathlist: When True a list of the path keys is returned in the same order as
        their features are stored -> Can be used for back-translation of the path classification
        to the respective object the path is in.
        It is basically a concatenation of the key list as yielded by the simultaneous iterator.
    :return:
    """

    newfeats = hp()

    # The path lengths only have to be computed once without using the vigra region features
    def compute_path_lengths(paths, anisotropy):

        path_lengths = []
        # for d, k, v, kl in paths.data_iterator():
        #     if type(v) is not type(paths):
        for path in paths:
            path_lengths.append(
                lib.compute_path_length(np.array(path), anisotropy))

        return np.array(path_lengths)

    # And only do it when desired
    pathlength = False
    try:
        featurelist.remove('Pathlength')
    except ValueError:
        # Means that 'Pathlength' was not in the list
        pass
    else:
        # 'Pathlength' was in the list and is now successfully removed
        pathlength = True

    if max_paths_per_label is not None:
        keylist = range(0, max_paths_per_label - 1)
        keylist = [str(x) for x in keylist]
    else:
        keylist = None

    if return_pathlist:
        pathlist = []

    # Iterate over all paths, yielding a list of one path per label object until no paths are left
    for i, keys, vals in paths.simultaneous_iterator(
            max_count_per_item=max_paths_per_label, keylist=keylist):
        # i is the iteration number
        # keys are respective labels and ids of the paths
        # vals are the coordinates of the path positions

        if return_pathlist:
            pathlist += keys

        if logger is not None:
            logger.logging('Working in iteration = {}', i)
            logger.logging('Keys: {}', keys)

        if not keys:
            continue

        # Create a working image
        image = np.zeros(shp, dtype=np.uint32)
        # And fill it with one path per label object
        c = 1
        for curk, curv in (dict(zip(keys, vals))).iteritems():
            curv = np.array(curv)
            if pathlength:
                if not newfeats.inkeys(['Pathlength']):
                    newfeats['Pathlength'] = np.array(
                        [lib.compute_path_length(curv, anisotropy)])
                else:
                    newfeats['Pathlength'] = np.concatenate(
                        (newfeats['Pathlength'],
                         [lib.compute_path_length(curv, anisotropy)]))
            curv = lib.swapaxes(curv, 0, 1)
            lib.positions2value(image, curv, c)
            c += 1

        # TODO: If this loop iterated over the parameter list it would be more broadly applicable
        if not parallelized:
            for d, k, v, kl in featureimages.data_iterator():

                if type(v) is not hp:

                    # Extract the region features of the working image
                    newnewfeats = hp(data=vigra.analysis.extractRegionFeatures(
                        np.array(v).astype(np.float32),
                        image,
                        ignoreLabel=0,
                        features=featurelist))
                    # Pick out the features that we asked for
                    newnewfeats = newnewfeats.subset(*featurelist)

                    # Done: Extract feature 'Count' manually due to anisotropy

                    # Append to the recently computed list of features
                    for nk, nv in newnewfeats.iteritems():
                        nv = nv[1:]
                        if newfeats.inkeys(kl + [nk]):
                            try:
                                newfeats[kl + [nk]] = np.concatenate(
                                    (newfeats[kl + [nk]], nv))
                            except ValueError:
                                pass
                        else:
                            newfeats[kl + [nk]] = nv

        elif parallelized:

            def extract_region_features(feat, im, ignore_label, featlist):
                return hp(
                    vigra.analysis.extractRegionFeatures(
                        feat, im, ignoreLabel=ignore_label, features=featlist))

            if logger is not None:
                logger.logging('Starting thread pool with a max of {} threads',
                               max_threads)
            with futures.ThreadPoolExecutor(max_threads) as do_stuff:

                keys = []
                vals = []
                tasks = Rdict()

                for d, k, v, kl in featureimages.data_iterator(
                        leaves_only=True):

                    # tasks[kl] = do_stuff.submit(
                    #     hp(vigra.analysis.extractRegionFeatures(
                    #         np.array(v).astype(np.float32), image, ignoreLabel=0,
                    #         features=featurelist
                    #     ))
                    # )
                    tasks[kl] = do_stuff.submit(extract_region_features,
                                                np.array(v).astype(np.float32),
                                                image, 0, featurelist)
                    keys.append(kl)

            for kl in keys:

                newnewfeats = tasks[kl].result()
                newnewfeats = newnewfeats.subset(*featurelist)
                for nk, nv in newnewfeats.iteritems():
                    nv = nv[1:]
                    if newfeats.inkeys(kl + [nk]):
                        try:
                            newfeats[kl + [nk]] = np.concatenate(
                                (newfeats[kl + [nk]], nv))
                        except ValueError:
                            pass
                    else:
                        newfeats[kl + [nk]] = nv

    if return_pathlist:
        return newfeats, pathlist
    else:
        return newfeats
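
# A minimal sketch of an anisotropy-aware path length as used by get_features above. It
# assumes lib.compute_path_length sums the Euclidean distances between consecutive,
# anisotropy-scaled path coordinates; this is an assumption about the helper, not its
# actual source.
import numpy as np


def compute_path_length_sketch(path, anisotropy):
    scaled = np.asarray(path, dtype=float) * np.asarray(anisotropy, dtype=float)
    steps = np.diff(scaled, axis=0)
    return float(np.sum(np.sqrt(np.sum(steps ** 2, axis=1))))


path_example = [(0, 0, 0), (0, 0, 1), (0, 1, 1)]
print(compute_path_length_sketch(path_example, anisotropy=[10, 1, 1]))  # 2.0
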
def features_of_paths(yparams):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = Rdict(all_params['features_of_paths'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = hp()

    pathlist = hp()
    pathlistfile = zeroth_defaults['targets', 'pathlist']
    pathlistfile = all_params[pathlistfile[0]] + all_params[pathlistfile[1]]

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            '\n\nPerforming experiment {}\n==============================',
            exp_lbl)

        final = zeroth_defaults.dcp()
        final.merge(experiment)

        exp_sources = final['sources']
        exp_params = final['params']
        exp_targets = final['targets']

        def val(x):
            return x

        yparams.logging('exp_sources = \n{}',
                        exp_sources.datastructure2string(function=val))
        yparams.logging('exp_params = \n{}',
                        exp_params.datastructure2string(function=val))
        yparams.logging('exp_targets = \n{}',
                        exp_targets.datastructure2string(function=val))

        # Load feature images
        # -------------------
        featureims = hp()
        for k, v in exp_sources['featureims'].iteritems():
            skeys = None
            if 'skeys' in v[2]:
                skeys = v[2]['skeys']
            featureims[k] = load_images(all_params[v[0]] + all_params[v[1]],
                                        skeys=skeys,
                                        logger=yparams)
        yparams.logging('\nFeatureims datastructure: \n\n{}',
                        featureims.datastructure2string(maxdepth=4))

        for exp_class_lbl, exp_class_src in exp_sources['paths'].iteritems():

            yparams.logging('\nWorking on {}\n------------------------------',
                            exp_class_lbl)

            # Load paths
            # ----------
            skeys = None
            if 'skeys' in exp_class_src[2]:
                skeys = exp_class_src[2]['skeys']
            paths = load_images(all_params[exp_class_src[0]] +
                                all_params[exp_class_src[1]],
                                skeys=skeys,
                                logger=yparams)
            yparams.logging('\nPaths datastructure: \n\n{}',
                            paths.datastructure2string(maxdepth=4))

            # Iterate over the paths
            for d, k, v, kl in paths[exp_class_src[2]['skeys']
                                     [0]].data_iterator(leaves_only=True,
                                                        yield_short_kl=True,
                                                        maxdepth=3):
                yparams.logging(
                    '\nPath keylist: {}\n..............................',
                    kl + [k])

                segm_kl = kl + [k]
                imgs_kl = kl
                yparams.logging('segm_kl = {}', segm_kl)
                yparams.logging('imgs_kl = {}', imgs_kl)

                # Build an input featureims dict for the path computation
                infeatims = hp()
                sourcelist = exp_sources['featureims'].dcp()
                if 'segmentation' in sourcelist:
                    infeatims['segmentation'] = featureims['segmentation'][
                        segm_kl]
                    sourcelist.pop('segmentation')
                for source in sourcelist:
                    # TODO: This is not nice... Here I try to remove a redundant key
                    infeatims[source] = featureims[source][imgs_kl][
                        featureims[source][imgs_kl].keys()[0]]
                # infeatims.populate()

                # Build an input dict of the paths for the current class
                inpaths = v.dcp()
                # inpaths.populate()

                # Get the necessary image shape
                for d2, k2, v2, kl2 in infeatims.data_iterator(
                        leaves_only=True):
                    im_shp = v2.shape
                    break

                features = hp()
                # import time
                # start = time.time()
                # print 'Starting get_features'
                features[exp_lbl][[exp_class_lbl] + kl + [k]], pathlist[
                    exp_lbl][[exp_class_lbl] + kl + [k]] = libhp.get_features(
                        inpaths,
                        np.array(im_shp)[0:3],
                        infeatims,
                        list(exp_params['features']),
                        exp_params['max_paths_per_label'],
                        logger=yparams,
                        anisotropy=exp_params['anisotropy'],
                        return_pathlist=True,
                        parallelized=exp_params['parallelize'],
                        max_threads=exp_params['max_threads'])
                # print 'Stopping get_features'
                # stop = time.time()
                # print stop-start

                yparams.logging('\nFeatures datastructure: \n\n{}',
                                features.datastructure2string(maxdepth=4))

                # Write the result to file
                features.write(
                    filepath=all_params[exp_targets['features'][0]] +
                    all_params[exp_targets['features'][1]])

    with open(pathlistfile, 'wb') as f:
        pickle.dump(pathlist, f)