Example 1
def make_feature_arrays(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['random_forest'])
    targetfile = params['resultfolder'] + params['resultsfile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}',
                ipl.datastructure2string(maxdepth=3))

    result = IPL()
    evaluation = rdict()

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if k == '0':

            ipl.logging(
                '===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate()

            # def shp(x):
            #     return x.shape

            # print ipl[kl]['0', 'true']
            # print ipl[kl].dss(function=shp)

            ipl[kl]['0', 'true'] = libip.rf_make_feature_array(ipl[kl]['0', 'true'])
            ipl.logging("Computed feature array for ['0', 'true'] with shape {}",
                        ipl[kl]['0', 'true'].shape)
            ipl[kl]['0', 'false'] = libip.rf_make_feature_array(ipl[kl]['0', 'false'])
            ipl.logging("Computed feature array for ['0', 'false'] with shape {}",
                        ipl[kl]['0', 'false'].shape)
            ipl[kl]['1', 'true'] = libip.rf_make_feature_array(ipl[kl]['1', 'true'])
            ipl.logging("Computed feature array for ['1', 'true'] with shape {}",
                        ipl[kl]['1', 'true'].shape)
            ipl[kl]['1', 'false'] = libip.rf_make_feature_array(ipl[kl]['1', 'false'])
            ipl.logging("Computed feature array for ['1', 'false'] with shape {}",
                        ipl[kl]['1', 'false'].shape)

            ipl.write(filepath=params['intermedfolder'] + 'feature_arrays.h5',
                      keys=[kl])
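
libip.rf_make_feature_array is not shown on this page; judging from how its output is used (an object whose .shape gets logged), it presumably flattens a per-feature dictionary into a single samples-by-features matrix. A minimal sketch of that idea, with all names hypothetical:

import numpy as np

def make_feature_array_sketch(feature_dict):
    # Hypothetical stand-in for libip.rf_make_feature_array: stack the
    # per-feature value lists column-wise into an (N samples, F features)
    # array. The real implementation may differ.
    columns = [np.asarray(v, dtype=float) for v in feature_dict.values()]
    return np.stack(columns, axis=1)

print(make_feature_array_sketch({'Sum': [1., 2.], 'Variance': [3., 4.]}).shape)  # (2, 2)
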
Example 2
def find_border_contacts(yparams):

    params = yparams.get_params()
    thisparams = rdict(params['find_border_contacts'])
    # targetfile = params['intermedfolder'] + params['featureimsfile']

    data = ipl()
    for sourcekey, source in thisparams['sources'].iteritems():

        # Load the necessary images
        #   1. Determine the settings for fetching the data
        try:
            recursive_search = False
            recursive_search = thisparams['skwargs', 'default',
                                          'recursive_search']
            recursive_search = thisparams['skwargs', sourcekey,
                                          'recursive_search']
        except KeyError:
            pass
        if len(source) > 2:
            skeys = source[2]
        else:
            skeys = None

        #   2. Load the data
        data[sourcekey] = load_images(params[source[0]] + params[source[1]],
                                      skeys=skeys,
                                      recursive_search=recursive_search,
                                      logger=yparams)

    # TODO: Get rid of this at some point! Probably re-implement the whole data-loading step
    # Note: reduce_from_leaves is called twice, presumably to collapse two hierarchy levels
    data['disttransf'].reduce_from_leaves(iterate=False)
    data['disttransf'].reduce_from_leaves(iterate=False)

    # Set targetfile
    targetfile = params[thisparams['target'][0]] \
                 + params[thisparams['target'][1]]

    yparams.logging('\nInitial datastructure: \n\n{}',
                    data.datastructure2string(maxdepth=4))

    for d, k, v, kl in data['disttransf'].data_iterator(yield_short_kl=True,
                                                        leaves_only=True):

        yparams.logging(
            '===============================\nWorking on image: {}', kl + [k])

        # # TODO: Implement copy full logger
        # data[kl].set_logger(data.get_logger())

        # We need: the distance transform of the MERGED labels (i.e. segmentation) and the
        #   corresponding segmentation
        data['segmentation'][kl][k] = libip.find_border_contacts_arr(
            data['segmentation'][kl][k],
            data['disttransf'][kl][k],
            tkey=params['borderctname'],
            debug=params['debug'])

        # Write the result to file
        data['segmentation'].write(filepath=targetfile, keys=[kl + [k]])
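
The try/except cascade near the top layers three candidates for recursive_search: a hard-coded False, a global 'skwargs'/'default' entry, and a per-source override. Because a single KeyError aborts the whole block, a missing 'default' entry also skips the per-source lookup. A standalone sketch of an alternative where each layer falls back independently (helper name hypothetical):

def layered_get(mapping, keys, default):
    # Hypothetical helper: walk a nested dict along 'keys', returning
    # 'default' as soon as any level is missing.
    for key in keys:
        try:
            mapping = mapping[key]
        except (KeyError, TypeError):
            return default
    return mapping

skwargs = {'default': {'recursive_search': True}, 'disttransf': {}}
rs = layered_get(skwargs, ('disttransf', 'recursive_search'),
                 layered_get(skwargs, ('default', 'recursive_search'), False))
print(rs)  # True: no per-source value, so the 'default' entry wins
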
Example 3
def evaluation(x):
    # x is a list of pairs of 0/1 labels, with 0 denoting the positive
    # class; the counters below classify each pair by how its two
    # entries agree or disagree.

    def tp(l):
        # true positives: both entries 0
        return len([x for x in l if (x[0] + x[1] == 0)])

    def fp(l):
        # false positives: entries disagree, first entry positive (0)
        return len([x for x in l if (x[0] + x[1] == 1 and x[0] == 0)])

    def fn(l):
        # false negatives: entries disagree, first entry negative (1)
        return len([x for x in l if (x[0] + x[1] == 1 and x[0] == 1)])

    def tn(l):
        # true negatives: both entries 1
        return len([x for x in l if (x[0] + x[1] == 2)])

    def p(l):
        # all pairs whose first entry is positive
        return len([x for x in l if (x[0] == 0)])

    def n(l):
        # all pairs whose first entry is negative
        return len([x for x in l if (x[0] == 1)])

    def recall(l):
        return float(tp(l)) / (tp(l) + fn(l))

    def precision(l):
        return float(tp(l)) / (tp(l) + fp(l))

    def f1(l):
        return float(2 * tp(l)) / (2 * tp(l) + fp(l) + fn(l))

    def accuracy(l):
        return float(tp(l) + tn(l)) / (p(l) + n(l))

    return rdict(data={
        'precision': precision(x), 'recall': recall(x), 'f1': f1(x), 'accuracy': accuracy(x)
    })
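
A quick standalone check of the counting arithmetic (plain Python, no rdict needed):

pairs = [(0, 0), (0, 0), (0, 1), (1, 0), (1, 1)]

tp = len([e for e in pairs if e[0] + e[1] == 0])                # 2
fp = len([e for e in pairs if e[0] + e[1] == 1 and e[0] == 0])  # 1
fn = len([e for e in pairs if e[0] + e[1] == 1 and e[0] == 1])  # 1
tn = len([e for e in pairs if e[0] + e[1] == 2])                # 1

print(float(tp) / (tp + fp))        # precision: 0.666...
print(float(tp) / (tp + fn))        # recall:    0.666...
print(float(tp + tn) / len(pairs))  # accuracy:  0.6
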
Example 4
def features_of_paths(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['features_of_paths'])
    targetfile = params['intermedfolder'] + params['featuresfile']

    # Load the necessary images
    paths_true, paths_false, featureims_true, featureims_false = load_images(
        ipl)
    ipl.logging('\nInitial datastructure: \n\n{}',
                ipl.datastructure2string(maxdepth=3))

    for d, k, v, kl in paths_true.data_iterator(yield_short_kl=True):

        if k == 'path':

            ipl.logging(
                '===============================\nWorking on group: {}', kl)

            # # TODO: Implement copy full logger
            # ipl[kl].set_logger(ipl.get_logger())

            # # Load the image data into memory
            # ipl[kl].populate()

            ipl[kl] = libip.features_of_paths(ipl, paths_true[kl][k],
                                              paths_false[kl][k],
                                              featureims_true[kl],
                                              featureims_false[kl], kl)

            # Write the result to file
            ipl.write(filepath=targetfile, keys=[kl])
            # # Free memory
            ipl[kl] = None
Example 5
def compute_selected_features(ipl, params):

    thisparams = rdict(data=params['compute_feature_images'])
    targetfile = params['intermedfolder'] + params['featureimsfile']

    maxd = ipl.maxdepth()

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if d == maxd:
            ipl.logging('----------------------\nWorking on image: {}', k)

            ipl[kl].populate(k)

            if k in [params['rawdataname'], params['probsname'], params['largeobjname'], params['largeobjmnames'][0]]:
                general_params = thisparams.dcp()
                del general_params['features']

                if k == params['rawdataname']:
                    subfeature_params = thisparams['features']['rawdata']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params, subfeature_params)
                elif k == params['probsname']:
                    subfeature_params = thisparams['features']['probs']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params, subfeature_params)
                elif k == params['largeobjname']:
                    subfeature_params = thisparams['features']['largeobj']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params, subfeature_params)
                elif k == params['largeobjmnames'][0]:
                    subfeature_params = thisparams['features']['largeobjm']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params, subfeature_params)

                ipl.write(filepath=targetfile, keys=[kl + [k]])
                ipl[kl][k] = None
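
The four elif branches above differ only in which subsection of thisparams['features'] they select for the current image key. A table-driven variant keeps that mapping in one place; the sketch below uses plain placeholder dicts rather than the project's params/rdict objects:

# Placeholder data standing in for params and thisparams['features'].
params = {'rawdataname': 'raw', 'probsname': 'probs'}
features = {'rawdata': {'anisotropy': [10, 1, 1]},
            'probs': {'anisotropy': [10, 1, 1]}}

# One dict replaces the if/elif chain: image key -> feature subsection.
feature_section = {params['rawdataname']: 'rawdata',
                   params['probsname']: 'probs'}

k = 'raw'
if k in feature_section:
    subfeature_params = features[feature_section[k]]
    print(subfeature_params)  # {'anisotropy': [10, 1, 1]}
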
Example 6
def run_compute_feature_images(yamlfile):

    ipl = IPL(yaml=yamlfile)

    ipl.set_indent(1)

    params = rdict(data=ipl.get_params())
    ipl.startlogger(filename=params['resultfolder'] + 'compute_feature_images.log', type='w', name='ComputeFeatureImages')

    try:

        # # Copy the script file and the parameters to the scriptsfolder
        # copy(inspect.stack()[0][1], params['scriptsfolder'])
        # copy(yamlfile, params['scriptsfolder'] + 'compute_feature_images.parameters.yml')

        # ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        compute_feature_images(ipl)

        # ipl.logging('\nFinal datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        # ipl.write(filepath=params['intermedfolder'] + params['largeobjfile'])

        ipl.logging('')
        ipl.stoplogger()

    except:

        ipl.errout('Unexpected error')
Example 7
def compute_feature_images(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['compute_feature_images'])
    # targetfile = params['intermedfolder'] + params['featureimsfile']

    # Load the necessary images
    load_images(ipl, thisparams['features'].keys())

    ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

    maxd = ipl.maxdepth()
    for d, k, v, kl in ipl.data_iterator(maxdepth=ipl.maxdepth() - 1):

        if d == maxd - 1:

            ipl.logging('===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # # Load the image data into memory
            # ipl[kl].populate(k)

            compute_selected_features(ipl.subset(kl), params)
Example 8
def merge_adjacent_objects(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['merge_adjacent_objects'])
    targetfile = params['intermedfolder'] + params['largeobjmfile']

    # Load the necessary images
    load_images(ipl, params['intermedfolder'], params['largeobjfile'], params['largeobjname'])
    ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if k == params['largeobjname']:

            ipl.logging('===============================\nWorking on image: {}', kl + [k])

            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate(k)

            ipl[kl] = libip.merge_adjacent_objects(
                ipl[kl], k,
                thisparams['numberbysize'], thisparams['numberbyrandom'], thisparams['seed'],
                targetnames=params['largeobjmnames'], algorithm=thisparams['algorithm']
            )

            # Write the result to file
            ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None
Example 9
def run_paths_of_merges(yamlfile, logging=True):

    ipl = IPL(yaml=yamlfile)

    ipl.set_indent(1)

    params = rdict(data=ipl.get_params())
    if logging:
        ipl.startlogger(filename=params['resultfolder'] +
                        'paths_of_merges.log',
                        type='w',
                        name='PathsOfMerges')
    else:
        ipl.startlogger()

    try:

        # # Copy the script file and the parameters to the scriptsfolder
        # copy(inspect.stack()[0][1], params['scriptsfolder'])
        # copy(yamlfile, params['scriptsfolder'] + 'paths_of_merges.parameters.yml')

        # ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        paths_of_merges(ipl, params['debug'])

        # ipl.logging('\nFinal datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        # ipl.write(filepath=params['intermedfolder'] + params['largeobjfile'])

        ipl.logging('')
        ipl.stoplogger()

    except:

        ipl.errout('Unexpected error')
Example 10
def run_find_border_contacts(yamlfile, logging=True):

    ipl = IPL(yaml=yamlfile)

    ipl.set_indent(1)

    params = rdict(data=ipl.get_params())
    if logging:
        ipl.startlogger(filename=params['resultfolder'] + 'find_border_contacts.log', type='w', name='FindBorderContacts')
    else:
        ipl.startlogger()

    try:

        # # Copy the script file and the parameters to the scriptsfolder
        # copy(inspect.stack()[0][1], params['scriptsfolder'])
        # copy(yamlfile, params['scriptsfolder'] + 'find_border_contacts.parameters.yml')

        # ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        find_border_contacts(ipl)

        # ipl.logging('\nFinal datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        # ipl.write(filepath=params['intermedfolder'] + params['largeobjfile'])

        ipl.logging('')
        ipl.stoplogger()

    except:

        ipl.errout('Unexpected error')
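
All of these run_* wrappers use a bare except:, which also intercepts SystemExit and KeyboardInterrupt before handing off to errout. A narrower variant of the same wrapper body (a sketch against the same ipl object as above):

try:
    find_border_contacts(ipl)
    ipl.logging('')
    ipl.stoplogger()
except Exception:
    # 'except Exception' leaves KeyboardInterrupt and SystemExit alone
    # while still routing unexpected errors through the project's logger.
    ipl.errout('Unexpected error')
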
Example 11
def compute_feature_images(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['compute_feature_images'])
    # targetfile = params['intermedfolder'] + params['featureimsfile']

    # Load the necessary images
    load_images(ipl, thisparams['features'].keys())

    ipl.logging('\nInitial datastructure: \n\n{}',
                ipl.datastructure2string(maxdepth=3))

    maxd = ipl.maxdepth()
    for d, k, v, kl in ipl.data_iterator(maxdepth=ipl.maxdepth() - 1):

        if d == maxd - 1:

            ipl.logging(
                '===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # # Load the image data into memory
            # ipl[kl].populate(k)

            compute_selected_features(ipl.subset(kl), params)
Example 12
def features_of_paths(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['features_of_paths'])
    targetfile = params['intermedfolder'] + params['featuresfile']

    # Load the necessary images
    paths_true, paths_false, featureims_true, featureims_false = load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

    for d, k, v, kl in paths_true.data_iterator(yield_short_kl=True):

        if k == 'path':

            ipl.logging('===============================\nWorking on group: {}', kl)

            # # TODO: Implement copy full logger
            # ipl[kl].set_logger(ipl.get_logger())

            # # Load the image data into memory
            # ipl[kl].populate()

            ipl[kl] = libip.features_of_paths(
                ipl,
                paths_true[kl][k], paths_false[kl][k],
                featureims_true[kl], featureims_false[kl],
                kl
            )

            # Write the result to file
            ipl.write(filepath=targetfile, keys=[kl])
            # # Free memory
            ipl[kl] = None
Example 13
def compute_feature_images(yparams):

    params = yparams.get_params()
    thisparams = rdict(params['compute_feature_images'])
    # targetfile = params['intermedfolder'] + params['featureimsfile']
    general_params = thisparams['general_params']

    for sourcekey, source in thisparams['sources'].iteritems():

        # Load the necessary images
        #   1. Determine the settings for fetching the data
        try:
            recursive_search = False
            recursive_search = thisparams['skwargs', 'default',
                                          'recursive_search']
            recursive_search = thisparams['skwargs', sourcekey,
                                          'recursive_search']
        except KeyError:
            pass
        if len(source) > 2:
            skeys = source[2]
        else:
            skeys = None

        #   2. Load the data
        data = load_images(params[source[0]] + params[source[1]],
                           skeys=skeys,
                           recursive_search=recursive_search,
                           logger=yparams)

        # Set targetfile
        targetfile = params[thisparams['targets', sourcekey][0]] \
                     + params[thisparams['targets', sourcekey][1]]

        yparams.logging('\nInitial datastructure: \n\n{}',
                        data.datastructure2string(maxdepth=3))

        for d, k, v, kl in data.data_iterator(yield_short_kl=True,
                                              leaves_only=True):

            yparams.logging(
                '===============================\nWorking on image: {}',
                kl + [k])

            # # TODO: Implement copy full logger
            # data[kl].set_logger(data.get_logger())

            # Load the image data into memory
            data[kl].populate(k)

            # compute_selected_features(data.subset(kl), params)
            subfeature_params = thisparams['features', sourcekey].dcp()
            data[kl][k] = compute_features(data[kl][k], general_params,
                                           subfeature_params)

            # Write the result to file
            data.write(filepath=targetfile, keys=[kl + [k]])
            # Free memory
            data[kl][k] = None
Example 14
def make_feature_arrays(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['random_forest'])
    targetfile = params['resultfolder'] + params['resultsfile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

    result = IPL()
    evaluation = rdict()

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if k == '0':

            ipl.logging('===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate()

            # def shp(x):
            #     return x.shape

            # print ipl[kl]['0', 'true']
            # print ipl[kl].dss(function=shp)

            ipl[kl]['0', 'true'] = libip.rf_make_feature_array(ipl[kl]['0', 'true'])
            ipl.logging("Computed feature array for ['0', 'true'] with shape {}", ipl[kl]['0', 'true'].shape)
            ipl[kl]['0', 'false'] = libip.rf_make_feature_array(ipl[kl]['0', 'false'])
            ipl.logging("Computed feature array for ['0', 'false'] with shape {}", ipl[kl]['0', 'false'].shape)
            ipl[kl]['1', 'true'] = libip.rf_make_feature_array(ipl[kl]['1', 'true'])
            ipl.logging("Computed feature array for ['1', 'true'] with shape {}", ipl[kl]['1', 'true'].shape)
            ipl[kl]['1', 'false'] = libip.rf_make_feature_array(ipl[kl]['1', 'false'])
            ipl.logging("Computed feature array for ['1', 'false'] with shape {}", ipl[kl]['1', 'false'].shape)

            ipl.write(filepath=params['intermedfolder'] + 'feature_arrays.h5', keys=[kl])
Example 15
def run_random_forest(yamlfile,
                      logging=True,
                      make_only_feature_array=False,
                      debug=False,
                      write=True):

    ipl = IPL(yaml=yamlfile)

    ipl.set_indent(1)

    params = rdict(data=ipl.get_params())
    if logging:
        ipl.startlogger(filename=params['resultfolder'] + 'random_forest.log',
                        type='w',
                        name='RandomForest')
    else:
        ipl.startlogger()

    try:

        # # Copy the script file and the parameters to the scriptsfolder
        # copy(inspect.stack()[0][1], params['scriptsfolder'])
        # copy(yamlfile, params['scriptsfolder'] + 'random_forest.parameters.yml')

        # ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        if make_only_feature_array:
            make_feature_arrays(ipl)
        else:
            result = IPL()
            result['result'], result['evaluation'] = random_forest(ipl,
                                                                   debug=debug)

            # ipl.logging('\nFinal datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

            if write:
                result.write(filepath=params['resultfolder'] +
                             params['resultsfile'])

        ipl.logging('')
        ipl.stoplogger()

    except:
        ipl.errout('Unexpected error')
Example 16
def compute_paths(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['compute_paths'])
    targetfile = params['intermedfolder'] + params['pathsfile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}',
                ipl.datastructure2string(maxdepth=3))

    maxd = ipl.maxdepth()
    for d, k, v, kl in ipl.data_iterator(maxdepth=ipl.maxdepth() - 1):

        if d == maxd - 1:

            ipl.logging(
                '===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate()

            # The first step is to just compute the paths, regardless of whether they cross a
            # label barrier or not
            ipl[kl] = libip.compute_paths_with_class(
                ipl[kl],
                params['largeobjmnames'][0],
                params['borderctname'],
                'disttransf',
                params['largeobjname'],
                thisparams,
                ignore=thisparams['ignorelabels'],
                max_end_count=thisparams['max_end_count'],
                max_end_count_seed=thisparams['max_end_count_seed'],
                debug=params['debug'])

            # Write the result to file
            ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None
Example 17
def paths_of_merges(ipl, debug=False):

    params = ipl.get_params()
    thisparams = rdict(params['paths_of_merges'])
    targetfile = params['intermedfolder'] + params['pathsfalsefile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}',
                ipl.datastructure2string(maxdepth=3))

    maxd = ipl.maxdepth()
    for d, k, v, kl in ipl.data_iterator(maxdepth=ipl.maxdepth() - 1):

        if d == maxd - 1:

            ipl.logging(
                '===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate()

            ipl[kl] = libip.paths_of_labelpairs(
                ipl[kl],
                params['largeobjmnames'][0],
                params['largeobjname'],
                params['largeobjmnames'][4],
                params['borderctname'],
                'disttransf',
                thisparams,
                ignore=thisparams['ignorelabels'],
                max_end_count=thisparams['max_end_count'],
                max_end_count_seed=thisparams['max_end_count_seed'],
                debug=debug)

            # Write the result to file
            ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None
Example 18
def paths_of_merges(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['paths_of_merges'])
    targetfile = params['intermedfolder'] + params['pathsfalsefile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

    maxd = ipl.maxdepth()
    for d, k, v, kl in ipl.data_iterator(maxdepth=ipl.maxdepth() - 1):

        if d == maxd - 1:

            ipl.logging('===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate()

            ipl[kl] = libip.paths_of_labelpairs(
                ipl[kl],
                params['largeobjmnames'][0],
                params['largeobjname'],
                params['largeobjmnames'][4],
                params['borderctname'],
                'disttransf',
                thisparams,
                ignore=thisparams['ignorelabels'],
                max_end_count=thisparams['max_end_count'],
                max_end_count_seed=thisparams['max_end_count_seed'],
                debug=False
            )

            # Write the result to file
            ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None
Example 19
def compute_selected_features(ipl, params):

    thisparams = rdict(data=params['compute_feature_images'])
    targetfile = params['intermedfolder'] + params['featureimsfile']

    maxd = ipl.maxdepth()

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if d == maxd:
            ipl.logging('----------------------\nWorking on image: {}', k)

            ipl[kl].populate(k)

            if k in [
                    params['rawdataname'], params['probsname'],
                    params['largeobjname'], params['largeobjmnames'][0]
            ]:
                general_params = thisparams.dcp()
                del general_params['features']

                if k == params['rawdataname']:
                    subfeature_params = thisparams['features']['rawdata']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params,
                                                  subfeature_params)
                elif k == params['probsname']:
                    subfeature_params = thisparams['features']['probs']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params,
                                                  subfeature_params)
                elif k == params['largeobjname']:
                    subfeature_params = thisparams['features']['largeobj']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params,
                                                  subfeature_params)
                elif k == params['largeobjmnames'][0]:
                    subfeature_params = thisparams['features']['largeobjm']
                    ipl[kl][k] = compute_features(ipl[kl][k], general_params,
                                                  subfeature_params)

                ipl.write(filepath=targetfile, keys=[kl + [k]])
                ipl[kl][k] = None
Example 20
def merge_adjacent_objects(ipl):

    params = ipl.get_params()
    thisparams = rdict(params['merge_adjacent_objects'])
    targetfile = params['intermedfolder'] + params['largeobjmfile']

    # Load the necessary images
    load_images(ipl, params['intermedfolder'], params['largeobjfile'],
                params['largeobjname'])
    ipl.logging('\nInitial datastructure: \n\n{}',
                ipl.datastructure2string(maxdepth=3))

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if k == params['largeobjname']:

            ipl.logging(
                '===============================\nWorking on image: {}',
                kl + [k])

            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate(k)

            ipl[kl] = libip.merge_adjacent_objects(
                ipl[kl],
                k,
                thisparams['numberbysize'],
                thisparams['numberbyrandom'],
                thisparams['seed'],
                targetnames=params['largeobjmnames'],
                algorithm=thisparams['algorithm'])

            # Write the result to file
            ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None
Example 21
def run_random_forest(yamlfile, logging=True, make_only_feature_array=False, debug=False, write=True):

    ipl = IPL(yaml=yamlfile)

    ipl.set_indent(1)

    params = rdict(data=ipl.get_params())
    if logging:
        ipl.startlogger(filename=params['resultfolder'] + 'random_forest.log', type='w', name='RandomForest')
    else:
        ipl.startlogger()

    try:

        # # Copy the script file and the parameters to the scriptsfolder
        # copy(inspect.stack()[0][1], params['scriptsfolder'])
        # copy(yamlfile, params['scriptsfolder'] + 'random_forest.parameters.yml')

        # ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

        if make_only_feature_array:
            make_feature_arrays(ipl)
        else:
            result = IPL()
            result['result'], result['evaluation'] = random_forest(ipl, debug=debug)

            # ipl.logging('\nFinal datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

            if write:
                result.write(filepath=params['resultfolder'] + params['resultsfile'])

        ipl.logging('')
        ipl.stoplogger()

    except:
        ipl.errout('Unexpected error')
Example 22
def features_of_paths(yparams):

    params = yparams.get_params()
    thisparams = rdict(params['features_of_paths'])

    featureims = ipl()

    # Load feature images
    feature_sources = thisparams['sources', 'featureims']
    feature_skwargs = thisparams['skwargs', 'featureims']
    for sourcekey, source in feature_sources.iteritems():

        # Load the necessary images
        #   1. Determine the settings for fetching the data
        try:
            recursive_search = False
            recursive_search = feature_skwargs['default', 'recursive_search']
            recursive_search = feature_skwargs[sourcekey, 'recursive_search']
        except KeyError:
            pass
        if len(source) > 2:
            skeys = source[2]
        else:
            skeys = None

        #   2. Load the data
        yparams.logging('skeys = {}', skeys)
        yparams.logging('recursive_search = {}', recursive_search)
        featureims[sourcekey] = load_images(params[source[0]] +
                                            params[source[1]],
                                            skeys=skeys,
                                            recursive_search=recursive_search,
                                            logger=yparams)

    yparams.logging('\nFeatureims datastructure: \n\n{}',
                    featureims.datastructure2string())

    # Load true and false paths
    paths = ipl()
    paths_sources = thisparams['sources', 'paths']
    paths_skwargs = thisparams['skwargs', 'paths']
    for sourcekey, source in paths_sources.iteritems():

        # Load the necessary images
        #   1. Determine the settings for fetching the data
        try:
            recursive_search = False
            recursive_search = paths_skwargs['default', 'recursive_search']
            recursive_search = paths_skwargs[sourcekey, 'recursive_search']
        except KeyError:
            pass
        if len(source) > 2:
            skeys = source[2]
        else:
            skeys = None

        #   2. Load the data
        yparams.logging('skeys = {}', skeys)
        yparams.logging('recursive_search = {}', recursive_search)
        paths[sourcekey] = load_images(params[source[0]] + params[source[1]],
                                       skeys=skeys,
                                       recursive_search=recursive_search,
                                       logger=yparams)

    yparams.logging('\nPaths datastructure: \n\n{}',
                    paths.datastructure2string(maxdepth=4))

    # Load the segmentation image datastructure (We just require the datastructure, not the data
    # itself)
    try:
        recursive_search = False
        recursive_search = thisparams['skwargs', 'segmentation',
                                      'recursive_search']
    except KeyError:
        pass
    if len(thisparams['sources', 'segmentation']) > 2:
        skeys = thisparams['sources', 'segmentation'][2]
    else:
        skeys = None
    segmentation = load_images(
        params[thisparams['sources', 'segmentation'][0]] +
        params[thisparams['sources', 'segmentation'][1]],
        skeys=skeys,
        recursive_search=recursive_search,
        logger=yparams)

    yparams.logging('\nSegmentation datastructure: \n\n{}',
                    segmentation.datastructure2string(maxdepth=4))

    # data['contacts'].reduce_from_leafs(iterate=True)
    # data['disttransf'].reduce_from_leafs(iterate=True)

    # Set targetfile
    featuresfile = params[thisparams['target'][0]] \
                 + params[thisparams['target'][1]]
    pathlistfile = params[thisparams['pathlist'][0]] \
                 + params[thisparams['pathlist'][1]]

    pathlist = ipl()

    for d, k, v, kl in segmentation.data_iterator(yield_short_kl=True,
                                                  leaves_only=True):
        yparams.logging(
            '===============================\nWorking on image: {}', kl + [k])

        # # TODO: Implement copy full logger
        # data[kl].set_logger(data.get_logger())

        # Build an input featureims dict for the path computation
        infeatims = ipl()
        sourcelist = thisparams['sources', 'featureims'].dcp()
        if 'segmentation' in sourcelist:
            infeatims['segmentation'] = featureims['segmentation'][kl][k]
            sourcelist.pop('segmentation')
        for source in sourcelist:
            infeatims[source] = featureims[source][kl]
        infeatims.populate()

        # Build the input dicts for true and false paths
        intruepaths = paths['truepaths'][kl][k]['truepaths']
        infalsepaths = paths['falsepaths'][kl][k]['falsepaths']
        intruepaths.populate()
        infalsepaths.populate()

        yparams.logging('\nInfeatims datastructure: \n\n{}',
                        infeatims.datastructure2string())
        yparams.logging('\nIntruepaths datastructure: \n\n{}',
                        intruepaths.datastructure2string(maxdepth=3))
        yparams.logging('\nInfalsepaths datastructure: \n\n{}',
                        infalsepaths.datastructure2string(maxdepth=3))

        features = ipl()
        features[kl + [k]], pathlist[kl + [k]] = libip.features_of_paths(
            yparams,
            intruepaths,
            infalsepaths,
            infeatims,
            infeatims,
            kl,
            return_pathlist=True)

        yparams.logging(
            '\nPathlist datastructure: \n\n{}',
            pathlist.datastructure2string(function=type, leaves_only=False))

        # Write the result to file
        features.write(filepath=featuresfile)
        # pathlist.astype(np.uint8)
        # pathlist.write(filepath=pathlistfile)

    with open(pathlistfile, 'wb') as f:
        pickle.dump(pathlist, f)
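
The path list is serialized with pickle rather than through the HDF5 writer (note the commented-out pathlist.write calls above). Since the file is written in binary mode, it must also be read back in binary mode. A minimal round trip with a throwaway file name:

import pickle

payload = {'sample_a': [['path', '0'], ['path', '1']]}
with open('pathlist_demo.pkl', 'wb') as f:
    pickle.dump(payload, f)
with open('pathlist_demo.pkl', 'rb') as f:
    restored = pickle.load(f)
assert restored == payload
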
Example 23
def random_forest(ipl, debug=False):

    params = ipl.get_params()
    thisparams = rdict(params['random_forest'])
    targetfile = params['resultfolder'] + params['resultsfile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}',
                ipl.datastructure2string(maxdepth=3))

    result = IPL()
    new_eval = rdict()
    evaluation = rdict()

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if k == '0':

            ipl.logging(
                '===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate()

            # def shp(x):
            #     return x.shape

            # print ipl[kl]['0', 'true']
            # print ipl[kl].dss(function=shp)

            ipl[kl]['0', 'true'] = libip.rf_make_feature_array(ipl[kl]['0', 'true'])
            ipl.logging("Computed feature array for ['0', 'true'] with shape {}",
                        ipl[kl]['0', 'true'].shape)
            ipl[kl]['0', 'false'] = libip.rf_make_feature_array(ipl[kl]['0', 'false'])
            ipl.logging("Computed feature array for ['0', 'false'] with shape {}",
                        ipl[kl]['0', 'false'].shape)
            ipl[kl]['1', 'true'] = libip.rf_make_feature_array(ipl[kl]['1', 'true'])
            ipl.logging("Computed feature array for ['1', 'true'] with shape {}",
                        ipl[kl]['1', 'true'].shape)
            ipl[kl]['1', 'false'] = libip.rf_make_feature_array(ipl[kl]['1', 'false'])
            ipl.logging("Computed feature array for ['1', 'false'] with shape {}",
                        ipl[kl]['1', 'false'].shape)

            # print '...'
            # print ipl[kl]['0']

            result[kl + ['0']] = libip.random_forest(ipl[kl]['0'], ipl[kl]['1'],
                                                     debug=debug)
            result[kl + ['1']] = libip.random_forest(ipl[kl]['1'], ipl[kl]['0'],
                                                     debug=debug)

            new_eval[kl + ['0']] = libip.new_eval([x[0] for x in result[kl]['0']],
                                                  [x[1] for x in result[kl]['0']])
            new_eval[kl + ['1']] = libip.new_eval([x[0] for x in result[kl]['1']],
                                                  [x[1] for x in result[kl]['1']])

            evaluation[kl + ['0']] = libip.evaluation(result[kl]['0'])
            evaluation[kl + ['1']] = libip.evaluation(result[kl]['1'])

            ipl.logging('+++ RESULTS +++')
            ipl.logging("[kl]['0']")
            # for i in result[kl]['0']:
            #     ipl.logging('{}', i)
            for key, value in evaluation[kl]['0'].iteritems():
                ipl.logging('{} = {}', key, value)
            for key, value in new_eval[kl]['0'].iteritems():
                ipl.logging('{} = {}', key, value)

            ipl.logging('+++')
            ipl.logging("[kl]['1']")
            # for i in result[kl]['1']:
            #     ipl.logging('{}', i)
            for key, value in evaluation[kl]['1'].iteritems():
                ipl.logging('{} = {}', key, value)
            for key, value in new_eval[kl]['1'].iteritems():
                ipl.logging('{} = {}', key, value)

            # # Write the result to file
            # ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None

    return IPL(data=result), IPL(data=evaluation)
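
The paired calls train on group '0' and evaluate on group '1', then swap, which amounts to a two-fold cross-validation over the two halves. libip.random_forest itself is not shown; a purely illustrative sketch of the same train/swap pattern using scikit-learn (not the project's code):

from sklearn.ensemble import RandomForestClassifier

def two_fold(X0, y0, X1, y1):
    # Train on one half, predict the other, then swap. Each result list
    # holds (prediction, truth) pairs, mirroring how new_eval() unpacks
    # result[kl]['0'] above.
    out = []
    for Xtr, ytr, Xte, yte in [(X0, y0, X1, y1), (X1, y1, X0, y0)]:
        clf = RandomForestClassifier(n_estimators=100).fit(Xtr, ytr)
        out.append(list(zip(clf.predict(Xte), yte)))
    return out
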
Example 24
def compute_paths(yparams):

    params = yparams.get_params()
    thisparams = rdict(params['compute_paths'])

    data = ipl()
    for sourcekey, source in thisparams['sources'].iteritems():

        # Load the necessary images
        #   1. Determine the settings for fetching the data
        try:
            recursive_search = False
            recursive_search = thisparams['skwargs', 'default',
                                          'recursive_search']
            recursive_search = thisparams['skwargs', sourcekey,
                                          'recursive_search']
        except KeyError:
            pass
        if len(source) > 2:
            skeys = source[2]
        else:
            skeys = None

        #   2. Load the data
        yparams.logging('skeys = {}', skeys)
        yparams.logging('recursive_search = {}', recursive_search)
        data[sourcekey] = load_images(params[source[0]] + params[source[1]],
                                      skeys=skeys,
                                      recursive_search=recursive_search,
                                      logger=yparams)

    data['contacts'].reduce_from_leafs(iterate=True)
    data['disttransf'].reduce_from_leafs(iterate=True)

    # Set targetfile
    targetfile = params[thisparams['target'][0]] \
                 + params[thisparams['target'][1]]

    yparams.logging('\nInitial datastructure: \n\n{}',
                    data.datastructure2string(maxdepth=3))

    for d, k, v, kl in data['segmentation'].data_iterator(yield_short_kl=True,
                                                          leaves_only=True):
        yparams.logging(
            '===============================\nWorking on image: {}', kl + [k])

        # # TODO: Implement copy full logger
        # data[kl].set_logger(data.get_logger())

        # prepare the dict for the path computation
        indata = ipl()
        indata['segmentation'] = np.array(data['segmentation'][kl][k])
        indata['contacts'] = np.array(data['contacts'][kl][k])
        indata['groundtruth'] = np.array(
            data['groundtruth'][kl][params['gtruthname']])
        indata['disttransf'] = np.array(data['disttransf'][kl][k])
        yparams.logging('Input datastructure: \n\n{}',
                        indata.datastructure2string())
        # Compute the paths sorted into their respective class
        paths = ipl()
        paths[kl + [k]] = libip.compute_paths_with_class(
            indata,
            'segmentation',
            'contacts',
            'disttransf',
            'groundtruth',
            thisparams,
            ignore=thisparams['ignorelabels'],
            max_end_count=thisparams['max_end_count'],
            max_end_count_seed=thisparams['max_end_count_seed'],
            debug=params['debug'])

        # Write the result to file
        paths.write(filepath=targetfile)
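
Wrapping each input in np.array(...) materializes the (possibly lazily loaded) HDF5 dataset as an in-memory copy before the path computation. The same effect in isolation, using h5py directly with a throwaway file:

import numpy as np
import h5py

with h5py.File('demo.h5', 'w') as f:
    f.create_dataset('segmentation', data=np.arange(8).reshape(2, 4))
with h5py.File('demo.h5', 'r') as f:
    seg = np.array(f['segmentation'])  # full ndarray copy, detached from the file
print(seg.shape)  # (2, 4)
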
Example 25
def random_forest(ipl, debug=False):

    params = ipl.get_params()
    thisparams = rdict(params['random_forest'])
    targetfile = params['resultfolder'] + params['resultsfile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

    result = IPL()
    new_eval = rdict()
    evaluation = rdict()

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if k == '0':

            ipl.logging('===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Note:
            #   Due to the feature input being a dictionary organized by the feature images where
            #   the feature values come from
            #
            #   [kl]
            #       '0'|'1'
            #           'true'|'false'
            #               [featureims]
            #                   'Sum':      [s1, ..., sN]
            #                   'Variance': [v1, ..., vN]
            #                   ...
            #               [Pathlength]:   [l1, ..., lN]
            #
            #   the exact order in which items are iterated over by data_iterator() is not known.
            #
            # Solution:
            #   Iterate over it once and store the keylist in an array (which conserves the order)
            #   When accumulating the features for each of the four corresponding subsets, namely
            #   the training and testing sets with true and false paths each, i.e.
            #   ['0'|'1']['true'|'false'],
            #   the keylist is used, thus maintaining the correct order in every subset.
            #
            # And that is what is happening here:
            keylist = []
            for d2, k2, v2, kl2 in ipl[kl]['0', 'true'].data_iterator():
                if type(v2) is not type(ipl[kl]['0', 'true']):
                    keylist.append(kl2)

            # Load the image data into memory
            ipl[kl].populate()

            # ipl[kl]['0', 'true'] = libip.rf_make_feature_array(ipl[kl]['0', 'true'])
            # ipl.logging("Computed feature array for ['0', 'true'] with shape {}", ipl[kl]['0', 'true'].shape)
            # ipl[kl]['0', 'false'] = libip.rf_make_feature_array(ipl[kl]['0', 'false'])
            # ipl.logging("Computed feature array for ['0', 'false'] with shape {}", ipl[kl]['0', 'false'].shape)
            # ipl[kl]['1', 'true'] = libip.rf_make_feature_array(ipl[kl]['1', 'true'])
            # ipl.logging("Computed feature array for ['1', 'true'] with shape {}", ipl[kl]['1', 'true'].shape)
            # ipl[kl]['1', 'false'] = libip.rf_make_feature_array(ipl[kl]['1', 'false'])
            # ipl.logging("Computed feature array for ['1', 'false'] with shape {}", ipl[kl]['1', 'false'].shape)

            ipl[kl]['0', 'true'] = libip.rf_make_feature_array_with_keylist(ipl[kl]['0', 'true'], keylist)
            ipl.logging("Computed feature array for ['0', 'true'] with shape {}", ipl[kl]['0', 'true'].shape)
            ipl[kl]['0', 'false'] = libip.rf_make_feature_array_with_keylist(ipl[kl]['0', 'false'], keylist)
            ipl.logging("Computed feature array for ['0', 'false'] with shape {}", ipl[kl]['0', 'false'].shape)
            ipl[kl]['1', 'true'] = libip.rf_make_feature_array_with_keylist(ipl[kl]['1', 'true'], keylist)
            ipl.logging("Computed feature array for ['1', 'true'] with shape {}", ipl[kl]['1', 'true'].shape)
            ipl[kl]['1', 'false'] = libip.rf_make_feature_array_with_keylist(ipl[kl]['1', 'false'], keylist)
            ipl.logging("Computed feature array for ['1', 'false'] with shape {}", ipl[kl]['1', 'false'].shape)

            # print '...'
            # print ipl[kl]['0']

            result[kl + ['0']] = libip.random_forest(ipl[kl]['0'], ipl[kl]['1'], debug=debug)
            result[kl + ['1']] = libip.random_forest(ipl[kl]['1'], ipl[kl]['0'], debug=debug)

            new_eval[kl + ['0']] = libip.new_eval([x[0] for x in result[kl]['0']], [x[1] for x in result[kl]['0']])
            new_eval[kl + ['1']] = libip.new_eval([x[0] for x in result[kl]['1']], [x[1] for x in result[kl]['1']])

            evaluation[kl + ['0']] = libip.evaluation(result[kl]['0'])
            evaluation[kl + ['1']] = libip.evaluation(result[kl]['1'])

            ipl.logging('+++ RESULTS +++')
            ipl.logging("[kl]['0']")
            for i in result[kl]['0']:
                ipl.logging('{}', i)
            # for key, value in evaluation[kl]['0'].iteritems():
            #     ipl.logging('{} = {}', key, value)
            for key, value in new_eval[kl]['0'].iteritems():
                ipl.logging('{} = {}', key, value)

            ipl.logging('+++')
            ipl.logging("[kl]['1']")
            for i in result[kl]['1']:
                ipl.logging('{}', i)
            # for key, value in evaluation[kl]['1'].iteritems():
            #     ipl.logging('{} = {}', key, value)
            for key, value in new_eval[kl]['1'].iteritems():
                ipl.logging('{} = {}', key, value)

            # # Write the result to file
            # ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None

    return IPL(data=result), IPL(data=evaluation)
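
The long Note in this variant is the key design point: the feature containers are dict-backed, so iteration order is not guaranteed to repeat across the four subsets, and a keylist captured once fixes one column order for all of them. A standalone illustration of why that matters:

# Two dicts holding the same feature names; relying on each dict's own
# iteration order could misalign columns between subsets. Fixing a single
# key order and indexing every dict by it keeps the columns aligned.
train_true = {'Sum': [1, 2], 'Variance': [3, 4]}
train_false = {'Variance': [7, 8], 'Sum': [5, 6]}

keylist = sorted(train_true)  # one fixed order for every subset
cols_true = [train_true[key] for key in keylist]
cols_false = [train_false[key] for key in keylist]
print(keylist)     # ['Sum', 'Variance']
print(cols_false)  # [[5, 6], [7, 8]], aligned with cols_true
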
Example 26
def compute_paths(yparams):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = rdict(all_params['compute_paths'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = ipl()

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            'Performing experiment {}\n==============================\n',
            exp_lbl)

        first = zeroth_defaults.dcp()
        first.merge(experiment)
        if 'default' in first:
            first_defaults = first.pop('default')
        else:
            first_defaults = ipl()

        statistics = rdict()

        for exp_class_lbl in ['truepaths', 'falsepaths']:

            # Final layer
            # -----------
            # The true or false paths for the current experiment are here computed, respectively
            yparams.logging(
                'Computing {}...\n------------------------------\n',
                exp_class_lbl)
            final = first_defaults.dcp()
            final.merge(first[exp_class_lbl])

            exp_sources = final['sources']
            exp_params = final['params']
            exp_target = final['target']

            # Load the necessary images
            data = ipl()
            for datakey, content in exp_sources.iteritems():
                data[datakey] = load_images(all_params[content[0]] +
                                            all_params[content[1]],
                                            skeys=content[2]['skeys'],
                                            recursive_search=False,
                                            logger=yparams)

            yparams.logging('\nInitial datastructure: \n\n{}',
                            data.datastructure2string(maxdepth=4))
            yparams.logging('experiment_params: \n{}', exp_params)

            # Compute the paths
            # -----------------
            paths = ipl()

            for_class = False
            if exp_class_lbl == 'truepaths':
                for_class = True
            paths[exp_lbl][exp_class_lbl], statistics[exp_lbl][exp_class_lbl] = \
                libip.compute_paths_for_class(
                    data,
                    'segm',
                    'conts',
                    'dt',
                    'gt',
                    exp_params,
                    for_class=for_class,
                    ignore=[],
                    debug=all_params['debug'],
                    logger=yparams)

            yparams.logging('\nPaths datastructure after running {}: \n\n{}',
                            exp_class_lbl, paths.datastructure2string())

            def val(x):
                return x

            yparams.logging(
                '\nStatistics after {}: \n\n{}', exp_class_lbl,
                simplify_statistics(
                    statistics[exp_lbl]).datastructure2string(function=val))

            # Save the result to disk
            # -----------------------
            targetfile = all_params[exp_target[0]] + all_params[exp_target[1]]
            paths.write(filepath=targetfile)

        def val(x):
            return x

        yparams.logging(
            '\nStatistics after full experiment: \n\n{}',
            simplify_statistics(
                statistics[exp_lbl]).datastructure2string(function=val))
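
The zeroth/first/final layering above repeats one pattern: pop a 'default' entry, deep-copy it, and merge each experiment's overrides on top. A standalone sketch of that pattern with plain dicts (rdict.merge is recursive; dict.update is the flat stand-in here):

import copy

experiments = {'default': {'ignore': [], 'debug': False},
               'exp_a': {'debug': True}}

defaults = experiments.pop('default')
for lbl, exp in sorted(experiments.items()):
    merged = copy.deepcopy(defaults)
    merged.update(exp)  # experiment-specific values win over the defaults
    print(lbl, merged)  # exp_a {'ignore': [], 'debug': True}
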
Example 27
def random_forest(ipl, debug=False):

    params = ipl.get_params()
    thisparams = rdict(params['random_forest'])
    targetfile = params['resultfolder'] + params['resultsfile']

    # Load the necessary images
    load_images(ipl)
    ipl.logging('\nInitial datastructure: \n\n{}', ipl.datastructure2string(maxdepth=3))

    result = IPL()
    new_eval = rdict()
    evaluation = rdict()

    for d, k, v, kl in ipl.data_iterator(yield_short_kl=True):

        if k == '0':

            ipl.logging('===============================\nWorking on group: {}', kl)

            # TODO: Implement copy full logger
            ipl[kl].set_logger(ipl.get_logger())

            # Load the image data into memory
            ipl[kl].populate()

            # def shp(x):
            #     return x.shape

            # print ipl[kl]['0', 'true']
            # print ipl[kl].dss(function=shp)

            ipl[kl]['0', 'true'] = libip.rf_make_feature_array(ipl[kl]['0', 'true'])
            ipl.logging("Computed feature array for ['0', 'true'] with shape {}", ipl[kl]['0', 'true'].shape)
            ipl[kl]['0', 'false'] = libip.rf_make_feature_array(ipl[kl]['0', 'false'])
            ipl.logging("Computed feature array for ['0', 'false'] with shape {}", ipl[kl]['0', 'false'].shape)
            ipl[kl]['1', 'true'] = libip.rf_make_feature_array(ipl[kl]['1', 'true'])
            ipl.logging("Computed feature array for ['1', 'true'] with shape {}", ipl[kl]['1', 'true'].shape)
            ipl[kl]['1', 'false'] = libip.rf_make_feature_array(ipl[kl]['1', 'false'])
            ipl.logging("Computed feature array for ['1', 'false'] with shape {}", ipl[kl]['1', 'false'].shape)

            # print '...'
            # print ipl[kl]['0']

            result[kl + ['0']] = libip.random_forest(ipl[kl]['0'], ipl[kl]['1'], debug=debug)
            result[kl + ['1']] = libip.random_forest(ipl[kl]['1'], ipl[kl]['0'], debug=debug)

            new_eval[kl + ['0']] = libip.new_eval([x[0] for x in result[kl]['0']], [x[1] for x in result[kl]['0']])
            new_eval[kl + ['1']] = libip.new_eval([x[0] for x in result[kl]['1']], [x[1] for x in result[kl]['1']])

            evaluation[kl + ['0']] = libip.evaluation(result[kl]['0'])
            evaluation[kl + ['1']] = libip.evaluation(result[kl]['1'])

            ipl.logging('+++ RESULTS +++')
            ipl.logging("[kl]['0']")
            # for i in result[kl]['0']:
            #     ipl.logging('{}', i)
            for key, value in evaluation[kl]['0'].iteritems():
                ipl.logging('{} = {}', key, value)
            for key, value in new_eval[kl]['0'].iteritems():
                ipl.logging('{} = {}', key, value)

            ipl.logging('+++')
            ipl.logging("[kl]['1']")
            # for i in result[kl]['1']:
            #     ipl.logging('{}', i)
            for key, value in evaluation[kl]['1'].iteritems():
                ipl.logging('{} = {}', key, value)
            for key, value in new_eval[kl]['1'].iteritems():
                ipl.logging('{} = {}', key, value)

            # # Write the result to file
            # ipl.write(filepath=targetfile, keys=[kl])
            # Free memory
            ipl[kl] = None

    return IPL(data=result), IPL(data=evaluation)
Example 28
def random_forest(yparams, debug=False):

    all_params = yparams.get_params()

    # Zero'th layer:
    # --------------
    zeroth = rdict(all_params['random_forest'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = ipl()

    # pathlist = ipl()
    # pathlistfile = zeroth_defaults['targets', 'pathlist']
    # pathlistfile = all_params[pathlistfile[0]] + all_params[pathlistfile[1]]

    # yparams.logging('\nDatastructure of pathlistin:\n\n{}', pathlistin.datastructure2string())

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging('\n\nPerforming experiment {}\n==============================', exp_lbl)

        final = zeroth_defaults.dcp()
        final.merge(experiment)

        exp_sources = final['sources']
        exp_params = final['params']
        exp_targets = final['targets']
        exp_source_kl = [exp_lbl]
        if len(exp_sources['train']) == 4:
            exp_source_kl = exp_sources['train'][3]
        exp_predict_kl = ['predict']
        if len(exp_sources['predict']) == 4:
            exp_predict_kl = exp_sources['predict'][3]
        if type(exp_source_kl) is str:
            exp_source_kl = [exp_source_kl]
        if type(exp_predict_kl) is str:
            exp_predict_kl = [exp_predict_kl]

        # Get the pathlist stored in features_of_paths
        pathlist_source = exp_sources.pop('pathlist')
        pathlistfile = all_params[pathlist_source[0]] \
                       + all_params[pathlist_source[1]]
        with open(pathlistfile, 'rb') as f:
            pathlistin = pickle.load(f)

        if len(pathlist_source) > 2:
            if 'skeys' in pathlist_source[2]:
                pathlistin = pathlistin.subset(*pathlist_source[2]['skeys'])
                yparams.logging('Took pathlist subset with skeys = {}', pathlist_source[2]['skeys'])

        yparams.logging('pathlistin.datastructure: \n{}\n', pathlistin.datastructure2string(maxdepth=4))
        pathlistout = ipl()

        # Load training data
        if 'train' in exp_sources.keys():
            truesource = exp_sources['train']
            falsesource = exp_sources['train']
        else:
            truesource = exp_sources['traintrue']
            falsesource = exp_sources['trainfalse']
        truetrainfeats = load_data(
            all_params[truesource[0]] + all_params[truesource[1]], logger=yparams, **truesource[2]
        ).subset('truepaths', search=True)
        falsetrainfeats = load_data(
            all_params[falsesource[0]] + all_params[falsesource[1]], logger=yparams, **falsesource[2]
        ).subset('falsepaths', search=True)

        yparams.logging('\ntruetrainfeats.datastructure: \n{}\n', truetrainfeats.datastructure2string(maxdepth=4))
        yparams.logging('\nfalsetrainfeats.datastructure: \n{}\n', falsetrainfeats.datastructure2string(maxdepth=4))

        # Load prediction data
        predictsource = exp_sources['predict']
        predictfeats = load_data(
            all_params[predictsource[0]] + all_params[predictsource[1]],
            logger=yparams, **predictsource[2]
        )
        yparams.logging('\npredictfeats.datastructure: \n{}\n', predictfeats.datastructure2string(maxdepth=4))

        # Load the data into memory
        truetrainfeats.populate()
        falsetrainfeats.populate()
        predictfeats.populate()

        # Concatenate the different sources
        # 1. Of training data
        plo_true = ipl()
        truetrainfeats, plo_true['truepaths'] = libip.rf_combine_sources_new(
            truetrainfeats[exp_source_kl]['truepaths'].dcp(),
            pathlistin[exp_source_kl]['truepaths'].dcp()
        )
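        # rf_combine_sources_new evidently returns the merged feature collection
        # together with the correspondingly combined pathlist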
        plo_false = ipl()
        falsetrainfeats, plo_false['falsepaths'] = libip.rf_combine_sources_new(
            falsetrainfeats[exp_source_kl]['falsepaths'].dcp(),
            pathlistin[exp_source_kl]['falsepaths'].dcp()
        )
        pathlistout[exp_source_kl + ['train']] = plo_true + plo_false
        # 2. Of prediction data
        ipf_true = ipl()
        plo_true = ipl()
        ipf_true['truepaths'], plo_true['truepaths'] = libip.rf_combine_sources_new(
            predictfeats[exp_predict_kl]['truepaths'].dcp(),
            pathlistin[exp_predict_kl]['truepaths'].dcp()
        )
        ipf_false = ipl()
        plo_false = ipl()
        ipf_false['falsepaths'], plo_false['falsepaths'] = libip.rf_combine_sources_new(
            predictfeats[exp_predict_kl]['falsepaths'].dcp(),
            pathlistin[exp_predict_kl]['falsepaths'].dcp()
        )
        inpredictfeats = ipf_true + ipf_false
        pathlistout[exp_source_kl + ['predict']] = plo_true + plo_false

        # Note:
        #   Due to the feature input being a dictionary organized by the feature images where
        #   the feature values come from
        #
        #       [source]
        #           'truepaths'|'falsepaths'
        #               [featureims]
        #                   'Sum':      [s1, ..., sN]
        #                   'Variance': [v1, ..., vN]
        #                   ...
        #               [Pathlength]:   [l1, ..., lN]
        #
        #   the exact order in which items are iterated over by data_iterator() is not known.
        #
        # Solution:
        #   Iterate over it once and store the keylist in an array (which preserves the order).
        #   When accumulating the features for each of the four corresponding subsets, namely
        #   the training and testing sets with true and false paths each, i.e.
        #   ['0'|'1']['truefeats'|'falsefeats'],
        #   the keylist is used, thus maintaining the correct order in every subset.
        #
        # And that is what is happening here:
        # #   1. Get the keylist of a full feature list, e.g. one of true paths
        # example_kl = None
        # for d2, k2, v2, kl2 in truetrainfeats.data_iterator():
        #     if k2 == 'truepaths':
        #         example_kl = kl2
        #         break
        # 2. Get the keylist order of the feature space
        feature_space_list = []
        for d2, k2, v2, kl2 in truetrainfeats.data_iterator():
            if type(v2) is not type(truetrainfeats):
                feature_space_list.append(kl2)
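        # To illustrate on plain dicts (hypothetical toy data, not part of the
        # pipeline): for feats = {'im1': {'Sum': [...], 'Variance': [...]}} the
        # loop above collects keylists like [['im1', 'Sum'], ['im1', 'Variance']];
        # every feature array below is assembled by walking this one list, so all
        # subsets end up with an identical column order (see also the standalone
        # sketch at the end of this example)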

        intrain = ipl()
        intrain['true'] = libip.rf_make_feature_array_with_keylist(truetrainfeats, feature_space_list)
        yparams.logging("Computed feature array for train['true'] with shape {}", intrain['true'].shape)
        intrain['false'] = libip.rf_make_feature_array_with_keylist(falsetrainfeats, feature_space_list)
        yparams.logging("Computed feature array for train['false'] with shape {}", intrain['false'].shape)

        inpredictfeats['true'] = libip.rf_make_feature_array_with_keylist(inpredictfeats['truepaths'], feature_space_list)
        yparams.logging("Computed feature array for predict['true'] with shape {}", inpredictfeats['true'].shape)
        inpredictfeats['false'] = libip.rf_make_feature_array_with_keylist(inpredictfeats['falsepaths'], feature_space_list)
        yparams.logging("Computed feature array for predict['false'] with shape {}", inpredictfeats['false'].shape)

        # Classify
        result = ipl()
        result[exp_lbl] = libip.random_forest(
            intrain, inpredictfeats, debug=debug, balance=exp_params['balance_classes'],
            logger=yparams
        )
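        # Judging from its use below, result[exp_lbl] is a list of
        # (prediction, ground_truth) pairs over the prediction set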

        # Evaluate
        new_eval = ipl()
        # print [x[0] for x in result[kl]]
        # print [x[1] for x in result[kl]]
        new_eval[exp_lbl] = libip.new_eval([x[0] for x in result[exp_lbl]], [x[1] for x in result[exp_lbl]])
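        # new_eval presumably condenses these pairs into summary statistics,
        # which are logged below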

        yparams.logging('+++ RESULTS +++')
        yparams.logging("[kl]")
        # for i in result[kl]:
        #     yparams.logging('{}', i)
        for key, value in new_eval[exp_lbl].iteritems():
            yparams.logging('{} = {}', key, value)

    sys.exit()

    # NOTE: Everything below is an older implementation of this function; it is
    # unreachable because of the sys.exit() above and kept for reference only.


    params = yparams.get_params()
    thisparams = rdict(params['random_forest'])

    # Get the pathlist stored in features_of_paths
    pathlistfile = params[thisparams['pathlistin'][0]] \
                 + params[thisparams['pathlistin'][1]]
    with open(pathlistfile, 'rb') as f:
        pathlistin = pickle.load(f)
    pathlistout = ipl()

    yparams.logging('\nDatastructure of pathlistin:\n\n{}', pathlistin.datastructure2string())

    # for i in xrange(0, len(thisparams['sources'])):
    for d, k, v, kl in thisparams['sources'].data_iterator(yield_short_kl=True):

        if k == 'predict':

            yparams.logging('===============================\nWorking on group: {}', kl)

            # Get parameters (currently only 'balance_classes') and set defaults
            balance_classes = False
            if 'balance_classes' in thisparams['sources'][kl].keys():
                balance_classes = thisparams['sources'][kl]['balance_classes']

            # Load training data
            if 'train' in thisparams['sources'][kl].keys():
                truesource = thisparams['sources'][kl]['train']
                falsesource = thisparams['sources'][kl]['train']
            else:
                truesource = thisparams['sources'][kl]['traintrue']
                falsesource = thisparams['sources'][kl]['trainfalse']
            truetrainfeats = load_data(
                params[truesource[0]] + params[truesource[1]], logger=yparams, **truesource[2]
            ).subset('true', search=True)
            falsetrainfeats = load_data(
                params[falsesource[0]] + params[falsesource[1]], logger=yparams, **falsesource[2]
            ).subset('false', search=True)
            # # The plus operator is overloaded to perform a merging operation on RecursiveDicts
            # trainfeats = truetrainfeats + falsetrainfeats
            # yparams.logging(
            #     '\nDatastructure of truetrainfeats\n\n{}',
            #     truetrainfeats.datastructure2string(maxdepth=3)
            # )
            # yparams.logging(
            #     '\nDatastructure of falsetrainfeats\n\n{}',
            #     falsetrainfeats.datastructure2string(maxdepth=3)
            # )

            # Load prediction data
            predictsource = thisparams['sources'][kl]['predict']
            predictfeats = load_data(
                params[predictsource[0]] + params[predictsource[1]],
                logger=yparams, **predictsource[2]
            )

            # Note:
            #   Due to the feature input being a dictionary organized by the feature images where
            #   the feature values come from
            #
            #       [source]
            #           'true'|'false'
            #               [featureims]
            #                   'Sum':      [s1, ..., sN]
            #                   'Variance': [v1, ..., vN]
            #                   ...
            #               [Pathlength]:   [l1, ..., lN]
            #
            #   the exact order in which items are iterated over by data_iterator() is not known.
            #
            # Solution:
            #   Iterate over it once and store the keylist in an array (which preserves the order).
            #   When accumulating the features for each of the four corresponding subsets, namely
            #   the training and testing sets with true and false paths each, i.e.
            #   ['0'|'1']['true'|'false'],
            #   the keylist is used, thus maintaining the correct order in every subset.
            #
            # And that is what is happening here:
            #   1. Get the keylist of a full feature list, e.g. one of true paths
            example_kl = None
            for d2, k2, v2, kl2 in truetrainfeats.data_iterator():
                if k2 == 'true':
                    example_kl = kl2
                    break
            #   2. Get the keylist order of the feature space
            feature_space_list = []
            for d2, k2, v2, kl2 in truetrainfeats[example_kl].data_iterator():
                if type(v2) is not type(truetrainfeats[example_kl]):
                    feature_space_list.append(kl2)
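            # (see the toy sketch at the end of this example for an illustration
            # of this keylist trick)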
            # yparams.logging('feature_space_list[i] = {}', feature_space_list, listed=True)

            # Load the data into memory
            truetrainfeats.populate()
            falsetrainfeats.populate()
            predictfeats.populate()

            truetrainfeats, plo_true = libip.rf_combine_sources(
                truetrainfeats, search_for='true', pathlist=pathlistin
            )
            falsetrainfeats, plo_false = libip.rf_combine_sources(
                falsetrainfeats, search_for='false', pathlist=pathlistin
            )
            pathlistout[kl + ['train']] = plo_true + plo_false

            ipf_true, plo_true = libip.rf_combine_sources(
                predictfeats, search_for='true', pathlist=pathlistin
            )
            ipf_false, plo_false = libip.rf_combine_sources(
                predictfeats, search_for='false', pathlist=pathlistin
            )
            inpredictfeats = ipf_true + ipf_false
            pathlistout[kl + ['predict']] = plo_true + plo_false

            # yparams.logging(
            #     '\nDatastructure of truetrainfeats\n\n{}',
            #     truetrainfeats.datastructure2string(maxdepth=3)
            # )
            # yparams.logging(
            #     '\nDatastructure of falsetrainfeats\n\n{}',
            #     falsetrainfeats.datastructure2string(maxdepth=3)
            # )

            intrain = ipl()
            intrain['true'] = libip.rf_make_feature_array_with_keylist(truetrainfeats['true'], feature_space_list)
            yparams.logging("Computed feature array for train['true'] with shape {}", intrain['true'].shape)
            intrain['false'] = libip.rf_make_feature_array_with_keylist(falsetrainfeats['false'], feature_space_list)
            yparams.logging("Computed feature array for train['false'] with shape {}", intrain['false'].shape)

            inpredictfeats['true'] = libip.rf_make_feature_array_with_keylist(inpredictfeats['true'], feature_space_list)
            yparams.logging("Computed feature array for predict['true'] with shape {}", inpredictfeats['true'].shape)
            inpredictfeats['false'] = libip.rf_make_feature_array_with_keylist(inpredictfeats['false'], feature_space_list)
            yparams.logging("Computed feature array for predict['false'] with shape {}", inpredictfeats['false'].shape)

            # Classify
            result = ipl()
            result[kl] = libip.random_forest(
                intrain, inpredictfeats, debug=debug, balance=balance_classes, logger=yparams
            )

            # Evaluate
            new_eval = ipl()
            # print [x[0] for x in result[kl]]
            # print [x[1] for x in result[kl]]
            new_eval[kl] = libip.new_eval([x[0] for x in result[kl]], [x[1] for x in result[kl]])

            yparams.logging('+++ RESULTS +++')
            yparams.logging("[kl]")
            # for i in result[kl]:
            #     yparams.logging('{}', i)
            for key, value in new_eval[kl].iteritems():
                yparams.logging('{} = {}', key, value)
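

# --- A minimal, self-contained sketch (not part of the pipeline above) of the
# keylist trick described in the Notes: fix the leaf keylist order once, then
# build every feature matrix by walking that same list, so all subsets share
# one column order. All names and values below are hypothetical toy data.
import numpy as np

toy_feats = {
    'rawdata': {'Sum': [1.0, 2.0], 'Variance': [0.1, 0.2]},
    'disttransf': {'Sum': [5.0, 6.0], 'Variance': [0.5, 0.6]},
}

# 1. Record an (arbitrary but stable) keylist order once
toy_keylist = [(im, feat)
               for im in sorted(toy_feats)
               for feat in sorted(toy_feats[im])]

# 2. Assemble the array with one column per keylist entry; any other subset
#    built with the same keylist gets the identical column order
toy_array = np.array([toy_feats[im][feat] for im, feat in toy_keylist]).T
print toy_array.shape  # -> (2, 4): two paths, four feature columns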
Esempio n. 29
0
def features_of_paths(yparams):

    all_params = yparams.get_params()

    # Zeroth layer:
    # --------------
    zeroth = rdict(all_params['features_of_paths'])
    if 'default' in zeroth:
        zeroth_defaults = zeroth.pop('default')
    else:
        zeroth_defaults = ipl()

    pathlist = ipl()
    pathlistfile = zeroth_defaults['targets', 'pathlist']
    pathlistfile = all_params[pathlistfile[0]] + all_params[pathlistfile[1]]

    for exp_lbl, experiment in zeroth.iteritems():

        # First layer
        # -----------
        # An experiment is now selected and performed
        yparams.logging(
            '\n\nPerforming experiment {}\n==============================',
            exp_lbl)

        final = zeroth_defaults.dcp()
        final.merge(experiment)

        exp_sources = final['sources']
        exp_params = final['params']
        exp_targets = final['targets']

        def val(x):
            return x
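        # val is an identity function; passing it via 'function' makes
        # datastructure2string print the leaf values themselves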

        yparams.logging('exp_sources = \n{}',
                        exp_sources.datastructure2string(function=val))
        yparams.logging('exp_params = \n{}',
                        exp_params.datastructure2string(function=val))
        yparams.logging('exp_targets = \n{}',
                        exp_targets.datastructure2string(function=val))

        # Load feature images
        # -------------------
        featureims = ipl()
        for k, v in exp_sources['featureims'].iteritems():
            skeys = None
            if 'skeys' in v[2]:
                skeys = v[2]['skeys']
            featureims[k] = load_images(all_params[v[0]] + all_params[v[1]],
                                        skeys=skeys,
                                        logger=yparams)
        yparams.logging('\nFeatureims datastructure: \n\n{}',
                        featureims.datastructure2string(maxdepth=4))

        for exp_class_lbl, exp_class_src in exp_sources['paths'].iteritems():

            yparams.logging('\nWorking on {}\n------------------------------',
                            exp_class_lbl)

            # Load paths
            # ----------
            skeys = None
            if 'skeys' in exp_class_src[2]:
                skeys = exp_class_src[2]['skeys']
            paths = load_images(all_params[exp_class_src[0]] +
                                all_params[exp_class_src[1]],
                                skeys=skeys,
                                logger=yparams)
            yparams.logging('\nPaths datastructure: \n\n{}',
                            paths.datastructure2string(maxdepth=4))

            # Iterate over the segmentation images
            # (note: this step assumes 'skeys' was supplied in the source entry)
            for d, k, v, kl in paths[exp_class_src[2]['skeys'][0]].data_iterator(
                    leaves_only=True, yield_short_kl=True, maxdepth=3):
                yparams.logging(
                    '\nImage keylist: {}\n..............................',
                    kl + [k])

                segm_kl = kl + [k]
                imgs_kl = kl
                yparams.logging('segm_kl = {}', segm_kl)
                yparams.logging('imgs_kl = {}', imgs_kl)

                # Build an input featureims dict for the path computation
                infeatims = ipl()
                sourcelist = exp_sources['featureims'].dcp()
                if 'segmentation' in sourcelist:
                    infeatims['segmentation'] = featureims['segmentation'][
                        segm_kl]
                    sourcelist.pop('segmentation')
                for source in sourcelist:
                    infeatims[source] = featureims[source][imgs_kl]
                infeatims.populate()

                # Build an input dict for the paths of the current class
                inpaths = v.dcp()
                inpaths.populate()

                features = ipl()
                feats_kl = [exp_class_lbl] + kl + [k]
                features[exp_lbl][feats_kl], pathlist[exp_lbl][feats_kl] = \
                    libip.get_features(
                        inpaths,
                        np.array(np.array(infeatims.yield_an_item()).shape)[0:3],
                        infeatims,
                        list(exp_params['features']),
                        exp_params['max_paths_per_label'],
                        ipl=yparams,
                        anisotropy=exp_params['anisotropy'],
                        return_pathlist=True)

                yparams.logging('\nFeatures datastructure: \n\n{}',
                                features.datastructure2string(maxdepth=4))

                # Write the result to file
                features.write(
                    filepath=all_params[exp_targets['features'][0]] +
                    all_params[exp_targets['features'][1]])

    with open(pathlistfile, 'wb') as f:
        pickle.dump(pathlist, f)
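
# For reference: the pathlist pickled above is what the random_forest example
# later reads back in, along the lines of (sketch; the file name is resolved
# from the parameter dict in the real code):
#
#     with open(pathlistfile, 'rb') as f:
#         pathlistin = pickle.load(f)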