Example #1
0
def classifier_predict(listname, modelname, outdir=None, n_jobs=None):
    """Compute features for the samples in *listname* and predict labels
    with the classifier stored in *modelname*.

    Features and predictions are written as gzipped CSV list files into
    *outdir*; a temporary directory is created when *outdir* is None.
    n_jobs: forwarded to the feature computation for parallelism.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    inputname = os.path.splitext(os.path.basename(listname))[0]
    if listname.endswith('.gz'):
        # Strip the second extension of e.g. 'name.csv.gz'.
        inputname = os.path.splitext(inputname)[0]
    meta, data = read_listfile(listname)
    classifier = read_classifierfile(modelname)
    feature_method = classifier['features']['meta']['feature_method']
    feature_args = meta.copy()
    # Training input_name would shadow the current one.
    del classifier['features']['meta']['input_name']
    featurename = os.path.join(outdir, inputname + '-feats.csv.gz')
    if os.path.exists(featurename):
        # Re-use features cached by a previous run.
        _, features = read_listfile(featurename)
    else:
        feature_args.update(classifier['features']['meta'])
        args, features = compute_features(feature_method, feature_args, data,
                input_name=inputname, n_jobs=n_jobs, output_dir=outdir)
        assert (data['id'] == features['id']).all()
        clean_args(args)
        write_listfile(featurename, features, input_name=inputname, **args)
    labels_name = classifier['meta']['truth'] + '_labels'
    labels = classifier['meta'][labels_name]
    pred = predict(classifier['classifier'], sorted(labels.keys()), features,
            output_dir=outdir)
    # NOTE(review): labels_name= receives the labels mapping itself rather
    # than the labels_name string — confirm this is what write_listfile expects.
    write_listfile(os.path.join(outdir, inputname + '-predictions.csv.gz'), pred,
            classifier_name=modelname, truth=classifier['meta']['truth'],
            labels_name=labels, input_name=inputname)
def dissimilarities(methodname,
                    listname,
                    argsname=None,
                    n_jobs=None,
                    outdir=None):
    """Compute pairwise dissimilarities for the samples in *listname* with
    method *methodname* and write them to a gzipped CSV file in *outdir*.

    argsname: optional args file whose entries override the list metadata.
    n_jobs: forwarded to the dissimilarity computation for parallelism.
    outdir: output directory; a temporary one is created when None.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    inputname = os.path.splitext(os.path.basename(listname))[0]
    if listname.endswith('.gz'):
        # Strip the second extension of e.g. 'name.csv.gz'.
        inputname = os.path.splitext(inputname)[0]
    meta, data = read_listfile(listname)
    args = meta
    if argsname is not None:
        args.update(read_argsfile(argsname))
    args, w = compute_dissimilarity(methodname,
                                    args,
                                    data,
                                    n_jobs=n_jobs,
                                    output_dir=outdir,
                                    input_name=inputname)
    # 'threshold' may arrive as the string 'False' from an args file.
    if 'threshold' in args and args['threshold'] != 'False':
        args, w = threshold_dissimilarity(args['threshold'], args, w)
    dissim = prepare_weights_data(data['id'], data.dtype['id'], w)
    clean_args(args)
    write_listfile(os.path.join(outdir, inputname + '-dissim.csv.gz'),
                   dissim,
                   input_name=inputname,
                   **args)
Example #3
0
def propagate_predict(modelname, dissimname, predictionsname, outdir=None):
    """Propagate the predictions in *predictionsname* over the dissimilarity
    graph in *dissimname* using the propagator stored in *modelname*.

    The propagated predictions are written as a gzipped CSV list file into
    *outdir*; a temporary directory is created when *outdir* is None.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    args = {}
    predictions_meta, predictions = read_listfile(predictionsname)
    args.update(predictions_meta)
    dissim_meta, sample_ids, dissim = read_weightsfile(dissimname)
    # Both inputs must describe the same samples, in the same order.
    assert (predictions['id'] == np.array(sample_ids)).all()
    assert predictions_meta['input_name'] == dissim_meta['input_name'], \
            'Expecting same input names (%s x %s)' % (predictions_meta['input_name'], dissim_meta['input_name'])
    inputname = predictions_meta['input_name']
    args.update(dissim_meta)
    model = read_propagatorfile(modelname)
    args.update(model['meta'])
    # method_name is consumed here; the remaining propagator entries are args.
    method_name = model['propagator']['method_name']
    del model['propagator']['method_name']
    args.update(model['propagator'])
    args, prop = propagate(method_name,
                           args,
                           predictions,
                           dissim,
                           output_dir=outdir)
    clean_args(args)
    # Bulky cross-validation metadata — keep it out of the output file.
    del args['cv_results']
    write_listfile(os.path.join(outdir, inputname + '-propagated.csv.gz'),
                   prop, **args)
def dissimilarities(methodname, listname, argsname=None, n_jobs=None, outdir=None):
    """Compute pairwise dissimilarities for the samples in *listname* with
    method *methodname* and write them to a gzipped CSV file in *outdir*.

    argsname: optional args file whose entries override the list metadata.
    n_jobs: forwarded to the dissimilarity computation for parallelism.
    outdir: output directory; a temporary one is created when None.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    inputname = os.path.splitext(os.path.basename(listname))[0]
    if listname.endswith('.gz'):
        # Strip the second extension of e.g. 'name.csv.gz'.
        inputname = os.path.splitext(inputname)[0]
    meta, data = read_listfile(listname)
    args = meta
    if argsname is not None:
        args.update(read_argsfile(argsname))
    args, w = compute_dissimilarity(methodname, args, data, n_jobs=n_jobs, output_dir=outdir, input_name=inputname)
    # 'threshold' may arrive as the string 'False' from an args file.
    if 'threshold' in args and args['threshold'] != 'False':
        args, w = threshold_dissimilarity(args['threshold'], args, w)
    dissim = prepare_weights_data(data['id'], data.dtype['id'], w)
    clean_args(args)
    write_listfile(os.path.join(outdir, inputname + '-dissim.csv.gz'), dissim, input_name=inputname, **args)
def propagate_predict(modelname, dissimname, predictionsname, outdir=None):
    """Propagate the predictions in *predictionsname* over the dissimilarity
    graph in *dissimname* using the propagator stored in *modelname*.

    The propagated predictions are written as a gzipped CSV list file into
    *outdir*; a temporary directory is created when *outdir* is None.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    args = {}
    predictions_meta, predictions = read_listfile(predictionsname)
    args.update(predictions_meta)
    dissim_meta, sample_ids, dissim = read_weightsfile(dissimname)
    # Both inputs must describe the same samples, in the same order.
    assert (predictions['id'] == np.array(sample_ids)).all()
    assert predictions_meta['input_name'] == dissim_meta['input_name'], \
            'Expecting same input names (%s x %s)' % (predictions_meta['input_name'], dissim_meta['input_name'])
    inputname = predictions_meta['input_name']
    args.update(dissim_meta)
    model = read_propagatorfile(modelname)
    args.update(model['meta'])
    # method_name is consumed here; the remaining propagator entries are args.
    method_name = model['propagator']['method_name']
    del model['propagator']['method_name']
    args.update(model['propagator'])
    args, prop = propagate(method_name, args, predictions, dissim, output_dir=outdir)
    clean_args(args)
    # Bulky cross-validation metadata — keep it out of the output file.
    del args['cv_results']
    write_listfile(os.path.join(outdir, inputname + '-propagated.csv.gz'), prop, **args)
Example #6
0
                     help='Propagator file.')
 parser.add_argument('-o',
                     '--output',
                     dest='output',
                     required=False,
                     action='store',
                     default=None,
                     help='Output directory.')
 opts = parser.parse_args()
 # Fall back to a fresh temporary output directory when none was given.
 if opts.output == None:
     outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
     logger.info('Output directory %s', outdir)
 else:
     outdir = opts.output
     if not os.path.exists(outdir):
         tsh.makedirs(outdir)
 config = tsh.read_config(opts, __file__)
 basename = os.path.splitext(os.path.basename(opts.model))[0]
 propagator = read_propagatorfile(opts.model)
 # Cross-validation results stored with the propagator:
 # iterated below as (score, params, confusion_matrix) tuples.
 cv_results = propagator['meta']['cv_results']
 # Group CV scores by parameter combination (params dict made hashable
 # by converting it to a tuple of items).
 data = {}
 for score, params, cm in cv_results:
     key = tuple(params.items())
     if key not in data:
         data[key] = []
     data[key] += [score]
 all_param_keys = sorted(data.keys())
 data = tsh.dict_values(data, all_param_keys)
 # Plot mean score per parameter combination with error bars
 # (call continues past this fragment).
 plt.errorbar(range(len(all_param_keys)),
              np.mean(data, axis=1),
              fmt='ro',
Example #7
0
 import argparse
 parser = argparse.ArgumentParser(description='Computes features for all the input data.')
 parser.add_argument('-c', '--config', dest='config', required=False, action='store', default=None, help='Path to the config file')
 # Method must be one of the registered feature methods.
 parser.add_argument('-m', '--method', dest='method', required=True, action='store', choices=method_table.keys(), default=None, help='Method name.')
 parser.add_argument('-a', '--args', dest='args', required=False, action='store', default=None, help='Method arguments file.')
 parser.add_argument('-l', '--list', dest='list', required=True, action='store', default=None, help='List file.')
 parser.add_argument('-j', '--jobs', dest='jobs', required=False, action='store', default=None, type=int, help='Number of parallel processes.')
 parser.add_argument('-o', '--output', dest='output', required=False, action='store', default=None, help='Output directory.')
 opts = parser.parse_args()
 # Fall back to a fresh temporary output directory when none was given.
 if opts.output == None:
     outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
     logger.info('Output directory %s', outdir)
 else:
     outdir = opts.output
     if not os.path.exists(outdir):
         tsh.makedirs(outdir)
 # Base name of the list file without extension(s), e.g. 'name.csv.gz' -> 'name'.
 inputname = os.path.splitext(os.path.basename(opts.list))[0]
 if opts.list.endswith('.gz'):
     inputname = os.path.splitext(inputname)[0]
 outputname = os.path.join(outdir, inputname + '-feats.csv.gz')
 if os.path.exists(outputname):
     # Features were already computed by a previous run.
     logger.info('Skipping file %s, already exists.', outputname)
 else:
     config = tsh.read_config(opts, __file__)
     meta, data = read_listfile(opts.list)
     # List metadata provides the defaults; the args file overrides them.
     args = meta
     if opts.args != None:
         args.update(read_argsfile(opts.args))
     args, features = compute_features(opts.method, args, data, input_name=inputname, n_jobs=opts.jobs, output_dir=outdir)
     clean_args(args)
     write_listfile(outputname, features, input_name=inputname, **args)
def get_dissimilarities(data,
                        output_dir=None,
                        input_name=None,
                        image_prefix=None,
                        mask_prefix=None,
                        n_jobs=None,
                        **kwargs):
    """Compute (or load from cache) the pairwise expression dissimilarity
    matrix for the samples in *data*.

    data: records with 'id', 'image' and 'mask' entries.
    output_dir, input_name: required; used to build cache/output paths.
    image_prefix, mask_prefix: required; directories the per-sample
        image/mask paths are relative to.
    n_jobs: parallel workers (default 1).
    kwargs: must contain 'measure', 'distance_name', 'rotation_invariance',
        'normalized_width' and 'normalized_height'.

    Returns (kwargs, D) where D is the symmetric n-by-n distance matrix.
    """
    assert image_prefix is not None
    assert mask_prefix is not None
    assert output_dir is not None
    assert input_name is not None
    measure = kwargs['measure']
    distance_name = kwargs['distance_name']
    rotation_invariance = kwargs['rotation_invariance']
    normalized_width = kwargs['normalized_width']
    normalized_height = kwargs['normalized_height']
    if n_jobs is None:
        n_jobs = 1

    image_prefix = os.path.expanduser(image_prefix)
    # Bug fix: previously expanded image_prefix into mask_prefix, so masks
    # were looked up under the image directory.
    mask_prefix = os.path.expanduser(mask_prefix)
    distance_name = distance_name.format(OUT=output_dir,
                                         INPUTNAME=input_name,
                                         **kwargs)
    tsh.makedirs(os.path.dirname(distance_name))
    kwargs['distance_name'] = distance_name

    expr_dir = os.path.join(
        output_dir, 'expr/%04dx%04d' % (normalized_width, normalized_height))
    tsh.makedirs(expr_dir)

    # Make it easier for evaluate.py to create nice html reports.
    if kwargs.get('create_links') == True:
        if os.path.exists('expr'):
            os.unlink('expr')
        try:
            os.symlink(expr_dir, 'expr')
        except OSError:
            # Convenience link only — best effort.
            pass
        if os.path.exists('distance'):
            os.unlink('distance')
        try:
            os.symlink(os.path.dirname(distance_name), 'distance')
        except OSError:
            pass

    save_expr_images = kwargs.get('save_expr_images', False)
    if os.path.exists(distance_name):
        # Cached matrix from a previous run.
        D = tsh.deserialize(distance_name)['D']
    else:
        imagenames = [
            os.path.join(image_prefix, sample['image']) for sample in data
        ]
        masknames = [
            os.path.join(mask_prefix, sample['mask']) for sample in data
        ]
        n = len(data)
        logger.info('Extracting %d expressions...', n)
        Parallel(n_jobs=n_jobs, verbose=True, pre_dispatch='2*n_jobs')(
            delayed(_extract_expression)
            (imagenames[j], masknames[j],
             os.path.join(expr_dir, 'inside%02d.png' %
                          data[j]['id']) if save_expr_images else None,
             os.path.join(expr_dir, 'expr%02d.png' %
                          data[j]['id']) if save_expr_images else None,
             os.path.join(expr_dir, 'expr%02d.dat' %
                          data[j]['id']), normalized_width, normalized_height)
            for j in range(n))

        logger.info('Computing %d dissimilarities...', (n * (n - 1)) / 2)
        # Only the upper triangle (i > j) is computed; it is mirrored below.
        results = Parallel(
            n_jobs=n_jobs, verbose=True,
            pre_dispatch='2*n_jobs')(delayed(_get_dissimilarity)(
                i, j, measure, rotation_invariance,
                os.path.join(expr_dir, 'expr%02d.dat' % data[i]['id']),
                os.path.join(expr_dir, 'expr%02d.dat' % data[j]['id']))
                                     for j in range(n)
                                     for i in range(j + 1, n))

        logger.info('Transforming results...')
        # np.float was a deprecated alias of the builtin float (removed in
        # NumPy 1.24); the builtin is equivalent.
        D = np.zeros((n, n), dtype=float)
        tfxs = np.array([['I'] * n] * n)
        for i, j, d, t in results:
            D[j, i] = d
            D[i, j] = d
            tfxs[j, i] = t
            tfxs[i, j] = t

        logger.info('Saving results...')
        tsh.serialize(
            distance_name, {
                'D': D,
                'min': None,
                'max': None,
                'tfxs': tfxs,
                'measure': measure,
                'rotation_invariance': rotation_invariance
            })

    return kwargs, D
def get_dissimilarities(data, output_dir=None, input_name=None, image_prefix=None, mask_prefix=None, n_jobs=None, **kwargs):
    """Compute (or load from cache) the pairwise expression dissimilarity
    matrix for the samples in *data*.

    data: records with 'id', 'image' and 'mask' entries.
    output_dir, input_name: required; used to build cache/output paths.
    image_prefix, mask_prefix: required; directories the per-sample
        image/mask paths are relative to.
    n_jobs: parallel workers (default 1).
    kwargs: must contain 'measure', 'distance_name', 'rotation_invariance',
        'normalized_width' and 'normalized_height'.

    Returns (kwargs, D) where D is the symmetric n-by-n distance matrix.
    """
    assert image_prefix is not None
    assert mask_prefix is not None
    assert output_dir is not None
    assert input_name is not None
    measure = kwargs['measure']
    distance_name = kwargs['distance_name']
    rotation_invariance = kwargs['rotation_invariance']
    normalized_width = kwargs['normalized_width']
    normalized_height = kwargs['normalized_height']
    if n_jobs is None:
        n_jobs = 1

    image_prefix = os.path.expanduser(image_prefix)
    # Bug fix: previously expanded image_prefix into mask_prefix, so masks
    # were looked up under the image directory.
    mask_prefix = os.path.expanduser(mask_prefix)
    distance_name = distance_name.format(OUT=output_dir, INPUTNAME=input_name, **kwargs)
    tsh.makedirs(os.path.dirname(distance_name))
    kwargs['distance_name'] = distance_name

    expr_dir = os.path.join(output_dir, 'expr/%04dx%04d' % (normalized_width, normalized_height))
    tsh.makedirs(expr_dir)

    # Make it easier for evaluate.py to create nice html reports.
    if kwargs.get('create_links') == True:
        if os.path.exists('expr'):
            os.unlink('expr')
        try:
            os.symlink(expr_dir, 'expr')
        except OSError:
            # Convenience link only — best effort.
            pass
        if os.path.exists('distance'):
            os.unlink('distance')
        try:
            os.symlink(os.path.dirname(distance_name), 'distance')
        except OSError:
            pass

    save_expr_images = kwargs.get('save_expr_images', False)
    if os.path.exists(distance_name):
        # Cached matrix from a previous run.
        D = tsh.deserialize(distance_name)['D']
    else:
        imagenames = [ os.path.join(image_prefix, sample['image']) for sample in data ]
        masknames = [ os.path.join(mask_prefix, sample['mask']) for sample in data ]
        n = len(data)
        logger.info('Extracting %d expressions...', n)
        Parallel(n_jobs=n_jobs, verbose=True,
            pre_dispatch='2*n_jobs')(
            delayed(_extract_expression)(
                imagenames[j],
                masknames[j],
                os.path.join(expr_dir, 'inside%02d.png' % data[j]['id']) if save_expr_images else None,
                os.path.join(expr_dir, 'expr%02d.png' % data[j]['id']) if save_expr_images else None,
                os.path.join(expr_dir, 'expr%02d.dat' % data[j]['id']),
                normalized_width,
                normalized_height
            ) for j in range(n))

        logger.info('Computing %d dissimilarities...', (n*(n-1))/2)
        # Only the upper triangle (i > j) is computed; it is mirrored below.
        results = Parallel(n_jobs=n_jobs, verbose=True,
            pre_dispatch='2*n_jobs')(
            delayed(_get_dissimilarity)(
                i, j,
                measure, rotation_invariance,
                os.path.join(expr_dir, 'expr%02d.dat' % data[i]['id']),
                os.path.join(expr_dir, 'expr%02d.dat' % data[j]['id'])
            ) for j in range(n) for i in range(j+1, n))

        logger.info('Transforming results...')
        # np.float was a deprecated alias of the builtin float (removed in
        # NumPy 1.24); the builtin is equivalent.
        D = np.zeros((n, n), dtype=float)
        tfxs = np.array([['I'] * n] * n)
        for i, j, d, t in results:
            D[j, i] = d
            D[i, j] = d
            tfxs[j, i] = t
            tfxs[i, j] = t

        logger.info('Saving results...')
        tsh.serialize(distance_name, {
                'D': D,
                'min': None, 'max': None,
                'tfxs': tfxs,
                'measure': measure,
                'rotation_invariance': rotation_invariance })

    return kwargs, D
Example #10
0
from utils import read_listfile

def _copy_preserving_relpath(filename, outdir, relative_to):
    """Copy *filename* into *outdir*, keeping its path relative to *relative_to*."""
    destdir = os.path.dirname(os.path.join(outdir, os.path.relpath(filename, relative_to)))
    tsh.makedirs(destdir)
    shutil.copy(filename, destdir)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Copies images for all the listfiles.')
    parser.add_argument('-r', '--relative', dest='relative', required=True, action='store', default=None, help='Make paths relative to this.')
    parser.add_argument('lists', nargs='*', action='store', help='List file(s).')
    parser.add_argument('-o', '--output', dest='output', required=False, action='store', default=None, help='Output directory.')
    opts = parser.parse_args()
    # Fall back to a fresh temporary output directory when none was given.
    if opts.output is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
        logger.info('Output directory %s', outdir)
    else:
        outdir = opts.output
        if not os.path.exists(outdir):
            tsh.makedirs(outdir)

    # Copy every image and mask referenced by each list file, mirroring the
    # directory structure below opts.relative into outdir.
    for listname in opts.lists:
        meta, data = read_listfile(listname)
        image_prefix = meta['image_prefix']
        mask_prefix = meta['mask_prefix']
        for d in data:
            _copy_preserving_relpath(os.path.join(image_prefix, d['image']), outdir, opts.relative)
            _copy_preserving_relpath(os.path.join(mask_prefix, d['mask']), outdir, opts.relative)