def classifier_predict(listname, modelname, outdir=None, n_jobs=None):
    """Predict labels for the samples in *listname* with a stored classifier.

    Features are computed (or reused from a cached ``*-feats.csv.gz`` file)
    and fed to the classifier read from *modelname*; predictions are written
    to ``<outdir>/<inputname>-predictions.csv.gz``.

    Parameters:
        listname: path to the input list file (optionally gzipped).
        modelname: path to the classifier file.
        outdir: output directory; a fresh temporary directory is created
            under the current directory when None.
        n_jobs: number of parallel jobs for feature computation.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    inputname = os.path.splitext(os.path.basename(listname))[0]
    if listname.endswith('.gz'):
        # Strip the second extension of e.g. "name.csv.gz".
        inputname = os.path.splitext(inputname)[0]
    meta, data = read_listfile(listname)
    classifier = read_classifierfile(modelname)
    feature_method = classifier['features']['meta']['feature_method']
    feature_args = meta.copy()
    # Training input_name would shadow the current one.
    del classifier['features']['meta']['input_name']
    featurename = os.path.join(outdir, inputname + '-feats.csv.gz')
    if os.path.exists(featurename):
        # Features were already computed for this input; reuse the cache.
        _, features = read_listfile(featurename)
    else:
        feature_args.update(classifier['features']['meta'])
        args, features = compute_features(feature_method, feature_args, data,
                input_name=inputname, n_jobs=n_jobs, output_dir=outdir)
        assert (data['id'] == features['id']).all()
        clean_args(args)
        write_listfile(featurename, features, input_name=inputname, **args)
    labels_name = classifier['meta']['truth'] + '_labels'
    labels = classifier['meta'][labels_name]
    pred = predict(classifier['classifier'], sorted(labels.keys()), features,
            output_dir=outdir)
    # NOTE(review): labels_name receives the labels mapping itself, not the
    # labels_name string — preserved from the original; confirm that this is
    # what write_listfile consumers expect.
    write_listfile(os.path.join(outdir, inputname + '-predictions.csv.gz'),
            pred, classifier_name=modelname,
            truth=classifier['meta']['truth'], labels_name=labels,
            input_name=inputname)
def dissimilarities(methodname, listname, argsname=None, n_jobs=None, outdir=None):
    """Compute pairwise dissimilarities for the samples in *listname*.

    The dissimilarity matrix is optionally thresholded (when the merged
    arguments carry a 'threshold' entry that is not the string 'False') and
    written to ``<outdir>/<inputname>-dissim.csv.gz``.

    Parameters:
        methodname: name of the dissimilarity method to dispatch to.
        listname: path to the input list file (optionally gzipped).
        argsname: optional path to an extra arguments file merged over the
            list metadata.
        n_jobs: number of parallel jobs.
        outdir: output directory; a fresh temporary directory is created
            under the current directory when None.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    inputname = os.path.splitext(os.path.basename(listname))[0]
    if listname.endswith('.gz'):
        # Strip the second extension of e.g. "name.csv.gz".
        inputname = os.path.splitext(inputname)[0]
    meta, data = read_listfile(listname)
    args = meta
    if argsname is not None:
        args.update(read_argsfile(argsname))
    args, w = compute_dissimilarity(methodname, args, data,
            n_jobs=n_jobs, output_dir=outdir, input_name=inputname)
    # 'False' (string) explicitly disables thresholding.
    if 'threshold' in args and args['threshold'] != 'False':
        args, w = threshold_dissimilarity(args['threshold'], args, w)
    dissim = prepare_weights_data(data['id'], data.dtype['id'], w)
    clean_args(args)
    write_listfile(os.path.join(outdir, inputname + '-dissim.csv.gz'),
            dissim, input_name=inputname, **args)
def propagate_predict(modelname, dissimname, predictionsname, outdir=None):
    """Propagate classifier predictions over a dissimilarity graph.

    Reads predictions, a weights (dissimilarity) file and a propagator model,
    checks that predictions and weights describe the same samples, runs the
    propagator and writes ``<outdir>/<inputname>-propagated.csv.gz``.

    Parameters:
        modelname: path to the propagator file.
        dissimname: path to the dissimilarity/weights file.
        predictionsname: path to the predictions list file.
        outdir: output directory; a fresh temporary directory is created
            under the current directory when None.
    """
    if outdir is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    elif not os.path.exists(outdir):
        tsh.makedirs(outdir)
    args = {}
    predictions_meta, predictions = read_listfile(predictionsname)
    args.update(predictions_meta)
    dissim_meta, sample_ids, dissim = read_weightsfile(dissimname)
    # Predictions and dissimilarity rows must describe the same samples,
    # in the same order.
    assert (predictions['id'] == np.array(sample_ids)).all()
    assert predictions_meta['input_name'] == dissim_meta['input_name'], \
            'Expecting same input names (%s x %s)' % (
                    predictions_meta['input_name'], dissim_meta['input_name'])
    inputname = predictions_meta['input_name']
    args.update(dissim_meta)
    model = read_propagatorfile(modelname)
    args.update(model['meta'])
    method_name = model['propagator']['method_name']
    # method_name is passed positionally; drop it before merging the rest
    # of the propagator parameters into args.
    del model['propagator']['method_name']
    args.update(model['propagator'])
    args, prop = propagate(method_name, args, predictions, dissim,
            output_dir=outdir)
    clean_args(args)
    # cv_results is bulky training-time data; keep it out of the output metadata.
    del args['cv_results']
    write_listfile(os.path.join(outdir, inputname + '-propagated.csv.gz'),
            prop, **args)
help='Propagator file.') parser.add_argument('-o', '--output', dest='output', required=False, action='store', default=None, help='Output directory.') opts = parser.parse_args() if opts.output == None: outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out') logger.info('Output directory %s', outdir) else: outdir = opts.output if not os.path.exists(outdir): tsh.makedirs(outdir) config = tsh.read_config(opts, __file__) basename = os.path.splitext(os.path.basename(opts.model))[0] propagator = read_propagatorfile(opts.model) cv_results = propagator['meta']['cv_results'] data = {} for score, params, cm in cv_results: key = tuple(params.items()) if key not in data: data[key] = [] data[key] += [score] all_param_keys = sorted(data.keys()) data = tsh.dict_values(data, all_param_keys) plt.errorbar(range(len(all_param_keys)), np.mean(data, axis=1), fmt='ro',
import argparse
parser = argparse.ArgumentParser(description='Computes features for all the input data.')
parser.add_argument('-c', '--config', dest='config', required=False,
        action='store', default=None, help='Path to the config file')
parser.add_argument('-m', '--method', dest='method', required=True,
        action='store', choices=method_table.keys(), default=None,
        help='Method name.')
parser.add_argument('-a', '--args', dest='args', required=False,
        action='store', default=None, help='Method arguments file.')
parser.add_argument('-l', '--list', dest='list', required=True,
        action='store', default=None, help='List file.')
parser.add_argument('-j', '--jobs', dest='jobs', required=False,
        action='store', default=None, type=int,
        help='Number of parallel processes.')
parser.add_argument('-o', '--output', dest='output', required=False,
        action='store', default=None, help='Output directory.')
opts = parser.parse_args()
if opts.output is None:
    outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
    logger.info('Output directory %s', outdir)
else:
    outdir = opts.output
    if not os.path.exists(outdir):
        tsh.makedirs(outdir)
inputname = os.path.splitext(os.path.basename(opts.list))[0]
if opts.list.endswith('.gz'):
    # Strip the second extension of e.g. "name.csv.gz".
    inputname = os.path.splitext(inputname)[0]
outputname = os.path.join(outdir, inputname + '-feats.csv.gz')
if os.path.exists(outputname):
    logger.info('Skipping file %s, already exists.', outputname)
else:
    config = tsh.read_config(opts, __file__)
    meta, data = read_listfile(opts.list)
    args = meta
    if opts.args is not None:
        args.update(read_argsfile(opts.args))
    args, features = compute_features(opts.method, args, data,
            input_name=inputname, n_jobs=opts.jobs, output_dir=outdir)
    clean_args(args)
    write_listfile(outputname, features, input_name=inputname, **args)
def get_dissimilarities(data, output_dir=None, input_name=None,
        image_prefix=None, mask_prefix=None, n_jobs=None, **kwargs):
    """Compute (or load cached) pairwise expression dissimilarities.

    Each sample's expression is extracted from its image/mask pair and
    normalized to ``normalized_width`` x ``normalized_height``; all pairwise
    dissimilarities are then computed in parallel and the resulting matrix is
    cached at the path built from ``kwargs['distance_name']``.

    Parameters:
        data: sequence of samples with 'id', 'image' and 'mask' fields.
        output_dir: base directory for intermediate and cached files.
        input_name: name of the current input, used in the cache path.
        image_prefix, mask_prefix: directories holding images and masks.
        n_jobs: parallelism level (defaults to 1).
        **kwargs: must contain 'measure', 'distance_name',
            'rotation_invariance', 'normalized_width', 'normalized_height';
            may contain 'create_links' and 'save_expr_images'.

    Returns:
        (kwargs, D): updated arguments and the symmetric n-by-n matrix.
    """
    assert image_prefix is not None
    assert mask_prefix is not None
    assert output_dir is not None
    assert input_name is not None
    measure = kwargs['measure']
    distance_name = kwargs['distance_name']
    rotation_invariance = kwargs['rotation_invariance']
    normalized_width = kwargs['normalized_width']
    normalized_height = kwargs['normalized_height']
    if n_jobs is None:
        n_jobs = 1
    image_prefix = os.path.expanduser(image_prefix)
    # BUG FIX: the original expanded image_prefix into mask_prefix,
    # so masks were looked up under the image directory.
    mask_prefix = os.path.expanduser(mask_prefix)
    distance_name = distance_name.format(OUT=output_dir,
            INPUTNAME=input_name, **kwargs)
    tsh.makedirs(os.path.dirname(distance_name))
    kwargs['distance_name'] = distance_name
    expr_dir = os.path.join(output_dir,
            'expr/%04dx%04d' % (normalized_width, normalized_height))
    tsh.makedirs(expr_dir)
    # Make it easier for evaluate.py to create nice html reports.
    if kwargs.get('create_links') == True:
        # lexists (unlike exists) also catches broken symlinks left behind
        # by earlier runs, so they get replaced instead of silently kept.
        if os.path.lexists('expr'):
            os.unlink('expr')
        try:
            os.symlink(expr_dir, 'expr')
        except OSError:
            # Links are a convenience only; failure is non-fatal.
            pass
        if os.path.lexists('distance'):
            os.unlink('distance')
        try:
            os.symlink(os.path.dirname(distance_name), 'distance')
        except OSError:
            pass
    save_expr_images = kwargs.get('save_expr_images', False)
    if os.path.exists(distance_name):
        # Cached result from a previous run.
        D = tsh.deserialize(distance_name)['D']
    else:
        imagenames = [os.path.join(image_prefix, sample['image'])
                for sample in data]
        masknames = [os.path.join(mask_prefix, sample['mask'])
                for sample in data]
        n = len(data)
        logger.info('Extracting %d expressions...', n)
        Parallel(n_jobs=n_jobs, verbose=True, pre_dispatch='2*n_jobs')(
                delayed(_extract_expression)(
                        imagenames[j], masknames[j],
                        os.path.join(expr_dir, 'inside%02d.png' % data[j]['id'])
                                if save_expr_images else None,
                        os.path.join(expr_dir, 'expr%02d.png' % data[j]['id'])
                                if save_expr_images else None,
                        os.path.join(expr_dir, 'expr%02d.dat' % data[j]['id']),
                        normalized_width, normalized_height)
                for j in range(n))
        logger.info('Computing %d dissimilarities...', (n * (n - 1)) / 2)
        results = Parallel(n_jobs=n_jobs, verbose=True,
                pre_dispatch='2*n_jobs')(
                delayed(_get_dissimilarity)(
                        i, j, measure, rotation_invariance,
                        os.path.join(expr_dir, 'expr%02d.dat' % data[i]['id']),
                        os.path.join(expr_dir, 'expr%02d.dat' % data[j]['id']))
                for j in range(n) for i in range(j + 1, n))
        logger.info('Transforming results...')
        # np.float was removed in NumPy 1.20; the builtin float is the
        # documented equivalent (float64).
        D = np.zeros((n, n), dtype=float)
        tfxs = np.array([['I'] * n] * n)
        # Mirror every pair into both triangles of the symmetric matrices.
        for i, j, d, t in results:
            D[j, i] = d
            D[i, j] = d
            tfxs[j, i] = t
            tfxs[i, j] = t
        logger.info('Saving results...')
        tsh.serialize(distance_name, {
                'D': D, 'min': None, 'max': None, 'tfxs': tfxs,
                'measure': measure,
                'rotation_invariance': rotation_invariance})
    return kwargs, D
def get_dissimilarities(data, output_dir=None, input_name=None,
        image_prefix=None, mask_prefix=None, n_jobs=None, **kwargs):
    """Compute (or load cached) pairwise expression dissimilarities.

    Expressions are extracted from each sample's image/mask pair, normalized
    to the requested size, and compared pairwise in parallel; the symmetric
    result matrix is cached at the path derived from
    ``kwargs['distance_name']`` and reused on subsequent calls.

    Parameters:
        data: sequence of samples with 'id', 'image' and 'mask' fields.
        output_dir: base directory for intermediate and cached files.
        input_name: name of the current input, used in the cache path.
        image_prefix, mask_prefix: directories holding images and masks.
        n_jobs: parallelism level (defaults to 1).
        **kwargs: must contain 'measure', 'distance_name',
            'rotation_invariance', 'normalized_width', 'normalized_height';
            may contain 'create_links' and 'save_expr_images'.

    Returns:
        (kwargs, D): updated arguments and the symmetric n-by-n matrix.
    """
    assert image_prefix is not None
    assert mask_prefix is not None
    assert output_dir is not None
    assert input_name is not None
    measure = kwargs['measure']
    distance_name = kwargs['distance_name']
    rotation_invariance = kwargs['rotation_invariance']
    normalized_width = kwargs['normalized_width']
    normalized_height = kwargs['normalized_height']
    if n_jobs is None:
        n_jobs = 1
    image_prefix = os.path.expanduser(image_prefix)
    # BUG FIX: the original expanded image_prefix into mask_prefix,
    # so masks were looked up under the image directory.
    mask_prefix = os.path.expanduser(mask_prefix)
    distance_name = distance_name.format(OUT=output_dir,
            INPUTNAME=input_name, **kwargs)
    tsh.makedirs(os.path.dirname(distance_name))
    kwargs['distance_name'] = distance_name
    expr_dir = os.path.join(output_dir,
            'expr/%04dx%04d' % (normalized_width, normalized_height))
    tsh.makedirs(expr_dir)
    # Make it easier for evaluate.py to create nice html reports.
    if kwargs.get('create_links') == True:
        # lexists (unlike exists) also catches broken symlinks left behind
        # by earlier runs, so they get replaced instead of silently kept.
        if os.path.lexists('expr'):
            os.unlink('expr')
        try:
            os.symlink(expr_dir, 'expr')
        except OSError:
            # Links are a convenience only; failure is non-fatal.
            pass
        if os.path.lexists('distance'):
            os.unlink('distance')
        try:
            os.symlink(os.path.dirname(distance_name), 'distance')
        except OSError:
            pass
    save_expr_images = kwargs.get('save_expr_images', False)
    if os.path.exists(distance_name):
        # Cached result from a previous run.
        D = tsh.deserialize(distance_name)['D']
    else:
        imagenames = [os.path.join(image_prefix, sample['image'])
                for sample in data]
        masknames = [os.path.join(mask_prefix, sample['mask'])
                for sample in data]
        n = len(data)
        logger.info('Extracting %d expressions...', n)
        Parallel(n_jobs=n_jobs, verbose=True, pre_dispatch='2*n_jobs')(
                delayed(_extract_expression)(
                        imagenames[j], masknames[j],
                        os.path.join(expr_dir, 'inside%02d.png' % data[j]['id'])
                                if save_expr_images else None,
                        os.path.join(expr_dir, 'expr%02d.png' % data[j]['id'])
                                if save_expr_images else None,
                        os.path.join(expr_dir, 'expr%02d.dat' % data[j]['id']),
                        normalized_width, normalized_height)
                for j in range(n))
        logger.info('Computing %d dissimilarities...', (n * (n - 1)) / 2)
        results = Parallel(n_jobs=n_jobs, verbose=True,
                pre_dispatch='2*n_jobs')(
                delayed(_get_dissimilarity)(
                        i, j, measure, rotation_invariance,
                        os.path.join(expr_dir, 'expr%02d.dat' % data[i]['id']),
                        os.path.join(expr_dir, 'expr%02d.dat' % data[j]['id']))
                for j in range(n) for i in range(j + 1, n))
        logger.info('Transforming results...')
        # np.float was removed in NumPy 1.20; the builtin float is the
        # documented equivalent (float64).
        D = np.zeros((n, n), dtype=float)
        tfxs = np.array([['I'] * n] * n)
        # Mirror every pair into both triangles of the symmetric matrices.
        for i, j, d, t in results:
            D[j, i] = d
            D[i, j] = d
            tfxs[j, i] = t
            tfxs[i, j] = t
        logger.info('Saving results...')
        tsh.serialize(distance_name, {
                'D': D, 'min': None, 'max': None, 'tfxs': tfxs,
                'measure': measure,
                'rotation_invariance': rotation_invariance})
    return kwargs, D
from utils import read_listfile

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Copies images for all the listfiles.')
    parser.add_argument('-r', '--relative', dest='relative', required=True,
            action='store', default=None, help='Make paths relative to this.')
    parser.add_argument('lists', nargs='*', action='store',
            help='List file(s).')
    parser.add_argument('-o', '--output', dest='output', required=False,
            action='store', default=None, help='Output directory.')
    opts = parser.parse_args()
    if opts.output is None:
        outdir = tempfile.mkdtemp(dir=os.curdir, prefix='out')
        logger.info('Output directory %s', outdir)
    else:
        outdir = opts.output
        if not os.path.exists(outdir):
            tsh.makedirs(outdir)
    for listname in opts.lists:
        meta, data = read_listfile(listname)
        image_prefix = meta['image_prefix']
        mask_prefix = meta['mask_prefix']
        for d in data:
            # Copy each image and mask, recreating the directory layout
            # relative to opts.relative under the output directory.
            filename = os.path.join(image_prefix, d['image'])
            destdir = os.path.dirname(os.path.join(outdir,
                    os.path.relpath(filename, opts.relative)))
            tsh.makedirs(destdir)
            shutil.copy(filename, destdir)
            filename = os.path.join(mask_prefix, d['mask'])
            destdir = os.path.dirname(os.path.join(outdir,
                    os.path.relpath(filename, opts.relative)))
            tsh.makedirs(destdir)
            shutil.copy(filename, destdir)