def main():
    """Parse the command line and run each requested test on a worker pool.

    Sets the module-level ``verbose``, ``paranoia`` and ``use_cache`` flags
    from the parsed options, optionally applies a plot style, selects the
    input files with ``general.include_exclude_files`` and then dispatches
    every entry of the ``tests`` positional argument to its runner in
    ``TESTS``.

    Each test is written as ``test_name[:parameter1=value1[:...]]``. The
    parameters are converted with ``to_kwargs`` and forwarded to the runner
    as keyword arguments. Empty or unrecognised test names are reported on
    stderr and skipped; the remaining tests still run.

    The worker pool is always shut down: it is closed after a normal run and
    terminated if a test runner raises (the exception is then re-raised).
    """
    global verbose, paranoia, use_cache

    description = "Produce visualisations and find optimal parameters of compression algorithms"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--verbose', dest='verbose', action='store_true',
                        help='produce detailed output showing work performed.')
    parser.add_argument('--paranoia', dest='paranoia', action='store_true',
                        help='verify correct operation of compression algorithms by decompressing ' +
                             'their output and comparing to the original file.')
    parser.add_argument('--rerun', dest='rerun', action='store_true',
                        help='regenerate the data, even if there is a cached result.')
    parser.add_argument('--include', dest='include', nargs='+',
                        help='paths which match the specified regex are included; ' +
                             'if unspecified, defaults to *.')
    parser.add_argument('--exclude', dest='exclude', nargs='+',
                        help='paths which match the specified regex are excluded.')
    # type=int makes argparse reject a non-numeric value with a usage error
    # instead of letting it surface later as a ValueError traceback.
    parser.add_argument('--num-workers', dest='num_workers', type=int,
                        default=config.NUM_WORKERS,
                        help='number of local processes (default: {0})'.format(config.NUM_WORKERS))
    parser.add_argument('--style', dest='style')
    parser.add_argument('tests', nargs='*',
                        help='list of tests to conduct; format is test_name[:parameter1=value1[:...]]')
    args = vars(parser.parse_args())

    verbose = args['verbose']
    paranoia = args['paranoia']
    use_cache = not args['rerun']
    # The default comes from config and is not passed through type=int unless
    # it is a string, so normalise it here as well.
    num_workers = int(args['num_workers'])

    if args['style']:
        plot.set_style(args['style'])

    files = general.include_exclude_files(args['include'], args['exclude'])
    if verbose:
        print("Operating on: {0}".format(files))

    pool = multiprocessing.Pool(num_workers)
    try:
        if verbose:
            print("Splitting work across {0} processes".format(num_workers))

        if not args['tests']:
            print("WARNING: no tests specified", file=sys.stderr)

        for test in args['tests']:
            if not test:  # empty string
                print("ERROR: test name cannot be an empty string", file=sys.stderr)
                continue

            test_name, *test_args = test.split(":")
            # Reject unknown tests before parsing their parameters, so a bad
            # name is always reported as such.
            if test_name not in TESTS:
                print("ERROR: unrecognised test '" + test_name + "'", file=sys.stderr)
                continue

            test_kwargs = to_kwargs(test_args)
            test_id = canonical_name(test_name, test_kwargs)
            test_runner = TESTS[test_name]
            if verbose:
                print("Running " + test_id)
            test_runner(pool, files, test_id, **test_kwargs)
    except BaseException:
        # Includes KeyboardInterrupt: stop the workers immediately rather than
        # leaving them running behind a failed or interrupted run.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()

    print("All tests finished.")
csv_fname = None if args['csv_fname']: table_type = '' csv_fname = args['csv_fname'] if args['table_type']: table_type = args['table_type'] if table_type not in {"bits", "per", "size", "time"}: parser.error("Unrecognised table type: " + table_type) verbose = args['verbose'] compressors = general.find_compressors(args['compressor']) if verbose: print("Using compressors: " + str(compressors)) files = general.include_exclude_files(args['include'], args['exclude']) if verbose: print("Compressing files: " + str(files)) results = {'Size': {}} for fname in files: input_fname = os.path.join(config.CORPUS_DIR, fname) results['Size'][fname] = os.path.getsize(input_fname) work = [] for compressor_name in compressors: compressor, kwargs = config.COMPRESSORS[compressor_name] for fname in files: kwargs.update({'fname': fname, 'paranoia': True}) work += [compressor.s(**kwargs)]