def main():
    """Parse the command line and run every requested test.

    Sets the module-level ``verbose``, ``paranoia`` and ``use_cache`` flags
    from the options, resolves the set of input files, creates a worker pool
    and hands each test named on the command line to its runner in ``TESTS``.

    Each positional argument has the form
    ``test_name[:parameter1=value1[:...]]``.  Empty and unrecognised test
    names are reported on stderr and skipped; the remaining tests still run.
    """
    global verbose, paranoia, use_cache

    description = "Produce visualisations and find optimal parameters of compression algorithms"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('--verbose', dest='verbose', action='store_true',
                        help='produce detailed output showing work performed.')
    parser.add_argument('--paranoia', dest='paranoia', action='store_true',
                        help='verify correct operation of compression algorithms by decompressing '
                             'their output and comparing to the original file.')
    parser.add_argument('--rerun', dest='rerun', action='store_true',
                        help='regenerate the data, even if there is a cached result.')
    parser.add_argument('--include', dest='include', nargs='+',
                        help='paths which match the specified regex are included; '
                             'if unspecified, defaults to *.')
    parser.add_argument('--exclude', dest='exclude', nargs='+',
                        help='paths which match the specified regex are excluded.')
    # type=int lets argparse reject a non-numeric value with a usage error
    # instead of failing later with a ValueError traceback.
    parser.add_argument('--num-workers', dest='num_workers', type=int,
                        default=config.NUM_WORKERS,
                        help='number of local processes (default: {0})'.format(config.NUM_WORKERS))
    parser.add_argument('--style', dest='style',
                        help='plot style, passed to plot.set_style.')
    parser.add_argument('tests', nargs='*',
                        help='list of tests to conduct; format is test_name[:parameter1=value1[:...]]')
    args = vars(parser.parse_args())

    verbose = args['verbose']
    paranoia = args['paranoia']
    use_cache = not args['rerun']

    # argparse only applies type= to string defaults, so normalise here in
    # case config.NUM_WORKERS is some other numeric type.
    num_workers = int(args['num_workers'])

    if args['style']:
        plot.set_style(args['style'])

    files = general.include_exclude_files(args['include'], args['exclude'])
    if verbose:
        print("Operating on: {0}".format(files))

    pool = multiprocessing.Pool(num_workers)
    if verbose:
        print("Splitting work across {0} processes".format(num_workers))

    if not args['tests']:
        print("WARNING: no tests specified", file=sys.stderr)

    for test in args['tests']:
        if not test:  # empty string
            print("ERROR: test name cannot be an empty string", file=sys.stderr)
            continue

        test_name, *test_args = test.split(":")
        test_kwargs = to_kwargs(test_args)
        test_id = canonical_name(test_name, test_kwargs)

        if test_name in TESTS:
            test_runner = TESTS[test_name]
            if verbose:
                print("Running " + test_id)
            test_runner(pool, files, test_id, **test_kwargs)
        else:
            # Reported on stderr, like the other diagnostics in this function.
            print("ERROR: unrecognised test '{0}'".format(test_name), file=sys.stderr)

    # Stop accepting work, then wait for outstanding tasks to finish.
    pool.close()
    pool.join()
    print("All tests finished.")
def generate_resource_figure(test, settings, data):
    """Draw a log-scale bar chart of one resource, one bar per algorithm.

    Reads ``settings['file']``, ``settings['algos']``, ``settings['style']``
    and ``settings['col']``.  The processed data for the chosen file is
    reduced per algorithm with ``confidence_interval`` at
    ``config.RESOURCE_ALPHA``; element 0 of each result is used as the bar
    height and element 1 as its error bar.  Each bar is annotated with its
    formatted height and the figure is saved via ``plot.save_figure`` under
    the name ``'resources'`` with ``[test]``.
    """
    target_file = settings['file']
    algos = settings['algos']

    per_file, label_formatter = process_resource_data(settings, data)
    per_algo = per_file[target_file]

    # One confidence-interval result per algorithm, in the order of `algos`.
    intervals = []
    for algo in algos:
        intervals.append(confidence_interval(per_algo[algo], config.RESOURCE_ALPHA))

    positions = np.arange(len(algos))
    heights = [interval[0] for interval in intervals]
    errors = [interval[1] for interval in intervals]

    abbreviate = config.ALGO_ABBREVIATIONS.get
    tick_labels = [abbreviate(algo) for algo in algos]
    palette = ppl.brewer2mpl.get_map('Set2', 'qualitative', len(algos))
    bar_colors = palette.mpl_colors

    style = settings['style']
    if style:
        plot.set_style(style)
    plot.new_figure()
    fig, ax = plt.subplots()
    bars = ppl.bar(positions, heights,
                   xticklabels=tick_labels,
                   yerr=errors,
                   log=True,
                   grid='y',
                   color=bar_colors)

    # Put each bar's formatted value just above its top, centred on the bar.
    for bar in bars:
        top = bar.get_height()
        centre = bar.get_x() + bar.get_width()/2.
        plt.annotate(label_formatter(top),
                     xy=(centre, top),
                     xytext=(0, 10),
                     textcoords='offset points',
                     horizontalalignment='center',
                     verticalalignment='bottom')

    plt.xlabel('Compressor')
    column = settings['col']
    if column == 'runtime':
        plt.ylabel(r'Runtime (\si{\second})')
    elif column == 'memory':
        plt.ylabel(r'Memory (\si{\byte})')
        # Units are in powers of two, so the scale should be as well.
        # NOTE(review): assumed to apply to the memory plot only - confirm.
        ax.set_yscale('log', basey=2)

    # Hack to make labels fit: double the upper y limit.
    lower, upper = plt.ylim()
    plt.ylim((lower, upper*2))

    plot.save_figure(fig, 'resources', [test])