def execute(argv): """Generate random sequences for each benchmark""" del argv FLAGS = flags.FLAGS # The benchmarks benchmarks = IO.load_yaml(FLAGS.benchmarks_filename) if not benchmarks: logging.fatal('There are no benchmarks to process') # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Process each benchmark for benchmark in tqdm(benchmarks, desc='Processing'): index = benchmark.find('.') bench_dir = benchmark[:index] bench_name = benchmark[index + 1:] bench_in_dir = os.path.join(FLAGS.benchmarks_directory, bench_dir, bench_name) if not os.path.isdir(bench_in_dir): continue bench_out_dir = os.path.join(FLAGS.results_directory, bench_dir) # Create the results directory for the suite try: os.makedirs(bench_out_dir) except FileExistsError: pass filename = '{}/{}.yaml'.format(bench_out_dir, bench_name) if FLAGS.verify_report and os.path.isfile(filename): continue results = {} for compiler in ['opt', 'llvm']: for level in FLAGS.levels: goal_value = Engine.evaluate( Goals.prepare_goals(FLAGS.goals, FLAGS.weights), '-{}'.format(level), compiler, bench_in_dir, FLAGS.working_set, FLAGS.times, FLAGS.tool, FLAGS.verify_output) compiler_name = 'clang' if compiler == 'llvm' else 'opt' if compiler_name not in results: results[compiler_name] = {} results[compiler_name][level] = { 'goal': goal_value, 'seq': ['-{}'.format(level)] } IO.dump_yaml(results, filename)
def execute(argv): """Generate random sequences for each benchmark""" del argv FLAGS = flags.FLAGS # The benchmarks benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename) if not benchmarks: logging.error('There are no benchmarks to process') sys.exit(1) # Verify benchmark directory if not os.path.isdir(FLAGS.benchmarks_directory): logging.error('Benchmarks directory {} does not exist.'.format( FLAGS.benchmarks_directory)) sys.exit(1) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Process each benchmark for benchmark in tqdm(benchmarks, desc='Processing'): index = benchmark.find('.') suite_name = benchmark[:index] bench_name = benchmark[index + 1:] benchmark_dir = os.path.join(FLAGS.benchmarks_directory, suite_name, bench_name) if not os.path.isdir(benchmark_dir): continue results_dir = os.path.join(FLAGS.results_directory, suite_name) # Create the results directory for the suite try: os.makedirs(results_dir) except FileExistsError: pass filename = '{}/{}.yaml'.format(results_dir, bench_name) if FLAGS.verify_report and os.path.isfile(filename): continue Engine.compilee(benchmark_dir, 'opt', '-{}'.format(FLAGS.baseline)) features = Milepost.extract(benchmark_dir) # Engine.cleanup(benchmark_dir, 'opt') IO.dump_yaml(features, filename)
def execute(argv): """Generate random sequences for each benchmark""" del argv FLAGS = flags.FLAGS # The benchmarks benchmarks = IO.load_yaml(FLAGS.benchmarks_filename) if not benchmarks: logging.error('There are no benchmarks to process') sys.exit(1) # Verify benchmark directory if not os.path.isdir(FLAGS.benchmarks_directory): logging.error('Benchmarks directory {} does not exist.'.format( FLAGS.benchmarks_directory)) sys.exit(1) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Initialize a Random object rnd = Random(FLAGS.nof_sequences, FLAGS.minimum, FLAGS.maximum, FLAGS.factor, FLAGS.ssa, FLAGS.shuffle, FLAGS.update, FLAGS.repetition, FLAGS.original, FLAGS.passes_filename, Goals.prepare_goals(FLAGS.goals, FLAGS.weights), 'opt', FLAGS.benchmarks_directory, FLAGS.working_set, FLAGS.times, FLAGS.tool, FLAGS.verify_output) # Process each benchmark for benchmark in tqdm(benchmarks, desc='Processing'): index = benchmark.find('.') bench_dir = benchmark[:index] bench_name = benchmark[index + 1:] bench_dir = os.path.join(FLAGS.results_directory, bench_dir) # Create the results directory for the suite try: os.makedirs(bench_dir) except FileExistsError: pass filename = '{}/{}.yaml'.format(bench_dir, bench_name) if FLAGS.verify_report and os.path.isfile(filename): continue rnd.run(benchmark) if rnd.results: IO.dump_yaml(rnd.results, filename, FLAGS.report_only_the_best)
def execute(argv): """Generate the report""" del argv FLAGS = flags.FLAGS results_files = glob.glob('{}/*.yaml'.format(FLAGS.data_directory)) report = {} strategies = [] for results in tqdm(results_files, desc='Processing'): index = results.rfind('_') benchmark = results[:index] benchmark = benchmark.replace('{}/'.format(FLAGS.data_directory), '') strategy = results[index+1:] strategy = strategy.replace('.yaml', '') if strategy not in strategies: strategies.append(strategy) if benchmark not in report: report[benchmark] = {} data = IO.load_yaml(results) values = [str_data['goal'] for _, str_data in data.items()] report[benchmark][strategy] = sum(values) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass filename = '{}/{}'.format(FLAGS.results_directory, FLAGS.report_filename) IO.dump_yaml(report, filename) strategies.sort() csv_filename = filename.replace('.yaml', '.csv') with open(csv_filename, 'w') as f: w = csv.DictWriter(f, ['bench']+strategies) w.writeheader() line = {} for bench_name, data in report.items(): line['bench'] = bench_name for strategy_name, strategy_value in data.items(): line[strategy_name] = strategy_value w.writerow(line)
def execute(argv): """Find the best K sequences, from training data.""" del argv FLAGS = flags.FLAGS # The benchmarks benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename) if not benchmarks: logging.fatal('There are no training benchmarks to process.') # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Verify directories if not os.path.isdir(FLAGS.training_directory): logging.error('Training directory {} does not exit.'.format( FLAGS.training_directory)) sys.exit(1) if not os.path.isdir(FLAGS.baseline_directory): logging.error('Baseline directory {} does not exit.'.format( FLAGS.baseline_directory)) sys.exit(1) # Initialize a BestK object bestk = BestK(FLAGS.training_directory, FLAGS.baseline_directory) # Execute for k in tqdm(FLAGS.k, desc='Best-k'): filename = '{}/best_{}.yaml'.format(FLAGS.results_directory, k) if FLAGS.verify_report and os.path.isfile(filename): continue bestk.run(benchmarks, FLAGS.compiler, FLAGS.baseline, int(k)) # Store the results IO.dump_yaml(bestk.results, filename) # Store the number of the programs cover by each sequence filename = '{}/covering_{}.yaml'.format(FLAGS.results_directory, k) IO.dump_yaml(bestk.covering, filename)
def execute(argv): """Create a small sequence""" del argv FLAGS = flags.FLAGS # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # The benchmarks benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename) if not benchmarks: logging.fatal('There are no benchmarks to process') # Create the sequences file results = {} counter = 0 for benchmark in tqdm(benchmarks, desc='Processing'): index = benchmark.find('.') bench_dir = benchmark[:index] bench_name = benchmark[index + 1:] bench_dir = os.path.join(FLAGS.training_directory, bench_dir) filename = '{}/{}.yaml'.format(bench_dir, bench_name) sequences = IO.load_yaml_or_fail(filename) sequences = Sequence.get_the_best(sequences) for _, data in sequences.items(): sequence = data['seq'] if Sequence.exist(sequence, results): continue results[counter] = {'seq': sequence} counter += 1 filename = '{}/{}'.format(FLAGS.results_directory, FLAGS.sequences_filename) IO.dump_yaml(results, filename)
def execute(argv): """Create a small sequence""" del argv FLAGS = flags.FLAGS # Verify directories if not os.path.isdir(FLAGS.benchmarks_directory): logging.error('Benchmarks directory {} does not exist.'.format( FLAGS.benchmarks_directory)) sys.exit(1) if not os.path.isdir(FLAGS.training_directory): logging.error('Training directory {} does not exist.'.format( FLAGS.training_directory)) sys.exit(1) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # The benchmarks if FLAGS.benchmarks_filename: benchmarks = IO.load_yaml(FLAGS.benchmarks_filename) if not benchmarks: logging.exit('There are no benchmarks to process') sys.exit(1) else: benchmarks = glob.glob('{}/*.yaml'.format(FLAGS.training_directory)) benchmarks = [ b.replace('{}/'.format(FLAGS.training_directory), '').replace('.yaml', '') for b in benchmarks ] # Initialize a SequenceReduction object seqred = SequenceReduction(Goals.prepare_goals(FLAGS.goals, FLAGS.weights), 'opt', FLAGS.benchmarks_directory, FLAGS.working_set, FLAGS.times, FLAGS.tool, FLAGS.verify_output) # Reduce for benchmark in tqdm(benchmarks, desc='Processing'): index = benchmark.find('.') bench_dir = benchmark[:index] bench_name = benchmark[index + 1:] bench_dir_result = os.path.join(FLAGS.results_directory, bench_dir) filename_result = '{}/{}.yaml'.format(bench_dir_result, bench_name) # Create the results directory for the suite try: os.makedirs(bench_dir_result) except FileExistsError: pass if FLAGS.verify_report and os.path.isfile(filename_result): continue bench_dir_training = os.path.join(FLAGS.training_directory, bench_dir) filename_training = '{}/{}.yaml'.format(bench_dir_training, bench_name) sequences = IO.load_yaml(filename_training) sequences = Sequence.get_the_best(sequences, FLAGS.nof_sequences) results = {} counter = 0 for _, data in sequences.items(): seqred.run(data['seq'], benchmark) if not FLAGS.report_only_the_small: results[counter] = { 'seq': seqred.results[0]['seq'], 'goal': seqred.results[0]['goal'] } counter += 1 results[counter] = { 'seq': seqred.results[1]['seq'], 'goal': seqred.results[1]['goal'] } counter += 1 IO.dump_yaml(results, filename_result)
def execute(argv): """Generate genetic sequences for each benchmark""" del argv FLAGS = flags.FLAGS # The benchmarks benchmarks = IO.load_yaml(FLAGS.benchmarks_filename) if not benchmarks: logging.error('There are no benchmarks to process') sys.exit(1) # Verify benchmark directory if not os.path.isdir(FLAGS.benchmarks_directory): logging.error('Benchmarks directory {} does not exist.'.format( FLAGS.benchmarks_directory) ) sys.exit(1) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Initialize a SGA object sga = SGA(FLAGS.generations, FLAGS.population, FLAGS.cr, FLAGS.m, FLAGS.param_m, FLAGS.param_s, FLAGS.crossover, FLAGS.mutation, FLAGS.selection, FLAGS.seed, FLAGS.dimension, FLAGS.passes_filename, Goals.prepare_goals(FLAGS.goals, FLAGS.weights), 'opt', FLAGS.benchmarks_directory, FLAGS.working_set, FLAGS.times, FLAGS.tool, FLAGS.verify_output) # Process each benchmark for benchmark in tqdm(benchmarks, desc='Processing'): index = benchmark.find('.') bench_dir = benchmark[:index] bench_name = benchmark[index+1:] bench_dir = os.path.join(FLAGS.results_directory, bench_dir) # Create the results directory for the suite try: os.makedirs(bench_dir) except FileExistsError: pass filename = '{}/{}.yaml'.format(bench_dir, bench_name) if FLAGS.verify_report and os.path.isfile(filename): continue sga.run(benchmark) if sga.results: IO.dump_yaml(sga.results, filename, FLAGS.report_only_the_best)
def execute(argv): """Generate the report""" del argv FLAGS = flags.FLAGS from_files = glob.glob('{}/*.yaml'.format(FLAGS.from_directory)) report = {} strategies = [] statistics = {} for from_file in tqdm(from_files, desc='Processing'): index = from_file.rfind('_') benchmark = from_file[:index] benchmark = benchmark.replace('{}/'.format(FLAGS.from_directory), '') strategy = from_file[index + 1:] strategy = strategy.replace('.yaml', '') if strategy not in strategies: strategies.append(strategy) statistics[strategy] = [] data = IO.load_yaml(from_file) the_best = Sequence.get_the_best(data) the_best_key = list(the_best.keys()).pop() filename = '{}*{}.yaml'.format(benchmark, strategy) to_files = glob.glob('{}/{}'.format(FLAGS.to_directory, filename)) for to_file in to_files: benchmark = to_file.replace('{}/'.format(FLAGS.to_directory), '') benchmark = benchmark.replace('_{}.yaml'.format(strategy), '') if benchmark not in report: report[benchmark] = {} data = IO.load_yaml(to_file) report[benchmark][strategy] = the_best.copy() statistics[strategy].append(data[the_best_key]['goal']) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # YAML Report filename = '{}/{}'.format(FLAGS.results_directory, FLAGS.report_filename) IO.dump_yaml(report, filename) # CSV Report strategies.sort() filename = filename.replace('.yaml', '.csv') with open(filename, 'w') as f: w = csv.DictWriter(f, ['bench'] + strategies) w.writeheader() line = {} for bench_name, data in report.items(): line['bench'] = bench_name for strategy_name, strategy_value in data.items(): line[strategy_name] = strategy_value w.writerow(line) # Statistics filename = filename.replace('.csv', '.stat') stats = {} for strategy, values in statistics.items(): stats[strategy] = { 'min': float(np.min(values)), 'max': float(np.max(values)), 'mean': float(np.mean(values)), 'median': float(np.median(values)), '1quantile': float(np.quantile(values, 0.25)), '2quantile': float(np.quantile(values, 0.50)), '3quantile': float(np.quantile(values, 0.75)) } IO.dump_yaml(stats, filename)
def execute(argv): """Evaluate N sequences""" del argv FLAGS = flags.FLAGS # The benchmarks benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename) if not benchmarks: logging.error('There are no benchmarks to process') sys.exit(1) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Process each benchmark for benchmark in tqdm(benchmarks, desc='Processing'): # The benchmark index = benchmark.find('.') suite = benchmark[:index] bench_name = benchmark[index + 1:] # Create the results directory for the suite results_dir = os.path.join(FLAGS.results_directory, suite) try: os.makedirs(results_dir) except FileExistsError: pass # Verify report if FLAGS.suffix: output_filename = '{}/{}_{}.yaml'.format(results_dir, bench_name, FLAGS.suffix) else: output_filename = '{}/{}.yaml'.format(results_dir, bench_name) if FLAGS.verify_report and os.path.isfile(output_filename): continue # Benchmark directory bench_dir = os.path.join(FLAGS.benchmarks_directory, suite, bench_name) if not os.path.isdir(bench_dir): logging.error('Benchmark {} does not exist.'.format(benchmark)) sys.exit(1) # The training data training_dir = os.path.join(FLAGS.training_directory, suite) filename = '{}/{}.yaml'.format(training_dir, bench_name) sequences = IO.load_yaml_or_fail(filename) if not sequences: logging.error('There are no sequences to process') sys.exit(1) best_sequence = Sequence.get_the_best(sequences) # Verify if the best sequence is better than the baseline baseline_dir = os.path.join(FLAGS.baseline_directory, suite) filename = '{}/{}.yaml'.format(baseline_dir, bench_name) baseline_data = IO.load_yaml_or_fail(filename) if not baseline_data: logging.error('There are no baseline data') sys.exit(1) baseline_goal = baseline_data[FLAGS.compiler][FLAGS.baseline]['goal'] for _, data in best_sequence.items(): best_sequence_goal = data['goal'] if not (best_sequence_goal < baseline_goal): continue sequences = split_sequence(best_sequence) # Process the sequences results = {} for key, data in sequences.items(): goal_value = Engine.evaluate( Goals.prepare_goals(FLAGS.goals, FLAGS.weights), Sequence.name_pass_to_string(data['seq']), 'opt', bench_dir, FLAGS.working_set, FLAGS.times, FLAGS.tool, FLAGS.verify_output) results[key] = {'seq': data['seq'], 'goal': goal_value} # Store the results IO.dump_yaml(results, output_filename)
def execute(argv): """Find the euclidean distance from test to training data.""" del argv FLAGS = flags.FLAGS # The training benchmarks training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename) if not training_benchmarks: logging.error('There are no training benchmarks to process') sys.exit(1) # The training benchmarks test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename) if not test_benchmarks: logging.error('There are no test benchmarks to process') sys.exit(1) # Verify directories if not os.path.isdir(FLAGS.training_representation_directory): logging.error('Training directory {} does not exist.'.format( FLAGS.training_representation_directory)) sys.exit(1) if not os.path.isdir(FLAGS.test_representation_directory): logging.error('Test directory {} does not exist.'.format( FLAGS.test_representation_directory)) sys.exit(1) # Measure the distance if FLAGS.distance == 'euclidean': distance = Distance.euclidean(training_benchmarks, FLAGS.training_representation_directory, test_benchmarks, FLAGS.test_representation_directory) if FLAGS.distance == 'manhattan': distance = Distance.manhattan(training_benchmarks, FLAGS.training_representation_directory, test_benchmarks, FLAGS.test_representation_directory) if FLAGS.distance == 'cosine': distance = Distance.cosine(training_benchmarks, FLAGS.training_representation_directory, test_benchmarks, FLAGS.test_representation_directory) # Store the distance for i, test_bench in enumerate(tqdm(test_benchmarks, desc='Processing')): index = test_bench.find('.') test_suite_name = test_bench[:index] test_bench_name = test_bench[index + 1:] results = {} for j, training_bench in enumerate(training_benchmarks): index = training_bench.find('.') training_suite_name = training_bench[:index] training_bench_name = training_bench[index + 1:] if training_suite_name not in results: results[training_suite_name] = {} results[training_suite_name][training_bench_name] = float( distance[i][j]) for training_suite_name, training_distance in results.items(): results_dir = os.path.join(FLAGS.results_directory, test_suite_name, training_suite_name) # Create the results directory try: os.makedirs(results_dir) except FileExistsError: pass filename = '{}/{}.yaml'.format(results_dir, test_bench_name) IO.dump_yaml(training_distance, filename)
def execute(argv): """Generate the figure""" del argv FLAGS = flags.FLAGS validate_baselines(FLAGS.baselines) # Read the reports if not os.path.isfile(FLAGS.data_report): logging.fatal('CSV report {} does not exist.'.format( FLAGS.data_report) ) if not os.path.isfile(FLAGS.levels_report): logging.fatal('CSV report {} does not exist.'.format( FLAGS.levels_report) ) goal_data = pd.read_csv(FLAGS.data_report) goal_data.index = goal_data.bench goal_data = goal_data.drop(columns='bench') x_labels, strategies = sort_strategies(goal_data.columns) level_data = pd.read_csv(FLAGS.levels_report) level_data.index = level_data.bench level_data = level_data.drop(columns='bench') baselines_average = {} statistics = {} for baseline in FLAGS.baselines: baselines_average[baseline] = [] statistics[baseline] = {} for strategy in strategies: values = [level_data.loc[index][baseline] - goal_data.loc[index][strategy] for index in goal_data.index] baselines_average[baseline].append(np.mean(values)) statistics[baseline][strategy] = values.copy() # PLOT symbols = ['p', 'x', '+', '*', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'D', 'd', 'h', 'H'] x = [i for i in range(len(strategies))] plt.style.use('seaborn-whitegrid') _, ax = plt.subplots() # plt.ylim(-1500, 1500) plt.ylabel(FLAGS.y_label, fontsize=10, fontweight='bold') plt.xlabel(FLAGS.x_label, fontsize=10, fontweight='bold') plt.xticks(x, labels=x_labels, fontweight='bold') # plt.xticks(fontsize=10, fontweight='bold') # plt.yticks(fontsize=10, fontweight='bold') plt.tight_layout() for i, (b_name, b_data) in enumerate(baselines_average.items()): ax.plot(x, b_data, symbols[i], color='gray', markersize=10, markerfacecolor='white', markeredgewidth=2, label=b_name) leg = ax.legend(prop={'weight': 'bold', 'size': 12}, ncol=2, loc=4) for text in leg.get_texts(): plt.setp(text, color='gray') filename = '{}/{}'.format( FLAGS.output_directory, FLAGS.figure_filename ) if FLAGS.output_directory else FLAGS.figure_filename # Create the results directory if FLAGS.output_directory: try: os.makedirs(FLAGS.output_directory) except FileExistsError: pass plt.savefig(filename) plt.show() # Statistics stats = {} for b_name, b_data in statistics.items(): stats[b_name] = {} for s_name, s_data in b_data.items(): stats[b_name][s_name] = { 'min': float(np.min(s_data)), 'max': float(np.max(s_data)), 'mean': float(np.mean(s_data)), 'median': float(np.median(s_data)), '1quantile': float(np.quantile(s_data, 0.25)), '2quantile': float(np.quantile(s_data, 0.50)), '3quantile': float(np.quantile(s_data, 0.75)) } index = FLAGS.figure_filename.rfind('.') sname = FLAGS.figure_filename[:index] filename = '{}/{}.yaml'.format( FLAGS.output_directory, sname, ) if FLAGS.output_directory else sname IO.dump_yaml(stats, filename)
def execute(argv): """Evaluate N sequences""" del argv FLAGS = flags.FLAGS # The benchmarks benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename) if not benchmarks: logging.error('There are no benchmarks to process') sys.exit(1) # The sequences sequences = IO.load_yaml_or_fail(FLAGS.sequences_filename) if not sequences: logging.error('There are no benchmarks to process') sys.exit(1) # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Process each benchmark for benchmark in tqdm(benchmarks, desc='Processing'): index = benchmark.find('.') bench_dir = benchmark[:index] bench_name = benchmark[index+1:] bench_in_dir = os.path.join(FLAGS.benchmarks_directory, bench_dir, bench_name) if not os.path.isdir(bench_in_dir): continue bench_out_dir = os.path.join(FLAGS.results_directory, bench_dir) # Create the results directory for the suite try: os.makedirs(bench_out_dir) except FileExistsError: pass # Verify report if FLAGS.suffix: filename = '{}/{}_{}.yaml'.format( bench_out_dir, bench_name, FLAGS.suffix ) else: filename = '{}/{}.yaml'.format(bench_out_dir, bench_name) if FLAGS.verify_report and os.path.isfile(filename): continue # Process the sequences results = {} for key, data in sequences.items(): goal_value = Engine.evaluate( Goals.prepare_goals(FLAGS.goals, FLAGS.weights), Sequence.name_pass_to_string(data['seq']), 'opt', bench_in_dir, FLAGS.working_set, FLAGS.times, FLAGS.tool, FLAGS.verify_output ) results[key] = {'seq': data['seq'], 'goal': goal_value} # Store the results IO.dump_yaml(results, filename)
def execute(argv): """Generate random sequences for each benchmark""" FLAGS = flags.FLAGS results_directory = FLAGS.results_directory # Test benchmarks test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename) if not test_benchmarks: logging.fatal('There are no test benchmarks to process') # Training benchmarks training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename) if not training_benchmarks: logging.fatal('There are no training benchmarks to process') # Create the results directory try: os.makedirs(FLAGS.results_directory) except FileExistsError: pass # Extract the representation for test programs print(bold('1. EXTRACTING THE REPRESENTATION')) FLAGS.results_directory = os.path.join(results_directory, 'representation') FLAGS.benchmarks_filename = FLAGS.test_benchs_filename representation.execute(argv) # Distance: test --> training print(bold('2. MEASURING THE DISTANCE')) distance_results_directory = os.path.join(results_directory, 'distance') FLAGS.results_directory = distance_results_directory FLAGS.test_representation_directory = os.path.join(results_directory, 'representation') distance.execute(argv) # Process test benchmarks print(bold('3. PROCESSING THE BENCHMARKS')) for nof_sequences in tqdm(FLAGS.nof_sequences, desc='Processing'): for test_benchmark in test_benchmarks: index = test_benchmark.find('.') suite_name = test_benchmark[:index] bench_name = test_benchmark[index + 1:] # Find the best N sequences training_suite, sequences = find_sequences( test_benchmark, training_benchmarks, distance_results_directory, FLAGS.training_data_directory, int(nof_sequences)) # Goal_name if len(FLAGS.goals) > 1: goal_name = '_'.join(FLAGS.goals) else: goal_name = FLAGS.goals[0] # Create the results directory for the suite results_dir = os.path.join(results_directory, 'predictive_compilation', training_suite, goal_name) try: os.makedirs(results_dir) except FileExistsError: pass filename = '{}/{}_j{}.yaml'.format(results_dir, bench_name, nof_sequences) if FLAGS.verify_report and os.path.isfile(filename): continue results = {} for key, data in sequences.items(): goal_value = Engine.evaluate( Goals.prepare_goals(FLAGS.goals, FLAGS.weights), Sequence.name_pass_to_string(data['seq']), 'opt', os.path.join(FLAGS.benchmarks_directory, suite_name, bench_name), FLAGS.working_set, FLAGS.times, FLAGS.tool, FLAGS.verify_output) results[key] = {'seq': data['seq'], 'goal': goal_value} IO.dump_yaml(results, filename)
def execute(argv): """Generate the figure""" del argv FLAGS = flags.FLAGS validate_baselines(FLAGS.baselines) # Read the reports if not os.path.isfile(FLAGS.data_report): logging.fatal('CSV report {} does not exist.'.format( FLAGS.data_report)) if not os.path.isfile(FLAGS.levels_report): logging.fatal('CSV report {} does not exist.'.format( FLAGS.levels_report)) goal_data = pd.read_csv(FLAGS.data_report) goal_data.index = goal_data.bench goal_data = goal_data.drop(columns='bench') _, strategies, _ = sort_strategies(goal_data.columns) level_data = pd.read_csv(FLAGS.levels_report) level_data.index = level_data.bench level_data = level_data.drop(columns='bench') statistics = {} dispersion = {'value': [], 'strategy': []} for baseline in FLAGS.baselines: statistics[baseline] = {} for strategy in strategies: if FLAGS.improvement: values = [ goal_data.loc[index][strategy] / level_data.loc[index][baseline] for index in goal_data.index ] else: values = [ level_data.loc[index][baseline] - goal_data.loc[index][strategy] for index in goal_data.index ] strg = [ '{}_{}'.format(baseline, strategy) for i in range(len(values)) ] dispersion['value'] += values.copy() dispersion['strategy'] += strg.copy() statistics[baseline][strategy] = values.copy() # The data to plot dispersion = pd.DataFrame.from_dict(dispersion) # PLOT _, ax = plt.subplots() # plt.ylim(-1500, 1500) plt.ylabel(FLAGS.y_label, fontsize=10, fontweight='bold') plt.xlabel(FLAGS.x_label, fontsize=10, fontweight='bold') plt.tight_layout() y_labels = [ '{}_{}'.format(baseline, strategy) for baseline, _ in statistics.items() for strategy in strategies ] ax = sns.violinplot(ax=ax, x="value", y="strategy", order=y_labels, data=dispersion) if FLAGS.disable_labels: ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set(xlabel='', ylabel='') else: ax.set_yticklabels(y_labels, fontsize=8) ax.set(xlabel=FLAGS.x_label, ylabel=FLAGS.y_label) filename = '{}/{}'.format( FLAGS.output_directory, FLAGS.figure_filename ) if FLAGS.output_directory else FLAGS.figure_filename # Create the results directory if FLAGS.output_directory: try: os.makedirs(FLAGS.output_directory) except FileExistsError: pass plt.savefig(filename) plt.show() # Statistics stats = {} for b_name, b_data in statistics.items(): stats[b_name] = {} for s_name, s_data in b_data.items(): stats[b_name][s_name] = { 'min': float(np.min(s_data)), 'max': float(np.max(s_data)), 'mean': float(np.mean(s_data)), 'median': float(np.median(s_data)), '1quantile': float(np.quantile(s_data, 0.25)), '2quantile': float(np.quantile(s_data, 0.50)), '3quantile': float(np.quantile(s_data, 0.75)) } index = FLAGS.figure_filename.rfind('.') sname = FLAGS.figure_filename[:index] filename = '{}/{}.yaml'.format( FLAGS.output_directory, sname, ) if FLAGS.output_directory else sname IO.dump_yaml(stats, filename)