def __populate_data(training_benchmarks, training_directory,
                    test_benchmarks, test_directory):
    """Create test and training data

    Parameters
    ----------
    training_benchmarks : list
    training_directory : str
    test_benchmarks : list
    test_directory : str

    Returns
    -------
    training_data : pandas.DataFrame
    test_data : pandas.DataFrame
    """
    training_data = {}
    for training_benchmark in training_benchmarks:
        index = training_benchmark.find('.')
        suite_name = training_benchmark[:index]
        bench_name = training_benchmark[index + 1:]

        benchmark_dir = os.path.join(training_directory, suite_name)

        data = IO.load_yaml_or_fail('{}/{}.yaml'.format(
            benchmark_dir, bench_name))
        if data:
            training_data[training_benchmark] = data

    if not training_data:
        logging.error('Training features do not exist.')
        sys.exit(1)

    test_data = {}
    for test_benchmark in test_benchmarks:
        index = test_benchmark.find('.')
        suite_name = test_benchmark[:index]
        bench_name = test_benchmark[index + 1:]

        benchmark_dir = os.path.join(test_directory, suite_name)

        data = IO.load_yaml_or_fail('{}/{}.yaml'.format(
            benchmark_dir, bench_name))
        if data:
            test_data[test_benchmark] = data

    if not test_data:
        logging.error('Test features do not exist.')
        sys.exit(1)

    training_data = pd.DataFrame.from_dict(training_data, orient='index')
    test_data = pd.DataFrame.from_dict(test_data, orient='index')

    return training_data, test_data
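
# Illustrative sketch (not part of the pipeline): __populate_data relies on
# pd.DataFrame.from_dict(..., orient='index') to align feature columns. Each
# benchmark maps to a flat {feature_name: value} dict; rows are keyed by
# benchmark name, columns are the union of feature names, and features that
# a benchmark lacks become NaN. The feature names below are made up.
def _populate_data_example():
    example = {
        'suiteA.bench1': {'ft01': 10, 'ft02': 3},
        'suiteA.bench2': {'ft01': 7, 'ft03': 1},
    }
    # Rows: suiteA.bench1, suiteA.bench2; columns: ft01, ft02, ft03.
    return pd.DataFrame.from_dict(example, orient='index')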
def execute(argv):
    """Extract Milepost static features for each benchmark"""
    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # Verify benchmark directory
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        suite_name = benchmark[:index]
        bench_name = benchmark[index + 1:]

        benchmark_dir = os.path.join(FLAGS.benchmarks_directory,
                                     suite_name,
                                     bench_name)
        if not os.path.isdir(benchmark_dir):
            continue

        results_dir = os.path.join(FLAGS.results_directory, suite_name)

        # Create the results directory for the suite
        try:
            os.makedirs(results_dir)
        except FileExistsError:
            pass

        filename = '{}/{}.yaml'.format(results_dir, bench_name)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        # Compile with the baseline before extracting the features
        Engine.compile(benchmark_dir, 'opt', '-{}'.format(FLAGS.baseline))

        features = Milepost.extract(benchmark_dir)

        # Engine.cleanup(benchmark_dir, 'opt')

        IO.dump_yaml(features, filename)
def execute(argv):
    """Collect the best training sequences into a single file"""
    del argv

    FLAGS = flags.FLAGS

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no benchmarks to process')

    # Create the sequences file
    results = {}
    counter = 0
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_dir = os.path.join(FLAGS.training_directory, bench_dir)

        filename = '{}/{}.yaml'.format(bench_dir, bench_name)
        sequences = IO.load_yaml_or_fail(filename)
        sequences = Sequence.get_the_best(sequences)

        for _, data in sequences.items():
            sequence = data['seq']
            if Sequence.exist(sequence, results):
                continue
            results[counter] = {'seq': sequence}
            counter += 1

    filename = '{}/{}'.format(FLAGS.results_directory,
                              FLAGS.sequences_filename)
    IO.dump_yaml(results, filename)
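
# Sequence.exist is implemented elsewhere; a minimal sketch of the duplicate
# check it presumably performs (an assumption, not the actual implementation):
def _sequence_exist_sketch(sequence, results):
    """Hypothetical: does `sequence` already appear among the results?"""
    return any(data['seq'] == sequence for data in results.values())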
def find_sequences(test_benchmark, training_benchmarks, distance_directory,
                   training_data_directory, nof_sequences):
    """Get N sequences from the most similar benchmark"""
    # Test suite and benchmark
    index = test_benchmark.find('.')
    test_suite_name = test_benchmark[:index]
    test_bench_name = test_benchmark[index + 1:]

    # Find the training suites
    training_suites = []
    for training_benchmark in training_benchmarks:
        index = training_benchmark.find('.')
        training_suite_name = training_benchmark[:index]
        if training_suite_name not in training_suites:
            training_suites.append(training_suite_name)

    # Find the closest benchmark
    closest = []
    for training_suite in training_suites:
        d_directory = os.path.join(distance_directory,
                                   test_suite_name,
                                   training_suite)
        filename = '{}/{}.yaml'.format(d_directory, test_bench_name)
        distance_data = IO.load_yaml(filename)
        closest += [(distance, training_suite, training_bench)
                    for training_bench, distance in distance_data.items()]

    closest.sort()
    closest_suite_name = closest[0][1]
    closest_bench_name = closest[0][2]

    # Load the closest benchmark's data
    d_directory = os.path.join(training_data_directory, closest_suite_name)
    filename = '{}/{}.yaml'.format(d_directory, closest_bench_name)
    training_data = IO.load_yaml_or_fail(filename)

    # Rank sequences
    rank = [(seq_data['goal'], seq_key)
            for seq_key, seq_data in training_data.items()]
    rank.sort()

    # Extract N sequences
    best = {}
    for i, (_, seq_key) in enumerate(rank):
        best[seq_key] = training_data[seq_key].copy()
        if i + 1 == nof_sequences:
            break

    return closest_suite_name, best
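
# Illustrative sketch (made-up values): find_sequences ranks candidates by
# sorting (distance, suite, benchmark) tuples, so Python's tuple ordering
# compares distances first and breaks ties lexicographically.
def _tuple_rank_example():
    candidates = [(0.9, 'suiteB', 'b1'),
                  (0.2, 'suiteA', 'b7'),
                  (0.2, 'suiteA', 'b2')]
    candidates.sort()
    return candidates[0]  # (0.2, 'suiteA', 'b2'): the nearest benchmark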
def execute(argv):
    """Reduce each benchmark using its best training sequence"""
    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no benchmarks to process')

    # Verify directories
    if not os.path.isdir(FLAGS.benchmarks_directory):
        logging.error('Benchmarks directory {} does not exist.'.format(
            FLAGS.benchmarks_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.training_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_directory))
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Initialize a BenchmarkReduction object
    bred = BenchmarkReduction(FLAGS.baseline,
                              FLAGS.benchmarks_directory,
                              FLAGS.results_directory)

    # Reduce
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_dir = os.path.join(FLAGS.training_directory, bench_dir)

        sequences = IO.load_yaml_or_fail('{}/{}.yaml'.format(
            bench_dir, bench_name))
        best_sequence = Sequence.get_the_best(sequences)

        for _, seq_data in best_sequence.items():
            sequence = seq_data['seq']
            bred.run(benchmark, sequence)
def execute(argv):
    """Find the best K sequences, from training data."""
    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.fatal('There are no training benchmarks to process.')

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Verify directories
    if not os.path.isdir(FLAGS.training_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.baseline_directory):
        logging.error('Baseline directory {} does not exist.'.format(
            FLAGS.baseline_directory))
        sys.exit(1)

    # Initialize a BestK object
    bestk = BestK(FLAGS.training_directory, FLAGS.baseline_directory)

    # Execute
    for k in tqdm(FLAGS.k, desc='Best-k'):
        filename = '{}/best_{}.yaml'.format(FLAGS.results_directory, k)
        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        bestk.run(benchmarks, FLAGS.compiler, FLAGS.baseline, int(k))

        # Store the results
        IO.dump_yaml(bestk.results, filename)

        # Store the number of programs covered by each sequence
        filename = '{}/covering_{}.yaml'.format(FLAGS.results_directory, k)
        IO.dump_yaml(bestk.covering, filename)
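
# FLAGS.k is iterated and each element is cast with int(k), so it is
# presumably a list flag holding strings. A hypothetical definition (an
# assumption; the actual flag is defined elsewhere in this module):
#
#     flags.DEFINE_list('k', ['1', '5', '10'], 'Values of k for best-k.')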
def execute(argv):
    """Evaluate N sequences"""
    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        # The benchmark
        index = benchmark.find('.')
        suite = benchmark[:index]
        bench_name = benchmark[index + 1:]

        # Create the results directory for the suite
        results_dir = os.path.join(FLAGS.results_directory, suite)
        try:
            os.makedirs(results_dir)
        except FileExistsError:
            pass

        # Verify report
        if FLAGS.suffix:
            output_filename = '{}/{}_{}.yaml'.format(results_dir,
                                                     bench_name,
                                                     FLAGS.suffix)
        else:
            output_filename = '{}/{}.yaml'.format(results_dir, bench_name)

        if FLAGS.verify_report and os.path.isfile(output_filename):
            continue

        # Benchmark directory
        bench_dir = os.path.join(FLAGS.benchmarks_directory,
                                 suite,
                                 bench_name)
        if not os.path.isdir(bench_dir):
            logging.error('Benchmark {} does not exist.'.format(benchmark))
            sys.exit(1)

        # The training data
        training_dir = os.path.join(FLAGS.training_directory, suite)
        filename = '{}/{}.yaml'.format(training_dir, bench_name)
        sequences = IO.load_yaml_or_fail(filename)
        if not sequences:
            logging.error('There are no sequences to process')
            sys.exit(1)

        best_sequence = Sequence.get_the_best(sequences)

        # Verify whether the best sequence is better than the baseline
        baseline_dir = os.path.join(FLAGS.baseline_directory, suite)
        filename = '{}/{}.yaml'.format(baseline_dir, bench_name)
        baseline_data = IO.load_yaml_or_fail(filename)
        if not baseline_data:
            logging.error('There are no baseline data')
            sys.exit(1)

        baseline_goal = baseline_data[FLAGS.compiler][FLAGS.baseline]['goal']

        for _, data in best_sequence.items():
            best_sequence_goal = data['goal']

        # Skip the benchmark if the best sequence does not beat the baseline
        if best_sequence_goal >= baseline_goal:
            continue

        sequences = split_sequence(best_sequence)

        # Process the sequences
        results = {}
        for key, data in sequences.items():
            goal_value = Engine.evaluate(
                Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                Sequence.name_pass_to_string(data['seq']),
                'opt',
                bench_dir,
                FLAGS.working_set,
                FLAGS.times,
                FLAGS.tool,
                FLAGS.verify_output)
            results[key] = {'seq': data['seq'], 'goal': goal_value}

        # Store the results
        IO.dump_yaml(results, output_filename)
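
# split_sequence is defined elsewhere in this module; a plausible reading
# (an assumption, not the actual implementation) is that it expands the best
# sequence into its prefixes so each partial sequence is evaluated on its own:
def _split_sequence_sketch(best_sequence):
    """Hypothetical: map each prefix length to {'seq': <prefix>}."""
    (_, data), = best_sequence.items()  # get_the_best yields a single entry
    passes = data['seq']
    return {i: {'seq': passes[:i + 1]} for i in range(len(passes))}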
def run(self, training_benchmarks, compiler, baseline, k):
    """Best-k

    Fast and effective orchestration of compiler optimizations
    for automatic performance tuning
    Z. Pan and R. Eigenmann
    International Symposium on Code Generation and Optimization
    2006
    10.1109/CGO.2006.38

    Parameters
    ----------
    training_benchmarks : list
    compiler : str
    baseline : str
    k : int
        Number of sequences
    """
    # Create the dictionary
    dictionary = {}
    best_sequences = {}
    for training_benchmark in training_benchmarks:
        index = training_benchmark.find('.')
        bench_dir = training_benchmark[:index]
        bench_name = training_benchmark[index + 1:]

        training_dir = os.path.join(self.__flags.training_directory,
                                    bench_dir)
        baseline_dir = os.path.join(self.__flags.baseline_directory,
                                    bench_dir)

        training_sequences = IO.load_yaml('{}/{}.yaml'.format(
            training_dir, bench_name))
        if not training_sequences:
            continue

        baseline_goal_value = IO.load_yaml_or_fail('{}/{}.yaml'.format(
            baseline_dir, bench_name))
        baseline_goal_value = baseline_goal_value[compiler][baseline]['goal']

        # For each sequence
        for seq in training_sequences.keys():
            if seq not in dictionary.keys():
                dictionary[seq] = []
                best_sequences[seq] = training_sequences[seq]['seq']

            goal_value = training_sequences[seq]['goal']

            # Store the fitness
            if goal_value < baseline_goal_value:
                improvement = ((baseline_goal_value - goal_value)
                               / baseline_goal_value) * 100
                dictionary[seq].append((training_benchmark, improvement))

    # Find the best dictionary entries
    if dictionary:
        bestk = []
        self.__covering = {}
        for _ in range(k):
            # Stop early when no program remains to be covered
            progs = []
            for _, data in dictionary.items():
                progs += [p for p, _ in data if p not in progs]
            if len(progs) == 0:
                break

            key = self.__get_maximum(dictionary)
            dictionary_entry = dictionary[key].copy()

            self.__covering[key] = len(dictionary_entry)
            bestk.append(key)

            # Remove the covered programs from every entry
            for entry_key, data in dictionary.items():
                for program, improvement in dictionary_entry:
                    index = self.__program_in_dictionary(program, data)
                    if index > -1:
                        del dictionary[entry_key][index]

        # Store the best k sequences
        self.__results = {}
        for best in bestk:
            self.__results[best] = {'seq': best_sequences[best]}
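
# The private helpers used by run() are defined elsewhere in BestK; minimal
# sketches of their assumed contracts (assumptions, not the actual code):
# __get_maximum picks the entry with the greatest accumulated improvement,
# and __program_in_dictionary locates a program in a (program, improvement)
# list, returning -1 when absent.
def _get_maximum_sketch(dictionary):
    """Hypothetical: key whose entry has the largest total improvement."""
    return max(dictionary,
               key=lambda key: sum(imp for _, imp in dictionary[key]))


def _program_in_dictionary_sketch(program, data):
    """Hypothetical: index of `program` in [(program, improvement), ...]."""
    for i, (prog, _) in enumerate(data):
        if prog == program:
            return i
    return -1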
def execute(argv):
    """Find the distance (euclidean, manhattan, or cosine) from test to
    training data."""
    del argv

    FLAGS = flags.FLAGS

    # The training benchmarks
    training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename)
    if not training_benchmarks:
        logging.error('There are no training benchmarks to process')
        sys.exit(1)

    # The test benchmarks
    test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename)
    if not test_benchmarks:
        logging.error('There are no test benchmarks to process')
        sys.exit(1)

    # Verify directories
    if not os.path.isdir(FLAGS.training_representation_directory):
        logging.error('Training directory {} does not exist.'.format(
            FLAGS.training_representation_directory))
        sys.exit(1)

    if not os.path.isdir(FLAGS.test_representation_directory):
        logging.error('Test directory {} does not exist.'.format(
            FLAGS.test_representation_directory))
        sys.exit(1)

    # Measure the distance
    if FLAGS.distance == 'euclidean':
        distance = Distance.euclidean(training_benchmarks,
                                      FLAGS.training_representation_directory,
                                      test_benchmarks,
                                      FLAGS.test_representation_directory)
    elif FLAGS.distance == 'manhattan':
        distance = Distance.manhattan(training_benchmarks,
                                      FLAGS.training_representation_directory,
                                      test_benchmarks,
                                      FLAGS.test_representation_directory)
    elif FLAGS.distance == 'cosine':
        distance = Distance.cosine(training_benchmarks,
                                   FLAGS.training_representation_directory,
                                   test_benchmarks,
                                   FLAGS.test_representation_directory)
    else:
        logging.error('Distance {} is not supported.'.format(FLAGS.distance))
        sys.exit(1)

    # Store the distance
    for i, test_bench in enumerate(tqdm(test_benchmarks, desc='Processing')):
        index = test_bench.find('.')
        test_suite_name = test_bench[:index]
        test_bench_name = test_bench[index + 1:]

        results = {}
        for j, training_bench in enumerate(training_benchmarks):
            index = training_bench.find('.')
            training_suite_name = training_bench[:index]
            training_bench_name = training_bench[index + 1:]

            if training_suite_name not in results:
                results[training_suite_name] = {}
            results[training_suite_name][training_bench_name] = float(
                distance[i][j])

        for training_suite_name, training_distance in results.items():
            results_dir = os.path.join(FLAGS.results_directory,
                                       test_suite_name,
                                       training_suite_name)
            # Create the results directory
            try:
                os.makedirs(results_dir)
            except FileExistsError:
                pass

            filename = '{}/{}.yaml'.format(results_dir, test_bench_name)
            IO.dump_yaml(training_distance, filename)
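
# Distance.euclidean/manhattan/cosine are implemented elsewhere; the loop
# above only assumes they return a matrix indexed as
# distance[test_index][training_index]. A minimal NumPy sketch of that
# contract for the euclidean case (an assumption, not the library code):
def _pairwise_euclidean_sketch(test_matrix, training_matrix):
    """Hypothetical: rows are benchmarks, columns are feature values."""
    import numpy as np
    diff = test_matrix[:, None, :] - training_matrix[None, :, :]
    return np.sqrt((diff ** 2).sum(axis=-1))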
def execute(argv):
    """Evaluate N sequences"""
    del argv

    FLAGS = flags.FLAGS

    # The benchmarks
    benchmarks = IO.load_yaml_or_fail(FLAGS.benchmarks_filename)
    if not benchmarks:
        logging.error('There are no benchmarks to process')
        sys.exit(1)

    # The sequences
    sequences = IO.load_yaml_or_fail(FLAGS.sequences_filename)
    if not sequences:
        logging.error('There are no sequences to process')
        sys.exit(1)

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Process each benchmark
    for benchmark in tqdm(benchmarks, desc='Processing'):
        index = benchmark.find('.')
        bench_dir = benchmark[:index]
        bench_name = benchmark[index + 1:]

        bench_in_dir = os.path.join(FLAGS.benchmarks_directory,
                                    bench_dir,
                                    bench_name)
        if not os.path.isdir(bench_in_dir):
            continue

        bench_out_dir = os.path.join(FLAGS.results_directory, bench_dir)

        # Create the results directory for the suite
        try:
            os.makedirs(bench_out_dir)
        except FileExistsError:
            pass

        # Verify report
        if FLAGS.suffix:
            filename = '{}/{}_{}.yaml'.format(bench_out_dir,
                                              bench_name,
                                              FLAGS.suffix)
        else:
            filename = '{}/{}.yaml'.format(bench_out_dir, bench_name)

        if FLAGS.verify_report and os.path.isfile(filename):
            continue

        # Process the sequences
        results = {}
        for key, data in sequences.items():
            goal_value = Engine.evaluate(
                Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                Sequence.name_pass_to_string(data['seq']),
                'opt',
                bench_in_dir,
                FLAGS.working_set,
                FLAGS.times,
                FLAGS.tool,
                FLAGS.verify_output)
            results[key] = {'seq': data['seq'], 'goal': goal_value}

        # Store the results
        IO.dump_yaml(results, filename)
def execute(argv):
    """Evaluate sequences taken from the most similar training benchmark"""
    FLAGS = flags.FLAGS

    results_directory = FLAGS.results_directory

    # Test benchmarks
    test_benchmarks = IO.load_yaml_or_fail(FLAGS.test_benchs_filename)
    if not test_benchmarks:
        logging.fatal('There are no test benchmarks to process')

    # Training benchmarks
    training_benchmarks = IO.load_yaml_or_fail(FLAGS.training_benchs_filename)
    if not training_benchmarks:
        logging.fatal('There are no training benchmarks to process')

    # Create the results directory
    try:
        os.makedirs(FLAGS.results_directory)
    except FileExistsError:
        pass

    # Extract the representation for the test programs
    print(bold('1. EXTRACTING THE REPRESENTATION'))
    FLAGS.results_directory = os.path.join(results_directory,
                                           'representation')
    FLAGS.benchmarks_filename = FLAGS.test_benchs_filename
    representation.execute(argv)

    # Distance: test --> training
    print(bold('2. MEASURING THE DISTANCE'))
    distance_results_directory = os.path.join(results_directory, 'distance')
    FLAGS.results_directory = distance_results_directory
    FLAGS.test_representation_directory = os.path.join(results_directory,
                                                       'representation')
    distance.execute(argv)

    # Process the test benchmarks
    print(bold('3. PROCESSING THE BENCHMARKS'))
    for nof_sequences in tqdm(FLAGS.nof_sequences, desc='Processing'):
        for test_benchmark in test_benchmarks:
            index = test_benchmark.find('.')
            suite_name = test_benchmark[:index]
            bench_name = test_benchmark[index + 1:]

            # Find the best N sequences
            training_suite, sequences = find_sequences(
                test_benchmark,
                training_benchmarks,
                distance_results_directory,
                FLAGS.training_data_directory,
                int(nof_sequences))

            # Goal name
            if len(FLAGS.goals) > 1:
                goal_name = '_'.join(FLAGS.goals)
            else:
                goal_name = FLAGS.goals[0]

            # Create the results directory for the suite
            results_dir = os.path.join(results_directory,
                                       'predictive_compilation',
                                       training_suite,
                                       goal_name)
            try:
                os.makedirs(results_dir)
            except FileExistsError:
                pass

            filename = '{}/{}_j{}.yaml'.format(results_dir,
                                               bench_name,
                                               nof_sequences)
            if FLAGS.verify_report and os.path.isfile(filename):
                continue

            results = {}
            for key, data in sequences.items():
                goal_value = Engine.evaluate(
                    Goals.prepare_goals(FLAGS.goals, FLAGS.weights),
                    Sequence.name_pass_to_string(data['seq']),
                    'opt',
                    os.path.join(FLAGS.benchmarks_directory,
                                 suite_name,
                                 bench_name),
                    FLAGS.working_set,
                    FLAGS.times,
                    FLAGS.tool,
                    FLAGS.verify_output)
                results[key] = {'seq': data['seq'], 'goal': goal_value}

            IO.dump_yaml(results, filename)