def find_patterns_in_sequences(file_path, file_name):
    """Search the sequences of a file for recurring subsequences.

    The sequences are obtained from ``pprof_utilities.read_sequences`` and
    counted by ``__find_frequent_subsequences``. Every subsequence that was
    counted at least twice is written, together with its count and the total
    number of sequences, to ``subsequence_occurrences.csv``.

    The output file name is appended directly to ``file_path`` (plain string
    concatenation), so ``file_path`` has to end with a path separator for the
    file to be created inside that directory.

    Args:
        file_path: Location handed to ``read_sequences``; also the prefix of
            the output file.
        file_name: Name of the file that lists the sequences.
    """
    sequences = pprof_utilities.read_sequences(file_path, file_name)
    occurrences = __find_frequent_subsequences(sequences)
    total_num_sequences = str(len(sequences))

    # A subsequence that was counted only once is not a recurring pattern.
    # NOTE: no relative-frequency threshold is applied to the output.
    recurring = [
        (pattern, count) for pattern, count in occurrences.items()
        if count >= 2
    ]

    with open(file_path + 'subsequence_occurrences.csv', 'w',
              newline='') as file:
        w = csv.writer(file, delimiter=' ')
        w.writerow(['Subsequence', 'Occurrences', 'TotalNumberSequences'])
        for pattern, count in recurring:
            w.writerow([str(pattern), str(count), total_num_sequences])
def evaluate_best_sequence(program):
    """Pick the best optimization sequence for the specified program.

    The candidate sequences are read with ``pprof_utilities.read_sequences``
    and each one is rated by ``calculate_fitness_value`` in a worker process
    of a ``multiprocessing.Pool``. The workers receive a shared manager dict
    and the string form of their sequence; the fitness value is afterwards
    looked up in that dict under this string.

    The sequences with the *lowest* fitness value count as the best ones. If
    several sequences share that value, one of them is chosen at random.

    Args:
        program: Passed through unchanged to ``calculate_fitness_value``.

    Returns:
        One of the sequences with the lowest fitness value.

    Raises:
        IndexError: If no candidate sequences were read.
        KeyError: If the shared dict holds no fitness value for a sequence,
            for example because its worker failed (the failure is logged).
    """
    log = logging.getLogger()

    sequences = pprof_utilities.read_sequences(SEQUENCE_FILE_PATH,
                                               SEQUENCE_FILE, SEQUENCE_PREFIX)
    if not sequences:
        # Fail before any process is spawned; IndexError is what popping
        # from the empty candidate list has always raised here.
        raise IndexError("no optimization sequences to evaluate")
    possible_sequences = len(sequences)

    # The context managers make sure the manager's server process and the
    # worker pool are released even if something below raises.
    with multiprocessing.Manager() as manager:
        seq_to_fitness = manager.dict()

        with multiprocessing.Pool() as pool:
            pending = [
                (str(sequence),
                 pool.apply_async(calculate_fitness_value,
                                  args=(sequence, seq_to_fitness,
                                        str(sequence), program)))
                for sequence in sequences
            ]
            pool.close()
            pool.join()

        # apply_async keeps worker exceptions inside the result object;
        # report them instead of dropping them silently.
        for key, result in pending:
            try:
                result.get()
            except Exception as error:
                log.error("Fitness calculation failed for sequence %s: %r",
                          key, error)

        # Read the values while the manager is still alive.
        fitness_values = [seq_to_fitness[str(s)] for s in sequences]

    # Lowest fitness value wins; ties keep the order of the input file.
    fittest_fitness_value = min(fitness_values)
    fittest_sequences = [
        sequence for sequence, value in zip(sequences, fitness_values)
        if value == fittest_fitness_value
    ]

    log.info("Best sequences %d of %d", len(fittest_sequences),
             possible_sequences)
    for sequence in fittest_sequences:
        log.info("Best: %s", sequence)
    log.info(
        "----------------------------------------------------------------")
    return random.choice(fittest_sequences)
def find_most_frequent_sequence():
    """Log the best sequences that occur most often across all programs.

    Scans ``FILE_PATH`` for files whose name ends with
    ``.heuristic-compilestats.raw`` and does not start with ``raw.``, reads
    their sequences with ``pprof_utilities.read_sequences`` and counts how
    often every sequence is listed. The sequence(s) with the highest count
    are written to the log; nothing is returned.

    If no sequence is found at all, a warning is logged and the function
    returns without further output.
    """
    log = logging.getLogger(__name__)
    sequence_to_programs = dict()
    number_programs = 0

    for file in os.listdir(FILE_PATH):
        if file.startswith('raw.') or not file.endswith(
                '.heuristic-compilestats.raw'):
            continue
        sequences = pprof_utilities.read_sequences(FILE_PATH, str(file),
                                                   prefix=PREFIX)
        if not sequences:
            continue
        # Only files that actually contributed sequences are counted.
        number_programs += 1
        for sequence in sequences:
            # Tuples are hashable and can therefore serve as dict keys.
            sequence_tuple = tuple(sequence)
            sequence_to_programs[sequence_tuple] = \
                sequence_to_programs.get(sequence_tuple, 0) + 1

    if not sequence_to_programs:
        log.warning("No sequences found in %s", FILE_PATH)
        return

    frequency = max(sequence_to_programs.values())
    most_frequent = [
        sequence for sequence, count in sequence_to_programs.items()
        if count == frequency
    ]
    # Among ties, the sequence that was encountered last is reported as the
    # representative and listed first; the others keep their encounter order.
    best = most_frequent[-1]
    best_sequences = [best] + most_frequent[:-1]

    log.info("Number of best sequences: %s", len(best_sequences))
    log.info("Most frequently occurring sequence:")
    log.info("%s", best)
    # Pass integers: %d cannot format a string argument.
    log.info("Occurrences: %s of %d", frequency, number_programs)
    for sequence in best_sequences:
        log.info("Best: %s", list(sequence))
def __create_flag_statistics_csv(file_path, file_name, csv_name=FLAG_CSV):
    """Write per-flag occurrence statistics of a sequence file as csv.

    For every flag found in the sequences returned by
    ``pprof_utilities.read_sequences`` one row is written that holds the
    flag, the number of sequences containing it, its total number of
    occurrences, the number of sequences and the number of all flag
    occurrences. The csv file is created at ``file_path + csv_name``.
    """
    sequences = pprof_utilities.read_sequences(file_path, file_name)

    overall_count = {}
    containing_sequences = {}
    flags_seen = 0
    for sequence in sequences:
        # Flags already credited to the current sequence.
        credited = set()
        for flag in sequence:
            flags_seen += 1
            overall_count[flag] = overall_count.get(flag, 0) + 1
            if flag in credited:
                continue
            credited.add(flag)
            containing_sequences[flag] = \
                containing_sequences.get(flag, 0) + 1

    header = [
        'Flag', 'Sequences', 'Total', 'TotalNumberSequences',
        'TotalNumberFlags'
    ]
    # Store the collected numbers in a fresh csv file.
    with open(file_path + csv_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=' ')
        writer.writerow(header)
        # These two columns are the same for every row.
        number_sequences = str(len(sequences))
        number_flags = str(flags_seen)
        for flag, total in overall_count.items():
            writer.writerow([
                str(flag),
                str(containing_sequences[flag]),
                str(total),
                number_sequences,
                number_flags,
            ])