def test_add_gap_1(self):
    """
    Check that inserting a gap at column 0 puts a gap at the front of both sequences,
    extends the column scores and grows the profile by one column.
    """
    profile = Profile(["agct", "ag-t"])
    profile.add_gap(0)

    # both rows must start with the freshly inserted gap
    self.assertEqual(GAP, profile.seqs[0][0], "Gap in seq1 not inserted")
    # the message used to say "seq1" here as well, which pointed at the wrong row on failure
    self.assertEqual(GAP, profile.seqs[1][0], "Gap in seq2 not inserted")

    # four original columns plus the inserted one
    self.assertEqual(5, len(profile.sp_scores),
                     "SP-Scores has not correct length")
    self.assertEqual(score_dna_match, profile.sp_scores[2],
                     "SP-Score not recomputed")
    self.assertEqual(5, len(profile), "Profile has not correct size")
def optimize_agent(seqs, configurations, print_console, print_graph,
                   create_agent_trainer):
    """
    Optimize one agent in different parametrization on the provided sequences from a benchmark
    :param seqs: sequences to optimize the agents on
    :param configurations: parameter configurations to be tested for optimality
    :param print_console: print log of training to the console
    :param print_graph: output the performance of each configurations as a graph
    :param create_agent_trainer: method to create the agent from the configurations and the according trainer;
        called as ``create_agent_trainer(seqs, config)`` and expected to return ``(agent, trainer)``
    :return: align-tables with new alignments, optimal configuration and the according results (score, profile)
        and the scores of all tested configurations in the order they were tested.
        With no configurations the result is ``(align_tables, None, (0, (0, 0)), None, [])``.
    """
    align_tables = {
        "SP": {
            "Refinement": HashAlignTable(Profile(seqs)),
            "Progressive": HashAlignTable(seqs)
        },
        "CS": {
            "Refinement": HashAlignTable(Profile(seqs)),
            "Progressive": HashAlignTable(seqs)
        }
    }
    best_config = None
    best_score = (0, (0, 0))
    best_profile = None
    profile_scores = []

    # try all given configurations to find the best performing one
    for config in configurations:
        # only the trainer is needed here, the agent itself is driven through it
        _, agent_trainer = create_agent_trainer(seqs, config)
        score_key, type_key = get_score(config), get_c_type(config)

        # train the agent on the table shared by all configurations of the same setting ...
        agent_trainer.set_align_table(align_tables[score_key][type_key])
        performance = agent_trainer.run(print_console, print_graph)
        align_tables[score_key][type_key] = agent_trainer.get_align_table()

        # ... and compute the final alignment
        (profile, _permutation), _ = agent_trainer.evaluate_training()
        scores = profile.score()
        profile_scores.append(scores)

        # check for being better than the actual best configuration; the first configuration is
        # always accepted, otherwise runs whose scores are all negative would never yield a result
        act_score = (scores[config.score], score_learning(*performance))
        if best_config is None or act_score > best_score:
            best_score = act_score
            best_config = config
            best_profile = profile

    return align_tables, best_config, best_score, best_profile, profile_scores
def train(self, print_progress):
    """
    Run the learning loop for at most ``self.games`` games.

    :param print_progress: forwarded to ``self.print_progress`` and used to decide whether the
        "search exited" message is written to the console
    :return: three lists (rewards, losses, invalid-action counts) holding the values that
        ``self.print_progress`` reported at each reporting point
    """
    reward_sum, loss_sum, fail_count = 0, (0, 0), 0
    reward_log, loss_log, fail_log = [], [], []

    for step in range(self.games):
        # report every ``plot_size`` games and once more when the table is exhausted
        report_due = (step + 1) % self.plot_size == 0 or self.env.align_table.is_full()
        if report_due:
            shown_reward, shown_loss, shown_fail = self.print_progress(
                print_progress, step, reward_sum, loss_sum, fail_count)
            reward_log.append(shown_reward)
            loss_log.append(shown_loss)
            fail_log.append(shown_fail)
            # start a fresh accumulation window
            reward_sum, loss_sum, fail_count = 0, (0, 0), 0

        # nothing left to explore: take the best entry of the table and stop
        if self.env.align_table.is_full():
            if self.env.best_alignment == (Profile([]), None):
                self.env.best_alignment = self.env.align_table.get_best(
                    self.score)
            if print_progress:
                print(
                    "Search exited. All alignments have been visited and optimality is guaranteed."
                )
            break

        # play one game and record every transition for the learning step
        game_reward, state, _, done = self.env.soft_reset()
        transitions = []
        while not done:
            action, value, log_prob = self.act(state, self.env.available)
            snapshot = np.array(state)
            game_reward, state, _, done = self.env.step(action)
            transition = (
                linearize_state(snapshot, self.num_seqs),
                action,
                game_reward[self.score],
                linearize_state(state, self.num_seqs),
                done,
                value,
                log_prob,
            )
            transitions.append(transition)

        # account for the finished game
        reward_sum += game_reward[self.score]
        incomplete = len(state) != self.num_seqs
        if incomplete and not self.refinement:
            fail_count += 1

        # learn from the played game and add its losses component-wise
        loss_sum = [
            old + new for old, new in zip(loss_sum, self.learn(transitions))
        ]

    return reward_log, loss_log, fail_log
def optimize_mcts_agent(seqs, configurations: List[MCTSTC], align_table=None):
    """
    Optimize the MCTS learning agent over the given configurations of hyperparameters
    :param seqs: sequences to use for alignment
    :param configurations: configurations to test while training
    :param align_table: table containing previously computed alignments.
        NOTE(review): this argument is not consulted by the search; it is kept only so that
        existing callers keep working
    :return: the align-tables filled during the search (nested by score and alignment type),
        the best configuration (None if no run finished) and its score (0 if no run finished)
    """
    align_tables = {
        "SP": {
            "Refinement": HashAlignTable(Profile(seqs)),
            "Progressive": HashAlignTable(seqs)
        },
        "CS": {
            "Refinement": HashAlignTable(Profile(seqs)),
            "Progressive": HashAlignTable(seqs)
        }
    }
    best_config = None
    best_score = 0

    # try all defined configurations of hyperparameter to find the best performing one
    for config in configurations:
        agent = MCTSAgent(seqs,
                          simulations=config.simulations,
                          rollouts=config.rollouts,
                          c=config.c,
                          score=config.score,
                          refinement=config.refinement,
                          adjust=config.adjust)
        score_key, type_key = get_score(config), get_c_type(config)
        agent.set_align_table(align_tables[score_key][type_key])

        # refinement is not handled by this search, such configurations are skipped
        if config.refinement:
            continue

        # compute the alignment using UCT-MCTS to find the most promising sequence
        env = AlignmentWrapper(seqs, agent, config.score)
        score, _, _, done = env.run()
        align_tables[score_key][type_key] = agent.get_align_table()

        # the first finished run is always accepted so that negative scores can win as well
        if done and (best_config is None or score > best_score):
            best_score = score
            best_config = config

    # return the tables that were actually filled, not the untouched input argument
    return align_tables, best_config, best_score
def run_regressions(logDirectoryPath):
    """
    Re-simulate the most recent logged games and compare them with their recorded results.

    The 30 most recently modified ``.log`` files of the directory are considered; a log is
    simulated only when a ``.csv`` file with the same stem exists next to it. The whole run is
    executed inside a ``Profile()`` context and a timing summary is printed at the end.

    :param logDirectoryPath: directory containing the ``.log`` / ``.csv`` pairs
    """
    start = time.time()
    games = 0
    log_dir = Path(logDirectoryPath)

    # the context manager guarantees that the profile is closed even if a step below raises
    with Profile():
        # Look for all .log files in the directory, oldest first
        files = sorted(log_dir.iterdir(), key=os.path.getmtime)
        logfiles = [f for f in files if f.suffix == '.log'][-30:]

        for file in logfiles:
            # derive the paths from the directory entry itself, so the result does not depend
            # on the directory argument ending with a path separator
            csv_path = file.with_suffix(".csv")
            if not csv_path.exists():
                continue
            known_results = read_game_results(str(csv_path))

            # deliberately broad: one broken log must not abort the whole regression run
            try:
                simulated_results = simulate_game_from_log(str(file))
                print(
                    f"{file.stem} results, {compare_results(known_results, simulated_results)}"
                )
            except Exception as e:
                print(f"{file.stem} exception {e}")
            games += 1

        end_time = time.time() - start
        # avoid a ZeroDivisionError when no log had a matching csv file
        time_per_game = end_time / games if games else "n/a"
        print(
            f"Elapsed Time: {end_time}\nGames: {games}\nTime per game: {time_per_game}"
        )
def align_progressive(permutation,
                      seqs,
                      align_table=None,
                      pw_perm=None,
                      m_perm=None):
    """
    Build a profile by aligning the sequences one after another in the order of the permutation
    :param permutation: order (indices into seqs) in which the sequences are aligned
    :param seqs: sequences to align
    :param align_table: optional table used to look up and to store intermediate profiles
    :param pw_perm: tie-breaking order of directions for the pairwise alignment
    :param m_perm: tie-breaking order of directions for the profile-sequence alignment
    :return: profile of all aligned sequences
    """
    # lower-case input is scored with the DNA gap penalty, everything else with the protein one
    gap_penalty = score_dna_gap if seqs[0].islower() else score_protein_gap

    # a single sequence needs no alignment
    if len(permutation) < 2:
        return Profile([seqs[permutation[0]]])

    pw_perm = [LEFT, DIAG, UP] if pw_perm is None else pw_perm
    m_perm = [LEFT, DIAG, UP] if m_perm is None else m_perm
    use_table = align_table is not None

    # start from the largest profile the table knows for this permutation, if any
    current = Profile([])
    if use_table:
        current = align_table.get(permutation)[0]

    # nothing known: seed the profile with the first two sequences
    if current.size() == 0:
        current, _ = align_seq_seq(seqs[permutation[0]],
                                   seqs[permutation[1]],
                                   False,
                                   pw_perm,
                                   score_gap_loc=gap_penalty)
        if use_table:
            align_table.set(permutation[:2], current)

    # add the remaining sequences and remember every intermediate profile
    position = max(2, current.size())
    while position < len(permutation):
        current = align_prof_seq(seqs[permutation[position]],
                                 current,
                                 m_perm,
                                 score_gap_loc=gap_penalty)
        position += 1
        if use_table:
            align_table.set(permutation[:position], current)
    return current
def test_fuzzer(self, count=1):
    """
    Run the set-check followed by the get-check on one fixed three-sequence alignment.

    The randomized variant is disabled and kept here for reference only:

    for _ in range(count):
        num_seqs = np.random.randint(1, 5)
        seqs_count = np.random.randint(num_seqs, 10)
        profile = Profile(get_sequences(length=10, count=num_seqs, different=True))
        permutation = random.sample(range(seqs_count), num_seqs)
        self.test_get(profile, permutation, )
        self.test_set(profile, permutation)
    """
    raw_seqs = ["ACGT", "AGT", "ACT"]
    order = [2, 0, 1]
    aligned = Profile(["AC-T", "ACGT", "A-GT"])

    # storing has to happen before the lookup is checked
    for check in (self.test_set, self.test_get):
        check(aligned, order, raw_seqs)
def get_best(self, score):
    """
    Find the best profile over all permutations of the sequences of this table
    :param score: score handed to compare_alignments to rank two alignments
    :return: the winning profile together with its permutation
    """
    best_profile, best_order = Profile([]), []
    for order in map(list, itertools.permutations(range(len(self.seqs)))):
        candidate = self.get(order)[0]

        # the very first permutation becomes the initial champion
        if not best_order:
            best_profile, best_order = candidate, order
            continue

        # every later permutation has to beat the current champion
        (best_profile, best_order), _ = compare_alignments(
            (best_profile, best_order), (candidate, order), score)
    return best_profile, best_order
def read_drl_results(balibase_dir, results_dir, agent_name, df):
    """
    Score the result files of one agent and write the scores into the comparison dataframe
    :param balibase_dir: directory of the balibase database
    :param results_dir: directory holding the result files of this tool
    :param agent_name: agent prefix; selects the result files and names the new columns
    :param df: dataframe from the comparison of the reference tools (modified in place)
    :return: the same dataframe, extended by the columns of this agent
    """
    # create the columns of this agent on first use
    if agent_name + "_q" not in df.columns:
        for suffix in ("_q", "_tc", "_cs", "_sp", "_time"):
            df[agent_name + suffix] = 0

    output_path = "~/output_seqs.fa"
    reference_path = "~/ref_seqs.fa"

    matching = [f for f in os.listdir(results_dir) if agent_name in f]
    matching.sort()
    for file_name in matching:
        print(file_name)

        # the benchmark key is encoded at fixed positions at the end of the file name
        key = "RV" + file_name[-9:-7] + "/" + file_name[-11:-4]
        optimal = os.path.join(balibase_dir, key + ".msf")

        # put reference and result where the scoring helpers read them from
        write_fasta_seqs(read_msf(optimal), reference_path)
        copyfile(os.path.join(results_dir, file_name),
                 os.path.expanduser(output_path))

        # score the alignment
        sp, cs, _, _ = Profile(list(read_fasta_seqs().values())).score()
        q, tc = score_alignment()

        # insert the data into the row of this benchmark
        for suffix, value in (("_q", q), ("_tc", tc), ("_cs", cs), ("_sp", sp)):
            df.loc[df.name == key, agent_name + suffix] = value

        # remove the temporary files again
        os.remove(os.path.expanduser(output_path))
        os.remove(os.path.expanduser(reference_path))
    return df
def get(self, permutation, cutoff=2):
    """
    Look up the profile stored for the permutation; if it is unknown, retry with the permutation
    shortened by its last element until an entry is found or the cutoff length is reached
    :param permutation: query-permutation
    :param cutoff: length at which shortening of the permutation stops
    :return: (profile, length of the matching prefix) or (empty profile, 0) if nothing is stored
    """
    prefix = permutation
    key = hash_state_fast(prefix, self.num_seqs)

    # drop trailing elements as long as there is no hit and the prefix may still shrink
    while not (key in self.table or len(prefix) <= cutoff):
        prefix = prefix[:-1]
        key = hash_state_fast(prefix, self.num_seqs)

    if key not in self.table:
        return Profile([]), 0
    return self.table[key].to_profile(self.seqs), len(prefix)
def merge_profiles(profile1, profile2):
    """
    Merge two profiles that share the first sequence in common

    The columns of both profiles are walked in parallel. Where the leading sequences agree the
    columns are joined; where only one leading sequence has a gap, that column is taken and the
    rows of the other profile are padded with gaps. Columns remaining in one profile after the
    other one is exhausted are merged the same way instead of being dropped.

    :param profile1: first profile to merge
    :param profile2: second profile to merge
    :return: merged profile; rows of profile1 first, followed by rows 1.. of profile2
    :raises ValueError: if the leading sequences differ in a position where neither has a gap
    """
    rows1, rows2 = profile1.size(), profile2.size()
    len1, len2 = len(profile1), len(profile2)
    # collect the characters per row and join once at the end
    merged = [[] for _ in range(rows1 + rows2 - 1)]
    col1, col2 = 0, 0

    while col1 < len1 or col2 < len2:
        p1_col = profile1[col1] if col1 < len1 else None
        p2_col = profile2[col2] if col2 < len2 else None
        lead1 = None if p1_col is None else p1_col[0]
        lead2 = None if p2_col is None else p2_col[0]

        if p1_col is not None and p2_col is not None and lead1 == lead2:
            # both leading sequences are equal
            take1, take2 = True, True
        elif lead1 == GAP:
            # first leading sequence has additional gap
            take1, take2 = True, False
        elif lead2 == GAP:
            # second leading sequence has additional gap
            take1, take2 = False, True
        else:
            # previously this case never advanced a column and looped forever
            raise ValueError(
                "profiles do not share their first sequence "
                f"(columns {col1} and {col2})")

        for i in range(rows1):
            merged[i].append(p1_col[i] if take1 else GAP)
        # row 0 of profile2 is the shared sequence and already covered above
        for i in range(1, rows2):
            merged[rows1 + i - 1].append(p2_col[i] if take2 else GAP)

        if take1:
            col1 += 1
        if take2:
            col2 += 1

    return Profile(["".join(row) for row in merged])
def __init__(self,
             sequences,
             simulations=0,
             rollouts=1,
             c=1,
             score=SP_SCORE,
             refinement=False,
             console=False,
             adjust=True):
    """
    Set up the MCTS agent
    :param sequences: sequences to align
    :param simulations: simulations per action selection; 0 selects 50 per sequence
    :param rollouts: number of rollouts to perform in each simulation
    :param c: UCB-Parameter to balance exploration/exploitation
    :param score: score to optimize for
    :param refinement: flag indicating to train for refinement
    :param console: flag indicating commandline outputs
    :param adjust: request the min/max score bounds; only honoured when optimizing the SP score
    """
    super().__init__(sequences, refinement)

    # search state
    self.state = []
    self.children = []
    table_base = Profile(sequences) if self.refinement else sequences
    self.align_table = HashAlignTable(table_base)

    # search parameters
    self.simulations = simulations if simulations != 0 else self.num_seqs * 50
    self.rollouts = rollouts
    self.c = c
    self.score = score
    self.steps = 0
    self.console = console

    # score bounds are only estimated for the SP score
    self.adjust = adjust and score == SP_SCORE
    if not self.adjust:
        return
    self.min_score = self.estimate_min()
    upper = center_star(self.sequences).score()[SP_SCORE]
    # halve a negative score and double a non-negative one
    self.max_score = upper / 2 if upper < 0 else upper * 2
FiendishServant(), DragonspawnLieutenant(), DragonspawnLieutenant(), RedWhelp(), RedWhelp() ]) player_board_1 = PlayerBoard(player_id=1, hero=None, life_total=12, rank=4, minions=[ DragonspawnLieutenant(), Mecharoo(), FiendishServant(), DragonspawnLieutenant(), FiendishServant(), RighteousProtector(), RighteousProtector() ]) simulation = Simulation(player_board=player_board_0, opponent_board=player_board_1, max_simulations=50) # logging.DEBUG will show all steps in combat logging.basicConfig(level=logging.DEBUG, format="%(message)s") with Profile(): print(simulation.simulate() ) # List of tuples with outcome and the frequency of that outcome
length=6, different=True), network_object=TinyREINFORCENetwork) pat = PolicyAgentTrainer(agent, value_gamma=0.99, value_alpha=0.8, baseline=True) pat.run() # compute the resulting multiple sequence alignment (best_profile, best_permutation), _ = pat.evaluate_training() reward = best_profile.score() print(str(best_profile)) print("Score:", reward[score], F"({best_permutation})") start = Profile(["ctattg", "ctaccg", "ctatgt"]) print(start) print("Score:", start.score()[score]) agent = PolicyAgent(sequences=start, network_object=TinyREINFORCENetwork, refinement=True) tat = PolicyAgentTrainer(agent, value_alpha=0.01, value_gamma=0.9, epsilon_end=0.1, refinement=True) tat.run(True, False) (best_ref_profile, best_ref_permutation), _ = tat.evaluate_training() print(str(best_ref_profile)) reward = best_ref_profile.score() print("Score:", reward[score], F"({best_permutation})")
def multithread_agent_on_benchmarks(benchmark_ids, configurations, best,
                                    settings, data_file):
    """
    run a single agent configuration on all benchmarks in parallel
    :param benchmark_ids: mapping of benchmark id to the agent ids to run on it
    :param configurations: configuration(s); only the first one is executed
    :param best: best results in the actual optimization setting
    :param settings: settings of this search (Multi, Update, Individual and Folder are read)
    :param data_file: file to store all data about the computations
    :return:
        - a bool flag indicating that a new best alignments has been found
        - the actual comparison of agents on different benchmarks
        - the actual best in JSON format
    """
    global number
    print("train one agent on multiple benchmarks")

    count = len(benchmark_ids)
    tasks = [None] * count

    comparison = {
        "SP": {
            "Refinement": [],
            "Progressive": []
        },
        "CS": {
            "Refinement": [],
            "Progressive": []
        }
    }

    # per-benchmark data; it is produced in the submit loop and needed again in the collect loop
    names = [None] * count
    sequence_names = [None] * count
    base_dataset = [None] * count
    bench_comp = [None] * count
    bench_best = [None] * count

    config = configurations[0]
    changed = False

    # the context manager releases the worker threads on every exit path
    with ThreadPool(processes=cpu_count()
                    if settings.Multi == 1 else settings.Multi) as pool:
        for i, (b_id, agent_ids) in enumerate(benchmark_ids.items()):
            # initialize the files based on the actual benchmark
            names[i], sequences, sequence_names[i], base_dataset[i], bench_comp[i], bench_best[i] = \
                initialize_benchmark(b_id, best)

            # if the configuration is not to be used on this benchmark, insert a placeholder into the
            # statistics and continue with the next one; tasks[i] stays None
            if 0 not in agent_ids:
                number += 1
                column = TABLE_AGENT + 1 if config.refinement else TABLE_AGENT
                get_from_config(bench_comp[i], config)[column] = ("-", "-", "-", "-")
                continue

            if config.refinement:
                # iterative refinement needs a progressive alignment to start from
                if bench_best[i]["SP" if config.score == SP_SCORE else "CS"]["Progressive"][1] is None:
                    print(
                        "WARNING: Cannot compute Refinement without basic profile of progressive alignment"
                    )
                    continue
                start_profile = bench_best[i][get_score(config)]["Refinement"][0]
                tasks[i] = pool.apply_async(
                    run_agent,
                    (config, start_profile, HashAlignTable(Profile(sequences)),
                     names[i], start_profile.score(), settings.Update,
                     len(configurations), settings.Individual, data_file))
            else:
                # else compute a progressive alignment
                tasks[i] = pool.apply_async(
                    run_agent,
                    (config, sequences, HashAlignTable(sequences), names[i],
                     bench_best[i][get_score(config)]["Progressive"][0].score(),
                     settings.Update, len(configurations), settings.Individual,
                     data_file))

        # iterate over the processes and collect the results
        for i, task in enumerate(tasks):
            profile = permutation = None

            # benchmarks skipped above have no task; calling .get() on None used to crash here
            if task is not None:
                _, scoring, _, profile, permutation = task.get()
                score_key = get_score(config)
                if config.refinement:
                    # update the statistics of refinement analysis
                    get_from_config(bench_comp[i], config)[TABLE_AGENT + 1] = scoring
                    bench_best[i][score_key]["Refinement"], change = \
                        compare_alignments(bench_best[i][score_key]["Refinement"],
                                           (profile, permutation,
                                            bench_best[i][score_key]["Progressive"][1], config),
                                           config.score)
                else:
                    # or the results of progressive alignments
                    get_from_config(bench_comp[i], config)[TABLE_AGENT] = scoring
                    bench_best[i][score_key]["Progressive"], change = \
                        compare_alignments(bench_best[i][score_key]["Progressive"],
                                           (profile, permutation, config), config.score)
                changed |= change

            # update the global statistics, also for skipped benchmarks so that their
            # placeholder rows reach the comparison
            comparison, best = update_comparison(names[i], base_dataset[i],
                                                 best, comparison,
                                                 bench_comp[i], bench_best[i])

            if task is not None and settings.Folder is not None:
                profile.store(settings.Folder, i, config, names[i],
                              sequence_names[i], permutation)

    return changed, comparison, best
def initialize_benchmark(b_id, best):
    """
    Prepare everything that is needed to run agents on one benchmark
    :param b_id: benchmark to train on; an int selects one of the built-in benchmarks,
        anything else is treated as the path of a fasta file
    :param best: best results from previous training runs, nested by score and alignment type
    :return:
        - the name of the benchmark used for evaluation tables at the end
        - the sequences of the benchmark as list of strings
        - the names according to the sequences in the same order ( = permutation [0,1,...,n-1,n])
        - basic data of the benchmark-sequences, namely id, type and size
        - basic comparison of available results for this benchmark
        - best result from previous runs on this benchmark
    """
    # Comparison tables, best-marks and align-tables are all kept in maps nested by score ("SP"/"CS")
    # and alignment type ("Refinement"/"Progressive"). A progressive best-mark is the tuple
    # (profile, permutation, configuration); a refinement best-mark is the tuple
    # (profile, iterative permutation, permutation of the starting alignment, configuration).
    score_keys = ("SP", "CS")
    comparison = {
        key: {
            "Refinement": {},
            "Progressive": {}
        }
        for key in score_keys
    }
    benchmark_best = {
        key: {
            "Refinement": (Profile([]), None, None, None),
            "Progressive": (Profile([]), None, None)
        }
        for key in score_keys
    }

    built_in = isinstance(b_id, int)
    if built_in:
        # known benchmark of this work: take name, reference results and file from the constants
        name, b, seqs_file = names[b_id], benchmarks[b_id], seq_files[b_id]
        sequences, sequence_names = read_fasta_data(seqs_file)
    else:
        # foreign benchmark: the name is the file name up to its first dot
        name, (sequences, sequence_names
               ) = os.path.basename(b_id).split(".")[0], read_fasta_data(b_id)

    def reference_columns():
        # left-hand side of the output tables; zeros where no reference results are known.
        # A new dict is built on every call because each setting gets extended separately.
        if built_in:
            return {
                RL: b[0:2],
                DRL: b[2:4],
                CLUSTALW: b[4:6],
                MAFFT: b[6:8],
                MUSCLE: b[8:10]
            }
        return {
            RL: (0, 0),
            DRL: (0, 0),
            CLUSTALW: (0, 0),
            MAFFT: (0, 0),
            MUSCLE: (0, 0)
        }

    for c_type in ("Progressive", "Refinement"):
        for key in score_keys:
            comparison[key][c_type] = reference_columns()

    if built_in:
        base_data = (b_id, types[b_id], sizes[b_id])
    else:
        base_data = (b_id, get_sequence_type(sequences),
                     get_sequence_size(sequences))

    # Rebuild the stored best alignments of this benchmark; settings without a stored
    # entry keep their empty default tuple.
    for key in score_keys:
        stored = best[key]["Progressive"]
        if name in stored:
            tmp = stored[name]
            benchmark_best[key]["Progressive"] = (
                align_progressive(tmp["Permutation"], sequences),
                tmp["Permutation"],
                from_dict(tmp["Configuration"]),
            )
    for key in score_keys:
        stored = best[key]["Refinement"]
        if name in stored:
            tmp = stored[name]
            benchmark_best[key]["Refinement"] = (
                align_iterative(
                    tmp["Permutation"],
                    align_progressive(tmp["BasePermutation"], sequences)),
                tmp["Permutation"],
                tmp["BasePermutation"],
                from_dict(tmp["Configuration"]),
            )

    # The refinement tables additionally show the baseline, i.e. the first two score values of the
    # best progressive alignment, so that an improvement by the refinement can be judged.
    for key in score_keys:
        baseline = benchmark_best[key]["Progressive"][0].score()[0:2]
        comparison[key]["Refinement"][TABLE_AGENT] = (*baseline, 0, 0)

    return name, sequences, sequence_names, base_data, comparison, benchmark_best
def align_prof_seq(seq, prof, perm=None, score_gap_loc=score_dna_gap):
    """
    align a sequence against an already aligned profile
    this code extends the Needleman-Wunsch-Algorithm (1970) and the review by Waterman (1976) to a
    profile-sequence alignment

    Note that ``prof`` is modified in place: whenever the backtracking moves up, a gap column is
    inserted into it via ``prof.add_gap``.

    :param seq: sequence to align
    :param prof: profile to align
    :param perm: permutation to use to solve conflicts in the direction-matrix
    :param score_gap_loc: local scoring for a gap between the two sequences
    :return: new profile consisting of the rows of prof and the aligned sequence
    :raises ValueError: if perm contains none of the directions possible in a cell
    """
    if perm is None:
        perm = [LEFT, DIAG, UP]

    seq_len, prof_len = len(seq), len(prof)
    s_matrix = np.zeros((seq_len + 1, prof_len + 1))
    d_matrix = np.zeros((seq_len + 1, prof_len + 1), dtype='int32')

    # fill the edges of the matrices; the direction flags are used instead of their literal
    # values to stay in line with align_seq_seq
    # NOTE(review): this edge scores one gap per row while the inner cells below score
    # score_gap_loc * prof.size() for the same move - confirm which one is intended
    for i in range(1, seq_len + 1):
        s_matrix[i, 0] = i * score_gap_loc
        d_matrix[i, 0] = UP
    for j in range(1, prof_len + 1):
        s_matrix[0, j] = s_matrix[0, j - 1] + prof.align(GAP, j - 1)
        d_matrix[0, j] = LEFT

    # moving up costs a gap in every row of the profile; the profile is not changed while filling
    up_cost = score_gap_loc * prof.size()

    # fill the matrix according to the algorithms
    for i in range(1, seq_len + 1):
        for j in range(1, prof_len + 1):
            # compute the possible scores for the alignment
            d = s_matrix[i - 1, j - 1] + prof.align(seq[i - 1], j - 1)
            l = s_matrix[i, j - 1] + prof.align(GAP, j - 1)
            u = s_matrix[i - 1, j] + up_cost

            # fill the matrix with the maximal value
            s_matrix[i, j] = max(d, l, u)

            # remember every direction that reaches the maximum as a bit flag
            if s_matrix[i, j] == d:
                d_matrix[i, j] += DIAG
            if s_matrix[i, j] == u:
                d_matrix[i, j] += UP
            if s_matrix[i, j] == l:
                d_matrix[i, j] += LEFT

    aligned_seq = ""
    a_seq_index = seq_len
    a_prof_index = prof_len

    # Backtracking of the alignment through the Direction-Matrix and synchronously enlarging the sequences as needed
    while a_seq_index >= 0 and a_prof_index >= 0 and a_seq_index + a_prof_index != 0:
        # the first direction of perm that is allowed in the current cell wins
        for i in perm:
            # go diagonal in the alignment
            if i == DIAG and d_matrix[a_seq_index, a_prof_index] & DIAG != 0:
                a_seq_index -= 1
                a_prof_index -= 1
                aligned_seq = seq[a_seq_index] + aligned_seq
                break
            # go left in the alignment, i.e. insert a gap in the sequence
            if i == LEFT and d_matrix[a_seq_index, a_prof_index] & LEFT != 0:
                a_prof_index -= 1
                aligned_seq = "-" + aligned_seq
                break
            # go up in the alignment, i.e. insert a gap in the existing profile
            if i == UP and d_matrix[a_seq_index, a_prof_index] & UP != 0:
                a_seq_index -= 1
                aligned_seq = seq[a_seq_index] + aligned_seq
                prof.add_gap(a_prof_index)
                break
        else:
            # without this the loop would spin forever on the same cell
            raise ValueError(
                "perm does not contain a direction applicable to the alignment")

    # return the new computed profile of the two profiles
    return Profile(prof.seqs + [aligned_seq])
def _run_reference_tool(command, label, file_prefix, code, results_dir):
    """
    Run one reference tool through the shell, score its output and keep or delete the output file
    :param command: shell command of the tool
    :param label: text printed in front of the scores
    :param file_prefix: prefix of the file name used when the output is kept
    :param code: file-code of the benchmark, e.g. "RV20/BB20001"
    :param results_dir: directory to move the output to, or None to delete it
    :return: (q, ts, cs, sp, runtime in seconds)
    """
    start = time.time()
    os.system(command)
    elapsed = time.time() - start

    # presumably the tool wrote ~/output_seqs.fa, which the scoring helpers read by default
    sp, cs, _, _ = Profile(list(read_fasta_seqs().values())).score()
    q, ts = score_alignment()

    output = os.path.expanduser("~/output_seqs.fa")
    if results_dir is not None:
        os.rename(
            output,
            os.path.join(results_dir, file_prefix + code.split("/")[1] + ".fa"))
    else:
        os.remove(output)
    print(label, q, ",\t", ts, ",\t", cs, ",\t", sp)
    return q, ts, cs, sp, elapsed


def run_balibase(balibase_dir, code, table, results_dir):
    """
    Compute the performance of the three different reference tools (CLUSTAL, MAFFT, MUSCLE)
    :param balibase_dir: base-directory of the BAliBASE benchmark directory
    :param code: file-code from file to test; codes outside the fixed selection are ignored
    :param table: table to store alignment results in (a row is appended in place)
    :param results_dir: directory to keep the alignments of the tools in, None to discard them
    """
    codes = [
        "RV20/BB20001", "RV20/BB20020", "RV40/BB40010", "RV40/BB40014",
        "RV40/BB40018", "RV50/BB50004"
    ]
    if code not in codes:
        return
    print(code)

    # read the reference alignment and write it to the output
    base = os.path.join(balibase_dir, code)
    optimal = base + ".msf"
    input_file = base + ".tfa"
    # NOTE(review): the result is unpacked into two values here, while the scoring code treats the
    # result of read_fasta_seqs() as a mapping - confirm the return type for an explicit file
    input_seqs, _ = read_fasta_seqs(input_file)
    optimal_seqs = read_msf(optimal)
    ref_sp, ref_cs, _, _ = Profile(list(optimal_seqs.values())).score()
    write_fasta_seqs(optimal_seqs, "~/ref_seqs.fa")
    write_fasta_seqs(input_seqs)

    # perform the alignment with each of the tools
    clustal = _run_reference_tool(run_clustal, "CLUSTAL:", "CLUSTAL_", code,
                                  results_dir)
    mafft = _run_reference_tool(run_mafft, "MAFFT:\t", "MAFFT_", code,
                                results_dir)
    muscle = _run_reference_tool(run_muscle, "MUSCLE:\t", "MUSCLE_", code,
                                 results_dir)

    # delete not necessary files and save the results in a table
    os.remove(os.path.expanduser("~/input_seqs.fa"))
    os.remove(os.path.expanduser("~/ref_seqs.fa"))

    # compare against the values of the column; "in" on a Series would test the index labels
    if not (table["name"] == code).any():
        table.loc[len(table)] = [
            code,
            len(optimal_seqs),
            get_sequence_size(optimal_seqs),
            ref_cs,
            ref_sp,
            compute_identity(balibase_dir, code),
            *clustal,
            *mafft,
            *muscle,
        ]
def test_align_prof_seq(self):
    """Align one sequence against a two-row profile and compare all three resulting rows."""
    expected = ["gc-gc-cc", "gccgcgcc", "gc-gc-cc"]
    aligned = align_prof_seq("gcgccc", Profile(["gc-gc-cc", "gccgcgcc"]),
                             [LEFT, DIAG, UP])
    self.assertEqual(expected, aligned.get_sequences())
def align_seq_seq(seq_a,
                  seq_b,
                  dna,
                  perm=None,
                  score_match_loc=score_dna_match,
                  score_mismatch_loc=score_dna_mismatch,
                  score_gap_loc=score_dna_gap):
    """
    Globally align two sequences with each other
    this code follows the Needleman-Wunsch-Algorithm (1970) and the review by Waterman (1976)
    :param seq_a: first sequence to align
    :param seq_b: second sequence to align
    :param dna: flag indicating that the sequences have to be handled as dna sequences, this is
        also switched on if the first symbol of seq_a is lower-case
    :param perm: order in which the directions are tried to solve conflicts in the
        direction-matrix, defaults to [LEFT, DIAG, UP]
    :param score_match_loc: local scoring for a match between two bases or nucleotides
    :param score_mismatch_loc: local scoring for a mismatch between two bases or nucleotides
    :param score_gap_loc: local scoring for a gap between the two sequences
    :return: tuple of the profile holding both aligned sequences and the score of the alignment
    :raises ValueError: if perm misses a direction that is required during the backtracking
    """
    if perm is None:
        perm = [LEFT, DIAG, UP]

    rows, cols = len(seq_a) + 1, len(seq_b) + 1
    s_matrix = np.zeros((rows, cols))
    d_matrix = np.zeros((rows, cols), dtype='int32')

    # an empty first sequence must not crash the lower-case detection
    dna = dna or (len(seq_a) > 0 and seq_a[0].islower())

    # fill the edges of the matrices
    for i in range(1, rows):
        s_matrix[i, 0] = i * score_gap_loc
        d_matrix[i, 0] = UP
    for j in range(cols):
        s_matrix[0, j] = j * score_gap_loc
        d_matrix[0, j] = LEFT

    # fill the matrix according to the algorithms
    for i in range(1, rows):
        for j in range(1, cols):
            # compute the possible scores for the alignment
            if dna:
                pair_score = score_match_loc if seq_a[i - 1] == seq_b[j - 1] else score_mismatch_loc
            else:
                pair_score = score(seq_a[i - 1], seq_b[j - 1])
            diag = s_matrix[i - 1, j - 1] + pair_score
            left = s_matrix[i, j - 1] + score_gap_loc
            up = s_matrix[i - 1, j] + score_gap_loc

            # fill the matrix with the maximal value
            s_matrix[i, j] = max(diag, left, up)

            # remember every direction reaching the maximum; the directions are combined by
            # addition and read back with "&" (assumes DIAG, LEFT, UP are distinct bit-flags)
            if s_matrix[i, j] == diag:
                d_matrix[i, j] += DIAG
            if s_matrix[i, j] == left:
                d_matrix[i, j] += LEFT
            if s_matrix[i, j] == up:
                d_matrix[i, j] += UP

    # Backtracking of the alignment through the Direction-Matrix, the columns are collected from
    # the end of the alignment to its start and reversed afterwards
    seq_a_idx, seq_b_idx = len(seq_a), len(seq_b)
    columns_a, columns_b = [], []
    while seq_a_idx != 0 or seq_b_idx != 0:
        directions = d_matrix[seq_a_idx, seq_b_idx]
        for p in perm:
            # go diagonal in the alignment, i.e. pair the symbols of both sequences
            if p == DIAG and (directions & DIAG) != 0:
                seq_a_idx -= 1
                seq_b_idx -= 1
                columns_a.append(seq_a[seq_a_idx])
                columns_b.append(seq_b[seq_b_idx])
                break
            # go up in the alignment, i.e. insert a gap in the second sequence
            elif p == UP and (directions & UP) != 0:
                seq_a_idx -= 1
                columns_a.append(seq_a[seq_a_idx])
                columns_b.append('-')
                break
            # go left in the alignment, i.e. insert a gap in the first sequence
            elif p == LEFT and (directions & LEFT) != 0:
                seq_b_idx -= 1
                columns_a.append('-')
                columns_b.append(seq_b[seq_b_idx])
                break
        else:
            # no direction of perm is applicable, without this check the loop would never end
            raise ValueError(
                "perm does not contain a direction applicable at cell (%d, %d)"
                % (seq_a_idx, seq_b_idx))

    aligned_a = "".join(reversed(columns_a))
    aligned_b = "".join(reversed(columns_b))
    return Profile([aligned_a, aligned_b]), s_matrix[len(seq_a), len(seq_b)]
# Demo script: run a Monte-Carlo Tree-Search agent once through the AlignmentWrapper on
# generated sequences and once through the RefinementWrapper on a fixed profile
print(
    "########################################################",
    "##Starting training of a Monte-Carlo Tree-Search agent##",
    "########################################################",
    "",
    sep="\n",
)

score = SP_SCORE

# first run: alignment of freshly generated sequences
seqs = get_sequences(count=3, length=6, different=True)
agent = MCTSAgent(seqs, rollouts=2, adjust=True)
env = AlignmentWrapper(seqs, agent, score)

start = time.time()
reward, permutation, profile, _ = env.run()
end = time.time()

print(profile)
print("Score:", reward[score], F"({permutation})")
print("Trainer ran for %.2f seconds" % (end - start))

# second run: refinement of a given profile
# (the name "start" first holds the profile and afterwards the start time of the run)
start = Profile(["ctattg", "ctaccg", "ctatgt"])
agent = MCTSAgent(sequences=start, refinement=True)
env = RefinementWrapper(start, agent, score)

start = time.time()
reward, permutation, profile, _ = env.run()
end = time.time()

print(profile)
print("Score:", reward[score], F"({permutation})")
print("Trainer ran for %.2f seconds" % (end - start))
def main(log_path="C:/Users/scott/Desktop/hearthstone_games/Recorded_games/Power_game_15.log"):
    """
    Watch a power log and print simulated combat odds for every combat state found in it

    For each board state delivered by the LogReader both player boards are built, the combat
    is simulated 10,000 times and the win / tie / loss and lethal percentages are printed.
    The loop never terminates on its own.

    :param log_path: path of the log file to read, defaults to the recorded game used so far
    """
    # alternative log location used during development:
    # "C:/Program Files (x86)/Hearthstone/Logs/Power_old.log"
    # logging.basicConfig(level=logging.DEBUG, format="%(message)s")
    print("Reading Log: ", log_path)
    logreader = LogReader(log_path)
    profile = Profile()
    profile.__enter__()
    while True:
        print(
            "------------------------------------------------------------------------"
        )
        board_state = logreader.watch_log_file_for_combat_state()
        if not board_state:
            print("\n*** Game Over ***")
            # NOTE(review): the loop continues after the game is over, so __exit__ may be called
            # repeatedly without a matching __enter__ - confirm this is intended
            profile.__exit__()
            continue

        player_board_0 = PlayerBoard(
            player_id=0,
            hero=board_state.friendlyHero,
            life_total=board_state.friendlyPlayerHealth,
            rank=board_state.friendlyTechLevel,
            minions=board_state.friendlyBoard,
            enemy_is_deathwing=board_state.enemyHero is HeroType.DEATHWING)
        player_board_1 = PlayerBoard(
            player_id=1,
            hero=board_state.enemyHero,
            life_total=board_state.enemyPlayerHealth,
            rank=board_state.enemyTechLevel,
            minions=board_state.enemyBoard,
            enemy_is_deathwing=board_state.friendlyHero is HeroType.DEATHWING)
        print("Enemy board")
        print(player_board_1)
        print("Friendly board")
        print(player_board_0)

        try:
            single_threaded = True
            games = 10_000
            game_state = (player_board_0, player_board_1)
            # the state is pickled once and the same bytes are handed to every simulation
            pickled_state = pickle.dumps(game_state)

            start = time.time()
            if single_threaded:
                results = [
                    Simulator.Simulate(pickled_state) for _ in range(games)
                ]
            else:
                pool = Pool()
                try:
                    results = pool.map(Simulator.Simulate,
                                       repeat(pickled_state, games))
                finally:
                    # release the workers even if a simulation fails; the except below swallows
                    # the error and the loop goes on, so a leaked pool would pile up per combat
                    pool.close()
                    pool.join()

            # count how often each result occurred; positive results are counted as wins,
            # negative ones as losses and zero as a tie
            counter = Counter(results)
            wins, losses, ties, enemy_lethal, friendly_lethal = 0.0, 0.0, 0.0, 0.0, 0.0
            for damage, game_count in sorted(counter.items(), key=lambda x: x[0]):
                if damage > 0:
                    wins += game_count
                    # NOTE(review): strict ">" does not count damage equal to the life total as
                    # lethal - confirm whether ">=" is intended (same for the loss branch)
                    if damage > player_board_1.life_total:
                        enemy_lethal += game_count
                elif damage < 0:
                    losses += game_count
                    if (damage * -1) > player_board_0.life_total:
                        friendly_lethal += game_count
                else:
                    ties += game_count
            end = time.time()

            print("Win", 100 * wins / games, "Tie", 100 * ties / games, "Loss",
                  100 * losses / games, "Elapsed:", end - start)
            print("We kill enemy:", 100 * enemy_lethal / games,
                  "Enemy kills us:", 100 * friendly_lethal / games)
            print(
                "------------------------------------------------------------------------\n"
            )
        except Exception as e:
            # best effort: a failing simulation must not stop watching the log
            print(e)