        self.policy_optimizer.step()

        # return the loss for optimization and learning control
        return policy_loss, value_loss


if __name__ == "__main__":  # this file was run from the command line
    print("#######################################")
    print("##Starting training of a policy agent##")
    print("#######################################")
    print()

    score = SP_SCORE
    agent = PolicyAgent(sequences=get_sequences(count=3, length=6, different=True),
                        network_object=TinyREINFORCENetwork)
    pat = PolicyAgentTrainer(agent, value_gamma=0.99, value_alpha=0.8, baseline=True)
    pat.run()

    # compute the resulting multiple sequence alignment
    (best_profile, best_permutation), _ = pat.evaluate_training()
    reward = best_profile.score()
    print(str(best_profile))
    print("Score:", reward[score], F"({best_permutation})")

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
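# ---------------------------------------------------------------------------
# Illustrative sketch, not part of this project's API: a minimal REINFORCE
# update with an optional value baseline in plain PyTorch. The function and
# all of its parameter names are hypothetical; it only shows the standard
# loss computation behind a "return policy_loss, value_loss" as above.
# ---------------------------------------------------------------------------
import torch


def reinforce_update(policy_optimizer, log_probs, returns,
                     values=None, value_optimizer=None):
    """One REINFORCE step over a finished episode.

    log_probs: 1-D tensor of log pi(a_t | s_t) for every step of the episode
    returns:   1-D tensor of discounted returns G_t (same length)
    values:    optional 1-D tensor of baseline estimates b(s_t)
    """
    if values is not None:
        # fit the baseline by regressing it onto the observed returns
        value_loss = torch.nn.functional.mse_loss(values, returns)
        value_optimizer.zero_grad()
        value_loss.backward()
        value_optimizer.step()
        advantages = returns - values.detach()
    else:
        value_loss = torch.tensor(0.0)
        advantages = returns

    # policy-gradient loss: -sum_t log pi(a_t | s_t) * (G_t - b(s_t))
    policy_loss = -(log_probs * advantages).sum()
    policy_optimizer.zero_grad()
    policy_loss.backward()
    policy_optimizer.step()
    return policy_loss, value_loss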
""" for child in self.children: print('\t' * depth + F"{child.state}: ({child.n}, {child.v}, {child.u})") child.print(depth + 1) if __name__ == "__main__": # this file was run from the command line print("########################################################") print("##Starting training of a Monte-Carlo Tree-Search agent##") print("########################################################") print() score = SP_SCORE seqs = get_sequences(count=3, length=6, different=True) agent = MCTSAgent(seqs, rollouts=2, adjust=True) env = AlignmentWrapper(seqs, agent, score) start = time.time() reward, permutation, profile, _ = env.run() end = time.time() print(str(profile)) print("Score:", reward[score], F"({permutation})") print("Trainer ran for %.2f seconds" % (end - start)) start = Profile(["ctattg", "ctaccg", "ctatgt"]) agent = MCTSAgent(sequences=start, refinement=True) env = RefinementWrapper(start, agent, score)
        self.optimizer.step()

        # return the current losses, used for hyperparameter optimization and for monitoring the training process
        return actor_loss.item(), critic_loss.item()


if __name__ == "__main__":  # this file was run from the command line
    print("##############################################")
    print("##Starting training of an actor-critic agent##")
    print("##############################################")
    print()

    score = SP_SCORE
    agent = ActorCriticAgent(sequences=get_sequences(count=3, length=6, different=True),
                             network_object=TinyACNetwork)
    acat = ActorCriticAgentTrainer(training_agent=agent, supported_search=True)
    acat.run()

    # compute the resulting multiple sequence alignment
    (best_profile, best_permutation), _ = acat.evaluate_training()
    reward = best_profile.score()
    print(str(best_profile))
    print("Score:", reward[score], F"({best_permutation})")

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
    print(start)
    print("Score:", start.score()[score])
    agent = ActorCriticAgent(sequences=start,
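# ---------------------------------------------------------------------------
# Illustrative sketch, not this project's trainer: the one-step actor-critic
# losses that typically sit behind a "return actor_loss, critic_loss" as
# above. All parameter names are hypothetical; gamma is the discount factor.
# ---------------------------------------------------------------------------
import torch


def actor_critic_losses(log_prob, value, reward, next_value, gamma=0.99):
    # bootstrapped one-step TD target: r + gamma * V(s')
    td_target = reward + gamma * next_value.detach()
    advantage = td_target - value
    # the actor is pushed towards actions with positive advantage ...
    actor_loss = -log_prob * advantage.detach()
    # ... while the critic regresses its estimate onto the TD target
    critic_loss = advantage.pow(2)
    return actor_loss, critic_loss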
            replay_buffer.append((linearize_state(state, self.num_seqs), probs, s_est))

            # and apply the selected action to the state
            _, state, _, done = self.env.step(action)

        return replay_buffer


if __name__ == "__main__":  # this file was run from the command line
    print("###########################################")
    print("##Starting training of an AlphaZero agent##")
    print("###########################################")
    print()

    score = SP_SCORE
    agent = AlphaZeroAgent(sequences=get_sequences(count=3, length=6, different=True),
                           network_object=TinyA0_Network)
    a0t = AlphaZeroAgentTrainer(training_agent=agent, simulations=50, adjust=True)
    a0t.run(progress_print=True)

    # compute the resulting multiple sequence alignment
    (best_profile, best_permutation), _ = a0t.evaluate_training()
    reward = best_profile.score()
    print(str(best_profile))
    print("Score:", reward[score], F"({best_permutation})")

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
    print(start)
    print("Score:", start.score()[score])
    agent = AlphaZeroAgent(sequences=start, network_object=TinyA0_Network, refinement=True, adjust=True)
    a0t = AlphaZeroAgentTrainer(agent, simulations=50, refinement=True)
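# ---------------------------------------------------------------------------
# Illustrative sketch, not this project's AlphaZeroAgent: the shape of a
# self-play loop that fills a replay buffer with (encoded state, search
# policy, value estimate) triples as appended above. `env`, `search` and
# `encode` are hypothetical hooks standing in for the real components.
# ---------------------------------------------------------------------------
import random


def self_play_episode(env, search, encode):
    """Collect one episode of AlphaZero-style training data.

    search(state) is assumed to return the visit-count distribution over
    actions (probs) and the tree's value estimate (s_est) for that state.
    """
    replay_buffer = []
    state, done = env.reset(), False
    while not done:
        probs, s_est = search(state)
        replay_buffer.append((encode(state), probs, s_est))
        # sample an action from the search policy and apply it to the state
        action = random.choices(range(len(probs)), weights=probs)[0]
        state, done = env.step(action)
    return replay_buffer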