Code example #1
        self.policy_optimizer.step()

        # return the loss for optimization and learning control
        return policy_loss, value_loss


if __name__ == "__main__":  # this file was run from the command line
    print("#######################################")
    print("##Starting training of a policy agent##")
    print("#######################################")
    print()

    score = SP_SCORE

    agent = PolicyAgent(sequences=get_sequences(count=3,
                                                length=6,
                                                different=True),
                        network_object=TinyREINFORCENetwork)
    pat = PolicyAgentTrainer(agent,
                             value_gamma=0.99,
                             value_alpha=0.8,
                             baseline=True)
    pat.run()

    # compute the resulting multiple sequence alignment
    (best_profile, best_permutation), _ = pat.evaluate_training()
    reward = best_profile.score()
    print(str(best_profile))
    print("Score:", reward[score], F"({best_permutation})")

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
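
The excerpt above ends with the optimizer step and the two returned losses. Below is a minimal, self-contained sketch (an illustrative assumption, not the PolicyAgent API) of the REINFORCE-with-baseline update that typically produces such a policy loss and value loss; the function name, episode format, and network interfaces are made up for the example.

import torch
import torch.nn as nn


def reinforce_update(policy_net: nn.Module, value_net: nn.Module,
                     policy_opt: torch.optim.Optimizer, value_opt: torch.optim.Optimizer,
                     states, actions, rewards, gamma: float = 0.99):
    # discounted returns G_t, computed backwards over one finished episode
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + gamma * g
        returns.insert(0, g)
    returns = torch.tensor(returns, dtype=torch.float32)

    states = torch.stack(states)                        # (T, state_dim)
    actions = torch.as_tensor(actions)                  # (T,)

    # baseline: state values predicted by the value network
    values = value_net(states).squeeze(-1)              # (T,)
    advantages = returns - values.detach()

    # policy loss: -log pi(a_t | s_t) * advantage_t
    log_probs = torch.log_softmax(policy_net(states), dim=-1)
    chosen = log_probs.gather(1, actions.unsqueeze(1)).squeeze(1)
    policy_loss = -(chosen * advantages).mean()

    # value loss: regress the baseline onto the observed returns
    value_loss = nn.functional.mse_loss(values, returns)

    policy_opt.zero_grad()
    policy_loss.backward()
    policy_opt.step()

    value_opt.zero_grad()
    value_loss.backward()
    value_opt.step()

    # return both losses, mirroring the excerpt above
    return policy_loss, value_loss
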
Code example #2
File: mcts_agent.py  Project: Old-Shatterhand/MSADRL
        """
        for child in self.children:
            print('\t' * depth +
                  F"{child.state}: ({child.n}, {child.v}, {child.u})")
            child.print(depth + 1)


if __name__ == "__main__":  # this file was run from the command line
    print("########################################################")
    print("##Starting training of a Monte-Carlo Tree-Search agent##")
    print("########################################################")
    print()

    score = SP_SCORE

    seqs = get_sequences(count=3, length=6, different=True)
    agent = MCTSAgent(seqs, rollouts=2, adjust=True)
    env = AlignmentWrapper(seqs, agent, score)

    start = time.time()
    reward, permutation, profile, _ = env.run()
    end = time.time()

    print(str(profile))
    print("Score:", reward[score], F"({permutation})")
    print("Trainer ran for %.2f seconds" % (end - start))

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
    agent = MCTSAgent(sequences=start, refinement=True)
    env = RefinementWrapper(start, agent, score)
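
The (n, v, u) triples printed by the excerpt above are the per-node statistics a UCT-style tree search maintains: visit count, accumulated rollout value, and exploration bonus. The class below is a small, self-contained sketch of that bookkeeping and of UCB-based child selection; it is an illustrative assumption, not the MSADRL Node implementation.

import math


class Node:
    def __init__(self, state, parent=None):
        self.state = state        # e.g. the partial permutation of sequences
        self.parent = parent
        self.children = []
        self.n = 0                # visit count
        self.v = 0.0              # accumulated value of rollouts through this node
        self.u = 0.0              # last computed exploration bonus

    def ucb_score(self, c: float = math.sqrt(2)) -> float:
        if self.n == 0:
            return float("inf")   # make sure unvisited children are tried once
        self.u = c * math.sqrt(math.log(self.parent.n) / self.n)
        return self.v / self.n + self.u

    def select_child(self) -> "Node":
        # pick the child that balances exploitation (v / n) and exploration (u)
        return max(self.children, key=lambda child: child.ucb_score())

    def print(self, depth=0):
        for child in self.children:
            print('\t' * depth + F"{child.state}: ({child.n}, {child.v}, {child.u})")
            child.print(depth + 1)
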
Code example #3
        self.optimizer.step()

        # return current loss, used for hyperparameter optimization and checking of the training process
        return actor_loss.item(), critic_loss.item()


if __name__ == "__main__":  # this file was run from the command line
    print("#############################################")
    print("##Starting training of a actor-critic agent##")
    print("#############################################")
    print()

    score = SP_SCORE

    agent = ActorCriticAgent(sequences=get_sequences(count=3,
                                                     length=6,
                                                     different=True),
                             network_object=TinyACNetwork)
    acat = ActorCriticAgentTrainer(training_agent=agent, supported_search=True)
    acat.run()

    # compute the resulting multiple sequence alignment
    (best_profile, best_permutation), _ = acat.evaluate_training()
    reward = best_profile.score()
    print(str(best_profile))
    print("Score:", reward[score], F"({best_permutation})")

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
    print(start)
    print("Score:", start.score()[score])
    agent = ActorCriticAgent(sequences=start,
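
The top of this excerpt steps `self.optimizer` and returns `actor_loss.item()` and `critic_loss.item()`. The sketch below shows how such per-step actor-critic losses are commonly built from a one-step TD error; the network interface, transition format, and `gamma` are assumptions for illustration, not the ActorCriticAgent internals.

import torch
import torch.nn as nn


def actor_critic_losses(actor: nn.Module, critic: nn.Module,
                        state, action, reward, next_state, done, gamma=0.99):
    value = critic(state).squeeze(-1)                         # V(s)
    with torch.no_grad():
        next_value = torch.zeros(()) if done else critic(next_state).squeeze(-1)
        td_target = reward + gamma * next_value               # bootstrapped target

    # the TD error serves as the advantage estimate for the actor
    advantage = (td_target - value).detach()

    log_probs = torch.log_softmax(actor(state), dim=-1)
    actor_loss = -log_probs[action] * advantage               # policy-gradient term
    critic_loss = nn.functional.mse_loss(value, td_target)    # value regression

    return actor_loss, critic_loss
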
Code example #4
            replay_buffer.append((linearize_state(state, self.num_seqs), probs, s_est))

            # and apply the selected action to the state
            _, state, _, done = self.env.step(action)
        return replay_buffer


if __name__ == "__main__":  # this file was run from the command line
    print("#############################################")
    print("##Starting training of a actor-critic agent##")
    print("#############################################")
    print()

    score = SP_SCORE

    agent = AlphaZeroAgent(sequences=get_sequences(count=3, length=6, different=True),
                           network_object=TinyA0_Network)
    a0t = AlphaZeroAgentTrainer(training_agent=agent, simulations=50, adjust=True)
    a0t.run(progress_print=True)

    # compute the resulting multiple sequence alignment
    (best_profile, best_permutation), _ = a0t.evaluate_training()
    reward = best_profile.score()
    print(str(best_profile))
    print("Score:", reward[score], F"({best_permutation})")

    start = Profile(["ctattg", "ctaccg", "ctatgt"])
    print(start)
    print("Score:", start.score()[score])
    agent = AlphaZeroAgent(sequences=start, network_object=TinyA0_Network, refinement=True, adjust=True)
    a0t = AlphaZeroAgentTrainer(agent, simulations=50, refinement=True)
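
The loop above fills a replay buffer with (linearized state, search probabilities, value estimate) triples. The sketch below shows the kind of AlphaZero-style training step such a buffer usually feeds: a cross-entropy loss against the MCTS visit distribution plus a value regression term. The two-headed network interface and the buffer layout are assumptions, not the AlphaZeroAgentTrainer code.

import torch
import torch.nn as nn


def alphazero_training_step(network: nn.Module, optimizer: torch.optim.Optimizer, replay_buffer):
    # unpack the buffer; assumes every linearized state and probability vector has the same length
    states = torch.stack([torch.as_tensor(s, dtype=torch.float32) for s, _, _ in replay_buffer])
    target_pi = torch.stack([torch.as_tensor(p, dtype=torch.float32) for _, p, _ in replay_buffer])
    target_v = torch.tensor([v for _, _, v in replay_buffer], dtype=torch.float32)

    # assumed two-headed network: policy logits and a scalar value per state
    logits, values = network(states)

    # cross-entropy against the search probabilities plus value regression
    policy_loss = -(target_pi * torch.log_softmax(logits, dim=-1)).sum(dim=-1).mean()
    value_loss = nn.functional.mse_loss(values.squeeze(-1), target_v)
    loss = policy_loss + value_loss

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()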