def evaluate(agent_profile, agent_new_path, agent_old_path,
             games_num, experience_path=None, acceptance_rate=0.6,
             verbose=True, debug=False, max_steps=None, self_play_examples_deque=deque([])):
    print("Evaluating model with games_num %d and acceptance_rate %f" % (games_num, acceptance_rate))

    env_selector = EnvironmentSelector()
    agent = env_selector.get_agent(agent_profile)
    agent.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(agent_profile)
    game = env_selector.get_game(agent_profile.game)

    agents = []

    # One copy of the previous (old) model is created for every player seat.
    for idx in range(game.get_players_num()):
        old_agent = agent.clone()
        old_agent.load(agent_old_path)
        agents.append(old_agent)

    # The candidate (new) model is loaded and takes the first seat.
    agent.load(agent_new_path)

    agents[0] = agent

    arena_games_results = [0] * len(agents)
    arena_examples = []
    arena_games_n = int(games_num / game.get_players_num())

    world = World()

    for jdx in range(game.get_players_num()):
        # Rotate the seating order so the new agent plays from every position.
        playing_agents = shift_list(agents, jdx)

        sess_arena_examples, games_results = world.execute_games(playing_agents,
                                                                 game,
                                                                 arena_games_n,
                                                                 max_game_steps_n=max_steps,
                                                                 verbose=verbose,
                                                                 show_every_turn=debug)

        # Shift the results back so index 0 always corresponds to the new agent.
        games_results = shift_list(games_results, -jdx)

        for index in range(len(arena_games_results)):
            arena_games_results[index] += games_results[index]

        arena_examples.extend(sess_arena_examples)

    self_play_examples_deque += arena_examples

    if experience_path:
        serialize(self_play_examples_deque, experience_path)

    cur_rewards = arena_games_results[0]
    other_rewards = sum(arena_games_results) - cur_rewards

    print("Current agent got rewards: %d\n"
          "Total reward across all other agents: %d" % (cur_rewards, other_rewards))

    # Accept the new model only if it collected at least `acceptance_rate`
    # of the total reward across all agents.
    updated = cur_rewards >= acceptance_rate * (cur_rewards + other_rewards)

    return updated
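A minimal usage sketch for this accept/reject step. The agent profile name is borrowed from the init_nnet snippet later in this collection; the checkpoint paths, game count, and threshold below are placeholders, not values from the original project.

# Hedged sketch: decide whether a freshly trained model replaces the current best.
new_model_path = "checkpoints/agent_candidate.h5"   # placeholder path
best_model_path = "checkpoints/agent_best.h5"       # placeholder path

accepted = evaluate("watten_agent_nnet",
                    new_model_path,
                    best_model_path,
                    games_num=40,
                    acceptance_rate=0.6,
                    max_steps=200,
                    verbose=False)

if accepted:
    print("Candidate model passed evaluation; promoting it to the new best.")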
Example 2
def train(agent_profile, agent_path, out_agent_path,
          memory_path=None, game_memory=None,
          train_distributed=False, train_distributed_native=False,
          epochs=1):
    env_selector = EnvironmentSelector()

    agent = env_selector.get_agent(agent_profile, native_multi_gpu_enabled=train_distributed_native)

    if agent_path:
        agent.load(agent_path)

    if not game_memory:
        if not memory_path:
            print("Error: You must specify either game memory or memory path!")
            throw_error("Error: You must specify either game memory or memory path!")

        print("deserializing memory from the memory model...")

        game_memory = deserialize(memory_path)

    print("%d steps loaded from memory" % len(game_memory))

    print("Initiate training...")

    agent.train(game_memory, epochs=epochs)

    print("Training finished!")

    if train_distributed:
        import horovod.tensorflow as hvd
        if hvd.rank() == 0:
            # save only on the main server
            agent.save(out_agent_path)
    else:
        agent.save(out_agent_path)

    print("Model saved!")
Example 3
def generate_self_play(opt_agent_profile,
                       agent_path,
                       games_num,
                       experience_path,
                       max_steps,
                       verbose,
                       debug,
                       exploration_decay_steps,
                       optimize_for_inference=False,
                       self_play_examples_deque=deque([])):
    world = World()

    env_selector = EnvironmentSelector()

    agent = env_selector.get_agent(opt_agent_profile)

    agent.load(agent_path)

    agent_profile = env_selector.get_profile(opt_agent_profile)
    game = env_selector.get_game(agent_profile.game)

    if optimize_for_inference:
        agent.disable_training_capability()

    self_play_examples = world.generate_self_play(
        agent,
        game,
        games_num,
        max_game_steps_n=max_steps,
        verbose=verbose,
        show_every_turn=debug,
        exploration_decay_steps=exploration_decay_steps)

    self_play_examples_deque += self_play_examples

    serialize(self_play_examples_deque, experience_path)
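A sketch of how the self-play phase might be driven; every path and count below is a placeholder, and exploration_decay_steps is simply passed through to World.generate_self_play as in the function above.

# Hedged sketch: generate fresh self-play data with the current best model.
generate_self_play("watten_agent_nnet",
                   agent_path="checkpoints/agent_best.h5",           # placeholder path
                   games_num=100,
                   experience_path="memory/self_play_examples.pkl",  # placeholder path
                   max_steps=200,
                   verbose=False,
                   debug=False,
                   exploration_decay_steps=30,
                   optimize_for_inference=True)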
    parser.add_argument("--games_num", dest="games_num", default=100, type=int,
                        help="Number of games to play. ")

    options = parser.parse_args()

    if not options.folder:
        parser.error('Folder must be selected')

    if not options.agent:
        parser.error('Agent profile must be selected')

    if not options.random_agent:
        parser.error('Random agent profile must be selected')

    env_selector = EnvironmentSelector()
    agent = env_selector.get_agent(options.agent)
    print("Pit with agent ", agent.name)
    agent.set_exploration_enabled(False)

    random_agent = env_selector.get_agent(options.random_agent)
    print("Pit with agent ", random_agent.name)
    random_agent.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(options.agent)
    game = env_selector.get_game(agent_profile.game)

    world = World()

    agents = [agent, random_agent]
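The snippet is cut off here. A plausible continuation, sketched under the assumption that World.execute_games accepts the same arguments as in the evaluate() example above, would pit the trained agent against the random agent and report the per-agent results:

    # Hedged sketch of how this pit script might continue.
    _, games_results = world.execute_games(agents,
                                           game,
                                           options.games_num,
                                           max_game_steps_n=None,
                                           verbose=True,
                                           show_every_turn=False)
    print("Results (trained agent, random agent): ", games_results)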
Example 5
        action='store_true',
        help="Optimize for inference in self-play and evaluation phases")
    parser.set_defaults(optimize_for_inference=False)

    options = parser.parse_args()

    if not options.agent_profile_new:
        parser.error('New agent profile must be selected')

    if not options.agent_profile_old:
        parser.error('Old agent profile must be selected')

    if not options.games_num:
        parser.error('Number of games must be selected')

    env_selector = EnvironmentSelector()
    agent_first = env_selector.get_agent(options.agent_profile_new)
    print("Pit with agent ", agent_first.name)
    agent_first.set_exploration_enabled(False)

    agent_second = env_selector.get_agent(options.agent_profile_old)
    print("Pit with agent ", agent_second.name)
    agent_second.set_exploration_enabled(False)

    agent_profile = env_selector.get_profile(options.agent_profile_new)
    game = env_selector.get_game(agent_profile.game)

    agents = [agent_first, agent_second]

    if options.agent_new_path:
        agent_first.load(options.agent_new_path)
    def init_nnet(self):
        # Build the Watten neural-network agent and keep a handle to it on the instance.
        env_selector = EnvironmentSelector()
        agent_nnet = env_selector.get_agent("watten_agent_nnet")

        self.nnet = agent_nnet