from collections import deque

# EnvironmentSelector, World, shift_list, and serialize are assumed to be
# project-level helpers importable from the surrounding package.


def evaluate(agent_profile, agent_new_path, agent_old_path,
             games_num, experience_path=None, acceptance_rate=0.6,
             verbose=True, debug=False, max_steps=None,
             self_play_examples_deque=None):
    print("Evaluating model with games_num %d and acceptance_rate %f" % (games_num, acceptance_rate))

    env_selector = EnvironmentSelector()
    agent = env_selector.get_agent(agent_profile)
    # evaluation games are played greedily, with exploration disabled
    agent.set_exploration_enabled(False)

    profile = env_selector.get_profile(agent_profile)
    game = env_selector.get_game(profile.game)

    agents = []

    # every seat starts out occupied by a copy of the old agent
    for _ in range(game.get_players_num()):
        old_agent = agent.clone()
        old_agent.load(agent_old_path)
        agents.append(old_agent)

    # the candidate model takes seat 0; every other seat keeps the old model
    agent.load(agent_new_path)
    agents[0] = agent

    arena_games_results = [0] * len(agents)
    arena_examples = []
    # split the requested games evenly across the seat rotations below
    arena_games_n = games_num // game.get_players_num()

    world = World()

    for jdx in range(game.get_players_num()):
        # rotate the seating so the candidate agent plays every position
        playing_agents = shift_list(agents, jdx)

        sess_arena_examples, games_results = world.execute_games(playing_agents,
                                                                 game,
                                                                 arena_games_n,
                                                                 max_game_steps_n=max_steps,
                                                                 verbose=verbose,
                                                                 show_every_turn=debug)

        # undo the rotation so results line up with the original agent order
        games_results = shift_list(games_results, -jdx)

        for index in range(len(arena_games_results)):
            arena_games_results[index] += games_results[index]

        arena_examples.extend(sess_arena_examples)

    self_play_examples_deque += arena_examples

    if experience_path:
        serialize(self_play_examples_deque, experience_path)

    cur_rewards = arena_games_results[0]
    other_rewards = sum(arena_games_results) - cur_rewards

    print("Current agent got rewards: %d\n"
          "Total reward across all other agents: %d" % (cur_rewards, other_rewards))

    # accept the candidate only if it captured at least `acceptance_rate`
    # of the total reward earned across all agents
    total_rewards = cur_rewards + other_rewards
    updated = total_rewards > 0 and cur_rewards / total_rewards >= acceptance_rate

    return updated
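
shift_list is a project helper that is not shown here; below is a minimal sketch consistent with how evaluate uses it (rotate a list by n positions, with a negative n undoing the rotation), followed by a hypothetical call. The profile name and paths are placeholders, not values from the original project.

# Minimal sketch, assuming shift_list rotates a list left by n positions,
# so shift_list(shift_list(xs, n), -n) restores the original order.
def shift_list(items, n):
    n %= len(items)
    return items[n:] + items[:n]

# Hypothetical call: pit a fresh checkpoint against the previous best over
# 40 games (profile name and paths are placeholders).
accepted = evaluate("some_agent_profile",
                    "workspace/model_updated_3.h5",
                    "workspace/best_model.h5",
                    games_num=40,
                    acceptance_rate=0.6)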
Example #2
def generate_self_play(opt_agent_profile,
                       agent_path,
                       games_num,
                       experience_path,
                       max_steps,
                       verbose,
                       debug,
                       exploration_decay_steps,
                       optimize_for_inference=False,
                       self_play_examples_deque=None):
    # guard against the shared-mutable-default pitfall
    if self_play_examples_deque is None:
        self_play_examples_deque = deque()

    world = World()

    env_selector = EnvironmentSelector()

    agent = env_selector.get_agent(opt_agent_profile)

    agent.load(agent_path)

    agent_profile = env_selector.get_profile(opt_agent_profile)
    game = env_selector.get_game(agent_profile.game)

    if optimize_for_inference:
        # self-play only needs forward passes, so training capability can go
        agent.disable_training_capability()

    self_play_examples = world.generate_self_play(
        agent,
        game,
        games_num,
        max_game_steps_n=max_steps,
        verbose=verbose,
        show_every_turn=debug,
        exploration_decay_steps=exploration_decay_steps)

    self_play_examples_deque += self_play_examples

    serialize(self_play_examples_deque, experience_path)
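
A hypothetical invocation of the function above; the profile name and paths are placeholders, and optimize_for_inference is enabled since self-play generation only needs forward passes:

# Hypothetical call: generate 100 self-play games and persist the examples.
generate_self_play("some_agent_profile",
                   "workspace/best_model.h5",
                   games_num=100,
                   experience_path="workspace/memory_3.pkl",
                   max_steps=200,
                   verbose=True,
                   debug=False,
                   exploration_decay_steps=30,
                   optimize_for_inference=True)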
Example #3
import os


def fuse_memory(old_memory_path, new_memory_path, out_memory_path):
    # merge two serialized replay memories into a single output file
    if os.path.isfile(old_memory_path) and os.path.isfile(new_memory_path):
        try:
            serialize(
                deserialize(new_memory_path) + deserialize(old_memory_path),
                out_memory_path)
        except Exception:
            print("Could not concatenate new + old memory; trying reverse order")
            serialize(
                deserialize(old_memory_path) + deserialize(new_memory_path),
                out_memory_path)
    elif os.path.isfile(new_memory_path):
        # nothing to fuse: pass the new memory through unchanged
        serialize(deserialize(new_memory_path), out_memory_path)
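
serialize and deserialize are project helpers that are not shown here; below is a minimal pickle-based sketch consistent with how these examples use them (the real implementations may differ), plus a hypothetical call with placeholder paths.

import pickle

# Minimal sketch, assuming memory files are plain pickled lists of examples.
def serialize(obj, path):
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def deserialize(path):
    with open(path, "rb") as f:
        return pickle.load(f)

# Hypothetical usage: merge the previous and latest replay memories.
fuse_memory("workspace/memory_old.pkl",
            "workspace/memory_new.pkl",
            "workspace/memory_fused.pkl")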
Example #4
        # Excerpt from the training loop: `memory`, `memories`, `n_memory`,
        # `train_memory_file`, `workspace`, `cur_agent_path`, `options`, and
        # `i` are defined by the surrounding script.
        if n_memory != 0:
            print('Deserializing memory from %s' % memory)
            des_mem = deserialize(memory)
            if n_memory == -1 or n_memory > len(memories):
                # -1 (or an over-large count) means fuse every stored memory
                for file in memories:
                    print('Deserializing memory from %s' % file)
                    des_mem.extend(deserialize(file))
            elif n_memory > 0:
                # otherwise fuse only the n_memory most recent memory files
                for file in memories[-n_memory:]:
                    print('Deserializing memory from %s' % file)
                    des_mem.extend(deserialize(file))
            serialize(des_mem, train_memory_file)
        else:
            # n_memory == 0: train directly on the freshly generated memory
            train_memory_file = memory

        memories.append(memory)

        # train with the selected memory
        new_agent_path = workspace + '/model_updated_%d.h5' % i

        train(options.agent_profile,
              train_memory_file,
              cur_agent_path,
              new_agent_path,
              train_distributed=options.train_distributed,
              train_distributed_native=options.train_distributed_native,
              epochs=options.epochs)
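
The excerpt stops right after training; below is a plausible continuation, reusing evaluate() from Example #1 to gate model promotion. The game count and threshold are assumptions, not values from the original script.

        # Hypothetical continuation: promote the updated model only if it
        # wins enough reward against the current one.
        if evaluate(options.agent_profile, new_agent_path, cur_agent_path,
                    games_num=40, acceptance_rate=0.6):
            cur_agent_path = new_agent_path  # candidate becomes the baseline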