Example #1
def runner(id, num_episodes, fifo, _args):
    # make args accessible to MCTSAgent
    global args
    args = _args

    # make sure TF does not allocate all memory
    init_tensorflow()

    # make sure agents play at all positions
    agent_id = id % NUM_AGENTS
    agent = MCTSAgent(args.model_file, agent_id=agent_id)

    # create environment with three SimpleAgents
    agents = [
        SimpleAgent(),
        SimpleAgent(),
        SimpleAgent(),
    ]
    # insert the MCTS agent at its assigned position (agent_id was computed above)
    agents.insert(agent_id, agent)

    env = pommerman.make('PommeFFACompetition-v0', agents)

    for i in range(num_episodes):
        # do rollout
        start_time = time.time()
        length, reward, rewards = agent.rollout(env)
        elapsed = time.time() - start_time
        # add data samples to log
        fifo.put((length, reward, rewards, agent_id, elapsed))
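
The runner above only produces data; the code that launches it is not shown. Below is a minimal sketch of one way to drive several runners with multiprocessing; the Queue/Process wiring and the args namespace are illustrative assumptions, not part of the original example.

import multiprocessing as mp
from argparse import Namespace

if __name__ == '__main__':
    # hypothetical arguments; the real script presumably builds these with argparse
    args = Namespace(model_file='model.h5')
    num_runners, episodes_per_runner = 4, 10

    fifo = mp.Queue()
    procs = [mp.Process(target=runner, args=(i, episodes_per_runner, fifo, args))
             for i in range(num_runners)]
    for p in procs:
        p.start()

    # drain one (length, reward, rewards, agent_id, elapsed) tuple per episode
    for _ in range(num_runners * episodes_per_runner):
        length, reward, rewards, agent_id, elapsed = fifo.get()
        print(agent_id, length, reward, elapsed)

    for p in procs:
        p.join()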
Example #2
 def __init__(self, mcts_id=None):
     self.agents = [
         SimpleAgent(),
         SimpleAgent(),
         SimpleAgent(),
         SimpleAgent()
     ]
     if mcts_id is not None:
         self.agents[mcts_id] = None
     self.mcts_id = mcts_id
Example #3
 def __init__(self, agent):
     # Test pool and Env
     self.pool = [
         agent,
         SimpleAgent(),
         SimpleAgent(),
         SimpleAgent(),
     ]
     #self.pool = [agent, RandomAgent(), RandomAgent(), RandomAgent(), ]
     self.env = pommerman.make('PommeFFACompetition-v0', self.pool)
Example #4
 def __init__(self, agent_id, args):
     self.args = args
     #Initialize the expert and the pool
     self.expert = MCTSAgent(None, agent_id=agent_id)
     self.pool = [
         SimpleAgent(),
         SimpleAgent(),
         SimpleAgent(),
     ]
     #self.pool = [RandomAgent(), RandomAgent(), RandomAgent(), ]
     self.pool.insert(agent_id, self.expert)
     self.env = pommerman.make('PommeFFACompetition-v0', self.pool)
Example #5
    def __init__(
        self,
        n_actions,
        character,
        evaluation_model=None,
        evaluation_model_path=None,
        # Set agent properties to preprocess observations
        use_history=True,  # Use previous observations for predictions
        use_2d=True,  # Use 2d convolutions
        patient=True,  # Wait to make initial observations (you don't need it if you don't use history)
        center_view=True,  # Use centering
        original_view=False,  # Use 11x11 board, if false, use 21x21
        verbose=False  # Print the chosen actions
    ):
        super(EvaluatorAgent, self).__init__(character=character)

        # Properties
        self.use_history = use_history
        self.use_2d = use_2d
        self.patient = patient
        self.center_view = center_view
        self.original_view = original_view
        self.verbose = verbose

        # Acting history for the evaluation
        self.actions_history = []
        self.observations_history = []
        self.episode_count = 0
        self.steps = 0

        self.n_actions = n_actions

        self.simple_agent = SimpleAgent(character=character)
        # Load any custom model
        self.evaluation_model = None
        if evaluation_model:
            self.evaluation_model = evaluation_model
            if evaluation_model_path:
                try:
                    self.evaluation_model.load_weights(evaluation_model_path)
                except Exception:
                    print('Weights load failed')
        elif evaluation_model_path:
            try:
                self.evaluation_model = load_model(evaluation_model_path)
            except Exception:
                print('Model load failed')
        else:
            print('Use SimpleAgent')
Example #6
def env_for_players():
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    agents = [
        DQN(config["agent"](0, config["game_type"])),
        SimpleAgent(config["agent"](1, config["game_type"])),
        SimpleAgent(config["agent"](2, config["game_type"])),
        SimpleAgent(config["agent"](3, config["game_type"]))
    ]
    env.set_agents(agents)
    env.set_training_agent(
        agents[0].agent_id)  # training_agent is only dqn agent
    env.set_init_game_state(None)

    return env
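
Because agent 0 is registered as the training agent, env.act only returns actions for the three SimpleAgents; the caller has to supply the DQN move itself (the same pattern appears in Example #23). A minimal rollout sketch, with the DQN action replaced by a hard-coded placeholder:

env = env_for_players()
obs = env.reset()
done = False
while not done:
    actions = env.act(obs)      # actions of the three SimpleAgents only
    actions.insert(0, 0)        # placeholder action for the training agent at index 0
    obs, reward, done, info = env.step(actions)
env.close()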
Example #7
    def _thunk():
        env = pommerman.make(
            'PommeFFACompetition-v0',
            [SimpleAgent(),
             SimpleAgent(),
             SimpleAgent(),
             SimpleAgent()])
        env._agents[0].is_alive = False
        env._agents[2].is_alive = False

        env._agents[3].restart = True
        env._agents[1].restart = True
        env._agents[2].restart = False
        env._agents[0].restart = False
        return env
        def f():
            config = ffa_competition_env()
            env = Wrapped_Env(**config["env_kwargs"])
            env.observation_space = spaces.Box(0,
                                               20,
                                               shape=(11, 11, 18),
                                               dtype=np.float32)

            # Add 3 random agents
            agents = []
            for agent_id in range(3):
                # if agent_id == env.winner_id:
                #     agents.append(TrainingAgent(config["agent"](agent_id, config["game_type"])))
                # else:
                agents.append(
                    SimpleAgent(config["agent"](agent_id,
                                                config["game_type"])))
            agent_id += 1
            agents.append(
                TrainingAgent(config["agent"](agent_id, config["game_type"])))

            env.set_agents(agents)
            env.set_training_agent(agents[-1].agent_id)
            env.set_init_game_state(None)
            return env
def main():
    # Print all possible environments in the Pommerman registry
    # Instantiate the environment
    DETERMINISTIC = False
    VISUALIZE = False

    if args.test:
        DETERMINISTIC = True
        VISUALIZE = True

    config = ffa_competition_env()
    env = Wrapped_Env(**config["env_kwargs"])
    # env.seed(0)
    env.observation_space = spaces.Box(0, 20, shape=(11, 11, 18))
    env.num_envs = 1

    # Add 3 random agents
    agents = []
    for agent_id in range(3):
        agents.append(
            SimpleAgent(config["agent"](agent_id, config["game_type"])))

    agent_id += 1

    # Add TensorforceAgent
    agents.append(TrainingAgent(config["agent"](agent_id,
                                                config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    # env = VecFrameStack(make_pommerman_env(env, 8, 0), 2)

    # print(env.reset())

    policy = CnnPolicy

    # Model(policy=policy,
    #            ob_space=env.observation_space,
    #            ac_space=env.action_space,
    #            nbatch_act=1,
    #            nbatch_train=100,
    #            nsteps=1000,
    #            ent_coef=0.01,
    #            vf_coef=0.5,
    #            max_grad_norm=0.5)
    num_timesteps = 10000

    learn(policy=policy,
          env=env,
          nsteps=800,
          nminibatches=4,
          lam=0.95,
          gamma=0.99,
          noptepochs=4,
          log_interval=1,
          ent_coef=.01,
          lr=lambda f: f * 2.5e-4,
          cliprange=lambda f: f * 0.1,
          total_timesteps=int(num_timesteps * 1.1))
def run_episode(agent, config, env, agent_id=0):
    # K.clear_session()
    # Add 3 random agents and one trained
    agents = [
        agent if i == agent_id else SimpleAgent(config["agent"](
            i, config["game_type"])) for i in range(4)
    ]
    env.set_agents(agents)
    env.set_init_game_state(None)

    # Seed and reset the environment
    env.seed(0)
    obs = env.reset()

    # Run the agents until we're done
    done = False
    lens = [None] * 4
    t = 0
    while not done:
        env.render()
        actions = env.act(obs)
        obs, reward, done, info = env.step(actions)
        for j in range(4):
            if lens[j] is None and reward[j] != 0:
                lens[j] = t
        t += 1

    env.render(close=True)
    env.close()
    return info, reward, lens
    def _make_env(self):
        """Initialise gym environment, adding agents to them."""
        agents = [
            self if agent_id == self.agent_id else SimpleAgent()
            for agent_id in range(NUM_AGENTS)
        ]

        return (pommerman.make("PommeFFACompetition-v0", agents))
 def make_env(self):
     agents = []
     for agent_id in range(NUM_AGENTS):
         if agent_id == self.agent_id:
             agents.append(self)
         else:
             agents.append(SimpleAgent())
     return pommerman.make('PommeFFACompetition-v0', agents)
 def init(self, config):
     self.env.seed(0)
     # Add 3 random agents
     agents = []
     for agent_id in range(4):
         agents.append(
             SimpleAgent(config["agent"](agent_id, config["game_type"])))
     self.env.set_agents(agents)
     self.env.set_init_game_state(None)
def get_env():
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    agent_id = 0

    agents = [
        DQN(config["agent"](0, config["game_type"])),
        SimpleAgent(config["agent"](1, config["game_type"])),
        SimpleAgent(config["agent"](2, config["game_type"])),
        SimpleAgent(config["agent"](3, config["game_type"])),
    ]

    env.set_agents(agents)

    env.set_training_agent(agents[agent_id].agent_id)
    env.set_init_game_state(None)

    return env
Example #15
def testSimpleAgent():
    game_type = constants.GameType(4)

    board = [[0, 0, 2, 1, 1, 1], [0, 0, 0, 0, 0, 0], [2, 8, 0, 1, 0, 1],
             [1, 0, 1, 0, 10, 1], [1, 0, 3, 0, 0, 1], [1, 11, 1, 1, 1, 0]]
    bomb_info = [(0, 1, 2, None)]

    game_state = my_utility.get_gamestate(board, bomb_info)
    game_data = my_utility.get_gamedata(game_state, game_type)

    fm = forward_model.ForwardModel()

    obs = fm.get_observations(game_data.board, game_data.agents,
                              game_data.bombs, game_data.flames, False, None,
                              game_data.game_type, None)

    simple_agent = SimpleAgent()

    print(simple_agent.act(obs[1], spaces.Discrete(6)))
Example #16
 def make_env(self):
     agents = []
     for agent_id in range(4):
         if agent_id == self.agent_id:
             agents.append(self)
         else:
             agents.append(SimpleAgent())
     env = pommerman.make('PommeFFACompetition-v0', agents)
     env.set_training_agent(self.agent_id)
     return env
def makeTrainingObservation():
    env = Pomme(**config["env_kwargs"])
    agents = {}
    for agent_id in range(num_players):
        agent = TrainingAgent(config["agent"](agent_id, config["game_type"]))
        agents[agent_id] = agent
    simple_Agent_id = num_players
    agents[simple_Agent_id] = SimpleAgent(config["agent"](simple_Agent_id,
                                                          config["game_type"]))
    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)
    return env
def run(best_net,
        num_episodes,
        result_list=None,
        process_id=None,
        render=False):
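    # note: the best_net argument passed in is not used; a fresh network is loaded from 'best.model' below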
    best_net = Net(trained_model='best.model')
    agent_list = [
        ZeroAgent(net=best_net,
                  num_simulations=100,
                  is_self_play=False,
                  num_exploration_steps=0),
        SimpleAgent()
    ]

    env = pommerman.make('OneVsOne-v0', agent_list)

    for i_episode in range(num_episodes):
        state = env.reset()
        done = False
        initial_agents = state[0]['alive']
        survivors = initial_agents
        dead_agents = []
        while not done:
            if render:
                env.render()
            actions = env.act(state)
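            # the ZeroAgent action comes back as an enum; convert it to its integer value before stepping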
            actions[0] = actions[0].value
            state, reward, done, info = env.step(actions)

            survivors = state[0]['alive']
            for agent in initial_agents:
                if agent not in survivors and agent not in dead_agents:
                    dead_agents.append(agent)

        if process_id is not None:
            print(
                '[Process %d, Episode %d] Dead order: ' %
                (process_id, i_episode), str(dead_agents), 'Survivors:',
                survivors)
        else:
            print('[Episode %d] Dead order: ' % i_episode, str(dead_agents),
                  'Survivors:', survivors)

        if result_list is None:
            result_list = []
        result_list.append((dead_agents, survivors))

    env.close()

    return result_list
Example #19
 def make_env(self, config):
     # Instantiate the environment
     env = Pomme(**config["env_kwargs"])
     # Add agents
     agents = []
     for agent_id in range(NUM_AGENTS):
         if agent_id == self.agent_id:
             agents.append(self)
         else:
             agents.append(
                 SimpleAgent(config["agent"](agent_id,
                                             config["game_type"])))
     env.set_agents(agents)
     env.set_init_game_state(None)
     return env
Example #20
def make_agent_env(n_ppo, n_simple, render):
    # Create environment/agents
    config = "PommeFFACompetition-v0"
    agents = [TensorForceAgent(algorithm="ppo") for _ in range(n_ppo)]
    agents += [SimpleAgent() for _ in range(n_simple)]
    n_random = 4 - n_ppo - n_simple
    agents += [RandomAgent() for _ in range(n_random)]
    env = make(config, agents, None)
    training_agent = agents[0]
    env.set_training_agent(training_agent.agent_id)

    # Map to Tensorforce environment/agents
    wrapped_env = WrappedEnv(env, visualize=render)
    agent = training_agent.initialize(env)

    return agent, wrapped_env
Example #21
def main(max_steps=200, train_for=100, render=False):
    '''CLI interface to bootstrap training'''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    args = parser.parse_args()

    config = args.config

    our_selection = DQN_TensorForce_Agent()
    # our_selection.restore_agent('PPO_Model.file')
    agents = [our_selection, SimpleAgent(), RandomAgent(), RandomAgent()]

    env = make(config, agents)
    training_agent = our_selection
    env.set_training_agent(our_selection.agent_id)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=(args.render or render))
    runner = Runner(agent=agent, environment=wrapped_env)
    runner.run(episodes=train_for, max_episode_timesteps=max_steps)
    won = len([x for x in runner.episode_rewards if x == 1])
    tie = len([x for x in runner.episode_timesteps if x == max_steps])
    lost = train_for - won - tie
    # print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
    #       runner.episode_times)
    print(won, tie, lost)
    # our_selection.save_model(file='PPO_Model.file')

    try:
        runner.close()
    except AttributeError:
        pass
def set_pommerman_env(agent_id=0):
    # Instantiate the environment
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    np.random.seed(0)
    env.seed(0)
    # Add 3 Simple Agents and 1 DQN agent
    agents = [
        DQN(config["agent"](agent_id, config["game_type"])) if i == agent_id
        else SimpleAgent(config["agent"](i, config["game_type"]))
        for i in range(4)
    ]
    env.set_agents(agents)
    env.set_training_agent(
        agents[agent_id].agent_id)  # training_agent is only dqn agent
    env.set_init_game_state(None)

    return env
Example #23
def runner(id, num_episodes, fifo, _args):
    # make sure agents play at all positions
    agent_id = id % NUM_AGENTS


    for j in range(num_episodes):
        dirname = "./agent"+str(id)+"_episode_"+str(j)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
            print("made dir")
        agent_list = []
        agent = MCTSAgent()
        for i in range(NUM_AGENTS):
            if i == agent_id:
                agent.set_agent_id(agent_id)
                agent_list.append(agent)
            else:
                agent_list.append(SimpleAgent())
        print(agent_list)
        env = pommerman.make('PommeFFACompetition-v0', agent_list)
        env.set_training_agent(agent_id)
        step = 0
        # Run the episodes just like OpenAI Gym
        sum_rewards = 0
        obs = env.reset()
        state = env.get_json_info()
        done = False
        start_time = time.time()
        while not done:
            # env.render()
            actions = env.act(obs)
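            # env.act skips the training agent, so get its move from MCTS and insert it at agent_id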
            action = agent.search(state)
            actions.insert(agent_id, action)
            print(actions)
            obs, step_reward, done, info = env.step(actions)
            state = env.get_json_info()
            sum_rewards += step_reward[agent_id]
            step += 1
            env.save_json(dirname)

        elapsed = time.time() - start_time
        env.close()
Example #24
def main():
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)

    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])

    # Add 3 agents
    agents = {}
    for agent_id in range(4):
        agents[agent_id] = SimpleAgent(config["agent"](agent_id,
                                                       config["game_type"]))

    # agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")

    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)

    demo = []

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        demo.append(env.get_json_info())
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            demo.append(env.get_json_info())
        if 1 in reward:
            winner = reward.index(1)
        else:
            winner = None

        print('Episode {} finished'.format(i_episode))
    env.close()

    # If game not tied, save demonstration
    if winner is not None:
        demonstration = {'demo': demo, 'winner': winner}
        pickle.dump(demonstration, open("demonstration.p", "wb"))
Example #25
    def _thunk():
        game_type = config['game_type']
        agent_type = config['agent']
        env = config['env'](**config["env_kwargs"])
        env.seed(args.seed + rank)

        if args.how_train == 'simple':
            agents = [
                SimpleAgent(agent_type(game_type=game_type)) for _ in range(3)
            ]
            training_agent_id = rank % 4
            agents.insert(training_agent_id, training_agents[0])
            for agent_id, agent in enumerate(agents):
                agent.set_agent_id(agent_id)
            env.set_agents(agents)
            env.set_training_agents([training_agent_id])
            env.set_init_game_state(args.game_state_file)
        elif args.how_train == 'homogenous':
            # NOTE: We can't use just one agent character here because it needs to track its own state.
            # We do that by instantiating three more copies. There is probably a better way.
            if rank > -1:
                copies = [
                    training_agents[0].copy(
                        agent_type(agent_id=agent_id, game_type=game_type))
                    for agent_id in range(4)
                ]
            else:
                copies = training_agents * 4
            env.set_agents(copies)
            env.set_training_agents(list(range(4)))
            env.set_init_game_state(args.game_state_file)
        else:
            raise ValueError('Unsupported how_train option: {}'.format(args.how_train))

        env = WrapPomme(env, args.how_train)
        # TODO: Add the FrameStack in.
        env = MultiAgentFrameStack(env, args.num_stack)
        return env
    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.
        # Returns
            observation (object): The initial observation of the space. Initial reward is assumed to be 0.
        """
        # Add 3 random agents
        train_agent_pos = np.random.randint(0, 4)
        agents = []
        for agent_id in range(4):
            if agent_id == train_agent_pos:
                agents.append(
                    TensorforceAgent(config["agent"](agent_id,
                                                     config["game_type"])))
            else:
                agents.append(
                    SimpleAgent(config["agent"](agent_id,
                                                config["game_type"])))
        self.gym.set_agents(agents)
        self.gym.set_training_agent(agents[train_agent_pos].agent_id)

        obs = self.gym.reset()
        agent_obs = self.featurize(obs[self.gym.training_agent])
        return agent_obs
Example #27
    def __init__(self,
                 mcts_iters,
                 discount=1.0,
                 c=1.5,
                 temp=1.0,
                 tempsteps=None,
                 agent_id=0,
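                 # note: this default SimpleAgent is created once, when the function is defined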
                 opponent=SimpleAgent(),
                 model_save_file=None,
                 *args,
                 **kwargs):
        super(MCTSAgent, self).__init__(*args, **kwargs)
        self.agent_id = agent_id
        self.env = self.make_env(opponent)
        self.reset_tree()
        self.mcts_iters = mcts_iters
        self.mcts_c_puct = c
        self.discount = discount
        self.init_temp = temp
        self.tempsteps = tempsteps

        self.model_save_file = model_save_file

        self.train_count = 0
# Add 3 random agents
agents = []
for agent_id in range(4):
    if agent_id == agent_pos:
        # agents.append(Cnn12833Dense1281(env.action_space.n, BOARD_SIZE, character=config["agent"](agent_id, config["game_type"]),
        #                       save_path=model_path))
        # agents.append(Dense82(env.action_space.n, BOARD_SIZE, character=config["agent"](agent_id, config["game_type"]),
        #                       save_path=model_path2))
        # agents.append(Dense128(env.action_space.n, BOARD_SIZE, character=config["agent"](agent_id, config["game_type"]),
        #                       save_path=model_path3))
        # agents.append(Dense128(env.action_space.n, BOARD_SIZE, character=config["agent"](agent_id, config["game_type"]),
        #                        save_path='./dqn/model/ddgp_dense_128_1_rs/model.h4'))
        agents.append(Cnn12832Dense1281(env.action_space.n, BOARD_SIZE, character=config["agent"](agent_id, config["game_type"]),
                              save_path='./dqn/model/ddgp_cnn128_3_2_dense_128_1_rs/model.h4'))
    else:
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

env.set_agents(agents)
env.set_init_game_state(None)

# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)
    obs, reward, done, info = env.step(actions)
env.render(close=True)
Example #29
    def setup(self):
        agents = []
        if self.phase == 0:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 20
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 1:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 2:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 3:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 4:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 0
            config["env_kwargs"]["num_items"] = 10
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2,
                          SimpleAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        for agent_id in self.agents_index:
            agents.insert(
                agent_id,
                BaseLineAgent(config["agent"](agent_id, config["game_type"])))

        self.env.set_agents(agents)
        self.env.set_init_game_state(None)
        self.observation_space = spaces.Dict({
            "boards":
            spaces.Box(low=-1, high=20, shape=(3, 11, 11)),
            "states":
            spaces.Box(low=-1, high=20, shape=(9, )),
        })

        # spaces.Box(low=-1.0, high=20.0, shape=(372, ), dtype=np.float32)  # leftover call; its result is never used
        self.action_space = self.env.action_space
class EvaluatorAgent(BaseAgent):
    def __init__(
        self,
        n_actions,
        character,
        evaluation_model=None,
        evaluation_model_path=None,
        # Set agent properties to preprocess observations
        use_history=True,  # Use previous observations for predictions
        use_2d=True,  # Use 2d convolutions
        patient=True,  # Wait to make initial observations (you don't need it if you don't use history)
        center_view=True,  # Use centering
        original_view=False,  # Use 11x11 board, if false, use 21x21
        verbose=False  # Print the chosen actions
    ):
        super(EvaluatorAgent, self).__init__(character=character)

        # Properties
        self.use_history = use_history
        self.use_2d = use_2d
        self.patient = patient
        self.center_view = center_view
        self.original_view = original_view
        self.verbose = verbose

        # Acting history for the evaluation
        self.actions_history = []
        self.observations_history = []
        self.episode_count = 0
        self.steps = 0

        self.n_actions = n_actions

        self.simple_agent = SimpleAgent(character=character)
        # Load any custom model
        self.evaluation_model = None
        if evaluation_model:
            self.evaluation_model = evaluation_model
            if evaluation_model_path:
                try:
                    self.evaluation_model.load_weights(evaluation_model_path)
                except Exception:
                    print('Weights load failed')
        elif evaluation_model_path:
            try:
                self.evaluation_model = load_model(evaluation_model_path)
            except Exception:
                print('Model load failed')
        else:
            print('Use SimpleAgent')

    # Featurization
    def featurize(self, obs):
        return featurize(obs, center=self.center_view, crop=self.original_view)

    # Acting
    def act(self, obs, action_space=None):
        # Initialize new episode
        if self.steps == 0:
            self.actions_history.append([])

        # Create observation, merge with the predecessors
        obs_f = self.featurize(obs)

        # If our agent is patient, wait for the first 3 steps to make observations
        if self.patient and len(
                self.observations_history) < history_length - 1:
            self.observations_history.append(obs_f)
            self.actions_history[self.episode_count].append(0)
            return 0

        if self.use_history:
            obs_history = self.make_observation(obs_f, self.steps, self.use_2d)
        else:
            obs_history = obs_f

        self.observations_history.append(
            obs_f)  # Append current observation after the merge

        # Predict action
        if self.evaluation_model is not None:
            res = self.evaluation_model.predict(
                obs_history.reshape((1, ) + obs_history.shape))[0]
            res = np.argmax(res)
        else:
            res = self.simple_agent.act(obs, action_space)
        if self.verbose:
            print(res, end='; ')

#        # In the dueling DQN the first output relates to the advantage
#        if len(res) > self.n_actions:
#            res = res[1:]

        self.actions_history[self.episode_count].append(res)

        if self.verbose:
            print(ACTIONS[res])

        self.steps += 1
        return res

    def make_observation(self, obs, i, use_2d=True):
        if i == 0:  # If it is a first observation
            res = np.array([obs for _ in range(history_length)])
        elif i < history_length - 1:  # If there are less than 3 observations in a history
            n_first = history_length - 1 - i
            res = np.concatenate(
                [
                    np.array([
                        self.observations_history[0] for _ in range(n_first)
                    ]),  # Repeat the first observation
                    np.array(self.observations_history[:i]).reshape(
                        i, view_size, view_size,
                        n_channels),  # Add next observations
                    obs.reshape(1, view_size, view_size, n_channels)
                ],  # Current observation
                axis=0)
        else:
            res = np.concatenate(
                [
                    np.array(
                        self.observations_history[i - history_length + 1:i]).
                    reshape(history_length - 1, view_size, view_size,
                            n_channels),  # Add next observations
                    obs.reshape(1, view_size, view_size, n_channels)
                ],  # Current observation
                axis=0)
        if use_2d:
            res = np.concatenate(res, axis=-1)
        return res

    # Evaluation
    def end_episode(self):
        self.steps = 0
        self.episode_count += 1
        self.observations_history = []

    def reset_run(self):
        self.actions_history = []
        self.episode_count = 0
        self.steps = 0

    def close(self):
        pass

    def run_episode(self, config, env):
        return run_episode(self, config, env, self.agent_id)

    def plot_statistics(self, info, selected_labels):
        return plot_statistics(self, info, selected_labels)

    def evaluate_agent(self, selected_labels, iterations=100, plot=True):
        return evaluate_agent(self, selected_labels, self.agent_id, iterations,
                              plot)
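
For reference, make_observation stacks the last history_length featurized frames and, when use_2d is set, folds the history into the channel axis. A standalone sketch of the resulting shapes, assuming history_length = 4, view_size = 11 and n_channels = 18 (the real constants are defined elsewhere in the repository):

import numpy as np

history_length, view_size, n_channels = 4, 11, 18  # assumed values
frames = [np.zeros((view_size, view_size, n_channels)) for _ in range(history_length)]

stacked = np.array(frames)                  # shape (4, 11, 11, 18): what make_observation builds first
merged = np.concatenate(stacked, axis=-1)   # shape (11, 11, 72): the use_2d=True output
print(stacked.shape, merged.shape)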