Example #1
# Imports assumed by this snippet:
import time
import numpy as np
from pommerman.envs.v0 import Pomme

def ffa_evaluate(env: Pomme, episodes, verbose, visualize, stop=False):
    """
    Evaluates the given Pommerman environment (which already includes the agents).

    :param env: The Pommerman environment to evaluate
    :param episodes: The number of episodes
    :param verbose: Whether to print verbose status information
    :param visualize: Whether to visualize the execution
    :param stop: Whether to wait for input after each step
    :return: The results of the evaluation of shape (episodes, 5) where the first column [:, 0] contains the result
             of the match (tie, win, incomplete) and the remaining columns contain the individual (final) rewards.
    """

    steps = np.empty(episodes)
    # first column: result, remaining columns: rewards
    results = np.empty((episodes, 1 + 4))

    start = time.time()

    # Run the episodes just like OpenAI Gym
    for i_episode in range(episodes):
        state = env.reset()
        done = False
        reward = []
        info = {}
        step = 0
        while not done:
            if visualize:
                env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            step += 1

            if stop:
                input()

        steps[i_episode] = step

        result = info['result']
        # save the result
        results[i_episode, 0] = result.value
        results[i_episode, 1:] = reward

        if verbose:
            delta = time.time() - start
            print('\r{:.2f} sec > Episode {} finished with {} ({})'.format(
                delta, i_episode, result, reward))

            if i_episode % 10 == 9 and i_episode != episodes - 1:
                ffa_print_stats(results, steps, i_episode + 1)

    env.close()

    if verbose:
        delta = time.time() - start
        print("Total time: {:.2f} sec".format(delta))
        ffa_print_stats(results, steps, episodes)

    return results
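
A minimal driver for this evaluator, assuming an environment assembled as in Example #2 below (ffa_v0_fast_env and SimpleAgent stand in for whatever configuration is actually used):

from pommerman.agents import SimpleAgent
from pommerman.configs import ffa_v0_fast_env
from pommerman.envs.v0 import Pomme

config = ffa_v0_fast_env()
env = Pomme(**config["env_kwargs"])
env.set_agents([SimpleAgent(config["agent"](i, config["game_type"])) for i in range(4)])
env.set_init_game_state(None)

results = ffa_evaluate(env, episodes=20, verbose=True, visualize=False)
print(results[:, 0])  # match results; columns 1-4 hold the final rewards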
Example #2
def main():
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)

    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])

    # Add four agents (uncomment the line below to give slot 3 to a human player)
    agents = {}
    for agent_id in range(4):
        agents[agent_id] = SimpleAgent(config["agent"](agent_id,
                                                       config["game_type"]))

    # agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")

    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)

    demo = []

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        demo.append(env.get_json_info())
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            demo.append(env.get_json_info())
        if 1 in reward:
            winner = reward.index(1)
        else:
            winner = None

        print('Episode {} finished'.format(i_episode))
    env.close()

    # If the game was not tied, save the demonstration
    if winner is not None:
        demonstration = {'demo': demo, 'winner': winner}
        with open("demonstration.p", "wb") as f:
            pickle.dump(demonstration, f)
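
Reading the demonstration back is just the inverse of the dump above; each entry of demo is the JSON game-state dict returned by env.get_json_info():

import pickle

with open("demonstration.p", "rb") as f:
    demonstration = pickle.load(f)

print("winner: agent", demonstration['winner'])
print("episode length:", len(demonstration['demo']), "recorded states")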
Example #3
agents = {}
for agent_id in range(3):
    agents[agent_id] = StaticAgent(config["agent"](agent_id, config["game_type"]))

# Add a human player agent in the last slot
agent_id += 1
agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")

env.set_agents(list(agents.values()))
env.set_init_game_state(None)


# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)
    obs, reward, done, info = env.step(actions)
env.render(close=True)
env.close()

# Print the result
print(info)
Example #4
# The original snippet begins mid-loop; the enclosing for/if is reconstructed here
# (which slot holds the trained model is an assumption; Cnn12832Dense1281, Dense128
# and BOARD_SIZE are names local to the surrounding project).
agents = []
for agent_id in range(4):
    if agent_id == 0:
        # agents.append(Dense128(env.action_space.n, BOARD_SIZE, character=config["agent"](agent_id, config["game_type"]),
        #                        save_path='./dqn/model/ddgp_dense_128_1_rs/model.h4'))
        agents.append(Cnn12832Dense1281(env.action_space.n, BOARD_SIZE, character=config["agent"](agent_id, config["game_type"]),
                                        save_path='./dqn/model/ddgp_cnn128_3_2_dense_128_1_rs/model.h4'))
    else:
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

env.set_agents(agents)
env.set_init_game_state(None)

# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)
    obs, reward, done, info = env.step(actions)
env.render(close=True)
env.close()

# Print the result
print(info)

Example #5

class PomFFA(gym.Env):
    agent_list = [HoldAgent(), HoldAgent(), HoldAgent(), HoldAgent()]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        print("pomme_config: ")
        print(pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        obs = self.preproess(obs)
        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # An agent won. Give them +1, others -1.
            if agent_id in obs['alive']:
                return 1.0 - self.total_reward
            else:
                return -0.5

        if obs["step_count"] >= 500:
            # Game is over from time. Everyone gets -1.
            return -0.5

        # Game running: 0 for alive, -1 for dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        blast = obs["bomb_blast_strength"]

        px = [1, -1, 0, 0]
        py = [0, 0, -1, 1]

        sum_reward = 0.0

        sum_reward += 20 * (len(obs["alive"]) - self.prev_alive)
        self.prev_alive = len(obs["alive"])

        if action == 0:
            sum_reward -= 0.1

        elif action == 5:
            # sum_reward += 1
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 2
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4
        else:
            assert (1 <= action <= 4), str(action)
            dx = x + px[action - 1]
            dy = y + py[action - 1]
            if (not (dx < 0 or dx > 10 or dy < 0
                     or dy > 10)) and obs["board"][dx][dy] == 0:
                if self.visited[dx][dy] > 0:
                    sum_reward -= 0.1
                else:
                    sum_reward += 0.3
                    self.visited[dx][dy] = 1

        sum_reward = sum_reward * 1.0 / 100.0
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.8 or new_total_reward < -0.5:
            sum_reward = 0.0
        else:
            self.total_reward = new_total_reward

        return sum_reward

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)

        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id
                not in self.alive_agents) or obs["step_count"] >= 500:
            done = True
        obs = self.preproess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order].copy()

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))
        # board_size = 11

        board = spaces.Box(low=0,
                           high=len(constants.Item),
                           shape=(board_size, board_size))
        danger = spaces.Box(low=0, high=20, shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0,
                                         high=num_items,
                                         shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=10, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        # return spaces.Dict({"board": board,
        #                     "bomb_blast_strength": bomb_blast_strength, "bomb_life": bomb_life,
        #                     "flame_life": flame_life,
        #                     "position": position, "ammo": ammo, "blast_strength": blast_strength})
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength,
            "danger": danger
        })

    @staticmethod
    def preproess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])

        board = obs['board']
        bomb_blast_strength = obs['bomb_blast_strength']
        bomb_life = obs['bomb_life']
        # flame_life = obs['flame_life']
        # position = obs['position']
        # ammo = obs['ammo']
        # blast_strength = obs['blast_strength']

        # 10 == "no threat"; smaller values count down the turns until a blast.
        danger = np.full((11, 11), 10, dtype=int)

        for x in range(11):
            for y in range(11):
                if board[x][y] == 4:
                    board[x][y] = 0
                    danger[x][y] = 0
                elif board[x][y] == 3:
                    board[x][y] = 0
                elif board[x][y] == 10:
                    board[x][y] = 1
                elif board[x][y] > 10:
                    board[x][y] = 5
                elif 6 <= board[x][y] <= 8:
                    board[x][y] = 3
                elif board[x][y] == 1:
                    board[x][y] = 4

        for x in range(11):
            for y in range(11):
                if bomb_life[x][y] > 0:
                    strength = int(bomb_blast_strength[x][y] + 0.5)
                    for tx in range(max(0, x - strength + 1),
                                    min(11, x + strength)):
                        danger[tx][y] = min(danger[tx][y], bomb_life[x][y])
                    for ty in range(max(0, y - strength + 1),
                                    min(11, y + strength)):
                        danger[x][ty] = min(danger[x][ty], bomb_life[x][y])

        obs['danger'] = danger

        return obs

    def render(self):
        self.pomme.render()
Example #6
class MultiAgent(MultiAgentEnv):
    def __init__(self):
        super(MultiAgent, self).__init__()
        self.phase = 0
        self.setup()

    def setup(self):
        agents = []
        if self.phase == 0:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 20
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 1:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 2:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 3:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        if self.phase == 4:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 0
            config["env_kwargs"]["num_items"] = 10
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2,
                          SimpleAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        for agent_id in self.agents_index:
            agents.insert(
                agent_id,
                BaseLineAgent(config["agent"](agent_id, config["game_type"])))

        self.env.set_agents(agents)
        self.env.set_init_game_state(None)
        self.observation_space = spaces.Dict({
            "boards":
            spaces.Box(low=-1, high=20, shape=(3, 11, 11)),
            "states":
            spaces.Box(low=-1, high=20, shape=(9, )),
        })

        self.action_space = self.env.action_space

    def set_phase(self, phase):
        print("learn phase " + str(phase))
        self.phase = phase
        self.setup()
        self.reset()

    def step(self, actions):
        obs = self.env.get_observations()
        all_actions = self.env.act(obs)
        for index in self.agents_index:
            try:
                action = actions[index]
            except KeyError:
                action = 0
            all_actions[index] = action

        step_obs = self.env.step(all_actions)
        obs, rew, done, info = {}, {}, {}, {}
        for i in actions.keys():
            obs[i], rew[i], done[i], info[i] = [
                featurize(step_obs[0][i]),
                step_obs[1][i],
                step_obs[1][i] == -1 or step_obs[2],
                step_obs[3],
            ]

        done["__all__"] = step_obs[2]
        return obs, rew, done, info

    def reset(self):
        obs = self.env.reset()
        return {i: featurize(obs[i]) for i in self.agents_index}
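
A rollout sketch for this wrapper, assuming random actions are acceptable for the learning agents (the scripted opponents act internally via env.act):

env = MultiAgent()
obs = env.reset()                 # {agent_index: featurized observation}
while True:
    actions = {i: env.action_space.sample() for i in env.agents_index}
    obs, rew, done, info = env.step(actions)
    if done["__all__"]:
        break
env.set_phase(1)                  # advance the curriculum and rebuild the env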
Example #7
class MultiAgend(MultiAgentEnv):
    def __init__(self):
        super(MultiAgend, self).__init__()
        self.phase = 0
        self.next_phase = 0
        self.steps = 0
        self.last_featurize_obs = None
        self.setup()

    def featurize(self, obs):

        enemies = []
        for agent_id in self.enemies_agents_index:
            if agent_id == 0:
                enemies.append(Item.Agent0)
            if agent_id == 1:
                enemies.append(Item.Agent1)
            if agent_id == 2:
                enemies.append(Item.Agent2)
            if agent_id == 3:
                enemies.append(Item.Agent3)

        for enemie in obs["enemies"]:
            if enemie not in enemies:
                obs["board"] = ma.masked_equal(
                    obs["board"], enemie.value).filled(fill_value=0)

        board = np.copy(obs["board"])
        board[obs["position"][0], obs["position"][1]] = 0.0
        enemie_pos = np.full((11, 11), 0)
        for enemie in obs["enemies"]:
            enemie_pos = enemie_pos | ma.masked_not_equal(
                board, enemie.value).filled(fill_value=0)
            board = ma.masked_equal(board, enemie.value).filled(fill_value=0)

        wood = ma.masked_not_equal(board, 2).filled(fill_value=0)
        wood = (wood > 0).astype(np.float32)
        board = ma.masked_equal(board, 2).filled(fill_value=0)

        stone = ma.masked_not_equal(board, 1).filled(fill_value=0)
        stone = (stone > 0).astype(np.float32)
        board = ma.masked_equal(board, 1).filled(fill_value=0)
        enemie_pos = (enemie_pos > 0).astype(np.float32)

        board = ma.masked_equal(board,
                                obs["teammate"].value).filled(fill_value=0)

        flames = ma.masked_not_equal(board, 4).filled(fill_value=0)
        flames = (flames > 0).astype(np.float32)

        board = ma.masked_equal(board, 4).filled(fill_value=0)
        board = ma.masked_equal(board, 3).filled(fill_value=0)

        teammate_pos = ma.masked_not_equal(
            board, obs["teammate"].value).filled(fill_value=0)
        teammate_pos = (teammate_pos > 0).astype(np.float32)
        board = ma.masked_equal(board,
                                obs["teammate"].value).filled(fill_value=0)
        items = board.astype(np.float32)

        pos = np.full((11, 11), 0)
        pos[obs["position"][0], obs["position"][1]] = 1.0
        pos = pos.astype(np.float32)

        bomb_life = obs["bomb_life"].astype(np.float32)
        bomb_blast_strength = obs["bomb_blast_strength"].astype(np.float32)

        ammo = utility.make_np_float([obs["ammo"]])
        blast_strength = utility.make_np_float([obs["blast_strength"]])
        can_kick = utility.make_np_float([obs["can_kick"]])
        game_end = utility.make_np_float([
            (self.max_steps - self.steps) / self.max_steps
        ])

        actual_featurize_obs = {
            'boards':
            np.stack([
                enemie_pos, pos, wood, stone, items, flames, teammate_pos,
                bomb_life, bomb_blast_strength
            ],
                     axis=0),
            'states':
            np.concatenate([ammo, blast_strength, can_kick, game_end]),
        }

        if self.last_featurize_obs is None:
            featurize_obs = {
                'boards':
                np.concatenate([
                    actual_featurize_obs['boards'],
                    actual_featurize_obs['boards']
                ],
                               axis=0),
                'states':
                np.concatenate([
                    actual_featurize_obs['states'],
                    actual_featurize_obs['states']
                ]),
            }
        else:
            featurize_obs = {
                'boards':
                np.concatenate([
                    self.last_featurize_obs['boards'],
                    actual_featurize_obs['boards']
                ],
                               axis=0),
                'states':
                np.concatenate([
                    self.last_featurize_obs['states'],
                    actual_featurize_obs['states']
                ]),
            }

        self.last_featurize_obs = actual_featurize_obs
        return featurize_obs

    def setup(self):
        agents = []
        if self.phase == 0:
            arr = [0, 1]
            random.shuffle(arr)
            agents_index = arr.pop()
            op_index = arr.pop()
            self.agents_index = [agents_index]
            self.enemies_agents_index = [op_index]
            self.max_steps = 200
            config = ffa_v0_fast_env()
            config["env_kwargs"]["max_steps"] = self.max_steps
            agents.insert(
                agents_index,
                BaseLineAgent(config["agent"](agents_index,
                                              config["game_type"])))
            agents.insert(
                op_index,
                NoDoAgent(config["agent"](op_index, config["game_type"])))
            self.env = Pomme(**config["env_kwargs"])
            self.env.set_agents(agents)
            init_state = {
                'board_size': '11',
                'step_count': '0',
                'board': '',
                'agents':
                '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
                'bombs': '[]',
                'flames': '[]',
                'items': '[]',
                'intended_actions': '[0, 0]'
            }
            board = np.full((11, 11), 0)
            init_state['board'] = json.dumps(board.tolist())
            agents_json = json.loads(init_state['agents'])
            random_pos = np.random.choice(board.shape[0], (2, 2),
                                          replace=False)
            agents_json[0]["position"] = random_pos[0].tolist()
            agents_json[1]["position"] = random_pos[1].tolist()
            init_state['agents'] = json.dumps(agents_json)
            self.env._init_game_state = init_state
            self.env.reset()

        if self.phase == 1:
            arr = [0, 1]
            random.shuffle(arr)
            agents_index = arr.pop()
            op_index = arr.pop()
            self.agents_index = [agents_index]
            self.enemies_agents_index = [op_index]
            self.max_steps = 200
            config = ffa_v0_fast_env()
            config["env_kwargs"]["max_steps"] = self.max_steps
            agents.insert(
                agents_index,
                BaseLineAgent(config["agent"](agents_index,
                                              config["game_type"])))
            agents.insert(
                op_index,
                NoDoAgent(config["agent"](op_index, config["game_type"])))
            self.env = Pomme(**config["env_kwargs"])
            self.env.set_agents(agents)
            init_state = {
                'board_size': '11',
                'step_count': '0',
                'board': '',
                'agents':
                '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
                'bombs': '[]',
                'flames': '[]',
                'items': '[]',
                'intended_actions': '[0, 0]'
            }
            board = np.full((11, 11), 0)
            board[5, :] = (np.ones(11) * 2)
            agents_json = json.loads(init_state['agents'])
            agents_json[0]["position"] = [
                random.randint(0, 4),
                random.randint(0, 10)
            ]
            agents_json[1]["position"] = [
                random.randint(6, 10),
                random.randint(0, 10)
            ]
            init_state['agents'] = json.dumps(agents_json)
            init_state['board'] = json.dumps(board.tolist())
            self.env._init_game_state = init_state
            self.env.reset()

        # boards: two stacked 9-plane frames from featurize -> shape (18, 11, 11)
        self.observation_space = spaces.Dict({
            'boards':
            spaces.Box(low=-1, high=25, shape=(18, 11, 11), dtype=np.float32),
            'states':
            spaces.Box(low=-1, high=25, shape=(8, ), dtype=np.float32)
        })

        self.action_space = self.env.action_space

    def set_phase(self, phase):
        print("learn phase " + str(phase))
        self.next_phase = phase

    def close(self):
        self.env.close()

    def step(self, actions):
        self.steps = self.steps + 1
        obs = self.env.get_observations()
        all_actions = self.env.act(obs)
        assert (len(all_actions) == len(self.agents_index) +
                len(self.enemies_agents_index))

        for index in self.agents_index:
            try:
                action = actions[index]
            except KeyError:
                action = 0
            assert all_actions[index] is None
            all_actions[index] = action

        step_obs = self.env.step(all_actions)
        obs, rew, done, info = {}, {}, {}, {}
        for i in actions.keys():
            obs[i], rew[i], done[i], info[i] = [
                self.featurize(step_obs[0][i]), step_obs[1][i],
                step_obs[1][i] == -1 or step_obs[2], step_obs[3]
            ]

        done["__all__"] = step_obs[2]
        return obs, rew, done, info

    def reset(self):
        self.steps = 0
        self.phase = self.next_phase
        self.setup()
        obs = self.env.get_observations()
        return {i: self.featurize(obs[i]) for i in self.agents_index}
Example #8
class PomFFA(gym.Env):

    def __init__(self, env_config=None):

        self.agent_list = [HoldAgent(), agents.SimpleAgent(), HoldAgent(), HoldAgent()]
        # self.agent_list = [agents.SimpleAgent(), agents.SimpleAgent(), agents.SimpleAgent(), agents.RandomAgent()]
        self.all_obs = None
        self.all_action = None
        self.cur_obs = None
        self.alive_agents = [10, 11, 12, 13]
        self.player_agent_id = 10
        self.total_reward = 0

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs.copy()
        obs = self.preproess(obs)
        self.total_reward = 0
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # An agent won. Give them +1, others -1.
            if agent_id in obs['alive']:
                return 0.5
            else:
                return -0.5

        if obs["step_count"] >= 500:
            # Game is over from time. Everyone gets -1.
            return -0.5

        # Game running: 0 for alive, -1 for dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        # blast = obs["bomb_blast_strength"]

        px = [0, 1, 0, -1]
        py = [1, 0, -1, 0]

        sum_reward = 0
        if action == 5:
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 1
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4

        sum_reward = sum_reward * 1.0 / 200.0
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.5 or new_total_reward < -0.5:
            sum_reward = 0
        else:
            self.total_reward = new_total_reward

        return sum_reward

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)

        # print(obs)
        del self.all_obs
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        del self.cur_obs
        self.cur_obs = obs.copy()
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']

        if self.player_agent_id not in self.alive_agents or self.cur_obs["step_count"] >= 500:
            done = True
        obs = self.preproess(obs)

        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']
        # print("env config: {}".format(env_config))
        # board_size = 11

        board = spaces.Box(low=0, high=len(constants.Item), shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2,))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1,))
        ammo = spaces.Box(low=0, high=num_items, shape=(1,))
        return spaces.Dict({"board": board, "bomb_blast_strength": bomb_blast_strength, "bomb_life": bomb_life,
                            "flame_life": flame_life,
                            "position": position, "ammo": ammo, "blast_strength": blast_strength})

    @staticmethod
    def preproess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        self.pomme.render()
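
These gym.Env wrappers with an env_config argument are shaped for Ray RLlib; a hedged training sketch (classic ray/tune API, which may differ across Ray versions):

import ray
from ray import tune

ray.init()
tune.run(
    "PPO",
    stop={"training_iteration": 10},
    config={
        "env": PomFFA,                        # RLlib accepts the env class directly
        "env_config": {"is_training": True},  # forwarded to PomFFA.__init__
        "num_workers": 0,
    },
)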
Example #9
class PomFFA(gym.Env):
    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    ammo = 1
    blast_strength = 2
    state = {}

    def __init__(self, env_config=None):
        # Avoid the mutable-default-argument pitfall.
        env_config = env_config or {}
        pomme_config = pommerman.configs.ffa_competition_env()
        self.reward = Reward(env_config.get("reward"))

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)
        self.init_state()

    def init_state(self):
        self.state['agent_id'] = self.agent_id
        self.state['alive'] = self.alive_agents
        self.state['visited'] = set()
        self.state['blast_strength'] = self.blast_strength
        self.state['ammo'] = self.ammo
        self.state["bombs"] = {}

    def reset(self):
        all_obs = self.pomme.reset()
        obs = self.get_for_training_agent(all_obs)
        self.init_state()

        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']

        obs = self.build_obs(obs, self.state)
        return obs

    def step(self, action):
        actions = self.pomme.act(self.state['all_obs'])
        actions = self.set_for_training_agent(actions, action)

        all_obs, _, _, _ = self.pomme.step(actions)
        obs = self.get_for_training_agent(all_obs)
        info = {'board': obs['board'], 'blast_strength': obs['blast_strength']}
        done = self.get_done(obs)
        reward, self.state = self.reward.get_reward(action, obs, self.state)

        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']
        self.state['blast_strength'] = obs['blast_strength']
        self.state['ammo'] = obs['ammo']

        obs = self.build_obs(obs, self.state)
        return obs, reward, done, info

    def get_for_training_agent(self, inputs):
        order = self.agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        if self.agent_id not in obs['alive']:
            return True
        if obs['step_count'] >= 800:
            return True
        return False

    def build_obs(self, obs, state):
        board = obs['board']
        bomb_blast_strength = obs['bomb_blast_strength']
        bomb_life = obs['bomb_life']
        flame_life = obs['flame_life']
        agent_id = state['agent_id']
        ammo = state['ammo']
        passage = np.zeros_like(board)
        wall = np.zeros_like(board)
        wood = np.zeros_like(board)
        bomb = np.zeros_like(board)
        bonus = np.zeros_like(board)
        me = np.zeros_like(board)
        enemy = np.zeros_like(board)
        for y in range(board.shape[0]):
            for x in range(board.shape[1]):
                v = board[y][x]
                if v == 0:
                    passage[y][x] = 1
                elif v == 1:
                    wall[y][x] = 1
                elif v == 2:
                    wood[y][x] = 1
                elif v == 3:
                    bomb = create_cross(bomb, (y, x),
                                        bomb_blast_strength[y][x])
                elif v == 4:
                    pass
                elif v == 6 or v == 7:
                    bonus[y][x] = 1
                elif v >= 10:
                    if v == agent_id:
                        me[y][x] = 1
                    else:
                        enemy[y][x] = 1
                    if bomb_blast_strength[y][x] > 0:
                        bomb = create_cross(bomb, (y, x),
                                            bomb_blast_strength[y][x])

        ammo = ammo * np.ones_like(board) / 12
        bomb_life /= 9
        flame_life /= 3
        board = np.transpose(
            np.stack([
                passage, wall, wood, bomb, bonus, me, enemy, bomb_life,
                flame_life, ammo
            ]), [1, 2, 0])
        return board

    @staticmethod
    def init_observation_space(env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']

        # 10 channels: passage, wall, wood, bomb, bonus, me, enemy,
        # bomb_life, flame_life, ammo (cf. build_obs above)
        board = spaces.Box(low=0, high=1, shape=(board_size, board_size, 10))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        ammo = spaces.Box(low=0,
                          high=num_items,
                          shape=(board_size, board_size))
        # return spaces.Dict({"board": board, "bomb_life": bomb_life, "flame_life": flame_life,"ammo": ammo})
        return board

    @staticmethod
    def init_action_space():
        return spaces.Discrete(6)

    def render(self):
        self.pomme.render()
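
create_cross is not shown in this snippet. A minimal reconstruction consistent with its call sites in build_obs (mark a bomb's blast cross on a 2-D channel; the reach matches the blast-range loops in Example #5's preproess) might be:

import numpy as np

def create_cross(channel, center, strength):
    # Hypothetical helper: set the cells a bomb at `center` can reach to 1.
    y, x = center
    s = int(strength)
    h, w = channel.shape
    channel[max(0, y - s + 1):min(h, y + s), x] = 1  # vertical arm
    channel[y, max(0, x - s + 1):min(w, x + s)] = 1  # horizontal arm
    return channel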
Example #10
class PomFFA(gym.Env):
    agent_list = [
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
    ]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        print("pomme_config: ")
        print(pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomm_config['game_type'])
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        obs = self.preproess(obs)
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # An agent won. Give them +1, others -1.
            if agent_id in obs['alive']:
                return 1
            else:
                return -1

        if obs["step_count"] >= 500:
            # Game is over from time. Everyone gets -1.
            return -1

        # Game running: 0 for alive, -1 for dead.
        if agent_id not in obs['alive']:
            return -1
        #
        # x, y = obs["position"]
        # blast = obs["bomb_blast_strength"]
        #
        # for w in range(11):
        #     if blast[x][w] > int(math.fabs(w-y)):
        #         return -10
        #
        #     if blast[w][y] > int(math.fabs((w-x))):
        #         return -10

        return 0

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)

        # print(obs)

        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id
                not in self.alive_agents) or obs["step_count"] >= 500:
            done = True
        obs = self.preproess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))
        # board_size = 11

        board = spaces.Box(low=0,
                           high=len(constants.Item),
                           shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0,
                                         high=num_items,
                                         shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def preproess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        self.pomme.render()
Example #11
class PomFFA(gym.Env):
    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    all_obs = None
    all_action = None
    pre_obs = None
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    state = {}

    def __init__(self, env_config=None):

        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v
            self.reward = Reward(env_config.get("reward"))
        else:
            self.reward = Reward()

        print("Pommerman Config:", pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])

        self.observation_space = self.init_observation_space(
            pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or env_config.get("is_training", True):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomm_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomm_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        state = {
            "prev_obs": None,
            "visited": set(),
            "agent_id": 10,
            "alive": [10, 11, 12, 13],
            "strength": 2,
            "ammo": 1,
            "bombs": {},
        }
        state['prev_obs'] = copy.deepcopy(obs)
        state['position'] = obs['position']
        self.state = state
        obs = self.preproess(obs)
        return obs

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        actions = self.set_for_training_agent(actions, action)

        obs, rewards, _, _ = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        reward, self.state = self.reward.get_reward(action, obs, self.state)
        done = self.get_done(obs)
        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['position'] = obs['position']
        obs = self.preproess(obs)

        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        if self.agent_id not in obs['alive']:
            return True
        if obs['step_count'] >= 800:
            return True
        return False

    @staticmethod
    def init_observation_space(env_config):
        """
            observations for agents
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']

        board = spaces.Box(low=0,
                           high=len(constants.Item),
                           shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0,
                                         high=num_items,
                                         shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def init_action_space():
        return spaces.Discrete(6)

    @staticmethod
    def preproess(obs):
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']

        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])

        return obs

    def render(self):
        self.pomme.render()
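
All of these PomFFA wrappers expose the standard gym interface, so a plain random-action rollout is enough for a smoke test (assumes the class and imports from the example above):

env = PomFFA()
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    total_reward += reward
print("episode reward:", total_reward)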