def ffa_evaluate(env: Pomme, episodes, verbose, visualize, stop=False):
    """
    Evaluates the given Pommerman environment (which already includes the agents).

    :param env: The environment to evaluate
    :param episodes: The number of episodes
    :param verbose: Whether to print verbose status information
    :param visualize: Whether to visualize the execution
    :param stop: Whether to wait for input after each step
    :return: The results of the evaluation of shape (episodes, 5) where the first
             column [:, 0] contains the result of the match (tie, win, incomplete)
             and the remaining columns contain the individual (final) rewards.
    """
    # first element: result, additional elements: rewards
    steps = np.empty(episodes)
    results = np.empty((episodes, 1 + 4))

    start = time.time()

    # Run the episodes just like OpenAI Gym
    for i_episode in range(episodes):
        state = env.reset()
        done = False
        reward = []
        info = {}
        step = 0
        while not done:
            if visualize:
                env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            step += 1
            if stop:
                input()

        steps[i_episode] = step
        result = info['result']

        # save the result
        results[i_episode, 0] = result.value
        results[i_episode, 1:] = reward

        if verbose:
            delta = time.time() - start
            print('\r{:.2f} sec > Episode {} finished with {} ({})'.format(
                delta, i_episode, result, reward))
            if i_episode % 10 == 9 and i_episode != episodes - 1:
                ffa_print_stats(results, steps, i_episode + 1)

    env.close()

    if verbose:
        delta = time.time() - start
        print("Total time: {:.2f} sec".format(delta))
        ffa_print_stats(results, steps, episodes)

    return results
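# A minimal usage sketch for ffa_evaluate, assuming the same imports as the
# script below (ffa_v1_env, Pomme, SimpleAgent); the helper name and episode
# count are illustrative, not part of the original code.
def evaluate_simple_agents(episodes=20):
    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])
    env.set_agents([
        SimpleAgent(config["agent"](i, config["game_type"])) for i in range(4)
    ])
    env.set_init_game_state(None)
    return ffa_evaluate(env, episodes, verbose=True, visualize=False)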
def main():
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    config = ffa_v1_env()
    env = Pomme(**config["env_kwargs"])

    # Add 4 agents (slot 3 can optionally be replaced by a human player)
    agents = {}
    for agent_id in range(4):
        agents[agent_id] = SimpleAgent(config["agent"](agent_id, config["game_type"]))
    # agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")
    env.set_agents(list(agents.values()))
    env.set_init_game_state(None)

    demo = []

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        demo.append(env.get_json_info())
        while not done:
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            demo.append(env.get_json_info())
        if 1 in reward:
            winner = reward.index(1)
        else:
            winner = None
        print('Episode {} finished'.format(i_episode))
    env.close()

    # If the game was not tied, save the demonstration
    if winner is not None:
        demonstration = {'demo': demo, 'winner': winner}
        with open("demonstration.p", "wb") as f:
            pickle.dump(demonstration, f)
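# A minimal sketch for loading the demonstration saved above (the file name is
# taken from the code; the helper name is ours and the pickle import from the
# script above is assumed):
def load_demonstration(path="demonstration.p"):
    with open(path, "rb") as f:
        demonstration = pickle.load(f)
    print("winner:", demonstration['winner'],
          "- recorded states:", len(demonstration['demo']))
    return demonstration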
agents = {}
for agent_id in range(3):
    agents[agent_id] = StaticAgent(config["agent"](agent_id, config["game_type"]))

# Add human agent
agent_id += 1
agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")

env.set_agents(list(agents.values()))
env.set_init_game_state(None)

# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)
    # not needed:
    # actions = [action % 4 for action in actions]
    # actions = [0, actions[1]]
    obs, reward, done, info = env.step(actions)
    # featurize(obs[0])
env.render(close=True)
env.close()

# Print the result
print(info)
        # agents.append(Dense128(env.action_space.n, BOARD_SIZE,
        #                        character=config["agent"](agent_id, config["game_type"]),
        #                        save_path='./dqn/model/ddgp_dense_128_1_rs/model.h4'))
        agents.append(Cnn12832Dense1281(
            env.action_space.n, BOARD_SIZE,
            character=config["agent"](agent_id, config["game_type"]),
            save_path='./dqn/model/ddgp_cnn128_3_2_dense_128_1_rs/model.h4'))
    else:
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

env.set_agents(agents)
env.set_init_game_state(None)

# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)
    obs, reward, done, info = env.step(actions)
env.render(close=True)
env.close()

# Print the result
print(info)
class PomFFA(gym.Env):

    agent_list = [HoldAgent(), HoldAgent(), HoldAgent(), HoldAgent()]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):
        pomme_config = pommerman.configs.ffa_competition_env()

        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        print("pomme_config: ")
        print(pomme_config['env_kwargs'])

        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space
        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))

        if not env_config or (env_config and env_config.get("is_training", True)):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomme_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomme_config['game_type'])
            agent.init_agent(id_, pomme_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        obs = self.preprocess(obs)
        self.total_reward = 0
        self.prev_alive = 4
        self.visited = np.zeros(shape=(11, 11))
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # Only one agent left: reward the winner (capped by the shaping
            # already paid out), penalize everyone else.
            if agent_id in obs['alive']:
                return 1.0 - self.total_reward
            else:
                return -0.5
        if obs["step_count"] >= 500:
            # Game is over due to the step limit: everyone gets -0.5.
            return -0.5
        # Game still running: shaped reward while alive, -0.5 once dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        blast = obs["bomb_blast_strength"]
        px = [1, -1, 0, 0]
        py = [0, 0, -1, 1]
        sum_reward = 0.0
        sum_reward += 20 * (len(obs["alive"]) - self.prev_alive)
        self.prev_alive = len(obs["alive"])

        if action == 0:
            sum_reward -= 0.1
        elif action == 5:
            # Reward bomb placement next to walls (board value 1) and
            # enemies (board values > 10).
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 2
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4
        else:
            assert (1 <= action <= 4), str(action)
            # Reward moving onto unvisited passages, penalize revisits.
            dx = x + px[action - 1]
            dy = y + py[action - 1]
            if (not (dx < 0 or dx > 10 or dy < 0 or dy > 10)) and obs["board"][dx][dy] == 0:
                if self.visited[dx][dy] > 0:
                    sum_reward -= 0.1
                else:
                    sum_reward += 0.3
                    self.visited[dx][dy] = 1

        sum_reward = sum_reward * 1.0 / 100.0

        # Clip the cumulative shaping so it cannot dominate the terminal reward.
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.8 or new_total_reward < -0.5:
            sum_reward = 0.0
        else:
            self.total_reward = new_total_reward
        return sum_reward

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id not in self.alive_agents) or obs["step_count"] >= 500:
            done = True
        obs = self.preprocess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order].copy()

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs
    def init_observation_space(self, env_config):
        """
        Observations for agents:
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))
        board = spaces.Box(low=0, high=len(constants.Item), shape=(board_size, board_size))
        danger = spaces.Box(low=0, high=20, shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=10, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength,
            "danger": danger
        })

    @staticmethod
    def preprocess(obs):
        # Drop entries that are not part of the observation space and convert
        # scalars to arrays so they match the Box spaces.
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']

        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])

        board = obs['board']
        bomb_blast_strength = obs['bomb_blast_strength']
        bomb_life = obs['bomb_life']

        # Remap board item values into a compact encoding and build a "danger"
        # map holding the remaining life of any bomb threatening each cell.
        danger = np.ndarray(shape=(11, 11), dtype=int)
        for x in range(11):
            for y in range(11):
                danger[x][y] = 10
                if board[x][y] == 4:         # flame
                    board[x][y] = 0
                    danger[x][y] = 0
                elif board[x][y] == 3:       # bomb
                    board[x][y] = 0
                elif board[x][y] == 10:      # own agent
                    board[x][y] = 1
                elif board[x][y] > 10:       # enemy agents
                    board[x][y] = 5
                elif 6 <= board[x][y] <= 8:  # power-ups
                    board[x][y] = 3
                elif board[x][y] == 1:       # rigid wall
                    board[x][y] = 4
        for x in range(11):
            for y in range(11):
                if bomb_life[x][y] > 0:
                    strength = int(bomb_blast_strength[x][y] + 0.5)
                    for tx in range(max(0, x - strength + 1), min(11, x + strength)):
                        danger[tx][y] = min(danger[tx][y], bomb_life[x][y])
                    for ty in range(max(0, y - strength + 1), min(11, y + strength)):
                        danger[x][ty] = min(danger[x][ty], bomb_life[x][y])
        obs['danger'] = danger
        return obs

    def render(self):
        self.pomme.render()
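# These wrappers follow RLlib's convention of an env class taking an
# env_config dict, so PomFFA can be registered with Ray/RLlib roughly like
# this (a sketch; the PPOTrainer import path assumes an older Ray release):
import ray
from ray.tune.registry import register_env
from ray.rllib.agents.ppo import PPOTrainer

register_env("pomme_ffa", lambda env_config: PomFFA(env_config))

ray.init()
trainer = PPOTrainer(env="pomme_ffa", config={"env_config": {"is_training": True}})
for _ in range(10):
    print(trainer.train()["episode_reward_mean"])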
class MultiAgent(MultiAgentEnv):

    def __init__(self):
        super(MultiAgent, self).__init__()
        self.phase = 0
        self.setup()

    def setup(self):
        agents = []
        if self.phase == 0:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 20
            agents.insert(0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()
        if self.phase == 1:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()
        if self.phase == 2:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()
        if self.phase == 3:
            # Note: phase 3 currently mirrors phase 2.
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()
        if self.phase == 4:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 0
            config["env_kwargs"]["num_items"] = 10
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, SimpleAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        for agent_id in self.agents_index:
            agents.insert(agent_id,
                          BaseLineAgent(config["agent"](agent_id, config["game_type"])))

        self.env.set_agents(agents)
        self.env.set_init_game_state(None)
        self.observation_space = spaces.Dict({
            "boards": spaces.Box(low=-1, high=20, shape=(3, 11, 11)),
            "states": spaces.Box(low=-1, high=20, shape=(9, )),
        })
        self.action_space = self.env.action_space

    def set_phase(self, phase):
        print("learn phase " + str(phase))
        self.phase = phase
        self.setup()
        self.reset()

    def step(self, actions):
        obs = self.env.get_observations()
        all_actions = self.env.act(obs)
        for index in self.agents_index:
            try:
                action = actions[index]
            except KeyError:
                action = 0
            all_actions[index] = action
        step_obs = self.env.step(all_actions)
        obs, rew, done, info = {}, {}, {}, {}
        for i in actions.keys():
            obs[i], rew[i], done[i], info[i] = [
                featurize(step_obs[0][i]),
                step_obs[1][i],
                step_obs[1][i] == -1 or step_obs[2],
                step_obs[3],
            ]
        done["__all__"] = step_obs[2]
        return obs, rew, done, info

    def reset(self):
        obs = self.env.reset()
        return {i: featurize(obs[i]) for i in self.agents_index}
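# RLlib's MultiAgentEnv exchanges dicts keyed by agent index; a minimal manual
# rollout sketch for the wrapper above (random actions are illustrative only):
env = MultiAgent()
obs = env.reset()
done = {"__all__": False}
while not done["__all__"]:
    actions = {i: env.action_space.sample() for i in env.agents_index}
    obs, rew, done, info = env.step(actions)
print("episode rewards:", rew)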
class MultiAgend(MultiAgentEnv):

    def __init__(self):
        super(MultiAgend, self).__init__()
        self.phase = 0
        self.next_phase = 0
        self.steps = 0
        self.last_featurize_obs = None
        self.setup()

    def featurize(self, obs):
        # Collect the Item values of the configured enemies.
        enemies = []
        for agent_id in self.enemies_agents_index:
            if agent_id == 0:
                enemies.append(Item.Agent0)
            if agent_id == 1:
                enemies.append(Item.Agent1)
            if agent_id == 2:
                enemies.append(Item.Agent2)
            if agent_id == 3:
                enemies.append(Item.Agent3)

        # Mask out agents that are not actual enemies.
        for enemy in obs["enemies"]:
            if enemy not in enemies:
                obs["board"] = ma.masked_equal(obs["board"], enemy.value).filled(fill_value=0)

        board = np.copy(obs["board"])
        board[obs["position"][0], obs["position"][1]] = 0.0

        # Split the board into one binary plane per feature.
        enemy_pos = np.full((11, 11), 0)
        for enemy in obs["enemies"]:
            enemy_pos = enemy_pos | ma.masked_not_equal(board, enemy.value).filled(fill_value=0)
            board = ma.masked_equal(board, enemy.value).filled(fill_value=0)

        wood = ma.masked_not_equal(board, 2).filled(fill_value=0)
        wood = (wood > 0).astype(np.float32)
        board = ma.masked_equal(board, 2).filled(fill_value=0)

        stone = ma.masked_not_equal(board, 1).filled(fill_value=0)
        stone = (stone > 0).astype(np.float32)
        board = ma.masked_equal(board, 1).filled(fill_value=0)

        enemy_pos = (enemy_pos > 0).astype(np.float32)
        board = ma.masked_equal(board, obs["teammate"].value).filled(fill_value=0)

        flames = ma.masked_not_equal(board, 4).filled(fill_value=0)
        flames = (flames > 0).astype(np.float32)
        board = ma.masked_equal(board, 4).filled(fill_value=0)
        board = ma.masked_equal(board, 3).filled(fill_value=0)

        # Note: the teammate value was already cleared from the board above,
        # so this plane stays empty in FFA play.
        teammate_pos = ma.masked_not_equal(board, obs["teammate"].value).filled(fill_value=0)
        teammate_pos = (teammate_pos > 0).astype(np.float32)
        board = ma.masked_equal(board, obs["teammate"].value).filled(fill_value=0)

        items = board.astype(np.float32)

        pos = np.full((11, 11), 0)
        pos[obs["position"][0], obs["position"][1]] = 1.0
        pos = pos.astype(np.float32)

        bomb_life = obs["bomb_life"].astype(np.float32)
        bomb_blast_strength = obs["bomb_blast_strength"].astype(np.float32)

        ammo = utility.make_np_float([obs["ammo"]])
        blast_strength = utility.make_np_float([obs["blast_strength"]])
        can_kick = utility.make_np_float([obs["can_kick"]])
        game_end = utility.make_np_float([
            (self.max_steps - self.steps) / self.max_steps
        ])

        actual_featurize_obs = {
            'boards': np.stack([
                enemy_pos, pos, wood, stone, items, flames, teammate_pos,
                bomb_life, bomb_blast_strength
            ], axis=0),
            'states': np.concatenate([ammo, blast_strength, can_kick, game_end]),
        }

        # Stack the previous frame onto the current one; the very first frame
        # is duplicated.
        if self.last_featurize_obs is None:
            featurize_obs = {
                'boards': np.concatenate([actual_featurize_obs['boards'],
                                          actual_featurize_obs['boards']], axis=0),
                'states': np.concatenate([actual_featurize_obs['states'],
                                          actual_featurize_obs['states']]),
            }
        else:
            featurize_obs = {
                'boards': np.concatenate([self.last_featurize_obs['boards'],
                                          actual_featurize_obs['boards']], axis=0),
                'states': np.concatenate([self.last_featurize_obs['states'],
                                          actual_featurize_obs['states']]),
            }
        self.last_featurize_obs = actual_featurize_obs
        return featurize_obs

    def setup(self):
        agents = []
        if self.phase == 0:
            arr = [0, 1]
            random.shuffle(arr)
            agents_index = arr.pop()
            op_index = arr.pop()
            self.agents_index = [agents_index]
            self.enemies_agents_index = [op_index]
            self.max_steps = 200
            config = ffa_v0_fast_env()
            config["env_kwargs"]["max_steps"] = self.max_steps
            agents.insert(agents_index,
                          BaseLineAgent(config["agent"](agents_index, config["game_type"])))
            agents.insert(op_index,
                          NoDoAgent(config["agent"](op_index, config["game_type"])))
            self.env = Pomme(**config["env_kwargs"])
            self.env.set_agents(agents)
            init_state = {
                'board_size': '11',
                'step_count': '0',
                'board': '',
                'agents': '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
                'bombs': '[]',
                'flames': '[]',
                'items': '[]',
                'intended_actions': '[0, 0]'
            }
            board = np.full((11, 11), 0)
            init_state['board'] = json.dumps(board.tolist())
            agents_json = json.loads(copy.copy(init_state['agents']))
            random_pos = np.random.choice(board.shape[0], (2, 2), replace=False)
            agents_json[0]["position"] = random_pos[0].tolist()
            agents_json[1]["position"] = random_pos[1].tolist()
            init_state['agents'] = json.dumps(agents_json)
            self.env._init_game_state = init_state
            self.env.reset()
        if self.phase == 1:
            arr = [0, 1]
            random.shuffle(arr)
            agents_index = arr.pop()
            op_index = arr.pop()
            self.agents_index = [agents_index]
            self.enemies_agents_index = [op_index]
            self.max_steps = 200
            config = ffa_v0_fast_env()
            config["env_kwargs"]["max_steps"] = self.max_steps
            agents.insert(agents_index,
                          BaseLineAgent(config["agent"](agents_index, config["game_type"])))
            agents.insert(op_index,
                          NoDoAgent(config["agent"](op_index, config["game_type"])))
            self.env = Pomme(**config["env_kwargs"])
            self.env.set_agents(agents)
            init_state = {
                'board_size': '11',
                'step_count': '0',
                'board': '',
                'agents': '[{"agent_id": 0, "is_alive": true, "position": [1, 1], "ammo": 1, "blast_strength": 2, "can_kick": false}, {"agent_id": 1, "is_alive": true, "position": [9, 0], "ammo": 1, "blast_strength": 2, "can_kick": false}]',
                'bombs': '[]',
                'flames': '[]',
                'items': '[]',
                'intended_actions': '[0, 0]'
            }
            board = np.full((11, 11), 0)
            # A wall of wood splits the board; one agent spawns in each half.
            board[5, :] = np.ones(11) * 2
            agents_json = json.loads(copy.copy(init_state['agents']))
            agents_json[0]["position"] = [random.randint(0, 4), random.randint(0, 10)]
            agents_json[1]["position"] = [random.randint(6, 10), random.randint(0, 10)]
            init_state['agents'] = json.dumps(agents_json)
            init_state['board'] = json.dumps(board.tolist())
            self.env._init_game_state = init_state
            self.env.reset()

        # Two stacked frames of 9 board planes and 4 state scalars each.
        self.observation_space = spaces.Dict({
            'boards': spaces.Box(low=-1, high=25, shape=(18, 11, 11), dtype=np.float32),
            'states': spaces.Box(low=-1, high=25, shape=(8, ), dtype=np.float32)
        })
        self.action_space = self.env.action_space

    def set_phase(self, phase):
        print("learn phase " + str(phase))
        self.next_phase = phase

    def close(self):
        self.env.close()

    def step(self, actions):
        self.steps = self.steps + 1
        obs = self.env.get_observations()
        all_actions = self.env.act(obs)
        assert len(all_actions) == len(self.agents_index) + len(self.enemies_agents_index)
        for index in self.agents_index:
            try:
                action = actions[index]
            except KeyError:
                action = 0
            assert all_actions[index] is None
            all_actions[index] = action
        step_obs = self.env.step(all_actions)
        obs, rew, done, info = {}, {}, {}, {}
        for i in actions.keys():
            obs[i], rew[i], done[i], info[i] = [
                self.featurize(step_obs[0][i]),
                step_obs[1][i],
                step_obs[1][i] == -1 or step_obs[2],
                step_obs[3]
            ]
        done["__all__"] = step_obs[2]
        return obs, rew, done, info

    def reset(self):
        self.steps = 0
        self.phase = self.next_phase
        self.setup()
        obs = self.env.get_observations()
        return {i: self.featurize(obs[i]) for i in self.agents_index}
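# set_phase above only takes effect on the next reset, so a curriculum can be
# driven from the training loop; a sketch (the phase schedule is illustrative):
env = MultiAgend()
for phase in (0, 1):
    env.set_phase(phase)
    obs = env.reset()  # the new phase is applied here
    done = {"__all__": False}
    while not done["__all__"]:
        actions = {i: env.action_space.sample() for i in env.agents_index}
        obs, rew, done, info = env.step(actions)
env.close()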
class PomFFA(gym.Env):

    def __init__(self, env_config=None):
        self.agent_list = [HoldAgent(), agents.SimpleAgent(), HoldAgent(), HoldAgent()]
        # self.agent_list = [agents.SimpleAgent(), agents.SimpleAgent(), agents.SimpleAgent(), agents.RandomAgent()]
        self.all_obs = None
        self.all_action = None
        self.cur_obs = None
        self.alive_agents = [10, 11, 12, 13]
        self.player_agent_id = 10
        self.total_reward = 0

        pomme_config = pommerman.configs.ffa_competition_env()
        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v

        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space

        if not env_config or (env_config and env_config.get("is_training", True)):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomme_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomme_config['game_type'])
            agent.init_agent(id_, pomme_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs.copy()
        obs = self.preprocess(obs)
        self.total_reward = 0
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # An agent won: +0.5 for the winner, -0.5 for everyone else.
            if agent_id in obs['alive']:
                return 0.5
            else:
                return -0.5
        if obs["step_count"] >= 500:
            # Game is over due to the step limit: everyone gets -0.5.
            return -0.5
        # Game still running: shaped reward while alive, -0.5 once dead.
        if agent_id not in obs['alive']:
            return -0.5

        x, y = obs["position"]
        px = [0, 1, 0, -1]
        py = [1, 0, -1, 0]
        sum_reward = 0
        if action == 5:
            # Reward bomb placement next to walls (board value 1) and
            # enemies (board values > 10).
            for i in range(4):
                tx = x + px[i]
                ty = y + py[i]
                if tx < 0 or tx > 10 or ty < 0 or ty > 10:
                    continue
                if obs["board"][tx][ty] == 1:
                    sum_reward += 1
                elif obs["board"][tx][ty] > 10:
                    sum_reward += 4
        sum_reward = sum_reward * 1.0 / 200.0

        # Clip the cumulative shaping so it cannot dominate the terminal reward.
        new_total_reward = self.total_reward + sum_reward
        if new_total_reward > 0.5 or new_total_reward < -0.5:
            sum_reward = 0
        else:
            self.total_reward = new_total_reward
        return sum_reward

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)
        del self.all_obs
        self.all_obs = obs.copy()
        obs = self.get_for_training_agent(obs)
        del self.cur_obs
        self.cur_obs = obs.copy()
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if self.player_agent_id not in self.alive_agents or self.cur_obs["step_count"] >= 500:
            done = True
        obs = self.preprocess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
        Observations for agents:
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']
        board = spaces.Box(low=0, high=len(constants.Item), shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0, high=num_items,
                                         shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2,))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1,))
        ammo = spaces.Box(low=0, high=num_items, shape=(1,))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def preprocess(obs):
        # Drop entries that are not part of the observation space and convert
        # scalars to arrays so they match the Box spaces.
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        self.pomme.render()
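# A quick smoke-test sketch for this single-agent wrapper: one episode with
# random actions through the standard gym loop.
env = PomFFA()
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print("final reward:", reward)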
class PomFFA(gym.Env):

    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    ammo = 1
    blast_strength = 2
    state = {}

    def __init__(self, env_config={}):
        pomme_config = pommerman.configs.ffa_competition_env()
        self.reward = Reward(env_config.get("reward"))
        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space
        if not env_config or (env_config and env_config.get("is_training", True)):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomme_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomme_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)
        self.init_state()

    def init_state(self):
        self.state['agent_id'] = self.agent_id
        self.state['alive'] = self.alive_agents
        self.state['visited'] = set()
        self.state['blast_strength'] = self.blast_strength
        self.state['ammo'] = self.ammo
        self.state["bombs"] = {}

    def reset(self):
        all_obs = self.pomme.reset()
        obs = self.get_for_training_agent(all_obs)
        self.init_state()
        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']
        obs = self.build_obs(obs, self.state)
        return obs

    def step(self, action):
        actions = self.pomme.act(self.state['all_obs'])
        actions = self.set_for_training_agent(actions, action)
        all_obs, _, _, _ = self.pomme.step(actions)
        obs = self.get_for_training_agent(all_obs)
        info = {'board': obs['board'], 'blast_strength': obs['blast_strength']}
        done = self.get_done(obs)
        reward, self.state = self.reward.get_reward(action, obs, self.state)
        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['all_obs'] = all_obs
        self.state['alive'] = obs['alive']
        self.state['blast_strength'] = obs['blast_strength']
        self.state['ammo'] = obs['ammo']
        obs = self.build_obs(obs, self.state)
        return obs, reward, done, info

    def get_for_training_agent(self, inputs):
        order = self.agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        if self.agent_id not in obs['alive']:
            return True
        if obs['step_count'] >= 800:
            return True
        return False

    def build_obs(self, obs, state):
        # Split the raw board into one binary plane per entity type and stack
        # them with the normalized bomb_life, flame_life, and ammo planes.
        board = obs['board']
        bomb_blast_strength = obs['bomb_blast_strength']
        bomb_life = obs['bomb_life']
        flame_life = obs['flame_life']
        agent_id = state['agent_id']
        ammo = state['ammo']

        passage = np.zeros_like(board)
        wall = np.zeros_like(board)
        wood = np.zeros_like(board)
        bomb = np.zeros_like(board)
        bonus = np.zeros_like(board)
        me = np.zeros_like(board)
        enemy = np.zeros_like(board)

        for y in range(board.shape[0]):
            for x in range(board.shape[1]):
                v = board[y][x]
                if v == 0:
                    passage[y][x] = 1
                elif v == 1:
                    wall[y][x] = 1
                elif v == 2:
                    wood[y][x] = 1
                elif v == 3:
                    bomb = create_cross(bomb, (y, x), bomb_blast_strength[y][x])
                elif v == 4:
                    pass
                elif v == 6 or v == 7:
                    bonus[y][x] = 1
                elif v >= 10:
                    if v == agent_id:
                        me[y][x] = 1
                    else:
                        enemy[y][x] = 1
                    # An agent standing on its own bomb hides the bomb tile.
                    if bomb_blast_strength[y][x] > 0:
                        bomb = create_cross(bomb, (y, x), bomb_blast_strength[y][x])

        ammo = ammo * np.ones_like(board) / 12
        bomb_life /= 9
        flame_life /= 3

        board = np.transpose(
            np.stack([
                passage, wall, wood, bomb, bonus, me, enemy, bomb_life,
                flame_life, ammo
            ]), [1, 2, 0])
        return board

    @staticmethod
    def init_observation_space(env_config):
        """
        Observations for agents:
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']
        # 10 channels: passage, wall, wood, bomb, bonus, me, enemy,
        # bomb_life, flame_life, ammo
        board = spaces.Box(low=0, high=1, shape=(board_size, board_size, 10))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        ammo = spaces.Box(low=0, high=num_items, shape=(board_size, board_size))
        # return spaces.Dict({"board": board, "bomb_life": bomb_life,
        #                     "flame_life": flame_life, "ammo": ammo})
        return board

    @staticmethod
    def init_action_space():
        return spaces.Discrete(6)

    def render(self):
        self.pomme.render()
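# build_obs relies on a create_cross helper that is not defined in this
# snippet; a hypothetical implementation, assuming it marks the cross-shaped
# blast area of a bomb on a binary plane (clipped to the board edges):
def create_cross(plane, center, strength):
    y, x = center
    s = int(strength) - 1  # blast strength includes the bomb's own cell
    plane[max(0, y - s):y + s + 1, x] = 1
    plane[y, max(0, x - s):x + s + 1] = 1
    return plane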
class PomFFA(gym.Env):

    agent_list = [
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
    ]
    all_obs = None
    all_action = None
    cur_obs = None
    alive_agents = [10, 11, 12, 13]
    player_agent_id = 10

    def __init__(self, env_config=None):
        pomme_config = pommerman.configs.ffa_competition_env()
        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v
        print("pomme_config: ")
        print(pomme_config['env_kwargs'])
        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space
        if not env_config or (env_config and env_config.get("is_training", True)):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomme_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            print(id_, pomme_config['game_type'])
            agent.init_agent(id_, pomme_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        obs = self.preprocess(obs)
        return obs

    def get_reward(self, obs, action, agent_id):
        if len(obs["alive"]) == 1:
            # An agent won: +1 for the winner, -1 for everyone else.
            if agent_id in obs['alive']:
                return 1
            else:
                return -1
        if obs["step_count"] >= 500:
            # Game is over due to the step limit: everyone gets -1.
            return -1
        # Game still running: 0 while alive, -1 once dead.
        if agent_id not in obs['alive']:
            return -1
        # Disabled penalty for standing inside a bomb's blast range:
        # x, y = obs["position"]
        # blast = obs["bomb_blast_strength"]
        # for w in range(11):
        #     if blast[x][w] > int(math.fabs(w - y)):
        #         return -10
        #     if blast[w][y] > int(math.fabs(w - x)):
        #         return -10
        return 0

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        if self.alive_agents and self.player_agent_id in self.alive_agents:
            actions = self.set_for_training_agent(actions, action)
        else:
            actions = self.set_for_training_agent(actions, 0)
        obs, rewards, done, info = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        self.cur_obs = obs
        reward = self.get_reward(self.cur_obs, action, self.player_agent_id)
        self.alive_agents = obs['alive']
        if (self.player_agent_id not in self.alive_agents) or obs["step_count"] >= 500:
            done = True
        obs = self.preprocess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.player_agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.player_agent_id - 10
        inputs[order] = value
        return inputs

    def init_observation_space(self, env_config):
        """
        Observations for agents:
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size'] or 11
        num_items = env_config['num_items'] or 11
        print("env config: {}".format(env_config))
        board = spaces.Box(low=0, high=len(constants.Item), shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
position, "ammo": ammo, "blast_strength": blast_strength }) @staticmethod def preproess(obs): del obs["game_type"] del obs["game_env"] del obs["can_kick"] del obs["teammate"] del obs["enemies"] del obs["step_count"] del obs['alive'] del obs['bomb_moving_direction'] obs['position'] = np.array(obs['position']) obs['ammo'] = np.array([obs['ammo']]) obs['blast_strength'] = np.array([obs['blast_strength']]) return obs def render(self): self.pomme.render()
class PomFFA(gym.Env):

    agent_list = [
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent(),
        agents.StaticAgent()
    ]
    all_obs = None
    all_action = None
    pre_obs = None
    alive_agents = [10, 11, 12, 13]
    agent_id = 10
    state = {}

    def __init__(self, env_config=None):
        pomme_config = pommerman.configs.ffa_competition_env()
        if env_config:
            for k, v in env_config.items():
                if k in pomme_config['env_kwargs']:
                    pomme_config['env_kwargs'][k] = v
            self.reward = Reward(env_config.get("reward"))
        else:
            self.reward = Reward()
        print("Pommerman Config:", pomme_config['env_kwargs'])
        self.pomme = Pomme(**pomme_config['env_kwargs'])
        self.observation_space = self.init_observation_space(pomme_config['env_kwargs'])
        self.action_space = self.pomme.action_space
        if not env_config or (env_config and env_config.get("is_training", True)):
            # Initializing the env twice could raise an error here.
            self.init(pomme_config)

    def init(self, pomme_config):
        for id_, agent in enumerate(self.agent_list):
            assert isinstance(agent, agents.BaseAgent)
            agent.init_agent(id_, pomme_config['game_type'])
        self.pomme.set_agents(self.agent_list)
        self.pomme.set_init_game_state(None)

    def reset(self):
        obs = self.pomme.reset()
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        state = {
            "prev_obs": None,
            "visited": set(),
            "agent_id": 10,
            "alive": [10, 11, 12, 13],
            "strength": 2,
            "ammo": 1,
            "bombs": {},
        }
        state['prev_obs'] = copy.deepcopy(obs)
        state['position'] = obs['position']
        self.state = state
        obs = self.preprocess(obs)
        return obs

    def step(self, action):
        actions = self.pomme.act(self.all_obs)
        actions = self.set_for_training_agent(actions, action)
        obs, rewards, _, _ = self.pomme.step(actions)
        self.all_obs = obs
        obs = self.get_for_training_agent(obs)
        reward, self.state = self.reward.get_reward(action, obs, self.state)
        done = self.get_done(obs)
        self.state['prev_obs'] = copy.deepcopy(obs)
        self.state['position'] = obs['position']
        obs = self.preprocess(obs)
        return obs, reward, done, {}

    def get_for_training_agent(self, inputs):
        order = self.agent_id - 10
        return inputs[order]

    def set_for_training_agent(self, inputs, value):
        order = self.agent_id - 10
        inputs[order] = value
        return inputs

    def get_done(self, obs):
        if self.agent_id not in obs['alive']:
            return True
        if obs['step_count'] >= 800:
            return True
        return False

    @staticmethod
    def init_observation_space(env_config):
        """
        Observations for agents:
            board: n^2
            bomb blast strength: n^2
            bomb life: n^2
        """
        board_size = env_config['board_size']
        num_items = env_config['num_items']
        board = spaces.Box(low=0, high=len(constants.Item), shape=(board_size, board_size))
        bomb_blast_strength = spaces.Box(low=0, high=num_items, shape=(board_size, board_size))
        bomb_life = spaces.Box(low=0, high=9, shape=(board_size, board_size))
        flame_life = spaces.Box(low=0, high=3, shape=(board_size, board_size))
        position = spaces.Box(low=0, high=board_size, shape=(2, ))
        blast_strength = spaces.Box(low=1, high=num_items, shape=(1, ))
        ammo = spaces.Box(low=0, high=num_items, shape=(1, ))
        return spaces.Dict({
            "board": board,
            "bomb_blast_strength": bomb_blast_strength,
            "bomb_life": bomb_life,
            "flame_life": flame_life,
            "position": position,
            "ammo": ammo,
            "blast_strength": blast_strength
        })

    @staticmethod
    def init_action_space():
        return spaces.Discrete(6)

    @staticmethod
    def preprocess(obs):
        # Drop entries that are not part of the observation space and convert
        # scalars to arrays so they match the Box spaces.
        del obs["game_type"]
        del obs["game_env"]
        del obs["can_kick"]
        del obs["teammate"]
        del obs["enemies"]
        del obs["step_count"]
        del obs['alive']
        del obs['bomb_moving_direction']
        obs['position'] = np.array(obs['position'])
        obs['ammo'] = np.array([obs['ammo']])
        obs['blast_strength'] = np.array([obs['blast_strength']])
        return obs

    def render(self):
        self.pomme.render()
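# The Reward class used by the last two wrappers is external to these
# snippets; a hypothetical minimal stand-in matching the observed interface
# (optional config argument, get_reward returning a (reward, state) pair):
class Reward:
    def __init__(self, config=None):
        self.config = config or {}

    def get_reward(self, action, obs, state):
        # Sparse placeholder: -1 once the agent is dead, 0 otherwise. A real
        # implementation would shape rewards using prev_obs, visited, bombs, etc.
        reward = -1.0 if state['agent_id'] not in obs['alive'] else 0.0
        return reward, state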