def env_for_players(self):
    """Build a four-seat Pomme environment with a DQN agent in seat 0.

    Seats 0-3 are filled, in order, with ``DQN``, ``PlayerAgent``,
    ``RandomAgent`` and ``RandomAgent``. The seat-0 agent is registered as
    the environment's training agent.

    Returns:
        The configured ``Pomme`` environment.
    """
    config = ffa_v0_fast_env(30)
    env = Pomme(**config["env_kwargs"])

    # One agent class per seat, listed in seat order.
    seat_classes = (DQN, PlayerAgent, RandomAgent, RandomAgent)
    agents = [
        agent_cls(config["agent"](seat, config["game_type"]))
        for seat, agent_cls in enumerate(seat_classes)
    ]
    env.set_agents(agents)

    # Only the DQN agent (seat 0) is the training agent.
    learner = agents[0]
    env.set_training_agent(learner.agent_id)
    env.set_init_game_state(None)
    return env
def set_pommerman_env(agent_id=0):
    """Create a seeded four-seat Pomme environment with one DQN agent.

    The seat given by ``agent_id`` holds the ``DQN`` agent and is registered
    as the training agent; the other three seats hold ``SimpleAgent``s.

    Args:
        agent_id: Seat (0-3) for the DQN agent. Defaults to 0.

    Returns:
        The configured ``Pomme`` environment.

    Raises:
        IndexError: If ``agent_id`` is not one of the four seats. Without
            this check a negative id would create no DQN agent at all and
            register a SimpleAgent as the training agent.
    """
    num_seats = 4
    if agent_id not in range(num_seats):
        raise IndexError(
            f"agent_id must be in range({num_seats}), got {agent_id!r}")

    # Instantiate the environment
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    # Seed both NumPy's global RNG and the environment.
    np.random.seed(0)
    env.seed(0)

    # Add 3 Simple Agents and 1 DQN agent
    agents = [
        DQN(config["agent"](agent_id, config["game_type"]))
        if i == agent_id else
        SimpleAgent(config["agent"](i, config["game_type"]))
        for i in range(num_seats)
    ]
    env.set_agents(agents)

    # training_agent is only dqn agent
    env.set_training_agent(agents[agent_id].agent_id)
    env.set_init_game_state(None)
    return env
def get_env():
    """Return a Pomme environment with a DQN agent and three SimpleAgents.

    The DQN agent takes seat 0 and is registered as the training agent;
    seats 1-3 are SimpleAgents.
    """
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    def make_character(seat):
        # Build the per-seat character object that each agent wraps.
        return config["agent"](seat, config["game_type"])

    training_seat = 0
    agents = [DQN(make_character(0))]
    for seat in (1, 2, 3):
        agents.append(SimpleAgent(make_character(seat)))

    env.set_agents(agents)
    env.set_training_agent(agents[training_seat].agent_id)
    env.set_init_game_state(None)
    return env
# NOTE(review): the statements down to the `return` are the tail of a function
# whose header lies outside this excerpt. They are reproduced unchanged; the
# indentation shown is assumed.
    # Encode the teammate as its enum value, or -1 when there is none.
    if teammate is not None:
        teammate = teammate.value
    else:
        teammate = -1
    teammate = make_np_float([teammate])
    enemies = obs["enemies"]
    enemies = [e.value for e in enemies]
    # Pad with -1 so the enemies vector always has at least 3 entries.
    if len(enemies) < 3:
        enemies = enemies + [-1]*(3 - len(enemies))
    enemies = make_np_float(enemies)
    # Join all feature pieces into one array.
    return np.concatenate((board, bomb_blast_strength, bomb_life, position, ammo, blast_strength, can_kick, teammate, enemies))


# Instantiate the environment
config = ffa_v0_fast_env()
env = Pomme(**config["env_kwargs"])
# Bare expression: the value is read and discarded.
env.action_space.n

# Add 3 StaticAgent opponents in seats 0-2 (the original comment called
# these "random agents", but the code builds StaticAgent).
agents = {}
for agent_id in range(3):
    agents[agent_id] = StaticAgent(config["agent"](agent_id, config["game_type"]))

# Add human agent
# The loop leaves agent_id == 2, so this bumps it to 3 for the last seat.
agent_id += 1
agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")

# Dict insertion order is seat order, so the values list is 0, 1, 2, 3.
env.set_agents(list(agents.values()))
env.set_init_game_state(None)
def setup(self):
    """Create the two-agent Pomme environment for the current phase.

    For ``self.phase`` 0 or 1 this randomly assigns the learner
    (``BaseLineAgent``) and the opponent (``NoDoAgent``) to seats 0 and 1,
    builds a ``Pomme`` env capped at 200 steps, installs a hand-built initial
    game state and resets the env:

    * phase 0: an all-zero 11x11 board; the four position coordinates are
      drawn from 0-10 without replacement, so the two agents never overlap.
    * phase 1: board index 5 along the first axis is filled with value 2;
      agent 0 gets a first coordinate in 0-4 and agent 1 one in 6-10, i.e.
      opposite sides of that line (assuming positions are (row, col) --
      TODO confirm against the pommerman board convention).

    For any other phase no env is created here and ``self.env`` must
    already exist. In all cases ``self.observation_space`` and
    ``self.action_space`` are (re)assigned at the end.
    """
    phase = self.phase
    if phase in (0, 1):
        # Randomly decide which of the two seats belongs to the learner.
        seats = [0, 1]
        random.shuffle(seats)
        agents_index = seats.pop()
        op_index = seats.pop()
        self.agents_index = [agents_index]
        self.enemies_agents_index = [op_index]

        self.max_steps = 200
        config = ffa_v0_fast_env()
        config["env_kwargs"]["max_steps"] = self.max_steps

        # Inserting into an empty list at indices {0, 1} in this order
        # always leaves each agent at the list position equal to its seat.
        agents = []
        agents.insert(
            agents_index,
            BaseLineAgent(config["agent"](agents_index, config["game_type"])))
        agents.insert(
            op_index,
            NoDoAgent(config["agent"](op_index, config["game_type"])))

        self.env = Pomme(**config["env_kwargs"])
        self.env.set_agents(agents)

        # Phase-specific board layout and start positions.
        board = np.full((11, 11), 0)
        if phase == 0:
            random_pos = np.random.choice(
                board.shape[0], (2, 2), replace=False)
            positions = [random_pos[0].tolist(), random_pos[1].tolist()]
        else:
            # Value 2 is presumably an obstacle tile separating the agents --
            # verify against the pommerman item constants.
            board[5, :] = (np.ones(11) * 2)
            positions = [
                [random.randint(0, 4), random.randint(0, 10)],
                [random.randint(6, 10), random.randint(0, 10)],
            ]

        # Start from the fixed agent template and overwrite only positions.
        agents_json = json.loads(
            '[{"agent_id": 0, "is_alive": true, "position": [1, 1], '
            '"ammo": 1, "blast_strength": 2, "can_kick": false}, '
            '{"agent_id": 1, "is_alive": true, "position": [9, 0], '
            '"ammo": 1, "blast_strength": 2, "can_kick": false}]')
        agents_json[0]["position"] = positions[0]
        agents_json[1]["position"] = positions[1]

        # Every value is a string; structured fields are JSON-encoded.
        init_state = {
            'board_size': '11',
            'step_count': '0',
            'board': json.dumps(board.tolist()),
            'agents': json.dumps(agents_json),
            'bombs': '[]',
            'flames': '[]',
            'items': '[]',
            'intended_actions': '[0, 0]',
        }
        self.env._init_game_state = init_state
        self.env.reset()

    self.observation_space = spaces.Dict({
        'boards':
        spaces.Box(low=-1, high=25, shape=(11, 11, 18), dtype=np.float32),
        'states':
        spaces.Box(low=-1, high=25, shape=(8, ), dtype=np.float32)
    })
    self.action_space = self.env.action_space
def main(args):
    """Train a PPO agent against three SimpleAgents and report win stats.

    A previously saved model under ``models/v1`` is restored when a
    ``checkpoint`` file exists there. The model is saved again after the
    run, even if the run raises.

    Args:
        args: Object exposing ``episodes`` (episode count handed to the
            runner) and ``visualize`` (forwarded to ``WrappedEnv``).
    """
    version = 'v1'
    model_dir = os.path.join('models', version)
    episodes = args.episodes
    visualize = args.visualize

    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)

    # The shape notes on the conv layers are the original author's
    # annotations; they are not verified here.
    agent = PPOAgent(
        states=dict(type='float', shape=(11, 11, 12)),
        actions=dict(type='int', num_actions=env.action_space.n),
        network=[
            # (9, 9, 12)
            dict(type='conv2d', size=12, window=3, stride=1),
            # (7, 7, 8)
            dict(type='conv2d', size=8, window=3, stride=1),
            # (5, 5, 4)
            dict(type='conv2d', size=4, window=3, stride=1),
            # (100)
            dict(type='flatten'),
            dict(type='dense', size=64, activation='relu'),
            dict(type='dense', size=16, activation='relu'),
        ],
        batching_capacity=1000,
        step_optimizer=dict(type='adam', learning_rate=1e-4))

    if os.path.exists(os.path.join(model_dir, 'checkpoint')):
        agent.restore_model(directory=model_dir)

    # Seats 0-2 are SimpleAgent opponents; seat 3 is the learning agent.
    agents = [
        SimpleAgent(config["agent"](agent_id, config["game_type"]))
        for agent_id in range(3)
    ]
    agents.append(
        TensorforceAgent(config["agent"](3, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    wrapped_env = WrappedEnv(env, agent, visualize)
    runner = Runner(agent=agent, environment=wrapped_env)

    # Save whatever was learned even if the run is interrupted or fails;
    # any exception still propagates to the caller.
    try:
        runner.run(episodes=episodes, max_episode_timesteps=100)
    finally:
        agent.save_model(directory=os.path.join(model_dir, 'agent'))

    # An episode counts as a win when its reward is exactly 1.
    win_count = sum(1 for reward in runner.episode_rewards if reward == 1)
    print('Stats: ')
    print(f' runner.episode_rewards = {runner.episode_rewards}')
    print(f' win count = {win_count}')

    runner.close()
from ray.rllib.agents.ppo import PPOTrainer
from rllib_training.envs import PommeRllib
from pommerman import configs

# Pommerman FFA "fast" configuration, passed to the trainer under the
# "env_config" key.
config = configs.ffa_v0_fast_env()

# PPO trainer over the PommeRllib environment: 9 workers, 1 GPU.
trainer = PPOTrainer(
    env=PommeRllib,
    config=dict(
        num_workers=9,
        num_gpus=1,
        env_config=config,
    ),
)