def set_pommerman_env(agent_id=0):
    """Build an FFA Pommerman env with one DQN learner and three SimpleAgents."""
    # Instantiate the environment
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    # Seed NumPy and the environment for reproducibility
    np.random.seed(0)
    env.seed(0)
    # Add 3 Simple Agents and 1 DQN agent
    agents = [
        DQN(config["agent"](agent_id, config["game_type"])) if i == agent_id
        else SimpleAgent(config["agent"](i, config["game_type"]))
        for i in range(4)
    ]
    env.set_agents(agents)
    env.set_training_agent(
        agents[agent_id].agent_id)  # the DQN agent is the only training agent
    env.set_init_game_state(None)

    return env
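
A minimal rollout with this helper might look like the sketch below (assuming the Pommerman imports used above are in scope; since a training agent is set, env.act skips it, so a placeholder action is inserted for the DQN slot):

env = set_pommerman_env(agent_id=0)
obs = env.reset()
done = False
while not done:
    actions = env.act(obs)  # actions from the three SimpleAgents only
    actions.insert(0, 0)    # placeholder action for the training agent's slot
    obs, reward, done, info = env.step(actions)
env.close()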
Example #2
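This snippet uses config and env without defining them; presumably they were created earlier along the lines of the other examples (ffa_v0_fast_env is a guess at which config was used):

config = ffa_v0_fast_env()
env = Pomme(**config["env_kwargs"])
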
# Add 3 static agents
agents = {}
for agent_id in range(3):
    agents[agent_id] = StaticAgent(config["agent"](agent_id, config["game_type"]))

# Add a human agent controlled with the arrow keys
agent_id += 1
agents[3] = PlayerAgent(config["agent"](agent_id, config["game_type"]), "arrows")

env.set_agents(list(agents.values()))
env.set_init_game_state(None)


# Seed and reset the environment
env.seed(0)
obs = env.reset()

# Run the agents until we're done
done = False
while not done:
    env.render()
    actions = env.act(obs)  # collect one action per agent
    obs, reward, done, info = env.step(actions)
env.render(close=True)
env.close()

# Print the result
print(info)  # info contains the game result, e.g. the winning agents
Example #3
class MultiAgent(MultiAgentEnv):
    """Curriculum Pommerman team environment; set_phase() switches setups."""
    def __init__(self):
        super(MultiAgent, self).__init__()
        self.phase = 0
        self.setup()

    def setup(self):
        agents = []
        # Phase 0: sparse rigid walls (20), two passive opponents
        if self.phase == 0:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 20
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        # Phase 1: same opponents, denser rigid walls (36)
        elif self.phase == 1:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        # Phase 2: two NoDoAgent opponents
        elif self.phase == 2:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        # Phase 3: identical setup to phase 2
        elif self.phase == 3:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 2
            config["env_kwargs"]["num_items"] = 2
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(0, NoDoAgent(config["agent"](0,
                                                       config["game_type"])))
            agents.insert(2, NoDoAgent(config["agent"](2,
                                                       config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        # Phase 4: no wood, extra items, SuicidalAgent and SimpleAgent opponents
        elif self.phase == 4:
            self.agents_index = [1, 3]
            self.enemies_agents_index = [0, 2]
            config = team_v0_fast_env()
            config["env_kwargs"]["num_wood"] = 0
            config["env_kwargs"]["num_items"] = 10
            config["env_kwargs"]["num_rigid"] = 36
            agents.insert(
                0, SuicidalAgent(config["agent"](0, config["game_type"])))
            agents.insert(2,
                          SimpleAgent(config["agent"](2, config["game_type"])))
            print(config["env_kwargs"])
            self.env = Pomme(**config["env_kwargs"])
            self.env.seed()

        # Fill the learners' slots (indices 1 and 3) with BaseLineAgent
        for agent_id in self.agents_index:
            agents.insert(
                agent_id,
                BaseLineAgent(config["agent"](agent_id, config["game_type"])))

        self.env.set_agents(agents)
        self.env.set_init_game_state(None)
        self.observation_space = spaces.Dict({
            "boards": spaces.Box(low=-1, high=20, shape=(3, 11, 11)),
            "states": spaces.Box(low=-1, high=20, shape=(9, )),
        })

        self.action_space = self.env.action_space

    def set_phase(self, phase):
        print("learn phase " + str(phase))
        self.phase = phase
        self.setup()
        self.reset()

    def step(self, actions):
        obs = self.env.get_observations()
        all_actions = self.env.act(obs)
        for index in self.agents_index:
            # Default to a no-op (action 0) if the policy supplied no action
            all_actions[index] = actions.get(index, 0)

        new_obs, rewards, terminal, step_info = self.env.step(all_actions)
        obs, rew, done, info = {}, {}, {}, {}
        for i in actions.keys():
            obs[i] = featurize(new_obs[i])
            rew[i] = rewards[i]
            # An agent is done once it dies (reward -1) or the episode ends
            done[i] = rewards[i] == -1 or terminal
            info[i] = step_info

        done["__all__"] = terminal
        return obs, rew, done, info

    def reset(self):
        obs = self.env.reset()
        return {i: featurize(obs[i]) for i in self.agents_index}
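
The featurize helper used above is not shown in this snippet. Judging from the declared observation space (three 11x11 board planes plus a 9-value state vector), it presumably resembles the following sketch; the exact feature choices are assumptions:

import numpy as np

def featurize(obs):
    # Hypothetical reconstruction matching the spaces.Dict declared in setup()
    boards = np.stack([
        obs["board"],
        obs["bomb_blast_strength"],
        obs["bomb_life"],
    ]).astype(np.float32)
    states = np.array(
        list(obs["position"]) +                 # 2 values
        [obs["ammo"], obs["blast_strength"],    # 2 values
         int(obs["can_kick"]),                  # 1 value
         obs["teammate"].value] +               # 1 value
        [e.value for e in obs["enemies"][:3]],  # 3 values
        dtype=np.float32)
    return {"boards": boards, "states": states}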
Example #4
def main(args):
    version = 'v1'
    episodes = args.episodes
    visualize = args.visualize

    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)

    agent = PPOAgent(
        states=dict(type='float', shape=(11, 11, 12)),
        actions=dict(type='int', num_actions=env.action_space.n),
        network=[
            dict(type='conv2d', size=12, window=3, stride=1),  # -> (9, 9, 12)
            dict(type='conv2d', size=8, window=3, stride=1),   # -> (7, 7, 8)
            dict(type='conv2d', size=4, window=3, stride=1),   # -> (5, 5, 4)
            dict(type='flatten'),                              # -> (100,)
            dict(type='dense', size=64, activation='relu'),
            dict(type='dense', size=16, activation='relu'),
        ],
        batching_capacity=1000,
        step_optimizer=dict(type='adam', learning_rate=1e-4))

    if os.path.exists(os.path.join('models', version, 'checkpoint')):
        agent.restore_model(directory=os.path.join('models', version))

    agents = []
    for agent_id in range(3):
        # agents.append(RandomAgent(config["agent"](agent_id, config["game_type"])))
        # agents.append(StoppingAgent(config["agent"](agent_id, config["game_type"])))
        agents.append(
            SimpleAgent(config["agent"](agent_id, config["game_type"])))

    agent_id += 1
    agents.append(
        TensorforceAgent(config["agent"](agent_id, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    wrapped_env = WrappedEnv(env, agent, visualize)
    runner = Runner(agent=agent, environment=wrapped_env)

    try:
        runner.run(episodes=episodes, max_episode_timesteps=100)
    finally:
        # Always save the model, even if training is interrupted
        agent.save_model(directory=os.path.join('models', version, 'agent'))

    win_count = sum(1 for reward in runner.episode_rewards if reward == 1)
    print('Stats: ')
    print(f'  runner.episode_rewards = {runner.episode_rewards}')
    print(f'  win count = {win_count}')

    runner.close()
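
WrappedEnv is not defined in this snippet either. In the Pommerman Tensorforce examples it is a thin adapter around the gym environment; a minimal sketch under the old Tensorforce 0.x Environment API (the constructor signature and the featurize helper are assumptions) could be:

from tensorforce.environments import Environment

class WrappedEnv(Environment):
    # Hypothetical adapter exposing a single training agent to Tensorforce
    def __init__(self, gym, agent=None, visualize=False):
        self.gym = gym
        self.visualize = visualize

    def execute(self, action):
        if self.visualize:
            self.gym.render()
        # Let the scripted agents act, then slot in the learner's action
        obs = self.gym.get_observations()
        all_actions = self.gym.act(obs)
        all_actions.insert(self.gym.training_agent, action)
        state, reward, terminal, _ = self.gym.step(all_actions)
        agent_state = featurize(state[self.gym.training_agent])
        agent_reward = reward[self.gym.training_agent]
        return agent_state, terminal, agent_reward

    def reset(self):
        obs = self.gym.reset()
        return featurize(obs[self.gym.training_agent])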
Example #5
def main():
    # Instantiate the environment
    DETERMINISTIC = False
    VISUALIZE = False

    if args.test:
        DETERMINISTIC = True
        VISUALIZE = True

    config = ffa_competition_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)

    # Create a Proximal Policy Optimization agent
    with open('ppo.json', 'r') as fp:
        agent = json.load(fp=fp)

    with open('mlp2_lstm_network.json', 'r') as fp:
        network = json.load(fp=fp)

    agent = Agent.from_spec(
        spec=agent,
        kwargs=dict(
            states=dict(type='float', shape=env.observation_space.shape),
            actions=dict(type='int', num_actions=env.action_space.n),
            network=network
        )
    )

    # Add 3 simple agents
    agents = []
    for agent_id in range(3):
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

    # Add TensorforceAgent
    agent_id += 1
    agents.append(TensorforceAgent(config["agent"](agent_id, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    # Instantiate and run the environment for EPISODES episodes.
    wrapped_env = WrappedEnv(env, VISUALIZE)

    runner = Runner(agent=agent, environment=wrapped_env)

    rewards = []
    episodes = []
    def episode_finished(r):
        nonlocal episodes
        nonlocal rewards
        print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode, ts=r.episode_timestep,
                                                                             reward=r.episode_rewards[-1]))
        if r.episode % 1000 == 0:
            agent.save_model('./{}'.format(EXPERIMENT_NAME), False)
            try:
                prev_data = pickle.load(open(EXPERIMENT_NAME, "rb"))
                prev_data[0].extend(rewards)
                rewards = []
                prev_data[1].extend(episodes)
                episodes = []
                pickle.dump(prev_data, open(EXPERIMENT_NAME, "wb"))
            except (OSError, IOError):
                pickle.dump([rewards, episodes], open(EXPERIMENT_NAME, "wb"))
        if r.episode_rewards[-1] >= 5:
            print()
            print()
            print()
            print("WINNER WINNER CHICKEN DINNER")
        episodes.append(r.episode)
        rewards.append(r.episode_rewards[-1])
        return True

    # Restore, Train, and Save Model
    if args.test or args.resume:  # If test/resume, restore the saved model
        agent.restore_model('./', 'PPO_K_someS_500batch_biggerreward_99dis')
    runner.run(episodes=EPISODES, max_episode_timesteps=2000,
               episode_finished=episode_finished, deterministic=DETERMINISTIC)

    if not args.test:
        agent.save_model('./{}'.format(EXPERIMENT_NAME), False)
    print("Stats: ", runner.episode_rewards[-5:], runner.episode_timesteps[-5:])

    # Dump reward values
    try:
        prev_data = pickle.load(open(EXPERIMENT_NAME, "rb"))
        prev_data[0].extend(rewards)
        prev_data[1].extend(episodes)
        pickle.dump(prev_data, open(EXPERIMENT_NAME, "wb"))
    except (OSError, IOError):
        pickle.dump([rewards, episodes], open(EXPERIMENT_NAME, "wb"))

    try:
        runner.close()
    except AttributeError:
        # Some Tensorforce versions may not implement Runner.close()
        pass
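
The ppo.json and mlp2_lstm_network.json files loaded above are not shown. Going by the experiment name (500batch, 99dis), they would be Tensorforce 0.x spec files roughly like the following; every value here is an illustrative assumption:

ppo.json:
{
    "type": "ppo_agent",
    "discount": 0.99,
    "batching_capacity": 500,
    "step_optimizer": {"type": "adam", "learning_rate": 1e-4}
}

mlp2_lstm_network.json:
[
    {"type": "dense", "size": 64, "activation": "relu"},
    {"type": "dense", "size": 64, "activation": "relu"},
    {"type": "internal_lstm", "size": 64}
]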