def env_for_players(self):
        """Build a Pomme environment seating a DQN agent, a player and two random agents.

        The configuration comes from ``ffa_v0_fast_env(30)``. Agent ids follow
        the seat order: DQN (0), PlayerAgent (1), RandomAgent (2 and 3).

        Returns:
            The configured ``Pomme`` environment, with the DQN agent registered
            as the training agent.
        """
        cfg = ffa_v0_fast_env(30)
        game = Pomme(**cfg["env_kwargs"])

        # One wrapper class per seat; the index in this tuple is the agent id.
        seats = (DQN, PlayerAgent, RandomAgent, RandomAgent)
        roster = [
            wrapper(cfg["agent"](seat, cfg["game_type"]))
            for seat, wrapper in enumerate(seats)
        ]

        game.set_agents(roster)
        # Only the DQN agent in seat 0 is registered as the training agent.
        game.set_training_agent(roster[0].agent_id)
        game.set_init_game_state(None)
        return game
def set_pommerman_env(agent_id=0):
    """Create a seeded Pomme environment with one DQN agent and SimpleAgents.

    Args:
        agent_id: Seat (0-3) given to the DQN agent; every other seat is
            filled with a ``SimpleAgent``.

    Returns:
        The configured ``Pomme`` environment, with the agent at index
        ``agent_id`` registered as the training agent.
    """
    cfg = ffa_v0_fast_env()
    game = Pomme(**cfg["env_kwargs"])

    # Seed both NumPy's global RNG and the environment with 0.
    np.random.seed(0)
    game.seed(0)

    # Fill the four seats: DQN at `agent_id`, SimpleAgent everywhere else.
    roster = []
    for seat in range(4):
        if seat == agent_id:
            roster.append(DQN(cfg["agent"](agent_id, cfg["game_type"])))
        else:
            roster.append(SimpleAgent(cfg["agent"](seat, cfg["game_type"])))

    game.set_agents(roster)
    # The agent sitting at index `agent_id` is the one being trained.
    game.set_training_agent(roster[agent_id].agent_id)
    game.set_init_game_state(None)
    return game
def get_env():
    """Create a Pomme environment with a DQN agent in seat 0 and three SimpleAgents.

    Returns:
        The configured ``Pomme`` environment, with the DQN agent registered
        as the training agent.
    """
    cfg = ffa_v0_fast_env()
    game = Pomme(**cfg["env_kwargs"])

    # Seat 0 is the learner; seats 1-3 are SimpleAgent opponents.
    learner = DQN(cfg["agent"](0, cfg["game_type"]))
    opponents = [
        SimpleAgent(cfg["agent"](seat, cfg["game_type"]))
        for seat in range(1, 4)
    ]
    roster = [learner] + opponents

    game.set_agents(roster)
    game.set_training_agent(roster[0].agent_id)
    game.set_init_game_state(None)
    return game
    learning_rate=1e-3,
    summarizer=dict(
        directory="./board5/",
        #steps=50,
        summaries='all'))

# Training script body. `config`, `env` and `agent` are created earlier in the
# script, outside this excerpt.

# Seats 0-2: three SimpleAgent opponents.
agents = []
for agent_id in range(3):
    agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

# Seat 3: the TensorforceAgent. `agent_id` still holds the last loop value (2)
# here, so the increment below yields 3.
agent_id += 1
agents.append(TensorforceAgent(config["agent"](agent_id, config["game_type"])))
env.set_agents(agents)
# The last agent in the list (the TensorforceAgent) is the one being trained.
env.set_training_agent(agents[-1].agent_id)
env.set_init_game_state(None)

# Wrap the environment and train for 15000 episodes.
# NOTE(review): the positional `True` and `600` passed to WrappedEnv are
# presumably a visualize flag and a timestep cap (600 matches
# max_episode_timesteps below) — confirm against WrappedEnv's signature.
wrapped_env = WrappedEnv(env, env.observation_space, env.action_space, True,
                         600)
runner = Runner(agent=agent,
                environment=wrapped_env,
                max_episode_timesteps=600)
runner.run(num_episodes=15000)

# Save agent model
# - format: 'numpy' or 'hdf5' store only weights, 'checkpoint' stores full TensorFlow model
# NOTE(review): the target directory is a hard-coded absolute Windows path.
runner.agent.save(directory="C:\\Users\\ali_k\\Desktop\\my_model",
                  format='checkpoint')
Beispiel #5
0
def main(args):
    """Train a PPO agent against three SimpleAgents and print its win count.

    Builds a fast FFA Pommerman environment seeded with 0, restores an earlier
    model from ``models/<version>`` when a ``checkpoint`` file exists there,
    runs training, and always saves the model afterwards, even if training
    raised.

    Args:
        args: Parsed command-line options. ``args.episodes`` is the number of
            episodes handed to the runner and ``args.visualize`` is forwarded
            to ``WrappedEnv``.
    """
    version = 'v1'
    episodes = args.episodes
    visualize = args.visualize
    model_dir = os.path.join('models', version)

    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)

    # NOTE(review): the shape comments below assume un-padded ('valid')
    # convolutions — confirm against the conv2d layer's padding default.
    agent = PPOAgent(
        states=dict(type='float', shape=(11, 11, 12)),
        actions=dict(type='int', num_actions=env.action_space.n),
        network=[
            # (9, 9, 12)
            dict(type='conv2d', size=12, window=3, stride=1),
            # (7, 7, 8)
            dict(type='conv2d', size=8, window=3, stride=1),
            # (5, 5, 4)
            dict(type='conv2d', size=4, window=3, stride=1),
            # (100)
            dict(type='flatten'),
            dict(type='dense', size=64, activation='relu'),
            dict(type='dense', size=16, activation='relu'),
        ],
        batching_capacity=1000,
        step_optimizer=dict(type='adam', learning_rate=1e-4))

    # Resume from an earlier run when a checkpoint index file is present.
    if os.path.exists(os.path.join(model_dir, 'checkpoint')):
        agent.restore_model(directory=model_dir)

    # Seats 0-2 are SimpleAgent opponents; the learner takes the next free
    # seat, so its id equals the number of opponents.
    agents = [
        SimpleAgent(config["agent"](seat, config["game_type"]))
        for seat in range(3)
    ]
    training_id = len(agents)
    agents.append(
        TensorforceAgent(config["agent"](training_id, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    wrapped_env = WrappedEnv(env, agent, visualize)
    runner = Runner(agent=agent, environment=wrapped_env)

    # Save the model whether or not training finished cleanly; any exception
    # from the run still propagates to the caller afterwards.
    try:
        runner.run(episodes=episodes, max_episode_timesteps=100)
    finally:
        agent.save_model(directory=os.path.join(model_dir, 'agent'))

    # An episode counts as a win when its reward is exactly 1.
    win_count = sum(1 for reward in runner.episode_rewards if reward == 1)
    print('Stats: ')
    print(f'  runner.episode_rewards = {runner.episode_rewards}')
    print(f'  win count = {win_count}')

    runner.close()
Beispiel #6
0
def main():
    """Train or evaluate a JSON-configured Tensorforce agent on Pommerman.

    Relies on module-level names defined outside this function: ``args``
    (``args.test`` and ``args.resume`` are read), ``EXPERIMENT_NAME`` (used
    both as the model save path and as the pickle file holding the reward
    history) and ``EPISODES`` (number of episodes to run).

    The agent spec is read from ``ppo.json`` and the network spec from
    ``mlp2_lstm_network.json``. Per-episode rewards and episode numbers are
    buffered in memory and appended to the pickle file every 1000 episodes
    and once more after the run.
    """
    # Test mode turns on both deterministic action selection and rendering.
    deterministic = bool(args.test)
    visualize = bool(args.test)

    config = ffa_competition_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)

    # Load the agent and network specifications from JSON.
    with open('ppo.json', 'r') as fp:
        agent_spec = json.load(fp=fp)

    with open('mlp2_lstm_network.json', 'r') as fp:
        network = json.load(fp=fp)

    agent = Agent.from_spec(
        spec=agent_spec,
        kwargs=dict(
            states=dict(type='float', shape=env.observation_space.shape),
            actions=dict(type='int', num_actions=env.action_space.n),
            network=network
        )
    )

    # Seats 0-2 are SimpleAgent opponents; the TensorforceAgent takes the
    # next free seat, so its id equals the number of opponents.
    agents = [
        SimpleAgent(config["agent"](seat, config["game_type"]))
        for seat in range(3)
    ]
    agents.append(
        TensorforceAgent(config["agent"](len(agents), config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)

    # NOTE(review): WrappedEnv's second positional argument is presumably a
    # visualize flag — confirm against its signature.
    if visualize:
        wrapped_env = WrappedEnv(env, True)
    else:
        wrapped_env = WrappedEnv(env)

    runner = Runner(agent=agent, environment=wrapped_env)

    model_path = ('./{}').format(EXPERIMENT_NAME)
    # Buffers holding the results not yet written to the history file.
    rewards = []
    episodes = []

    def append_history():
        """Append the buffered results to the pickle file.

        Returns True when an existing history file was found and extended,
        False when a new one was started. The buffers are left untouched.
        """
        try:
            with open(EXPERIMENT_NAME, "rb") as fp:
                history = pickle.load(fp)
            found = True
        except OSError:
            # No readable history file yet: start a fresh one.
            history = [[], []]
            found = False
        history[0].extend(rewards)
        history[1].extend(episodes)
        with open(EXPERIMENT_NAME, "wb") as fp:
            pickle.dump(history, fp)
        return found

    def episode_finished(r):
        """Log the episode, checkpoint every 1000 episodes, buffer the result."""
        print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode, ts=r.episode_timestep,
                                                                             reward=r.episode_rewards[-1]))
        if r.episode % 1000 == 0:
            agent.save_model(model_path, False)
            append_history()
            # Clear the buffers after every flush so the next flush cannot
            # write the same entries a second time.
            rewards.clear()
            episodes.clear()
        if r.episode_rewards[-1] >= 5:
            print()
            print()
            print()
            print("WINNER WINNER CHICKEN DINNER")
        episodes.append(r.episode)
        rewards.append(r.episode_rewards[-1])
        # The runner is always told to continue.
        return True

    # Restore the saved model when testing or resuming, then run.
    if args.test or args.resume:
        agent.restore_model('./', 'PPO_K_someS_500batch_biggerreward_99dis')
    runner.run(episodes=EPISODES, max_episode_timesteps=2000,
               episode_finished=episode_finished, deterministic=deterministic)

    if not args.test:
        agent.save_model(model_path, False)
    print("Stats: ", runner.episode_rewards[-5:], runner.episode_timesteps[-5:])

    # Write whatever is still buffered since the last periodic flush.
    if append_history():
        print(episodes)

    # Best-effort cleanup: a missing close() is deliberately ignored.
    try:
        runner.close()
    except AttributeError:
        pass