def env_for_players(self):
    """Build a Pommerman FFA environment for interactive play.

    Slot 0 holds the DQN learner, slot 1 a human-controlled PlayerAgent,
    and slots 2-3 random agents. Only the DQN agent is registered as the
    training agent.

    Returns:
        The configured Pomme environment.
    """
    config = ffa_v0_fast_env(30)
    env = Pomme(**config["env_kwargs"])

    # One agent class per board slot, instantiated in slot order.
    roster = (DQN, PlayerAgent, RandomAgent, RandomAgent)
    agents = [
        agent_cls(config["agent"](slot, config["game_type"]))
        for slot, agent_cls in enumerate(roster)
    ]

    env.set_agents(agents)
    env.set_training_agent(agents[0].agent_id)  # training agent is the DQN only
    env.set_init_game_state(None)
    return env
def set_pommerman_env(agent_id=0):
    """Create a seeded FFA environment with one DQN agent and three SimpleAgents.

    Args:
        agent_id: board slot (0-3) that receives the DQN learner; every
            other slot gets a SimpleAgent.

    Returns:
        The configured Pomme environment.
    """
    # Instantiate the environment
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    # Fix both RNG streams so runs are reproducible.
    np.random.seed(0)
    env.seed(0)

    # Add 3 SimpleAgents and 1 DQN agent, slot by slot.
    agents = []
    for slot in range(4):
        agent_cfg = config["agent"](slot, config["game_type"])
        if slot == agent_id:
            agents.append(DQN(agent_cfg))
        else:
            agents.append(SimpleAgent(agent_cfg))
    env.set_agents(agents)

    # training agent is only the dqn agent
    env.set_training_agent(agents[agent_id].agent_id)
    env.set_init_game_state(None)
    return env
def get_env(agent_id=0):
    """Build an FFA environment with a DQN learner and three SimpleAgents.

    Generalized from the original hard-coded slot 0 to match the sibling
    ``set_pommerman_env``: the DQN agent can now occupy any board slot,
    while the default preserves the original behavior exactly.

    Args:
        agent_id: board slot (0-3) for the trained DQN agent (default 0).

    Returns:
        The configured Pomme environment.
    """
    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])

    # DQN in the requested slot, SimpleAgents everywhere else.
    agents = [
        DQN(config["agent"](i, config["game_type"]))
        if i == agent_id
        else SimpleAgent(config["agent"](i, config["game_type"]))
        for i in range(4)
    ]
    env.set_agents(agents)
    env.set_training_agent(agents[agent_id].agent_id)
    env.set_init_game_state(None)
    return env
# NOTE(review): fragment — this chunk begins mid-call: the kwargs below close
# an agent-constructor call (e.g. ``agent = ...Agent(...``) whose opening lies
# outside the visible source. Do not treat this as a standalone block.
learning_rate=1e-3,
summarizer=dict(
    directory="./board5/",  # TensorBoard summary output directory
    # steps=50,
    summaries='all'))

# Add 3 SimpleAgents in slots 0-2, then the Tensorforce learner in slot 3.
agents = []
for agent_id in range(3):
    agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))
# Add TensorforceAgent
agent_id += 1
agents.append(TensorforceAgent(config["agent"](agent_id, config["game_type"])))
env.set_agents(agents)
env.set_training_agent(agents[-1].agent_id)  # only the last slot is trained
env.set_init_game_state(None)

# Instantiate and run the environment
# NOTE(review): WrappedEnv arg meanings (True, 600) are not visible from this
# chunk — presumably visualize flag and max episode timesteps; confirm against
# the WrappedEnv definition.
wrapped_env = WrappedEnv(env, env.observation_space, env.action_space, True, 600)
runner = Runner(agent=agent, environment=wrapped_env, max_episode_timesteps=600)
runner.run(num_episodes=15000)

# Save agent model
# - format: 'numpy' or 'hdf5' store only weights, 'checkpoint' stores full TensorFlow model
# NOTE(review): hard-coded absolute Windows path — not portable; consider a CLI argument.
runner.agent.save(directory="C:\\Users\\ali_k\\Desktop\\my_model", format='checkpoint')
def main(args):
    """Train a conv-net PPO agent against three SimpleAgents in Pommerman FFA.

    Restores a checkpoint from ``models/<version>/`` when one exists, runs
    ``args.episodes`` training episodes, always saves the model on exit
    (even on interruption), and prints reward stats and the win count.

    Args:
        args: parsed CLI namespace; reads ``args.episodes`` and
            ``args.visualize``.
    """
    version = 'v1'
    episodes = args.episodes
    visualize = args.visualize

    config = ffa_v0_fast_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)  # fixed seed for reproducibility

    agent = PPOAgent(
        states=dict(type='float', shape=(11, 11, 12)),
        actions=dict(type='int', num_actions=env.action_space.n),
        network=[
            # (9, 9, 12)
            dict(type='conv2d', size=12, window=3, stride=1),
            # (7, 7, 8)
            dict(type='conv2d', size=8, window=3, stride=1),
            # (5, 5, 4)
            dict(type='conv2d', size=4, window=3, stride=1),
            # (100)
            dict(type='flatten'),
            dict(type='dense', size=64, activation='relu'),
            dict(type='dense', size=16, activation='relu'),
        ],
        batching_capacity=1000,
        step_optimizer=dict(type='adam', learning_rate=1e-4))

    # Resume from a previous checkpoint when one exists.
    if os.path.exists(os.path.join('models', version, 'checkpoint')):
        agent.restore_model(directory=os.path.join('models', version))

    # Opponents: three SimpleAgents in slots 0-2; the learner in slot 3.
    agents = []
    for agent_id in range(3):
        agents.append(
            SimpleAgent(config["agent"](agent_id, config["game_type"])))
    agent_id += 1
    agents.append(
        TensorforceAgent(config["agent"](agent_id, config["game_type"])))

    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)  # only the learner is trained
    env.set_init_game_state(None)

    wrapped_env = WrappedEnv(env, agent, visualize)
    runner = Runner(agent=agent, environment=wrapped_env)
    try:
        runner.run(episodes=episodes, max_episode_timesteps=100)
    finally:
        # Persist the model even when training raises or is interrupted.
        # (try/finally replaces the original's redundant
        # ``except Exception as e: raise e`` — same behavior, clearer.)
        agent.save_model(directory=os.path.join('models', version, 'agent'))

    # A reward of exactly 1 marks an episode the training agent won.
    win_count = sum(1 for reward in runner.episode_rewards if reward == 1)
    print('Stats: ')
    print(f' runner.episode_rewards = {runner.episode_rewards}')
    print(f' win count = {win_count}')

    # Original wrapped this in try/except AttributeError + re-raise, which is
    # equivalent to not catching at all.
    runner.close()
def main():
    """Train a Tensorforce PPO agent (spec loaded from ``ppo.json``) against
    three SimpleAgents in the Pommerman competition FFA environment.

    Reads module-level globals ``args``, ``EXPERIMENT_NAME``, and ``EPISODES``
    defined outside this chunk — TODO confirm where. Reward/episode histories
    are periodically appended to a pickle file named ``EXPERIMENT_NAME``.

    NOTE(review): indentation was reconstructed from a collapsed source; the
    checkpoint/pickle ``try`` is nested under the every-1000-episodes branch,
    which the nonlocal buffer resets strongly suggest — confirm against VCS.
    """
    # Print all possible environments in the Pommerman registry
    # Instantiate the environment
    DETERMINISTIC = False  # NOTE(review): assigned but never read in this chunk
    VISUALIZE = False
    if args.test:
        # Test mode: deterministic policy + on-screen rendering.
        DETERMINISTIC = True
        VISUALIZE = True
    config = ffa_competition_env()
    env = Pomme(**config["env_kwargs"])
    env.seed(0)  # fixed seed for reproducibility

    # Create a Proximal Policy Optimization agent.
    # ``agent`` first holds the JSON spec dict, then is rebound to the built agent.
    with open('ppo.json', 'r') as fp:
        agent = json.load(fp=fp)
    with open('mlp2_lstm_network.json', 'r') as fp:
        network = json.load(fp=fp)
    agent = Agent.from_spec(
        spec=agent,
        kwargs=dict(
            states=dict(type='float', shape=env.observation_space.shape),
            actions=dict(type='int', num_actions=env.action_space.n),
            network=network
        )
    )

    # Add 3 random agents
    agents = []
    for agent_id in range(3):
        agents.append(SimpleAgent(config["agent"](agent_id, config["game_type"])))

    # Add TensorforceAgent
    agent_id += 1
    agents.append(TensorforceAgent(config["agent"](agent_id, config["game_type"])))
    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)  # only the last slot learns
    env.set_init_game_state(None)

    # Instantiate and run the environment for 5 episodes.
    if VISUALIZE:
        wrapped_env = WrappedEnv(env, True)
    else:
        wrapped_env = WrappedEnv(env)
    runner = Runner(agent=agent, environment=wrapped_env)

    # Reward/episode-number histories buffered between pickle flushes.
    rewards = []
    episodes = []

    def episode_finished(r):
        # Per-episode callback: logs progress, checkpoints the model and
        # flushes the reward history every 1000 episodes. Returns True so
        # the runner continues.
        nonlocal episodes
        nonlocal rewards
        print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode, ts=r.episode_timestep, reward=r.episode_rewards[-1]))
        if r.episode % 1000 == 0:
            agent.save_model(('./{}').format(EXPERIMENT_NAME), False)
            try:
                # Append the buffered history to the existing pickle, then
                # reset the buffers so entries are not written twice.
                prev_data = pickle.load(open(EXPERIMENT_NAME, "rb"))
                prev_len = len(prev_data[0])
                prev_data[0].extend(rewards)
                rewards = []
                prev_data[1].extend(episodes)
                episodes = []
                pickle.dump(prev_data, open(EXPERIMENT_NAME, "wb"))
            except (OSError, IOError) as e:
                # No pickle yet — start a fresh [rewards, episodes] file.
                pickle.dump([rewards, episodes], open(EXPERIMENT_NAME, "wb"))
        if r.episode_rewards[-1] >= 5:
            print()
            print()
            print()
            print("WINNER WINNER CHICKEN DINNER")
        episodes.append(r.episode)
        rewards.append(r.episode_rewards[-1])
        return True

    # Restore, Train, and Save Model
    if args.test or args.resume:
        # If test, change settings and restore model
        agent.restore_model('./','PPO_K_someS_500batch_biggerreward_99dis')
    runner.run(episodes=EPISODES, max_episode_timesteps=2000, episode_finished=episode_finished, deterministic=False)
    if not args.test:
        agent.save_model(('./{}').format(EXPERIMENT_NAME), False)
    print("Stats: ", runner.episode_rewards[-5:], runner.episode_timesteps[-5:])

    #Dump reward values
    try:
        prev_data = pickle.load(open(EXPERIMENT_NAME, "rb"))
        prev_len = len(prev_data[0])
        prev_data[0].extend(rewards)
        prev_data[1].extend(episodes)
        print(episodes)
        pickle.dump(prev_data, open(EXPERIMENT_NAME, "wb"))
    except (OSError, IOError) as e:
        pickle.dump([rewards, episodes], open(EXPERIMENT_NAME, "wb"))

    try:
        runner.close()
    except AttributeError as e:
        # NOTE(review): swallows AttributeError — presumably the runner may
        # lack close() in this Tensorforce version; confirm.
        pass