def __init__(self, env_id, random_side=True, agent_list=None, rule_agents=None,
             replay_dir=None, n_player=4):
    """Create the underlying Pommerman environment and wrap it.

    Args:
        env_id: Environment id handed to ``pommerman.make``; its string form
            is also recorded under the ``"mode"`` key of the replay data.
        random_side: Stored as ``self._random_side`` before
            ``self.random_side()`` is invoked.
        agent_list: ``None`` to play with ``n_player`` ``RandomAgent``
            instances, or a comma-separated string holding exactly
            ``n_player`` agent specs for ``helpers.make_agent_from_string``.
        rule_agents: Stored as ``self.rule_agents``. ``None`` (the default)
            yields a fresh empty list for every instance.
        replay_dir: Stored as ``self._replay_dir``.
        n_player: Number of agents in the game.

    Raises:
        AssertionError: If ``agent_list`` is given but is not a string or
            does not contain exactly ``n_player`` entries.
    """
    self.n_player = n_player
    self.base_agents = [agents.RandomAgent() for _ in range(n_player)]
    if agent_list is None:
        self.agent_list = self.base_agents
    else:
        assert isinstance(agent_list, str)
        agent_list = agent_list.split(',')
        assert len(agent_list) == n_player
        self.agent_list = [
            helpers.make_agent_from_string(agent, i)
            for i, agent in enumerate(agent_list)
        ]

    # Make the environment using the agent list
    env = pommerman.make(env_id, self.agent_list)
    if agent_list is not None:
        # The base agents were not handed to the environment in this case,
        # so they are initialised here explicitly.
        for id_, agent in enumerate(self.base_agents):
            agent.init_agent(id_, env.spec._kwargs['game_type'])

    super(PommeBase, self).__init__(env)

    # A literal ``[]`` default would be one list object shared by every
    # instance; build a new one per instance instead.
    self.rule_agents = [] if rule_agents is None else rule_agents
    self._random_side = random_side
    self.random_side()
    self._uuid = str(uuid.uuid1())[:8]
    self._replay_dir = replay_dir
    self._replay_data = {"mode": str(env_id)}
def main():
    '''CLI interface to bootstrap training.

    Parses the playground flags, builds one agent per entry of ``--agents``
    and prints the type of the first agent.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument(
        "--config",
        default="PommeFFACompetition-v0",
        help="Configuration to execute. See env_ids in "
        "configs.py for options.")
    parser.add_argument(
        "--agents",
        default="tensorflow::agents.TensorFlowAgent,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent,",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")
    parser.add_argument(
        "--record_pngs_dir",
        default=None,
        help="Directory to record the PNGs of the game. "
        "Doesn't record if None.")
    parser.add_argument(
        "--record_json_dir",
        default=None,
        help="Directory to record the JSON representations of "
        "the game. Doesn't record if None.")
    parser.add_argument(
        "--render",
        default=False,
        action='store_true',
        help="Whether to render or not. Defaults to False.")
    parser.add_argument(
        "--game_state_file",
        default=None,
        help="File from which to load game state. Defaults to "
        "None.")
    args = parser.parse_args()

    # The default value ends with a comma, so a plain split() produces a
    # trailing empty spec; drop blank entries before building agents.
    agent_strings = [
        agent_string for agent_string in args.agents.split(",")
        if agent_string.strip()
    ]
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(agent_strings)
    ]
    print(type(agents[0]))
def main():
    '''CLI interface to bootstrap training.

    Builds the agents and environment from the command line, trains the
    first ``TensorForceAgent`` found with a ``Runner``, saves the model to
    ``--checkpoint`` and prints win/lose/draw statistics.

    Raises:
        ValueError: If ``--agents`` contains no ``TensorForceAgent``.
        AssertionError: If a recording directory already exists.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--agents",
                        default="tensorforce::ppo,test::agents.SimpleAgent,"
                        "test::agents.SimpleAgent,test::agents.SimpleAgent",
                        help="Comma delineated list of agent types and docker "
                        "locations to run the agents.")
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument("--checkpoint",
                        default="models/ppo",
                        help="Directory where checkpoint file stored to.")
    parser.add_argument("--num_of_episodes",
                        default="10",
                        help="Number of episodes")
    parser.add_argument("--max_timesteps",
                        default="2000",
                        help="Number of steps")
    args = parser.parse_args()

    config = args.config
    game_state_file = args.game_state_file
    checkpoint = args.checkpoint
    num_of_episodes = int(args.num_of_episodes)
    max_timesteps = int(args.max_timesteps)

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    env = make(config, agents, game_state_file)

    # The first TensorForce agent in the list is the one being trained.
    training_agent = None
    for agent in agents:
        if isinstance(agent, TensorForceAgent):
            training_agent = agent
            env.set_training_agent(agent.agent_id)
            break

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    if training_agent is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            "--agents must contain a TensorForceAgent to train.")

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    runner = Runner(agent=agent, environment=wrapped_env)
    runner.run(episodes=num_of_episodes, max_episode_timesteps=max_timesteps)
    print("Stats: ", runner.episode_rewards[-30:], runner.episode_timesteps,
          runner.episode_times)

    agent.save_model(checkpoint)

    rewards = runner.episode_rewards
    win = rewards.count(1)
    lose = rewards.count(-1)
    draw = rewards.count(0)
    total = win + lose + draw
    # total is 0 when no episode ended with a reward of exactly 1, -1 or 0
    # (for example when no episode was run); avoid dividing by zero then.
    ratio = round((win / total) * 100.0, 2) if total else 0.0
    print("Results ({}%) = Win({}), Lose({}), Draw({})".format(
        ratio, win, lose, draw))

    try:
        runner.close()
    except AttributeError:
        # Best effort: the runner in use may not provide close().
        pass
def run(args, num_times=1, seed=None):
    '''Wrapper to help start the game.

    Args:
        args: Parsed CLI namespace; reads ``config``, ``record_pngs_dir``,
            ``record_json_dir``, ``game_state_file``, ``render_mode``,
            ``do_sleep``, ``agents`` and ``render``.
        num_times: Number of games to play.
        seed: Seed for ``random``, ``numpy.random`` and the environment.
            A random one in ``[0, 1000000]`` is drawn when ``None``.

    Returns:
        List with the final ``info`` of every game played.
    '''
    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file
    render_mode = args.render_mode
    do_sleep = args.do_sleep

    agents = [
        helpers.make_agent_from_string(agent_string, agent_id)
        for agent_id, agent_string in enumerate(args.agents.split(','))
    ]

    env = make(config, agents, game_state_file, render_mode=render_mode)

    def _run(record_pngs_dir=None, record_json_dir=None):
        '''Runs a game'''
        print("Starting the Game.")
        if record_pngs_dir and not os.path.isdir(record_pngs_dir):
            os.makedirs(record_pngs_dir)
        if record_json_dir and not os.path.isdir(record_json_dir):
            os.makedirs(record_json_dir)

        obs = env.reset()
        done = False

        while not done:
            if args.render:
                env.render(record_pngs_dir=record_pngs_dir,
                           record_json_dir=record_json_dir,
                           do_sleep=do_sleep)
            actions = env.act(obs)
            obs, reward, done, info = env.step(actions)

        print("Final Result: ", info)
        if args.render:
            env.render(record_pngs_dir=record_pngs_dir,
                       record_json_dir=record_json_dir,
                       do_sleep=do_sleep)
            if do_sleep:
                time.sleep(5)
            env.render(close=True)

        if record_json_dir:
            finished_at = datetime.now().isoformat()
            _agents = args.agents.split(',')
            utility.join_json_state(record_json_dir, _agents, finished_at,
                                    config)

        return info

    if seed is None:
        # randint() needs integer bounds; a float such as 1e6 raises
        # TypeError on Python 3.12+.
        seed = random.randint(0, 10**6)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    times = []
    for i in range(num_times):
        start = time.time()

        # Every game records into its own numbered sub-directory.
        record_pngs_dir_ = record_pngs_dir + '/%d' % (i+1) \
                           if record_pngs_dir else None
        record_json_dir_ = record_json_dir + '/%d' % (i+1) \
                           if record_json_dir else None
        infos.append(_run(record_pngs_dir_, record_json_dir_))

        times.append(time.time() - start)
        print("Game Time: ", times[-1])

    atexit.register(env.close)
    return infos
def run(args, num_times=1, seed=None):
    '''Play ``num_times`` games and append each result to a text file.

    The result file is named after the text between the first and second
    ``.`` of the first agent spec, plus ``.txt``.

    Args:
        args: Parsed CLI namespace; reads ``config``, ``record_pngs_dir``,
            ``record_json_dir``, ``agent_env_vars``, ``game_state_file``,
            ``agents`` and ``render``.
        num_times: Number of games to play.
        seed: Accepted for interface compatibility but overwritten: game
            ``i`` (0-based) is always seeded with ``i``.

    Returns:
        List with the final ``info`` of every game played.

    Raises:
        AssertionError: If a recording directory already exists.
    '''
    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    agent_env_vars = args.agent_env_vars
    game_state_file = args.game_state_file

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id+1000)
        for agent_id, agent_string in enumerate(args.agents.split(','))
    ]
    result_name = args.agents.split(',')[0].split('.')[1] + ".txt"
    env = make(config, agents, game_state_file)

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    def _run(seed, record_pngs_dir=None, record_json_dir=None):
        '''Runs one game seeded with ``seed`` and logs its final info.'''
        env.seed(seed)
        print("Starting the Game.")
        obs = env.reset()
        done = False
        while not done:
            # NOTE(review): here render is compared to the string "True",
            # while the check after the loop uses plain truthiness; confirm
            # which type --render has in this CLI.
            if args.render == "True":
                env.render(record_pngs_dir=args.record_pngs_dir,
                           record_json_dir=args.record_json_dir)
            actions = env.act(obs)
            obs, reward, done, info = env.step(actions)

        print("Final Result: ", info)
        # Append to the result file through a context manager so the handle
        # is closed and sys.stdout is never swapped out.
        with open(result_name, "a") as result_file:
            print(info, file=result_file)
        if args.render:
            #time.sleep(5)
            env.render(record_pngs_dir=args.record_pngs_dir,
                       record_json_dir=args.record_json_dir, close=True)
        return info

    infos = []
    times = []
    for i in range(num_times):
        start = time.time()
        seed = i
        np.random.seed(seed)
        random.seed(seed)

        record_pngs_dir_ = record_pngs_dir + '/%d' % (i+1) if record_pngs_dir else None
        record_json_dir_ = record_json_dir + '/%d' % (i+1) if record_json_dir else None
        infos.append(_run(seed, record_pngs_dir_, record_json_dir_))

        times.append(time.time() - start)
        print("Game Time: ", times[-1])

    atexit.register(env.close)
    return infos
def run(config, agents_list, record_pngs_dir=None, record_json_dir=None,
        agent_env_vars="", game_state_file=None, render_mode='human',
        do_sleep=True, render=False, num_episodes=1, seed=None):
    '''Play ``num_episodes`` games and return the final ``info`` of each.

    ``agents_list`` is a comma-separated string of agent specs. When a
    recording directory is given, every game writes into its own numbered
    sub-directory (``<dir>/1``, ``<dir>/2``, ...). ``seed`` seeds ``random``,
    ``numpy.random`` and the environment; a random 32-bit one is drawn when
    it is ``None``. ``agent_env_vars`` is accepted but not used here.
    '''
    agents = []
    for slot, spec in enumerate(agents_list.split(',')):
        agents.append(helpers.make_agent_from_string(spec, slot))

    env = make(config, agents, game_state_file, render_mode=render_mode)

    def _ensure_dir(path):
        '''Create ``path`` unless it is falsy or already a directory.'''
        if not path or os.path.isdir(path):
            return
        os.makedirs(path)

    def _play_episode(pngs_dir=None, json_dir=None):
        '''Runs a game'''
        print("Starting the Game.")
        _ensure_dir(pngs_dir)
        _ensure_dir(json_dir)

        def _show():
            env.render(
                record_pngs_dir=pngs_dir,
                record_json_dir=json_dir,
                do_sleep=do_sleep)

        def _dump_json_if_headless():
            # Only when rendering is explicitly off is the JSON state
            # written directly, paced at the environment's render rate.
            if render is False and json_dir:
                env.save_json(json_dir)
                time.sleep(1.0 / env._render_fps)

        obs = env.reset()
        while True:
            if render:
                _show()
            _dump_json_if_headless()
            obs, reward, done, info = env.step(env.act(obs))
            if done:
                break

        print("Final Result: ", info)
        if render:
            _show()
            if do_sleep:
                time.sleep(5)
            env.render(close=True)
        _dump_json_if_headless()

        if json_dir:
            stamp = datetime.now().isoformat()
            utility.join_json_state(json_dir, agents_list.split(','), stamp,
                                    config, info)
        return info

    if seed is None:
        # Pick a random seed between 0 and 2^31 - 1
        seed = random.randint(0, np.iinfo(np.int32).max)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    times = []
    for episode in range(1, num_episodes + 1):
        began = time.time()

        pngs_dir = None
        if record_pngs_dir:
            pngs_dir = record_pngs_dir + '/%d' % episode
        json_dir = None
        if record_json_dir:
            json_dir = record_json_dir + '/%d' % episode

        infos.append(_play_episode(pngs_dir, json_dir))

        times.append(time.time() - began)
        print("Game Time: ", times[-1])

    atexit.register(env.close)
    return infos
def main():
    '''CLI interface to bootstrap training.

    Trains the first ``TensorForceAgent`` of ``--agents`` with a
    ``ParallelRunner`` over ``--numprocs`` wrapped environments, keeps a
    pickled training history next to the saved model under
    ``saved_models/`` and plots the average reward per batch of episodes.

    Raises:
        AssertionError: If ``--episodes`` is not a multiple of
            ``--batch-size`` or a recording directory already exists.
        ValueError: If ``--agents`` contains no ``TensorForceAgent``.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default='PommeFFACompetition-v0',
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options. default is 1v1")
    parser.add_argument(
        "--agents",
        default="tensorforce::ppo,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent",
        #default="tensorforce::ppo,test::agents.RandomAgent,"
        #"test::agents.RandomAgent,test::agents.RandomAgent",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")  #agent in position 1
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument(
        '--batch-size',  # This doesn't change batch-size in tensorforce_agent.py
        default=10,
        type=int,
        help='average reward visualization by batch size. default=100 episodes'
    )
    parser.add_argument(
        '--episodes',
        default=10,
        type=int,
        help=
        'number of training episodes, default=1000. must be divisible by batch_size'
    )
    parser.add_argument(
        '--modelname',
        default='default',
        help=
        'name of model file savename, timesteps wil be appended. default= default'
    )
    parser.add_argument('--loadfile',
                        default=None,
                        help='name of model you want to load')
    parser.add_argument('--numprocs',
                        default=12,
                        type=int,
                        help='num parallel processes. default=12')
    args = parser.parse_args()

    config = args.config
    game_state_file = args.game_state_file
    num_procs = args.numprocs

    #variables
    save_path = 'saved_models/'
    model_name = args.modelname
    batch_size = args.batch_size
    num_episodes = args.episodes
    assert (num_episodes % batch_size == 0)

    agents = [
        helpers.make_agent_from_string(agent_string, agent_id)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    env = make(config, agents, game_state_file)

    # The first TensorForce agent in the list is the one being trained.
    training_agent = None
    for agent in agents:
        if isinstance(agent, TensorForceAgent):
            training_agent = agent
            env.set_training_agent(agent.agent_id)
            break

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    if training_agent is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            "--agents must contain a TensorForceAgent to train.")

    agent = training_agent.initialize(
        env,
        num_procs,
        # summarizer={'directory': 'tensorforce_agent', 'labels': 'graph, losses'},
        #saver={'directory': './'+save_path, 'filename': model_name,'append_timesteps': True}
    )

    # USHA Model should load automatically as saver is provided.
    if args.loadfile:
        agent.restore(directory=save_path, filename=args.loadfile)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_envs = [
        WrappedEnv(env, visualize=args.render) for _ in range(num_procs)
    ]
    # wrapped_env=WrappedEnv(env,visualize=args.render)

    runner_time = timeit.default_timer()

    #load history.pickle
    history = None
    if args.loadfile:
        history_path = save_path + args.modelname + '-history.pkl'
        try:
            # NOTE: pickle.load executes arbitrary code from the file; only
            # load history files this tool wrote itself.
            with open(history_path, 'rb') as handle:
                history = pickle.load(handle)
        except Exception as err:  # best effort: start a new history instead
            print('Could not load history from {}: {}'.format(
                history_path, err))
            history = None

    runner = ParallelRunner(agent=agent, environments=wrapped_envs)
    # runner = Runner(agent=agent, environment=wrapped_env)

    # runner trains off number of global episodes: if you trained 100
    # episodes, num_episodes needs to be 200 if you want to train another 100
    num_episodes += runner.global_episodes

    runner.run(num_episodes=num_episodes, max_episode_timesteps=2000)
    print(runner.episode_rewards)

    if history:
        history['episode_rewards'].extend(runner.episode_rewards)
        history['episode_timesteps'].extend(runner.episode_timesteps)
        history['episode_seconds'].extend(runner.episode_seconds)
        history['episode_agent_seconds'].extend(runner.episode_agent_seconds)
    else:
        history = {
            'episode_rewards': runner.episode_rewards,
            'episode_timesteps': runner.episode_timesteps,
            'episode_seconds': runner.episode_seconds,
            'episode_agent_seconds': runner.episode_agent_seconds,
        }

    # The history is written before the model is saved, so the directory
    # may not exist yet on a first run.
    os.makedirs(save_path, exist_ok=True)
    with open(save_path + model_name + '-history.pkl', 'wb') as handle:
        pickle.dump(history, handle)

    # USHA Model should save automatically as saver is provided.
    agent.save(directory=save_path,
               filename=model_name + str(runner.global_episodes),
               append_timestep=False)
    print('Runner time: ', timeit.default_timer() - runner_time)

    # Average over complete batches only, so a reward count that is not a
    # multiple of batch_size cannot break the reshape.
    rewards = np.asarray(history['episode_rewards'])
    num_batches = len(rewards) // batch_size
    batched = rewards[:num_batches * batch_size].reshape(
        num_batches, batch_size)
    plt.plot(np.arange(0, num_batches), np.mean(batched, axis=1))
    plt.title('average rewards per batch of episodes')
    plt.ylabel('average reward')
    plt.xlabel('batch of ' + str(batch_size) + ' episodes')
    plt.show()

    try:
        runner.close()
    except AttributeError:
        # Best effort: the runner in use may not provide close().
        pass
def main():
    '''CLI interface to bootstrap testing.

    Loads a saved PPO2 model, plays ``testing_episodes`` games with it and
    prints how many were won and the average episode length, then exits.

    Raises:
        AssertionError: If a recording directory already exists.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument(
        "--config",
        default="PommeFFACompetition-v0",
        help="Configuration to execute. See env_ids in "
        "configs.py for options.")
    parser.add_argument(
        "--agents",
        default="tensorforce::ppo2,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")
    parser.add_argument(
        "--agent_env_vars",
        help="Comma delineated list of agent environment vars "
        "to pass to Docker. This is only for the Docker Agent."
        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
        "would send two arguments to Docker Agent 0 and one to"
        " Docker Agent 3.",
        default="")
    parser.add_argument(
        "--record_pngs_dir",
        default=None,
        help="Directory to record the PNGs of the game. "
        "Doesn't record if None.")
    parser.add_argument(
        "--record_json_dir",
        default=None,
        help="Directory to record the JSON representations of "
        "the game. Doesn't record if None.")
    # NOTE(review): with action='store_true' and default=True this flag can
    # never be switched off; the help text now states the real default.
    parser.add_argument(
        "--render",
        default=True,
        action='store_true',
        help="Whether to render or not. Defaults to True.")
    parser.add_argument(
        "--game_state_file",
        default=None,
        help="File from which to load game state. Defaults to "
        "None.")
    args = parser.parse_args()
    print(args)

    config = args.config
    game_state_file = args.game_state_file
    agents_string = args.agents.split(",")

    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(agents_string)
    ]

    env = make(config, agents, game_state_file)

    # The first TensorForce/Baseline agent is the one driven by the model.
    for agent in agents:
        if isinstance(agent, (agnts.TensorForceAgent, agnts.BaselineAgent)):
            env.set_training_agent(agent.agent_id)
            break

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    wrapped_env = WrappedEnvBaselines(env, visualize=args.render)
    atexit.register(functools.partial(clean_up_agents, agents))

    from stable_baselines import PPO2

    print("[INFO] Loading PPO model")
    #models/ppo2/1558920269.0932562
    agent = PPO2.load("pommerman/cli/models/1559359177.602215")

    test_finished_episodes = 0
    test_won_episodes = 0
    test_total_timesteps = 0
    actions_freq = [0, 0, 0, 0, 0, 0]

    # NOTE(review): testing_episodes is not defined in this function; it is
    # presumably a module-level constant -- confirm.
    print("[INFO] Starting testing for {test_length} games".format(
        test_length=testing_episodes))
    observed_state = wrapped_env.reset()
    while test_finished_episodes < testing_episodes:
        test_total_timesteps += 1
        action, _states = agent.predict(observed_state)
        actions_freq[action] += 1
        observed_state, reward, episode_finished, info = wrapped_env.step(
            action)
        if test_total_timesteps == 1:
            time.sleep(1)
        # Force visualisation on for the last five games.
        if testing_episodes - test_finished_episodes <= 5:
            wrapped_env.visualize = True
        if episode_finished:
            test_finished_episodes += 1
            print("[LOG] Last episode reward: " + str(reward))
            if reward == 1:
                test_won_episodes += 1
            observed_state = wrapped_env.reset()
            time.sleep(1)

    print("[INFO] Won episodes/total episodes: {rews}/{test_ep}".format(
        test_ep=testing_episodes, rews=test_won_episodes))
    # Guard the average against a zero episode count.
    average_length = (float(test_total_timesteps) / testing_episodes
                      if testing_episodes else 0.0)
    print("[INFO] Average episode length: {times}".format(
        times=average_length))
    exit()
def main():
    '''CLI interface to bootstrap training.

    Wraps every agent with ``create_ppo_agent``, trains the first
    ``TensorForcePpoAgent`` with a ``Runner`` using the reward shaping given
    by ``--rewards``, saves the model to ``--checkpoint`` and prints the
    last rewards and their mean.

    Raises:
        ValueError: If ``--agents`` yields no ``TensorForcePpoAgent``.
        AssertionError: If a recording directory already exists.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument("--config",
                        default="PommeFFACompetition-v0",
                        help="Configuration to execute. See env_ids in "
                        "configs.py for options.")
    parser.add_argument("--agents",
                        default="tensorforce::ppo,test::agents.SimpleAgent,"
                        "test::agents.SimpleAgent,test::agents.SimpleAgent",
                        help="Comma delineated list of agent types and docker "
                        "locations to run the agents.")
    parser.add_argument("--agent_env_vars",
                        help="Comma delineated list of agent environment vars "
                        "to pass to Docker. This is only for the Docker Agent."
                        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
                        "would send two arguments to Docker Agent 0 and one to"
                        " Docker Agent 3.",
                        default="")
    parser.add_argument("--record_pngs_dir",
                        default=None,
                        help="Directory to record the PNGs of the game. "
                        "Doesn't record if None.")
    parser.add_argument("--record_json_dir",
                        default=None,
                        help="Directory to record the JSON representations of "
                        "the game. Doesn't record if None.")
    parser.add_argument("--render",
                        default=False,
                        action='store_true',
                        help="Whether to render or not. Defaults to False.")
    parser.add_argument("--game_state_file",
                        default=None,
                        help="File from which to load game state. Defaults to "
                        "None.")
    parser.add_argument("--checkpoint",
                        default="models/ppo",
                        help="Directory where checkpoint file stored to.")
    parser.add_argument("--num_of_episodes",
                        default="10",
                        help="Number of episodes")
    parser.add_argument("--max_timesteps",
                        default="2000",
                        help="Number of steps")
    parser.add_argument("--rewards",
                        default=DEFAULT_REWARDS,
                        help="Shaping of rewards")
    args = parser.parse_args()

    config = args.config
    # record_pngs_dir = args.record_pngs_dir
    # record_json_dir = args.record_json_dir
    # agent_env_vars = args.agent_env_vars
    game_state_file = args.game_state_file
    checkpoint = args.checkpoint
    num_of_episodes = int(args.num_of_episodes)
    max_timesteps = int(args.max_timesteps)
    custom_rewards = args.rewards

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        create_ppo_agent(
            helpers.make_agent_from_string(agent_string, agent_id + 1000))
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]

    env = make(config, agents, game_state_file)

    # The first PPO agent in the list is the one being trained; agents seen
    # before it are only logged.
    training_agent = None
    training_agent_id = None
    for agent in agents:
        if isinstance(agent, TensorForcePpoAgent):
            print("Ppo agent initiazlied : {}, {}".format(agent, type(agent)))
            training_agent = agent
            env.set_training_agent(agent.agent_id)
            training_agent_id = agent.agent_id
            break
        print("[{}] : id[{}]".format(agent, agent.agent_id))

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    if training_agent is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            "--agents must contain a TensorForcePpoAgent to train.")

    # Initialise every PPO agent exactly once. Only the training agent's
    # result is kept for the runner; the original initialised it a second
    # time up front and threw that result away.
    learning_agent = None
    for agent in agents:
        if isinstance(agent, TensorForcePpoAgent):
            if agent.agent_id == training_agent_id:
                learning_agent = training_agent.initialize(env)
            else:
                agent.initialize(env)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    wrapped_env.set_render(args.render)
    wrapped_env.set_rewards(custom_rewards)
    runner = Runner(agent=learning_agent, environment=wrapped_env)
    runner.run(episodes=num_of_episodes, max_episode_timesteps=max_timesteps)
    print("Stats: ", runner.episode_rewards[-30:], runner.episode_timesteps,
          runner.episode_times)

    learning_agent.save_model(checkpoint)

    rewards = runner.episode_rewards
    import numpy as np
    mean = np.mean(rewards)
    print('last 30 rewards {}'.format(rewards[-30:]))
    print('mean of rewards {}'.format(mean))

    try:
        runner.close()
    except AttributeError as e:
        # Best effort: report but tolerate a runner without close().
        print(e)
def run(args, num_times=1, seed=None):
    '''Play ``num_times`` games, restoring saved models for tensorforce agents.

    Every agent whose spec starts with ``tensorforce::`` is initialised
    against the environment and has a model restored from
    ``./pommerman/cli/saved_win/``. Returns the list of final ``info``
    values, one per game. ``seed`` seeds ``random``, ``numpy.random`` and
    the environment; a random 32-bit one is drawn when it is ``None``.
    '''
    config = args.config
    pngs_root = args.record_pngs_dir
    json_root = args.record_json_dir
    agent_env_vars = args.agent_env_vars
    game_state_file = args.game_state_file
    render_mode = args.render_mode
    do_sleep = args.do_sleep

    agents = []
    for slot, spec in enumerate(args.agents.split(',')):
        agents.append(helpers.make_agent_from_string(spec, slot))

    #TODO: DELETE
    #pdb.set_trace()
    ###############
    env = make(config, agents, game_state_file, render_mode=render_mode)

    for spec, candidate in zip(args.agents.split(','), agents):
        if spec.partition('::')[0] != "tensorforce":
            continue
        print("run_buttle[51] ", type(candidate))
        # NOTE(review): the object returned by initialize() is not stored
        # anywhere after the restore -- confirm the agent keeps it itself.
        restored = candidate.initialize(env)
        restored.restore_model(directory="./pommerman/cli/saved_win/")
        # agents[i].initialize(env).restore_model(directory="./saved_win/")

    def _ensure_dir(path):
        '''Create ``path`` unless it is falsy or already a directory.'''
        if not path or os.path.isdir(path):
            return
        os.makedirs(path)

    def _play_game(pngs_dir=None, json_dir=None):
        '''Runs a game'''
        print("Starting the Game.")
        _ensure_dir(pngs_dir)
        _ensure_dir(json_dir)

        obs = env.reset()
        while True:
            if args.render:
                env.render(record_pngs_dir=pngs_dir,
                           record_json_dir=json_dir,
                           do_sleep=do_sleep)
            obs, reward, done, info = env.step(env.act(obs))
            if done:
                break

        print("Final Result: ", info)
        if args.render:
            env.render(record_pngs_dir=pngs_dir,
                       record_json_dir=json_dir,
                       do_sleep=do_sleep)
            if do_sleep:
                time.sleep(5)
            env.render(close=True)

        if json_dir:
            stamp = datetime.now().isoformat()
            utility.join_json_state(json_dir, args.agents.split(','), stamp,
                                    config, info)
        return info

    if seed is None:
        # Pick a random seed between 0 and 2^31 - 1
        seed = random.randint(0, np.iinfo(np.int32).max)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    times = []
    for game in range(1, num_times + 1):
        began = time.time()

        pngs_dir = None
        if pngs_root:
            pngs_dir = pngs_root + '/%d' % game
        json_dir = None
        if json_root:
            json_dir = json_root + '/%d' % game

        infos.append(_play_game(pngs_dir, json_dir))

        times.append(time.time() - began)
        print("Game Time: ", times[-1])

    atexit.register(env.close)
    return infos
def run(args, num_times=1, seed=None):
    '''Play ``num_times`` games and return the final ``info`` of each.

    Args:
        args: Parsed CLI namespace; reads ``config``, ``record_pngs_dir``,
            ``record_json_dir``, ``game_state_file``, ``render_mode``,
            ``agents`` and ``render``.
        num_times: Number of games to play.
        seed: Seed for ``random``, ``numpy.random`` and the environment.
            A random one in ``[0, 1000000]`` is drawn when ``None``.

    Returns:
        List with the final ``info`` of every game played.
    '''
    config = args.config
    record_pngs_dir = args.record_pngs_dir
    record_json_dir = args.record_json_dir
    game_state_file = args.game_state_file
    render_mode = args.render_mode

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id+1000)
        for agent_id, agent_string in enumerate(args.agents.split(','))
    ]

    env = make(config, agents, game_state_file, render_mode=render_mode)

    if record_pngs_dir and not os.path.isdir(record_pngs_dir):
        os.makedirs(record_pngs_dir)
    if record_json_dir and not os.path.isdir(record_json_dir):
        os.makedirs(record_json_dir)

    def _run(seed, record_pngs_dir=None, record_json_dir=None):
        '''Runs one game.

        NOTE(review): ``seed`` and the per-game directories passed in are
        not used; rendering records into the top-level directories from
        ``args`` -- confirm this is intended.
        '''
        print("Starting the Game.")
        obs = env.reset()
        done = False
        while not done:
            if args.render:
                env.render(record_pngs_dir=args.record_pngs_dir,
                           record_json_dir=args.record_json_dir,
                           mode=args.render_mode)
            actions = env.act(obs)
            obs, reward, done, info = env.step(actions)

        # Hand every agent its final reward for the finished game.
        for agent in agents:
            agent.episode_end(reward[agent.agent_id])

        print("Final Result: ", info)
        if args.render:
            env.render(record_pngs_dir=args.record_pngs_dir,
                       record_json_dir=args.record_json_dir,
                       mode=args.render_mode)
            time.sleep(5)
            env.render(close=True)
        return info

    if seed is None:
        # randint() needs integer bounds; a float such as 1e6 raises
        # TypeError on Python 3.12+.
        seed = random.randint(0, 10**6)
    np.random.seed(seed)
    random.seed(seed)
    env.seed(seed)

    infos = []
    times = []
    for i in range(num_times):
        start = time.time()

        record_pngs_dir_ = record_pngs_dir + '/%d' % (i+1) \
                           if record_pngs_dir else None
        record_json_dir_ = record_json_dir + '/%d' % (i+1) \
                           if record_json_dir else None
        infos.append(_run(seed, record_pngs_dir_, record_json_dir_))

        times.append(time.time() - start)
        print("Game Time: ", times[-1])

    atexit.register(env.close)
    return infos
def main():
    '''Command-line entry point that continues training a saved PPO agent.

    Builds agents and environment from the flags, restores the model of the
    first ``TensorForceAgent`` from a fixed directory and runs ten more
    episodes, saving the model from the per-episode callback.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    add = parser.add_argument
    add("--game", default="pommerman", help="Game to choose.")
    add("--config",
        default="PommeFFACompetition-v0",
        help="Configuration to execute. See env_ids in "
        "configs.py for options.")
    add("--agents",
        default="tensorforce::ppo,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")
    add("--agent_env_vars",
        help="Comma delineated list of agent environment vars "
        "to pass to Docker. This is only for the Docker Agent."
        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
        "would send two arguments to Docker Agent 0 and one to"
        " Docker Agent 3.",
        default="")
    add("--record_pngs_dir",
        default=None,
        help="Directory to record the PNGs of the game. "
        "Doesn't record if None.")
    add("--record_json_dir",
        default=None,
        help="Directory to record the JSON representations of "
        "the game. Doesn't record if None.")
    add("--render",
        default=False,
        action='store_true',
        help="Whether to render or not. Defaults to False.")
    add("--game_state_file",
        default=None,
        help="File from which to load game state. Defaults to "
        "None.")
    args = parser.parse_args()

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    #       this is still missing the docker_env_dict parsing for the agents.
    agents = []
    for slot, spec in enumerate(args.agents.split(",")):
        agents.append(helpers.make_agent_from_string(spec, slot + 1000))

    env = make(args.config, agents, args.game_state_file)

    #TODO: DELETE!
    observation = env.reset()
    print(observation)
    ###########

    # The first agent that is exactly a TensorForceAgent is the trainee.
    training_agent = next(
        (candidate for candidate in agents
         if type(candidate) == TensorForceAgent), None)
    if training_agent is not None:
        env.set_training_agent(training_agent.agent_id)

    for target_dir in (args.record_pngs_dir, args.record_json_dir):
        if target_dir:
            assert not os.path.isdir(target_dir)
            os.makedirs(target_dir)

    def episode_finished(r):
        '''Per-episode callback: log progress and save checkpoints.'''
        if r.episode % 100 == 0:
            print(
                "Finished episode {ep} after {ts} timesteps (reward: {reward})"
                .format(ep=r.episode,
                        ts=r.episode_timestep,
                        reward=r.episode_rewards[-1]))
        if r.episode_rewards[-1] >= 0:
            r.agent.save_model(directory="./clone_saved_win/")
        elif r.episode % 1000 == 0:
            r.agent.save_model(directory="./saved_played/")
        return True

    # Create a Proximal Policy Optimization agent
    ppo_agent = training_agent.initialize(env)

    atexit.register(functools.partial(clean_up_agents, agents))
    runner = Runner(agent=ppo_agent,
                    environment=WrappedEnv(env, visualize=args.render))
    ppo_agent.restore_model(
        directory=
        "/home/rishchen/Source/Work/playground/pommerman/cli/clone_saved_win/")
    runner.run(episodes=10,
               max_episode_timesteps=20000,
               episode_finished=episode_finished)
    print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
          runner.episode_times)

    try:
        runner.close()
    except AttributeError:
        pass
def main():
    '''CLI interface to bootstrap training.

    Parses the command-line flags, builds the agents and the environment,
    selects the first TensorForceAgent in ``--agents`` as the agent to train,
    restores its parameters from ``../params/<params_dir>`` when that
    directory exists, runs the tensorforce ``Runner`` (in training or testing
    mode) and finally saves the parameters back.

    Raises:
        ValueError: if ``--agents`` contains no TensorForceAgent.
        FileExistsError: if a requested record directory already exists.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument(
        "--config",
        default="PommeFFANHWC-v0",
        help="Configuration to execute. See env_ids in "
        "configs.py for options.")
    parser.add_argument(
        "--agents",
        default="tensorforce::ppo,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")
    parser.add_argument(
        "--agent_env_vars",
        help="Comma delineated list of agent environment vars "
        "to pass to Docker. This is only for the Docker Agent."
        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
        "would send two arguments to Docker Agent 0 and one to"
        " Docker Agent 3.",
        default="")
    parser.add_argument(
        "--record_pngs_dir",
        default=None,
        help="Directory to record the PNGs of the game. "
        "Doesn't record if None.")
    parser.add_argument(
        "--record_json_dir",
        default=None,
        help="Directory to record the JSON representations of "
        "the game. Doesn't record if None.")
    parser.add_argument(
        "--render",
        default=False,
        action='store_true',
        help="Whether to render or not. Defaults to False.")
    parser.add_argument(
        "--game_state_file",
        default=None,
        help="File from which to load game state. Defaults to "
        "None.")
    # The previous help text claimed the default was None; it is "ppo".
    parser.add_argument(
        "--params_dir",
        default="ppo",
        help="Sub-directory of ../params in which to save the params and "
        "from which to load them. Defaults to 'ppo'.")
    parser.add_argument(
        "--testing",
        default=False,
        action='store_true',
        help=
        "Test mode for the trained/training agent or not. Defaults to False (i.e. training mode)."
    )
    parser.add_argument(
        "--lstm",
        default=False,
        action='store_true',
        help="Whether to add an (internal) LSTM layer to the model.")
    args = parser.parse_args()

    params_dir = "../params/{}".format(args.params_dir)

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    # this is still missing the docker_env_dict parsing for the agents
    # (--agent_env_vars is parsed but not consumed yet).
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]
    env = make(args.config, agents, args.game_state_file)

    # The first TensorForce agent in the list is the one that gets trained.
    training_agent = next(
        (candidate for candidate in agents
         if isinstance(candidate, TensorForceAgent)), None)
    if training_agent is None:
        raise ValueError(
            "No TensorForceAgent found in --agents; nothing to train.")
    env.set_training_agent(training_agent.agent_id)

    # Refuse to write into an existing record directory. An explicit raise is
    # used because asserts are stripped when Python runs with -O.
    for record_dir in (args.record_pngs_dir, args.record_json_dir):
        if record_dir:
            if os.path.isdir(record_dir):
                raise FileExistsError(
                    "Record directory already exists: {}".format(record_dir))
            os.makedirs(record_dir)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env, lstm=args.lstm)
    # Resume from earlier parameters only when a params directory is present.
    if os.path.isdir(params_dir):
        agent.restore_model(params_dir)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    runner = Runner(agent=agent, environment=wrapped_env)
    runner.run(
        num_episodes=1000, max_episode_timesteps=2000, testing=args.testing)
    agent.save_model("{}/params".format(params_dir))
    print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
          runner.episode_times)

    try:
        runner.close()
    except AttributeError:
        # Best effort: presumably not every Runner version exposes close().
        pass
def main():
    '''CLI interface to bootstrap training.

    Parses the command-line flags, builds the agents and the environment,
    selects the first TensorForceAgent in ``--agents`` as the agent to train,
    loads the previous model/history from ``./saved_models``, runs a
    ``ParallelRunner`` over ``--num_procs`` wrapped environments, then saves
    the model together with the merged history and plots the episode rewards.

    Raises:
        ValueError: if ``--agents`` contains no TensorForceAgent.
        FileExistsError: if a requested record directory already exists.
    '''
    parser = argparse.ArgumentParser(description="Playground Flags.")
    parser.add_argument("--game", default="pommerman", help="Game to choose.")
    parser.add_argument(
        "--config",
        default="PommeFFACompetition-v0",
        help="Configuration to execute. See env_ids in "
        "configs.py for options.")
    parser.add_argument(
        "--agents",
        default="tensorforce::ppo,test::agents.SimpleAgent,"
        "test::agents.SimpleAgent,test::agents.SimpleAgent",
        help="Comma delineated list of agent types and docker "
        "locations to run the agents.")
    parser.add_argument(
        "--agent_env_vars",
        help="Comma delineated list of agent environment vars "
        "to pass to Docker. This is only for the Docker Agent."
        " An example is '0:foo=bar:baz=lar,3:foo=lam', which "
        "would send two arguments to Docker Agent 0 and one to"
        " Docker Agent 3.",
        default="")
    parser.add_argument(
        "--record_pngs_dir",
        default=None,
        help="Directory to record the PNGs of the game. "
        "Doesn't record if None.")
    parser.add_argument(
        "--record_json_dir",
        default=None,
        help="Directory to record the JSON representations of "
        "the game. Doesn't record if None.")
    parser.add_argument(
        "--render",
        default=False,
        action='store_true',
        help="Whether to render or not. Defaults to False.")
    parser.add_argument(
        "--game_state_file",
        default=None,
        help="File from which to load game state. Defaults to "
        "None.")
    parser.add_argument(
        "--num_procs",
        default=12,
        type=int,
        help="Number of parallel threads to run. Defaults to 12.")
    args = parser.parse_args()

    num_procs = args.num_procs

    # TODO: After https://github.com/MultiAgentLearning/playground/pull/40
    # this is still missing the docker_env_dict parsing for the agents
    # (--agent_env_vars is parsed but not consumed yet).
    agents = [
        helpers.make_agent_from_string(agent_string, agent_id + 1000)
        for agent_id, agent_string in enumerate(args.agents.split(","))
    ]
    env = make(args.config, agents, args.game_state_file)

    # The first TensorForce agent in the list is the one that gets trained.
    training_agent = next(
        (candidate for candidate in agents
         if isinstance(candidate, TensorForceAgent)), None)
    if training_agent is None:
        raise ValueError(
            "No TensorForceAgent found in --agents; nothing to train.")
    env.set_training_agent(training_agent.agent_id)

    # Refuse to write into an existing record directory. An explicit raise is
    # used because asserts are stripped when Python runs with -O.
    for record_dir in (args.record_pngs_dir, args.record_json_dir):
        if record_dir:
            if os.path.isdir(record_dir):
                raise FileExistsError(
                    "Record directory already exists: {}".format(record_dir))
            os.makedirs(record_dir)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(
        env,
        num_procs,
        summarizer={
            'directory': 'tensorforce_agent',
            'labels': 'all'
        },
        saver={
            'directory': './saved_models',
            'filename': 'ppo'
        })
    hist = load_model(agent, './saved_models')

    atexit.register(functools.partial(clean_up_agents, agents))
    # NOTE(review): every wrapper shares the same underlying env instance;
    # confirm this is intended for parallel execution.
    wrapped_envs = [
        WrappedEnv(env, visualize=args.render) for _ in range(num_procs)
    ]

    runner_time = timeit.default_timer()
    runner = ParallelRunner(agent=agent, environments=wrapped_envs)
    runner.run(num_episodes=1000, max_episode_timesteps=2000)
    print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
          runner.episode_seconds)

    # list.extend() mutates in place and returns None, so the loaded history
    # (presumably list attributes; verify against load_model) is extended
    # first and the merged lists are collected afterwards. Building the dict
    # straight from the extend() calls stored None for every entry.
    hist.episode_rewards.extend(runner.episode_rewards)
    hist.episode_timesteps.extend(runner.episode_timesteps)
    hist.episode_seconds.extend(runner.episode_seconds)
    hist = {
        "episode_rewards": hist.episode_rewards,
        "episode_timesteps": hist.episode_timesteps,
        "episode_times": hist.episode_seconds,
    }
    print('Runner time: ', timeit.default_timer() - runner_time)

    # os.path.join keeps the Windows result ('saved_models\\ppo') while also
    # producing a valid nested path on other platforms.
    save_model(agent, os.path.join('saved_models', 'ppo'), hist, True)

    plt.plot(runner.episode_rewards)
    plt.show()

    try:
        runner.close()
    except AttributeError:
        # Best effort: presumably not every runner version exposes close().
        pass