def test_observation_matches_obs_spec(self):
    """Checks every observation tensor against the declared observation spec."""

    def _assert_shape_matches(key, spec_shape, tensor):
        msg = "%s: spec: %s != obs: %s" % (key, spec_shape, tensor.shape)
        if tensor.shape == (0,):
            # Empty tensor can't have a shape; the spec must allow a
            # zero-sized dimension.
            self.assertIn(0, spec_shape, msg)
        else:
            self.assertEqual(len(spec_shape), len(tensor.shape), msg)
            for expected, actual in zip(spec_shape, tensor.shape):
                if expected != 0:  # 0 in the spec means "any size".
                    self.assertEqual(expected, actual, msg)

    with sc2_env.SC2Env(map_name="Simple64") as env:
        spec = env.observation_spec()
        agent = random_agent.RandomAgent()
        agent.setup(spec, env.action_spec())
        timestep = env.reset()[0]
        agent.reset()
        for _ in range(100):
            obs = timestep.observation
            self.assertItemsEqual(spec.keys(), obs.keys())
            for key, tensor in six.iteritems(obs):
                _assert_shape_matches(key, spec[key], tensor)
            action = agent.step(timestep)
            timestep = env.step([action])[0]
def test_observation_matches_obs_spec(self):
    """Validates specs and observations for a single agent playing a bot."""
    interface = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=(84, 87), minimap=(64, 67)))
    with sc2_env.SC2Env(
            map_name="Simple64",
            players=[sc2_env.Agent(sc2_env.Race.random),
                     sc2_env.Bot(sc2_env.Race.random,
                                 sc2_env.Difficulty.easy)],
            agent_interface_format=interface) as env:
        obs_specs = env.observation_spec()
        self.assertIsInstance(obs_specs, tuple)
        self.assertLen(obs_specs, 1)  # one spec per agent

        act_specs = env.action_spec()
        self.assertIsInstance(act_specs, tuple)
        self.assertLen(act_specs, 1)

        agent = random_agent.RandomAgent()
        agent.setup(obs_specs[0], act_specs[0])

        timesteps = env.reset()
        agent.reset()
        for _ in range(100):
            self.assertIsInstance(timesteps, tuple)
            self.assertLen(timesteps, 1)
            step = timesteps[0]
            self.check_observation_matches_spec(step.observation, obs_specs[0])
            timesteps = env.step((agent.step(step),))
def test_random_agent(self):
    """Runs a random agent for a fixed number of environment steps."""
    total_steps = 100
    multiplier = 50
    with sc2_env.SC2Env(map_name="Simple64",
                        step_mul=multiplier,
                        game_steps_per_episode=total_steps * multiplier) as env:
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, total_steps)
        self.assertEqual(agent.steps, total_steps)
def test_build_marine_random():
    """Runs a random agent on BuildMarines and prints its final reward.

    Relies on ``steps``, ``step_mul`` and ``loop`` being defined at module
    level — TODO confirm where this snippet lives.
    """
    # Pass the map as a keyword argument: SC2Env's constructor is
    # keyword-only, so a bare positional "BuildMarines" is rejected.
    with sc2_env.SC2Env(map_name="BuildMarines",
                        step_mul=step_mul,
                        game_steps_per_episode=steps * step_mul) as env:
        env.observation_spec()
        agent = random_agent.RandomAgent()
        loop([agent], env, steps)
        # Python 3: print is a function, not a statement.
        print(agent.reward)
def run(env_name):
    """Plays ``nb_episodes`` episodes with a random agent, printing rewards.

    Relies on module-level ``agent_format`` and ``nb_episodes``.  Removes
    debug leftovers from an earlier draft: the agent's chosen action was
    being discarded and replaced with hard-coded FunctionCalls, with two
    env.step() calls per loop iteration and several stray no-op expressions.
    """
    env = sc2_env.SC2Env(
        map_name=env_name,  # e.g. "BuildMarines"
        step_mul=16,
        visualize=False,
        agent_interface_format=[agent_format])
    agent = random_agent.RandomAgent()
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()
    agent.setup(observation_spec[0], action_spec[0])

    reward = []             # per-episode summed step rewards
    reward_cumulative = []  # per-episode final score_cumulative[0]
    for _ in range(nb_episodes):
        reward.append(0)
        # env.reset() returns one TimeStep per agent; fields are
        # (step_type, reward, discount, observation).
        obs = env.reset()[0]
        agent.reset()
        while True:
            a = agent.step(obs)
            obs = env.step(actions=[a])[0]
            reward[-1] += obs.reward
            if obs.last():
                cum_reward = obs.observation["score_cumulative"]
                reward_cumulative.append(cum_reward[0])
                break
    env.close()
    print(reward)
    print(np.mean(reward))
    # Sanity check: summed rewards should match the game's cumulative score.
    print(reward == reward_cumulative)
def test_random_agent(self, agent_interface_format):
    """Runs a random agent under the supplied interface format."""
    num_steps = 250
    multiplier = 8
    episode_length = num_steps * multiplier // 2
    with sc2_env.SC2Env(map_name="Simple64",
                        agent_interface_format=agent_interface_format,
                        step_mul=multiplier,
                        game_steps_per_episode=episode_length) as env:
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, num_steps)
        self.assertEqual(agent.steps, num_steps)
def test_random_agent_rgb(self):
    """Runs a random agent with RGB rendering enabled.

    SC2Env no longer takes ``rgb_screen_size``/``rgb_minimap_size`` directly;
    RGB dimensions are configured through AgentInterfaceFormat (matching the
    sibling test elsewhere in this file).
    """
    steps = 50
    step_mul = 8
    with sc2_env.SC2Env(
            map_name="Simple64",
            agent_interface_format=sc2_env.AgentInterfaceFormat(
                rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64)),
            step_mul=step_mul,
            game_steps_per_episode=steps * step_mul // 3) as env:
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, steps)
        self.assertEqual(agent.steps, steps)
def test_multi_player_env(self, agent_interface_format):
    """Runs two random agents against each other in one environment."""
    num_steps = 100
    multiplier = 16
    num_players = 2
    with sc2_env.SC2Env(
            map_name="Simple64",
            players=[sc2_env.Agent(sc2_env.Race.random, "random"),
                     sc2_env.Agent(sc2_env.Race.random, "random")],
            step_mul=multiplier,
            game_steps_per_episode=num_steps * multiplier // 2,
            agent_interface_format=agent_interface_format) as env:
        agents = [random_agent.RandomAgent() for _ in range(num_players)]
        run_loop.run_loop(agents, env, num_steps)
def test_random_agent_rgb(self):
    """Runs a random agent with RGB observations configured."""
    num_steps = 50
    multiplier = 8
    interface = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64))
    with sc2_env.SC2Env(
            map_name="Simple64",
            agent_interface_format=interface,
            step_mul=multiplier,
            game_steps_per_episode=num_steps * multiplier // 3) as env:
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, num_steps)
        self.assertEqual(agent.steps, num_steps)
def test_heterogeneous_observations(self):
    """Two agents with different interface formats each get matching specs."""
    feature_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(screen=(84, 87),
                                              minimap=(64, 67)))
    rgb_format = sc2_env.AgentInterfaceFormat(
        rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64))
    with sc2_env.SC2Env(
            map_name="Simple64",
            players=[sc2_env.Agent(sc2_env.Race.random),
                     sc2_env.Agent(sc2_env.Race.random)],
            agent_interface_format=[feature_format, rgb_format]) as env:
        obs_specs = env.observation_spec()
        self.assertIsInstance(obs_specs, tuple)
        self.assertLen(obs_specs, 2)  # one spec per player

        action_specs = env.action_spec()
        self.assertIsInstance(action_specs, tuple)
        self.assertLen(action_specs, 2)

        agents = []
        for obs_spec, action_spec in zip(obs_specs, action_specs):
            agent = random_agent.RandomAgent()
            agent.setup(obs_spec, action_spec)
            agent.reset()
            agents.append(agent)

        time_steps = env.reset()
        for _ in range(100):
            self.assertIsInstance(time_steps, tuple)
            self.assertLen(time_steps, 2)
            step_actions = []
            for agent, step, spec in zip(agents, time_steps, obs_specs):
                # Each agent's observation must match its own format.
                self.check_observation_matches_spec(step.observation, spec)
                step_actions.append(agent.step(step))
            time_steps = env.step(step_actions)
def test_multi_player_env(self):
    """Zerg-vs-Terran game driven by two random agents."""
    num_steps = 100
    multiplier = 16
    num_players = 2
    with sc2_env.SC2Env(map_name="Simple64",
                        players=[sc2_env.Agent(sc2_env.Race.zerg),
                                 sc2_env.Agent(sc2_env.Race.terran)],
                        feature_screen_size=84,
                        feature_minimap_size=64,
                        step_mul=multiplier,
                        game_steps_per_episode=num_steps * multiplier // 2
                        ) as env:
        agents = [random_agent.RandomAgent() for _ in range(num_players)]
        run_loop.run_loop(agents, env, num_steps)
def test_defeat_zerglings(self):
    """Steps once with a no-op, inspects player_relative, then runs randomly."""
    with sc2_env.SC2Env(map_name="DefeatZerglingsAndBanelings",
                        step_mul=self.step_mul,
                        visualize=True,
                        game_steps_per_episode=self.steps * self.step_mul
                        ) as env:
        timesteps = env.step(actions=[sc2_actions.FunctionCall(_NO_OP, [])])
        player_relative = (
            timesteps[0].observation["screen"][_PLAYER_RELATIVE])
        print(player_relative)  # Break Point!!
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, self.steps)
        self.assertEqual(agent.steps, self.steps)
def test_random_agent(self, agent_interface_format):
    """Random agent with randomized map, race, difficulty and build choices."""
    num_steps = 250
    multiplier = 8
    # Lists let the environment pick randomly each episode.
    player = sc2_env.Agent([sc2_env.Race.random, sc2_env.Race.terran])
    bot = sc2_env.Bot([sc2_env.Race.zerg, sc2_env.Race.protoss],
                      sc2_env.Difficulty.easy,
                      [sc2_env.BotBuild.rush, sc2_env.BotBuild.timing])
    with sc2_env.SC2Env(
            map_name=["Simple64", "Simple96"],
            players=[player, bot],
            agent_interface_format=agent_interface_format,
            step_mul=multiplier,
            game_steps_per_episode=num_steps * multiplier // 3) as env:
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, num_steps)
        self.assertEqual(agent.steps, num_steps)
def test_observation_matches_obs_spec(self):
    """Observations must conform to the spec in a single-agent game."""

    def _check(key, spec_shape, tensor):
        msg = "%s: spec: %s != obs: %s" % (key, spec_shape, tensor.shape)
        if tensor.shape == (0,):
            # Empty tensor can't have a shape.
            self.assertIn(0, spec_shape, msg)
        else:
            self.assertEqual(len(spec_shape), len(tensor.shape), msg)
            for want, got in zip(spec_shape, tensor.shape):
                if want != 0:  # 0 in the spec means "any size".
                    self.assertEqual(want, got, msg)

    with sc2_env.SC2Env(
            map_name="Simple64",
            feature_screen_width=84,
            feature_screen_height=87,
            feature_minimap_width=64,
            feature_minimap_height=67) as env:
        obs_specs = env.observation_spec()
        self.assertIsInstance(obs_specs, tuple)
        self.assertLen(obs_specs, 1)
        obs_spec = obs_specs[0]

        action_specs = env.action_spec()
        self.assertIsInstance(action_specs, tuple)
        self.assertLen(action_specs, 1)

        agent = random_agent.RandomAgent()
        agent.setup(obs_spec, action_specs[0])

        timesteps = env.reset()
        agent.reset()
        for _ in range(100):
            self.assertIsInstance(timesteps, tuple)
            self.assertLen(timesteps, 1)
            step = timesteps[0]
            obs = step.observation
            self.assertItemsEqual(obs_spec.keys(), obs.keys())
            for key, tensor in six.iteritems(obs):
                _check(key, obs_spec[key], tensor)
            timesteps = env.step((agent.step(step),))
def test_defeat_zerglings(self):
    """Steps once with a no-op, prints player_relative, then runs randomly."""
    FLAGS(sys.argv)
    interface = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(screen=64, minimap=64))
    with sc2_env.SC2Env(
            map_name="DefeatZerglingsAndBanelings",
            step_mul=self.step_mul,
            visualize=True,
            players=[sc2_env.Agent(sc2_env.Race.terran)],
            agent_interface_format=interface,
            game_steps_per_episode=self.steps * self.step_mul) as env:
        timesteps = env.step(actions=[sc2_actions.FunctionCall(_NO_OP, [])])
        player_relative = (
            timesteps[0].observation["screen"][_PLAYER_RELATIVE])
        print(player_relative)  # Break Point!!
        agent = random_agent.RandomAgent()
        run_loop.run_loop([agent], env, self.steps)
        self.assertEqual(agent.steps, self.steps)
def main_runner(unused_argv):
    """Runs a RandomAgent on CollectMineralShards for up to 1000 steps,
    printing the available actions at each step.
    """
    with sc2_env.SC2Env(map_name="CollectMineralShards",
                        step_mul=8,
                        visualize=True) as env:
        maps.get(FLAGS.map)  # Assert the map exists.
        agent_000 = random_agent.RandomAgent()
        action_spec = env.action_spec()
        observation_spec = env.observation_spec()
        # NOTE(review): in recent pysc2 both specs are per-agent tuples and
        # sibling code passes spec[0] to setup(); confirm whether these
        # should be observation_spec[0] / action_spec[0] here.
        agent_000.setup(observation_spec, action_spec)
        agent_000.reset()
        try:
            obs = env.reset()
            # maybe have the first action to select all marines first
            for step_idx in range(1000):
                # could use packaged python agents
                print(obs[0].observation["available_actions"])
                rand_step = agent_000.step(obs[0])
                obs = env.step([rand_step])
        except KeyboardInterrupt:
            # Allow a manual stop without a traceback.
            pass
        finally:
            print("simulation done")
def run(env_name):
    """Plays ``nb_episodes`` episodes with a random agent and prints rewards.

    Relies on module-level ``agent_format`` and ``nb_episodes``.
    """
    env = sc2_env.SC2Env(
        map_name=env_name,  # "BuildMarines",
        step_mul=16,
        visualize=False,
        agent_interface_format=[agent_format])
    agent = random_agent.RandomAgent()
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()
    agent.setup(observation_spec[0], action_spec[0])

    reward = []             # summed per-step rewards, one entry per episode
    reward_cumulative = []  # game-reported cumulative score per episode
    for _ in range(nb_episodes):
        reward.append(0)
        # env.reset() yields one TimeStep per agent with fields
        # (step_type, reward, discount, observation).
        timestep = env.reset()[0]
        agent.reset()
        while True:
            chosen = agent.step(timestep)
            timestep = env.step(actions=[chosen])[0]
            reward[-1] += timestep.reward
            if timestep.last():
                cum_reward = timestep.observation["score_cumulative"]
                reward_cumulative.append(cum_reward[0])
                break
    env.close()
    print(reward)
    print(np.mean(reward))
    print(reward == reward_cumulative)
def main():
    """Parses CLI flags, configures logging, and dispatches training for the
    selected algorithm ("deepq", "deepq-4way", or "a2c") on StarCraft II.

    Relies on module-level ``step_mul``, ``start_time`` and the imported
    training/learning helpers.
    """
    FLAGS(sys.argv)
    steps = 0  # Test steps

    # Echo the run configuration to stdout.
    print("algorithm : %s" % FLAGS.algorithm)
    print("timesteps : %s" % FLAGS.timesteps)
    print("exploration_fraction : %s" % FLAGS.exploration_fraction)
    print("prioritized : %s" % FLAGS.prioritized)
    print("dueling : %s" % FLAGS.dueling)
    print("num_agents : %s" % FLAGS.num_agents)
    print("lr : %s" % FLAGS.lr)

    # lr == 0 means "sample a random learning rate" (useful for sweeps).
    if FLAGS.lr == 0:
        FLAGS.lr = random.uniform(0.00001, 0.001)
    print("random lr : %s" % FLAGS.lr)

    lr_round = round(FLAGS.lr, 8)

    # Build a per-run TensorBoard log directory keyed on hyperparameters.
    logdir = "tensorboard"
    if FLAGS.algorithm == "deepq-4way":
        logdir = "tensorboard/mineral/%s/%s_%s_prio%s_duel%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction,
            FLAGS.prioritized, FLAGS.dueling, lr_round, start_time)
    elif FLAGS.algorithm == "deepq":
        logdir = "tensorboard/mineral/%s/%s_%s_prio%s_duel%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction,
            FLAGS.prioritized, FLAGS.dueling, lr_round, start_time)
    elif FLAGS.algorithm == "a2c":
        logdir = "tensorboard/mineral/%s/%s_n%s_s%s_nsteps%s/lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps,
            FLAGS.num_agents + FLAGS.num_scripts, FLAGS.num_scripts,
            FLAGS.nsteps, lr_round, start_time)

    # Route the baselines Logger to TensorBoard or stdout.
    if FLAGS.log == "tensorboard":
        Logger.DEFAULT \
            = Logger.CURRENT \
            = Logger(dir=None,
                     output_formats=[TensorBoardOutputFormat(logdir)])
    elif FLAGS.log == "stdout":
        Logger.DEFAULT \
            = Logger.CURRENT \
            = Logger(dir=None,
                     output_formats=[HumanOutputFormat(sys.stdout)])

    if FLAGS.algorithm == "deepq":
        AGENT_INTERFACE_FORMAT = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=16, minimap=16))
        # temp solution - sc2_env.Agent(sc2_env.Race.terran) might be too
        # restricting. We need this change because sc2 now requires
        # specifying players.
        with sc2_env.SC2Env(
                map_name="Simple64",
                players=[
                    sc2_env.Agent(race=sc2_env.Race.terran),
                    sc2_env.Agent(race=sc2_env.Race.terran)
                ],
                #players=[sc2_env.Agent(sc2_env.Race.terran),sc2_env.Agent(sc2_env.Race.terran)],
                step_mul=step_mul,
                visualize=True,
                agent_interface_format=AGENT_INTERFACE_FORMAT) as env:
            model = cnn_to_mlp(convs=[(16, 8, 4), (32, 4, 2)],
                               hiddens=[256],
                               dueling=True)
            acts = deepq_nexus_wars.learn(
                env,
                q_func=model,
                num_actions=16,
                lr=FLAGS.lr,
                max_timesteps=FLAGS.timesteps,
                buffer_size=10000,
                exploration_fraction=FLAGS.exploration_fraction,
                exploration_final_eps=0.01,
                train_freq=4,
                learning_starts=10000,
                target_network_update_freq=1000,
                gamma=0.99,
                prioritized_replay=True,
                callback=deepq_callback)
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)
            # Persist the two learned action-value functions.
            acts[0].save("mineral_shards_x.pkl")
            acts[1].save("mineral_shards_y.pkl")
    elif FLAGS.algorithm == "deepq-4way":
        AGENT_INTERFACE_FORMAT = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=32, minimap=32))
        with sc2_env.SC2Env(map_name="Simple64",
                            players=[
                                sc2_env.Agent(race=sc2_env.Race.terran),
                                sc2_env.Agent(race=sc2_env.Race.terran)
                            ],
                            step_mul=step_mul,
                            agent_interface_format=AGENT_INTERFACE_FORMAT,
                            visualize=True) as env:
            model = cnn_to_mlp(convs=[(16, 8, 4), (32, 4, 2)],
                               hiddens=[256],
                               dueling=True)
            act = deepq_mineral_4way.learn(
                env,
                q_func=model,
                num_actions=4,
                lr=FLAGS.lr,
                max_timesteps=FLAGS.timesteps,
                buffer_size=10000,
                exploration_fraction=FLAGS.exploration_fraction,
                exploration_final_eps=0.01,
                train_freq=4,
                learning_starts=10000,
                target_network_update_freq=1000,
                gamma=0.99,
                prioritized_replay=True,
                callback=deepq_4way_callback)
            act.save("mineral_shards.pkl")
    elif FLAGS.algorithm == "a2c":
        num_timesteps = int(40e6)
        num_timesteps //= 4
        seed = 0
        # NOTE(review): presumably a vectorized env over agent + scripted
        # workers — confirm against SubprocVecEnv's signature.
        env = SubprocVecEnv(FLAGS.num_agents + FLAGS.num_scripts,
                            FLAGS.num_scripts, FLAGS.map)
        policy_fn = CnnPolicy
        a2c.learn(policy_fn,
                  env,
                  seed,
                  total_timesteps=num_timesteps,
                  nprocs=FLAGS.num_agents + FLAGS.num_scripts,
                  nscripts=FLAGS.num_scripts,
                  ent_coef=0.5,
                  nsteps=FLAGS.nsteps,
                  max_grad_norm=0.01,
                  callback=a2c_callback)
feature_dimensions=sc2_env.Dimensions( screen=(32, 32), minimap=(32, 32))) # agent_format2 = sc2_env.AgentInterfaceFormat( # feature_dimensions=sc2_env.Dimensions( # screen=(32, 32), minimap=(32, 32))) env = sc2_env.SC2Env( map_name=mini_games[0], players=[sc2_env.Agent(sc2_env.Race.terran), sc2_env.Agent(sc2_env.Race.zerg)], step_mul=MiniGame.step_mul, visualize=False, agent_interface_format=agent_format1) agents = [random_agent.RandomAgent() for _ in range(MiniGame.players)] # run_loop.run_loop(agents, env, max_frames=1000, max_episodes=10000) obs_specs = env.observation_spec() actions_specs = env.action_spec() for agent, obs_spec, act_spec in zip(agents, obs_specs, actions_specs): agent.setup(obs_spec, act_spec) total_episodes = 0 score1 = [] score2 = [] while not total_episodes == 100: total_episodes += 1 print(total_episodes)
from a2c_agent import a2cAgent import importlib import threading from absl import app from pysc2 import maps from pysc2.env import available_actions_printer from pysc2.env import run_loop from pysc2.env import sc2_env from pysc2.lib import point_flag from pysc2.lib import stopwatch from pysc2.lib import features from pysc2.agents import random_agent AGENT = random_agent.RandomAgent() BOTS = [sc2_env.Bot(sc2_env.Race.terran, sc2_env.Difficulty.very_easy)] PLAYERS = [sc2_env.Agent(sc2_env.Race.terran)] MAP = 'CollectMineralShards' def run_thread(agents, players, map_name, visualize=False, save_replay=False): ''' set up and run sc2_env loop ''' # agents = [AGENT,] try: while True: with sc2_env.SC2Env( map_name=map_name, step_mul=16, visualize=visualize, players=players,