Example 1
    def test_observation_matches_obs_spec(self):
        with sc2_env.SC2Env(map_name="Simple64") as env:
            spec = env.observation_spec()

            agent = random_agent.RandomAgent()
            agent.setup(spec, env.action_spec())

            raw_obs = env.reset()[0]
            agent.reset()
            for _ in range(100):
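                # Every tensor in the observation must match its spec; a 0 in
                # the spec marks a variable-length dimension.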
                obs = raw_obs.observation

                self.assertItemsEqual(spec.keys(), obs.keys())
                for k, o in six.iteritems(obs):
                    descr = "%s: spec: %s != obs: %s" % (k, spec[k], o.shape)

                    if o.shape == (0, ):  # Empty tensor can't have a shape.
                        self.assertIn(0, spec[k], descr)
                    else:
                        self.assertEqual(len(spec[k]), len(o.shape), descr)
                        for a, b in zip(spec[k], o.shape):
                            if a != 0:
                                self.assertEqual(a, b, descr)

                act = agent.step(raw_obs)
                raw_obs = env.step([act])[0]
Example 2
  def test_observation_matches_obs_spec(self):
    with sc2_env.SC2Env(
        map_name="Simple64",
        players=[sc2_env.Agent(sc2_env.Race.random),
                 sc2_env.Bot(sc2_env.Race.random, sc2_env.Difficulty.easy)],
        agent_interface_format=sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(
                screen=(84, 87),
                minimap=(64, 67)))) as env:

      multiplayer_obs_spec = env.observation_spec()
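      # One spec per agent: a single-agent game yields a length-1 tuple.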
      self.assertIsInstance(multiplayer_obs_spec, tuple)
      self.assertLen(multiplayer_obs_spec, 1)
      obs_spec = multiplayer_obs_spec[0]

      multiplayer_action_spec = env.action_spec()
      self.assertIsInstance(multiplayer_action_spec, tuple)
      self.assertLen(multiplayer_action_spec, 1)
      action_spec = multiplayer_action_spec[0]

      agent = random_agent.RandomAgent()
      agent.setup(obs_spec, action_spec)

      multiplayer_obs = env.reset()
      agent.reset()
      for _ in range(100):
        self.assertIsInstance(multiplayer_obs, tuple)
        self.assertLen(multiplayer_obs, 1)
        raw_obs = multiplayer_obs[0]
        obs = raw_obs.observation
        self.check_observation_matches_spec(obs, obs_spec)

        act = agent.step(raw_obs)
        multiplayer_act = (act,)
        multiplayer_obs = env.step(multiplayer_act)
Example 3
    def test_random_agent(self):
        steps = 100
        step_mul = 50
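        # step_mul is the number of game steps per agent step, so a budget of
        # steps * step_mul game steps yields exactly `steps` agent steps.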
        with sc2_env.SC2Env(map_name="Simple64",
                            step_mul=step_mul,
                            game_steps_per_episode=steps * step_mul) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
Example 4
def test_build_marine_random():
    # steps and step_mul are assumed to be defined at module level.
    with sc2_env.SC2Env(map_name="BuildMarines",
                        step_mul=step_mul,
                        game_steps_per_episode=steps * step_mul) as env:

        env.observation_spec()

        agent = random_agent.RandomAgent()
        loop([agent], env, steps)  # loop: assumed alias of run_loop.run_loop
        print(agent.reward)
Example 5
def run(env_name):
    env = sc2_env.SC2Env(
        map_name=env_name,  # "BuildMarines",
        step_mul=16,
        visualize=False,
        agent_interface_format=[agent_format])

    agent = random_agent.RandomAgent()
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()
    agent.setup(observation_spec[0], action_spec[0])

    reward = []
    reward_cumulative = []
    for ep in range(nb_episodes):
        reward.append(0)
        obs = env.reset()[0]
        # A TimeStep is a namedtuple of
        # (step_type, reward, discount, observation).
        agent.reset()
        while True:
            a = agent.step(obs)
            # Equivalently, a random valid action can be built by hand:
            #   function_id = np.random.choice(obs.observation.available_actions)
            #   args = [[np.random.randint(0, size) for size in arg.sizes]
            #           for arg in agent.action_spec.functions[function_id].args]
            #   a = actions.FunctionCall(function_id, args)
            obs = env.step(actions=[a])[0]

            reward[-1] += obs.reward
            if obs.last():
                cum_reward = obs.observation["score_cumulative"]
                reward_cumulative.append(cum_reward[0])
                break
    env.close()
    print(reward)
    print(np.mean(reward))
    print(reward == reward_cumulative)
Example 6
    def test_random_agent(self, agent_interface_format):
        steps = 250
        step_mul = 8
        with sc2_env.SC2Env(map_name="Simple64",
                            agent_interface_format=agent_interface_format,
                            step_mul=step_mul,
                            game_steps_per_episode=steps * step_mul // 2) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
Example 7
  def test_random_agent_rgb(self):
    steps = 50
    step_mul = 8
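    # rgb_screen_size/rgb_minimap_size request rendered RGB observations
    # instead of feature layers.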
    with sc2_env.SC2Env(
        map_name="Simple64",
        rgb_screen_size=128,
        rgb_minimap_size=64,
        step_mul=step_mul,
        game_steps_per_episode=steps * step_mul // 3) as env:
      agent = random_agent.RandomAgent()
      run_loop.run_loop([agent], env, steps)

    self.assertEqual(agent.steps, steps)
Example 8
  def test_multi_player_env(self, agent_interface_format):
    steps = 100
    step_mul = 16
    players = 2
    with sc2_env.SC2Env(
        map_name="Simple64",
        players=[sc2_env.Agent(sc2_env.Race.random, "random"),
                 sc2_env.Agent(sc2_env.Race.random, "random")],
        step_mul=step_mul,
        game_steps_per_episode=steps * step_mul // 2,
        agent_interface_format=agent_interface_format) as env:
      agents = [random_agent.RandomAgent() for _ in range(players)]
      run_loop.run_loop(agents, env, steps)
Example 9
    def test_random_agent_rgb(self):
        steps = 50
        step_mul = 8
        with sc2_env.SC2Env(
                map_name="Simple64",
                agent_interface_format=sc2_env.AgentInterfaceFormat(
                    rgb_dimensions=sc2_env.Dimensions(screen=128, minimap=64)),
                step_mul=step_mul,
                game_steps_per_episode=steps * step_mul // 3) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
Example 10
  def test_heterogeneous_observations(self):
    with sc2_env.SC2Env(
        map_name="Simple64",
        players=[
            sc2_env.Agent(sc2_env.Race.random),
            sc2_env.Agent(sc2_env.Race.random)
        ],
        agent_interface_format=[
            sc2_env.AgentInterfaceFormat(
                feature_dimensions=sc2_env.Dimensions(
                    screen=(84, 87),
                    minimap=(64, 67)
                )
            ),
            sc2_env.AgentInterfaceFormat(
                rgb_dimensions=sc2_env.Dimensions(
                    screen=128,
                    minimap=64
                )
            )
        ]) as env:

      obs_specs = env.observation_spec()
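      # With one AgentInterfaceFormat per agent, each agent gets its own spec.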
      self.assertIsInstance(obs_specs, tuple)
      self.assertLen(obs_specs, 2)

      actions_specs = env.action_spec()
      self.assertIsInstance(actions_specs, tuple)
      self.assertLen(actions_specs, 2)

      agents = []
      for obs_spec, action_spec in zip(obs_specs, actions_specs):
        agent = random_agent.RandomAgent()
        agent.setup(obs_spec, action_spec)
        agent.reset()
        agents.append(agent)

      time_steps = env.reset()
      for _ in range(100):
        self.assertIsInstance(time_steps, tuple)
        self.assertLen(time_steps, 2)

        actions = []
        for i, agent in enumerate(agents):
          time_step = time_steps[i]
          obs = time_step.observation
          self.check_observation_matches_spec(obs, obs_specs[i])
          actions.append(agent.step(time_step))

        time_steps = env.step(actions)
Example 11
    def test_multi_player_env(self):
        steps = 100
        step_mul = 16
        players = 2
        with sc2_env.SC2Env(map_name="Simple64",
                            players=[
                                sc2_env.Agent(sc2_env.Race.zerg),
                                sc2_env.Agent(sc2_env.Race.terran)
                            ],
                            feature_screen_size=84,
                            feature_minimap_size=64,
                            step_mul=step_mul,
                            game_steps_per_episode=steps * step_mul // 2) as env:
            agents = [random_agent.RandomAgent() for _ in range(players)]
            run_loop.run_loop(agents, env, steps)
Example 12
    def test_defeat_zerglings(self):
        with sc2_env.SC2Env(map_name="DefeatZerglingsAndBanelings",
                            step_mul=self.step_mul,
                            visualize=True,
                            game_steps_per_episode=self.steps * self.step_mul) as env:
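            # Step once with a no-op to get an initial observation.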
            obs = env.step(actions=[sc2_actions.FunctionCall(_NO_OP, [])])
            player_relative = obs[0].observation["screen"][_PLAYER_RELATIVE]

            # Break Point!!
            print(player_relative)

            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, self.steps)

        self.assertEqual(agent.steps, self.steps)
Example 13
    def test_random_agent(self, agent_interface_format):
        steps = 250
        step_mul = 8
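        # When lists are supplied, pysc2 picks one element at random each
        # episode: the map, each player's race, and the bot's build.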
        with sc2_env.SC2Env(
                map_name=["Simple64", "Simple96"],
                players=[sc2_env.Agent([sc2_env.Race.random, sc2_env.Race.terran]),
                         sc2_env.Bot([sc2_env.Race.zerg, sc2_env.Race.protoss],
                                     sc2_env.Difficulty.easy,
                                     [sc2_env.BotBuild.rush, sc2_env.BotBuild.timing])],
                agent_interface_format=agent_interface_format,
                step_mul=step_mul,
                game_steps_per_episode=steps * step_mul // 3) as env:
            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

        self.assertEqual(agent.steps, steps)
Example 14
  def test_observation_matches_obs_spec(self):
    with sc2_env.SC2Env(
        map_name="Simple64",
        feature_screen_width=84,
        feature_screen_height=87,
        feature_minimap_width=64,
        feature_minimap_height=67) as env:

      multiplayer_obs_spec = env.observation_spec()
      self.assertIsInstance(multiplayer_obs_spec, tuple)
      self.assertLen(multiplayer_obs_spec, 1)
      obs_spec = multiplayer_obs_spec[0]

      multiplayer_action_spec = env.action_spec()
      self.assertIsInstance(multiplayer_action_spec, tuple)
      self.assertLen(multiplayer_action_spec, 1)
      action_spec = multiplayer_action_spec[0]

      agent = random_agent.RandomAgent()
      agent.setup(obs_spec, action_spec)

      multiplayer_obs = env.reset()
      agent.reset()
      for _ in range(100):
        self.assertIsInstance(multiplayer_obs, tuple)
        self.assertLen(multiplayer_obs, 1)
        raw_obs = multiplayer_obs[0]

        obs = raw_obs.observation
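        # A 0 in the spec marks a variable-length dimension (e.g. multi_select).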
        self.assertItemsEqual(obs_spec.keys(), obs.keys())
        for k, o in six.iteritems(obs):
          descr = "%s: spec: %s != obs: %s" % (k, obs_spec[k], o.shape)

          if o.shape == (0,):  # Empty tensor can't have a shape.
            self.assertIn(0, obs_spec[k], descr)
          else:
            self.assertEqual(len(obs_spec[k]), len(o.shape), descr)
            for a, b in zip(obs_spec[k], o.shape):
              if a != 0:
                self.assertEqual(a, b, descr)

        act = agent.step(raw_obs)
        multiplayer_act = (act,)
        multiplayer_obs = env.step(multiplayer_act)
Example 15
    def test_defeat_zerglings(self):
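        # Parse the absl flags; needed when running outside app.run().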
        FLAGS(sys.argv)

        with sc2_env.SC2Env(
                map_name="DefeatZerglingsAndBanelings",
                step_mul=self.step_mul,
                visualize=True,
                players=[sc2_env.Agent(sc2_env.Race.terran)],
                agent_interface_format=sc2_env.AgentInterfaceFormat(
                    feature_dimensions=sc2_env.Dimensions(screen=64,
                                                          minimap=64)),
                game_steps_per_episode=self.steps * self.step_mul) as env:
            obs = env.step(actions=[sc2_actions.FunctionCall(_NO_OP, [])])
            player_relative = obs[0].observation["feature_screen"][_PLAYER_RELATIVE]

            # Break Point!!
            print(player_relative)

            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, self.steps)

        self.assertEqual(agent.steps, self.steps)
Example 16
def main_runner(unused_argv):
    with sc2_env.SC2Env(map_name="CollectMineralShards",
                        step_mul=8,
                        visualize=True) as env:

        maps.get(FLAGS.map)  # Assert the map exists.
        agent_000 = random_agent.RandomAgent()
        action_spec = env.action_spec()
        observation_spec = env.observation_spec()
        # Specs come back as one-per-agent tuples; use the first agent's.
        agent_000.setup(observation_spec[0], action_spec[0])
        agent_000.reset()
        try:
            obs = env.reset()
            # Maybe have the first action select all marines.
            for step_idx in range(1000):
                # Could use the packaged python agents instead.
                print(obs[0].observation["available_actions"])
                rand_step = agent_000.step(obs[0])
                obs = env.step([rand_step])
        except KeyboardInterrupt:
            pass
        finally:
            print("simulation done")
Example 17
def run(env_name):
    env = sc2_env.SC2Env(
        map_name=env_name,  # "BuildMarines",
        step_mul=16,
        visualize=False,
        agent_interface_format=[agent_format])

    agent = random_agent.RandomAgent()
    observation_spec = env.observation_spec()
    action_spec = env.action_spec()
    agent.setup(observation_spec[0], action_spec[0])

    reward = []
    reward_cumulative = []
    for ep in range(nb_episodes):
        reward.append(0)
        obs = env.reset()[0]
        # A TimeStep is a namedtuple of
        # (step_type, reward, discount, observation).
        agent.reset()
        while True:
            action = agent.step(obs)
            obs = env.step(actions=[action])[0]
            reward[-1] += obs.reward
            if obs.last():
                cum_reward = obs.observation["score_cumulative"]
                reward_cumulative.append(cum_reward[0])
                break
    env.close()
    print(reward)
    print(np.mean(reward))
    print(reward == reward_cumulative)
Example 18
def main():
    FLAGS(sys.argv)

    steps = 0  # Test steps

    print("algorithm : %s" % FLAGS.algorithm)
    print("timesteps : %s" % FLAGS.timesteps)
    print("exploration_fraction : %s" % FLAGS.exploration_fraction)
    print("prioritized : %s" % FLAGS.prioritized)
    print("dueling : %s" % FLAGS.dueling)
    print("num_agents : %s" % FLAGS.num_agents)
    print("lr : %s" % FLAGS.lr)

    if FLAGS.lr == 0:
        FLAGS.lr = random.uniform(0.00001, 0.001)

    print("random lr : %s" % FLAGS.lr)

    lr_round = round(FLAGS.lr, 8)

    logdir = "tensorboard"

    if FLAGS.algorithm == "deepq-4way":
        logdir = "tensorboard/mineral/%s/%s_%s_prio%s_duel%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction,
            FLAGS.prioritized, FLAGS.dueling, lr_round, start_time)
    elif FLAGS.algorithm == "deepq":
        logdir = "tensorboard/mineral/%s/%s_%s_prio%s_duel%s_lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps, FLAGS.exploration_fraction,
            FLAGS.prioritized, FLAGS.dueling, lr_round, start_time)
    elif FLAGS.algorithm == "a2c":
        logdir = "tensorboard/mineral/%s/%s_n%s_s%s_nsteps%s/lr%s/%s" % (
            FLAGS.algorithm, FLAGS.timesteps,
            FLAGS.num_agents + FLAGS.num_scripts, FLAGS.num_scripts,
            FLAGS.nsteps, lr_round, start_time)

    if FLAGS.log == "tensorboard":
        Logger.DEFAULT \
          = Logger.CURRENT \
          = Logger(dir=None,
                   output_formats=[TensorBoardOutputFormat(logdir)])

    elif FLAGS.log == "stdout":
        Logger.DEFAULT \
          = Logger.CURRENT \
          = Logger(dir=None,
                   output_formats=[HumanOutputFormat(sys.stdout)])

    if FLAGS.algorithm == "deepq":

        AGENT_INTERFACE_FORMAT = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=16, minimap=16))
        # temp solution - sc2_env.Agent(sc2_env.Race.terran) might be too restricting
        # We need this change because sc2 now requires specifying players.
        with sc2_env.SC2Env(
                map_name="Simple64",
                players=[
                    sc2_env.Agent(race=sc2_env.Race.terran),
                    sc2_env.Agent(race=sc2_env.Race.terran)
                ],
                #players=[sc2_env.Agent(sc2_env.Race.terran),sc2_env.Agent(sc2_env.Race.terran)],
                step_mul=step_mul,
                visualize=True,
                agent_interface_format=AGENT_INTERFACE_FORMAT) as env:

            model = cnn_to_mlp(convs=[(16, 8, 4), (32, 4, 2)],
                               hiddens=[256],
                               dueling=True)
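            # Each convs entry above is (num_outputs, kernel_size, stride).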

            acts = deepq_nexus_wars.learn(
                env,
                q_func=model,
                num_actions=16,
                lr=FLAGS.lr,
                max_timesteps=FLAGS.timesteps,
                buffer_size=10000,
                exploration_fraction=FLAGS.exploration_fraction,
                exploration_final_eps=0.01,
                train_freq=4,
                learning_starts=10000,
                target_network_update_freq=1000,
                gamma=0.99,
                prioritized_replay=True,
                callback=deepq_callback)

            agent = random_agent.RandomAgent()
            run_loop.run_loop([agent], env, steps)

            acts[0].save("mineral_shards_x.pkl")
            acts[1].save("mineral_shards_y.pkl")

    elif FLAGS.algorithm == "deepq-4way":

        AGENT_INTERFACE_FORMAT = sc2_env.AgentInterfaceFormat(
            feature_dimensions=sc2_env.Dimensions(screen=32, minimap=32))
        with sc2_env.SC2Env(map_name="Simple64",
                            players=[
                                sc2_env.Agent(race=sc2_env.Race.terran),
                                sc2_env.Agent(race=sc2_env.Race.terran)
                            ],
                            step_mul=step_mul,
                            agent_interface_format=AGENT_INTERFACE_FORMAT,
                            visualize=True) as env:

            model = cnn_to_mlp(convs=[(16, 8, 4), (32, 4, 2)],
                               hiddens=[256],
                               dueling=True)

            act = deepq_mineral_4way.learn(
                env,
                q_func=model,
                num_actions=4,
                lr=FLAGS.lr,
                max_timesteps=FLAGS.timesteps,
                buffer_size=10000,
                exploration_fraction=FLAGS.exploration_fraction,
                exploration_final_eps=0.01,
                train_freq=4,
                learning_starts=10000,
                target_network_update_freq=1000,
                gamma=0.99,
                prioritized_replay=True,
                callback=deepq_4way_callback)

            act.save("mineral_shards.pkl")

    elif FLAGS.algorithm == "a2c":

        num_timesteps = int(40e6)

        num_timesteps //= 4

        seed = 0

        env = SubprocVecEnv(FLAGS.num_agents + FLAGS.num_scripts,
                            FLAGS.num_scripts, FLAGS.map)

        policy_fn = CnnPolicy
        a2c.learn(policy_fn,
                  env,
                  seed,
                  total_timesteps=num_timesteps,
                  nprocs=FLAGS.num_agents + FLAGS.num_scripts,
                  nscripts=FLAGS.num_scripts,
                  ent_coef=0.5,
                  nsteps=FLAGS.nsteps,
                  max_grad_norm=0.01,
                  callback=a2c_callback)
Example 19
agent_format1 = sc2_env.AgentInterfaceFormat(
    feature_dimensions=sc2_env.Dimensions(
        screen=(32, 32), minimap=(32, 32)))

# agent_format2 = sc2_env.AgentInterfaceFormat(
#     feature_dimensions=sc2_env.Dimensions(
#         screen=(32, 32), minimap=(32, 32)))

env = sc2_env.SC2Env(
    map_name=mini_games[0],
    players=[sc2_env.Agent(sc2_env.Race.terran),
             sc2_env.Agent(sc2_env.Race.zerg)],
    step_mul=MiniGame.step_mul,
    visualize=False,
    agent_interface_format=agent_format1)

agents = [random_agent.RandomAgent() for _ in range(MiniGame.players)]

# run_loop.run_loop(agents, env, max_frames=1000, max_episodes=10000)
obs_specs = env.observation_spec()
actions_specs = env.action_spec()

for agent, obs_spec, act_spec in zip(agents, obs_specs, actions_specs):
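    # Each agent is paired with its own observation and action spec.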
    agent.setup(obs_spec, act_spec)

total_episodes = 0
score1 = []
score2 = []

while total_episodes < 100:
    total_episodes += 1
    print(total_episodes)
Example 20
from a2c_agent import a2cAgent

import importlib
import threading

from absl import app
from pysc2 import maps
from pysc2.env import available_actions_printer
from pysc2.env import run_loop
from pysc2.env import sc2_env
from pysc2.lib import point_flag
from pysc2.lib import stopwatch
from pysc2.lib import features
from pysc2.agents import random_agent

AGENT = random_agent.RandomAgent()
BOTS = [sc2_env.Bot(sc2_env.Race.terran, sc2_env.Difficulty.very_easy)]
PLAYERS = [sc2_env.Agent(sc2_env.Race.terran)]
MAP = 'CollectMineralShards'


def run_thread(agents, players, map_name, visualize=False, save_replay=False):
    """Set up and run the sc2_env loop."""
    # agents = [AGENT,]
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name=map_name,
                    step_mul=16,
                    visualize=visualize,
                    players=players,