Example #1
    def __init__(self, num_agents, observation_spec, action_spec):
        """Initializes the TestEnvironment.

    The `next_observation` is initialized to be reward = 0., discount = 1.,
    and an appropriately sized observation of all zeros. `episode_length` is set
    to `float('inf')`.

    Args:
      num_agents: The number of agents.
      observation_spec: The observation specs for each player.
      action_spec: The action specs for each player.
    """
        self._num_agents = num_agents
        self._observation_spec = observation_spec
        self._action_spec = action_spec
        self._episode_steps = 0

        self.next_timestep = [
            environment.TimeStep(step_type=environment.StepType.MID,
                                 reward=0.,
                                 discount=1.,
                                 observation=self._default_observation(
                                     obs_spec, agent_index))
            for agent_index, obs_spec in enumerate(observation_spec)
        ]

        self.episode_length = float('inf')
Example #2
def agent_runner(controller, join):
    """Run the agent in a thread."""
    agent_module, agent_name = FLAGS.agent.rsplit(".", 1)
    agent_cls = getattr(importlib.import_module(agent_module), agent_name)
    agent = agent_cls()

    interface = sc_pb.InterfaceOptions()
    interface.raw = True
    interface.score = True
    interface.feature_layer.width = 24
    interface.feature_layer.resolution.x = FLAGS.feature_screen_size
    interface.feature_layer.resolution.y = FLAGS.feature_screen_size
    interface.feature_layer.minimap_resolution.x = FLAGS.feature_minimap_size
    interface.feature_layer.minimap_resolution.y = FLAGS.feature_minimap_size
    # if FLAGS.rgb_screen_size and FLAGS.rgb_minimap_size:
    #   if FLAGS.rgb_screen_size < FLAGS.rgb_minimap_size:
    #     sys.exit("Screen size can't be smaller than minimap size.")
    #   interface.render.resolution.x = FLAGS.rgb_screen_size
    #   interface.render.resolution.y = FLAGS.rgb_screen_size
    #   interface.render.minimap_resolution.x = FLAGS.rgb_minimap_size
    #   interface.render.minimap_resolution.y = FLAGS.rgb_minimap_size

    j = sc_pb.RequestJoinGame()
    j.CopyFrom(join)
    j.options.CopyFrom(interface)
    j.race = sc2_env.Race[FLAGS.agent_race]
    controller.join_game(j)

    feats = features.Features(game_info=controller.game_info())
    agent.setup(feats.observation_spec(), feats.action_spec())

    state = environment.StepType.FIRST
    reward = 0
    discount = 1
    while True:
        frame_start_time = time.time()
        if not FLAGS.realtime:
            controller.step(FLAGS.step_mul)
        obs = controller.observe()
        if obs.player_result:  # Episode over.
            state = environment.StepType.LAST
            discount = 0

        agent_obs = feats.transform_obs(obs)

        timestep = environment.TimeStep(step_type=state,
                                        reward=reward,
                                        discount=discount,
                                        observation=agent_obs)

        action = agent.step(timestep)
        if state == environment.StepType.LAST:
            break
        controller.act(feats.transform_action(obs.observation, action))

        if FLAGS.realtime:
            # SC2 on "faster" speed advances ~22.4 game loops per second, so
            # sleep out the remainder of this frame to keep real-time pacing.
            time.sleep(
                max(0, frame_start_time - time.time() + FLAGS.step_mul / 22.4))
    controller.quit()
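The docstring above says `agent_runner` is meant to run in a thread. A minimal launch sketch under that assumption; `controller` and `join` are assumed to be provided by the surrounding game-setup code (the create/join requests shown in the multiplayer example further below):

import threading

# Hypothetical: 'controller' and 'join' come from the code that created and
# configured the SC2 game (RequestCreateGame / RequestJoinGame).
agent_thread = threading.Thread(target=agent_runner, args=(controller, join))
agent_thread.start()

# ... the main thread can drive a human renderer or a second player here ...

agent_thread.join()  # Wait for agent_runner's episode loop to finish.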
Example #3
  def _step(self):
    self._parallel.run((c.step, self._step_mul) for c in self._controllers)
    self._obs = self._parallel.run(c.observe for c in self._controllers)
    agent_obs = [self._features.transform_obs(o.observation) for o in self._obs]

    # TODO(tewalds): How should we handle more than 2 agents and the case where
    # the episode can end early for some agents?
    outcome = [0] * self._num_players
    discount = self._discount
    if any(o.player_result for o in self._obs):  # Episode over.
      self._state = environment.StepType.LAST
      discount = 0
      for i, o in enumerate(self._obs):
        player_id = o.observation.player_common.player_id
        for result in o.player_result:
          if result.player_id == player_id:
            outcome[i] = _possible_results.get(result.result, 0)

    if self._score_index >= 0:  # Game score, not win/loss reward.
      cur_score = [o["score_cumulative"][self._score_index] for o in agent_obs]
      if self._episode_steps == 0:  # First reward is always 0.
        reward = [0] * self._num_players
      else:
        reward = [cur - last for cur, last in zip(cur_score, self._last_score)]
      self._last_score = cur_score
    else:
      reward = outcome

    if self._renderer_human:
      self._renderer_human.render(self._obs[0])
      cmd = self._renderer_human.get_actions(
          self._run_config, self._controllers[0])
      if cmd == renderer_human.ActionCmd.STEP:
        pass
      elif cmd == renderer_human.ActionCmd.RESTART:
        self._state = environment.StepType.LAST
      elif cmd == renderer_human.ActionCmd.QUIT:
        raise KeyboardInterrupt("Quit?")

    self._total_steps += self._step_mul
    self._episode_steps += self._step_mul
    if self._episode_length > 0 and self._episode_steps >= self._episode_length:
      self._state = environment.StepType.LAST
      # No change to reward or discount since it's not actually terminal.

    if self._state == environment.StepType.LAST:
      if (self._save_replay_episodes > 0 and
          self._episode_count % self._save_replay_episodes == 0):
        self.save_replay(self._replay_dir)
      logging.info(
          "Episode finished. Outcome: %s, reward: %s, score: %s",
          outcome, reward, [o["score_cumulative"][0] for o in agent_obs])

    return tuple(environment.TimeStep(step_type=self._state,
                                      reward=r * self._score_multiplier,
                                      discount=discount, observation=o)
                 for r, o in zip(reward, agent_obs))
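The score-based reward in this and the following examples always follows the same pattern: each step's reward is the difference between the current and previous cumulative score, and the very first reward is forced to 0. A small self-contained illustration of that pattern (plain Python, not part of pysc2):

def score_deltas(cumulative_scores):
    """Turns a sequence of cumulative scores into per-step rewards.

    Mirrors the _step logic above: the first reward is always 0, and each
    later reward is the current score minus the previous one.
    """
    rewards = []
    last = None
    for score in cumulative_scores:
        rewards.append(0 if last is None else score - last)
        last = score
    return rewards


# Cumulative scores 0, 5, 5, 12 yield rewards 0, 5, 0, 7.
assert score_deltas([0, 5, 5, 12]) == [0, 5, 0, 7]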
Example #4
    def start(self):
        """Parse replays."""

        if (not self.override) and os.path.isdir(self.write_dir):
            files_to_write = [parser.NPZ_FILE for parser in self.parsers]
            if all([f in os.listdir(self.write_dir) for f in files_to_write]):
                logging.info('This replay has already been parsed.')
                return
        else:
            os.makedirs(self.write_dir, exist_ok=True)

        # Save player meta information (results, apm, mmr, ...)
        player_meta_info = self.get_player_meta_info(self.info)
        with open(os.path.join(self.write_dir, 'PlayerMetaInfo.json'),
                  'w') as fp:
            json.dump(player_meta_info, fp, indent=4)

        # sc_pb: RequestGameInfo -> ResponseGameInfo
        _features = custom_features_from_game_info(self.controller.game_info())

        while True:

            # Take step, scale specified by 'step_mul' (sc_pb, RequestStep -> ResponseStep)
            self.controller.step(self.step_mul)

            # Receive observation (sc_pb, RequestObservation -> ResponseObservation)
            obs = self.controller.observe()

            # '.transform_obs' is defined under features.Features
            try:
                agent_obs = _features.custom_transform_obs(obs)
            except Exception as err:
                # Without a transformed observation we cannot build a TimeStep,
                # so log the error and stop parsing this replay.
                print(err)
                break

            if obs.player_result:
                self._state = environment.StepType.LAST
                discount = 0
            else:
                self._state = environment.StepType.MID
                discount = self.discount

            self._episode_steps += self.step_mul

            step = environment.TimeStep(step_type=self._state,
                                        reward=0,
                                        discount=discount,
                                        observation=agent_obs)

            for parser in self.parsers:
                parser.step(timestep=step)

            if self._state == environment.StepType.LAST:
                break  # break out of while loop
Example #5
    def _step(self):
        self._controller.step(self._step_mul)
        self._obs = self._controller.observe()
        agent_obs = self._features.transform_obs(self._obs.observation)

        if self._obs.player_result:  # Episode over.
            self._state = environment.StepType.LAST
            outcome = _possible_results.get(self._obs.player_result[0].result,
                                            0)
            discount = 0
        else:
            outcome = 0
            discount = self._discount

        if self._score_index >= 0:  # Game score, not win/loss reward.
            cur_score = agent_obs["score_cumulative"][self._score_index]
            # First reward is always 0.
            reward = cur_score - self._last_score if self._episode_steps > 0 else 0
            self._last_score = cur_score
        else:
            reward = outcome

        if self._renderer_human:
            self._renderer_human.render(self._obs)
            cmd = self._renderer_human.get_actions(self._run_config,
                                                   self._controller)
            if cmd == renderer_human.ActionCmd.STEP:
                pass
            elif cmd == renderer_human.ActionCmd.RESTART:
                self._state = environment.StepType.LAST
            elif cmd == renderer_human.ActionCmd.QUIT:
                raise KeyboardInterrupt("Quit?")

        self._episode_steps += self._step_mul
        if self._episode_length > 0 and self._episode_steps >= self._episode_length:
            self._state = environment.StepType.LAST
            # No change to reward or discount since it's not actually terminal.

        self._total_steps += self._step_mul
        if (self._save_replay_steps > 0 and
                self._total_steps % self._save_replay_steps < self._step_mul):
            self.save_replay(self._replay_dir)

        if self._state == environment.StepType.LAST:
            logging.info(
                "Episode finished. Outcome: %s, reward: %s, score: %s",
                outcome, reward, agent_obs["score_cumulative"][0])

        return (environment.TimeStep(step_type=self._state,
                                     reward=reward * self._score_multiplier,
                                     discount=discount,
                                     observation=agent_obs),
                )  # A tuple for multiplayer.
Example #6
    def _play(self):
        run = True
        step_counter = 0
        results = []
        while True:
            #print('run loop=', step_counter)
            # Step the game
            self._parallel.run(c.step for c in self._controllers)

            # Observe
            obs = self._parallel.run(c.observe for c in self._controllers)
            agent_obs = [
                f.transform_obs(o.observation)
                for f, o in zip(self._features, obs)
            ]

            if step_counter == 0:
                stype = environment.StepType.FIRST
            elif any(o.player_result for o in obs):
                for o in obs:
                    results.append(o.player_result)
                stype = environment.StepType.LAST
            else:
                stype = environment.StepType.MID

            timesteps = tuple(
                environment.TimeStep(step_type=stype,
                                     reward=0,
                                     discount=0,
                                     observation=o,
                                     game_info=i)
                for o, i in zip(agent_obs, self._game_infos))
            # Act
            actions1 = self._agents[0].step(timesteps[0])
            actions2 = self._agents[1].step(timesteps[1])
            actions = [actions1, actions2]
            #actions = [[], []]
            #print(actions)
            self._parallel.run(
                (c.acts, a) for c, a in zip(self._controllers, actions))
            step_counter += 1
            if step_counter >= self._max_step:
                break
            if stype == environment.StepType.LAST:
                break
            # Done with the game.
        return results
Example #7
    def __init__(self, num_players, observation_spec, action_spec):
        """Initializes the TestEnvironment.

    The `next_observation` is initialized to be reward = 0., discount = 1.,
    and an appropriately sized observation of all zeros. `episode_length` is set
    to `float('inf')`.

    Args:
      num_players: The number of players.
      observation_spec: The observation spec for each player.
      action_spec: The action spec for each player.
    """
        self._num_players = num_players
        self._observation_spec = (observation_spec, ) * self._num_players
        self._action_spec = (action_spec, ) * self._num_players
        self._episode_steps = 0

        self.next_timestep = environment.TimeStep(
            step_type=environment.StepType.MID,
            reward=0.,
            discount=1.,
            observation=self._default_observation())
        self.episode_length = float('inf')
Example #8
    def _observe(self, target_game_loop):
        self._get_observations(target_game_loop)

        # TODO(tewalds): How should we handle more than 2 agents and the case where
        # the episode can end early for some agents?
        outcome = [0] * self._num_agents
        discount = self._discount
        episode_complete = any(o.player_result for o in self._obs)

        if episode_complete:
            self._state = environment.StepType.LAST
            discount = 0
            for i, o in enumerate(self._obs):
                player_id = o.observation.player_common.player_id
                for result in o.player_result:
                    if result.player_id == player_id:
                        outcome[i] = possible_results.get(result.result, 0)

        if self._score_index >= 0:  # Game score, not win/loss reward.
            cur_score = [
                o["score_cumulative"][self._score_index]
                for o in self._agent_obs
            ]
            if self._episode_steps == 0:  # First reward is always 0.
                reward = [0] * self._num_agents
            else:
                reward = [
                    cur - last
                    for cur, last in zip(cur_score, self._last_score)
                ]
            self._last_score = cur_score
        else:
            reward = outcome

        if self._renderer_human:
            self._renderer_human.render(self._obs[0])
            cmd = self._renderer_human.get_actions(self._run_config,
                                                   self._controllers[0])
            if cmd == renderer_human.ActionCmd.STEP:
                pass
            elif cmd == renderer_human.ActionCmd.RESTART:
                self._state = environment.StepType.LAST
            elif cmd == renderer_human.ActionCmd.QUIT:
                raise KeyboardInterrupt("Quit?")

        self._total_steps += self._agent_obs[0].game_loop[
            0] - self._episode_steps
        self._episode_steps = self._agent_obs[0].game_loop[0]
        if self._episode_steps >= self._episode_length:
            self._state = environment.StepType.LAST
            if self._discount_zero_after_timeout:
                discount = 0.0
            if self._episode_steps >= MAX_STEP_COUNT:
                logging.info(
                    "Cut short to avoid SC2's max step count of 2^19=524288.")

        if self._state == environment.StepType.LAST:
            if (self._save_replay_episodes > 0
                    and self._episode_count % self._save_replay_episodes == 0):
                self.save_replay(self._replay_dir, self._replay_prefix)
            logging.info(("Episode %s finished after %s game steps. "
                          "Outcome: %s, reward: %s, score: %s"),
                         self._episode_count, self._episode_steps, outcome,
                         reward,
                         [o["score_cumulative"][0] for o in self._agent_obs])

        def zero_on_first_step(value):
            return 0.0 if self._state == environment.StepType.FIRST else value

        return tuple(
            environment.TimeStep(
                step_type=self._state,
                reward=zero_on_first_step(r * self._score_multiplier),
                discount=zero_on_first_step(discount),
                observation=o) for r, o in zip(reward, self._agent_obs))
Example #9
    def _step(self):
        with self._metrics.measure_step_time(self._step_mul):
            self._parallel.run(
                (c.step, self._step_mul) for c in self._controllers)

        with self._metrics.measure_observation_time():
            self._obs = self._parallel.run(c.observe
                                           for c in self._controllers)
            agent_obs = [
                f.transform_obs(o) for f, o in zip(self._features, self._obs)
            ]

        # TODO(tewalds): How should we handle more than 2 agents and the case where
        # the episode can end early for some agents?
        outcome = [0] * self._num_agents
        discount = self._discount
        if any(o.player_result for o in self._obs):  # Episode over.
            self._state = environment.StepType.LAST
            discount = 0
            for i, o in enumerate(self._obs):
                player_id = o.observation.player_common.player_id
                for result in o.player_result:
                    if result.player_id == player_id:
                        outcome[i] = possible_results.get(result.result, 0)

        if self._score_index >= 0:  # Game score, not win/loss reward.
            cur_score = [
                o["score_cumulative"][self._score_index] for o in agent_obs
            ]
            if self._episode_steps == 0:  # First reward is always 0.
                reward = [0] * self._num_agents
            else:
                reward = [
                    cur - last
                    for cur, last in zip(cur_score, self._last_score)
                ]
            self._last_score = cur_score
        else:
            reward = outcome

        if self._renderer_human:
            self._renderer_human.render(self._obs[0])
            cmd = self._renderer_human.get_actions(self._run_config,
                                                   self._controllers[0])
            if cmd == renderer_human.ActionCmd.STEP:
                pass
            elif cmd == renderer_human.ActionCmd.RESTART:
                self._state = environment.StepType.LAST
            elif cmd == renderer_human.ActionCmd.QUIT:
                raise KeyboardInterrupt("Quit?")

        self._total_steps += self._step_mul
        self._episode_steps += self._step_mul
        if self._episode_length > 0 and self._episode_steps >= self._episode_length:
            self._state = environment.StepType.LAST
            if self._discount_zero_after_timeout:
                discount = 0.0

        if self._state == environment.StepType.LAST:
            if (self._save_replay_episodes > 0
                    and self._episode_count % self._save_replay_episodes == 0):
                self.save_replay(self._replay_dir)
            logging.info(("Episode %s finished after %s game steps. "
                          "Outcome: %s, reward: %s, score: %s"),
                         self._episode_count, self._episode_steps, outcome,
                         reward, [o["score_cumulative"][0] for o in agent_obs])

        def zero_on_first_step(value):
            return 0.0 if self._state == environment.StepType.FIRST else value

        return tuple(
            environment.TimeStep(
                step_type=self._state,
                reward=zero_on_first_step(r * self._score_multiplier),
                discount=zero_on_first_step(discount),
                observation=o) for r, o in zip(reward, agent_obs))
Example #10
    def _observe(self):
        if not self._realtime:
            self._get_observations()
        else:
            needed_to_wait = False
            while True:
                self._get_observations()

                # Check that the game has advanced sufficiently.
                # If it hasn't, wait for it to.
                game_loop = self._agent_obs[0].game_loop[0]
                if game_loop < self._target_step:
                    if not needed_to_wait:
                        needed_to_wait = True
                        logging.info(
                            "Target step is %s, game loop is %s, waiting...",
                            self._target_step, game_loop)

                    time.sleep(REALTIME_GAME_LOOP_SECONDS)
                else:
                    # We're beyond our target now.
                    if needed_to_wait:
                        self._last_step_time = time.time()
                        logging.info("...game loop is now %s. Continuing.",
                                     game_loop)
                    break

        # TODO(tewalds): How should we handle more than 2 agents and the case where
        # the episode can end early for some agents?
        outcome = [0] * self._num_agents
        discount = self._discount
        episode_complete = any(o.player_result for o in self._obs)

        # In realtime, we don't receive player results reliably, yet we do
        # sometimes hit 'ended' status. When that happens we terminate the
        # episode.
        # TODO(b/115466611): player_results should be returned in realtime mode
        if self._realtime and self._controllers[
                0].status == protocol.Status.ended:
            logging.info("Protocol status is ended. Episode is complete.")
            episode_complete = True

        if self._realtime and len(self._obs) > 1:
            # Realtime doesn't seem to give us a player result when one player
            # gets eliminated. Hence some temporary hackery (which can only work
            # when we have both agents in this environment)...
            # TODO(b/115466611): player_results should be returned in realtime mode
            p1 = self._obs[0].observation.score.score_details
            p2 = self._obs[1].observation.score.score_details
            if p1.killed_value_structures > p2.total_value_structures - EPSILON:
                logging.info(
                    "The episode appears to be complete, p1 killed p2.")
                episode_complete = True
                outcome[0] = 1.0
                outcome[1] = -1.0
            elif p2.killed_value_structures > p1.total_value_structures - EPSILON:
                logging.info(
                    "The episode appears to be complete, p2 killed p1.")
                episode_complete = True
                outcome[0] = -1.0
                outcome[1] = 1.0

        if episode_complete:
            self._state = environment.StepType.LAST
            discount = 0
            for i, o in enumerate(self._obs):
                player_id = o.observation.player_common.player_id
                for result in o.player_result:
                    if result.player_id == player_id:
                        outcome[i] = possible_results.get(result.result, 0)

        if self._score_index >= 0:  # Game score, not win/loss reward.
            cur_score = [
                o["score_cumulative"][self._score_index]
                for o in self._agent_obs
            ]
            if self._episode_steps == 0:  # First reward is always 0.
                reward = [0] * self._num_agents
            else:
                reward = [
                    cur - last
                    for cur, last in zip(cur_score, self._last_score)
                ]
            self._last_score = cur_score
        else:
            reward = outcome

        if self._renderer_human:
            self._renderer_human.render(self._obs[0])
            cmd = self._renderer_human.get_actions(self._run_config,
                                                   self._controllers[0])
            if cmd == renderer_human.ActionCmd.STEP:
                pass
            elif cmd == renderer_human.ActionCmd.RESTART:
                self._state = environment.StepType.LAST
            elif cmd == renderer_human.ActionCmd.QUIT:
                raise KeyboardInterrupt("Quit?")

        self._total_steps += self._agent_obs[0].game_loop[
            0] - self._episode_steps
        self._episode_steps = self._agent_obs[0].game_loop[0]
        if self._episode_length > 0 and self._episode_steps >= self._episode_length:
            self._state = environment.StepType.LAST
            if self._discount_zero_after_timeout:
                discount = 0.0

        if self._state == environment.StepType.LAST:
            if (self._save_replay_episodes > 0
                    and self._episode_count % self._save_replay_episodes == 0):
                self.save_replay(self._replay_dir, self._replay_prefix)
            logging.info(("Episode %s finished after %s game steps. "
                          "Outcome: %s, reward: %s, score: %s"),
                         self._episode_count, self._episode_steps, outcome,
                         reward,
                         [o["score_cumulative"][0] for o in self._agent_obs])

        def zero_on_first_step(value):
            return 0.0 if self._state == environment.StepType.FIRST else value

        return tuple(
            environment.TimeStep(
                step_type=self._state,
                reward=zero_on_first_step(r * self._score_multiplier),
                discount=zero_on_first_step(discount),
                observation=o) for r, o in zip(reward, self._agent_obs))
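In realtime mode the example above repeatedly re-observes and sleeps until the game loop has reached the target step. A generic sketch of that polling pattern; `get_game_loop` is a hypothetical callable standing in for `self._get_observations()` plus reading `game_loop`, and the sleep interval mirrors `REALTIME_GAME_LOOP_SECONDS`:

import time

REALTIME_GAME_LOOP_SECONDS = 1 / 22.4  # SC2 advances ~22.4 game loops per second.


def wait_for_game_loop(get_game_loop, target_step, timeout_seconds=10.0):
    """Polls get_game_loop() until it reaches target_step or the timeout expires.

    Returns the last observed game loop value.
    """
    deadline = time.time() + timeout_seconds
    game_loop = get_game_loop()
    while game_loop < target_step and time.time() < deadline:
        time.sleep(REALTIME_GAME_LOOP_SECONDS)
        game_loop = get_game_loop()
    return game_loop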
Example #11
    def _observe(self, update_observation=None):
        if update_observation is None:
            update_observation = [True] * len(self._controllers)

        self._update_observations(update_observation)

        # TODO(tewalds): How should we handle more than 2 agents and the case where
        # the episode can end early for some agents?
        outcome = [0] * self._num_agents
        discount = self._discount
        episode_complete = any(o.player_result for o in self._obs)
        if episode_complete or self._controllers[
                0].status == protocol.Status.ended:
            if not all(update_observation):
                # The episode completed so we send new observations to everyone.
                self._update_observations([not i for i in update_observation])

            self._state = environment.StepType.LAST
            discount = 0
            for i, o in enumerate(self._obs):
                player_id = o.observation.player_common.player_id
                for result in o.player_result:
                    if result.player_id == player_id:
                        outcome[i] = possible_results.get(result.result, 0)

        if self._score_index >= 0:  # Game score, not win/loss reward.
            cur_score = [
                o["score_cumulative"][self._score_index]
                for o in self._agent_obs
            ]
            if self._episode_steps == 0:  # First reward is always 0.
                reward = [0] * self._num_agents
            else:
                reward = [
                    cur - last
                    for cur, last in zip(cur_score, self._last_score)
                ]
            self._last_score = cur_score
        else:
            reward = outcome

        if self._renderer_human:
            self._renderer_human.render(self._obs[0])
            cmd = self._renderer_human.get_actions(self._run_config,
                                                   self._controllers[0])
            if cmd == renderer_human.ActionCmd.STEP:
                pass
            elif cmd == renderer_human.ActionCmd.RESTART:
                self._state = environment.StepType.LAST
            elif cmd == renderer_human.ActionCmd.QUIT:
                raise KeyboardInterrupt("Quit?")

        self._total_steps += self._step_mul
        self._episode_steps += self._step_mul
        if self._episode_length > 0 and self._episode_steps >= self._episode_length:
            self._state = environment.StepType.LAST
            if self._discount_zero_after_timeout:
                discount = 0.0

        if self._state == environment.StepType.LAST:
            if (self._save_replay_episodes > 0
                    and self._episode_count % self._save_replay_episodes == 0):
                self.save_replay(self._replay_dir, self._replay_prefix)
            logging.info(("Episode %s finished after %s game steps. "
                          "Outcome: %s, reward: %s, score: %s"),
                         self._episode_count, self._episode_steps, outcome,
                         reward,
                         [o["score_cumulative"][0] for o in self._agent_obs])

        def zero_on_first_step(value):
            return 0.0 if self._state == environment.StepType.FIRST else value

        return tuple(
            environment.TimeStep(
                step_type=self._state,
                reward=zero_on_first_step(r * self._score_multiplier),
                discount=zero_on_first_step(discount),
                observation=o) for r, o in zip(reward, self._agent_obs))
Example #12
    def _observe(self, target_game_loop):
        # Transform in the thread so it runs while waiting for other observations.
        def parallel_observe(c, f):
            obs = c.observe(target_game_loop=target_game_loop)
            agent_obs = obs.observation if f is None else f.transform_obs(obs)
            game_info = c.game_info() if self._update_game_info else None
            return obs, agent_obs, game_info

        with self._metrics.measure_observation_time():
            self._obs, agent_obs, game_info = zip(*self._parallel.run(
                (parallel_observe, c, f)
                for c, f in zip(self._controllers, self._features)))
        if not self._update_game_info:
            game_info = self._game_info

        game_loop = self._obs[0].observation.game_loop

        if game_loop < target_game_loop:
            logging.warning(
                "We got a earlier observation than we asked for, %d rather than %d.",
                game_loop, target_game_loop)
        elif game_loop > target_game_loop:
            logging.warning(
                "We got a later observation than we asked for, %d rather than %d.",
                game_loop, target_game_loop)

        # TODO(tewalds): How should we handle more than 2 agents and the case where
        # the episode can end early for some agents?
        outcome = [0] * self._num_agents
        discount = self._discount
        if any(o.player_result for o in self._obs):  # Episode over.
            self._state = environment.StepType.LAST
            discount = 0
            for i, o in enumerate(self._obs):
                player_id = o.observation.player_common.player_id
                for result in o.player_result:
                    if result.player_id == player_id:
                        outcome[i] = possible_results.get(result.result, 0)

        if self._score_index >= 0:  # Game score, not win/loss reward.
            if not self.raw:
                cur_score = [
                    o["score_cumulative"][self._score_index] for o in agent_obs
                ]
            else:
                cur_score = [
                    ext_score(o)[self._score_index] for o in self._obs
                ]
            if self._episode_steps == 0:  # First reward is always 0.
                reward = [0] * self._num_agents
            else:
                reward = [
                    cur - last
                    for cur, last in zip(cur_score, self._last_score)
                ]
            self._last_score = cur_score
        else:
            reward = outcome

        if self._renderer_human:
            self._renderer_human.render(self._obs[0])
            cmd = self._renderer_human.get_actions(self._run_config,
                                                   self._controllers[0])
            if cmd == renderer_human.ActionCmd.STEP:
                pass
            elif cmd == renderer_human.ActionCmd.RESTART:
                self._state = environment.StepType.LAST
            elif cmd == renderer_human.ActionCmd.QUIT:
                raise KeyboardInterrupt("Quit?")

        self._total_steps += game_loop - self._episode_steps
        self._episode_steps = game_loop
        if self._episode_length > 0 and self._episode_steps >= self._episode_length:
            self._state = environment.StepType.LAST
            # No change to reward or discount since it's not actually terminal.

        if self._state == environment.StepType.LAST:
            if (self._save_replay_episodes > 0
                    and self._episode_count % self._save_replay_episodes == 0):
                self.save_replay(self._replay_dir)
            if not self.raw:
                score_0 = [o["score_cumulative"][0] for o in agent_obs]
            else:
                score_0 = [ext_score(o)[0] for o in self._obs]
            logging.info(("Episode %s finished after %s game steps. "
                          "Outcome: %s, reward: %s, score: %s"),
                         self._episode_count, self._episode_steps, outcome,
                         reward, score_0)
        if not self.raw:
            for o, obs in zip(
                    agent_obs,
                    self._obs):  # expose same data structure with raw_pb
                o["score"] = obs.observation.score
                o["player_common"] = obs.observation.player_common
                o["ui_data"] = obs.observation.ui_data
                o["abilities"] = obs.observation.abilities
        return tuple(
            environment.TimeStep(step_type=self._state,
                                 reward=r * self._score_multiplier,
                                 discount=discount,
                                 observation=o,
                                 game_info=info,
                                 actions=obs.actions,
                                 action_errors=obs.action_errors) for r, o,
            info, obs in zip(reward, agent_obs, game_info, self._obs))
Example #13
def test_multi_player(agents, disable_fog):
    players = 2
    if len(agents) == 2:
        agent1, agent2 = agents
    else:
        raise ValueError("test_multi_player expects exactly two agents.")
    run_config = run_configs.get()
    parallel = run_parallel.RunParallel()
    map_inst = maps.get(FLAGS.map)

    screen_size_px = point.Point(64, 64)
    minimap_size_px = point.Point(32, 32)
    interface = sc_pb.InterfaceOptions(raw=True, score=True)
    screen_size_px.assign_to(interface.feature_layer.resolution)
    minimap_size_px.assign_to(interface.feature_layer.minimap_resolution)

    # Reserve a whole bunch of ports for the weird multiplayer implementation.
    ports = [portpicker.pick_unused_port() for _ in range(1 + players * 2)]
    print("Valid Ports: %s", ports)

    # Actually launch the game processes.
    print("start")
    sc2_procs = [run_config.start(extra_ports=ports) for _ in range(players)]
    controllers = [p.controller for p in sc2_procs]

    try:
        # Save the map so each controller can access it.
        map_path = os.path.basename(map_inst.path)
        print("save_map")
        parallel.run((c.save_map, map_path, run_config.map_data(map_inst.path))
                     for c in controllers)

        # Create the create request.
        real_time = True
        create = sc_pb.RequestCreateGame(
            local_map=sc_pb.LocalMap(map_path=map_path), realtime=real_time)
        for _ in range(players):
            create.player_setup.add(type=sc_pb.Participant)

        # Create the join request.
        join1 = sc_pb.RequestJoinGame(race=races[FLAGS.agent1_race],
                                      options=interface)
        join1.shared_port = ports.pop()
        join1.server_ports.game_port = ports.pop()
        join1.server_ports.base_port = ports.pop()
        join1.client_ports.add(game_port=ports.pop(), base_port=ports.pop())

        join2 = copy.copy(join1)
        join2.race = races[FLAGS.agent2_race]

        # This is where the game is actually played.
        # Create and join.
        print("create")
        controllers[0].create_game(create)
        print("join")
        parallel.run((c.join_game, join)
                     for c, join in zip(controllers, [join1, join2]))

        controllers[0]._client.send(debug=sc_pb.RequestDebug(
            debug=[debug_pb2.DebugCommand(game_state=1)]))
        if disable_fog[0]:
            controllers[0].disable_fog()
        if disable_fog[1]:
            controllers[1].disable_fog()

        print("run")
        game_info = controllers[0].game_info()
        extractors = features.Features(game_info)
        for game_loop in range(1, 100000):  # steps per episode
            # Step the game
            step_mul = FLAGS.step_mul
            if not real_time:
                parallel.run((c.step, step_mul) for c in controllers)
            else:
                time.sleep(FLAGS.sleep_time)

            # Observe
            obs = parallel.run(c.observe for c in controllers)
            agent_obs = [extractors.transform_obs(o.observation) for o in obs]
            game_info = [None for c in controllers]

            if not any(o.player_result for o in obs):  # Episode still running.
                game_info = parallel.run(c.game_info for c in controllers)
            timesteps = tuple(
                environment.TimeStep(step_type=0,
                                     reward=0,
                                     discount=0,
                                     observation=o,
                                     game_info=info)
                for o, info in zip(agent_obs, game_info))

            # Act
            if agent1 is not None:
                actions1 = agent1.step(timesteps[0])
            else:
                actions1 = []
            actions2 = agent2.step(timesteps[1])
            actions = [actions1, actions2]
            funcs_with_args = [(c.acts, a)
                               for c, a in zip(controllers, actions)]
            parallel.run(funcs_with_args)

        # Done with the game.
        print("leave")
        parallel.run(c.leave for c in controllers)
    finally:
        print("quit")
        # Done, shut down. Don't depend on parallel since it might be broken.
        for c in controllers:
            c.quit()
        for p in sc2_procs:
            p.close()