def __init__(self, num_agents, observation_spec, action_spec):
  """Initializes the TestEnvironment.

  The `next_timestep` is initialized to be reward = 0., discount = 1., and an
  appropriately sized observation of all zeros. `episode_length` is set to
  `float('inf')`.

  Args:
    num_agents: The number of agents.
    observation_spec: The observation specs for each player.
    action_spec: The action specs for each player.
  """
  self._num_agents = num_agents
  self._observation_spec = observation_spec
  self._action_spec = action_spec
  self._episode_steps = 0
  self.next_timestep = [
      environment.TimeStep(
          step_type=environment.StepType.MID,
          reward=0.,
          discount=1.,
          observation=self._default_observation(obs_spec, agent_index))
      for agent_index, obs_spec in enumerate(observation_spec)
  ]
  self.episode_length = float('inf')
def agent_runner(controller, join):
  """Run the agent in a thread."""
  agent_module, agent_name = FLAGS.agent.rsplit(".", 1)
  agent_cls = getattr(importlib.import_module(agent_module), agent_name)
  agent = agent_cls()

  interface = sc_pb.InterfaceOptions()
  interface.raw = True
  interface.score = True
  interface.feature_layer.width = 24
  interface.feature_layer.resolution.x = FLAGS.feature_screen_size
  interface.feature_layer.resolution.y = FLAGS.feature_screen_size
  interface.feature_layer.minimap_resolution.x = FLAGS.feature_minimap_size
  interface.feature_layer.minimap_resolution.y = FLAGS.feature_minimap_size

  # if FLAGS.rgb_screen_size and FLAGS.rgb_minimap_size:
  #   if FLAGS.rgb_screen_size < FLAGS.rgb_minimap_size:
  #     sys.exit("Screen size can't be smaller than minimap size.")
  #   interface.render.resolution.x = FLAGS.rgb_screen_size
  #   interface.render.resolution.y = FLAGS.rgb_screen_size
  #   interface.render.minimap_resolution.x = FLAGS.rgb_minimap_size
  #   interface.render.minimap_resolution.y = FLAGS.rgb_minimap_size

  j = sc_pb.RequestJoinGame()
  j.CopyFrom(join)
  j.options.CopyFrom(interface)
  j.race = sc2_env.Race[FLAGS.agent_race]
  controller.join_game(j)

  feats = features.Features(game_info=controller.game_info())
  agent.setup(feats.observation_spec(), feats.action_spec())

  state = environment.StepType.FIRST
  reward = 0
  discount = 1
  while True:
    frame_start_time = time.time()
    if not FLAGS.realtime:
      controller.step(FLAGS.step_mul)
    obs = controller.observe()
    if obs.player_result:  # Episode over.
      state = environment.StepType.LAST
      discount = 0

    agent_obs = feats.transform_obs(obs)

    timestep = environment.TimeStep(step_type=state, reward=reward,
                                    discount=discount, observation=agent_obs)

    action = agent.step(timestep)

    if state == environment.StepType.LAST:
      break
    # Without this transition the agent would see FIRST on every step.
    state = environment.StepType.MID

    controller.act(feats.transform_action(obs.observation, action))

    if FLAGS.realtime:
      # Sleep so the loop runs at roughly game speed (22.4 game loops/second).
      time.sleep(max(0, frame_start_time - time.time()
                     + FLAGS.step_mul / 22.4))

  controller.quit()
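# A hedged usage sketch (not from the original source): `agent_runner` is
# documented as running "in a thread", so a launcher would typically start it
# on a daemon thread alongside the host connection. `controller` and `join`
# are whatever the launcher already built; `launch_agent_thread` is a
# hypothetical helper name.
import threading

def launch_agent_thread(controller, join):
  """Starts agent_runner on a daemon thread and returns the thread handle."""
  t = threading.Thread(target=agent_runner, args=(controller, join))
  t.daemon = True  # Don't block process exit if the game dies first.
  t.start()
  return t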
def _step(self):
  self._parallel.run((c.step, self._step_mul) for c in self._controllers)
  self._obs = self._parallel.run(c.observe for c in self._controllers)
  agent_obs = [self._features.transform_obs(o.observation) for o in self._obs]

  # TODO(tewalds): How should we handle more than 2 agents and the case where
  # the episode can end early for some agents?
  outcome = [0] * self._num_players
  discount = self._discount
  if any(o.player_result for o in self._obs):  # Episode over.
    self._state = environment.StepType.LAST
    discount = 0
    for i, o in enumerate(self._obs):
      player_id = o.observation.player_common.player_id
      for result in o.player_result:
        if result.player_id == player_id:
          outcome[i] = _possible_results.get(result.result, 0)

  if self._score_index >= 0:  # Game score, not win/loss reward.
    cur_score = [o["score_cumulative"][self._score_index] for o in agent_obs]
    if self._episode_steps == 0:  # First reward is always 0.
      reward = [0] * self._num_players
    else:
      reward = [cur - last for cur, last in zip(cur_score, self._last_score)]
    self._last_score = cur_score
  else:
    reward = outcome

  if self._renderer_human:
    self._renderer_human.render(self._obs[0])
    cmd = self._renderer_human.get_actions(self._run_config,
                                           self._controllers[0])
    if cmd == renderer_human.ActionCmd.STEP:
      pass
    elif cmd == renderer_human.ActionCmd.RESTART:
      self._state = environment.StepType.LAST
    elif cmd == renderer_human.ActionCmd.QUIT:
      raise KeyboardInterrupt("Quit?")

  self._total_steps += self._step_mul
  self._episode_steps += self._step_mul
  if self._episode_length > 0 and self._episode_steps >= self._episode_length:
    self._state = environment.StepType.LAST
    # No change to reward or discount since it's not actually terminal.

  if self._state == environment.StepType.LAST:
    if (self._save_replay_episodes > 0 and
        self._episode_count % self._save_replay_episodes == 0):
      self.save_replay(self._replay_dir)
    logging.info("Episode finished. Outcome: %s, reward: %s, score: %s",
                 outcome, reward,
                 [o["score_cumulative"][0] for o in agent_obs])

  return tuple(environment.TimeStep(step_type=self._state,
                                    reward=r * self._score_multiplier,
                                    discount=discount, observation=o)
               for r, o in zip(reward, agent_obs))
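# The nested loop above matches each player's own entry out of the shared
# player_result list. A standalone sketch of that lookup; `_possible_results`
# is assumed to map results such as sc_pb.Victory -> 1, sc_pb.Defeat -> -1,
# sc_pb.Tie -> 0 (hedged: names mirror the code above, not a verified API).
def outcome_for_player(player_id, player_results, result_map):
  """Returns the mapped result for player_id, or 0 if no result matches."""
  for result in player_results:
    if result.player_id == player_id:
      return result_map.get(result.result, 0)
  return 0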
def start(self):
  """Parse replays."""
  if (not self.override) and os.path.isdir(self.write_dir):
    files_to_write = [parser.NPZ_FILE for parser in self.parsers]
    if all([f in os.listdir(self.write_dir) for f in files_to_write]):
      logging.info('This replay has already been parsed.')
      return
  else:
    os.makedirs(self.write_dir, exist_ok=True)

  # Save player meta information (results, apm, mmr, ...).
  player_meta_info = self.get_player_meta_info(self.info)
  with open(os.path.join(self.write_dir, 'PlayerMetaInfo.json'), 'w') as fp:
    json.dump(player_meta_info, fp, indent=4)

  # sc_pb: RequestGameInfo -> ResponseGameInfo.
  _features = custom_features_from_game_info(self.controller.game_info())

  while True:
    # Take a step, with scale specified by 'step_mul'
    # (sc_pb: RequestStep -> ResponseStep).
    self.controller.step(self.step_mul)

    # Receive an observation
    # (sc_pb: RequestObservation -> ResponseObservation).
    obs = self.controller.observe()

    # '.transform_obs' is defined under features.Features.
    try:
      agent_obs = _features.custom_transform_obs(obs)
    except Exception as err:
      # Skip this step if the observation can't be transformed; otherwise
      # `agent_obs` would be unbound below.
      logging.error(err)
      continue

    if obs.player_result:
      self._state = environment.StepType.LAST
      discount = 0
    else:
      self._state = environment.StepType.MID
      discount = self.discount

    self._episode_steps += self.step_mul

    step = environment.TimeStep(step_type=self._state, reward=0,
                                discount=discount, observation=agent_obs)

    for parser in self.parsers:
      parser.step(timestep=step)

    if self._state == environment.StepType.LAST:
      break  # Break out of the while loop.
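# A minimal sketch (an assumption, not part of the original parser) of the
# idempotency check used at the top of `start`: a replay counts as parsed
# once every parser's NPZ output file is present in `write_dir`.
import os

def already_parsed(write_dir, parsers):
  """Returns True iff all parser outputs already exist in write_dir."""
  if not os.path.isdir(write_dir):
    return False
  existing = set(os.listdir(write_dir))
  return all(parser.NPZ_FILE in existing for parser in parsers)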
def _step(self):
  self._controller.step(self._step_mul)
  self._obs = self._controller.observe()
  agent_obs = self._features.transform_obs(self._obs.observation)

  if self._obs.player_result:  # Episode over.
    self._state = environment.StepType.LAST
    outcome = _possible_results.get(self._obs.player_result[0].result, 0)
    discount = 0
  else:
    outcome = 0
    discount = self._discount

  if self._score_index >= 0:  # Game score, not win/loss reward.
    cur_score = agent_obs["score_cumulative"][self._score_index]
    # First reward is always 0.
    reward = cur_score - self._last_score if self._episode_steps > 0 else 0
    self._last_score = cur_score
  else:
    reward = outcome

  if self._renderer_human:
    self._renderer_human.render(self._obs)
    cmd = self._renderer_human.get_actions(self._run_config, self._controller)
    if cmd == renderer_human.ActionCmd.STEP:
      pass
    elif cmd == renderer_human.ActionCmd.RESTART:
      self._state = environment.StepType.LAST
    elif cmd == renderer_human.ActionCmd.QUIT:
      raise KeyboardInterrupt("Quit?")

  self._episode_steps += self._step_mul
  if self._episode_length > 0 and self._episode_steps >= self._episode_length:
    self._state = environment.StepType.LAST
    # No change to reward or discount since it's not actually terminal.

  self._total_steps += self._step_mul
  if (self._save_replay_steps > 0 and
      self._total_steps % self._save_replay_steps < self._step_mul):
    self.save_replay(self._replay_dir)

  if self._state == environment.StepType.LAST:
    logging.info("Episode finished. Outcome: %s, reward: %s, score: %s",
                 outcome, reward, agent_obs["score_cumulative"][0])

  return (environment.TimeStep(step_type=self._state,
                               reward=reward * self._score_multiplier,
                               discount=discount,
                               observation=agent_obs),)  # A tuple for multiplayer.
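# The score-based reward above is a per-step delta of a cumulative score, with
# the first step of an episode forced to 0. A standalone sketch of that rule
# (illustrative names, not part of the original class):
def score_delta_reward(cur_score, last_score, episode_steps):
  """First reward of an episode is 0; afterwards it's the score change."""
  return cur_score - last_score if episode_steps > 0 else 0

# E.g. score_delta_reward(150, 100, episode_steps=8) == 50, while at episode
# start score_delta_reward(100, 0, episode_steps=0) == 0.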
def _play(self):
  step_counter = 0
  results = []
  while True:
    # Step the game.
    self._parallel.run(c.step for c in self._controllers)

    # Observe.
    obs = self._parallel.run(c.observe for c in self._controllers)
    agent_obs = [
        f.transform_obs(o.observation) for f, o in zip(self._features, obs)
    ]

    if step_counter == 0:
      stype = environment.StepType.FIRST
    elif any(o.player_result for o in obs):
      for o in obs:
        results.append(o.player_result)
      stype = environment.StepType.LAST
    else:
      stype = environment.StepType.MID

    timesteps = tuple(
        environment.TimeStep(step_type=stype, reward=0, discount=0,
                             observation=o, game_info=i)
        for o, i in zip(agent_obs, self._game_infos))

    # Act.
    actions1 = self._agents[0].step(timesteps[0])
    actions2 = self._agents[1].step(timesteps[1])
    actions = [actions1, actions2]
    self._parallel.run(
        (c.acts, a) for c, a in zip(self._controllers, actions))

    step_counter += 1
    if step_counter >= self._max_step:
      break
    if stype == environment.StepType.LAST:
      break

  # Done with the game.
  return results
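# The FIRST/MID/LAST selection above is a small pure function in disguise.
# A hedged sketch of the same classification (StepType comes from
# pysc2.env.environment, as in the surrounding code):
def classify_step(step_counter, episode_over):
  """Maps loop state to the StepType used for the agents' timesteps."""
  if step_counter == 0:
    return environment.StepType.FIRST
  if episode_over:
    return environment.StepType.LAST
  return environment.StepType.MID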
def __init__(self, num_players, observation_spec, action_spec):
  """Initializes the TestEnvironment.

  The `next_timestep` is initialized to be reward = 0., discount = 1., and an
  appropriately sized observation of all zeros. `episode_length` is set to
  `float('inf')`.

  Args:
    num_players: The number of players.
    observation_spec: The observation spec for each player.
    action_spec: The action spec for each player.
  """
  self._num_players = num_players
  self._observation_spec = (observation_spec,) * self._num_players
  self._action_spec = (action_spec,) * self._num_players
  self._episode_steps = 0
  self.next_timestep = environment.TimeStep(
      step_type=environment.StepType.MID,
      reward=0.,
      discount=1.,
      observation=self._default_observation())
  self.episode_length = float('inf')
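# Note the spec replication above: one spec object is broadcast to every
# player rather than copied per player. A tiny hedged sketch of that
# semantics, with a plain dict standing in for a real spec object:
obs_spec = {"screen": (84, 84)}  # Hypothetical placeholder spec.
specs_per_player = (obs_spec,) * 2
assert specs_per_player[0] is specs_per_player[1]  # Shared, not copied.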
def _observe(self, target_game_loop):
  self._get_observations(target_game_loop)

  # TODO(tewalds): How should we handle more than 2 agents and the case where
  # the episode can end early for some agents?
  outcome = [0] * self._num_agents
  discount = self._discount
  episode_complete = any(o.player_result for o in self._obs)

  if episode_complete:
    self._state = environment.StepType.LAST
    discount = 0
    for i, o in enumerate(self._obs):
      player_id = o.observation.player_common.player_id
      for result in o.player_result:
        if result.player_id == player_id:
          outcome[i] = possible_results.get(result.result, 0)

  if self._score_index >= 0:  # Game score, not win/loss reward.
    cur_score = [o["score_cumulative"][self._score_index]
                 for o in self._agent_obs]
    if self._episode_steps == 0:  # First reward is always 0.
      reward = [0] * self._num_agents
    else:
      reward = [cur - last for cur, last in zip(cur_score, self._last_score)]
    self._last_score = cur_score
  else:
    reward = outcome

  if self._renderer_human:
    self._renderer_human.render(self._obs[0])
    cmd = self._renderer_human.get_actions(self._run_config,
                                           self._controllers[0])
    if cmd == renderer_human.ActionCmd.STEP:
      pass
    elif cmd == renderer_human.ActionCmd.RESTART:
      self._state = environment.StepType.LAST
    elif cmd == renderer_human.ActionCmd.QUIT:
      raise KeyboardInterrupt("Quit?")

  self._total_steps += self._agent_obs[0].game_loop[0] - self._episode_steps
  self._episode_steps = self._agent_obs[0].game_loop[0]
  if self._episode_steps >= self._episode_length:
    self._state = environment.StepType.LAST
    if self._discount_zero_after_timeout:
      discount = 0.0
    if self._episode_steps >= MAX_STEP_COUNT:
      logging.info("Cut short to avoid SC2's max step count of 2^19=524288.")

  if self._state == environment.StepType.LAST:
    if (self._save_replay_episodes > 0 and
        self._episode_count % self._save_replay_episodes == 0):
      self.save_replay(self._replay_dir, self._replay_prefix)
    logging.info(("Episode %s finished after %s game steps. "
                  "Outcome: %s, reward: %s, score: %s"),
                 self._episode_count, self._episode_steps, outcome, reward,
                 [o["score_cumulative"][0] for o in self._agent_obs])

  def zero_on_first_step(value):
    return 0.0 if self._state == environment.StepType.FIRST else value

  return tuple(environment.TimeStep(
      step_type=self._state,
      reward=zero_on_first_step(r * self._score_multiplier),
      discount=zero_on_first_step(discount),
      observation=o) for r, o in zip(reward, self._agent_obs))
def _step(self):
  with self._metrics.measure_step_time(self._step_mul):
    self._parallel.run((c.step, self._step_mul) for c in self._controllers)

  with self._metrics.measure_observation_time():
    self._obs = self._parallel.run(c.observe for c in self._controllers)
    agent_obs = [f.transform_obs(o) for f, o in zip(self._features, self._obs)]

  # TODO(tewalds): How should we handle more than 2 agents and the case where
  # the episode can end early for some agents?
  outcome = [0] * self._num_agents
  discount = self._discount
  if any(o.player_result for o in self._obs):  # Episode over.
    self._state = environment.StepType.LAST
    discount = 0
    for i, o in enumerate(self._obs):
      player_id = o.observation.player_common.player_id
      for result in o.player_result:
        if result.player_id == player_id:
          outcome[i] = possible_results.get(result.result, 0)

  if self._score_index >= 0:  # Game score, not win/loss reward.
    cur_score = [o["score_cumulative"][self._score_index] for o in agent_obs]
    if self._episode_steps == 0:  # First reward is always 0.
      reward = [0] * self._num_agents
    else:
      reward = [cur - last for cur, last in zip(cur_score, self._last_score)]
    self._last_score = cur_score
  else:
    reward = outcome

  if self._renderer_human:
    self._renderer_human.render(self._obs[0])
    cmd = self._renderer_human.get_actions(self._run_config,
                                           self._controllers[0])
    if cmd == renderer_human.ActionCmd.STEP:
      pass
    elif cmd == renderer_human.ActionCmd.RESTART:
      self._state = environment.StepType.LAST
    elif cmd == renderer_human.ActionCmd.QUIT:
      raise KeyboardInterrupt("Quit?")

  self._total_steps += self._step_mul
  self._episode_steps += self._step_mul
  if self._episode_length > 0 and self._episode_steps >= self._episode_length:
    self._state = environment.StepType.LAST
    if self._discount_zero_after_timeout:
      discount = 0.0

  if self._state == environment.StepType.LAST:
    if (self._save_replay_episodes > 0 and
        self._episode_count % self._save_replay_episodes == 0):
      self.save_replay(self._replay_dir)
    logging.info(("Episode %s finished after %s game steps. "
                  "Outcome: %s, reward: %s, score: %s"),
                 self._episode_count, self._episode_steps, outcome, reward,
                 [o["score_cumulative"][0] for o in agent_obs])

  def zero_on_first_step(value):
    return 0.0 if self._state == environment.StepType.FIRST else value

  return tuple(environment.TimeStep(
      step_type=self._state,
      reward=zero_on_first_step(r * self._score_multiplier),
      discount=zero_on_first_step(discount),
      observation=o) for r, o in zip(reward, agent_obs))
def _observe(self):
  if not self._realtime:
    self._get_observations()
  else:
    needed_to_wait = False
    while True:
      self._get_observations()

      # Check that the game has advanced sufficiently.
      # If it hasn't, wait for it to.
      game_loop = self._agent_obs[0].game_loop[0]
      if game_loop < self._target_step:
        if not needed_to_wait:
          needed_to_wait = True
          logging.info("Target step is %s, game loop is %s, waiting...",
                       self._target_step, game_loop)
        time.sleep(REALTIME_GAME_LOOP_SECONDS)
      else:
        # We're beyond our target now.
        if needed_to_wait:
          self._last_step_time = time.time()
          logging.info("...game loop is now %s. Continuing.", game_loop)
        break

  # TODO(tewalds): How should we handle more than 2 agents and the case where
  # the episode can end early for some agents?
  outcome = [0] * self._num_agents
  discount = self._discount
  episode_complete = any(o.player_result for o in self._obs)

  # In realtime, we don't receive player results reliably, yet we do sometimes
  # hit 'ended' status. When that happens we terminate the episode.
  # TODO(b/115466611): player_results should be returned in realtime mode.
  if self._realtime and self._controllers[0].status == protocol.Status.ended:
    logging.info("Protocol status is ended. Episode is complete.")
    episode_complete = True

  if self._realtime and len(self._obs) > 1:
    # Realtime doesn't seem to give us a player result when one player gets
    # eliminated. Hence some temporary hackery (which can only work when we
    # have both agents in this environment)...
    # TODO(b/115466611): player_results should be returned in realtime mode.
    p1 = self._obs[0].observation.score.score_details
    p2 = self._obs[1].observation.score.score_details
    if p1.killed_value_structures > p2.total_value_structures - EPSILON:
      logging.info("The episode appears to be complete, p1 killed p2.")
      episode_complete = True
      outcome[0] = 1.0
      outcome[1] = -1.0
    elif p2.killed_value_structures > p1.total_value_structures - EPSILON:
      logging.info("The episode appears to be complete, p2 killed p1.")
      episode_complete = True
      outcome[0] = -1.0
      outcome[1] = 1.0

  if episode_complete:
    self._state = environment.StepType.LAST
    discount = 0
    for i, o in enumerate(self._obs):
      player_id = o.observation.player_common.player_id
      for result in o.player_result:
        if result.player_id == player_id:
          outcome[i] = possible_results.get(result.result, 0)

  if self._score_index >= 0:  # Game score, not win/loss reward.
    cur_score = [o["score_cumulative"][self._score_index]
                 for o in self._agent_obs]
    if self._episode_steps == 0:  # First reward is always 0.
      reward = [0] * self._num_agents
    else:
      reward = [cur - last for cur, last in zip(cur_score, self._last_score)]
    self._last_score = cur_score
  else:
    reward = outcome

  if self._renderer_human:
    self._renderer_human.render(self._obs[0])
    cmd = self._renderer_human.get_actions(self._run_config,
                                           self._controllers[0])
    if cmd == renderer_human.ActionCmd.STEP:
      pass
    elif cmd == renderer_human.ActionCmd.RESTART:
      self._state = environment.StepType.LAST
    elif cmd == renderer_human.ActionCmd.QUIT:
      raise KeyboardInterrupt("Quit?")

  self._total_steps += self._agent_obs[0].game_loop[0] - self._episode_steps
  self._episode_steps = self._agent_obs[0].game_loop[0]
  if self._episode_length > 0 and self._episode_steps >= self._episode_length:
    self._state = environment.StepType.LAST
    if self._discount_zero_after_timeout:
      discount = 0.0

  if self._state == environment.StepType.LAST:
    if (self._save_replay_episodes > 0 and
        self._episode_count % self._save_replay_episodes == 0):
      self.save_replay(self._replay_dir, self._replay_prefix)
    logging.info(("Episode %s finished after %s game steps. "
                  "Outcome: %s, reward: %s, score: %s"),
                 self._episode_count, self._episode_steps, outcome, reward,
                 [o["score_cumulative"][0] for o in self._agent_obs])

  def zero_on_first_step(value):
    return 0.0 if self._state == environment.StepType.FIRST else value

  return tuple(environment.TimeStep(
      step_type=self._state,
      reward=zero_on_first_step(r * self._score_multiplier),
      discount=zero_on_first_step(discount),
      observation=o) for r, o in zip(reward, self._agent_obs))
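# The realtime branch above polls until the game loop catches up to the
# target step. A condensed, self-contained sketch of that wait pattern,
# assuming a `get_game_loop()` callable; the default poll interval mirrors
# one game loop at SC2's 22.4 loops/second (hedged assumption about
# REALTIME_GAME_LOOP_SECONDS).
import time

def wait_for_game_loop(get_game_loop, target_step, poll_seconds=1 / 22.4):
  """Blocks until the realtime game has advanced to target_step."""
  while get_game_loop() < target_step:
    time.sleep(poll_seconds)
  return get_game_loop()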
def _observe(self, update_observation=None):
  if update_observation is None:
    update_observation = [True] * len(self._controllers)
  self._update_observations(update_observation)

  # TODO(tewalds): How should we handle more than 2 agents and the case where
  # the episode can end early for some agents?
  outcome = [0] * self._num_agents
  discount = self._discount
  episode_complete = any(o.player_result for o in self._obs)

  if episode_complete or self._controllers[0].status == protocol.Status.ended:
    if not all(update_observation):
      # The episode completed so we send new observations to everyone.
      self._update_observations([not i for i in update_observation])
    self._state = environment.StepType.LAST
    discount = 0
    for i, o in enumerate(self._obs):
      player_id = o.observation.player_common.player_id
      for result in o.player_result:
        if result.player_id == player_id:
          outcome[i] = possible_results.get(result.result, 0)

  if self._score_index >= 0:  # Game score, not win/loss reward.
    cur_score = [o["score_cumulative"][self._score_index]
                 for o in self._agent_obs]
    if self._episode_steps == 0:  # First reward is always 0.
      reward = [0] * self._num_agents
    else:
      reward = [cur - last for cur, last in zip(cur_score, self._last_score)]
    self._last_score = cur_score
  else:
    reward = outcome

  if self._renderer_human:
    self._renderer_human.render(self._obs[0])
    cmd = self._renderer_human.get_actions(self._run_config,
                                           self._controllers[0])
    if cmd == renderer_human.ActionCmd.STEP:
      pass
    elif cmd == renderer_human.ActionCmd.RESTART:
      self._state = environment.StepType.LAST
    elif cmd == renderer_human.ActionCmd.QUIT:
      raise KeyboardInterrupt("Quit?")

  self._total_steps += self._step_mul
  self._episode_steps += self._step_mul
  if self._episode_length > 0 and self._episode_steps >= self._episode_length:
    self._state = environment.StepType.LAST
    if self._discount_zero_after_timeout:
      discount = 0.0

  if self._state == environment.StepType.LAST:
    if (self._save_replay_episodes > 0 and
        self._episode_count % self._save_replay_episodes == 0):
      self.save_replay(self._replay_dir, self._replay_prefix)
    logging.info(("Episode %s finished after %s game steps. "
                  "Outcome: %s, reward: %s, score: %s"),
                 self._episode_count, self._episode_steps, outcome, reward,
                 [o["score_cumulative"][0] for o in self._agent_obs])

  def zero_on_first_step(value):
    return 0.0 if self._state == environment.StepType.FIRST else value

  return tuple(environment.TimeStep(
      step_type=self._state,
      reward=zero_on_first_step(r * self._score_multiplier),
      discount=zero_on_first_step(discount),
      observation=o) for r, o in zip(reward, self._agent_obs))
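# `update_observation` above lets callers refresh only some players'
# observations each step; when the episode ends, the complement is fetched so
# every player sees the terminal state. A tiny sketch of that complement step:
update_observation = [True, False]  # Only player 1 refreshed this step.
complement = [not u for u in update_observation]
assert complement == [False, True]  # Player 2 is fetched at episode end.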
def _observe(self, target_game_loop):
  # Transform in the thread so it runs while waiting for other observations.
  def parallel_observe(c, f):
    obs = c.observe(target_game_loop=target_game_loop)
    agent_obs = obs.observation if f is None else f.transform_obs(obs)
    game_info = c.game_info() if self._update_game_info else None
    return obs, agent_obs, game_info

  with self._metrics.measure_observation_time():
    self._obs, agent_obs, game_info = zip(*self._parallel.run(
        (parallel_observe, c, f)
        for c, f in zip(self._controllers, self._features)))
  if not self._update_game_info:
    game_info = self._game_info

  game_loop = self._obs[0].observation.game_loop
  if game_loop < target_game_loop:
    logging.warning(
        "We got an earlier observation than we asked for, %d rather than %d.",
        game_loop, target_game_loop)
  elif game_loop > target_game_loop:
    logging.warning(
        "We got a later observation than we asked for, %d rather than %d.",
        game_loop, target_game_loop)

  # TODO(tewalds): How should we handle more than 2 agents and the case where
  # the episode can end early for some agents?
  outcome = [0] * self._num_agents
  discount = self._discount
  if any(o.player_result for o in self._obs):  # Episode over.
    self._state = environment.StepType.LAST
    discount = 0
    for i, o in enumerate(self._obs):
      player_id = o.observation.player_common.player_id
      for result in o.player_result:
        if result.player_id == player_id:
          outcome[i] = possible_results.get(result.result, 0)

  if self._score_index >= 0:  # Game score, not win/loss reward.
    if not self.raw:
      cur_score = [o["score_cumulative"][self._score_index]
                   for o in agent_obs]
    else:
      cur_score = [ext_score(o)[self._score_index] for o in self._obs]
    if self._episode_steps == 0:  # First reward is always 0.
      reward = [0] * self._num_agents
    else:
      reward = [cur - last for cur, last in zip(cur_score, self._last_score)]
    self._last_score = cur_score
  else:
    reward = outcome

  if self._renderer_human:
    self._renderer_human.render(self._obs[0])
    cmd = self._renderer_human.get_actions(self._run_config,
                                           self._controllers[0])
    if cmd == renderer_human.ActionCmd.STEP:
      pass
    elif cmd == renderer_human.ActionCmd.RESTART:
      self._state = environment.StepType.LAST
    elif cmd == renderer_human.ActionCmd.QUIT:
      raise KeyboardInterrupt("Quit?")

  self._total_steps += game_loop - self._episode_steps
  self._episode_steps = game_loop
  if self._episode_length > 0 and self._episode_steps >= self._episode_length:
    self._state = environment.StepType.LAST
    # No change to reward or discount since it's not actually terminal.

  if self._state == environment.StepType.LAST:
    if (self._save_replay_episodes > 0 and
        self._episode_count % self._save_replay_episodes == 0):
      self.save_replay(self._replay_dir)
    if not self.raw:
      score_0 = [o["score_cumulative"][0] for o in agent_obs]
    else:
      score_0 = [ext_score(o)[0] for o in self._obs]
    logging.info(("Episode %s finished after %s game steps. "
                  "Outcome: %s, reward: %s, score: %s"),
                 self._episode_count, self._episode_steps, outcome, reward,
                 score_0)

  if not self.raw:
    # Expose the same data structures as the raw pb.
    for o, obs in zip(agent_obs, self._obs):
      o["score"] = obs.observation.score
      o["player_common"] = obs.observation.player_common
      o["ui_data"] = obs.observation.ui_data
      o["abilities"] = obs.observation.abilities

  return tuple(environment.TimeStep(step_type=self._state,
                                    reward=r * self._score_multiplier,
                                    discount=discount,
                                    observation=o,
                                    game_info=info,
                                    actions=obs.actions,
                                    action_errors=obs.action_errors)
               for r, o, info, obs in zip(reward, agent_obs, game_info,
                                          self._obs))
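# The zip(*parallel.run(...)) idiom above fans out one observe-and-transform
# call per controller, then unzips the per-controller tuples into parallel
# lists. A hedged standard-library sketch of the same idiom (`run_parallel`
# is an illustrative stand-in for the repo's RunParallel helper):
from concurrent import futures

def run_parallel(calls):
  """Runs (fn, arg1, arg2, ...) tuples concurrently, preserving order."""
  with futures.ThreadPoolExecutor() as pool:
    submitted = [pool.submit(fn, *args) for fn, *args in calls]
    return [f.result() for f in submitted]

# Usage mirroring the code above (names assumed):
#   obs, agent_obs, game_info = zip(*run_parallel(
#       (parallel_observe, c, f) for c, f in zip(controllers, features_list)))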
def test_multi_player(agents, disable_fog):
  players = 2
  assert len(agents) == 2, "test_multi_player expects exactly two agents."
  agent1, agent2 = agents
  run_config = run_configs.get()
  parallel = run_parallel.RunParallel()
  map_inst = maps.get(FLAGS.map)

  screen_size_px = point.Point(64, 64)
  minimap_size_px = point.Point(32, 32)
  interface = sc_pb.InterfaceOptions(raw=True, score=True)
  screen_size_px.assign_to(interface.feature_layer.resolution)
  minimap_size_px.assign_to(interface.feature_layer.minimap_resolution)

  # Reserve a whole bunch of ports for the weird multiplayer implementation.
  ports = [portpicker.pick_unused_port() for _ in range(1 + players * 2)]
  print("Valid Ports: %s" % ports)

  # Actually launch the game processes.
  print("start")
  sc2_procs = [run_config.start(extra_ports=ports) for _ in range(players)]
  controllers = [p.controller for p in sc2_procs]

  try:
    # Save the map so both game instances can access it.
    map_path = os.path.basename(map_inst.path)
    print("save_map")
    parallel.run((c.save_map, map_path, run_config.map_data(map_inst.path))
                 for c in controllers)

    # Create the create request.
    real_time = True
    create = sc_pb.RequestCreateGame(
        local_map=sc_pb.LocalMap(map_path=map_path), realtime=real_time)
    for _ in range(players):
      create.player_setup.add(type=sc_pb.Participant)

    # Create the join request.
    join1 = sc_pb.RequestJoinGame(race=races[FLAGS.agent1_race],
                                  options=interface)
    join1.shared_port = ports.pop()
    join1.server_ports.game_port = ports.pop()
    join1.server_ports.base_port = ports.pop()
    join1.client_ports.add(game_port=ports.pop(), base_port=ports.pop())
    join2 = copy.copy(join1)
    join2.race = races[FLAGS.agent2_race]

    # This is where the actual game plays out: create and join.
    print("create")
    controllers[0].create_game(create)
    print("join")
    parallel.run((c.join_game, join)
                 for c, join in zip(controllers, [join1, join2]))

    controllers[0]._client.send(debug=sc_pb.RequestDebug(
        debug=[debug_pb2.DebugCommand(game_state=1)]))
    if disable_fog[0]:
      controllers[0].disable_fog()
    if disable_fog[1]:
      controllers[1].disable_fog()

    print("run")
    game_info = controllers[0].game_info()
    extractors = features.Features(game_info)
    for game_loop in range(1, 100000):  # Steps per episode.
      # Step the game.
      step_mul = FLAGS.step_mul
      if not real_time:
        parallel.run((c.step, step_mul) for c in controllers)
      else:
        time.sleep(FLAGS.sleep_time)

      # Observe.
      obs = parallel.run(c.observe for c in controllers)
      agent_obs = [extractors.transform_obs(o.observation) for o in obs]
      game_info = [None for c in controllers]
      if not any(o.player_result for o in obs):
        # Episode still running: refresh per-player game_info.
        game_info = parallel.run(c.game_info for c in controllers)

      timesteps = tuple(
          environment.TimeStep(step_type=0, reward=0, discount=0,
                               observation=o, game_info=info)
          for o, info in zip(agent_obs, game_info))

      # Act.
      if agent1 is not None:
        actions1 = agent1.step(timesteps[0])
      else:
        actions1 = []
      actions2 = agent2.step(timesteps[1])
      actions = [actions1, actions2]
      funcs_with_args = [(c.acts, a) for c, a in zip(controllers, actions)]
      parallel.run(funcs_with_args)

      if any(o.player_result for o in obs):  # Episode over.
        break

    # Done with the game.
    print("leave")
    parallel.run(c.leave for c in controllers)
  finally:
    print("quit")
    # Done, shut down. Don't depend on parallel since it might be broken.
    for c in controllers:
      c.quit()
    for p in sc2_procs:
      p.close()
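# Hedged sketch of the port bookkeeping above: a two-player game reserves one
# shared port plus a (game_port, base_port) pair per player, hence
# 1 + players * 2 reservations. portpicker.pick_unused_port() is the same
# call used in the function above; `reserve_ports` is an illustrative name.
import portpicker

def reserve_ports(players=2):
  """Reserves the 1 + 2*players ports a multiplayer join requires."""
  return [portpicker.pick_unused_port() for _ in range(1 + players * 2)]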