def observation_spec(self):
    """Describe the shape of every observation the SC2 environment emits.

    Image-like observations are laid out in y,x (row, column) order, whereas
    actions take their coordinates in x,y order. The mismatch comes from
    conflicting conventions, and it makes the images easier to print.

    Returns:
      The dict of observation names to their tensor shapes. A 0 in a shape
      marks a dimension whose length can vary; for example the number of
      valid actions depends on which units you have selected.
    """
    # Shape shared by every per-unit UI observation.
    unit_layer_shape = (0, len(UnitLayer))  # pytype: disable=wrong-arg-types

    shapes = {
        "action_result": (0,),  # See error.proto: ActionResult.
        "alerts": (0,),  # See sc2api.proto: Alert.
        "available_actions": (0,),
        "build_queue": unit_layer_shape,
        "cargo": unit_layer_shape,
        "cargo_slots_available": (1,),
        "control_groups": (10, 2),
        "game_loop": (1,),
        "last_actions": (0,),
        "multi_select": unit_layer_shape,
        "player": (len(Player),),  # pytype: disable=wrong-arg-types
        "score_cumulative": (
            len(ScoreCumulative),  # pytype: disable=wrong-arg-types
        ),
        "score_by_category": (
            len(ScoreByCategory),  # pytype: disable=wrong-arg-types
            len(ScoreCategories),  # pytype: disable=wrong-arg-types
        ),
        "score_by_vital": (
            len(ScoreByVital),  # pytype: disable=wrong-arg-types
            len(ScoreVitals),  # pytype: disable=wrong-arg-types
        ),
        "single_select": unit_layer_shape,  # Only (n, 7) for n in (0, 1).
    }

    interface = self._agent_interface_format

    # Feature layers: (channels, rows, columns).
    feature_dims = interface.feature_dimensions
    if feature_dims:
        screen = feature_dims.screen
        minimap = feature_dims.minimap
        shapes["feature_screen"] = (len(SCREEN_FEATURES), screen.y, screen.x)
        shapes["feature_minimap"] = (
            len(MINIMAP_FEATURES), minimap.y, minimap.x)

    # RGB renders: (rows, columns, colour channels).
    rgb_dims = interface.rgb_dimensions
    if rgb_dims:
        screen = rgb_dims.screen
        minimap = rgb_dims.minimap
        shapes["rgb_screen"] = (screen.y, screen.x, 3)
        shapes["rgb_minimap"] = (minimap.y, minimap.x, 3)

    if interface.use_feature_units:
        shapes["feature_units"] = (
            0, len(FeatureUnit))  # pytype: disable=wrong-arg-types
    if interface.use_raw_units:
        shapes["raw_units"] = (0, len(FeatureUnit))
    if interface.use_unit_counts:
        shapes["unit_counts"] = (0, len(UnitCounts))
    if interface.use_camera_position:
        shapes["camera_position"] = (2,)

    return named_array.NamedDict(shapes)
def test_named_dict(self):
    """Check that NamedDict entries are reachable as items and attributes."""
    named = named_array.NamedDict(a=2, b=(1, 2))

    # Item lookup and attribute lookup agree on the value...
    self.assertEqual(named["a"], named.a)
    self.assertEqual(named["b"], named.b)
    # ...and hand back the very same object, not a copy.
    self.assertIs(named["b"], named.b)
    # Distinct keys stay distinct.
    self.assertNotEqual(named["a"], named.b)

    # An attribute assigned after construction is visible as an item.
    named.c = 3
    self.assertEqual(named["c"], 3)
def observation_spec(self):
    """Describe the shape of every observation the SC2 environment emits.

    Image-like observations are laid out in y,x (row, column) order, whereas
    actions take their coordinates in x,y order. The mismatch comes from
    conflicting conventions, and it makes the images easier to print.

    Returns:
      The dict of observation names to their tensor shapes. A 0 in a shape
      marks a dimension whose length can vary; for example the number of
      valid actions depends on which units you have selected.
    """
    # Shape shared by every per-unit UI observation.
    unit_layer_shape = (0, len(UnitLayer))  # pytype: disable=wrong-arg-types

    shapes = {
        "available_actions": (0,),
        "build_queue": unit_layer_shape,
        "cargo": unit_layer_shape,
        "cargo_slots_available": (1,),
        "control_groups": (10, 2),
        "game_loop": (1,),
        "last_actions": (0,),
        "multi_select": unit_layer_shape,
        "player": (len(Player),),  # pytype: disable=wrong-arg-types
        "score_cumulative": (
            len(ScoreCumulative),  # pytype: disable=wrong-arg-types
        ),
        "single_select": unit_layer_shape,  # Only (n, 7) for n in (0, 1).
    }

    # Feature layers: (channels, rows, columns).
    screen_px = self._feature_screen_px
    if screen_px:
        shapes["feature_screen"] = (
            len(SCREEN_FEATURES), screen_px.y, screen_px.x)
    minimap_px = self._feature_minimap_px
    if minimap_px:
        shapes["feature_minimap"] = (
            len(MINIMAP_FEATURES), minimap_px.y, minimap_px.x)

    # RGB renders: (rows, columns, colour channels).
    rgb_screen_px = self._rgb_screen_px
    if rgb_screen_px:
        shapes["rgb_screen"] = (rgb_screen_px.y, rgb_screen_px.x, 3)
    rgb_minimap_px = self._rgb_minimap_px
    if rgb_minimap_px:
        shapes["rgb_minimap"] = (rgb_minimap_px.y, rgb_minimap_px.x, 3)

    if self._feature_units:
        shapes["feature_units"] = (
            0, len(FeatureUnit))  # pytype: disable=wrong-arg-types

    return named_array.NamedDict(shapes)
def transform_obs(self, obs):
    """Render some SC2 observations into something an agent can handle.

    Args:
      obs: The observation response from the game. This method reads
        `obs.observation`, `obs.actions` and `obs.action_errors`.

    Returns:
      A `named_array.NamedDict` mapping observation names to arrays. The UI,
      score, player, `last_actions`, `action_result`, `alerts`, `game_loop`
      and `available_actions` entries are always present. The
      `feature_*`, `rgb_*`, `raw_units`, `unit_counts` and `camera_position`
      entries are only added when the matching option is enabled on
      `self._agent_interface_format`.
    """
    empty = np.array([], dtype=np.int32).reshape((0, 7))
    out = named_array.NamedDict({  # Fill out some that are sometimes empty.
        "single_select": empty,
        "multi_select": empty,
        "build_queue": empty,
        "cargo": empty,
        "cargo_slots_available": np.array([0], dtype=np.int32),
    })

    def or_zeros(layer, size):
        """Return `layer` as int32, or a zero layer of `size` if missing."""
        if layer is not None:
            return layer.astype(np.int32, copy=False)
        return np.zeros((size.y, size.x), dtype=np.int32)

    aif = self._agent_interface_format

    if aif.feature_dimensions:
        # np.stack needs a real sequence: passing a generator has been
        # deprecated since NumPy 1.16 and is rejected by current releases,
        # so the layers are collected into lists first.
        screen_layers = [
            or_zeros(f.unpack(obs.observation), aif.feature_dimensions.screen)
            for f in SCREEN_FEATURES
        ]
        out["feature_screen"] = named_array.NamedNumpyArray(
            np.stack(screen_layers), names=[ScreenFeatures, None, None])
        minimap_layers = [
            or_zeros(f.unpack(obs.observation), aif.feature_dimensions.minimap)
            for f in MINIMAP_FEATURES
        ]
        out["feature_minimap"] = named_array.NamedNumpyArray(
            np.stack(minimap_layers), names=[MinimapFeatures, None, None])

    if aif.rgb_dimensions:
        out["rgb_screen"] = Feature.unpack_rgb_image(
            obs.observation.render_data.map).astype(np.int32)
        out["rgb_minimap"] = Feature.unpack_rgb_image(
            obs.observation.render_data.minimap).astype(np.int32)

    out["last_actions"] = np.array(
        [self.reverse_action(a).function for a in obs.actions],
        dtype=np.int32)

    out["action_result"] = np.array(
        [o.result for o in obs.action_errors], dtype=np.int32)

    out["alerts"] = np.array(obs.observation.alerts, dtype=np.int32)

    out["game_loop"] = np.array([obs.observation.game_loop], dtype=np.int32)

    score_details = obs.observation.score.score_details
    out["score_cumulative"] = named_array.NamedNumpyArray(
        [
            obs.observation.score.score,
            score_details.idle_production_time,
            score_details.idle_worker_time,
            score_details.total_value_units,
            score_details.total_value_structures,
            score_details.killed_value_units,
            score_details.killed_value_structures,
            score_details.collected_minerals,
            score_details.collected_vespene,
            score_details.collection_rate_minerals,
            score_details.collection_rate_vespene,
            score_details.spent_minerals,
            score_details.spent_vespene,
        ],
        names=ScoreCumulative,
        dtype=np.int32)

    def get_score_details(key, details, categories):
        """Return one score row: `details.<key>` read for each category."""
        row = getattr(details, key.name)
        return [getattr(row, category.name) for category in categories]

    out["score_by_category"] = named_array.NamedNumpyArray(
        [
            get_score_details(key, score_details, ScoreCategories)
            for key in ScoreByCategory
        ],
        names=[ScoreByCategory, ScoreCategories],
        dtype=np.int32)

    out["score_by_vital"] = named_array.NamedNumpyArray(
        [
            get_score_details(key, score_details, ScoreVitals)
            for key in ScoreByVital
        ],
        names=[ScoreByVital, ScoreVitals],
        dtype=np.int32)

    player = obs.observation.player_common
    out["player"] = named_array.NamedNumpyArray(
        [
            player.player_id,
            player.minerals,
            player.vespene,
            player.food_used,
            player.food_cap,
            player.food_army,
            player.food_workers,
            player.idle_worker_count,
            player.army_count,
            player.warp_gate_count,
            player.larva_count,
        ],
        names=Player,
        dtype=np.int32)

    def unit_vec(u):
        """Pack a UI unit description into a 7-element int32 vector."""
        return np.array(
            (
                u.unit_type,
                u.player_relative,
                u.health,
                u.shields,
                u.energy,
                u.transport_slots_taken,
                int(u.build_progress * 100),  # discretize
            ),
            dtype=np.int32)

    ui = obs.observation.ui_data

    with sw("ui"):
        groups = np.zeros((10, 2), dtype=np.int32)
        for g in ui.groups:
            groups[g.control_group_index, :] = (g.leader_unit_type, g.count)
        out["control_groups"] = groups

        # NOTE(review): this tests the truthiness of a sub-message, which is
        # presumably always true for protobuf messages -- confirm whether a
        # presence check was intended here.
        if ui.single:
            out["single_select"] = named_array.NamedNumpyArray(
                [unit_vec(ui.single.unit)], [None, UnitLayer])

        if ui.multi and ui.multi.units:
            out["multi_select"] = named_array.NamedNumpyArray(
                [unit_vec(u) for u in ui.multi.units], [None, UnitLayer])

        if ui.cargo and ui.cargo.passengers:
            # The selected transport is described by the cargo panel itself,
            # just as the production branch below reads `ui.production.unit`.
            out["single_select"] = named_array.NamedNumpyArray(
                [unit_vec(ui.cargo.unit)], [None, UnitLayer])
            out["cargo"] = named_array.NamedNumpyArray(
                [unit_vec(u) for u in ui.cargo.passengers],
                [None, UnitLayer])
            out["cargo_slots_available"] = np.array(
                [ui.cargo.slots_available], dtype=np.int32)

        if ui.production and ui.production.build_queue:
            out["single_select"] = named_array.NamedNumpyArray(
                [unit_vec(ui.production.unit)], [None, UnitLayer])
            out["build_queue"] = named_array.NamedNumpyArray(
                [unit_vec(u) for u in ui.production.build_queue],
                [None, UnitLayer])

    def full_unit_vec(u, pos_transform, is_raw=False):
        """Pack a raw unit into one int64 row.

        `pos_transform` converts the unit position and radius out of world
        coordinates; the unit tag is only included when `is_raw` is set.
        """
        screen_pos = pos_transform.fwd_pt(point.Point.build(u.pos))
        screen_radius = pos_transform.fwd_dist(u.radius)
        return np.array(
            (
                # Match unit_vec order
                u.unit_type,
                u.alliance,  # Self = 1, Ally = 2, Neutral = 3, Enemy = 4
                u.health,
                u.shield,
                u.energy,
                u.cargo_space_taken,
                int(u.build_progress * 100),  # discretize

                # Resume API order
                int(u.health / u.health_max * 255) if u.health_max > 0 else 0,
                int(u.shield / u.shield_max * 255) if u.shield_max > 0 else 0,
                int(u.energy / u.energy_max * 255) if u.energy_max > 0 else 0,
                u.display_type,  # Visible = 1, Snapshot = 2, Hidden = 3
                u.owner,  # 1-15, 16 = neutral
                screen_pos.x,
                screen_pos.y,
                u.facing,
                screen_radius,
                u.cloak,  # Cloaked = 1, CloakedDetected = 2, NotCloaked = 3
                u.is_selected,
                u.is_blip,
                u.is_powered,
                u.mineral_contents,
                u.vespene_contents,

                # Not populated for enemies or neutral
                u.cargo_space_max,
                u.assigned_harvesters,
                u.ideal_harvesters,
                u.weapon_cooldown,
                len(u.orders),
                u.tag if is_raw else 0,
            ),
            dtype=np.int64)

    raw = obs.observation.raw_data

    if aif.use_feature_units:
        with sw("feature_units"):
            # Update the camera location so we can calculate world to screen
            # pos
            self._update_camera(point.Point.build(raw.player.camera))
            feature_units = [
                full_unit_vec(u, self._world_to_feature_screen_px)
                for u in raw.units
                if u.is_on_screen and u.display_type != sc_raw.Hidden
            ]
            out["feature_units"] = named_array.NamedNumpyArray(
                feature_units, [None, FeatureUnit], dtype=np.int32)

    if aif.use_raw_units:
        with sw("raw_units"):
            raw_units = [
                full_unit_vec(u, self._world_to_world_tl, is_raw=True)
                for u in raw.units
            ]
            out["raw_units"] = named_array.NamedNumpyArray(
                raw_units, [None, FeatureUnit], dtype=np.int32)

    if aif.use_unit_counts:
        with sw("unit_counts"):
            # Count only the player's own units, keyed by unit type.
            unit_counts = collections.defaultdict(int)
            for u in raw.units:
                if u.alliance == sc_raw.Self:
                    unit_counts[u.unit_type] += 1
            out["unit_counts"] = named_array.NamedNumpyArray(
                sorted(unit_counts.items()), [None, UnitCounts],
                dtype=np.int32)

    if aif.use_camera_position:
        camera_position = self._world_to_world_tl.fwd_pt(
            point.Point.build(raw.player.camera))
        out["camera_position"] = np.array(
            (camera_position.x, camera_position.y), dtype=np.int32)

    out["available_actions"] = np.array(
        self.available_actions(obs.observation), dtype=np.int32)

    return out