def build_agent_spaces(self) -> Tuple[Space, Space]:
    """Construct the action and observation spaces

    Description of actions and observations:
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
    """  # noqa: E501
    action_space = Discrete(19)
    # The football field's corners are [+-1., +-0.42]. However, the players
    # and balls may get out of the field. Thus we multiply those limits by
    # a factor of 2.
    xlim = 1. * 2
    ylim = 0.42 * 2
    num_players: int = 11
    xy_space = Box(
        np.array([-xlim, -ylim], dtype=np.float32),
        np.array([xlim, ylim], dtype=np.float32))
    xyz_space = Box(
        np.array([-xlim, -ylim, 0], dtype=np.float32),
        np.array([xlim, ylim, np.inf], dtype=np.float32))
    observation_space = DictSpace({
        "controlled_players": Discrete(2),
        "players_raw": TupleSpace([
            DictSpace({
                # ball information
                "ball": xyz_space,
                "ball_direction": Box(-np.inf, np.inf, (3, )),
                "ball_rotation": Box(-np.inf, np.inf, (3, )),
                "ball_owned_team": Discrete(3),
                "ball_owned_player": Discrete(num_players + 1),
                # left team
                "left_team": TupleSpace([xy_space] * num_players),
                "left_team_direction": TupleSpace(
                    [xy_space] * num_players),
                "left_team_tired_factor": Box(0., 1., (num_players, )),
                "left_team_yellow_card": MultiBinary(num_players),
                "left_team_active": MultiBinary(num_players),
                "left_team_roles": MultiDiscrete([10] * num_players),
                # right team
                "right_team": TupleSpace([xy_space] * num_players),
                "right_team_direction": TupleSpace(
                    [xy_space] * num_players),
                "right_team_tired_factor": Box(0., 1., (num_players, )),
                "right_team_yellow_card": MultiBinary(num_players),
                "right_team_active": MultiBinary(num_players),
                "right_team_roles": MultiDiscrete([10] * num_players),
                # controlled player information
                "active": Discrete(num_players),
                "designated": Discrete(num_players),
                "sticky_actions": MultiBinary(10),
                # match state
                "score": Box(-np.inf, np.inf, (2, )),
                "steps_left": Box(0, np.inf, (1, )),
                "game_mode": Discrete(7)
            })
        ])
    })
    return action_space, observation_space
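

# --- Added usage sketch (not from the original source) -----------------------
# Minimal, runnable illustration of how nested gym spaces such as the ones
# built above behave. Only a toy subset of the observation space is rebuilt
# here, and the DictSpace/TupleSpace aliases are assumed to be gym.spaces.Dict
# and gym.spaces.Tuple.
def _demo_football_spaces():
    import numpy as np
    from gym.spaces import Box, Dict as DictSpace, Discrete, Tuple as TupleSpace

    num_players = 11
    xy_space = Box(
        np.array([-2.0, -0.84], dtype=np.float32),
        np.array([2.0, 0.84], dtype=np.float32))
    toy_obs_space = DictSpace({
        "ball_owned_team": Discrete(3),
        "left_team": TupleSpace([xy_space] * num_players),
    })
    # A sample is a dict that mirrors the space's nested structure.
    sample = toy_obs_space.sample()
    assert toy_obs_space.contains(sample)
    return sample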
def reset(self, logging: bool = False):
    # Pass the caller's logging flag through to the generator.
    self.engine = self.generator.generate(logging=logging)
    n_stations: int = self.engine.n_stations
    self.max_cars: int = self.engine.max_cars
    self.max_occ: int = np.max(self.engine.station_info[:, 2])
    # Observation: per-station indices, locations, and occupancies, plus
    # per-car locations/destinations, the current step, and query state.
    self.observation_space = DictSpace({
        'station_idx': Box(0, n_stations, (n_stations, 1), dtype=np.int32),
        'station_locations': Box(0, np.inf, (n_stations, 2), dtype=np.float32),
        'station_occs': Box(0, self.max_occ, (n_stations, 1), dtype=np.int32),
        'station_maxes': Box(0, self.max_occ, (n_stations, 1), dtype=np.int32),
        'car_locs': Box(0, np.inf, (self.max_cars, 2), dtype=np.float32),
        'car_dest_idx': Box(0, n_stations, (self.max_cars, 1), dtype=np.int32),
        'car_dest_loc': Box(0, np.inf, (self.max_cars, 2), dtype=np.float32),
        't': Box(0, np.inf, (1, 1), dtype=np.int32),
        'query_loc': Box(0, np.inf, (1, 2), dtype=np.float32),
        'remaining_queries': Box(0, np.inf, (1, 1), dtype=np.int32)
    })
    self.action_space = Discrete(self.engine.n_stations)
    self.reward_range = (-1 * self.engine.max_cars, 3 * self.engine.max_cars)
    return self.state()
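

# --- Added usage sketch (not from the original source) -----------------------
# Shows the shape/dtype contract that the reset() above establishes, using toy
# values for n_stations / max_cars / max_occ. The engine and generator objects
# of the original environment are not reproduced; this only exercises the
# gym spaces themselves.
def _demo_station_spaces(n_stations=4, max_cars=6, max_occ=10):
    import numpy as np
    from gym.spaces import Box, Dict as DictSpace, Discrete

    obs_space = DictSpace({
        'station_occs': Box(0, max_occ, (n_stations, 1), dtype=np.int32),
        'car_locs': Box(0, np.inf, (max_cars, 2), dtype=np.float32),
    })
    action_space = Discrete(n_stations)
    obs = obs_space.sample()
    assert obs['station_occs'].shape == (n_stations, 1)
    assert action_space.contains(action_space.sample())
    # Rewards in the original environment are bounded by the fleet size.
    reward_range = (-1 * max_cars, 3 * max_cars)
    return obs, reward_range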
def convertOrderedDict2Space(odict, has_pixels=False, has_task_params=False):
    '''
    For now just using -inf and inf for the bounds of the Box
    '''
    if len(odict.keys()) == 1:
        # no concatenation
        return convertSpec2Space(list(odict.values())[0])
    else:
        # len keys is more than 1
        _min, _max = -np.inf, np.inf
        numdim = 0
        for key in odict:
            if key not in ['pixels', 'obs_task_params']:
                numdim += np.prod(odict[key].shape)
                # cur_min, cur_max = compute_min_max(spec)
                # _min = min(_min, cur_min)
                # _max = max(_max, cur_max)
        dict_thus_far = {
            'obs': spaces.Box(-np.inf, np.inf, shape=(numdim,))
        }
        if has_pixels:
            dict_thus_far.update(
                {'pixels': spaces.Box(low=0, high=1, shape=odict['pixels'].shape)}
            )
        if has_task_params:
            dict_thus_far.update(
                {'obs_task_params': spaces.Box(low=0, high=1, shape=odict['obs_task_params'].shape)}
            )
        if len(dict_thus_far.keys()) == 1:
            # we just have obs, so we will make it a Box (concatenation)
            # numdim = sum([np.int(np.prod(odict[key].shape)) for key in odict])
            gym_space = spaces.Box(-np.inf, np.inf, shape=(numdim,))
        else:
            gym_space = DictSpace(dict_thus_far)
    return gym_space
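

# --- Added usage sketch (not from the original source) -----------------------
# The dm_control-style specs normally passed to convertOrderedDict2Space only
# need a `.shape` attribute on the multi-key path, so they are stubbed with a
# namedtuple here. The single-key path (convertSpec2Space) is not exercised.
def _demo_convert_ordered_dict():
    from collections import OrderedDict, namedtuple

    FakeSpec = namedtuple('FakeSpec', ['shape'])
    odict = OrderedDict([
        ('joint_positions', FakeSpec(shape=(7,))),
        ('joint_velocities', FakeSpec(shape=(7,))),
    ])
    space = convertOrderedDict2Space(odict)
    # Two 7-dim entries are concatenated into a single flat Box.
    assert space.shape == (14,)
    return space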
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.actions: List[int] = []
    self.datasets_site_path = site_data_path(
        "llvm/10.0.0/bitcode_benchmarks")

    # Register the LLVM datasets.
    self.datasets_site_path.mkdir(parents=True, exist_ok=True)
    self.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)
    for dataset in LLVM_DATASETS:
        self.register_dataset(dataset)

    self.inst2vec = _INST2VEC_ENCODER

    self.observation.spaces["CpuInfo"].space = DictSpace({
        "name": Sequence(size_range=(0, None), dtype=str),
        "cores_count": Scalar(min=None, max=None, dtype=int),
        "l1i_cache_size": Scalar(min=None, max=None, dtype=int),
        "l1i_cache_count": Scalar(min=None, max=None, dtype=int),
        "l1d_cache_size": Scalar(min=None, max=None, dtype=int),
        "l1d_cache_count": Scalar(min=None, max=None, dtype=int),
        "l2_cache_size": Scalar(min=None, max=None, dtype=int),
        "l2_cache_count": Scalar(min=None, max=None, dtype=int),
        "l3_cache_size": Scalar(min=None, max=None, dtype=int),
        "l3_cache_count": Scalar(min=None, max=None, dtype=int),
        "l4_cache_size": Scalar(min=None, max=None, dtype=int),
        "l4_cache_count": Scalar(min=None, max=None, dtype=int),
    })

    self.observation.add_derived_space(
        id="Inst2vecPreprocessedText",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=str),
        cb=lambda base_observation: self.inst2vec.preprocess(
            base_observation),
        default_value="",
    )
    self.observation.add_derived_space(
        id="Inst2vecEmbeddingIndices",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=np.int32),
        cb=lambda base_observation: self.inst2vec.encode(
            self.inst2vec.preprocess(base_observation)),
        default_value=np.array([self.inst2vec.vocab["!UNK"]]),
    )
    self.observation.add_derived_space(
        id="Inst2vec",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=np.ndarray),
        cb=lambda base_observation: self.inst2vec.embed(
            self.inst2vec.encode(self.inst2vec.preprocess(base_observation))),
        default_value=np.vstack(
            [self.inst2vec.embeddings[self.inst2vec.vocab["!UNK"]]]),
    )
    self.observation.add_derived_space(
        id="AutophaseDict",
        base_id="Autophase",
        space=DictSpace({
            name: Scalar(min=0, max=None, dtype=int)
            for name in AUTOPHASE_FEATURE_NAMES
        }),
        cb=lambda base_observation: {
            name: val
            for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
        },
    )
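

# --- Added usage sketch (not from the original source) -----------------------
# Once the derived spaces above are registered, they can be requested through
# the environment's observation view like any built-in observation space. This
# assumes a CompilerGym-style environment is installed and registered as
# "llvm-v0"; exact behavior may differ between CompilerGym releases.
def _demo_llvm_derived_observations():
    import compiler_gym

    env = compiler_gym.make("llvm-v0")
    env.reset()
    indices = env.observation["Inst2vecEmbeddingIndices"]  # vocabulary indices
    autophase = env.observation["AutophaseDict"]           # {feature_name: count}
    env.close()
    return indices, autophase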
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.inst2vec = Inst2vecEncoder()

    self.register_derived_space(
        base_name="CpuInfo",
        derived_name="CpuInfoDict",
        derived_space=DictSpace({
            "name": Sequence(size_range=(0, None), dtype=str),
            "cores_count": Scalar(min=None, max=None, dtype=int),
            "l1i_cache_size": Scalar(min=None, max=None, dtype=int),
            "l1i_cache_count": Scalar(min=None, max=None, dtype=int),
            "l1d_cache_size": Scalar(min=None, max=None, dtype=int),
            "l1d_cache_count": Scalar(min=None, max=None, dtype=int),
            "l2_cache_size": Scalar(min=None, max=None, dtype=int),
            "l2_cache_count": Scalar(min=None, max=None, dtype=int),
            "l3_cache_size": Scalar(min=None, max=None, dtype=int),
            "l3_cache_count": Scalar(min=None, max=None, dtype=int),
            "l4_cache_size": Scalar(min=None, max=None, dtype=int),
            "l4_cache_count": Scalar(min=None, max=None, dtype=int),
        }),
        cb=lambda base_observation: base_observation,
    )
    self.register_derived_space(
        base_name="Ir",
        derived_name="Inst2vecPreprocessedText",
        derived_space=Sequence(size_range=(0, None), dtype=str),
        cb=lambda base_observation: self.inst2vec.preprocess(
            base_observation),
    )
    self.register_derived_space(
        base_name="Ir",
        derived_name="Inst2vecEmbeddingIndices",
        derived_space=Sequence(size_range=(0, None), dtype=np.int32),
        cb=lambda base_observation: self.inst2vec.encode(
            self.inst2vec.preprocess(base_observation)),
    )
    self.register_derived_space(
        base_name="Ir",
        derived_name="Inst2vec",
        derived_space=Sequence(size_range=(0, None), dtype=np.ndarray),
        cb=lambda base_observation: self.inst2vec.embed(
            self.inst2vec.encode(self.inst2vec.preprocess(base_observation))),
    )
    self.register_derived_space(
        base_name="Autophase",
        derived_name="AutophaseDict",
        derived_space=DictSpace({
            name: Scalar(min=0, max=None, dtype=int)
            for name in AUTOPHASE_FEATURE_NAMES
        }),
        cb=lambda base_observation: {
            name: val
            for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
        },
    )
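

# --- Added usage sketch (not from the original source) -----------------------
# Illustrates what the AutophaseDict callback above computes, independent of
# the environment plumbing: a flat Autophase feature vector is zipped against
# the feature names into a dict. The names and values below are a toy stand-in
# for AUTOPHASE_FEATURE_NAMES and a real Autophase observation.
def _demo_autophase_dict_callback():
    feature_names = ["BBNumArgsHi", "BBNumArgsLo", "onePred"]  # toy subset
    base_observation = [0, 1, 5]
    autophase_dict = {
        name: val for name, val in zip(feature_names, base_observation)
    }
    assert autophase_dict["onePred"] == 5
    return autophase_dict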