def __init__(self):
    """Create the plant deck, the gym spaces and the initial scene.

    The action space is one flat ``Discrete`` holding an entry for every
    (plant, lane, position) combination plus one additional action.
    The observation is a ``Tuple`` of three parts: which plant is on each
    cell, whether each lane is attacked, and whether each lane has a mower.
    """
    self.plant_deck = {
        "sunflower": Sunflower,
        "peashooter": Peashooter,
        "wall-nut": Wallnut,
    }

    n_cells = config.N_LANES * config.LANE_LENGTH

    # One action per (plant, cell) pair, plus one extra action
    # (presumably "do nothing" -- confirm against step()).
    self.action_space = Discrete(len(self.plant_deck) * n_cells + 1)
    # Alternative kept for reference (plant, lane, pos):
    # self.action_space = MultiDiscrete(
    #     [len(self.plant_deck), config.N_LANES, config.LANE_LENGTH])

    # Which plant on the cell, is the lane attacked, is there a mower on
    # the lane.
    self.observation_space = Tuple([
        MultiDiscrete([len(self.plant_deck) + 1] * n_cells),
        MultiBinary(config.N_LANES),
        MultiBinary(config.N_LANES),
    ])

    # Deck names, the matching class names, and class name -> deck index.
    self._plant_names = list(self.plant_deck)
    self._plant_classes = [
        plant_cls.__name__ for plant_cls in self.plant_deck.values()
    ]
    self._plant_no = {
        class_name: index
        for index, class_name in enumerate(self._plant_classes)
    }

    self._scene = Scene(self.plant_deck, BasicZombieSpawner())
    self._reward = 0
class TestBaseNetwork:
    """Constructor checks for ``BaseNetwork``.

    ``list_work`` holds (observation_space, action_space) pairs made of
    gym spaces; ``list_fail`` holds pairs where at least one element is
    not a space.
    """

    __test__ = True
    network = BaseNetwork

    list_work = [
        [Discrete(3), Discrete(1)],
        [Discrete(3), Discrete(3)],
        [Discrete(10), Discrete(50)],
        [MultiDiscrete([3]), MultiDiscrete([1])],
        [MultiDiscrete([3, 3]), MultiDiscrete([3, 3])],
        [MultiDiscrete([4, 4, 4]), MultiDiscrete([50, 4, 4])],
        [MultiDiscrete([[100, 3], [3, 5]]), MultiDiscrete([[100, 3], [3, 5]])],
        [
            MultiDiscrete([[[100, 3], [3, 5]], [[100, 3], [3, 5]]]),
            MultiDiscrete([[[100, 3], [3, 5]], [[100, 3], [3, 5]]]),
        ],
        [MultiBinary(1), MultiBinary(1)],
        [MultiBinary(3), MultiBinary(3)],
        # Disabled: multi-dimensional MultiBinary is not implemented by gym.
        # [MultiBinary([3, 2]), MultiBinary([3, 2])],
        [Box(low=0, high=10, shape=[1]), Box(low=0, high=10, shape=[1])],
        [Box(low=0, high=10, shape=[2, 2]), Box(low=0, high=10, shape=[2, 2])],
        [
            Box(low=0, high=10, shape=[2, 2, 2]),
            Box(low=0, high=10, shape=[2, 2, 2]),
        ],
        [
            Tuple([Discrete(1), MultiDiscrete([1, 1])]),
            Tuple([Discrete(1), MultiDiscrete([1, 1])]),
        ],
        [
            Dict({"first": Discrete(1), "second": MultiDiscrete([1, 1])}),
            Dict({"first": Discrete(1), "second": MultiDiscrete([1, 1])}),
        ],
    ]

    list_fail = [
        [None, None],
        ["dedrfe", "qdzq"],
        [1215.4154, 157.48],
        ["zdzd", (Discrete(1))],
        [Discrete(1), "zdzd"],
        ["zdzd", (1, 4, 7)],
        [(1, 4, 7), "zdzd"],
        [152, 485],
    ]

    def test_init(self):
        """Every pair from both lists must make the constructor raise TypeError.

        NOTE(review): the valid pairs are expected to raise as well --
        presumably because ``BaseNetwork`` itself cannot be instantiated;
        confirm that this is intended.
        """
        for pairs in (self.list_fail, self.list_work):
            for ob, ac in pairs:
                with pytest.raises(TypeError):
                    self.network(observation_space=ob, action_space=ac)
def _create_observation_space(self):
    """Assemble ``self.observation_space`` from per-entity sub-spaces.

    The result is a ``Tuple`` of two ``Tuple`` spaces: the first groups the
    car, obstacle and pedestrian spaces, the second groups the ego ("self")
    space and the lane space. The lane and obstacle layouts depend on
    whether ``self.observationType`` is ``ObservationType.FULL``.
    """
    full_observation = self.observationType == ObservationType.FULL

    # Building blocks shared by several entities.
    pos_xy = Box(-self.mean * 2, +self.mean * 2, shape=(2,))
    width_height = Box(-10, 10, shape=(2,))
    orientation = Box(-1, 1, shape=(2,))
    lane_type = Box(-1, 1, shape=(1,))

    self_space = Dict({
        "position": pos_xy,
        "orientation": orientation,
        "width_height": width_height,
        "goal_position": pos_xy,
        "finished": MultiBinary(1)
    })
    car_space = Dict({
        "position": pos_xy,
        "orientation": orientation,
        "width_height": width_height,
        "finished": MultiBinary(1)
    })
    pedestrian_space = Dict({
        "position": pos_xy,
    })

    if full_observation:
        # Full observation: lanes are given as 4 coordinates and obstacles
        # carry no orientation.
        lane_space = Dict({
            "points": Box(-self.mean * 2, self.mean * 2, shape=(4,)),
            "type": lane_type
        })
        obstacle_space = Dict({
            "position": pos_xy,
            "width_height": width_height,
        })
    else:
        lane_space = Dict({
            "signed_distance": Box(-self.mean * 2, self.mean * 2, shape=(1,)),
            "orientation": orientation,
            "type": lane_type
        })
        obstacle_space = Dict({
            "position": pos_xy,
            "orientation": orientation,
            "width_height": width_height,
        })

    others = Tuple([car_space, obstacle_space, pedestrian_space])
    ego_and_lane = Tuple([self_space, lane_space])
    self.observation_space = Tuple([others, ego_and_lane])
def build_agent_spaces(self) -> Tuple[Space, Space]:
    """Create the action space and the observation space of one agent.

    The individual actions and observation entries are described at
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md

    Returns:
        ``(action_space, observation_space)``.
    """  # noqa: E501
    num_players: int = 11

    # The corners of the pitch are at [+-1., +-0.42]. Players and ball can
    # leave the pitch, hence the limits are doubled.
    xlim = 1. * 2
    ylim = 0.42 * 2

    xy_low = np.array([-xlim, -ylim], dtype=np.float32)
    xy_high = np.array([xlim, ylim], dtype=np.float32)
    xy_space = Box(xy_low, xy_high)

    xyz_low = np.array([-xlim, -ylim, 0], dtype=np.float32)
    xyz_high = np.array([xlim, ylim, np.inf], dtype=np.float32)
    xyz_space = Box(xyz_low, xyz_high)

    def per_player_xy():
        # One 2-D entry per player.
        return TupleSpace([xy_space] * num_players)

    def per_player_fraction():
        # One value in [0, 1] per player.
        return Box(0., 1., (num_players, ))

    def unbounded(size):
        return Box(-np.inf, np.inf, (size, ))

    raw_player_space = DictSpace({
        # ball information
        "ball": xyz_space,
        "ball_direction": unbounded(3),
        "ball_rotation": unbounded(3),
        "ball_owned_team": Discrete(3),
        "ball_owned_player": Discrete(num_players + 1),
        # left team
        "left_team": per_player_xy(),
        "left_team_direction": per_player_xy(),
        "left_team_tired_factor": per_player_fraction(),
        "left_team_yellow_card": MultiBinary(num_players),
        "left_team_active": MultiBinary(num_players),
        "left_team_roles": MultiDiscrete([10] * num_players),
        # right team
        "right_team": per_player_xy(),
        "right_team_direction": per_player_xy(),
        "right_team_tired_factor": per_player_fraction(),
        "right_team_yellow_card": MultiBinary(num_players),
        "right_team_active": MultiBinary(num_players),
        "right_team_roles": MultiDiscrete([10] * num_players),
        # controlled player information
        "active": Discrete(num_players),
        "designated": Discrete(num_players),
        "sticky_actions": MultiBinary(10),
        # match state
        "score": unbounded(2),
        "steps_left": Box(0, np.inf, (1, )),
        "game_mode": Discrete(7)
    })

    observation_space = DictSpace({
        "controlled_players": Discrete(2),
        "players_raw": TupleSpace([raw_player_space])
    })
    action_space = Discrete(19)
    return action_space, observation_space
def init(self, env_config=None):
    """Configure the combination-lock environment and draw its sequences.

    Reads ``self.rng`` (must provide ``randint``) and sets the spaces, the
    configuration attributes and the two action sequences ``opt_a`` and
    ``opt_b``; the chosen sequences are printed.

    Args:
        env_config: optional mapping with any of the keys ``'horizon'``
            (int, default 2), ``'antishaping'`` (float, default 0),
            ``'dimension'`` (int, default 0), ``'tabular'`` (default
            False) and ``'switch'`` (float, default 0.0).
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if env_config is None:
        env_config = {}

    self.initialized = True
    self.max_reward = 1
    self.vstar = 0.5

    self.horizon = int(env_config.get('horizon', 2))
    if 'antishaping' in env_config:
        self.antishaping = float(env_config['antishaping'])
    else:
        self.antishaping = 0
    self.dimension = int(env_config.get('dimension', 0))
    self.tabular = env_config.get('tabular', False)

    self.action_space = Discrete(4)
    self.reward_range = (0.0, 1.0)

    self.state_space = MultiBinary((self.horizon + 1) * 3)
    # np.float was only an alias of the builtin float (float64) and has
    # been removed from NumPy, so the dtype is spelled out explicitly.
    self.observation_space = Box(low=0.0, high=1.0,
                                 shape=(3 + self.dimension, ),
                                 dtype=np.float64)
    setattr(self.observation_space, 'n', 3 + self.dimension)
    if self.tabular:
        # Tabular mode replaces the Box observation by a binary vector.
        self.observation_space = MultiBinary((self.horizon + 1) * 3)

    self.switch = float(env_config.get('switch', 0.0))

    # One action per step for each of the two sequences (drawn in this
    # order so the random stream is consumed as before).
    self.opt_a = self.rng.randint(low=0, high=self.action_space.n,
                                  size=self.horizon)
    self.opt_b = self.rng.randint(low=0, high=self.action_space.n,
                                  size=self.horizon)

    print("[LOCK] Initializing Combination Lock Environment")
    print("[LOCK] A sequence: ", end="")
    print([z for z in self.opt_a], end=", ")
    print("Switches: ", end="")
    print([(z + 1) % 4 for z in self.opt_a])
    print("[LOCK] B sequence: ", end="")
    print([z for z in self.opt_b], end=", ")
    print("Switches: ", end="")
    print([(z + 1) % 4 for z in self.opt_b])
def __init__(self, nb_actions, space):
    """Combine a binary vector and an arbitrary space into one dict space.

    :param nb_actions: size of the ``MultiBinary`` stored under "actions"
    :param space: space stored under "values"; its ``shape`` attribute
        (or ``None`` when it has none) is mirrored on ``self.shape``
    """
    super().__init__(spaces={
        "actions": MultiBinary(nb_actions),
        "values": space,
    })
    self.shape = getattr(space, "shape", None)
def test_flatten_discrete():
    """flatten() must map MultiDiscrete / MultiBinary one-to-one onto Discrete."""
    md = MultiDiscrete([3, 4])
    trafo = flatten(md)
    assert trafo.target == Discrete(12)

    # every combination must be produced exactly once
    remaining = list(itertools.product([0, 1, 2], [0, 1, 2, 3]))
    for flat_index in range(12):
        a = trafo.convert_from(flat_index)
        assert a in remaining, (a, remaining)
        assert trafo.convert_to(a) == flat_index
        remaining = [x for x in remaining if x != a]
    assert len(remaining) == 0

    # the same check for a binary space
    md = MultiBinary(3)
    trafo = flatten(md)
    assert trafo.target == Discrete(2**3)

    remaining = list(itertools.product([0, 1], [0, 1], [0, 1]))
    for flat_index in range(8):
        a = trafo.convert_from(flat_index)
        assert trafo.convert_to(a) == flat_index
        assert a in remaining, (a, remaining)
        remaining = [x for x in remaining if x != a]
    assert len(remaining) == 0

    # tuples, numpy arrays and lists must all be accepted
    from_tuple = trafo.convert_to((1, 0, 1))
    assert from_tuple == trafo.convert_to(np.array([1, 0, 1]))
    assert trafo.convert_to((1, 0, 1)) == trafo.convert_to([1, 0, 1])
class Rehearsal(gym.Env):
    """Environment whose actions and observations are binary note vectors.

    Each agent observes one ``MultiBinary`` vector with an entry per element
    of ``NOTE_REGION``; the action is a single vector of the same length.
    Observations are random samples of the observation space.
    """

    NOTE_REGION = np.arange(128)

    def __init__(self, agent_n: int):
        """Create the spaces for ``agent_n`` agents."""
        n_notes = len(Rehearsal.NOTE_REGION)
        agent_observation = MultiBinary(n_notes)
        # The same space instance is repeated once per agent.
        self.observation_space = Tuple((agent_observation, ) * agent_n)
        self.action_space = MultiBinary(n_notes)

    def reset(self):
        """Return a random observation."""
        return self.observation_space.sample()

    def step(self, action):
        """Print the action (or a notice if it is not in the action space)
        and return ``(random observation, reward, False, {})``."""
        is_valid = self.action_space.contains(action)
        print(action if is_valid else 'not included')
        observation = self.observation_space.sample()
        return observation, self.reward(action), False, dict()

    def reward(self, action):
        """Return the sine of the sum of the action's entries."""
        total = np.sum(action)
        return np.sin(total)

    def render(self, mode='human'):
        """Rendering is not implemented."""
        pass
def test_MultiBinary(self):
    """A decorated MultiBinary(7) has 2**7 elements and no sub-spaces.

    Uses ``assertEqual``: the ``assertEquals`` alias is deprecated and no
    longer exists in current Python versions.
    """
    space = DecoratedSpace.create(MultiBinary(7))
    self.assertEqual(2**7, space.getSize())
    self.assertEqual([], space.getSubSpaces())
    # Reverse of normal binary notation: 64 + 16 + 8 + 2 is 1011010 in
    # binary, and the expected list is that bit string read backwards.
    self.assertEqual([0, 1, 0, 1, 1, 0, 1],
                     list(space.getById(64 + 16 + 8 + 2)))
def test_space_to_list_and_list_to_space(self):
    """space_to_list followed by list_to_space must give back an equal space."""
    bench = AbstractBenchmark()

    def assert_restorable(space):
        as_list = bench.space_to_list(space)
        space_restored = bench.list_to_space(as_list)
        assert space == space_restored

    assert_restorable(Box(low=np.array([0, 0]), high=np.array([1, 1])))

    assert_restorable(Discrete(2))

    box_part = Box(low=np.array([0, 0]), high=np.array([1, 1]))
    discrete_part = Discrete(n=2)
    assert_restorable(Dict({"box": box_part, 'discrete': discrete_part}))

    assert_restorable(MultiDiscrete([2, 3]))

    assert_restorable(MultiBinary(3))
def __init__(
        self,
        sets: List[Set],
        vae: VAE,
        data_key: str,
        cycle_for_batch_size_1=False,
):
    """Store the sets and the VAE, declare the spaces and update encodings.

    :param sets: the sets handled by this object; each must expose a
        ``description`` attribute
    :param vae: VAE whose ``representation_size`` fixes the size of the
        mean / covariance boxes
    :param data_key: stored unchanged on ``self.data_key``
    :param cycle_for_batch_size_1: stored unchanged on the instance
    """
    self.sets = sets
    self.vae = vae
    self.data_key = data_key
    self.cycle_for_batch_size_1 = cycle_for_batch_size_1
    self._num_sets = len(sets)

    # Keys under which the individual entries are exposed.
    self.mean_key = 'latent_mean'
    self.covariance_key = 'latent_covariance'
    self.description_key = 'set_description'
    self.set_index_key = 'set_index'
    self.set_embedding_key = 'set_embedding'

    # Mean and covariance share one box of +-10 per latent dimension.
    latent_bound = 10 * np.ones(vae.representation_size)
    set_space = Box(
        -10 * np.ones(vae.representation_size),
        latent_bound,
        dtype=np.float32,
    )
    self._spaces = {
        self.mean_key: set_space,
        self.covariance_key: set_space,
        self.description_key: ObjectSpace(),
        self.set_index_key: Discrete(len(sets)),
        self.set_embedding_key: MultiBinary(len(sets)),
    }

    self.means = None
    self.covariances = None
    self.descriptions = [one_set.description for one_set in sets]
    self.update_encodings()
    self._current_idx = 0
def __init__(self,
             state_transform,
             visualize=False,
             max_obstacles=3,
             skip_frame=5,
             reward_mult=10.):
    """Initialise the parent environment and replace its spaces.

    :param state_transform: object whose ``state_size`` gives the shape of
        the observation box
    :param visualize: forwarded to the parent constructor
    :param max_obstacles: forwarded to the parent constructor
    :param skip_frame: stored on the instance
    :param reward_mult: stored on the instance
    """
    super(RunEnv2, self).__init__(visualize, max_obstacles)
    self.state_transform = state_transform
    # Spaces are assigned after the parent constructor has run.
    self.observation_space = Box(low=-1000,
                                 high=1000,
                                 shape=state_transform.state_size)
    self.action_space = MultiBinary(18)
    self.skip_frame = skip_frame
    self.reward_mult = reward_mult
def __init__(self, dim: int = 1, ep_length: int = 100):
    """
    Identity test environment built on a ``MultiBinary`` space.

    :param dim: (int) number of binary dimensions of the space
    :param ep_length: (int) episode length in timesteps, forwarded to the
        parent constructor
    """
    super().__init__(ep_length=ep_length, space=MultiBinary(dim))
def test_convert_element_to_space_type(self):
    """Elements with mismatching dtypes must be converted to fit the space."""
    box_space = Box(low=-1, high=1, shape=(2, ))
    discrete_space = Discrete(2)
    multi_discrete_space = MultiDiscrete([2, 2])
    multi_binary_space = MultiBinary(2)
    tuple_space = Tuple((box_space, discrete_space))
    nested_dict_space = Dict({
        "box2": box_space,
        "discrete2": discrete_space,
    })
    dict_space = Dict({
        "box": box_space,
        "discrete": discrete_space,
        "multi_discrete": multi_discrete_space,
        "multi_binary": multi_binary_space,
        "dict_space": nested_dict_space,
        "tuple_space": tuple_space,
    })

    # Samples cast to other dtypes than the ones the spaces are built with.
    box_unconverted = box_space.sample().astype(np.float64)
    multi_discrete_unconverted = multi_discrete_space.sample().astype(
        np.int32)
    multi_binary_unconverted = multi_binary_space.sample().astype(np.int32)
    discrete_unconverted = float(0)
    tuple_unconverted = (box_unconverted, discrete_unconverted)

    modified_element = {
        "box": box_unconverted,
        "discrete": discrete_unconverted,
        "multi_discrete": multi_discrete_unconverted,
        "multi_binary": multi_binary_unconverted,
        "tuple_space": tuple_unconverted,
        "dict_space": {
            "box2": box_unconverted,
            "discrete2": float(0),
        },
    }

    reference_sample = dict_space.sample()
    element_with_correct_types = convert_element_to_space_type(
        modified_element, reference_sample)
    assert dict_space.contains(element_with_correct_types)
def __init__(self,
             env,
             districts_ids: Sequence[int],
             other_agents_action: int,
             maac: bool = False):
    """Wrap ``env`` and restrict the action space to selected districts.

    :param env: environment to wrap; its action space must be MultiBinary
    :param districts_ids: ids of the selected districts
    :param other_agents_action: must be 0 or 1; stored on the instance
    :param maac: when True the action space is a list of ``Discrete(2)``
        (one entry per selected district) instead of one ``MultiBinary``
    """
    super(MultiAgentSelectAction, self).__init__(env)
    has_binary_actions = isinstance(env.action_space, MultiBinary)
    assert has_binary_actions and other_agents_action in (0, 1)

    self.districts_ids = np.asarray(districts_ids)
    self.other_agents_action = other_agents_action
    self.n_agents = env.unwrapped.n_agents

    n_selected = len(districts_ids)
    if not maac:
        self.action_space = MultiBinary(n_selected)
    else:
        # The list repeats one and the same Discrete(2) instance.
        self.action_space = [Discrete(2)] * n_selected
def __init__(self, dim, ep_length=100):
    """
    Identity test environment whose action and observation space are the
    same ``MultiBinary`` instance.

    :param dim: (int) number of binary dimensions
    :param ep_length: (int) episode length in timesteps, forwarded to the
        parent constructor
    """
    super(IdentityEnvMultiBinary, self).__init__(dim, ep_length)
    # Both attributes refer to one shared space object.
    self.action_space = self.observation_space = MultiBinary(dim)
    self.reset()
def __init__(self,
             visualize=False,
             integrator_accuracy=5e-5,
             model='2D',
             prosthetic=False,
             difficulty=0,
             skip_frame=3,
             reward_mult=1.):
    """Initialise the parent environment, select the model and set a
    binary action space.

    :param visualize: forwarded to the parent constructor
    :param integrator_accuracy: forwarded to the parent constructor
    :param model: first argument passed to ``change_model``
    :param prosthetic: second argument passed to ``change_model``
    :param difficulty: third argument passed to ``change_model``
    :param skip_frame: stored on the instance
    :param reward_mult: stored on the instance
    """
    super(RunEnv2, self).__init__(visualize, integrator_accuracy)

    model_args = (model, prosthetic, difficulty)
    self.args = model_args
    self.change_model(*self.args)

    # The observation space is not overridden here; earlier Box(-1000, 1000)
    # definitions based on a state transform were disabled.

    # One binary action per output reported by the environment.
    self.noutput = self.get_action_space_size()
    self.action_space = MultiBinary(self.noutput)

    self.skip_frame = skip_frame
    self.reward_mult = reward_mult
def __init__(self,
             jd_path,
             nodes: NodeFinder,
             reward_func: Callable,
             max_idle_steps: int = 300,
             graphics: bool = False,
             resolution: int = 1080,
             continuous: bool = True):
    """Declare the spaces and create the managed process.

    :param jd_path: first argument of ``ManagedProcess``
    :param nodes: node finder; ``NUM_NODES`` sizes the "road_boundaries"
        observation
    :param reward_func: stored on the instance
    :param max_idle_steps: stored as ``MAX_IDLE_STEPS``
    :param graphics: forwarded to ``ManagedProcess``
    :param resolution: forwarded to ``ManagedProcess``
    :param continuous: Box actions in [-1, 1] when True, otherwise
        three-valued discrete actions
    """
    self.CONTINUOUS = continuous
    self.MAX_IDLE_STEPS = max_idle_steps
    self.NODES = nodes

    scalar_shape = (1, )

    if self.CONTINUOUS:
        self.action_space = Dict({
            "steering": Box(low=-1, high=1, shape=scalar_shape),
            # "braking": Box(low=0, high=1, shape=ONE_SHAPE),
            "throttle": Box(low=-1, high=1, shape=scalar_shape)
        })
    else:
        self.action_space = Dict({
            "steering": Discrete(3),
            # "braking": Discrete(2),
            "throttle": Discrete(3)
        })

    boundaries_shape = (self.NODES.NUM_NODES, 2, 3)
    self.observation_space = Dict({
        "velocity": Box(low=-500, high=500, shape=(3, )),
        "direction": Box(low=-1, high=1, shape=(4, )),  # quaternion
        "wheel_direction": Box(low=-1, high=1, shape=scalar_shape),
        "road_boundaries": Box(low=-1000, high=1000, shape=boundaries_shape),
        "grounded": MultiBinary(1),
        "wheels": MultiBinary(4),
    })

    self.process = ManagedProcess(jd_path, graphics, resolution)
    self.reward_func = reward_func
class Kniffel(KniffelBase):
    """Kniffel variant that exposes plain dict action and observation spaces.
    """

    # Which dice to hold, which board slot to pick, and which of the two
    # is applied (0 = roll, anything else = select; see ``act``).
    action_space: Dict = Dict({
        "dices_hold": MultiBinary(5),
        "board_selection": Discrete(13),
        "select_action": Discrete(2),
    })

    observation_space: Dict = Dict({
        "board": Box(low=0, high=50, shape=(13,), dtype=np.int64),
        "filled_slots": MultiBinary(13),
        "slots_value": Box(low=0, high=50, shape=(13,), dtype=np.int64),
        "num_rolls_remaining": Discrete(3),
        "dices": MultiDiscrete([6] * 5)
    })

    def observe(self):
        """Return the current observation as a dict keyed like
        ``observation_space``."""
        observation = {
            "board": self._board,
            "filled_slots": self._filled_mask,
            "slots_value": self._slots_value,
            "num_rolls_remaining": self._num_rolls_remaining,
            "dices": self._dices,
        }
        return observation

    def act(self, action) -> float:
        """Apply ``action`` and return the change in total score.

        The total is the sum of the board plus the bonus. If the action
        raises a ``KniffelError`` it is ignored and -1 is returned.
        """
        score_before = np.sum(self._board) + self._bonus

        wants_roll = action['select_action'] == 0
        try:
            if wants_roll:
                self._roll(action['dices_hold'])
            else:
                self._select(action['board_selection'])
        except KniffelError:
            # Invalid move: nothing is changed here, the reward is -1.
            return -1

        score_after = np.sum(self._board) + self._bonus
        return score_after - score_before
def observation_space(self):
    """
    Return the observation space.

    If the 'returnRealSpace' parameter is true, the observation is simply
    the full state. If false, each predator receives as observation three
    5x5 binary matrices. Refer to PredatorPreyState#getStateMatrix for
    details.
    """
    if self._parameters['returnRealSpace']:
        return CustomObjectSpace(self._state)

    # gym's Tuple takes ONE iterable of spaces, so the sub-spaces are
    # passed as a tuple rather than as separate positional arguments.
    row = MultiBinary(5)  # 5x1 binary matrix
    matrix = Tuple((row,) * 5)  # 5 of them for 5x5 binary
    entity_obs = Tuple((matrix,) * 3)  # an obs is 3 5x5 matrices
    return Dict({
        pred.getId(): entity_obs
        for pred in self._state.getPredators()
    })
def test_tuple():
    """A Tuple space must support the usual sequence operations."""
    spaces = [Discrete(5), Discrete(10), Discrete(5)]
    space_tuple = Tuple(spaces)

    # len() and count()
    assert len(space_tuple) == len(spaces)
    assert space_tuple.count(Discrete(5)) == 2
    assert space_tuple.count(MultiBinary(2)) == 0

    # forward and reverse iteration
    for space, expected in zip(space_tuple, spaces):
        assert space == expected
    for space, expected in zip(reversed(space_tuple), reversed(spaces)):
        assert space == expected

    # index(), with and without a start / stop position
    assert space_tuple.index(Discrete(5)) == 0
    assert space_tuple.index(Discrete(5), 1) == 2
    with pytest.raises(ValueError):
        space_tuple.index(Discrete(10), 0, 1)
def __init__(self, breakout_config: Optional[BreakoutConfiguration] = None):
    """Create the state and the spaces for the given configuration.

    :param breakout_config: configuration to use; a default
        ``BreakoutConfiguration`` is created when it is None
    """
    if breakout_config is not None:
        self.config = breakout_config
    else:
        self.config = BreakoutConfiguration()
    self.state = BreakoutState(self.config)
    self.viewer = None  # type: Optional[PygameViewer]

    # One action per command; one fewer when firing is disabled.
    n_commands = len(Command)
    if not self.config.fire_enabled:
        n_commands = len(Command) - 1
    self.action_space = Discrete(n_commands)

    # Sub-spaces describing the individual parts of the state.
    self._paddle_x_space = Discrete(self.config.n_paddle_x)
    self._ball_x_space = Discrete(self.config.n_ball_x)
    self._ball_y_space = Discrete(self.config.n_ball_y)
    self._ball_x_speed_space = Discrete(self.config.n_ball_x_speed)
    self._ball_y_speed_space = Discrete(self.config.n_ball_y_speed)
    self._ball_dir_space = Discrete(self.config.n_ball_dir)
    brick_grid = (self.config._brick_rows, self.config._brick_cols)
    self._bricks_matrix_space = MultiBinary(brick_grid)
def __init__(self, k=1):
    """Declare the spaces and the all-zero start state.

    :param k: the nine binary observation entries have ``2 * k + 2`` bits
        each and the action carries ``k`` binary components
    """
    print(k)
    bits_per_entry = 2 * k + 2

    # Nine binary vectors followed by one Discrete(2).
    # (A Discrete(2 * k + 1) per entry was tried before and is disabled.)
    observation_parts = [MultiBinary(bits_per_entry) for _ in range(9)]
    observation_parts.append(Discrete(2))
    self.observation_space = Tuple(observation_parts)

    # One Discrete(10) followed by k Discrete(2) components.
    # (Tuple([Discrete(10), MultiBinary(k)]) and
    #  Tuple([Discrete(10), Discrete(k)]) were tried before and are disabled.)
    action_parts = [Discrete(10)] + [Discrete(2) for _ in range(k)]
    self.action_space = Tuple(action_parts)

    self._current_state = [
        np.array([0] * bits_per_entry) for _ in range(NUM_SQUARES)
    ]
    self._current_player = 0
    self._k = k
def _setup_reconstruction_info(self):
    """Build ``self.recoDescriptor`` from per-entity state and prediction
    descriptors (self, car, obstacle, pedestrian)."""
    # Component spaces shared by the entity descriptions.
    components = {
        "position": Box(-self.mean * 2, +self.mean * 2, shape=(2,)),
        "orientation": Box(-1.0, +1.0, shape=(2,)),
        "size": Box(-10, 10, shape=(2,)),
        "confidence": MultiBinary(1),
    }

    def entity_space(*fields):
        # Dict space holding the listed components, in the given order.
        return Dict({field: components[field] for field in fields})

    # Self
    self_state = StateSpaceDescriptor(
        1, entity_space("position", "orientation", "size", "confidence"))
    self_pred = PredictionDescriptor(numContinuous=4, contIdx=[2, 3, 4, 5])

    # Car
    car_state = StateSpaceDescriptor(
        4, entity_space("position", "orientation", "size", "confidence"))
    car_pred = PredictionDescriptor(numContinuous=4, contIdx=[2, 3, 4, 5])

    # Obstacle (no orientation)
    obstacle_state = StateSpaceDescriptor(
        4, entity_space("position", "size", "confidence"))
    obs_pred = PredictionDescriptor(numContinuous=2, contIdx=[2, 3])

    # Pedestrian (position only)
    ped_state = StateSpaceDescriptor(
        6, entity_space("position", "confidence"))
    ped_pred = PredictionDescriptor(numContinuous=0)

    state_spaces = [self_state, car_state, obstacle_state, ped_state]
    prediction_targets = [self_pred, car_pred, obs_pred, ped_pred]
    self.recoDescriptor = RecoDescriptor(featureGridSize=(10, 17),
                                         fullStateSpace=state_spaces,
                                         targetDefs=prediction_targets)
Box(low=np.array([[-1.0, 0.0], [0.0, -1.0]]), high=np.ones((2, 2)), dtype=np.float64), Box(low=0, high=255, shape=(), dtype=np.uint8), Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8), Discrete(2), Discrete(5, start=-2), Tuple((Discrete(3), Discrete(5))), Tuple(( Discrete(7), Box(low=np.array([0.0, -1.0]), high=np.array([1.0, 1.0]), dtype=np.float64), )), MultiDiscrete([11, 13, 17]), MultiBinary(19), Dict({ "position": Discrete(23), "velocity": Box(low=np.array([0.0]), high=np.array([1.0]), dtype=np.float64), }), Dict({ "position": Dict({ "x": Discrete(29), "y": Discrete(31) }), "velocity": Tuple((Discrete(37), Box(low=0, high=255, shape=(), dtype=np.uint8))), }),
def __init__(self, visualize=False, integrator_accuracy=5e-5):
    """Initialise the parent environment and use a 9-bit binary action space.

    :param visualize: forwarded to the parent constructor
    :param integrator_accuracy: forwarded to the parent constructor
    """
    super(JumpEnv, self).__init__(visualize, integrator_accuracy)
    # Set after the parent constructor has run.
    binary_actions = MultiBinary(9)
    self.action_space = binary_actions
# Spaces covered: Discrete (with and without ``start``), Box, Tuple,
# MultiDiscrete, MultiBinary and Dict.
@pytest.mark.parametrize(
    "space",
    [
        Discrete(3),
        Discrete(5, start=-2),
        Box(low=0.0, high=np.inf, shape=(2, 2)),
        Tuple([Discrete(5), Discrete(10)]),
        Tuple([
            Discrete(5),
            Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
        ]),
        Tuple((Discrete(5), Discrete(2), Discrete(2))),
        Tuple((Discrete(5), Discrete(2, start=6), Discrete(2, start=-4))),
        MultiDiscrete([2, 2, 100]),
        MultiBinary(10),
        Dict({
            "position": Discrete(5),
            "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32),
        }),
    ],
)
def test_roundtripping(space):
    """Draw two samples, check membership and serialise them.

    NOTE(review): the visible body stops after ``to_jsonable``; the
    remaining round-trip checks are presumably outside this excerpt.
    """
    sample_1 = space.sample()
    sample_2 = space.sample()
    # Every sample drawn from a space must be contained in it.
    assert space.contains(sample_1)
    assert space.contains(sample_2)
    # Both samples are converted together as one batch.
    json_rep = space.to_jsonable([sample_1, sample_2])
import numpy as np
from gym.spaces import Discrete, Box, Dict, MultiBinary

# Three discrete actions.
action_space = Discrete(3)

# Observation: an unbounded float32 array of shape (6, 9, 3) under "obs"
# and a 3-bit binary vector under "valid_action" (one bit per action --
# presumably an action mask; confirm against the consumer).
observation_space = Dict({
    "obs": Box(-np.inf, np.inf, shape=(6, 9, 3), dtype=np.float32),
    "valid_action": MultiBinary(3)
})
s1 = space.to_jsonable([sample_1]) s1p = space.to_jsonable([sample_1_prime]) s2 = space.to_jsonable([sample_2]) s2p = space.to_jsonable([sample_2_prime]) assert s1 == s1p, "Expected {} to equal {}".format(s1, s1p) assert s2 == s2p, "Expected {} to equal {}".format(s2, s2p) @pytest.mark.parametrize("space", [ Discrete(3), Box(low=np.array([-10, 0]),high=np.array([10, 10])), Tuple([Discrete(5), Discrete(10)]), Tuple([Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)]), Tuple((Discrete(5), Discrete(2), Discrete(2))), MultiDiscrete([2, 2, 100]), MultiBinary(6), Dict({"position": Discrete(5), "velocity": Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32)}), ]) def test_equality(space): space1 = space space2 = copy(space) assert space1 == space2, "Expected {} to equal {}".format(space1, space2) @pytest.mark.parametrize("spaces", [ (Discrete(3), Discrete(4)), (MultiDiscrete([2, 2, 100]), MultiDiscrete([2, 2, 8])), (MultiBinary(8), MultiBinary(7)), (Box(low=np.array([-10, 0]), high=np.array([10, 10]), dtype=np.float32), Box(low=np.array([-10, 0]), high=np.array([10, 9]), dtype=np.float32)),
spaces = [ Discrete(3), Box(low=0.0, high=np.inf, shape=(2, 2)), Box(low=0.0, high=np.inf, shape=(2, 2), dtype=np.float16), Tuple([Discrete(5), Discrete(10)]), Tuple( [ Discrete(5), Box(low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32), ] ), Tuple((Discrete(5), Discrete(2), Discrete(2))), MultiDiscrete([2, 2, 10]), MultiBinary(10), Dict( { "position": Discrete(5), "velocity": Box( low=np.array([0, 0]), high=np.array([1, 5]), dtype=np.float32 ), } ), ] flatdims = [3, 4, 4, 15, 7, 9, 14, 10, 7] @pytest.mark.parametrize(["space", "flatdim"], zip(spaces, flatdims)) def test_flatdim(space, flatdim):