def test_1_container_1_float_only_flatten(self):
    """
    Runs a single component with a 2-to-3 graph_fn, feeding it one container and one float
    while only the flatten option is enabled.
    """
    container_space = spaces.Dict(a=float, b=float, c=spaces.Tuple(float))
    float_space = spaces.FloatBox(shape=(1, ))

    dummy = OnlyFlattenDummy(constant_value=5.0)
    test = ComponentTest(
        component=dummy,
        input_spaces={"input1": container_space, "input2": float_space}
    )

    # Inputs: a nested container (dict holding a 1-tuple) and a single-element float array.
    container_in = {"a": 5.4, "b": 3.4, "c": (3.2,)}
    float_in = np.array([1.2])

    # Expected outputs, as asserted below:
    #   1st: every leaf of the container plus the float input.
    #   2nd: every leaf of the container minus the float input.
    #   3rd: the float input itself, passed through unchanged.
    expected_sum = {
        "a": container_in["a"] + float_in,
        "b": container_in["b"] + float_in,
        "c": (container_in["c"][0] + float_in,),
    }
    expected_diff = {
        "a": container_in["a"] - float_in,
        "b": container_in["b"] - float_in,
        "c": (container_in["c"][0] - float_in,),
    }
    expected_passthrough = float_in

    test.test(
        ("run", [container_in, float_in]),
        expected_outputs=[expected_sum, expected_diff, expected_passthrough],
        decimals=5
    )
def test_2_containers_no_options(self):
    """
    Runs a single component with a 2-to-2 graph_fn, feeding it two containers with
    no flatten/split options enabled.
    """
    first_space = spaces.Dict(a=int, b=bool)
    second_space = spaces.Dict(c=bool, d=int)

    dummy = NoFlattenNoSplitDummy()
    test = ComponentTest(
        component=dummy,
        input_spaces={"input1": first_space, "input2": second_space}
    )

    first_in = {"a": 5, "b": True}
    second_in = {"c": False, "d": 3}

    # The two containers are expected to come back in swapped order (see the component's graph_fn).
    test.test(("run", [first_in, second_in]), expected_outputs=[second_in, first_in])
def test_2_containers_flattening_splitting(self):
    """
    Runs a single component with a 2-to-2 graph_fn, feeding it two containers with
    the flatten/split options enabled.
    """
    first_space = spaces.Dict(a=float, b=spaces.FloatBox(shape=(1, 2)))
    second_space = spaces.Dict(a=float, b=float)

    dummy = FlattenSplitDummy()
    test = ComponentTest(
        component=dummy,
        input_spaces={"input1": first_space, "input2": second_space}
    )

    # Both inputs share the keys 'a' and 'b', so they can be processed key by key.
    first_in = {"a": np.array(0.234), "b": np.array([[0.0, 3.0]])}
    second_in = {"a": np.array(5.0), "b": np.array(5.5)}

    # The expected values below correspond to (first + 1.0, first + second) per key:
    #   key 'a': 0.234 + 1.0 = 1.234           and 0.234 + 5.0 = 5.234
    #   key 'b': [[0, 3]] + 1.0 = [[1, 4]]     and [[0, 3]] + 5.5 = [[5.5, 8.5]]
    expected_first = {"a": 1.234, "b": np.array([[1.0, 4.0]])}
    expected_second = {
        "a": np.array(5.234, dtype=np.float32),
        "b": np.array([[5.5, 8.5]]),
    }

    test.test(
        ("run", [first_in, second_in]),
        expected_outputs=[expected_first, expected_second]
    )
def __init__(self, world="4x4", save_mode=False, action_type="udlr", reward_function="sparse",
             state_representation="discrete"):
    """
    Builds the grid map, derives the state and action spaces from it and resets the environment.

    Args:
        world (Union[str,List[str]]): Either a string to map into `MAPS` or a list of strings describing the rows
            of the world (e.g. ["S ", " G"] for a two-row/two-column world with start and goal state).

        save_mode (bool): Whether to replace holes (H) with fire fields (F). Default: False.

        action_type (str): Which action space to use. Chose between "udlr" (up, down, left, right), which is a
            discrete action space and "ftj" (forward + turn + jump), which is a container multi-discrete
            action space. Any value other than "udlr" selects the "ftj" container space.

        reward_function (str): One of
            sparse: hole=-1, fire=-1, goal=50, all other steps=-1
            rich: hole=-100, fire=-10, goal=50

        state_representation (str):
            - "discrete": An int representing the field on the grid, 0 meaning the upper left field, 1 the one
                below, etc..
            - "xy": The x and y grid position tuple.
            - "xy+orientation": The x and y grid position tuple plus the orientation (if any) as tuple of 2 values
                of the actor.
            - "camera": A 3-channel image where each field in the grid-world is one pixel
                and the 3 channels are used to indicate different items in the scene (walls, holes, the actor, etc..).

    Raises:
        AssertionError: If `state_representation` or `reward_function` is not one of the supported values.
    """
    # Build our map.
    if isinstance(world, str):
        self.description = world
        world = self.MAPS[world]
    else:
        self.description = "custom-map"

    # Always copy the rows into a fresh 2D array of single characters. This way the safety switch
    # below only ever edits this instance's copy and never a shared `MAPS` entry.
    world = np.array(list(map(list, world)))

    # Apply safety switch: with `save_mode` on, every hole (H) becomes a fire field (F).
    if save_mode:
        world[world == "H"] = "F"

    # `world` is a 2D array that needs to be indexed using y/x pairs (first row, then column).
    self.world = world
    self.n_row, self.n_col = self.world.shape

    # Exactly one start field (S) is expected; the tuple-unpacking fails otherwise.
    # NOTE(review): np.nonzero returns (row indices, column indices), so `start_x` holds the row index here.
    # Presumably `get_discrete_pos` expects this order - confirm.
    (start_x,), (start_y,) = np.nonzero(self.world == "S")

    # Figure out our state space.
    assert state_representation in ["discrete", "xy", "xy+orientation", "camera"]
    self.state_representation = state_representation

    # Discrete states (single int from 0 to n).
    if self.state_representation == "discrete":
        state_space = spaces.IntBox(self.n_row * self.n_col)
    # x/y position (2 ints).
    elif self.state_representation == "xy":
        state_space = spaces.IntBox(low=(0, 0), high=(self.n_col, self.n_row), shape=(2,))
    # x/y position + orientation (4 ints: x, y and a 2-value orientation).
    elif self.state_representation == "xy+orientation":
        state_space = spaces.IntBox(low=(0, 0, 0, 0), high=(self.n_col, self.n_row, 1, 1))
    # Camera outputting a 2D color image of the world.
    else:
        state_space = spaces.IntBox(0, 255, shape=(self.n_row, self.n_col, 3))

    self.default_start_pos = self.get_discrete_pos(start_x, start_y)
    self.discrete_pos = self.default_start_pos

    assert reward_function in ["sparse", "rich"]  # TODO: "potential"-based reward
    self.reward_function = reward_function

    # Store the goal position for proximity calculations (for "potential" reward function).
    # Exactly one goal field (G) is expected; same (row, column) ordering caveat as for the start field.
    (self.goal_x,), (self.goal_y,) = np.nonzero(self.world == "G")

    # Specify the actual action spaces.
    self.action_type = action_type
    if self.action_type == "udlr":
        action_space = spaces.IntBox(4)
    else:
        action_space = spaces.Dict(dict(
            forward=spaces.IntBox(3),
            turn=spaces.IntBox(3),
            jump=spaces.IntBox(2)
        ))

    # Call the super's constructor.
    super(GridWorld, self).__init__(state_space=state_space, action_space=action_space)

    # Reset ourselves.
    self.state = None
    self.orientation = None  # int: 0, 90, 180, 270
    self.camera_pixels = None  # only used, if state_representation=='camera'
    self.reward = None
    self.is_terminal = None
    self.reset(randomize=False)