예제 #1
0
    def test_1_container_1_float_only_flatten(self):
        """
        Sends one Dict container plus one float input through the "run" API of an `OnlyFlattenDummy`
        component and compares the three returned values against hand-computed expectations.
        """
        container_space = spaces.Dict(a=float, b=float, c=spaces.Tuple(float))
        float_space = spaces.FloatBox(shape=(1, ))

        dummy = OnlyFlattenDummy(constant_value=5.0)
        harness = ComponentTest(
            component=dummy,
            input_spaces={"input1": container_space, "input2": float_space}
        )

        # Concrete inputs: a nested container (dict holding a 1-tuple) and a single-element float array.
        container_in = {"a": 5.4, "b": 3.4, "c": (3.2,)}
        float_in = np.array([1.2])
        leaf_c = container_in["c"][0]

        # Expected return values, in order:
        #   1) every leaf of the container with the float input added,
        #   2) every leaf of the container with the float input subtracted,
        #   3) the float input itself.
        # NOTE(review): constant_value=5.0 does not appear in any expectation below - confirm against
        # OnlyFlattenDummy's graph_fn that this is intended.
        expected_sum = {
            "a": container_in["a"] + float_in,
            "b": container_in["b"] + float_in,
            "c": (leaf_c + float_in,),
        }
        expected_diff = {
            "a": container_in["a"] - float_in,
            "b": container_in["b"] - float_in,
            "c": (leaf_c - float_in,),
        }
        expected_passthrough = float_in

        harness.test(
            ("run", [container_in, float_in]),
            expected_outputs=[expected_sum, expected_diff, expected_passthrough],
            decimals=5
        )
예제 #2
0
    def test_2_containers_no_options(self):
        """
        Sends two Dict containers through the "run" API of a `NoFlattenNoSplitDummy` component and expects
        them to be returned unchanged but in swapped order.
        """
        first_space = spaces.Dict(a=int, b=bool)
        second_space = spaces.Dict(c=bool, d=int)

        dummy = NoFlattenNoSplitDummy()
        harness = ComponentTest(
            component=dummy,
            input_spaces={"input1": first_space, "input2": second_space}
        )

        # Both inputs are whole containers; the expectations below assume they pass through untouched.
        first_in = {"a": 5, "b": True}
        second_in = {"c": False, "d": 3}

        # The component's graph_fn is expected to reverse its two inputs, so the expected outputs are
        # simply the inputs in opposite order.
        harness.test(("run", [first_in, second_in]), expected_outputs=[second_in, first_in])
예제 #3
0
    def test_2_containers_flattening_splitting(self):
        """
        Sends two Dict containers (sharing the keys "a" and "b") through the "run" API of a
        `FlattenSplitDummy` component and checks the two returned containers key by key.
        """
        first_space = spaces.Dict(a=float, b=spaces.FloatBox(shape=(1, 2)))
        second_space = spaces.Dict(a=float, b=float)

        dummy = FlattenSplitDummy()
        harness = ComponentTest(component=dummy, input_spaces={"input1": first_space, "input2": second_space})

        # Inputs for the flatten/split/un-flatten case.
        first_in = {"a": np.array(0.234), "b": np.array([[0.0, 3.0]])}
        second_in = {"a": np.array(5.0), "b": np.array(5.5)}

        # Per key, the expectations correspond to (first + 1.0, first + second):
        #   "a": (0.234 + 1.0 = 1.234, 0.234 + 5.0 = 5.234)
        #   "b": ([[0, 3]] + 1.0 = [[1, 4]], [[0, 3]] + 5.5 = [[5.5, 8.5]])
        expected_first = {"a": 1.234, "b": np.array([[1.0, 4.0]])}
        expected_second = {
            "a": np.array(5.234, dtype=np.float32),
            "b": np.array([[5.5, 8.5]]),
        }

        harness.test(("run", [first_in, second_in]), expected_outputs=[expected_first, expected_second])
예제 #4
0
    def __init__(self, world="4x4", save_mode=False, action_type="udlr",
                 reward_function="sparse", state_representation="discrete"):
        """
        Builds the character grid from `world`, derives the state- and action spaces from the chosen options,
        passes both to the parent constructor and finally resets the environment without randomization.

        Args:
            world (Union[str,List[str]]): Either a key into `MAPS` or a list of strings, one string per row
                of the world (e.g. ["S ", " G"] for a two-row/two-column world with start and goal state).

            save_mode (bool): If True, every hole field ("H") of the map is overwritten with "F".
                Default: False.

            action_type (str): Which action space to use. Choose between "udlr" (up, down, left, right), a
                single discrete action space with 4 values, and "ftj" (forward + turn + jump), a container
                (Dict) of three discrete spaces. Any value other than "udlr" selects the container space.

            reward_function (str): One of
                sparse: hole=-1, fire=-1, goal=50, all other steps=-1
                rich: hole=-100, fire=-10, goal=50
                (The rewards themselves are not computed in this constructor.)

            state_representation (str):
                - "discrete": An int representing the field on the grid, 0 meaning the upper left field, 1 the one
                    below, etc..
                - "xy": The x and y grid position tuple.
                - "xy+orientation": The x and y grid position plus the orientation (if any) of the actor,
                    the orientation being encoded in 2 values (4 ints in total).
                - "camera": A 3-channel image where each field in the grid-world is one pixel and the 3 channels are
                    used to indicate different items in the scene (walls, holes, the actor, etc..).
        """
        # Resolve the map: a string is a key into `MAPS`, anything else is taken as the rows themselves.
        if not isinstance(world, str):
            self.description = "custom-map"
        else:
            self.description = world
            world = self.MAPS[world]

        # One character per cell, as a 2D array.
        grid = np.array([list(row) for row in world])
        # Safety switch: with `save_mode` on, holes are overwritten with "F"; otherwise they stay "H".
        # NOTE(review): earlier documentation spoke of walls ("W") here, but the value written is "F".
        hole_replacement = "F" if save_mode else "H"
        grid[grid == 'H'] = hole_replacement

        # The grid is indexed row first, then column.
        self.world = grid
        n_rows, n_cols = self.world.shape
        self.n_row = n_rows
        self.n_col = n_cols
        # np.nonzero returns the axis-0 (row) indices first, then the axis-1 (column) indices. Exactly one
        # "S" field is required, otherwise the unpacking fails. Both values are later handed to
        # `get_discrete_pos` in this same order.
        start_axis0, start_axis1 = np.nonzero(self.world == "S")
        (start_first,) = start_axis0
        (start_second,) = start_axis1

        # Derive the state space from the requested representation.
        assert state_representation in ["discrete", "xy", "xy+orientation", "camera"]
        self.state_representation = state_representation
        if self.state_representation == "xy":
            # Position only (2 ints).
            state_space = spaces.IntBox(low=(0, 0), high=(self.n_col, self.n_row), shape=(2,))
        elif self.state_representation == "xy+orientation":
            # Position plus a 2-value orientation (4 ints).
            state_space = spaces.IntBox(low=(0, 0, 0, 0), high=(self.n_col, self.n_row, 1, 1))
        elif self.state_representation == "discrete":
            # A single int, one value per grid field.
            state_space = spaces.IntBox(self.n_row * self.n_col)
        else:
            # Camera: one pixel per grid field, 3 channels.
            state_space = spaces.IntBox(0, 255, shape=(self.n_row, self.n_col, 3))

        # The start field doubles as the initial position.
        self.default_start_pos = self.get_discrete_pos(start_first, start_second)
        self.discrete_pos = self.default_start_pos

        assert reward_function in ["sparse", "rich"]  # TODO: "potential"-based reward
        self.reward_function = reward_function

        # Remember where the goal is (meant for proximity calculations of a "potential" reward function).
        # As with the start field, exactly one "G" is required and the axis-0 index comes first.
        goal_axis0, goal_axis1 = np.nonzero(self.world == "G")
        (self.goal_x,) = goal_axis0
        (self.goal_y,) = goal_axis1

        # Pick the action space: one 4-way discrete space for "udlr", a Dict of three discrete spaces otherwise.
        self.action_type = action_type
        if self.action_type == "udlr":
            action_space = spaces.IntBox(4)
        else:
            action_space = spaces.Dict({
                "forward": spaces.IntBox(3),
                "turn": spaces.IntBox(3),
                "jump": spaces.IntBox(2),
            })

        # Let the parent class register both spaces.
        super(GridWorld, self).__init__(state_space=state_space, action_space=action_space)

        # Declare the per-episode fields, then reset.
        self.state = None
        self.orientation = None  # int: 0, 90, 180, 270
        self.camera_pixels = None  # only used if state_representation == "camera"
        self.reward = None
        self.is_terminal = None
        self.reset(randomize=False)