Example #1
    def setUp(self):
        self.env = RoomEnv(TestRoomSpec())
        self.model_tests = []

        u, d, l, r, s = get_directions()
        policy_left_right = get_two_action_uniform_policy(
            self.env.nS, self.env.nA, l, r)
        state_middle = RoomState((2, 1), {(2, 1): False})
        state_left_vase = RoomState((1, 1), {(2, 1): True})
        state_right_vase = RoomState((3, 1), {(2, 1): True})
        state_left_novase = RoomState((1, 1), {(2, 1): False})
        state_right_novase = RoomState((3, 1), {(2, 1): False})
        # Forward: from state_middle the uniform left/right policy reaches
        # (1, 1) or (3, 1) with probability 0.5 each; the vase flag stays False.
        forward_probs = np.zeros(self.env.nS)
        forward_probs[self.env.get_num_from_state(state_left_novase)] = 0.5
        forward_probs[self.env.get_num_from_state(state_right_novase)] = 0.5
        # Backward: under a uniform prior all four candidate predecessors
        # (left or right of (2, 1), vase intact or not) are equally likely,
        # since each reaches state_middle with likelihood 0.5.
        backward_probs = np.zeros(self.env.nS)
        backward_probs[self.env.get_num_from_state(state_left_vase)] = 0.25
        backward_probs[self.env.get_num_from_state(state_right_vase)] = 0.25
        backward_probs[self.env.get_num_from_state(state_left_novase)] = 0.25
        backward_probs[self.env.get_num_from_state(state_right_novase)] = 0.25
        transitions = [(state_middle, 1, forward_probs, backward_probs)]
        unif = np.ones(self.env.nS) / self.env.nS
        self.model_tests.append({
            "policy": policy_left_right,
            "transitions": transitions,
            "initial_state_distribution": unif,
        })

        self.setUpDeterministic()
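
The 0.5 / 0.25 values above follow directly from Bayes' rule. Below is a minimal, self-contained sketch of that arithmetic, using plain numpy over just the four candidate predecessor states (every other state has zero likelihood under the left/right policy); it illustrates the computation, not the RoomEnv API:

import numpy as np

# Likelihood of landing in state_middle from each candidate predecessor:
# the policy picks the correct direction (r from the left square, l from
# the right square) with probability 0.5, and the vase flag ends up False
# whether or not a vase was there to break.
likelihood = np.array([0.5, 0.5, 0.5, 0.5])  # left/right x vase/no-vase
prior = np.ones(4) / 4                       # uniform, as in the fixture

posterior = likelihood * prior
posterior /= posterior.sum()
print(posterior)  # [0.25 0.25 0.25 0.25] -- the backward_probs entries above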
Example #2
    def setUp(self):
        self.env = TrainEnv(TestTrainSpec())
        u, d, l, r, s = get_directions()

        # Each trajectory is a list of (action, (expected next state,
        # transition probability)) pairs; all transitions here are
        # deterministic, so every probability is 1.0.
        self.trajectories = [
            [
                (u, (TrainState((0, 3), {(1, 2): True}, (3, 1), True), 1.0)),
                (u, (TrainState((0, 2), {(1, 2): True}, (3, 2), True), 1.0)),
                (u, (TrainState((0, 1), {(1, 2): True}, (2, 2), True), 1.0)),
                (r, (TrainState((1, 1), {(1, 2): True}, (2, 1), True), 1.0)),
                (u, (TrainState((1, 0), {(1, 2): True}, (3, 1), True), 1.0)),
                (r, (TrainState((2, 0), {(1, 2): True}, (3, 2), True), 1.0)),
                (s, (TrainState((2, 0), {(1, 2): True}, (2, 2), True), 1.0)),
                (s, (TrainState((2, 0), {(1, 2): True}, (2, 1), True), 1.0)),
            ],
            [
                (u, (TrainState((0, 3), {(1, 2): True}, (3, 1), True), 1.0)),
                (r, (TrainState((1, 3), {(1, 2): True}, (3, 2), True), 1.0)),
                (r, (TrainState((2, 3), {(1, 2): True}, (2, 2), True), 1.0)),
            ],
            [
                (r, (TrainState((1, 4), {(1, 2): True}, (3, 1), True), 1.0)),
                (r, (TrainState((2, 4), {(1, 2): True}, (3, 2), True), 1.0)),
                (r, (TrainState((3, 4), {(1, 2): True}, (2, 2), True), 1.0)),
                (u, (TrainState((3, 3), {(1, 2): True}, (2, 1), True), 1.0)),
                (u, (TrainState((3, 2), {(1, 2): True}, (3, 1), True), 1.0)),
                (s, (TrainState((3, 2), {(1, 2): True}, (3, 2), False), 1.0)),
                (s, (TrainState((3, 2), {(1, 2): True}, (3, 2), False), 1.0)),
                (u, (TrainState((3, 1), {(1, 2): True}, (3, 2), False), 1.0)),
                (l, (TrainState((2, 1), {(1, 2): True}, (3, 2), False), 1.0)),
            ],
        ]
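
A fixture like this suggests a replay harness: step the environment with each action and assert the expected next state occurs with the expected probability. The sketch below is one hedged way to write that; step_fn is a hypothetical stand-in for whatever interface the real TrainEnv exposes, which is not shown in this snippet:

def check_trajectory(step_fn, start_state, trajectory, tol=1e-8):
    """Replay (action, (expected_state, prob)) pairs against a step function.

    step_fn(state, action) is assumed to return {next_state: probability}.
    """
    state = start_state
    for action, (expected_state, expected_prob) in trajectory:
        dist = step_fn(state, action)
        actual = dist.get(expected_state, 0.0)
        assert abs(actual - expected_prob) < tol, (
            f"expected P={expected_prob}, got {actual}")
        state = expected_state  # follow the asserted branch

# Toy deterministic chain, just to show the harness runs end to end.
check_trajectory(lambda s, a: {s + a: 1.0}, 0, [(1, (1, 1.0)), (2, (3, 1.0))])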
Example #3
    def setUp(self):
        self.env = ApplesEnv(TestApplesSpec())

        u, d, l, r, s = get_directions()
        i = 5  # interact action

        def make_state(agent_pos, tree1, tree2, bucket, carrying_apple):
            # agent_pos packs (facing direction, x, y); the trees sit at
            # (0, 0) and (2, 0), the bucket at (1, 2).
            tree_states = {(0, 0): tree1, (2, 0): tree2}
            bucket_state = {(1, 2): bucket}
            return ApplesState(agent_pos, tree_states, bucket_state, carrying_apple)

        # Per-step probabilities below drop from 1.0 once a tree is empty,
        # since an empty tree may randomly regrow an apple each step.
        self.trajectories = [
            [
                (u, (make_state((u, 0, 1), True, True, 0, False), 1.0)),
                (i, (make_state((u, 0, 1), False, True, 0, True), 1.0)),
                (r, (make_state((r, 1, 1), False, True, 0, True), 3.0 / 4)),
                (d, (make_state((d, 1, 1), False, True, 0, True), 3.0 / 4)),
                (i, (make_state((d, 1, 1), False, True, 1, False), 3.0 / 4)),
                (u, (make_state((u, 1, 0), False, True, 1, False), 3.0 / 4)),
                (r, (make_state((r, 1, 0), False, True, 1, False), 3.0 / 4)),
                (i, (make_state((r, 1, 0), False, False, 1, True), 3.0 / 4)),
                (d, (make_state((d, 1, 1), False, False, 1, True), 9.0 / 16)),
                (i, (make_state((d, 1, 1), True, False, 2, False), 3.0 / 16)),
                (s, (make_state((d, 1, 1), True, True, 2, False), 1.0 / 4)),
            ]
        ]
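
The per-step probabilities here are consistent with each empty tree independently regrowing an apple with probability 1/4 per step (an inference from the numbers, not a documented ApplesEnv constant): one empty tree staying empty costs 3/4, two cost 9/16, and so on. A self-contained check of that reading:

REGROW = 0.25  # inferred per-step regrowth chance for an empty tree

def step_prob(empty_before, regrew):
    """Probability of the observed regrowth outcome, trees independent."""
    p = 1.0
    for was_empty, grew in zip(empty_before, regrew):
        if was_empty:
            p *= REGROW if grew else 1 - REGROW
    return p

print(step_prob([True, False], [False, False]))  # 0.75   = 3/4
print(step_prob([True, True], [False, False]))   # 0.5625 = 9/16
print(step_prob([True, True], [True, False]))    # 0.1875 = 3/16
print(step_prob([False, True], [False, True]))   # 0.25   = 1/4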
Example #4
    def setUp(self):
        self.env = BatteriesEnv(TestBatteriesSpec())
        u, d, l, r, s = get_directions()

        def make_state(agent, train, life, battery_vals, carrying_battery):
            # battery_vals gives presence flags for the batteries at (0, 0)
            # and (4, 4), in that order.
            battery_present = dict(zip([(0, 0), (4, 4)], battery_vals))
            return BatteriesState(agent, train, life, battery_present,
                                  carrying_battery)

        self.trajectories = [[
            (u, (make_state((0, 3), (3, 1), 7, [True, True], False), 1.0)),
            (u, (make_state((0, 2), (3, 2), 6, [True, True], False), 1.0)),
            (u, (make_state((0, 1), (2, 2), 5, [True, True], False), 1.0)),
            (u, (make_state((0, 0), (2, 1), 4, [False, True], True), 1.0)),
            (r, (make_state((1, 0), (3, 1), 3, [False, True], True), 1.0)),
            (r, (make_state((2, 0), (3, 2), 2, [False, True], True), 1.0)),
            (s, (make_state((2, 0), (2, 2), 1, [False, True], True), 1.0)),
            (s, (make_state((2, 0), (2, 1), 0, [False, True], True), 1.0)),
            (d, (make_state((2, 1), (3, 1), 9, [False, True], False), 1.0)),
            (u, (make_state((2, 0), (3, 2), 8, [False, True], False), 1.0)),
        ]]
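
The life counter in this rollout ticks down by one each step (7 through 0) and jumps to 9 exactly when the agent, carrying a battery, lands on the train's square and carrying_battery flips to False. That pattern is consistent with a recharge to 10 followed by the usual per-step decrement, though this rule is an inference from the fixture rather than documented BatteriesEnv behavior:

def next_life(life, delivered_battery, recharge=10):
    """Life decays each step; delivering a battery recharges it first."""
    if delivered_battery:
        life = recharge
    return life - 1

life = 8  # value just before the first step shown above
for delivered in [False] * 8 + [True, False]:
    life = next_life(life, delivered)
    print(life, end=" ")  # 7 6 5 4 3 2 1 0 9 8 -- matching the trajectory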
Example #5
    def setUp(self):
        self.env = RoomEnv(TestRoomSpec())
        u, d, l, r, s = get_directions()

        # Deterministic trajectories; the vase flag at (2, 1) flips to False
        # on the step where the agent walks onto that square.
        self.trajectories = [
            [
                (l, (RoomState((1, 2), {(2, 1): True}), 1.0)),
                (u, (RoomState((1, 1), {(2, 1): True}), 1.0)),
                (u, (RoomState((1, 0), {(2, 1): True}), 1.0)),
                (r, (RoomState((2, 0), {(2, 1): True}), 1.0)),
            ],
            [
                (u, (RoomState((2, 1), {(2, 1): False}), 1.0)),
                (u, (RoomState((2, 0), {(2, 1): False}), 1.0)),
            ],
            [
                (r, (RoomState((3, 2), {(2, 1): True}), 1.0)),
                (u, (RoomState((3, 1), {(2, 1): True}), 1.0)),
                (l, (RoomState((2, 1), {(2, 1): False}), 1.0)),
                (d, (RoomState((2, 2), {(2, 1): False}), 1.0)),
            ],
        ]
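
The vase flag at (2, 1) flips from True to False exactly when the agent walks onto that square (third trajectory, third entry), which suggests True means the vase is intact and stepping on it breaks it. A minimal transition sketch under that reading; plain tuples stand in for the real RoomState, and the move vectors assume y grows downward, as the coordinates above imply:

MOVES = {"u": (0, -1), "d": (0, 1), "l": (-1, 0), "r": (1, 0)}

def room_step(agent_pos, vase_states, action):
    """Deterministic move; walking onto an intact vase breaks it."""
    dx, dy = MOVES[action]
    new_pos = (agent_pos[0] + dx, agent_pos[1] + dy)
    new_vases = dict(vase_states)
    if new_vases.get(new_pos):   # vase present at the destination
        new_vases[new_pos] = False
    return new_pos, new_vases

# Third trajectory above: (3, 1) --l--> (2, 1) breaks the vase.
print(room_step((3, 1), {(2, 1): True}, "l"))  # ((2, 1), {(2, 1): False})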
Example #6
    def setUp(self):
        self.env = ApplesEnv(TestApplesSpec())
        self.model_tests = []

        _, _, _, _, stay = get_directions()
        policy_stay = np.zeros((self.env.nS, self.env.nA))
        policy_stay[:, stay] = 1

        def make_state(apple1_present, apple2_present):
            return ApplesState(
                agent_pos=(0, 1, 1),
                tree_states={(0, 0): apple1_present, (2, 0): apple2_present},
                bucket_states={(1, 2): 0},
                carrying_apple=False,
            )

        state_0_0 = make_state(False, False)
        state_0_1 = make_state(False, True)
        state_1_0 = make_state(True, False)
        state_1_1 = make_state(True, True)

        # Forward: both trees already hold apples, so under the stay policy
        # the state is unchanged with probability 1.
        forward_probs = np.zeros(self.env.nS)
        forward_probs[self.env.get_num_from_state(state_1_1)] = 1
        # Backward: Bayes posterior over the four tree configurations that
        # can precede state_1_1 under the stay policy (see the sketch below).
        backward_probs = np.zeros(self.env.nS)
        backward_probs[self.env.get_num_from_state(state_0_0)] = 0.04
        backward_probs[self.env.get_num_from_state(state_0_1)] = 0.16
        backward_probs[self.env.get_num_from_state(state_1_0)] = 0.16
        backward_probs[self.env.get_num_from_state(state_1_1)] = 0.64
        transitions = [(state_1_1, 1, forward_probs, backward_probs)]

        unif = np.ones(self.env.nS) / self.env.nS
        self.model_tests.append(
            {
                "policy": policy_stay,
                "transitions": transitions,
                "initial_state_distribution": unif,
            }
        )
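
The 0.04 / 0.16 / 0.16 / 0.64 split is exactly the Bayes posterior over the four tree configurations that can precede state_1_1 under the stay policy, given a uniform prior and the same 1/4 regrowth chance implied by Example #3's per-step probabilities. A self-contained check (arithmetic only, not the ApplesEnv API):

import numpy as np

REGROW = 0.25  # inferred per-step regrowth chance for an empty tree

def likelihood(tree1_present, tree2_present):
    """P(both trees full now | predecessor); a full tree stays full."""
    p1 = 1.0 if tree1_present else REGROW
    p2 = 1.0 if tree2_present else REGROW
    return p1 * p2

predecessors = [(False, False), (False, True), (True, False), (True, True)]
lik = np.array([likelihood(t1, t2) for t1, t2 in predecessors])
posterior = lik / lik.sum()  # uniform prior cancels in the normalization
print(posterior)             # [0.04 0.16 0.16 0.64] -- backward_probs above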