def test_long_chain_grid_world(self):
    """
    Tests a minimalistic long-chain GridWorld.

    Drives the env with fixed actions and checks the returned state, reward
    and terminal flag after every single step.
    """
    env = GridWorld(world="long-chain")

    # Simple test runs with fixed actions.
    # X=player's position
    s = env.reset()  # ["X G"]
    self.assertEqual(s, 33)

    s, r, t, _ = env.step(2)  # down: ["X G"] (state stays the same)
    self.assertEqual(s, 33)
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(1)  # right: ["SX G"]
    self.assertEqual(s, 34)
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    env.reset()  # ["X G"]
    # Right, left, down, up, right -> Move one right each iteration.
    # Each entry is (action, expected state offset from this iteration's
    # starting cell `x + 33`).
    action_sequence = ((1, 1), (3, 0), (2, 0), (0, 0), (1, 1))
    for x in range(20):
        for action, offset in action_sequence:
            s, r, t, _ = env.step(action)
            self.assertEqual(s, x + 33 + offset)
            recursive_assert_almost_equal(r, -0.1)
            self.assertFalse(t)
def test_2x2_grid_world(self):
    """
    Tests a minimalistic 2x2 GridWorld.

    Runs three fixed-action scenarios on a seeded env: walking to the goal,
    stepping into the hole, and running against a wall.
    """
    # NOTE(review): another method named `test_2x2_grid_world` follows this
    # one in the file. If both live in the same class, the later definition
    # replaces this one and this test never runs - confirm which is intended.
    env = GridWorld(world="2x2")

    # Make everything deterministic.
    env.seed(55)

    # Simple test runs with fixed actions.
    # X=player's position
    s = env.reset()  # ["XH", " G"]  X=player's position
    self.assertEqual(s, 0)

    s, r, t, _ = env.step(2)  # down: [" H", "XG"]
    self.assertEqual(s, 1)
    self.assertEqual(r, -1.0)
    self.assertIs(t, False)

    s, r, t, _ = env.step(1)  # right: [" H", " X"]
    self.assertEqual(s, 3)
    self.assertEqual(r, 1.0)
    self.assertIs(t, True)

    env.reset()  # ["XH", " G"]  X=player's position
    s, r, t, _ = env.step(1)  # right: [" X", " G"] -> in the hole
    self.assertEqual(s, 2)
    self.assertEqual(r, -5.0)
    self.assertIs(t, True)

    # Run against a wall.
    env.reset()  # ["XH", " G"]  X=player's position
    s, r, t, _ = env.step(3)  # left: ["XH", " G"]
    self.assertEqual(s, 0)
    self.assertEqual(r, -1.0)
    self.assertIs(t, False)

    s, r, t, _ = env.step(2)  # down: [" H", "XG"]
    self.assertEqual(s, 1)
    self.assertEqual(r, -1.0)
    self.assertIs(t, False)

    s, r, t, _ = env.step(0)  # up: ["XH", " G"]
    self.assertEqual(s, 0)
    self.assertEqual(r, -1.0)
    self.assertIs(t, False)
def test_2x2_grid_world(self):
    """
    Tests a minimalistic 2x2 GridWorld.

    Runs three fixed-action scenarios: walking to the goal, stepping into
    the hole, and running against a wall.
    """
    # NOTE(review): an earlier method in this file is also named
    # `test_2x2_grid_world`. If both live in the same class, this definition
    # replaces the earlier one - confirm that is intended.
    env = GridWorld(world="2x2")

    # Simple test runs with fixed actions.
    # X=player's position
    s = env.reset()  # ["XH", " G"]  X=player's position
    self.assertEqual(s, 0)

    s, r, t, _ = env.step(2)  # down: [" H", "XG"]
    self.assertEqual(s, 1)
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(1)  # right: [" H", " X"]
    self.assertEqual(s, 3)
    recursive_assert_almost_equal(r, 1.0)
    self.assertTrue(t)

    env.reset()  # ["XH", " G"]  X=player's position
    s, r, t, _ = env.step(1)  # right: [" X", " G"] -> in the hole
    self.assertEqual(s, 2)
    self.assertEqual(r, -5.0)
    self.assertTrue(t)

    # Run against a wall.
    env.reset()  # ["XH", " G"]  X=player's position
    s, r, t, _ = env.step(3)  # left: ["XH", " G"]
    self.assertEqual(s, 0)
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(2)  # down: [" H", "XG"]
    self.assertEqual(s, 1)
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(0)  # up: ["XH", " G"]
    self.assertEqual(s, 0)
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)
def test_4x4_grid_world_with_container_actions(self):
    """
    Tests a 4x4 GridWorld using forward+turn+jump container actions.

    Actions are dicts with the keys `turn`, `forward` and (optionally)
    `jump`. Four fixed-action scenarios are checked: falling into a hole,
    jumping to the goal, running against a wall and jumping over a hole.
    """
    env = GridWorld(world="4x4", action_type="ftj", state_representation="xy+orientation")

    # Simple test runs with fixed actions.
    # Fall into hole.
    # NOTE(review): the state has 4 components; it looks like
    # (x, y, orientation-x, orientation-y) - TODO confirm against GridWorld.
    s = env.reset()  # [0, 0, 0, 1]
    recursive_assert_almost_equal(s, [0, 0, 0, 1])

    s, r, t, _ = env.step(dict(turn=2, forward=2))  # turn=2 (right), move=2 (forward), jump=0
    recursive_assert_almost_equal(s, [1, 0, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(dict(turn=2, forward=1))  # turn=2 (right), move=1 (stay), jump=0
    recursive_assert_almost_equal(s, [1, 0, 0, -1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(dict(turn=1, forward=2))  # turn=1 (no turn), move=2 (forward), jump=0
    recursive_assert_almost_equal(s, [1, 1, 0, -1])
    self.assertEqual(r, -5.0)
    self.assertTrue(t)

    # Jump quite a lot and reach goal.
    env.reset()  # [0, 0, 0, 1]
    s, r, t, _ = env.step(dict(turn=2, forward=1))
    recursive_assert_almost_equal(s, [0, 0, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(dict(turn=1, forward=1, jump=1))
    recursive_assert_almost_equal(s, [2, 0, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(dict(turn=2, forward=2))
    recursive_assert_almost_equal(s, [2, 1, 0, -1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(dict(turn=1, forward=2, jump=1))
    recursive_assert_almost_equal(s, [2, 3, 0, -1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    # forward=0 presumably means "move backward" - TODO confirm.
    s, r, t, _ = env.step(dict(turn=2, forward=0))
    recursive_assert_almost_equal(s, [3, 3, -1, 0])
    self.assertEqual(r, 1.0)
    self.assertTrue(t)

    # Run against a wall.
    env.reset()  # [0, 0, 0, 1]
    s, r, t, _ = env.step(dict(turn=1, forward=0))
    recursive_assert_almost_equal(s, [0, 1, 0, 1])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    # Position is expected to stay at (0, 1); only the orientation changes.
    s, r, t, _ = env.step(dict(turn=0, forward=2))
    recursive_assert_almost_equal(s, [0, 1, -1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    # Jump over a hole (no reset).
    # Turn around: two right-turns without moving; only the result of the
    # second one is checked.
    s, r, t, _ = env.step(dict(turn=2, forward=1))
    s, r, t, _ = env.step(dict(turn=2, forward=1))
    recursive_assert_almost_equal(s, [0, 1, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)

    s, r, t, _ = env.step(dict(turn=1, forward=1, jump=1))
    recursive_assert_almost_equal(s, [2, 1, 1, 0])
    recursive_assert_almost_equal(r, -0.1)
    self.assertFalse(t)