Python DQNAgentの例、rlgraph.agents.DQNAgent Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_backends.py プロジェクト: EmpereurCC/RLgraph_exp

    def test_pong_with_worker(self):
        """
        Smoke-runs a DQN agent on Pong through a SingleThreadedWorker for 1000 timesteps and prints the result.
        """
        # Frame skipping (4) is configured on the environment spec; the worker below is created
        # with frameskip=1.
        pong_spec = {
            "type": "openai",
            "gym_env": "PongNoFrameskip-v4",
            "frameskip": 4,
            "max_num_noops": 30,
            "episodic_life": False,
        }

        # This instance is only used to read the state and action spaces.
        probe_env = OpenAIGymEnv.from_spec(pong_spec)
        dqn_config = config_from_path("configs/backend_performance_dqn_pong.json")

        # Discard the update settings that came with the config file.
        dqn_config["update_spec"] = None

        # The agent is built on the environment's own (unmodified) spaces.
        agent = DQNAgent.from_spec(
            dqn_config,
            state_space=probe_env.state_space,
            action_space=probe_env.action_space
        )

        worker_kwargs = {
            "env_spec": pong_spec,
            "agent": agent,
            "frameskip": 1,
            "preprocessing_spec": dqn_config["preprocessing_spec"],
            "worker_executes_preprocessing": True,
        }
        worker = SingleThreadedWorker(**worker_kwargs)

        print(worker.execute_timesteps(1000))

コード例 #2

0

ファイルを表示

    def test_multi_gpu_dqn_agent_compilation(self):
        """
        Checks that a DQN agent built from the multi-GPU config compiles and accepts an external batch.

        Intended to run on a multi-GPU system as well as on a CPU-only system (via fake-GPU logic).
        """
        root_logger.setLevel(DEBUG)
        config = config_from_path("configs/multi_gpu_dqn_for_random_env.json")
        env = RandomEnv.from_spec(self.random_env_spec)

        agent = DQNAgent.from_spec(
            config,
            state_space=env.state_space,
            action_space=env.action_space
        )
        print("Compiled DQN agent on multi-GPU system")

        # Hand-build one batch of the configured size and push it through a single update.
        num_records = config["update_spec"]["batch_size"]
        external_batch = {
            "states": env.state_space.sample(size=num_records),
            "actions": env.action_space.sample(size=num_records),
            "rewards": np.random.sample(size=num_records),
            "terminals": np.random.choice([True, False], size=num_records),
            "next_states": env.state_space.sample(size=num_records),
            # NOTE(review): weights are all zero; confirm this is intended for a pure
            # compile/run check rather than a meaningful update.
            "importance_weights": np.zeros(shape=(num_records, )),
        }
        agent.update(batch=external_batch)
        print("Performed an update from external batch")

コード例 #3

0

ファイルを表示

ファイル: test_readme_example.py プロジェクト: MegaYEye/rlgraph

    def test_readme_example(self):
        """
        Walks once through the README usage example: build a DQN agent for CartPole, pick an action,
        step the environment, record the transition and trigger an update.
        """
        from rlgraph.agents import DQNAgent
        from rlgraph.environments import OpenAIGymEnv

        environment = OpenAIGymEnv('CartPole-v0')
        config = config_from_path("../../examples/configs/dqn_cartpole.json")

        # The agent is created from the loaded config plus the environment's spaces.
        agent = DQNAgent.from_spec(
            config,
            state_space=environment.state_space,
            action_space=environment.action_space
        )

        # Ask for an action on the initial state, also requesting the preprocessed state back.
        first_state = environment.reset()
        preprocessed_state, action = agent.get_action(
            states=first_state, extra_returns="preprocessed_states"
        )

        # Apply the action to the environment.
        next_state, reward, terminal, info = environment.step(action)

        # Hand the resulting transition to the agent.
        transition = {
            "preprocessed_states": preprocessed_state,
            "actions": action,
            "internals": [],
            "next_states": next_state,
            "rewards": reward,
            "terminals": terminal,
        }
        agent.observe(**transition)

        # Trigger an update; the returned value is not inspected by this test.
        agent.update()

コード例 #4

0

ファイルを表示

    def test_double_dueling_dqn_on_cart_pole(self):
        """
        Trains a double + dueling DQNAgent on CartPole through a SingleThreadedWorker and checks
        the statistics of the run.
        """
        # Only used to obtain the state and action spaces.
        probe_env = OpenAIGymEnv("CartPole-v0")
        base_config = config_from_path("configs/dqn_agent_for_cartpole.json")
        overrides = {
            "double_q": True,
            "dueling_q": True,
            "state_space": probe_env.state_space,
            "action_space": probe_env.action_space,
            "observe_spec": {"buffer_size": 200},
            "execution_spec": {"seed": 156},
            "update_spec": {"update_interval": 4, "batch_size": 64, "sync_interval": 16},
            "optimizer_spec": {"type": "adam", "learning_rate": 0.05},
            "store_last_q_table": True,
        }
        agent = DQNAgent.from_spec(base_config, **overrides)

        def make_env():
            # Environment factory handed to the worker.
            return OpenAIGymEnv("CartPole-v0", seed=10)

        num_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=make_env,
            agent=agent,
            render=self.is_windows,
            worker_executes_preprocessing=False
        )
        stats = worker.execute_timesteps(num_steps, use_exploration=True)

        # Both counters must equal the requested number of steps.
        for counter in ("timesteps_executed", "env_frames"):
            self.assertEqual(stats[counter], num_steps)
        self.assertGreaterEqual(stats["mean_episode_reward"], 15)
        self.assertGreaterEqual(stats["max_episode_reward"], 160.0)
        self.assertLessEqual(stats["episodes_executed"], 100)

コード例 #5

0

ファイルを表示

ファイル: test_backends.py プロジェクト: EmpereurCC/RLgraph_exp

    def test_cartpole_with_worker(self):
        """
        Smoke-runs a DQN agent on CartPole through a SingleThreadedWorker for 1000 timesteps and prints the result.
        """
        # Only used to read the state and action spaces.
        probe_env = OpenAIGymEnv("CartPole-v0")
        dqn_config = config_from_path("configs/backend_performance_dqn_cartpole.json")

        # Discard the update settings that came with the config file.
        dqn_config["update_spec"] = None

        agent = DQNAgent.from_spec(
            dqn_config,
            state_space=probe_env.state_space,
            action_space=probe_env.action_space
        )

        def make_env():
            # Environment factory handed to the worker.
            return OpenAIGymEnv("CartPole-v0")

        worker_kwargs = {
            "env_spec": make_env,
            "agent": agent,
            "frameskip": 1,
            "num_environments": 1,
            "worker_executes_preprocessing": False,
        }
        worker = SingleThreadedWorker(**worker_kwargs)

        print(worker.execute_timesteps(1000))

コード例 #6

0

ファイルを表示

    def test_act(self):
        """
        Times agent.get_action on Pong states: first one call per sampled state, then a single call
        on a whole batch of the same size, and finally prints the component call-time profile.
        """
        env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
        agent_config = config_from_path("configs/ray_apex_for_pong.json")
        # On the pytorch backend a different memory type is selected.
        if get_backend() == "pytorch":
            agent_config["memory_spec"]["type"] = "mem_prioritized_replay"
        agent = DQNAgent.from_spec(
            agent_config,
            state_space=env.state_space,
            action_space=env.action_space
        )

        # One action on the reset state, then report the component call counter.
        agent.get_action(env.reset())
        print("Component call count = {}".format(Component.call_count))

        count = 200
        state_space = env.state_space

        # Timing 1: `count` separate single-state calls.
        single_states = state_space.sample(count)
        started = time.perf_counter()
        for single_state in single_states:
            agent.get_action(single_state)
        elapsed = time.perf_counter() - started

        print("Took {} s for {} separate actions, mean = {}".format(elapsed, count, elapsed / count))

        # Timing 2: one call on a batch of `count` states.
        state_batch = state_space.sample(count)
        started = time.perf_counter()
        agent.get_action(state_batch)
        elapsed = time.perf_counter() - started
        print("Took {} s for {} batched actions.".format(elapsed, count))

        print_call_chain(Component.call_times, False, 0.03)

コード例 #7

0

ファイルを表示

ファイル: test_dqn_agent_short_task_learning.py プロジェクト: theSoenke/rlgraph

    def test_double_dqn_on_2x2_grid_world_single_action_to_container(self):
        """
        Runs a DQN agent with a single integer action space on a 2x2 GridWorld configured for
        "ftj" actions, i.e. one integer is mapped to multiple actions instead of using container
        actions. Only prints the run statistics.
        """
        # "ftj" stands for forward + turn + jump.
        grid_spec = {
            "world": "2x2",
            "action_type": "ftj",
            "state_representation": "xy+orientation",
        }
        config = config_from_path(
            "configs/dqn_agent_for_2x2_gridworld_single_to_container.json"
        )
        # The worker executes preprocessing, so the spec is removed from the agent config.
        preprocessing_spec = config.pop("preprocessing_spec")

        flat_action_space = IntBox(0, 18)
        agent_kwargs = {
            "huber_loss": True,
            "double_q": True,
            "dueling_q": True,
            "state_space": FloatBox(shape=(4, )),
            "action_space": flat_action_space,
            "store_last_q_table": True,
        }
        agent = DQNAgent.from_spec(config, **agent_kwargs)

        def make_env():
            # Environment factory handed to the worker.
            return GridWorld.from_spec(grid_spec)

        num_steps = 10000
        worker = SingleThreadedWorker(
            env_spec=make_env,
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=True,
            render=False
        )
        stats = worker.execute_timesteps(num_steps, use_exploration=True)
        print(stats)

コード例 #8

0

ファイルを表示

ファイル: test_dqn_agent_short_task_learning.py プロジェクト: theSoenke/rlgraph

    def test_dqn_on_cart_pole(self):
        """
        Trains a plain DQNAgent (double_q and dueling_q both off) on CartPole through a
        SingleThreadedWorker and checks the statistics of the run.
        """
        # Only used to obtain the state and action spaces.
        probe_env = OpenAIGymEnv("CartPole-v0")
        base_config = config_from_path("configs/dqn_agent_for_cartpole.json")
        overrides = {
            "double_q": False,
            "dueling_q": False,
            "state_space": probe_env.state_space,
            "action_space": probe_env.action_space,
            "execution_spec": {"seed": 15},
            "update_spec": {"update_interval": 4, "batch_size": 24, "sync_interval": 64},
            "optimizer_spec": {"type": "adam", "learning_rate": 0.05},
            "store_last_q_table": True,
        }
        agent = DQNAgent.from_spec(base_config, **overrides)

        def make_env():
            # Environment factory handed to the worker.
            return OpenAIGymEnv("CartPole-v0", seed=15)

        num_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=make_env,
            agent=agent,
            render=self.is_windows,
            worker_executes_preprocessing=False
        )
        stats = worker.execute_timesteps(num_steps, use_exploration=True)

        # Both counters must equal the requested number of steps.
        for counter in ("timesteps_executed", "env_frames"):
            self.assertEqual(stats[counter], num_steps)
        self.assertGreaterEqual(stats["mean_episode_reward"], 25)
        self.assertGreaterEqual(stats["max_episode_reward"], 100.0)
        self.assertLessEqual(stats["episodes_executed"], 200)

コード例 #9

0

ファイルを表示

ファイル: test_dqn_agent_short_task_learning.py プロジェクト: theSoenke/rlgraph

    def test_double_dqn_on_2x2_grid_world(self):
        """
        Creates a DQNAgent and runs it via a SingleThreadedWorker on a simple 2x2 GridWorld.

        After 1000 exploratory timesteps, the run statistics and the last stored q-table are
        checked. NOTE(review): `double_q` is not passed here, so the "double" part presumably
        comes from the config file - confirm.
        """
        env_spec = dict(world="2x2")
        # Only used to obtain the action space.
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/dqn_agent_for_2x2_gridworld.json")
        # The worker executes preprocessing, so the spec is removed from the agent config.
        preprocessing_spec = agent_config.pop("preprocessing_spec")
        agent = DQNAgent.from_spec(
            agent_config,
            dueling_q=False,
            state_space=self.grid_world_2x2_flattened_state_space,
            action_space=dummy_env.action_space,
            execution_spec=dict(seed=10),
            update_spec=dict(update_interval=4,
                             batch_size=24,
                             sync_interval=32),
            optimizer_spec=dict(type="adam", learning_rate=0.05),
            store_last_q_table=True)

        time_steps = 1000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=True)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print("STATES:\n{}".format(agent.last_q_table["states"]))
        # np.round is used instead of np.round_, which was removed in NumPy 2.0.
        print("\n\nQ(s,a)-VALUES:\n{}".format(
            np.round(agent.last_q_table["q_values"], decimals=2)))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
        self.assertGreaterEqual(results["max_episode_reward"], 0.0)
        self.assertLessEqual(results["episodes_executed"], 350)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (1.0, 0, 0, 0): (-1, -5, 0, -1),
            (0, 1.0, 0, 0): (-1, 1, 0, 0)
        }
        for state, q_values in zip(agent.last_q_table["states"],
                                   agent.last_q_table["q_values"]):
            state, q_values = tuple(state), tuple(q_values)
            # assertIn (rather than a bare assert) still runs when Python is started with -O.
            self.assertIn(
                state, expected_q_values_per_state,
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state))
            recursive_assert_almost_equal(q_values,
                                          expected_q_values_per_state[state],
                                          decimals=0)

コード例 #10

0

ファイルを表示

 def test_dqn_compilation(self):
     """
     Builds a DQN agent for Pong from its config file; the test passes if construction succeeds.
     """
     pong_env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
     pong_config = config_from_path("configs/dqn_agent_for_pong.json")
     # The agent is built on the environment's own (unmodified) spaces.
     spaces = {
         "state_space": pong_env.state_space,
         "action_space": pong_env.action_space,
     }
     agent = DQNAgent.from_spec(pong_config, **spaces)

コード例 #11

0

ファイルを表示

 def test_dqn_compilation(self):
     """
     Builds a DQN agent for an OpenAI Pong env from the pytorch test config; nothing is executed
     beyond constructing the agent.
     """
     pong_env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
     pytorch_config = config_from_path("configs/dqn_pytorch_test.json")
     # The agent is built on the environment's own (unmodified) spaces.
     spaces = {
         "state_space": pong_env.state_space,
         "action_space": pong_env.action_space,
     }
     agent = DQNAgent.from_spec(pytorch_config, **spaces)

コード例 #12

0

ファイルを表示

    def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
        """
        Tests if the multi gpu strategy can learn successfully on a multi gpu system, but
        also runs on a CPU-only system using fake-GPU logic for testing purposes.

        Runs 1000 exploratory timesteps on a 2x2 GridWorld, then checks the run statistics and
        the last stored q-table.
        """
        env_spec = dict(type="grid-world", world="2x2")
        # Only used to obtain the action space.
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/multi_gpu_dqn_for_2x2_gridworld.json")
        # The worker executes preprocessing, so the spec is removed from the agent config.
        preprocessing_spec = agent_config.pop("preprocessing_spec")
        agent = DQNAgent.from_spec(
            agent_config,
            state_space=self.grid_world_2x2_flattened_state_space,
            action_space=dummy_env.action_space,
        )

        time_steps = 1000
        worker = SingleThreadedWorker(env_spec=env_spec,
                                      agent=agent,
                                      worker_executes_preprocessing=True,
                                      preprocessing_spec=preprocessing_spec)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        # Merge q-tables of all four GPUs.
        # NOTE(review): the (48, 4) shape is hard-coded; presumably tied to the batch size in
        # the config file - confirm.
        agent.last_q_table["q_values"] = agent.last_q_table[
            "q_values"].reshape((48, 4))

        print("STATES:\n{}".format(agent.last_q_table["states"]))
        # np.round is used instead of np.round_, which was removed in NumPy 2.0.
        print("\n\nQ(s,a)-VALUES:\n{}".format(
            np.round(agent.last_q_table["q_values"], decimals=2)))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
        self.assertGreaterEqual(results["max_episode_reward"], 0.0)
        self.assertLessEqual(results["episodes_executed"], time_steps / 2)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (1.0, 0, 0, 0): (-1, -5, 0, -1),
            (0, 1.0, 0, 0): (-1, 1, 0, 0)
        }
        for state, q_values in zip(agent.last_q_table["states"],
                                   agent.last_q_table["q_values"]):
            state, q_values = tuple(state), tuple(q_values)
            # assertIn (rather than a bare assert) still runs when Python is started with -O.
            self.assertIn(
                state, expected_q_values_per_state,
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state))
            recursive_assert_almost_equal(q_values,
                                          expected_q_values_per_state[state],
                                          decimals=0)

コード例 #13

0

ファイルを表示

    def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
        """
        Tests if the multi gpu strategy can learn successfully on a multi gpu system.

        THIS TEST REQUIRES A MULTI GPU SYSTEM.

        Runs 400 exploratory timesteps on a 2x2 GridWorld, then checks the run statistics and
        the last stored q-table.
        """
        #root_logger.setLevel(DEBUG)  # test
        # The same environment instance provides the spaces and is handed to the worker.
        env = GridWorld("2x2")
        agent = DQNAgent.from_spec(
            config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json"),
            dueling_q=False,
            state_space=env.state_space,
            action_space=env.action_space,
            observe_spec=dict(buffer_size=100),
            # Rule of thumb for multi-GPU (with n GPUs): n-fold batch-size and learning rate w/ respect to 1 GPU.
            update_spec=dict(update_interval=4, batch_size=48, sync_interval=32),
            optimizer_spec=dict(type="adam", learning_rate=0.15),
            store_last_q_table=True
        )

        time_steps = 400
        worker = SingleThreadedWorker(env_spec=lambda: env, agent=agent, worker_executes_preprocessing=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print("STATES:\n{}".format(agent.last_q_table["states"]))
        # np.round is used instead of np.round_, which was removed in NumPy 2.0.
        print("\n\nQ(s,a)-VALUES:\n{}".format(np.round(agent.last_q_table["q_values"], decimals=2)))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
        self.assertGreaterEqual(results["max_episode_reward"], 0.0)
        self.assertLessEqual(results["episodes_executed"], 250)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (1.0, 0, 0, 0): (-1, -5, 0, -1),
            (0, 1.0, 0, 0): (-1, 1, 0, 0)
        }
        for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
            state, q_values = tuple(state), tuple(q_values)
            # assertIn (rather than a bare assert) still runs when Python is started with -O.
            self.assertIn(
                state, expected_q_values_per_state,
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state))
            recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)

コード例 #14

0

ファイルを表示

ファイル: test_dqn_agent_short_task_learning.py プロジェクト: theSoenke/rlgraph

    def test_double_dueling_dqn_on_cart_pole(self):
        """
        Trains a double + dueling DQNAgent (with an explicit dueling policy spec) on CartPole
        through a SingleThreadedWorker and checks the statistics of the run.
        """
        gym_env = "CartPole-v0"
        # Only used to obtain the state and action spaces.
        probe_env = OpenAIGymEnv(gym_env)
        cartpole_config = config_from_path("configs/dqn_agent_for_cartpole.json")

        # Policy settings for the dueling setup are put into the loaded config.
        pre_network_layer = {"type": "dense", "units": 3}
        cartpole_config["policy_spec"] = {
            "units_state_value_stream": 3,
            "action_adapter_spec": {"pre_network_spec": [pre_network_layer]},
        }

        overrides = {
            "double_q": True,
            "dueling_q": True,
            "state_space": probe_env.state_space,
            "action_space": probe_env.action_space,
            "execution_spec": {"seed": 13},
            "update_spec": {"update_interval": 4, "batch_size": 64, "sync_interval": 16},
            "optimizer_spec": {"type": "adam", "learning_rate": 0.01},
            "store_last_q_table": True,
        }
        agent = DQNAgent.from_spec(cartpole_config, **overrides)

        def make_env():
            # Environment factory handed to the worker.
            return OpenAIGymEnv(gym_env, seed=10)

        num_steps = 3000
        worker = SingleThreadedWorker(
            env_spec=make_env,
            agent=agent,
            render=self.is_windows,
            worker_executes_preprocessing=False
        )
        stats = worker.execute_timesteps(num_steps, use_exploration=True)

        # Both counters must equal the requested number of steps.
        for counter in ("timesteps_executed", "env_frames"):
            self.assertEqual(stats[counter], num_steps)
        self.assertGreaterEqual(stats["mean_episode_reward"], 25)
        self.assertLessEqual(stats["episodes_executed"], 150)

コード例 #15

0

ファイルを表示

    def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
        """
        Checks that the multi-GPU strategy learns a 2x2 GridWorld; meant to run on a multi-GPU
        system as well as on a CPU-only system (via fake-GPU logic).

        Runs 2000 exploratory timesteps, then checks the run statistics and the Q-values the
        agent returns for two one-hot encoded states.
        """
        grid_spec = {"type": "grid-world", "world": "2x2"}
        # Only used to obtain the action space.
        probe_env = GridWorld.from_spec(grid_spec)
        config = config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json")
        # The worker executes preprocessing, so the spec is removed from the agent config.
        preprocessing_spec = config.pop("preprocessing_spec")
        agent = DQNAgent.from_spec(
            config,
            state_space=self.grid_world_2x2_flattened_state_space,
            action_space=probe_env.action_space
        )

        num_steps = 2000
        worker = SingleThreadedWorker(
            env_spec=grid_spec,
            agent=agent,
            worker_executes_preprocessing=True,
            preprocessing_spec=preprocessing_spec
        )
        stats = worker.execute_timesteps(num_steps, use_exploration=True)

        # Both counters must equal the requested number of steps.
        for counter in ("timesteps_executed", "env_frames"):
            self.assertEqual(stats[counter], num_steps)
        self.assertGreaterEqual(stats["mean_episode_reward"], -4.5)
        self.assertGreaterEqual(stats["max_episode_reward"], 0.0)
        self.assertLessEqual(stats["episodes_executed"], num_steps / 2)

        # Query the learnt Q-values for states 0 and 1 (one-hot over 4 entries).
        probe_states = one_hot(np.array([0, 1]), depth=4)
        q_values = agent.graph_executor.execute(("get_q_values", probe_states))[:]
        expected_rows = (
            (0.8, -5, 0.9, 0.8),
            (0.8, 1.0, 0.9, 0.9),
        )
        for row_index, expected_row in enumerate(expected_rows):
            recursive_assert_almost_equal(q_values[row_index], expected_row, decimals=1)

コード例 #16

0

ファイルを表示

ファイル: test_dqn_agent_short_task_learning.py プロジェクト: theSoenke/rlgraph

    def test_double_dqn_on_2x2_grid_world_with_container_actions(self):
        """
        Creates a double DQNAgent and runs it via a SingleThreadedWorker on a simple 2x2 GridWorld
        using container actions.

        After 10000 exploratory timesteps, the run statistics are checked, as are the "forward"
        and "jump" q-values of the last stored q-table.
        """
        # ftj = forward + turn + jump
        env_spec = dict(world="2x2",
                        action_type="ftj",
                        state_representation="xy+orientation")
        # Only used to obtain the (container) action space.
        dummy_env = GridWorld.from_spec(env_spec)
        agent_config = config_from_path(
            "configs/dqn_agent_for_2x2_gridworld_with_container_actions.json")
        # The worker executes preprocessing, so the spec is removed from the agent config.
        preprocessing_spec = agent_config.pop("preprocessing_spec")

        agent = DQNAgent.from_spec(agent_config,
                                   double_q=True,
                                   dueling_q=False,
                                   state_space=FloatBox(shape=(4, )),
                                   action_space=dummy_env.action_space,
                                   execution_spec=dict(seed=15),
                                   store_last_q_table=True)

        time_steps = 10000
        worker = SingleThreadedWorker(
            env_spec=lambda: GridWorld.from_spec(env_spec),
            agent=agent,
            preprocessing_spec=preprocessing_spec,
            worker_executes_preprocessing=True,
            render=False)
        results = worker.execute_timesteps(time_steps, use_exploration=True)

        print("LAST q-table:\n{}".format(agent.last_q_table))

        self.assertEqual(results["timesteps_executed"], time_steps)
        self.assertEqual(results["env_frames"], time_steps)
        self.assertGreaterEqual(results["mean_episode_reward"], -7)
        self.assertGreaterEqual(results["max_episode_reward"], -1.0)
        self.assertLessEqual(results["episodes_executed"], time_steps / 3)

        # Check q-table for correct values.
        expected_q_values_per_state = {
            (0., 0., -1., 0.): {
                "forward": (-5.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 0., 1., 0.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 0., 0., -1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 0., 0., 1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., -1., 0.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., 1., 0.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., 0., -1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
            (0., 1., 0., 1.): {
                "forward": (0.0, -1.0, -1.0),
                "jump": (0.0, -1.0)
            },
        }
        for state, q_values_forward, q_values_jump in zip(
                agent.last_q_table["states"],
                agent.last_q_table["q_values"]["forward"],
                agent.last_q_table["q_values"]["jump"]):
            state, q_values_forward, q_values_jump = tuple(state), tuple(
                q_values_forward), tuple(q_values_jump)
            # assertIn (rather than a bare assert) still runs when Python is started with -O.
            self.assertIn(
                state, expected_q_values_per_state,
                "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state))
            recursive_assert_almost_equal(
                q_values_forward,
                expected_q_values_per_state[state]["forward"],
                decimals=0)
            recursive_assert_almost_equal(
                q_values_jump,
                expected_q_values_per_state[state]["jump"],
                decimals=0)