def test_pong_with_worker(self):
    env_spec = dict(
        type="openai",
        gym_env="PongNoFrameskip-v4",
        # The frameskip in the agent config will trigger worker skips, this
        # is used for the internal env.
        frameskip=4,
        max_num_noops=30,
        episodic_life=False
    )
    env = OpenAIGymEnv.from_spec(env_spec)
    agent_config = config_from_path("configs/backend_performance_dqn_pong.json")

    # Test cpu settings for batching here.
    agent_config["update_spec"] = None

    agent = DQNAgent.from_spec(
        # Uses 2015 DQN parameters as closely as possible.
        agent_config,
        state_space=env.state_space,
        # Try with "reduced" action space (actually only 3 actions: up, down, no-op).
        action_space=env.action_space
    )

    worker = SingleThreadedWorker(
        env_spec=env_spec,
        agent=agent,
        frameskip=1,
        preprocessing_spec=agent_config["preprocessing_spec"],
        worker_executes_preprocessing=True
    )
    result = worker.execute_timesteps(1000)
    print(result)

def test_multi_gpu_dqn_agent_compilation(self):
    """
    Tests if the multi gpu strategy can compile successfully on a multi gpu system, but
    also runs on a CPU-only system using fake-GPU logic for testing purposes.
    """
    root_logger.setLevel(DEBUG)
    agent_config = config_from_path("configs/multi_gpu_dqn_for_random_env.json")
    environment = RandomEnv.from_spec(self.random_env_spec)
    agent = DQNAgent.from_spec(
        agent_config,
        state_space=environment.state_space,
        action_space=environment.action_space
    )
    print("Compiled DQN agent on multi-GPU system")

    # Do an update from an external batch.
    batch_size = agent_config["update_spec"]["batch_size"]
    external_batch = dict(
        states=environment.state_space.sample(size=batch_size),
        actions=environment.action_space.sample(size=batch_size),
        rewards=np.random.sample(size=batch_size),
        terminals=np.random.choice([True, False], size=batch_size),
        next_states=environment.state_space.sample(size=batch_size),
        importance_weights=np.zeros(shape=(batch_size,))
    )
    agent.update(batch=external_batch)
    print("Performed an update from external batch")

def test_readme_example(self):
    """
    Tests the DQN example from the repository README on the CartPole Env.
    """
    from rlgraph.agents import DQNAgent
    from rlgraph.environments import OpenAIGymEnv

    environment = OpenAIGymEnv('CartPole-v0')
    config = config_from_path("../../examples/configs/dqn_cartpole.json")

    # Create from a .json file or dict, see agent API for all
    # possible configuration parameters.
    agent = DQNAgent.from_spec(
        config,
        state_space=environment.state_space,
        action_space=environment.action_space
    )

    # Get an action, take a step, observe reward.
    state = environment.reset()
    preprocessed_state, action = agent.get_action(
        states=state,
        extra_returns="preprocessed_states"
    )

    # Execute step in environment.
    next_state, reward, terminal, info = environment.step(action)

    # Observe result.
    agent.observe(
        preprocessed_states=preprocessed_state,
        actions=action,
        internals=[],
        next_states=next_state,
        rewards=reward,
        terminals=terminal
    )

    # Call update when desired:
    loss = agent.update()

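# The README test above exercises a single interaction step of the agent API
# (get_action -> env.step -> observe -> update). As an illustrative sketch only
# (not part of this test suite), the same calls can be wrapped into a minimal
# training loop; the function name and the update-per-episode cadence below are
# assumptions, not taken from the RLgraph examples.
def run_cartpole_training_loop(agent, environment, num_episodes=100):
    for _ in range(num_episodes):
        state = environment.reset()
        terminal = False
        while not terminal:
            # Ask the agent for an action plus the preprocessed state it acted on.
            preprocessed_state, action = agent.get_action(
                states=state, extra_returns="preprocessed_states"
            )
            # Step the environment and hand the transition back to the agent.
            next_state, reward, terminal, _ = environment.step(action)
            agent.observe(
                preprocessed_states=preprocessed_state, actions=action, internals=[],
                next_states=next_state, rewards=reward, terminals=terminal
            )
            state = next_state
        # Update once per episode (arbitrary choice for this sketch).
        agent.update()
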
def test_double_dueling_dqn_on_cart_pole(self):
    """
    Creates a double and dueling DQNAgent and runs it via a Runner on the CartPole Env.
    """
    dummy_env = OpenAIGymEnv("CartPole-v0")
    agent = DQNAgent.from_spec(
        config_from_path("configs/dqn_agent_for_cartpole.json"),
        double_q=True,
        dueling_q=True,
        state_space=dummy_env.state_space,
        action_space=dummy_env.action_space,
        observe_spec=dict(buffer_size=200),
        execution_spec=dict(seed=156),
        update_spec=dict(update_interval=4, batch_size=64, sync_interval=16),
        optimizer_spec=dict(type="adam", learning_rate=0.05),
        store_last_q_table=True
    )

    time_steps = 3000
    worker = SingleThreadedWorker(
        env_spec=lambda: OpenAIGymEnv("CartPole-v0", seed=10),
        agent=agent,
        render=self.is_windows,
        worker_executes_preprocessing=False
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    #print("STATES:\n{}".format(agent.last_q_table["states"]))
    #print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], 15)
    self.assertGreaterEqual(results["max_episode_reward"], 160.0)
    self.assertLessEqual(results["episodes_executed"], 100)

def test_cartpole_with_worker(self):
    env = OpenAIGymEnv("CartPole-v0")
    agent_config = config_from_path("configs/backend_performance_dqn_cartpole.json")

    # Test cpu settings for batching here.
    agent_config["update_spec"] = None

    agent = DQNAgent.from_spec(
        # Uses 2015 DQN parameters as closely as possible.
        agent_config,
        state_space=env.state_space,
        action_space=env.action_space
    )

    worker = SingleThreadedWorker(
        env_spec=lambda: OpenAIGymEnv("CartPole-v0"),
        agent=agent,
        frameskip=1,
        num_environments=1,
        worker_executes_preprocessing=False
    )
    result = worker.execute_timesteps(1000)
    print(result)

def test_act(self):
    env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
    agent_config = config_from_path("configs/ray_apex_for_pong.json")
    if get_backend() == "pytorch":
        agent_config["memory_spec"]["type"] = "mem_prioritized_replay"
    agent = DQNAgent.from_spec(
        # Uses 2015 DQN parameters as closely as possible.
        agent_config,
        state_space=env.state_space,
        # Try with "reduced" action space (actually only 3 actions: up, down, no-op).
        action_space=env.action_space
    )
    state = env.reset()
    action = agent.get_action(state)
    print("Component call count = {}".format(Component.call_count))

    state_space = env.state_space
    count = 200

    # Time `count` separate single-state actions.
    samples = state_space.sample(count)
    start = time.perf_counter()
    for s in samples:
        action = agent.get_action(s)
    end = time.perf_counter() - start
    print("Took {} s for {} separate actions, mean = {}".format(end, count, end / count))

    # Now test the same number of actions as a single batched call.
    samples = state_space.sample(count)
    start = time.perf_counter()
    action = agent.get_action(samples)
    end = time.perf_counter() - start
    print("Took {} s for {} batched actions.".format(end, count))

    profile = Component.call_times
    print_call_chain(profile, False, 0.03)

def test_double_dqn_on_2x2_grid_world_single_action_to_container(self):
    """
    Tests how DQN solves a mapping of a single integer to multiple actions
    (as opposed to using container actions).
    """
    # ftj = forward + turn + jump
    env_spec = dict(world="2x2", action_type="ftj", state_representation="xy+orientation")
    agent_config = config_from_path("configs/dqn_agent_for_2x2_gridworld_single_to_container.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    action_space = IntBox(0, 18)
    agent = DQNAgent.from_spec(
        agent_config,
        huber_loss=True,
        double_q=True,
        dueling_q=True,
        state_space=FloatBox(shape=(4,)),
        action_space=action_space,
        store_last_q_table=True
    )

    time_steps = 10000
    worker = SingleThreadedWorker(
        env_spec=lambda: GridWorld.from_spec(env_spec),
        agent=agent,
        preprocessing_spec=preprocessing_spec,
        worker_executes_preprocessing=True,
        render=False
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)
    print(results)

def test_dqn_on_cart_pole(self):
    """
    Creates a DQNAgent and runs it via a Runner on the CartPole Env.
    """
    dummy_env = OpenAIGymEnv("CartPole-v0")
    agent = DQNAgent.from_spec(
        config_from_path("configs/dqn_agent_for_cartpole.json"),
        double_q=False,
        dueling_q=False,
        state_space=dummy_env.state_space,
        action_space=dummy_env.action_space,
        execution_spec=dict(seed=15),
        update_spec=dict(update_interval=4, batch_size=24, sync_interval=64),
        optimizer_spec=dict(type="adam", learning_rate=0.05),
        store_last_q_table=True
    )

    time_steps = 3000
    worker = SingleThreadedWorker(
        env_spec=lambda: OpenAIGymEnv("CartPole-v0", seed=15),
        agent=agent,
        render=self.is_windows,
        worker_executes_preprocessing=False
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], 25)
    self.assertGreaterEqual(results["max_episode_reward"], 100.0)
    self.assertLessEqual(results["episodes_executed"], 200)

def test_double_dqn_on_2x2_grid_world(self):
    """
    Creates a double DQNAgent and runs it via a Runner on a simple 2x2 GridWorld.
    """
    env_spec = dict(world="2x2")
    dummy_env = GridWorld.from_spec(env_spec)
    agent_config = config_from_path("configs/dqn_agent_for_2x2_gridworld.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    agent = DQNAgent.from_spec(
        agent_config,
        dueling_q=False,
        state_space=self.grid_world_2x2_flattened_state_space,
        action_space=dummy_env.action_space,
        execution_spec=dict(seed=10),
        update_spec=dict(update_interval=4, batch_size=24, sync_interval=32),
        optimizer_spec=dict(type="adam", learning_rate=0.05),
        store_last_q_table=True
    )

    time_steps = 1000
    worker = SingleThreadedWorker(
        env_spec=lambda: GridWorld.from_spec(env_spec),
        agent=agent,
        preprocessing_spec=preprocessing_spec,
        worker_executes_preprocessing=True
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    print("STATES:\n{}".format(agent.last_q_table["states"]))
    print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], 350)

    # Check q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)

def test_dqn_compilation(self):
    """
    Tests DQN Agent compilation.
    """
    env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
    agent_config = config_from_path("configs/dqn_agent_for_pong.json")
    agent = DQNAgent.from_spec(
        # Uses 2015 DQN parameters as closely as possible.
        agent_config,
        state_space=env.state_space,
        # Try with "reduced" action space (actually only 3 actions: up, down, no-op).
        action_space=env.action_space
    )

def test_dqn_compilation(self):
    """
    Tests DQN Agent compilation with the PyTorch test config on the openAI Pong Env.
    """
    env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
    agent_config = config_from_path("configs/dqn_pytorch_test.json")
    agent = DQNAgent.from_spec(
        # Uses 2015 DQN parameters as closely as possible.
        agent_config,
        state_space=env.state_space,
        # Try with "reduced" action space (actually only 3 actions: up, down, no-op).
        action_space=env.action_space
    )

def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
    """
    Tests if the multi gpu strategy can learn successfully on a multi gpu system, but
    also runs on a CPU-only system using fake-GPU logic for testing purposes.
    """
    env_spec = dict(type="grid-world", world="2x2")
    dummy_env = GridWorld.from_spec(env_spec)
    agent_config = config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    agent = DQNAgent.from_spec(
        agent_config,
        state_space=self.grid_world_2x2_flattened_state_space,
        action_space=dummy_env.action_space,
    )

    time_steps = 1000
    worker = SingleThreadedWorker(
        env_spec=env_spec,
        agent=agent,
        worker_executes_preprocessing=True,
        preprocessing_spec=preprocessing_spec
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    # Merge q-tables of all four GPUs:
    agent.last_q_table["q_values"] = agent.last_q_table["q_values"].reshape((48, 4))

    print("STATES:\n{}".format(agent.last_q_table["states"]))
    print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], time_steps / 2)

    # Check q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)

def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
    """
    Tests if the multi gpu strategy can learn successfully on a multi gpu system.
    THIS TEST REQUIRES A MULTI GPU SYSTEM.
    """
    #root_logger.setLevel(DEBUG)
    env = GridWorld("2x2")
    agent = DQNAgent.from_spec(
        config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json"),
        dueling_q=False,
        state_space=env.state_space,
        action_space=env.action_space,
        observe_spec=dict(buffer_size=100),
        # Rule of thumb for multi-GPU (with n GPUs): n-fold batch-size and learning rate w/ respect to 1 GPU.
        update_spec=dict(update_interval=4, batch_size=48, sync_interval=32),
        optimizer_spec=dict(type="adam", learning_rate=0.15),
        store_last_q_table=True
    )

    time_steps = 400
    worker = SingleThreadedWorker(
        env_spec=lambda: env,
        agent=agent,
        worker_executes_preprocessing=False
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    print("STATES:\n{}".format(agent.last_q_table["states"]))
    print("\n\nQ(s,a)-VALUES:\n{}".format(np.round_(agent.last_q_table["q_values"], decimals=2)))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], 250)

    # Check q-table for correct values.
    expected_q_values_per_state = {
        (1.0, 0, 0, 0): (-1, -5, 0, -1),
        (0, 1.0, 0, 0): (-1, 1, 0, 0)
    }
    for state, q_values in zip(agent.last_q_table["states"], agent.last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)

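# The q-table assertion loop above appears verbatim in several of these grid-world
# tests. As an illustrative sketch only (this helper is not part of the RLgraph test
# utilities; the name is an assumption), the check could be factored out like this:
def assert_q_table_matches(last_q_table, expected_q_values_per_state):
    # Compare every recorded (state, q_values) pair against the expected table and
    # fail on any state that should never appear (e.g. terminal states).
    for state, q_values in zip(last_q_table["states"], last_q_table["q_values"]):
        state, q_values = tuple(state), tuple(q_values)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values, expected_q_values_per_state[state], decimals=0)
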
def test_double_dueling_dqn_on_cart_pole(self):
    """
    Creates a double and dueling DQNAgent and runs it via a Runner on the CartPole Env.
    """
    gym_env = "CartPole-v0"
    dummy_env = OpenAIGymEnv(gym_env)
    config_ = config_from_path("configs/dqn_agent_for_cartpole.json")

    # Add dueling config to agent.
    config_["policy_spec"] = {
        "units_state_value_stream": 3,
        "action_adapter_spec": {
            "pre_network_spec": [{
                "type": "dense",
                "units": 3
            }]
        }
    }
    agent = DQNAgent.from_spec(
        config_,
        double_q=True,
        dueling_q=True,
        state_space=dummy_env.state_space,
        action_space=dummy_env.action_space,
        execution_spec=dict(seed=13),
        update_spec=dict(update_interval=4, batch_size=64, sync_interval=16),
        optimizer_spec=dict(type="adam", learning_rate=0.01),
        store_last_q_table=True
    )

    time_steps = 3000
    worker = SingleThreadedWorker(
        env_spec=lambda: OpenAIGymEnv(gym_env, seed=10),
        agent=agent,
        render=self.is_windows,
        worker_executes_preprocessing=False
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], 25)
    self.assertLessEqual(results["episodes_executed"], 150)

def test_multi_gpu_dqn_agent_learning_test_gridworld_2x2(self):
    """
    Tests if the multi gpu strategy can learn successfully on a multi gpu system, but
    also runs on a CPU-only system using fake-GPU logic for testing purposes.
    """
    env_spec = dict(type="grid-world", world="2x2")
    dummy_env = GridWorld.from_spec(env_spec)
    agent_config = config_from_path("configs/multi_gpu_dqn_for_2x2_gridworld.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    agent = DQNAgent.from_spec(
        agent_config,
        state_space=self.grid_world_2x2_flattened_state_space,
        action_space=dummy_env.action_space,
    )

    time_steps = 2000
    worker = SingleThreadedWorker(
        env_spec=env_spec,
        agent=agent,
        worker_executes_preprocessing=True,
        preprocessing_spec=preprocessing_spec
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -4.5)
    self.assertGreaterEqual(results["max_episode_reward"], 0.0)
    self.assertLessEqual(results["episodes_executed"], time_steps / 2)

    # Check all learnt Q-values.
    q_values = agent.graph_executor.execute(("get_q_values", one_hot(np.array([0, 1]), depth=4)))[:]
    recursive_assert_almost_equal(q_values[0], (0.8, -5, 0.9, 0.8), decimals=1)
    recursive_assert_almost_equal(q_values[1], (0.8, 1.0, 0.9, 0.9), decimals=1)

def test_double_dqn_on_2x2_grid_world_with_container_actions(self):
    """
    Creates a double DQNAgent and runs it via a Runner on a simple 2x2 GridWorld
    using container actions.
    """
    # ftj = forward + turn + jump
    env_spec = dict(world="2x2", action_type="ftj", state_representation="xy+orientation")
    dummy_env = GridWorld.from_spec(env_spec)
    agent_config = config_from_path("configs/dqn_agent_for_2x2_gridworld_with_container_actions.json")
    preprocessing_spec = agent_config.pop("preprocessing_spec")
    agent = DQNAgent.from_spec(
        agent_config,
        double_q=True,
        dueling_q=False,
        state_space=FloatBox(shape=(4,)),
        action_space=dummy_env.action_space,
        execution_spec=dict(seed=15),
        store_last_q_table=True
    )

    time_steps = 10000
    worker = SingleThreadedWorker(
        env_spec=lambda: GridWorld.from_spec(env_spec),
        agent=agent,
        preprocessing_spec=preprocessing_spec,
        worker_executes_preprocessing=True,
        render=False
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)

    print("LAST q-table:\n{}".format(agent.last_q_table))

    self.assertEqual(results["timesteps_executed"], time_steps)
    self.assertEqual(results["env_frames"], time_steps)
    self.assertGreaterEqual(results["mean_episode_reward"], -7)
    self.assertGreaterEqual(results["max_episode_reward"], -1.0)
    self.assertLessEqual(results["episodes_executed"], time_steps / 3)

    # Check q-table for correct values.
    expected_q_values_per_state = {
        (0., 0., -1., 0.): {"forward": (-5.0, -1.0, -1.0), "jump": (0.0, -1.0)},
        (0., 0., 1., 0.): {"forward": (0.0, -1.0, -1.0), "jump": (0.0, -1.0)},
        (0., 0., 0., -1.): {"forward": (0.0, -1.0, -1.0), "jump": (0.0, -1.0)},
        (0., 0., 0., 1.): {"forward": (0.0, -1.0, -1.0), "jump": (0.0, -1.0)},
        (0., 1., -1., 0.): {"forward": (0.0, -1.0, -1.0), "jump": (0.0, -1.0)},
        (0., 1., 1., 0.): {"forward": (0.0, -1.0, -1.0), "jump": (0.0, -1.0)},
        (0., 1., 0., -1.): {"forward": (0.0, -1.0, -1.0), "jump": (0.0, -1.0)},
        (0., 1., 0., 1.): {"forward": (0.0, -1.0, -1.0), "jump": (0.0, -1.0)},
    }
    for state, q_values_forward, q_values_jump in zip(
            agent.last_q_table["states"],
            agent.last_q_table["q_values"]["forward"],
            agent.last_q_table["q_values"]["jump"]):
        state, q_values_forward, q_values_jump = tuple(state), tuple(q_values_forward), tuple(q_values_jump)
        assert state in expected_q_values_per_state, \
            "ERROR: state '{}' not expected in q-table as it's a terminal state!".format(state)
        recursive_assert_almost_equal(q_values_forward, expected_q_values_per_state[state]["forward"], decimals=0)
        recursive_assert_almost_equal(q_values_jump, expected_q_values_per_state[state]["jump"], decimals=0)