def test_insert_demos(self):
    """
    Checks that records can be written into the DQFD agent's demo memory,
    first as a single data point and then as a batch of ten.
    """
    environment = OpenAIGymEnv.from_spec(self.env_spec)
    config = config_from_path("configs/dqfd_agent_for_cartpole.json")
    dqfd_agent = DQFDAgent.from_spec(
        config,
        state_space=environment.state_space,
        action_space=environment.action_space
    )
    terminal_space = BoolBox(add_batch_rank=True)
    reward_space = FloatBox(add_batch_rank=True)

    # One single record (all items sampled with batch size 1).
    single_record = dict(
        preprocessed_states=dqfd_agent.preprocessed_state_space.with_batch_rank().sample(1),
        actions=environment.action_space.with_batch_rank().sample(1),
        rewards=reward_space.sample(1),
        next_states=dqfd_agent.preprocessed_state_space.with_batch_rank().sample(1),
        terminals=terminal_space.sample(1),
    )
    dqfd_agent.observe_demos(**single_record)

    # A whole batch of 10 records.
    demo_batch = dict(
        preprocessed_states=dqfd_agent.preprocessed_state_space.sample(10),
        actions=environment.action_space.sample(10),
        rewards=FloatBox().sample(10),
        terminals=terminal_space.sample(10),
        next_states=dqfd_agent.preprocessed_state_space.sample(10)
    )
    dqfd_agent.observe_demos(**demo_batch)
def test_post_processing(self):
    """
    Calls the PPO agent's external batch post-processing API on a sampled batch.
    """
    pong_env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
    config = config_from_path("configs/ppo_agent_for_pong.json")
    ppo_agent = PPOAgent.from_spec(
        config, state_space=pong_env.state_space, action_space=pong_env.action_space
    )

    num_samples = 200
    states = ppo_agent.preprocessed_state_space.sample(num_samples)
    reward_space = FloatBox(add_batch_rank=True)
    terminal_space = BoolBox(add_batch_rank=True)
    sequence_indices_space = BoolBox(add_batch_rank=True)

    # Terminals and sequence indices are filled with 0 for the whole batch.
    batch = dict(
        states=states,
        rewards=reward_space.sample(num_samples),
        terminals=terminal_space.sample(num_samples, fill_value=0),
        sequence_indices=sequence_indices_space.sample(num_samples, fill_value=0)
    )

    # GAE has its own test; here we only check that the API call goes through.
    pg_advantages = ppo_agent.post_process(batch)
def test_impala_actor_compilation(self):
    """
    Compiles an IMPALA agent of type "actor" on a DeepMind Lab environment.

    Prints a notice and returns early if the DeepMind Lab env cannot be imported.
    """
    try:
        from rlgraph.environments.deepmind_lab import DeepmindLabEnv
    except ImportError:
        print("Deepmind Lab not installed: Will skip this test.")
        return

    config = config_from_path("configs/impala_agent_for_deepmind_lab_env.json")
    lab_env = DeepmindLabEnv(
        level_id="seekavoid_arena_01",
        observations=["RGB_INTERLEAVED", "INSTR"],
        frameskip=4
    )
    # Internal states: a batch-ranked pair of (256,)-float vectors.
    internal_states_space = Tuple(
        FloatBox(shape=(256,)), FloatBox(shape=(256,)), add_batch_rank=True
    )
    actor_agent = IMPALAAgent.from_spec(
        config,
        type="actor",
        state_space=lab_env.state_space,
        action_space=lab_env.action_space,
        internal_states_space=internal_states_space,
        # NOTE(review): the original remark here read "Make session-creation hang in docker.";
        # presumably monitoring is disabled because it causes that hang - confirm.
        execution_spec=dict(disable_monitoring=True)
    )

    # The Specifiable Server holding the Env has to be started by hand.
    actor_agent.environment_stepper.environment_server.start()
    print("Compiled IMPALA type=actor agent.")
    actor_agent.environment_stepper.environment_server.stop()
def test_activation_functions(self):
    """
    Runs plain NNLayers that only apply an activation function (relu, sigmoid)
    and compares their outputs against the numpy reference implementations.
    """
    input_space = FloatBox(shape=(3, ), add_batch_rank=True)

    # --- ReLU ---
    test = ComponentTest(
        component=NNLayer(activation="relu"), input_spaces=dict(inputs=input_space)
    )
    sampled = input_space.sample(size=5)
    test.test(("apply", sampled), expected_outputs=relu(sampled))

    # Hand-written values as a second check, independent of the numpy relu helper.
    fixed_input = np.array([[1.0, 2.0, -5.0], [-10.0, -100.1, 4.5]])
    fixed_expected = np.array([[1.0, 2.0, 0.0], [0.0, 0.0, 4.5]])
    test.test(("apply", fixed_input), expected_outputs=fixed_expected)

    # --- Sigmoid ---
    test = ComponentTest(
        component=NNLayer(activation="sigmoid"), input_spaces=dict(inputs=input_space)
    )
    sampled = input_space.sample(size=10)
    test.test(("apply", sampled), expected_outputs=sigmoid(sampled))
def test_simple_nn_using_layers(self):
    """
    Builds a NeuralNetwork from the "layers" list of a json config and checks its
    `call` output against a manually computed dense layer.
    """
    # A batch rank is required on the input space (NNLayer complains otherwise).
    input_space = FloatBox(shape=(4, ), add_batch_rank=True)

    # Pass the layer specs from the json file positionally into the constructor.
    layer_specs = config_from_path("configs/test_simple_nn.json")["layers"]
    network = NeuralNetwork(*layer_specs)

    # No seeding: expectations are derived from the actual variable values below.
    test = ComponentTest(component=network, input_spaces=dict(inputs=input_space))

    # Batch of size=4.
    batch = input_space.sample(4)

    # Manual forward pass through the hidden layer.
    kernel_key = "hidden-layer/dense/kernel"
    bias_key = "hidden-layer/dense/bias"
    variables = network.get_variables(kernel_key, bias_key, global_scope=False)
    kernel = test.read_variable_values(variables[kernel_key])
    bias = test.read_variable_values(variables[bias_key])
    expected = dense_layer(batch, kernel, bias)

    test.test(("call", batch), expected_outputs=expected, decimals=5)
    test.terminate()
def test_time_rank_folding_for_large_cnn_nn(self):
    """
    Wraps a 3x-CNN config in a fold-time-rank / unfold-time-rank reshape pair and
    checks shape and dtype of the output for a time-major batch.
    """
    width = 86
    height = 86
    time_rank = 20

    input_space = FloatBox(
        shape=(width, height, 3), add_batch_rank=True, add_time_rank=True, time_major=True
    )

    base_config = config_from_path("configs/test_3x_cnn_nn.json")
    # Fold the time rank before the first layer ...
    base_config.insert(0, {"type": "reshape", "fold_time_rank": True})
    # ... and unfold it again (time-major) after the last one.
    base_config.append({
        "type": "reshape",
        "unfold_time_rank": time_rank,
        "time_major": True
    })

    neural_net = NeuralNetwork.from_spec(base_config)
    test = ComponentTest(component=neural_net, input_spaces=dict(nn_input=input_space))

    # Pull a large batch+time ranked sample.
    sample_shape = (time_rank, 256)
    inputs = input_space.sample(sample_shape)

    out = test.test(("call", inputs), expected_outputs=None)["output"]

    # assertEqual (instead of assertTrue on `==`) reports both values on failure.
    self.assertEqual(out.shape, (time_rank, 256, 7 * 7 * 64))
    self.assertEqual(out.dtype, np.float32)
def test_maxpool2d_layer(self):
    """
    Checks a 2x2 "valid" max-pool over 2x2x3 inputs: each sample collapses to a
    single 1x1 position holding the per-channel maximum.
    """
    space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)  # e.g. a simple 3-color image

    # NOTE: Strides shouldn't matter.
    maxpool2d_layer = MaxPool2DLayer(pool_size=2, strides=2, padding="valid")
    test = ComponentTest(component=maxpool2d_layer, input_spaces=dict(inputs=space))

    # Batch of 2 samples.
    input_ = space.sample(2)

    # The pool window covers the whole 2x2 image, so the expected output is the
    # per-sample, per-channel max over both spatial axes (kept as size-1 dims).
    expected = np.max(input_, axis=(1, 2), keepdims=True)

    test.test(("apply", input_), expected_outputs=expected)
def test_add_layer_to_simple_nn(self):
    """
    Appends a DenseLayer to a json-defined network and verifies the output of the
    resulting two-layer stack against a manual computation.
    """
    # A batch rank is required on the input space (NNLayer complains otherwise).
    input_space = FloatBox(shape=(3, ), add_batch_rank=True)

    # Network as specified in the json file.
    spec = config_from_path("configs/test_simple_nn.json")
    network = NeuralNetwork.from_spec(spec)  # type: NeuralNetwork

    # Extend it by one more dense layer.
    network.add_layer(DenseLayer(units=10, scope="last-layer"))

    # No seeding: expectations are derived from the actual variable values below.
    test = ComponentTest(component=network, input_spaces=dict(nn_input=input_space))

    # Batch of size=3.
    batch = input_space.sample(3)

    # Manual forward pass: hidden layer first, then the added last layer.
    values = test.read_variable_values(network.variable_registry)
    hidden_out = dense_layer(
        batch,
        values["test-network/hidden-layer/dense/kernel"],
        values["test-network/hidden-layer/dense/bias"]
    )
    expected = dense_layer(
        hidden_out,
        values["test-network/last-layer/dense/kernel"],
        values["test-network/last-layer/dense/bias"]
    )

    test.test(("apply", batch), expected_outputs=dict(output=expected), decimals=5)
    test.terminate()
def test_keras_style_two_separate_input_spaces(self):
    """
    Builds a network via the functional (keras-style) API from two independent
    input spaces (int and float) and checks the concatenated output.
    """
    # Define two input Spaces first. Independently (no container).
    input_space_1 = IntBox(3, add_batch_rank=True)
    input_space_2 = FloatBox(shape=(4,), add_batch_rank=True)

    # One-hot flatten the int tensor.
    flatten_layer_out = ReShape(flatten=True, flatten_categories=True)(input_space_1)
    # Run the float tensor through two dense layers.
    dense_1_out = DenseLayer(units=3, scope="d1")(input_space_2)
    dense_2_out = DenseLayer(units=5, scope="d2")(dense_1_out)
    # Concat everything.
    cat_out = ConcatLayer()(flatten_layer_out, dense_2_out)

    # Use the `outputs` arg to allow your network to trace back the data flow until the input space.
    neural_net = NeuralNetwork(inputs=[input_space_1, input_space_2], outputs=cat_out)

    test = ComponentTest(
        component=neural_net, input_spaces=dict(inputs=[input_space_1, input_space_2])
    )

    # Read the dense layers' weights for the manual computation.
    var_dict = neural_net.variable_registry
    w1_value = test.read_variable_values(var_dict["neural-network/d1/dense/kernel"])
    b1_value = test.read_variable_values(var_dict["neural-network/d1/dense/bias"])
    w2_value = test.read_variable_values(var_dict["neural-network/d2/dense/kernel"])
    b2_value = test.read_variable_values(var_dict["neural-network/d2/dense/bias"])

    # Batch of size=n.
    input_ = [input_space_1.sample(4), input_space_2.sample(4)]

    expected = np.concatenate([  # concat everything
        one_hot(input_[0]),  # int flattening
        dense_layer(dense_layer(input_[1], w1_value, b1_value), w2_value, b2_value)  # float -> 2 x dense
    ], axis=-1)

    # The return value of `test.test` is not needed here (was an unused local).
    test.test(("call", input_), expected_outputs=expected)

    test.terminate()
def test_keras_style_simple_nn(self):
    """
    Chains two DenseLayers through the functional (keras-style) API and checks
    the network's `call` output against a manual computation.
    """
    # The network's input Space.
    input_space = FloatBox(shape=(3,), add_batch_rank=True)

    # First layer is called directly on the input space ...
    first_out = DenseLayer(units=5, activation="linear", scope="a")(input_space)
    # ... the second one on the first layer's output record.
    second_out = DenseLayer(units=7, activation="relu", scope="b")(first_out)

    # Given only the final output, the network traces back to the input space
    # and picks up every component on the way.
    network = NeuralNetwork(outputs=second_out)

    test = ComponentTest(component=network, input_spaces=dict(inputs=input_space))

    # Batch of size=n.
    batch = input_space.sample(5)

    # Manual forward pass.
    variables = network.get_variables(
        "a/dense/kernel", "a/dense/bias", "b/dense/kernel", "b/dense/bias", global_scope=False
    )
    kernel_a = test.read_variable_values(variables["a/dense/kernel"])
    bias_a = test.read_variable_values(variables["a/dense/bias"])
    kernel_b = test.read_variable_values(variables["b/dense/kernel"])
    bias_b = test.read_variable_values(variables["b/dense/bias"])

    linear_out = dense_layer(batch, kernel_a, bias_a)
    expected = relu(dense_layer(linear_out, kernel_b, bias_b))

    test.test(("call", batch), expected_outputs=expected, decimals=5)
    test.terminate()
def test_functional_api_one_output_is_discarded(self):
    """
    Builds a network from only the first of an LSTMLayer's two outputs and
    checks that `call` yields just that output (no internal states).
    """
    # The network's input Space (batch + time rank).
    input_space = FloatBox(shape=(3, ), add_batch_rank=True, add_time_rank=True)

    # The LSTM yields (output, internal states); the internal states are dropped here.
    lstm_out, _ = LSTMLayer(units=2, return_sequences=False)(input_space)

    # Single-output network built from the LSTM output alone.
    network = NeuralNetwork(outputs=lstm_out)

    test = ComponentTest(component=network, input_spaces=dict(inputs=input_space))

    # Batch of size=n.
    batch = input_space.sample((5, 3))

    # Manual forward pass.
    registry = network.variable_registry
    kernel = test.read_variable_values(registry["neural-network/lstm-layer/lstm-cell/kernel"])
    bias = test.read_variable_values(registry["neural-network/lstm-layer/lstm-cell/bias"])
    full_sequence_out, _ = lstm_layer(batch, kernel, bias)
    # Keep only the last time step.
    expected_out = full_sequence_out[:, -1, :]

    # No internal states expected (the functional definition above left them out).
    test.test(("call", batch), expected_outputs=expected_out, decimals=5)
    test.terminate()
def test_update_online(self):
    """
    Fills both the demo memory and the online memory with 32 records each and
    then runs one agent update.
    """
    environment = OpenAIGymEnv.from_spec(self.env_spec)
    config = config_from_path("configs/dqfd_agent_for_cartpole.json")
    dqfd_agent = DQFDAgent.from_spec(
        config,
        state_space=environment.state_space,
        action_space=environment.action_space
    )
    terminal_space = BoolBox(add_batch_rank=True)

    # Demo data.
    demo_batch = dict(
        preprocessed_states=dqfd_agent.preprocessed_state_space.sample(32),
        actions=environment.action_space.sample(32),
        rewards=FloatBox().sample(32),
        terminals=terminal_space.sample(32),
        next_states=dqfd_agent.preprocessed_state_space.sample(32)
    )
    dqfd_agent.observe_demos(**demo_batch)

    # Online data (no internal states).
    online_batch = dict(
        preprocessed_states=dqfd_agent.preprocessed_state_space.sample(32),
        actions=environment.action_space.sample(32),
        rewards=FloatBox().sample(32),
        internals=[],
        terminals=terminal_space.sample(32),
        next_states=dqfd_agent.preprocessed_state_space.sample(32)
    )
    dqfd_agent._observe_graph(**online_batch)

    # Run the update.
    dqfd_agent.update()
def test_environment_stepper_on_deterministic_env_with_action_probs_lstm(self):
    """
    Steps an EnvironmentStepper (LSTM policy, action probs enabled) 4 times through
    a deterministic env and compares all outputs against a manual LSTM rollout.
    """
    internal_states_space = Tuple(FloatBox(shape=(3,)), FloatBox(shape=(3,)))
    preprocessor_spec = [dict(type="multiply", factor=0.1)]
    network_spec = config_from_path("configs/test_lstm_nn.json")
    exploration_spec = None
    policy_spec = dict(network_spec=network_spec, action_space=self.deterministic_env_action_space)
    actor_component = ActorComponent(preprocessor_spec, policy_spec, exploration_spec)

    environment_stepper = EnvironmentStepper(
        environment_spec=dict(type="deterministic_env", steps_to_terminal=3),
        actor_component_spec=actor_component,
        state_space=self.deterministic_env_state_space,
        reward_space="float32",
        internal_states_space=internal_states_space,
        add_action_probs=True,
        action_probs_space=self.deterministic_action_probs_space,
        num_steps=4,
    )

    test = ComponentTest(
        component=environment_stepper,
        action_space=self.deterministic_env_action_space,
    )

    # Fetch the policy's LSTM and action-layer weights.
    weights = test.read_variable_values(environment_stepper.actor_component.policy.variable_registry)
    policy_scope = "environment-stepper/actor-component/policy/"
    lstm_kernel = weights[policy_scope+"test-lstm-network/lstm-layer/lstm-cell/kernel"]
    lstm_bias = weights[policy_scope+"test-lstm-network/lstm-layer/lstm-cell/bias"]
    action_kernel = weights[policy_scope+"action-adapter-0/action-network/action-layer/dense/kernel"]
    action_bias = weights[policy_scope+"action-adapter-0/action-network/action-layer/dense/bias"]

    # Manual rollout over the 4 steps. The LSTM inputs are the raw states
    # 0.0, 1.0, 2.0, 0.0 scaled by the 0.1 preprocessor factor; each step after
    # the first is fed the previous step's internal states.
    rollout = []
    for lstm_input in (0.0, 0.1, 0.2, 0.0):
        step_input = np.array([[[lstm_input]]])
        if rollout:
            step_result = lstm_layer(step_input, lstm_kernel, lstm_bias, rollout[-1][1])
        else:
            step_result = lstm_layer(step_input, lstm_kernel, lstm_bias)
        rollout.append(step_result)

    expected_action_probs = np.array([
        softmax(dense_layer(np.squeeze(step_result[0]), action_kernel, action_bias))
        for step_result in rollout
    ])
    # Internal states start out as zeros, followed by the states after each step.
    initial_state = [[0.0, 0.0, 0.0]]
    expected_internal_states = (
        np.squeeze(np.array([initial_state] + [step_result[1][0] for step_result in rollout])),
        np.squeeze(np.array([initial_state] + [step_result[1][1] for step_result in rollout]))
    )

    expected = (
        # NOTE(review): presumably the per-step terminal flags (steps_to_terminal=3) - confirm.
        np.array([False, False, True, False]),
        np.array([[0.0], [1.0], [2.0], [0.0], [1.0]]),  # s' (raw)
        expected_action_probs,  # action probs
        expected_internal_states  # internal states
    )
    test.test("step", expected_outputs=expected)

    # Close the session so that the Env on the server is shut down.
    test.terminate()
def __init__(self, episode_length=5, scale=0.1):
    """
    Sets up the environment with 1-dimensional float state and action spaces.

    Args:
        episode_length: Stored as `self.episode_length` (default 5).
        scale: Stored as `self.scale` (default 0.1).
    """
    # States: unbounded (1,)-floats. Actions: (1,)-floats within [-2.0, 2.0].
    state_space = FloatBox(shape=(1, ))
    action_space = FloatBox(shape=(1, ), low=-2.0, high=2.0)
    super(GaussianDensityAsRewardEnvironment, self).__init__(
        state_space=state_space, action_space=action_space
    )

    self.episode_length = episode_length
    # Step counter starts at 0.
    self.episode_step = 0
    # No `loc` value yet at construction time.
    self.loc = None
    self.scale = scale
def test_dummy_nn_layer(self):
    """
    Checks that an NNLayer without activation passes its input through unchanged.
    """
    # Pure pass-through: no activation and no layer (graph_fn) computation.
    input_space = FloatBox(shape=(3,), add_batch_rank=True)

    # - fixed 1.0 weights, no biases
    passthrough_layer = NNLayer(activation=None)
    test = ComponentTest(component=passthrough_layer, input_spaces=dict(inputs=input_space))

    batch = input_space.sample(size=5)
    # Output must equal the input.
    test.test(("apply", batch), expected_outputs=batch)
def test_demos_with_container_actions(self):
    """
    Checks that a DQFD agent with a Dict (container) action space can fit a fixed
    set of demo states to their demo actions.
    """
    vocab_size = 100
    embed_dim = 128
    # States are vectors of 10 ids out of the vocabulary.
    state_space = IntBox(vocab_size, shape=(10, ))

    # Container action space holding three IntBox sub-actions.
    num_outputs = 3
    actions_space = Dict({
        'action_{}'.format(i): IntBox(low=0, high=num_outputs) for i in range(3)
    })

    agent_config = config_from_path("configs/dqfd_container.json")
    # Replace the network by: embedding -> flatten -> dense(relu).
    agent_config["network_spec"] = [
        dict(type="embedding", embed_dim=embed_dim, vocab_size=vocab_size),
        dict(type="reshape", flatten=True),
        dict(type="dense", units=embed_dim, activation="relu", scope="dense_1")
    ]
    dqfd_agent = DQFDAgent.from_spec(
        agent_config, state_space=state_space, action_space=actions_space
    )
    terminal_space = BoolBox(add_batch_rank=True)
    reward_space = FloatBox(add_batch_rank=True)

    # 20 demo records: rewards all 1.0, terminals all False.
    demo_states = dqfd_agent.preprocessed_state_space.with_batch_rank().sample(20)
    demo_actions = actions_space.with_batch_rank().sample(20)
    demo_rewards = reward_space.sample(20, fill_value=1.0)
    demo_next_states = dqfd_agent.preprocessed_state_space.with_batch_rank().sample(20)
    demo_terminals = terminal_space.sample(20, fill_value=False)

    # Store them in the demo memory.
    dqfd_agent.observe_demos(
        preprocessed_states=demo_states,
        actions=demo_actions,
        rewards=demo_rewards,
        next_states=demo_next_states,
        terminals=demo_terminals,
    )

    # Train on the demos only.
    dqfd_agent.update_from_demos(num_updates=5000, batch_size=20)

    # The greedy actions on the demo states must now match the demo actions.
    agent_actions = dqfd_agent.get_action(
        demo_states, apply_preprocessing=False, use_exploration=False
    )
    recursive_assert_almost_equal(agent_actions, demo_actions)
def test_softmax_on_simple_inputs(self):
    """
    Compares the Softmax component's two outputs (probabilities and their logs)
    against the numpy reference on a plain float input.
    """
    softmax_component = Softmax()
    logits_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)
    test = ComponentTest(component=softmax_component, input_spaces=dict(logits=logits_space))

    # Batch=5
    logits = logits_space.sample(5)
    expected_probs = softmax_(logits)
    expected_log_probs = np.log(expected_probs)

    test.test(("softmax", logits), expected_outputs=(expected_probs, expected_log_probs))
def test_multi_lstm_layer(self): return # TODO: finish this test case # Tests a double MultiLSTMLayer. input_spaces = dict(inputs=FloatBox(shape=(3, ), add_batch_rank=True, add_time_rank=True), initial_c_and_h_states=Tuple( Tuple(FloatBox(shape=(5, )), FloatBox(shape=(5, ))), Tuple(FloatBox(shape=(5, )), FloatBox(shape=(5, ))), add_batch_rank=True)) multi_lstm_layer = MultiLSTMLayer( num_lstms=2, units=5, # Full skip connections (x goes into both layers, out0 goes into layer1). skip_connections=[[True, False], [True, True]]) # Do not seed, we calculate expectations manually. test = ComponentTest(component=multi_lstm_layer, input_spaces=input_spaces) # Batch of size=n, time-steps=m. input_ = input_spaces["inputs"].sample((2, 3)) global_scope = "variational-auto-encoder/" # Calculate output manually. var_dict = test.read_variable_values( multi_lstm_layer.variable_registry) encoder_network_out = dense_layer( input_, var_dict[global_scope + "encoder-network/encoder-layer/dense/kernel"], var_dict[global_scope + "encoder-network/encoder-layer/dense/bias"]) expected_mean = dense_layer( encoder_network_out, var_dict[global_scope + "mean-layer/dense/kernel"], var_dict[global_scope + "mean-layer/dense/bias"]) expected_stddev = dense_layer( encoder_network_out, var_dict[global_scope + "stddev-layer/dense/kernel"], var_dict[global_scope + "stddev-layer/dense/bias"]) out = test.test(("encode", input_), expected_outputs=None) recursive_assert_almost_equal(out["mean"], expected_mean, decimals=5) recursive_assert_almost_equal(out["stddev"], expected_stddev, decimals=5) self.assertTrue(out["z_sample"].shape == (3, 1)) test.terminate()
def get_preprocessed_space(self, space):
    """
    Returns the Space that results from converting `space` to `self.to_dtype`.

    Supported conversions:
    - IntBox -> FloatBox (keeps low/high); IntBox -> BoolBox only for low=0, high=1.
    - BoolBox -> FloatBox (low=0.0, high=1.0); BoolBox -> IntBox (low=0, high=1).
    - FloatBox -> IntBox (keeps low/high).
    Shape, batch rank and time rank are carried over in every conversion.

    Args:
        space: The incoming Space.

    Returns:
        The converted Space, or `space` itself if none of the conversions above
        matches `self.to_dtype` for this box type.

    Raises:
        RLGraphError: If `space` is not an IntBox, BoolBox or FloatBox, or if an
            IntBox with bounds other than (0, 1) is to be converted to bool.
    """
    # TODO map of allowed conversions in utils?
    float_names = ("float", "float32", "np.float", "tf.float32", "torch.float32")
    int_names = ("int", "int32", "np.int32", "tf.int32", "torch.int32")

    if isinstance(space, IntBox):
        if self.to_dtype in float_names:
            return FloatBox(
                shape=space.shape, low=space.low, high=space.high,
                add_batch_rank=space.has_batch_rank, add_time_rank=space.has_time_rank
            )
        if self.to_dtype == "bool":
            # Only a 0/1 IntBox can be interpreted as booleans.
            if not (space.low == 0 and space.high == 1):
                raise RLGraphError(
                    "ERROR: Conversion from IntBox to BoolBox not allowed if low is not 0 and "
                    "high is not 1."
                )
            return BoolBox(
                shape=space.shape,
                add_batch_rank=space.has_batch_rank, add_time_rank=space.has_time_rank
            )
    elif isinstance(space, BoolBox):
        if self.to_dtype in float_names:
            return FloatBox(
                shape=space.shape, low=0.0, high=1.0,
                add_batch_rank=space.has_batch_rank, add_time_rank=space.has_time_rank
            )
        if self.to_dtype in int_names:
            return IntBox(
                shape=space.shape, low=0, high=1,
                add_batch_rank=space.has_batch_rank, add_time_rank=space.has_time_rank
            )
    elif isinstance(space, FloatBox):
        if self.to_dtype in int_names:
            return IntBox(
                shape=space.shape, low=space.low, high=space.high,
                add_batch_rank=space.has_batch_rank, add_time_rank=space.has_time_rank
            )
    else:
        # Not a box type we know how to convert.
        raise RLGraphError(
            "ERROR: Space conversion from: {} to type {} not supported".format(space, self.to_dtype)
        )

    # Nothing to convert: hand back the space as is.
    return space
def test_time_rank_folding_for_large_dense_nn(self):
    """
    Times 10 forward passes of a large dense network on batch+time ranked input
    and on batch-only ("folded") input and asserts both runtimes are almost
    equal (decimals=0).
    """
    vector_dim = 256
    input_space = FloatBox(shape=(vector_dim, ), add_batch_rank=True, add_time_rank=True)
    base_config = config_from_path("configs/test_large_dense_nn.json")

    # --- Without folding: input keeps its time rank ---
    unfolded_net = NeuralNetwork.from_spec(base_config)
    test = ComponentTest(component=unfolded_net, input_spaces=dict(nn_input=input_space))

    # Large batch+time ranked sample.
    sample_shape = (256, 200)
    inputs = input_space.sample(sample_shape)

    runs = 10
    started_at = time.monotonic()
    for _ in range(runs):
        print(".", flush=True, end="")
        test.test(("call", inputs), expected_outputs=None)
    runtime_wo_folding = time.monotonic() - started_at

    print(
        "\nTesting large dense NN w/o time-rank folding: {}x pass through with {}-data took "
        "{}s".format(runs, sample_shape, runtime_wo_folding)
    )

    # --- With folding: the component is built on a batch-only space ---
    folded_net = NeuralNetwork.from_spec(base_config)
    input_space_folded = FloatBox(shape=(vector_dim, ), add_batch_rank=True)
    # As many items as the unfolded sample has over batch and time together.
    inputs = input_space.sample(sample_shape[0] * sample_shape[1])

    test = ComponentTest(component=folded_net, input_spaces=dict(nn_input=input_space_folded))

    started_at = time.monotonic()
    for _ in range(runs):
        print(".", flush=True, end="")
        test.test(("call", inputs), expected_outputs=None)
    runtime_w_folding = time.monotonic() - started_at

    print(
        "\nTesting large dense NN w/ time-rank folding: {}x pass through with {}-data took "
        "{}s".format(runs, sample_shape, runtime_w_folding)
    )

    recursive_assert_almost_equal(runtime_w_folding, runtime_wo_folding, decimals=0)
def test_update_from_demos(self):
    """
    Exercises the dedicated update-from-demos API: after training on demos that pair
    two fixed states with fixed actions, the agent must return those actions.
    """
    environment = OpenAIGymEnv.from_spec(self.env_spec)
    config = config_from_path("configs/dqfd_agent_for_cartpole.json")
    dqfd_agent = DQFDAgent.from_spec(
        config, state_space=environment.state_space, action_space=environment.action_space
    )
    terminal_space = BoolBox(add_batch_rank=True)
    reward_space = FloatBox(add_batch_rank=True)

    # Two fixed (state, action) pairs.
    state_1 = dqfd_agent.preprocessed_state_space.with_batch_rank().sample(1)
    action_1 = [1]
    state_2 = dqfd_agent.preprocessed_state_space.with_batch_rank().sample(1)
    action_2 = [0]

    # Insert each pair 10 times; rewards, next states and terminals are random.
    for _ in range(10):
        for fixed_state, fixed_action in ((state_1, action_1), (state_2, action_2)):
            dqfd_agent.observe_demos(
                preprocessed_states=fixed_state,
                actions=fixed_action,
                rewards=reward_space.sample(1),
                next_states=dqfd_agent.preprocessed_state_space.with_batch_rank().sample(1),
                terminals=terminal_space.sample(1),
            )

    # Train on the demos.
    dqfd_agent.update_from_demos(num_updates=100, batch_size=8)

    # Both fixed states must now map onto their demo actions.
    chosen = dqfd_agent.get_action(states=state_1, apply_preprocessing=False, use_exploration=False)
    self.assertEqual(chosen, action_1)
    chosen = dqfd_agent.get_action(states=state_2, apply_preprocessing=False, use_exploration=False)
    self.assertEqual(chosen, action_2)
def test_softmax_on_complex_inputs(self):
    """
    Compares the Softmax component's two outputs against the numpy reference on a
    Dict input space (keys "a" and "b") with batch and time ranks.
    """
    softmax_component = Softmax()
    logits_space = Dict(
        dict(a=FloatBox(shape=(4, 5)), b=FloatBox(shape=(3, ))),
        add_batch_rank=True,
        add_time_rank=True
    )
    test = ComponentTest(component=softmax_component, input_spaces=dict(logits=logits_space))

    logits = logits_space.sample(size=(4, 5))
    # Reference probabilities and log-probabilities, per dict key.
    expected_probs = dict(a=softmax_(logits["a"]), b=softmax_(logits["b"]))
    expected_log_probs = dict(a=np.log(expected_probs["a"]), b=np.log(expected_probs["b"]))

    test.test(
        ("softmax", logits), expected_outputs=(expected_probs, expected_log_probs), decimals=5
    )
def test_apply_gradients(self):
    """
    Checks gradient calculation and one optimizer step on a dummy component whose
    single variable starts at 2.0.
    """
    component = DummyWithOptimizer(variable_value=2.0)
    test = ComponentTest(
        component=component, input_spaces=dict(input_=FloatBox(add_batch_rank=True))
    )

    # Gradients only: expect [grad, 2.0].
    expected_grad = 0.69314718
    test.test("calc_grads", expected_outputs=[expected_grad, 2.0])

    # One optimizer step: expect the loss twice (first output is not checked).
    expected_loss = np.square(np.log(2.0))
    variable_key = "dummy-with-optimizer/variable"
    values_before = test.read_variable_values(component.variable_registry)
    test.test("step", expected_outputs=[None, expected_loss, expected_loss])

    # The variable must have moved by -learning_rate * grad.
    values_after = test.read_variable_values(component.variable_registry)
    expected_new_value = values_before[variable_key] - (component.learning_rate * expected_grad)
    recursive_assert_almost_equal(values_after[variable_key], expected_new_value, decimals=5)
def test_keras_style_one_container_input_space(self):
    """
    Builds a network via the functional (keras-style) API from one Tuple input
    space (int + float) and checks the concatenated output.
    """
    # Define one container input Space.
    input_space = Tuple(IntBox(3), FloatBox(shape=(4,)), add_batch_rank=True)

    # One-hot flatten the int tensor.
    flatten_layer_out = ReShape(flatten=True, flatten_categories=True)(input_space[0])
    # Run the float tensor through two dense layers.
    dense_1_out = DenseLayer(units=3, scope="d1")(input_space[1])
    dense_2_out = DenseLayer(units=5, scope="d2")(dense_1_out)
    # Concat everything.
    cat_out = ConcatLayer()(flatten_layer_out, dense_2_out)

    # Use the `outputs` arg to allow your network to trace back the data flow until the input space.
    # `inputs` is not needed here as we only have one single input (the Tuple).
    neural_net = NeuralNetwork(outputs=cat_out)

    test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_space))

    # Read the dense layers' weights for the manual computation.
    var_dict = neural_net.variable_registry
    w1_value = test.read_variable_values(var_dict["neural-network/d1/dense/kernel"])
    b1_value = test.read_variable_values(var_dict["neural-network/d1/dense/bias"])
    w2_value = test.read_variable_values(var_dict["neural-network/d2/dense/kernel"])
    b2_value = test.read_variable_values(var_dict["neural-network/d2/dense/bias"])

    # Batch of size=n.
    input_ = input_space.sample(4)

    expected = np.concatenate([  # concat everything
        one_hot(input_[0]),  # int flattening
        dense_layer(dense_layer(input_[1], w1_value, b1_value), w2_value, b2_value)  # float -> 2 x dense
    ], axis=-1)

    # The sampled container is passed as one single argument (wrapped in a 1-tuple).
    # The return value of `test.test` is not needed here (was an unused local).
    test.test(("call", tuple([input_])), expected_outputs=expected)

    test.terminate()
def test_sac_2x2_grid_world_with_container_actions(self):
    """
    Runs a SAC agent through a SingleThreadedWorker for 10000 time steps on a 2x2
    GridWorld with container actions and prints the results.
    """
    # ftj = forward + turn + jump
    env_spec = dict(world="2x2", action_type="ftj", state_representation="xy+orientation")
    # Only used to look up the action space.
    dummy_env = GridWorld.from_spec(env_spec)

    agent_config = config_from_path("configs/sac_agent_for_2x2_gridworld_with_container_actions.json")
    # The preprocessing spec is taken out of the agent config and given to the worker.
    preprocessing_spec = agent_config.pop("preprocessing_spec")

    sac_agent = SACAgent.from_spec(
        agent_config,
        state_space=FloatBox(shape=(4,)),
        action_space=dummy_env.action_space,
    )

    time_steps = 10000
    worker = SingleThreadedWorker(
        env_spec=lambda: GridWorld.from_spec(env_spec),
        agent=sac_agent,
        preprocessing_spec=preprocessing_spec,
        worker_executes_preprocessing=False,
        render=False
    )
    results = worker.execute_timesteps(time_steps, use_exploration=True)
    print(results)
def test_simple_nn(self):
    """
    Builds a network from a json spec and checks its `apply` output on a fixed
    batch against a manually computed dense layer.
    """
    # A batch rank is required on the input space (NNLayer complains otherwise).
    input_space = FloatBox(shape=(3, ), add_batch_rank=True)

    # Network as specified in the json file.
    spec = config_from_path("configs/test_simple_nn.json")
    network = NeuralNetwork.from_spec(spec)  # type: NeuralNetwork

    # No seeding: expectations are derived from the actual variable values below.
    test = ComponentTest(component=network, input_spaces=dict(nn_input=input_space))

    # Batch of size=3.
    batch = np.array([[0.1, 0.2, 0.3], [1.0, 2.0, 3.0], [10.0, 20.0, 30.0]])

    # Manual forward pass through the hidden layer.
    kernel_key = "hidden-layer/dense/kernel"
    bias_key = "hidden-layer/dense/bias"
    variables = network.get_variables(kernel_key, bias_key, global_scope=False)
    kernel = test.read_variable_values(variables[kernel_key])
    bias = test.read_variable_values(variables[bias_key])
    expected = dense_layer(batch, kernel, bias)

    test.test(("apply", batch), expected_outputs=dict(output=expected), decimals=5)
    test.terminate()
def test_memory_compilation(self):
    """
    Builds a prioritized replay memory for Pong-shaped records.

    Returns:
        Whatever `ComponentTest.build()` returns (the build stats).
    """
    pong_env = OpenAIGymEnv("Pong-v0", frameskip=4, max_num_noops=30, episodic_life=True)
    record_space = Dict(
        states=pong_env.state_space,
        actions=pong_env.action_space,
        rewards=float,
        terminals=BoolBox(),
        add_batch_rank=True
    )
    input_spaces = dict(
        # insert: records
        records=record_space,
        # get_records: num_records
        num_records=int,
        # update_records: indices, update
        indices=IntBox(add_batch_rank=True),
        update=FloatBox(add_batch_rank=True)
    )
    # `num_records` is removed again before the component is built.
    del input_spaces["num_records"]

    memory = MemPrioritizedReplay(capacity=20000, )
    # Build explicitly (auto_build off) to get hold of the build result.
    test = ComponentTest(component=memory, input_spaces=input_spaces, auto_build=False)
    return test.build()
def test_residual_layer(self):
    """
    Checks a ResidualLayer that repeats a 1x1 Conv2D unit (constant kernel 0.5,
    constant bias 1.0) twice and adds the input back onto the result.
    """
    # Input space of the residual layer.
    input_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)

    conv_unit = Conv2DLayer(
        filters=3, kernel_size=1, strides=1, padding="same", kernel_spec=0.5, biases_spec=1.0
    )
    residual_layer = ResidualLayer(residual_unit=conv_unit, repeats=2)
    test = ComponentTest(component=residual_layer, input_spaces=dict(inputs=input_space))

    # Batch of 2 samples.
    inputs = np.array([
        [[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], [[0.7, 0.8, 0.9], [1.1, 1.2, 1.3]]],
        [[[1.1, 1.2, 1.3], [2.4, 2.5, 2.6]], [[-0.7, -0.8, -0.9], [3.1, 3.2, 3.3]]]
    ])

    # Calculation:
    # 1st_conv2d = sum-over-last-axis(input) * 0.5 + 1.0 -> tile last axis 3x
    # 2nd_conv2d = sum-over-last-axis(1st_conv2d) * 0.5 + 1.0 -> tile last axis 3x
    # output: 2nd_conv2d + input
    tile_reps = (1, 1, 1, 3)
    first_conv = np.tile(np.sum(inputs, axis=3, keepdims=True) * 0.5 + 1.0, tile_reps)
    second_conv = np.tile(np.sum(first_conv, axis=3, keepdims=True) * 0.5 + 1.0, tile_reps)
    expected = second_conv + inputs

    test.test(("apply", inputs), expected_outputs=expected, decimals=5)
def test_lstm_layer(self):
    """
    Compares an LSTMLayer's output and last internal states on an all-ones input
    against the numpy LSTM reference.
    """
    # Rank 0 = batch; rank 1 = time/sequence; remaining ranks = data.
    batch_size = 3
    sequence_length = 2
    input_space = FloatBox(shape=(3, ), add_batch_rank=True, add_time_rank=True)

    lstm_layer_component = LSTMLayer(units=5)
    test = ComponentTest(component=lstm_layer_component, input_spaces=dict(inputs=input_space))

    # All-ones batch of n samples.
    inputs = np.ones(shape=(batch_size, sequence_length, 3))

    # Run the numpy LSTM with the component's own kernel and bias.
    values = test.read_variable_values(lstm_layer_component.variables)
    kernel = values["lstm-layer/lstm-cell/kernel"]
    bias = values["lstm-layer/lstm-cell/bias"]
    reference_out, reference_states = lstm_layer(inputs, kernel, bias, time_major=False)

    expected = dict(output=reference_out, last_internal_states=reference_states)
    test.test(("apply", inputs), expected_outputs=expected)
def test_conv2d_layer(self):
    """
    Checks a 2x2 "valid" Conv2DLayer (4 filters, constant 0.5 kernel, no biases)
    on two fixed 2x2x3 samples.
    """
    # A batch rank is required on the input space (NNLayer complains otherwise).
    image_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)  # e.g. a simple 3-color image

    conv_layer = Conv2DLayer(
        filters=4, kernel_size=2, strides=1, padding="valid", kernel_spec=0.5, biases_spec=False
    )
    test = ComponentTest(component=conv_layer, input_spaces=dict(inputs=image_space))

    # Batch of 2 samples (each 2x2x3).
    sample_1 = [
        [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
        [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]]
    ]
    sample_2 = [
        [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
        [[0.7, 0.8, 0.9], [1.00, 1.10, 1.20]]
    ]
    batch = np.array([sample_1, sample_2])

    # Each output is 1x1x4: every filter yields 0.5 * (sum of all 12 input values).
    expected = np.array([
        [[[39.0, 39.0, 39.0, 39.0]]],  # output 1 (1x1x4)
        [[[3.9, 3.9, 3.9, 3.9]]],  # output 2 (1x1x4)
    ])
    test.test(("apply", batch), expected_outputs=expected)