def test_environment_stepper_on_deterministic_env_with_action_probs_lstm(self):
    """
    Steps an EnvironmentStepper (LSTM policy, action probs enabled) 4 times
    through a deterministic Env and compares the collected step results
    against values recomputed by hand from the policy's weights.
    """
    # Spaces/specs for the actor: two 3-float internal-state slots, a
    # preprocessor that multiplies states by 0.1, no exploration.
    lstm_states_space = Tuple(FloatBox(shape=(3,)), FloatBox(shape=(3,)))
    scale_preprocessor = [{"type": "multiply", "factor": 0.1}]
    lstm_network_spec = config_from_path("configs/test_lstm_nn.json")
    no_exploration = None
    policy_spec = {
        "network_spec": lstm_network_spec,
        "action_space": self.deterministic_env_action_space,
    }
    actor = ActorComponent(scale_preprocessor, policy_spec, no_exploration)

    stepper = EnvironmentStepper(
        environment_spec={"type": "deterministic_env", "steps_to_terminal": 3},
        actor_component_spec=actor,
        state_space=self.deterministic_env_state_space,
        reward_space="float32",
        internal_states_space=lstm_states_space,
        add_action_probs=True,
        action_probs_space=self.deterministic_action_probs_space,
        num_steps=4,
    )

    test = ComponentTest(
        component=stepper,
        action_space=self.deterministic_env_action_space,
    )

    # Pull the current policy weights so expectations can be recomputed.
    weights = test.read_variable_values(stepper.actor_component.policy.variable_registry)
    policy_scope = "environment-stepper/actor-component/policy/"
    lstm_kernel = weights[policy_scope + "test-lstm-network/lstm-layer/lstm-cell/kernel"]
    lstm_bias = weights[policy_scope + "test-lstm-network/lstm-layer/lstm-cell/bias"]
    action_kernel = weights[policy_scope + "action-adapter-0/action-network/action-layer/dense/kernel"]
    action_bias = weights[policy_scope + "action-adapter-0/action-network/action-layer/dense/bias"]

    # Replay the 4 steps by hand. The preprocessed inputs are 0.0, 0.1, 0.2
    # and (after the expected reset following the terminal) 0.0 again; each
    # call is seeded with the internal states returned by the previous one.
    lstm_steps = [lstm_layer(np.array([[[0.0]]]), lstm_kernel, lstm_bias)]
    for scaled_state in (0.1, 0.2, 0.0):
        previous_states = lstm_steps[-1][1]
        lstm_steps.append(
            lstm_layer(np.array([[[scaled_state]]]), lstm_kernel, lstm_bias, previous_states)
        )

    expected_action_probs = np.array([
        softmax(dense_layer(np.squeeze(step[0]), action_kernel, action_bias))
        for step in lstm_steps
    ])
    # Internal states start at all-zeros and are followed by each step's
    # returned states (slot 0 and slot 1 of the state tuple, respectively).
    zero_state = [[0.0, 0.0, 0.0]]
    expected_internal_states = (
        np.squeeze(np.array([zero_state] + [step[1][0] for step in lstm_steps])),
        np.squeeze(np.array([zero_state] + [step[1][1] for step in lstm_steps])),
    )

    expected = (
        np.array([False, False, True, False]),  # presumably the terminal flags
        np.array([[0.0], [1.0], [2.0], [0.0], [1.0]]),  # s' (raw)
        expected_action_probs,
        expected_internal_states,
    )
    test.test("step", expected_outputs=expected)

    # Make sure we close the session (to shut down the Env on the server).
    test.terminate()
def test_functional_api_one_output_is_discarded(self):
    """
    Builds a NeuralNetwork via the functional API from only the first of an
    LSTMLayer's two outputs and checks that `call` returns just that output.
    """
    # Input Space of the network (with batch- and time-ranks).
    input_space = FloatBox(shape=(3, ), add_batch_rank=True, add_time_rank=True)

    # The LSTM yields two results (output and internal states); the second
    # one is deliberately dropped and never handed to the network.
    lstm_output, _ = LSTMLayer(units=2, return_sequences=False)(input_space)
    neural_net = NeuralNetwork(outputs=lstm_output)

    test = ComponentTest(component=neural_net, input_spaces={"inputs": input_space})

    # Sample an input using the size spec (5, 3).
    sampled_input = input_space.sample((5, 3))

    # Recompute the expected output by hand from the LSTM cell's variables.
    registry = neural_net.variable_registry
    cell_scope = "neural-network/lstm-layer/lstm-cell/"
    kernel = test.read_variable_values(registry[cell_scope + "kernel"])
    bias = test.read_variable_values(registry[cell_scope + "bias"])
    full_sequence, _ = lstm_layer(sampled_input, kernel, bias)
    # Keep the last time step only.
    last_step_output = full_sequence[:, -1, :]

    # No internal states are expected: the NN was defined without them.
    test.test(("call", sampled_input), expected_outputs=last_step_output, decimals=5)

    test.terminate()
def test_lstm_layer(self):
    """
    Checks a stand-alone LSTMLayer's `apply` results (output and last
    internal states) against the `lstm_layer` helper fed with the layer's
    own kernel and bias values.
    """
    # 0th rank=batch-rank; 1st rank=time/sequence-rank; 2nd-nth rank=data.
    batch_size = 3
    sequence_length = 2
    input_space = FloatBox(shape=(3, ), add_batch_rank=True, add_time_rank=True)

    lstm_layer_component = LSTMLayer(units=5)
    test = ComponentTest(component=lstm_layer_component, input_spaces=dict(inputs=input_space))

    # All-ones input batch of shape (batch_size, sequence_length, 3).
    inputs = np.ones(shape=(batch_size, sequence_length, 3))

    # Read the layer's current weights and recompute the expectation by hand.
    var_values = test.read_variable_values(lstm_layer_component.variables)
    lstm_matrix = var_values["lstm-layer/lstm-cell/kernel"]
    lstm_biases = var_values["lstm-layer/lstm-cell/bias"]

    expected_outputs, expected_internal_states = lstm_layer(
        inputs, lstm_matrix, lstm_biases, time_major=False
    )

    expected = dict(output=expected_outputs, last_internal_states=expected_internal_states)
    test.test(("apply", inputs), expected_outputs=expected)

    # Close the test's session like the sibling tests do; previously this
    # test never terminated its ComponentTest.
    test.terminate()
def test_lstm_nn(self):
    """
    Builds the dense->LSTM network from "configs/test_dense_to_lstm_nn.json"
    and checks its `call` results against a hand-computed dense + LSTM pass.
    """
    # Space must contain batch dimension (otherwise, NNlayer will complain).
    batch_size, time_steps, input_nodes = 2, 4, 2
    input_space = FloatBox(shape=(input_nodes, ), add_batch_rank=True, add_time_rank=True)

    neural_net = NeuralNetwork.from_spec(config_from_path("configs/test_dense_to_lstm_nn.json"))

    # Do not seed, we calculate expectations manually.
    test = ComponentTest(component=neural_net, input_spaces={"inputs": input_space})

    # Sample an input using the size spec (batch_size, time_steps) = (2, 4).
    sampled_input = input_space.sample((batch_size, time_steps))

    # Read the current weights of both layers (dense first, then LSTM).
    dense_kernel = test.read_variable_values(
        neural_net.variable_registry["test-lstm-network/dense-layer/dense/kernel"]
    )
    dense_bias = test.read_variable_values(
        neural_net.variable_registry["test-lstm-network/dense-layer/dense/bias"]
    )
    lstm_kernel = test.read_variable_values(
        neural_net.variable_registry["test-lstm-network/lstm-layer/lstm-cell/kernel"]
    )
    lstm_bias = test.read_variable_values(
        neural_net.variable_registry["test-lstm-network/lstm-layer/lstm-cell/bias"]
    )

    # Calculate output manually: dense layer followed by the LSTM.
    dense_out = dense_layer(sampled_input, dense_kernel, dense_bias)
    lstm_out, last_internal_states = lstm_layer(
        dense_out, lstm_kernel, lstm_bias, time_major=False
    )

    expected = (lstm_out, last_internal_states)
    test.test(("call", sampled_input), expected_outputs=expected, decimals=5)

    test.terminate()
def test_lstm_nn_with_custom_apply(self):
    """
    Builds a dense->LSTM->dense NeuralNetwork whose `apply` API-method is a
    custom function that also accepts initial internal states, and checks its
    results against a hand-computed pass through the three layers.
    """
    # Space must contain batch dimension (otherwise, NNlayer will complain).
    units = 3
    batch_size, time_steps, input_nodes = 2, 4, 2
    input_space = FloatBox(shape=(input_nodes, ), add_batch_rank=True, add_time_rank=True)
    internal_states_space = Tuple(
        FloatBox(shape=(units, )), FloatBox(shape=(units, )), add_batch_rank=True
    )

    def custom_apply(self, input_, internal_states=None):
        # Chain d0 -> lstm -> d1, passing the given internal states to the LSTM.
        dense0_result = self.get_sub_component_by_name("d0").apply(input_)
        lstm_result = self.get_sub_component_by_name("lstm").apply(dense0_result, internal_states)
        dense1_result = self.get_sub_component_by_name("d1").apply(lstm_result["output"])
        return dict(
            output=dense1_result,
            last_internal_states=lstm_result["last_internal_states"],
        )

    # Create a simple neural net with the above custom API-method.
    neural_net = NeuralNetwork(
        DenseLayer(units, scope="d0"),
        LSTMLayer(units, scope="lstm"),
        DenseLayer(units, scope="d1"),
        api_methods={("apply", custom_apply)},
    )

    # Do not seed, we calculate expectations manually.
    test = ComponentTest(
        component=neural_net,
        input_spaces={"input_": input_space, "internal_states": internal_states_space},
    )

    # Sample inputs with the size spec (batch_size, time_steps) = (2, 4) and
    # one set of internal states per batch item.
    input_ = input_space.sample((batch_size, time_steps))
    internal_states = internal_states_space.sample(batch_size)

    def read_variable(name):
        # Fetch the current value of one variable below the network's scope.
        return test.read_variable_values(neural_net.variable_registry["neural-network/" + name])

    # Read all weights (same order as the layers are listed: d0, d1, lstm).
    d0_kernel = read_variable("d0/dense/kernel")
    d0_bias = read_variable("d0/dense/bias")
    d1_kernel = read_variable("d1/dense/kernel")
    d1_bias = read_variable("d1/dense/bias")
    lstm_kernel = read_variable("lstm/lstm-cell/kernel")
    lstm_bias = read_variable("lstm/lstm-cell/bias")

    # Calculate output manually.
    d0_out = dense_layer(input_, d0_kernel, d0_bias)
    lstm_out, last_internal_states = lstm_layer(
        d0_out,
        lstm_kernel,
        lstm_bias,
        initial_internal_states=internal_states,
        time_major=False,
    )
    d1_out = dense_layer(lstm_out, d1_kernel, d1_bias)

    expected = {"output": d1_out, "last_internal_states": last_internal_states}
    test.test(("apply", [input_, internal_states]), expected_outputs=expected, decimals=5)

    test.terminate()