Beispiel #1
0
    def test_activation_functions(self):
        """
        Checks NNLayers that do nothing but apply a single activation function.
        """
        batched_space = FloatBox(shape=(3, ), add_batch_rank=True)

        # --- ReLU ---
        layer = NNLayer(activation="relu")
        test = ComponentTest(component=layer,
                             input_spaces=dict(inputs=batched_space))

        # Random batch of 5, checked against the numpy reference implementation.
        batch = batched_space.sample(size=5)
        reference = relu(batch)
        test.test(("apply", batch), expected_outputs=reference)

        # Hand-written values as a second check, in case the numpy reference `relu` itself is broken.
        batch = np.array([[1.0, 2.0, -5.0], [-10.0, -100.1, 4.5]])
        reference = np.array([[1.0, 2.0, 0.0], [0.0, 0.0, 4.5]])
        test.test(("apply", batch), expected_outputs=reference)

        # --- Sigmoid ---
        layer = NNLayer(activation="sigmoid")
        test = ComponentTest(component=layer,
                             input_spaces=dict(inputs=batched_space))

        # Random batch of 10, checked against the numpy reference implementation.
        batch = batched_space.sample(size=10)
        reference = sigmoid(batch)
        test.test(("apply", batch), expected_outputs=reference)
Beispiel #2
0
    def test_time_rank_folding_for_large_dense_nn(self):
        """
        Compares the runtime of a large dense network fed with batch+time ranked input against the
        runtime of an identically configured network fed with the same number of items whose time
        rank is already folded into the batch rank.
        """
        vector_dim = 256
        input_space = FloatBox(shape=(vector_dim, ),
                               add_batch_rank=True,
                               add_time_rank=True)
        base_config = config_from_path("configs/test_large_dense_nn.json")
        neural_net_wo_folding = NeuralNetwork.from_spec(base_config)

        test = ComponentTest(component=neural_net_wo_folding,
                             input_spaces=dict(nn_input=input_space))

        # Pull a large batch+time ranked sample.
        sample_shape = (256, 200)
        inputs = input_space.sample(sample_shape)

        start = time.monotonic()
        runs = 10
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_wo_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/o time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_wo_folding))

        neural_net_w_folding = NeuralNetwork.from_spec(base_config)

        # Folded space: batch rank only.
        input_space_folded = FloatBox(shape=(vector_dim, ),
                                      add_batch_rank=True)
        # Sample from the folded space itself (the one the component is built with), using a flat
        # batch that holds as many items as the batch+time sample above.
        inputs = input_space_folded.sample(sample_shape[0] * sample_shape[1])

        test = ComponentTest(component=neural_net_w_folding,
                             input_spaces=dict(nn_input=input_space_folded))

        start = time.monotonic()
        for _ in range(runs):
            print(".", flush=True, end="")
            test.test(("call", inputs), expected_outputs=None)
        runtime_w_folding = time.monotonic() - start

        print(
            "\nTesting large dense NN w/ time-rank folding: {}x pass through with {}-data took "
            "{}s".format(runs, sample_shape, runtime_w_folding))

        # NOTE(review): this compares two wall-clock measurements with `decimals=0`; presumably the
        # result depends on machine load -- confirm the tolerance is intended.
        recursive_assert_almost_equal(runtime_w_folding,
                                      runtime_wo_folding,
                                      decimals=0)
    def test_update_from_demos(self):
        """
        Exercises the dedicated API method for updating from demo data and checks that the two
        fixed demo states map back to their demo actions afterwards.
        """
        env = OpenAIGymEnv.from_spec(self.env_spec)
        agent_config = config_from_path("configs/dqfd_agent_for_cartpole.json")
        agent = DQFDAgent.from_spec(agent_config,
                                    state_space=env.state_space,
                                    action_space=env.action_space)
        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # Two fixed (state, action) pairs the agent is expected to reproduce.
        state_1 = agent.preprocessed_state_space.with_batch_rank().sample(1)
        action_1 = [1]
        state_2 = agent.preprocessed_state_space.with_batch_rank().sample(1)
        action_2 = [0]
        fixed_demos = ((state_1, action_1), (state_2, action_2))

        # Insert each fixed pair ten times; rewards, next states and terminals are sampled anew.
        for _ in range(10):
            for demo_state, demo_action in fixed_demos:
                agent.observe_demos(
                    preprocessed_states=demo_state,
                    actions=demo_action,
                    rewards=reward_space.sample(1),
                    next_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
                    terminals=terminal_space.sample(1),
                )

        # Update.
        agent.update_from_demos(num_updates=100, batch_size=8)

        # Without exploration, each fixed state must yield its demo action.
        for demo_state, demo_action in fixed_demos:
            chosen = agent.get_action(states=demo_state,
                                      apply_preprocessing=False,
                                      use_exploration=False)
            self.assertEqual(chosen, demo_action)
    def test_simple_nn_using_layers(self):
        """
        Builds a NeuralNetwork from the layer list of a json config and compares its output with a
        manually computed dense layer.
        """
        # The space needs a batch rank.
        input_space = FloatBox(shape=(4, ), add_batch_rank=True)

        # Pass the configured layers directly to the NeuralNetwork constructor.
        config = config_from_path("configs/test_simple_nn.json")
        neural_net = NeuralNetwork(*config["layers"])

        # No seeding: expectations are derived from the actual variable values below.
        test = ComponentTest(component=neural_net,
                             input_spaces=dict(inputs=input_space))

        # Batch of size=4.
        batch = input_space.sample(4)

        # Read kernel and bias of the hidden layer to compute the expected output by hand.
        kernel_key = "hidden-layer/dense/kernel"
        bias_key = "hidden-layer/dense/bias"
        variables = neural_net.get_variables(kernel_key,
                                             bias_key,
                                             global_scope=False)
        kernel = test.read_variable_values(variables[kernel_key])
        bias = test.read_variable_values(variables[bias_key])

        manual_output = dense_layer(batch, kernel, bias)

        test.test(("call", batch), expected_outputs=manual_output, decimals=5)

        test.terminate()
Beispiel #5
0
    def test_maxpool2d_layer(self):
        """
        Runs a 2x2 max-pooling layer over a batch of 2x2x3 inputs and compares the result with the
        per-channel maximum of each batch item.
        """
        space = FloatBox(shape=(2, 2, 3),
                         add_batch_rank=True)  # e.g. a simple 3-color image

        # NOTE: Strides shouldn't matter.
        pooling = MaxPool2DLayer(pool_size=2, strides=2, padding="valid")
        test = ComponentTest(component=pooling,
                             input_spaces=dict(inputs=space))

        # Batch of 2 samples.
        batch = space.sample(2)

        # The pool window covers the whole 2x2 plane, so each (item, channel) pair reduces to one
        # value: the maximum over both spatial axes. `keepdims` retains the two size-1 spatial
        # axes in the expected array.
        pooled = np.max(batch, axis=(1, 2), keepdims=True)

        test.test(("apply", batch), expected_outputs=pooled)
Beispiel #6
0
    def test_time_rank_folding_for_large_cnn_nn(self):
        """
        Wraps a CNN config in a time-rank folding reshape (front) and a time-rank unfolding reshape
        (back), then checks shape and dtype of the output for a time-major input.
        """
        width = 86
        height = 86
        time_rank = 20
        input_space = FloatBox(shape=(width, height, 3),
                               add_batch_rank=True,
                               add_time_rank=True,
                               time_major=True)
        base_config = config_from_path("configs/test_3x_cnn_nn.json")
        # Fold the time rank before the first configured layer ...
        base_config.insert(0, {"type": "reshape", "fold_time_rank": True})
        # ... and unfold it again after the last one.
        base_config.append({
            "type": "reshape",
            "unfold_time_rank": time_rank,
            "time_major": True
        })
        neural_net = NeuralNetwork.from_spec(base_config)

        test = ComponentTest(component=neural_net,
                             input_spaces=dict(nn_input=input_space))

        # Pull a large batch+time ranked sample.
        sample_shape = (time_rank, 256)
        inputs = input_space.sample(sample_shape)

        out = test.test(("call", inputs), expected_outputs=None)["output"]

        # assertEqual (instead of assertTrue(a == b)) reports both values when the check fails.
        self.assertEqual(out.shape, (time_rank, 256, 7 * 7 * 64))
        self.assertEqual(out.dtype, np.float32)
    def test_post_processing(self):
        """
        Calls the PPO agent's external batch post-processing API on a randomly sampled batch.
        """
        env = OpenAIGymEnv("Pong-v0",
                           frameskip=4,
                           max_num_noops=30,
                           episodic_life=True)
        agent = PPOAgent.from_spec(
            config_from_path("configs/ppo_agent_for_pong.json"),
            state_space=env.state_space,
            action_space=env.action_space
        )

        num_samples = 200
        states = agent.preprocessed_state_space.sample(num_samples)
        reward_space = FloatBox(add_batch_rank=True)
        terminal_space = BoolBox(add_batch_rank=True)
        sequence_indices_space = BoolBox(add_batch_rank=True)

        # Terminals and sequence indices are filled with 0; rewards are random.
        batch = dict(
            states=states,
            rewards=reward_space.sample(num_samples),
            terminals=terminal_space.sample(num_samples, fill_value=0),
            sequence_indices=sequence_indices_space.sample(num_samples, fill_value=0)
        )

        # GAE is tested separately; this only checks that the API method runs on the batch.
        pg_advantages = agent.post_process(batch)
    def test_keras_style_two_separate_input_spaces(self):
        """
        Builds a network via the functional (Keras-style) API from two independent input spaces
        and compares its output against a manual computation.
        """
        # Two input Spaces, defined independently (no container).
        int_space = IntBox(3, add_batch_rank=True)
        float_space = FloatBox(shape=(4,), add_batch_rank=True)

        # Branch 1: one-hot flatten the int input.
        flattened = ReShape(flatten=True, flatten_categories=True)(int_space)
        # Branch 2: two consecutive dense layers on the float input.
        hidden = DenseLayer(units=3, scope="d1")(float_space)
        projected = DenseLayer(units=5, scope="d2")(hidden)
        # Merge both branches.
        merged = ConcatLayer()(flattened, projected)

        # `outputs` lets the network trace the data flow back to the given input spaces.
        neural_net = NeuralNetwork(inputs=[int_space, float_space], outputs=merged)

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=[int_space, float_space]))

        # Read the dense layers' weights to compute the expected output by hand.
        registry = neural_net.variable_registry
        kernel_1 = test.read_variable_values(registry["neural-network/d1/dense/kernel"])
        bias_1 = test.read_variable_values(registry["neural-network/d1/dense/bias"])
        kernel_2 = test.read_variable_values(registry["neural-network/d2/dense/kernel"])
        bias_2 = test.read_variable_values(registry["neural-network/d2/dense/bias"])

        # One batch of size 4 per input space.
        batch = [int_space.sample(4), float_space.sample(4)]

        one_hot_branch = one_hot(batch[0])
        dense_branch = dense_layer(dense_layer(batch[1], kernel_1, bias_1), kernel_2, bias_2)
        expected = np.concatenate([one_hot_branch, dense_branch], axis=-1)

        out = test.test(("call", batch), expected_outputs=expected)

        test.terminate()
    def test_keras_style_simple_nn(self):
        """
        Builds a two-layer dense network via the functional (Keras-style) API, giving only the final
        output, and compares the network output against a manual computation.
        """
        # Input Space of the network.
        input_space = FloatBox(shape=(3,), add_batch_rank=True)

        # First dense layer (linear), called directly on the input space.
        linear_out = DenseLayer(units=5, activation="linear", scope="a")(input_space)
        # Second dense layer (relu), called on the first layer's output.
        relu_out = DenseLayer(units=7, activation="relu", scope="b")(linear_out)

        # Only the final output is passed; the network traces back from it towards the input space
        # and picks up the components on the way.
        neural_net = NeuralNetwork(outputs=relu_out)

        test = ComponentTest(component=neural_net, input_spaces=dict(inputs=input_space))

        # Batch of size 5.
        batch = input_space.sample(5)

        # Read both layers' weights to compute the expected output by hand.
        variable_names = ("a/dense/kernel", "a/dense/bias", "b/dense/kernel", "b/dense/bias")
        variables = neural_net.get_variables(*variable_names, global_scope=False)
        kernel_a = test.read_variable_values(variables["a/dense/kernel"])
        bias_a = test.read_variable_values(variables["a/dense/bias"])
        kernel_b = test.read_variable_values(variables["b/dense/kernel"])
        bias_b = test.read_variable_values(variables["b/dense/bias"])

        first_layer = dense_layer(batch, kernel_a, bias_a)
        second_layer = dense_layer(first_layer, kernel_b, bias_b)
        expected = relu(second_layer)

        test.test(("call", batch), expected_outputs=expected, decimals=5)

        test.terminate()
    def test_add_layer_to_simple_nn(self):
        """
        Adds a dense layer to a network created from a json config and compares the output with
        two manually chained dense layers.
        """
        # The space needs a batch rank.
        input_space = FloatBox(shape=(3, ), add_batch_rank=True)

        # Create the simple network from json, then append one more dense layer.
        spec = config_from_path("configs/test_simple_nn.json")
        neural_net = NeuralNetwork.from_spec(spec)  # type: NeuralNetwork
        neural_net.add_layer(DenseLayer(units=10, scope="last-layer"))

        # No seeding: expectations are derived from the actual variable values below.
        test = ComponentTest(component=neural_net,
                             input_spaces=dict(nn_input=input_space))

        # Batch of size=3.
        batch = input_space.sample(3)

        # Read all variable values in one go.
        values = test.read_variable_values(neural_net.variable_registry)

        hidden_out = dense_layer(batch,
                                 values["test-network/hidden-layer/dense/kernel"],
                                 values["test-network/hidden-layer/dense/bias"])
        expected = dense_layer(hidden_out,
                               values["test-network/last-layer/dense/kernel"],
                               values["test-network/last-layer/dense/bias"])

        test.test(("apply", batch),
                  expected_outputs=dict(output=expected),
                  decimals=5)

        test.terminate()
    def test_functional_api_one_output_is_discarded(self):
        """
        Builds a functional-API network from only the first of an LSTM layer's two outputs and
        checks the network output against a manual LSTM computation.
        """
        # Input Space of the network (batch and time rank).
        input_space = FloatBox(shape=(3, ),
                               add_batch_rank=True,
                               add_time_rank=True)

        # The LSTM call yields two outputs (output and internal states); the second is dropped.
        lstm_output, _ = LSTMLayer(units=2, return_sequences=False)(input_space)

        # Network with a single output (internal states are not returned).
        neural_net = NeuralNetwork(outputs=lstm_output)

        test = ComponentTest(component=neural_net,
                             input_spaces=dict(inputs=input_space))

        # Sample of size (5, 3).
        batch = input_space.sample((5, 3))

        # Read the LSTM cell's weights to compute the expected output by hand.
        registry = neural_net.variable_registry
        kernel = test.read_variable_values(
            registry["neural-network/lstm-layer/lstm-cell/kernel"])
        bias = test.read_variable_values(
            registry["neural-network/lstm-layer/lstm-cell/bias"])

        full_sequence, _ = lstm_layer(batch, kernel, bias)
        # Keep the last time step only.
        last_step = full_sequence[:, -1, :]

        # No internal states expected (per the functional API definition above).
        test.test(("call", batch), expected_outputs=last_step, decimals=5)

        test.terminate()
    def test_insert_demos(self):
        """
        Inserts a single demo record and then a batch of ten into the agent's demo memory.
        """
        env = OpenAIGymEnv.from_spec(self.env_spec)

        agent = DQFDAgent.from_spec(
            config_from_path("configs/dqfd_agent_for_cartpole.json"),
            state_space=env.state_space,
            action_space=env.action_space
        )
        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # Single data point: every field is sampled from a batch-ranked space with size 1.
        single_demo = dict(
            preprocessed_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
            actions=env.action_space.with_batch_rank().sample(1),
            rewards=reward_space.sample(1),
            next_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
            terminals=terminal_space.sample(1),
        )
        agent.observe_demos(**single_demo)

        # Batch of ten demos.
        demo_batch = dict(
            preprocessed_states=agent.preprocessed_state_space.sample(10),
            actions=env.action_space.sample(10),
            rewards=FloatBox().sample(10),
            terminals=terminal_space.sample(10),
            next_states=agent.preprocessed_state_space.sample(10)
        )
        agent.observe_demos(**demo_batch)
Beispiel #13
0
    def test_dummy_nn_layer(self):
        """
        Checks that an NNLayer without an activation passes its input through unchanged.
        """
        space = FloatBox(shape=(3,), add_batch_rank=True)

        # No activation and no layer (graph_fn) computation.
        passthrough_layer = NNLayer(activation=None)
        test = ComponentTest(component=passthrough_layer, input_spaces=dict(inputs=space))

        # The expected output is the input batch itself.
        batch = space.sample(size=5)
        test.test(("apply", batch), expected_outputs=batch)
Beispiel #14
0
    def test_demos_with_container_actions(self):
        """
        Tests whether DQfD can fit a set of demo states to a set of container (Dict) demo actions.
        """
        vocab_size = 100
        embed_dim = 128
        # ID/state space.
        state_space = IntBox(vocab_size, shape=(10, ))
        # Container action space with three integer sub-actions.
        num_outputs = 3
        actions_space = Dict({
            'action_{}'.format(i): IntBox(low=0, high=num_outputs)
            for i in range(3)
        })

        agent_config = config_from_path("configs/dqfd_container.json")
        # Replace the network: embedding -> flatten -> dense(relu).
        agent_config["network_spec"] = [
            dict(type="embedding", embed_dim=embed_dim, vocab_size=vocab_size),
            dict(type="reshape", flatten=True),
            dict(type="dense", units=embed_dim, activation="relu", scope="dense_1")
        ]
        agent = DQFDAgent.from_spec(agent_config,
                                    state_space=state_space,
                                    action_space=actions_space)
        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # Create 20 demos: rewards are all 1.0, terminals are all False.
        num_demos = 20
        demo_states = agent.preprocessed_state_space.with_batch_rank().sample(num_demos)
        demo_actions = actions_space.with_batch_rank().sample(num_demos)
        demo_rewards = reward_space.sample(num_demos, fill_value=1.0)
        demo_next_states = agent.preprocessed_state_space.with_batch_rank().sample(num_demos)
        demo_terminals = terminal_space.sample(num_demos, fill_value=False)

        # Insert.
        agent.observe_demos(
            preprocessed_states=demo_states,
            actions=demo_actions,
            rewards=demo_rewards,
            next_states=demo_next_states,
            terminals=demo_terminals,
        )

        # Fit demos.
        agent.update_from_demos(num_updates=5000, batch_size=20)

        # Without exploration, the agent should reproduce the demo actions for the demo states.
        learned_actions = agent.get_action(demo_states,
                                           apply_preprocessing=False,
                                           use_exploration=False)
        recursive_assert_almost_equal(learned_actions, demo_actions)
Beispiel #15
0
    def test_softmax_on_simple_inputs(self):
        """
        Compares the Softmax component's two outputs with the numpy softmax and its logarithm.
        """
        logits_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)
        component = Softmax()
        test = ComponentTest(component=component,
                             input_spaces=dict(logits=logits_space))

        # Batch=5
        logits = logits_space.sample(5)
        probabilities = softmax_(logits)
        log_probabilities = np.log(probabilities)

        test.test(("softmax", logits),
                  expected_outputs=(probabilities, log_probabilities))
    def test_lstm_nn(self):
        """
        Runs a dense -> LSTM network built from a json config and compares both of its outputs
        (LSTM output and last internal states) with a manual computation.
        """
        batch_size = 2
        time_steps = 4
        input_nodes = 2
        # The space needs a batch rank (and here also a time rank).
        input_space = FloatBox(shape=(input_nodes, ),
                               add_batch_rank=True,
                               add_time_rank=True)

        spec = config_from_path("configs/test_dense_to_lstm_nn.json")
        neural_net = NeuralNetwork.from_spec(spec)

        # No seeding: expectations are derived from the actual variable values below.
        test = ComponentTest(component=neural_net,
                             input_spaces=dict(inputs=input_space))

        # Batch of size=2, time-steps=4.
        batch = input_space.sample((batch_size, time_steps))

        # Read the weights of the dense layer and the LSTM cell.
        registry = neural_net.variable_registry
        dense_kernel = test.read_variable_values(
            registry["test-lstm-network/dense-layer/dense/kernel"])
        dense_bias = test.read_variable_values(
            registry["test-lstm-network/dense-layer/dense/bias"])
        lstm_kernel = test.read_variable_values(
            registry["test-lstm-network/lstm-layer/lstm-cell/kernel"])
        lstm_bias = test.read_variable_values(
            registry["test-lstm-network/lstm-layer/lstm-cell/bias"])

        # Manual forward pass: dense layer first, then the (batch-major) LSTM.
        dense_out = dense_layer(batch, dense_kernel, dense_bias)
        lstm_out, last_internal_states = lstm_layer(dense_out,
                                                    lstm_kernel,
                                                    lstm_bias,
                                                    time_major=False)

        test.test(("call", batch),
                  expected_outputs=(lstm_out, last_internal_states),
                  decimals=5)

        test.terminate()
Beispiel #17
0
    def test_two_sub_components_and_time_rank_unfolding(self):
        """
        Runs a Stack of two Dummy1To1 components whose `run` API unfolds the time rank and checks
        the output against hard-coded values.
        """
        sub_components = (
            Dummy1To1(scope="A", constant_value=3.0),
            Dummy1To1(scope="B", constant_value=1.0),
        )
        stack = Stack(*sub_components,
                      api_methods=[dict(api="run", unfold_time_rank=True)])

        # `inputs` takes two spaces: the flat one and the same space with a time rank added.
        flat_space = FloatBox(add_batch_rank=True)
        test = ComponentTest(
            component=stack,
            input_spaces=dict(inputs=[flat_space, flat_space.with_time_rank()]))

        # A random sample of 4 items reshaped to (2, 2) serves as the second (pre-folding) input.
        input_before_folding = flat_space.sample(size=4).reshape((2, 2))

        # Each expected value equals the fixed input plus 4.0 (presumably the sum of the two
        # constants, 3.0 + 1.0), arranged as 2x2.
        fixed_inputs = np.array([4.6, 5.2, 1.0, 2.0])
        expected = np.array([[8.6, 9.2], [5.0, 6.0]], dtype=np.float32)

        test.test(("run", [fixed_inputs, input_before_folding]),
                  expected_outputs=expected)
Beispiel #18
0
    def test_two_sub_components_1to2_2to1_time_rank_folding_and_unfolding(
            self):
        """
        Runs a Stack of a Dummy1To2 followed by a Dummy2To1 whose `run` API both folds and unfolds
        the time rank, on a time-major input.
        """
        sub_components = [
            Dummy1To2(scope="A", constant_value=1.5),
            Dummy2To1(scope="B"),
        ]
        run_api = dict(api="run", fold_time_rank=True, unfold_time_rank=True)
        stack = Stack(sub_components, api_methods=[run_api])

        input_space = FloatBox(add_batch_rank=True,
                               add_time_rank=True,
                               time_major=True)
        test = ComponentTest(component=stack,
                             input_spaces=dict(inputs=[input_space]))

        batch = input_space.sample(size=(2, 3))
        # Expected: (x + 1.5) + (x * 1.5).
        shifted = batch + 1.5
        scaled = batch * 1.5
        expected_outputs = shifted + scaled

        test.test(("run", batch), expected_outputs=expected_outputs)
Beispiel #19
0
    def test_slice_with_squeeze(self):
        """
        Checks the Slice component with `squeeze=True` on a time-major input for three index ranges.
        """
        input_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True, add_time_rank=True, time_major=True)
        test = ComponentTest(
            component=Slice(squeeze=True),
            input_spaces=dict(
                preprocessing_inputs=input_space,
                start_index=IntBox(),
                end_index=IntBox()
            )
        )

        # Time-steps=3, Batch=5
        inputs = input_space.sample(size=(3, 5))

        # Single-element range [1, 2): expect the element itself, without the leading rank.
        test.test(("slice", [inputs, 1, 2]), expected_outputs=inputs[1])

        # Two-element range [0, 2): expect the plain slice.
        test.test(("slice", [inputs, 0, 2]), expected_outputs=inputs[0:2])

        # Single-element range [0, 1): expect the element itself, without the leading rank.
        test.test(("slice", [inputs, 0, 1]), expected_outputs=inputs[0])
Beispiel #20
0
    def test_slice_without_squeeze(self):
        """
        Checks the Slice component with `squeeze=False`: single-element ranges keep their leading rank.
        """
        input_space = FloatBox(shape=(1, 4, 5), add_batch_rank=True)
        test = ComponentTest(component=Slice(squeeze=False),
                             input_spaces=dict(inputs=input_space,
                                               start_index=IntBox(),
                                               end_index=IntBox()))

        # Batch=4
        inputs = input_space.sample(size=4)

        # Single-element range [1, 2): the not-squeezed rank stays in the expected value.
        test.test(("slice", [inputs, 1, 2]), expected_outputs=inputs[1:2])

        # Two-element range [0, 2).
        test.test(("slice", [inputs, 0, 2]), expected_outputs=inputs[0:2])

        # Single-element range [0, 1): again with the leading rank kept.
        test.test(("slice", [inputs, 0, 1]), expected_outputs=inputs[0:1])
Beispiel #21
0
    def test_local_response_normalization_layer(self):
        """
        Checks the local response normalization layer against the element-wise formula
        x / (bias + alpha * x^2)^beta, with random bias/alpha/beta and a depth radius of zero.
        """
        space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)  # e.g. a simple 3-color image

        # Todo: This is a very simple example ignoring the depth radius, which is the main idea of this normalization
        depth_radius = 0.0
        # Each parameter is 1.0 plus a draw from np.random.random().
        bias = np.random.random() + 1.0
        alpha = np.random.random() + 1.0
        beta = np.random.random() + 1.0

        lrn_layer = LocalResponseNormalizationLayer(
            depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta
        )
        test = ComponentTest(component=lrn_layer, input_spaces=dict(inputs=space))

        # Batch of 2 samples.
        batch = space.sample(2)

        denominator = (bias + alpha * np.square(batch)) ** beta
        expected = np.array(batch / denominator)

        test.test(("apply", batch), expected_outputs=expected)
Beispiel #22
0
    def test_container_actions(self):
        """
        Inserts one demo with a container (Dict) action into a DQfD agent that uses an embedding network.
        """
        vocab_size = 100
        embed_dim = 128
        # ID/state space.
        state_space = IntBox(vocab_size, shape=(10, ))
        # Container action space with three integer sub-actions.
        num_outputs = 3
        actions_space = Dict({
            'action_{}'.format(i): IntBox(low=0, high=num_outputs)
            for i in range(3)
        })

        agent_config = config_from_path("configs/dqfd_container.json")
        # Replace the network: embedding -> flatten -> dense(relu).
        agent_config["network_spec"] = [
            dict(type="embedding", embed_dim=embed_dim, vocab_size=vocab_size),
            dict(type="reshape", flatten=True),
            dict(type="dense", units=embed_dim, activation="relu", scope="dense_1")
        ]
        agent = DQFDAgent.from_spec(agent_config,
                                    state_space=state_space,
                                    action_space=actions_space)
        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # One demo record, every field sampled with size 1.
        demo = dict(
            preprocessed_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
            actions=actions_space.with_batch_rank().sample(1),
            rewards=reward_space.sample(1),
            next_states=agent.preprocessed_state_space.with_batch_rank().sample(1),
            terminals=terminal_space.sample(1),
        )
        agent.observe_demos(**demo)
    def test_custom_margin_demos_with_container_actions(self):
        """
        Updates a DQfD agent on an external two-sample batch with per-sample expert margins.

        Both samples share the same state but carry different actions and margins. The learned
        action is only printed, not asserted.
        """
        vocab_size = 100
        embed_dim = 8
        # ID/state space.
        state_space = IntBox(vocab_size, shape=(10,))
        # Container action space with three integer sub-actions.
        num_outputs = 3
        actions_space = Dict({
            'action_{}'.format(i): IntBox(low=0, high=num_outputs)
            for i in range(3)
        })

        agent_config = config_from_path("configs/dqfd_container.json")
        # Replace the network: embedding -> flatten -> dense(relu).
        agent_config["network_spec"] = [
            dict(type="embedding", embed_dim=embed_dim, vocab_size=vocab_size),
            dict(type="reshape", flatten=True),
            dict(type="dense", units=embed_dim, activation="relu", scope="dense_1")
        ]
        agent = DQFDAgent.from_spec(
            agent_config,
            state_space=state_space,
            action_space=actions_space
        )
        terminal_space = BoolBox(add_batch_rank=True)
        reward_space = FloatBox(add_batch_rank=True)

        # Two demos that share the same state.
        demo_states = agent.preprocessed_state_space.with_batch_rank().sample(2)
        demo_states[1] = demo_states[0]
        demo_actions = actions_space.with_batch_rank().sample(2)

        # First demo uses 0 for every sub-action, second demo uses 1.
        for name, _ in actions_space.items():
            demo_actions[name][0] = 0
            demo_actions[name][1] = 1

        # Both rewards are zero, so the two demos differ only in their actions and margins.
        demo_rewards = reward_space.sample(2, fill_value=.0)
        demo_rewards[0] = demo_rewards[1] = 0

        # One action is encouraged, one is discouraged.
        margins = np.asarray([0.5, -0.5])

        demo_next_states = agent.preprocessed_state_space.with_batch_rank().sample(2)
        demo_terminals = terminal_space.sample(2, fill_value=False)

        # When using margins, need to use external batch.
        batch = dict(
            states=demo_states,
            actions=demo_actions,
            rewards=demo_rewards,
            next_states=demo_next_states,
            importance_weights=np.ones_like(demo_rewards),
            terminals=demo_terminals,
        )
        update_kwargs = dict(
            batch=batch,
            update_from_demos=False,
            apply_demo_loss_to_batch=True,
            expert_margins=margins
        )
        # Fit demos with custom margins.
        for _ in range(10000):
            agent.update(**update_kwargs)

        # Query the shared state without exploration and print the result.
        query_state = np.array([demo_states[0]])
        agent_actions = agent.get_action(query_state, apply_preprocessing=False, use_exploration=False)
        print("learned action = ", agent_actions)
Beispiel #24
0
    def test_sac_agent_component_on_fake_env(self):
        """
        Trains a SACAgentComponent on a fake, state-independent reward signal.

        Each update feeds an external batch whose rewards are the pdf-values of a normal distribution
        (loc=0.5, scale=0.2) at the sampled actions. Afterwards the test checks that the mean actor- and
        critic-losses of the last 100 updates are below those of the first 100, that the returned Q-values
        are within 0.2 of that pdf, and that the mean sampled action is within 0.1 of the distribution's mean.
        """
        config = config_from_path("configs/sac_component_for_fake_env_test.json")

        # Arbitrary state space, state should not be used in this example.
        state_space = FloatBox(shape=(2,))
        continuous_action_space = FloatBox(low=-1.0, high=1.0)
        terminal_space = BoolBox(add_batch_rank=True)
        policy = Policy.from_spec(config["policy"], action_space=continuous_action_space)
        policy.add_components(Synchronizable(), expose_apis="sync")
        q_function = ValueFunction.from_spec(config["value_function"])

        agent_component = SACAgentComponent(
            agent=None,
            policy=policy,
            q_function=q_function,
            preprocessor=PreprocessorStack.from_spec([]),
            memory=ReplayMemory.from_spec(config["memory"]),
            discount=config["discount"],
            initial_alpha=config["initial_alpha"],
            target_entropy=None,
            optimizer=AdamOptimizer.from_spec(config["optimizer"]),
            vf_optimizer=AdamOptimizer.from_spec(config["value_function_optimizer"], scope="vf-optimizer"),
            alpha_optimizer=None,
            q_sync_spec=SyncSpecification(sync_interval=10, sync_tau=1.0),
            num_q_functions=2
        )

        test = ComponentTest(
            component=agent_component,
            input_spaces=dict(
                states=state_space.with_batch_rank(),
                preprocessed_states=state_space.with_batch_rank(),
                actions=continuous_action_space.with_batch_rank(),
                rewards=FloatBox(add_batch_rank=True),
                next_states=state_space.with_batch_rank(),
                terminals=terminal_space,
                batch_size=int,
                preprocessed_s_prime=state_space.with_batch_rank(),
                importance_weights=FloatBox(add_batch_rank=True),
                preprocessed_next_states=state_space.with_batch_rank(),
                deterministic=bool,
                weights="variables:{}".format(policy.scope),
                # TODO: how to provide the space for multiple component variables?
                # q_weights=Dict(
                #    q_0="variables:{}".format(q_function.scope),
                #    q_1="variables:{}".format(agent_component._q_functions[1].scope),
                # )
            ),
            action_space=continuous_action_space,
            build_kwargs=dict(
                optimizer=agent_component._optimizer,
                build_options=dict(
                    vf_optimizer=agent_component.vf_optimizer,
                ),
            )
        )
        # Make sure the ComponentTest gets terminated, even if one of the assertions below fails.
        self.addCleanup(test.terminate)

        policy_loss = []
        vf_loss = []

        # This test simulates an env that always requires actions to be close to the max-pdf
        # value of a loc=0.5, scale=0.2 normal, regardless of any state inputs.
        # The component should learn to produce actions like that (close to 0.5).
        true_mean = 0.5
        target_dist = stats.norm(loc=true_mean, scale=0.2)
        batch_size = 100
        for _ in range(5000):
            action_sample = continuous_action_space.sample(batch_size)
            rewards = target_dist.pdf(action_sample)
            result = test.test(("update_from_external_batch", [
                state_space.sample(batch_size),
                action_sample,
                rewards,
                [True] * batch_size,
                state_space.sample(batch_size),
                [1.0] * batch_size  # importance
            ]))
            policy_loss.append(result["actor_loss"])
            vf_loss.append(result["critic_loss"])

        # Both losses must have gone down on average (first 100 vs last 100 updates).
        self.assertGreater(np.mean(policy_loss[:100]), np.mean(policy_loss[-100:]))
        self.assertGreater(np.mean(vf_loss[:100]), np.mean(vf_loss[-100:]))

        # Each returned Q-value array should approximate the reward function (the target pdf).
        action_sample = np.linspace(-1, 1, batch_size)
        expected_q = target_dist.pdf(action_sample)
        q_values = test.test(("get_q_values", [state_space.sample(batch_size), action_sample]))
        for q_val in q_values:
            np.testing.assert_allclose(q_val.flatten(), expected_q, atol=0.2)

        # Actions drawn for random states should, on average, sit at the pdf's maximum.
        # NOTE(review): the trailing `False` is presumably the `deterministic` flag - confirm against the API.
        action_sample, _ = test.test(("action_from_preprocessed_state", [state_space.sample(batch_size), False]))
        action_sample = action_sample.flatten()
        np.testing.assert_allclose(np.mean(action_sample), true_mean, atol=0.1)
    def test_lstm_nn_with_custom_apply(self):
        """
        Runs a dense -> LSTM -> dense NeuralNetwork through a user-supplied "apply" API-method and compares
        its outputs against a manual calculation that uses the network's own variable values.
        """
        # Problem dimensions.
        units = 3
        batch_size, time_steps = 2, 4
        input_nodes = 2

        # The input Space needs a batch rank (otherwise, NNLayer will complain); it also carries a time rank.
        input_space = FloatBox(shape=(input_nodes, ), add_batch_rank=True, add_time_rank=True)
        # Two `units`-sized vectors per batch item, handed to the LSTM as its internal states.
        state_spaces = [FloatBox(shape=(units, )) for _ in range(2)]
        internal_states_space = Tuple(*state_spaces, add_batch_rank=True)

        def custom_apply(self, input_, internal_states=None):
            # d0 -> lstm -> d1; only the LSTM's "output" entry is passed on to the last dense layer.
            after_d0 = self.get_sub_component_by_name("d0").apply(input_)
            lstm_result = self.get_sub_component_by_name("lstm").apply(after_d0, internal_states)
            after_d1 = self.get_sub_component_by_name("d1").apply(lstm_result["output"])
            return {"output": after_d1, "last_internal_states": lstm_result["last_internal_states"]}

        # A simple three-layer network that exposes the function above as its "apply" API-method.
        neural_net = NeuralNetwork(
            DenseLayer(units, scope="d0"),
            LSTMLayer(units, scope="lstm"),
            DenseLayer(units, scope="d1"),
            api_methods={("apply", custom_apply)}
        )

        # No seeding necessary: expectations are derived from the actual variable values below.
        spaces = {"input_": input_space, "internal_states": internal_states_space}
        test = ComponentTest(component=neural_net, input_spaces=spaces)

        # Batch of size=2, time-steps=4.
        input_ = input_space.sample((batch_size, time_steps))
        internal_states = internal_states_space.sample(batch_size)

        def fetch(key):
            # Look up a variable in the network's registry and return its current value.
            return test.read_variable_values(neural_net.variable_registry[key])

        # Pull all kernels/biases out of the built network.
        d0_kernel = fetch("neural-network/d0/dense/kernel")
        d0_bias = fetch("neural-network/d0/dense/bias")
        d1_kernel = fetch("neural-network/d1/dense/kernel")
        d1_bias = fetch("neural-network/d1/dense/bias")
        lstm_kernel = fetch("neural-network/lstm/lstm-cell/kernel")
        lstm_bias = fetch("neural-network/lstm/lstm-cell/bias")

        # Re-compute the forward pass by hand with the reference helper functions.
        hidden = dense_layer(input_, d0_kernel, d0_bias)
        lstm_output, final_states = lstm_layer(
            hidden, lstm_kernel, lstm_bias, initial_internal_states=internal_states, time_major=False
        )
        final_output = dense_layer(lstm_output, d1_kernel, d1_bias)

        expected = {"output": final_output, "last_internal_states": final_states}
        test.test(("apply", [input_, internal_states]), expected_outputs=expected, decimals=5)

        test.terminate()
Beispiel #26
0
    def test_sac_agent_component_functionality(self):
        """
        Smoke-tests the main API-methods of a SACAgentComponent.

        Performs a single update from an external batch, queries the Q-values and samples actions, and
        checks only the dtypes and shapes of the returned values (not their numerical content).
        """
        config = config_from_path(
            "configs/sac_component_for_fake_env_test.json")

        # Arbitrary state space, state should not be used in this example.
        state_space = FloatBox(shape=(8, ))
        continuous_action_space = FloatBox(shape=(1, ), low=-2.0, high=2.0)
        terminal_space = BoolBox(add_batch_rank=True)
        rewards_space = FloatBox(add_batch_rank=True)
        policy = Policy.from_spec(config["policy"],
                                  action_space=continuous_action_space)
        policy.add_components(Synchronizable(), expose_apis="sync")
        q_function = ValueFunction.from_spec(config["value_function"])

        agent_component = SACAgentComponent(
            agent=None,
            policy=policy,
            q_function=q_function,
            preprocessor=PreprocessorStack.from_spec([]),
            memory=ReplayMemory.from_spec(config["memory"]),
            discount=config["discount"],
            initial_alpha=config["initial_alpha"],
            target_entropy=None,
            optimizer=AdamOptimizer.from_spec(config["optimizer"]),
            vf_optimizer=AdamOptimizer.from_spec(
                config["value_function_optimizer"], scope="vf-optimizer"),
            alpha_optimizer=None,
            q_sync_spec=SyncSpecification(sync_interval=10, sync_tau=1.0),
            num_q_functions=2)

        test = ComponentTest(
            component=agent_component,
            input_spaces=dict(
                states=state_space.with_batch_rank(),
                preprocessed_states=state_space.with_batch_rank(),
                env_actions=continuous_action_space.with_batch_rank(),
                actions=continuous_action_space.with_batch_rank(),
                rewards=rewards_space,
                next_states=state_space.with_batch_rank(),
                terminals=terminal_space,
                batch_size=int,
                preprocessed_s_prime=state_space.with_batch_rank(),
                importance_weights=FloatBox(add_batch_rank=True),
                preprocessed_next_states=state_space.with_batch_rank(),
                deterministic=bool,
                weights="variables:{}".format(policy.scope),
                # TODO: how to provide the space for multiple component variables?
                #q_weights=Dict(
                #    q_0="variables:{}".format(q_function.scope),
                #    q_1="variables:{}".format(agent_component._q_functions[1].scope),
                #)
            ),
            action_space=continuous_action_space,
            build_kwargs=dict(
                optimizer=agent_component._optimizer,
                build_options=dict(
                    vf_optimizer=agent_component.vf_optimizer, ),
            ))
        # Make sure the ComponentTest gets terminated, even if one of the assertions below fails.
        self.addCleanup(test.terminate)

        batch_size = 10
        action_sample = continuous_action_space.with_batch_rank().sample(
            batch_size)
        rewards = rewards_space.sample(batch_size)
        # Check, whether an update runs ok.
        result = test.test((
            "update_from_external_batch",
            [
                state_space.sample(batch_size),
                action_sample,
                rewards,
                [True] * batch_size,
                state_space.sample(batch_size),
                [1.0] * batch_size  # importance
            ]))
        self.assertEqual(result["actor_loss"].dtype, np.float32)
        self.assertEqual(result["critic_loss"].dtype, np.float32)

        # Every returned Q-value array must hold one float32 value per batch item.
        action_sample = np.linspace(-1, 1, batch_size).reshape((batch_size, 1))
        q_values = test.test(
            ("get_q_values", [state_space.sample(batch_size), action_sample]))
        for q_val in q_values:
            self.assertEqual(q_val.dtype, np.float32)
            self.assertEqual(q_val.shape, (batch_size, 1))

        # Sampled actions must match the (1,)-shaped action space, batched.
        # NOTE(review): the trailing `False` is presumably the `deterministic` flag - confirm against the API.
        action_sample, _ = test.test(("action_from_preprocessed_state",
                                      [state_space.sample(batch_size), False]))
        self.assertEqual(action_sample.dtype, np.float32)
        self.assertEqual(action_sample.shape, (batch_size, 1))