Esempio n. 1
0
    def __init__(self, network_spec, units_state_value_stream,
                 weights_spec_state_value_stream=None, biases_spec_state_value_stream=None,
                 activation_state_value_stream="relu", scope="dueling-policy", **kwargs):
        """
        Sets up the policy and the extra layers that make up its state-value stream.

        Args:
            network_spec: Forwarded unchanged to the parent constructor.
            units_state_value_stream: `units` setting for the dense layer of the state-value stream.
            weights_spec_state_value_stream: `weights_spec` setting for that dense layer (default: None).
            biases_spec_state_value_stream: `biases_spec` setting for that dense layer (default: None).
            activation_state_value_stream: `activation` setting for that dense layer (default: "relu").
            scope: Scope name handed to the parent constructor (default: "dueling-policy").
            **kwargs: Any further keyword arguments, forwarded to the parent constructor.
        """
        super(DuelingPolicy, self).__init__(network_spec, scope=scope, **kwargs)

        # NOTE(review): `self.action_space` is not set in this method; presumably the parent
        # constructor provides it - confirm.
        self.action_space_flattened = self.action_space.flatten()

        # Remember the configuration of the state-value stream on the instance.
        self.units_state_value_stream = units_state_value_stream
        self.weights_spec_state_value_stream = weights_spec_state_value_stream
        self.biases_spec_state_value_stream = biases_spec_state_value_stream
        self.activation_state_value_stream = activation_state_value_stream

        # Softmax component used for producing probabilities.
        self.softmax = Softmax()

        # Build the state-value stream: one configurable dense layer followed by a single
        # linear output unit.
        # TODO: Make this a NN-spec as well (right now it's one layer fixed plus the final value node).
        hidden_layer = DenseLayer(
            units=self.units_state_value_stream,
            weights_spec=self.weights_spec_state_value_stream,
            biases_spec=self.biases_spec_state_value_stream,
            activation=self.activation_state_value_stream,
            scope="dense-layer-state-value-stream"
        )
        self.dense_layer_state_value_stream = hidden_layer

        value_node = DenseLayer(units=1, activation="linear", scope="state-value-node")
        self.state_value_node = value_node

        # Register both new layers as sub-components of this policy.
        self.add_components(hidden_layer, value_node)
Esempio n. 2
0
    def test_softmax_on_simple_inputs(self):
        """
        Feeds a batch of plain float inputs through the Softmax component and compares its two
        outputs against the reference `softmax_` helper and the natural log of that reference.
        """
        component = Softmax()
        logits_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)
        component_test = ComponentTest(
            component=component,
            input_spaces={"logits": logits_space}
        )

        # Sample 5 items from the input space (used as the batch).
        batch = logits_space.sample(5)

        # Reference values: softmax of the sampled inputs plus the element-wise log thereof.
        reference_probs = softmax_(batch)
        reference_log_probs = np.log(reference_probs)

        component_test.test(
            ("softmax", batch),
            expected_outputs=(reference_probs, reference_log_probs)
        )
Esempio n. 3
0
    def test_softmax_on_complex_inputs(self):
        """
        Feeds a Dict-space input (keys "a" and "b") through the Softmax component and compares
        its two outputs, per key, against the reference `softmax_` helper and its natural log.
        """
        component = Softmax()
        sub_spaces = {
            "a": FloatBox(shape=(4, 5)),
            "b": FloatBox(shape=(3, )),
        }
        container_space = Dict(sub_spaces, add_batch_rank=True, add_time_rank=True)
        component_test = ComponentTest(
            component=component,
            input_spaces={"logits": container_space}
        )

        # NOTE(review): size=(4, 5) presumably maps to the batch and time ranks - confirm.
        samples = container_space.sample(size=(4, 5))

        # Build the per-key reference outputs (softmax values and their logs).
        reference_probs = {key: softmax_(samples[key]) for key in ("a", "b")}
        reference_log_probs = {key: np.log(probs) for key, probs in reference_probs.items()}

        # Results are compared using decimals=5.
        component_test.test(
            ("softmax", samples),
            expected_outputs=(reference_probs, reference_log_probs),
            decimals=5
        )