def __init__(
    self,
    network_spec,
    units_state_value_stream,
    weights_spec_state_value_stream=None,
    biases_spec_state_value_stream=None,
    activation_state_value_stream="relu",
    scope="dueling-policy",
    **kwargs
):
    """
    Sets up the extra state-value stream of the dueling policy.

    Args:
        network_spec: Forwarded unchanged to the parent constructor.
        units_state_value_stream: `units` setting of the state-value stream's dense layer.
        weights_spec_state_value_stream: `weights_spec` setting of that dense layer.
        biases_spec_state_value_stream: `biases_spec` setting of that dense layer.
        activation_state_value_stream: `activation` setting of that dense layer.
        scope: Forwarded to the parent constructor as `scope`.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super(DuelingPolicy, self).__init__(network_spec, scope=scope, **kwargs)

    # `self.action_space` is not assigned in this method; presumably the parent
    # constructor provides it (verify against the parent class).
    self.action_space_flattened = self.action_space.flatten()

    # Keep the configuration of the state-value stream on the instance.
    self.units_state_value_stream = units_state_value_stream
    self.weights_spec_state_value_stream = weights_spec_state_value_stream
    self.biases_spec_state_value_stream = biases_spec_state_value_stream
    self.activation_state_value_stream = activation_state_value_stream

    # Softmax component used to produce probabilities.
    self.softmax = Softmax()

    # Layers of the state-value stream: one configurable dense layer followed by
    # the final single-unit value node.
    # TODO: Make this a NN-spec as well (right now it's one layer fixed plus the final value node).
    stream_layer_settings = dict(
        units=self.units_state_value_stream,
        weights_spec=self.weights_spec_state_value_stream,
        biases_spec=self.biases_spec_state_value_stream,
        activation=self.activation_state_value_stream,
        scope="dense-layer-state-value-stream"
    )
    self.dense_layer_state_value_stream = DenseLayer(**stream_layer_settings)
    self.state_value_node = DenseLayer(units=1, activation="linear", scope="state-value-node")

    # Only the two state-value layers are registered as sub-components here.
    value_stream_layers = (self.dense_layer_state_value_stream, self.state_value_node)
    self.add_components(*value_stream_layers)
def test_softmax_on_simple_inputs(self):
    """
    Runs the Softmax component's "softmax" API on a batch sampled from a single
    FloatBox space and compares against `softmax_` reference values.
    """
    component = Softmax()
    logits_space = FloatBox(shape=(2, 2, 3), add_batch_rank=True)
    component_test = ComponentTest(component=component, input_spaces={"logits": logits_space})

    # Sample a batch of 5 inputs.
    batch = logits_space.sample(5)

    # Two outputs are expected: the reference values and their natural log.
    reference = softmax_(batch)
    reference_logs = np.log(reference)

    component_test.test(("softmax", batch), expected_outputs=(reference, reference_logs))
def test_softmax_on_complex_inputs(self):
    """
    Runs the Softmax component's "softmax" API on samples from a Dict space with
    two FloatBox members ("a" and "b") and compares each member against
    `softmax_` reference values, checked with `decimals=5`.
    """
    component = Softmax()
    member_spaces = dict(a=FloatBox(shape=(4, 5)), b=FloatBox(shape=(3, )))
    container_space = Dict(member_spaces, add_batch_rank=True, add_time_rank=True)
    component_test = ComponentTest(component=component, input_spaces={"logits": container_space})

    # size=(4, 5): presumably the sizes of the batch and time ranks (confirm against Space.sample).
    samples = container_space.sample(size=(4, 5))

    # Per-member reference values and their natural logs.
    reference = {key: softmax_(samples[key]) for key in ("a", "b")}
    reference_logs = {key: np.log(values) for key, values in reference.items()}

    component_test.test(
        ("softmax", samples),
        expected_outputs=(reference, reference_logs),
        decimals=5
    )