예제 #1
0
    def test_softplus_min_std(self, output_dim, hidden_sizes):
        """Check softplus std outputs when `min_std` is larger than `init_std`.

        Builds a model with `init_std=2.0` and `min_std=10`, then expects the
        fetched log-std to be log(10) and the fetched std parameter to be
        log(exp(10) - 1), i.e. the inverse softplus of 10.

        Args:
            output_dim: Output dimension passed to the model.
            hidden_sizes: Hidden layer sizes passed to the model.
        """
        min_std = 10
        model_kwargs = dict(
            filters=((3, (3, 3)), (6, (3, 3))),
            strides=[1, 1],
            padding='VALID',
            output_dim=output_dim,
            init_std=2.0,
            hidden_sizes=hidden_sizes,
            std_share_network=False,
            adaptive_std=False,
            hidden_nonlinearity=None,
            std_parameterization='softplus',
            min_std=min_std,
            hidden_w_init=tf.constant_initializer(0.1),
            output_w_init=tf.constant_initializer(1),
        )
        model = GaussianCNNModel(**model_kwargs)

        # The last build output is not fetched; the first two fetched values
        # are not checked by this test.
        fetches = model.build(self._input_ph)[:-1]
        fetched = self.sess.run(fetches, feed_dict={self._input_ph: self.obs})
        _, _, log_std, std_param = fetched

        expected_shape = [1, output_dim]
        want_log_std = np.full(expected_shape, np.log(min_std))
        want_std_param = np.full(expected_shape, np.log(np.exp(min_std) - 1))

        assert np.allclose(log_std, want_log_std)
        assert np.allclose(std_param, want_std_param)
예제 #2
0
    def test_softplus_max_std(self, output_dim, hidden_sizes):
        """Check softplus std outputs when `max_std` is smaller than `init_std`.

        Builds a model with `init_std=10.0` and `max_std=1.0`, then expects
        the fetched log-std to be log(1) and the fetched std parameter to be
        log(exp(1) - 1), both within an absolute tolerance of 1e-4.

        Args:
            output_dim: Output dimension passed to the model.
            hidden_sizes: Hidden layer sizes passed to the model.
        """
        model_kwargs = dict(
            filter_dims=[3, 3],
            num_filters=[3, 6],
            strides=[1, 1],
            padding='VALID',
            output_dim=output_dim,
            init_std=10.0,
            hidden_sizes=hidden_sizes,
            std_share_network=False,
            adaptive_std=False,
            hidden_nonlinearity=None,
            std_parameterization='softplus',
            max_std=1.0,
            hidden_w_init=tf.constant_initializer(0.1),
            output_w_init=tf.constant_initializer(1),
        )
        model = GaussianCNNModel(**model_kwargs)

        # The last build output is not fetched; the first two fetched values
        # are not checked by this test.
        fetches = model.build(self._input_ph)[:-1]
        _, _, log_std, std_param = self.sess.run(
            fetches, feed_dict={self._input_ph: self.obs})

        expected_shape = [1, output_dim]
        want_log_std = np.full(expected_shape, np.log(1))
        want_std_param = np.full(expected_shape, np.log(np.exp(1) - 1))

        # Absolute-only tolerance, since the expected log-std is exactly zero.
        tolerance = dict(rtol=0, atol=0.0001)
        assert np.allclose(log_std, want_log_std, **tolerance)
        assert np.allclose(std_param, want_std_param, **tolerance)
예제 #3
0
    def test_without_std_share_network_is_pickleable(self, mock_normal,
                                                     output_dim, hidden_sizes):
        """Check that a pickled model reproduces the original model's outputs.

        The mean network's output bias is overwritten with ones before the
        reference outputs are computed, so the comparison only passes if the
        modified variable value survives the pickle round trip.

        Args:
            mock_normal: Mock whose return value is fixed to 0.5 (presumably
                patching the sampler used by the model; the patch target is
                not visible here).
            output_dim: Output dimension passed to the model.
            hidden_sizes: Hidden layer sizes passed to the model.
        """
        mock_normal.return_value = 0.5
        obs_shape = (None, 10, 10, 3)

        original_ph = tf.compat.v1.placeholder(tf.float32, shape=obs_shape)
        model = GaussianCNNModel(filter_dims=[3, 3],
                                 num_filters=[3, 6],
                                 strides=[1, 1],
                                 padding='SAME',
                                 hidden_sizes=hidden_sizes,
                                 output_dim=output_dim,
                                 std_share_network=False,
                                 adaptive_std=False)
        original_outputs = model.build(original_ph)

        # Look up the mean network's output bias and set every entry to one.
        with tf.compat.v1.variable_scope('GaussianCNNModel', reuse=True):
            mean_bias = tf.compat.v1.get_variable(
                'dist_params/mean_network/output/bias')
        mean_bias.load(tf.ones_like(mean_bias).eval())

        before = self.sess.run(original_outputs[:-1],
                               feed_dict={original_ph: self.obs})

        serialized = pickle.dumps(model)

        # Rebuild the unpickled model in a fresh graph and session.
        with tf.compat.v1.Session(graph=tf.Graph()) as fresh_sess:
            restored_ph = tf.compat.v1.placeholder(tf.float32,
                                                   shape=obs_shape)
            restored_model = pickle.loads(serialized)
            restored_outputs = restored_model.build(restored_ph)
            after = fresh_sess.run(restored_outputs[:-1],
                                   feed_dict={restored_ph: self.obs})
            assert np.array_equal(before, after)
예제 #4
0
    def test_without_std_share_network_output_values(self, mock_normal,
                                                     filters, in_channels,
                                                     strides, output_dim,
                                                     hidden_sizes):
        """Check action, mean, log-std and std parameter for 'exp' std.

        With constant weight initializers, the expected mean is derived
        analytically from the filter specs, strides and hidden sizes. The
        expected log-std and std parameter are both log(2), matching
        `init_std=2`.

        Args:
            mock_normal: Mock whose return value is fixed to 0.5 (presumably
                patching the sampler used by the model; the patch target is
                not visible here).
            filters: Sequence of filter specs, indexed as
                `spec[0]` (used as channel count for the flattened size) and
                `spec[1]` (height, width).
            in_channels: Input channel count for each convolution layer.
            strides: Stride for each convolution layer.
            output_dim: Output dimension passed to the model.
            hidden_sizes: Hidden layer sizes passed to the model.
        """
        mock_normal.return_value = 0.5
        model_kwargs = dict(
            input_dim=self.input_dim,
            filters=filters,
            strides=strides,
            padding='VALID',
            output_dim=output_dim,
            init_std=2,
            hidden_sizes=hidden_sizes,
            std_share_network=False,
            adaptive_std=False,
            hidden_nonlinearity=None,
            std_parameterization='exp',
            hidden_w_init=tf.constant_initializer(0.01),
            output_w_init=tf.constant_initializer(1),
        )
        model = GaussianCNNModel(**model_kwargs)
        fetches = model.build(self._input_ph).outputs[:-1]

        action, mean, log_std, std_param = self.sess.run(
            fetches, feed_dict={self._input_ph: self.obs})

        # Accumulate the constant-weight scaling of each conv layer
        # (0.01 per weight times the number of weights feeding one output).
        scale = 1
        for spec, channels in zip(filters, in_channels):
            kernel_height = spec[1][0]
            kernel_width = spec[1][1]
            scale *= 0.01 * kernel_height * kernel_width * channels

        # One more 0.01 factor per hidden layer.
        for _ in hidden_sizes:
            scale *= 0.01

        # Spatial size after each 'VALID' convolution.
        out_height = self.input_height
        out_width = self.input_width
        for spec, step in zip(filters, strides):
            out_height = int((out_height - spec[1][0]) / step) + 1
            out_width = int((out_width - spec[1][1]) / step) + 1
        flattened = out_height * out_width * filters[-1][0]

        mean_value = scale * flattened * np.prod(hidden_sizes)
        batch_shape = (self.batch_size, output_dim)
        want_mean = np.full(batch_shape, mean_value, dtype=np.float32)
        want_std_param = np.full(batch_shape, np.log(2), dtype=np.float32)
        want_log_std = np.full(batch_shape, np.log(2), dtype=np.float32)

        assert np.allclose(mean, want_mean)

        assert np.allclose(std_param, want_std_param)
        assert np.allclose(log_std, want_log_std)

        # action = noise * std + mean, with the noise mocked to 0.5.
        want_action = 0.5 * np.exp(want_log_std) + want_mean
        assert np.allclose(action, want_action, rtol=0, atol=0.1)
예제 #5
0
    def test_softplus_output_values(self, mock_normal, output_dim,
                                    hidden_sizes):
        """Check action, mean, log-std and std parameter for 'softplus' std.

        With constant weight initializers, the expected mean is derived
        analytically from the filter sizes, strides and hidden sizes. The
        expected std parameter is log(exp(2) - 1), and the expected log-std is
        the log of the softplus of that parameter.

        Args:
            mock_normal: Mock whose return value is fixed to 0.5 (presumably
                patching the sampler used by the model; the patch target is
                not visible here).
            output_dim: Output dimension passed to the model.
            hidden_sizes: Hidden layer sizes passed to the model.
        """
        mock_normal.return_value = 0.5
        kernel_sizes = [3, 3]
        channels_in = [3, 3]
        channels_out = [3, 6]
        steps = [1, 1]

        model_kwargs = dict(
            filter_dims=kernel_sizes,
            num_filters=channels_out,
            strides=steps,
            padding='VALID',
            output_dim=output_dim,
            init_std=2.0,
            hidden_sizes=hidden_sizes,
            std_share_network=False,
            adaptive_std=False,
            hidden_nonlinearity=None,
            std_parameterization='softplus',
            hidden_w_init=tf.constant_initializer(0.01),
            output_w_init=tf.constant_initializer(1),
        )
        model = GaussianCNNModel(**model_kwargs)
        fetches = model.build(self._input_ph)[:-1]

        action, mean, log_std, std_param = self.sess.run(
            fetches, feed_dict={self._input_ph: self.obs})

        # Accumulate the constant-weight scaling of each conv layer
        # (0.01 per weight times the number of weights feeding one output).
        scale = 1
        for kernel, channels in zip(kernel_sizes, channels_in):
            scale *= 0.01 * kernel * kernel * channels

        # One more 0.01 factor per hidden layer.
        for _ in hidden_sizes:
            scale *= 0.01

        # Spatial size after each 'VALID' convolution; the same value is used
        # for both height and width.
        side = self.input_width
        for kernel, step in zip(kernel_sizes, steps):
            side = int((side - kernel) / step) + 1
        flattened = side * side * channels_out[-1]

        mean_value = scale * flattened * np.prod(hidden_sizes)
        batch_shape = (self.batch_size, output_dim)
        want_mean = np.full(batch_shape, mean_value, dtype=np.float32)
        want_std_param = np.full(batch_shape,
                                 np.log(np.exp(2) - 1),
                                 dtype=np.float32)
        # log(softplus(param))
        want_log_std = np.log(np.log(1. + np.exp(want_std_param)))

        assert np.allclose(mean, want_mean)
        assert np.allclose(std_param, want_std_param)
        assert np.allclose(log_std, want_log_std)

        # action = noise * std + mean, with the noise mocked to 0.5.
        want_action = 0.5 * np.exp(want_log_std) + want_mean
        assert np.allclose(action, want_action, rtol=0, atol=0.1)
예제 #6
0
 def test_std_share_network_shapes(self, output_dim, hidden_sizes):
     """Check the output layer size when mean and std share one network.

     With `std_share_network=True`, the output kernel's second dimension and
     the output bias shape are both expected to be `2 * output_dim`.

     Args:
         output_dim: Output dimension passed to the model.
         hidden_sizes: Hidden layer sizes passed to the model.
     """
     model_kwargs = dict(
         filters=((3, (3, 3)), (6, (3, 3))),
         strides=[1, 1],
         padding='SAME',
         hidden_sizes=hidden_sizes,
         output_dim=output_dim,
         std_share_network=True,
     )
     model = GaussianCNNModel(**model_kwargs)
     model.build(self._input_ph)

     output_scope = 'dist_params/mean_std_network/output'
     with tf.compat.v1.variable_scope(model.name, reuse=True):
         kernel = tf.compat.v1.get_variable(output_scope + '/kernel')
         bias = tf.compat.v1.get_variable(output_scope + '/bias')

     expected_width = output_dim * 2
     assert kernel.shape[1] == expected_width
     assert bias.shape == expected_width
예제 #7
0
 def test_unknown_std_parameterization(self):
     """Check that an unrecognized `std_parameterization` is rejected.

     Constructing the model with `std_parameterization='unknown'` is expected
     to raise `NotImplementedError`.
     """
     model_kwargs = dict(
         filters=(((3, 3), 3), ((3, 3), 6)),
         strides=[1, 1],
         padding='SAME',
         hidden_sizes=(1, ),
         output_dim=1,
         std_parameterization='unknown',
     )
     with pytest.raises(NotImplementedError):
         GaussianCNNModel(**model_kwargs)
예제 #8
0
 def test_without_std_share_network_shapes(self, output_dim, hidden_sizes):
     """Check variable shapes when mean and std use separate parameters.

     With `std_share_network=False` and `adaptive_std=False`, the mean
     network's output kernel (second dimension) and bias, and the log-std
     parameter variable, are all expected to have size `output_dim`.

     Args:
         output_dim: Output dimension passed to the model.
         hidden_sizes: Hidden layer sizes passed to the model.
     """
     model_kwargs = dict(
         filters=((3, (3, 3)), (6, (3, 3))),
         strides=[1, 1],
         padding='SAME',
         hidden_sizes=hidden_sizes,
         output_dim=output_dim,
         std_share_network=False,
         adaptive_std=False,
     )
     model = GaussianCNNModel(**model_kwargs)
     model.build(self._input_ph)

     with tf.compat.v1.variable_scope(model.name, reuse=True):
         mean_kernel = tf.compat.v1.get_variable(
             'dist_params/mean_network/output/kernel')
         mean_bias = tf.compat.v1.get_variable(
             'dist_params/mean_network/output/bias')
         log_std_param = tf.compat.v1.get_variable(
             'dist_params/log_std_network/parameter')

     assert mean_kernel.shape[1] == output_dim
     assert mean_bias.shape == output_dim
     assert log_std_param.shape == output_dim
예제 #9
0
    def test_adaptive_std_output_shape(self, output_dim, hidden_sizes,
                                       std_hidden_sizes):
        """Check output layer shapes of the mean and adaptive std networks.

        With `adaptive_std=True` the std is produced by its own network. The
        output kernel (second dimension) and bias of both the mean network and
        the log-std network are expected to have size `output_dim`.

        Args:
            output_dim: Output dimension passed to the model.
            hidden_sizes: Hidden layer sizes for the mean network.
            std_hidden_sizes: Hidden layer sizes for the std network.
        """
        model = GaussianCNNModel(
            num_filters=[3, 6],
            filter_dims=[3, 3],
            strides=[1, 1],
            padding='SAME',
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            std_share_network=False,
            adaptive_std=True,
            hidden_nonlinearity=None,
            std_hidden_nonlinearity=None,
            std_filter_dims=[3, 3],
            std_num_filters=[3, 6],
            std_strides=[1, 1],
            std_padding='SAME',
            # Use the std-specific sizes supplied to the test; previously
            # `hidden_sizes` was passed here and this argument was ignored.
            std_hidden_sizes=std_hidden_sizes,
            hidden_w_init=tf.constant_initializer(0.01),
            output_w_init=tf.constant_initializer(1),
            std_hidden_w_init=tf.constant_initializer(0.01),
            std_output_w_init=tf.constant_initializer(1))

        model.build(self._input_ph)
        with tf.compat.v1.variable_scope(model.name, reuse=True):
            mean_output_weights = tf.compat.v1.get_variable(
                'dist_params/mean_network/output/kernel')
            mean_output_bias = tf.compat.v1.get_variable(
                'dist_params/mean_network/output/bias')
            log_std_output_weights = tf.compat.v1.get_variable(
                'dist_params/log_std_network/output/kernel')
            log_std_output_bias = tf.compat.v1.get_variable(
                'dist_params/log_std_network/output/bias')

        assert mean_output_weights.shape[1] == output_dim
        assert mean_output_bias.shape == output_dim
        assert log_std_output_weights.shape[1] == output_dim
        assert log_std_output_bias.shape == output_dim