Example #1
    def test_adaptive_std_is_pickleable(self, output_dim, hidden_sizes,
                                        std_hidden_sizes):
        input_var = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
        model = GaussianMLPModel2(output_dim=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  std_hidden_sizes=std_hidden_sizes,
                                  std_share_network=False,
                                  adaptive_std=True,
                                  hidden_nonlinearity=None,
                                  hidden_w_init=tf.ones_initializer(),
                                  output_w_init=tf.ones_initializer(),
                                  std_hidden_nonlinearity=None,
                                  std_hidden_w_init=tf.ones_initializer(),
                                  std_output_w_init=tf.ones_initializer())
        dist = model.build(input_var)

        # get output bias
        with tf.compat.v1.variable_scope('GaussianMLPModel2', reuse=True):
            bias = tf.compat.v1.get_variable(
                'dist_params/mean_network/output/bias')
        # assign it to all ones
        bias.load(tf.ones_like(bias).eval())

        h = pickle.dumps(model)
        output1 = self.sess.run(
            [dist.loc, tf.math.log(dist.stddev())],
            feed_dict={input_var: self.obs})
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            input_var = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
            model_pickled = pickle.loads(h)
            dist2 = model_pickled.build(input_var)
            output2 = sess.run(
                [dist2.loc, tf.math.log(dist2.stddev())],
                feed_dict={input_var: self.obs})
            assert np.array_equal(output1, output2)
Example #2
    def test_std_share_network_shapes(self, output_dim, hidden_sizes):
        # the shared output layer should be 2 * output_dim wide
        model = GaussianMLPModel2(output_dim=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  std_share_network=True)
        model.build(self.input_var)
        with tf.compat.v1.variable_scope(model.name, reuse=True):
            std_share_output_weights = tf.compat.v1.get_variable(
                'dist_params/mean_std_network/output/kernel')
            std_share_output_bias = tf.compat.v1.get_variable(
                'dist_params/mean_std_network/output/bias')
        assert std_share_output_weights.shape[1] == output_dim * 2
        assert std_share_output_bias.shape == output_dim * 2
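
The 2 * output_dim in the assertions comes from the shared head: with std_share_network=True a single output layer emits the mean and the log std concatenated along the last axis. A standalone NumPy sketch of that split (an illustration of the idea, not GaussianMLPModel2's actual code):

import numpy as np

output_dim, in_dim = 3, 5
# One shared output layer: kernel (in_dim, 2 * output_dim), bias (2 * output_dim,).
kernel = np.ones((in_dim, 2 * output_dim))
bias = np.zeros(2 * output_dim)
out = np.ones((1, in_dim)) @ kernel + bias
# First half is the mean, second half the log std.
mean, log_std = np.split(out, 2, axis=-1)
assert mean.shape == (1, output_dim) and log_std.shape == (1, output_dim)
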
Example #3
    def test_exp_max_std(self, output_dim, hidden_sizes):
        model = GaussianMLPModel2(output_dim=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  std_share_network=False,
                                  init_std=10,
                                  max_std=1,
                                  std_parameterization='exp')
        dist = model.build(self.input_var)

        log_std = self.sess.run(tf.math.log(dist.stddev()),
                                feed_dict={self.input_var: self.obs})

        expected_log_std = np.full([1, output_dim], np.log(1))
        assert np.allclose(log_std, expected_log_std)
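
The expected value follows from capping: with std_parameterization='exp' the model stores the log std directly, and the test assumes init_std=10 is capped at max_std=1, so the log std comes out as log(1) = 0. A quick NumPy check of that arithmetic (the simple clipping rule used here is an assumption for illustration, not the model's internals):

import numpy as np

init_std, max_std = 10.0, 1.0
# 'exp' parameterization: the parameter is the log std; cap it at log(max_std).
log_std = min(np.log(init_std), np.log(max_std))
assert np.isclose(log_std, np.log(1.0))  # 0.0
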
Example #4
    def test_without_std_share_network_shapes(self, output_dim, hidden_sizes):
        model = GaussianMLPModel2(output_dim=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  std_share_network=False,
                                  adaptive_std=False)
        model.build(self.input_var)
        with tf.compat.v1.variable_scope(model.name, reuse=True):
            mean_output_weights = tf.compat.v1.get_variable(
                'dist_params/mean_network/output/kernel')
            mean_output_bias = tf.compat.v1.get_variable(
                'dist_params/mean_network/output/bias')
            log_std_output_weights = tf.compat.v1.get_variable(
                'dist_params/log_std_network/parameter')
        assert mean_output_weights.shape[1] == output_dim
        assert mean_output_bias.shape == output_dim
        assert log_std_output_weights.shape == output_dim
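
Here the mean comes from a dedicated MLP head, while the variable name 'log_std_network/parameter' suggests that the non-adaptive log std is a single trainable vector with one entry per action dimension, independent of the observation. A rough NumPy sketch of those shapes (an illustration, not the model's internals):

import numpy as np

output_dim, in_dim = 3, 5
# Mean head: kernel (in_dim, output_dim), bias (output_dim,).
mean_kernel = np.ones((in_dim, output_dim))
mean_bias = np.zeros(output_dim)
# Non-adaptive std: one trainable log-std parameter per action dimension.
log_std_param = np.zeros(output_dim)
assert mean_kernel.shape[1] == output_dim
assert mean_bias.shape == (output_dim,)
assert log_std_param.shape == (output_dim,)
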
Example #5
    def test_softplus_max_std(self, output_dim, hidden_sizes):
        model = GaussianMLPModel2(output_dim=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  std_share_network=False,
                                  init_std=10,
                                  max_std=1,
                                  std_parameterization='softplus')
        dist = model.build(self.input_var)

        log_std = self.sess.run(tf.math.log(dist.stddev()),
                                feed_dict={self.input_var: self.obs})

        expected_log_std = np.full([1, output_dim], np.log(1))

        # This test fails just outside of the default absolute tolerance.
        assert np.allclose(log_std, expected_log_std, atol=1e-7)
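
With std_parameterization='softplus' the stored parameter p maps to std = log(1 + exp(p)), so capping the std at max_std = 1 and then taking log(std) only gives log(1) = 0 up to the inverse-softplus/softplus round trip; the leftover numerical error lands just above np.allclose's default atol of 1e-8 (presumably float32 rounding inside TensorFlow), hence the explicit atol=1e-7. The round trip itself, checked in NumPy (an illustration of the parameterization, not the model's code):

import numpy as np

max_std = 1.0
# Inverse softplus: the parameter p with softplus(p) == max_std.
p = np.log(np.exp(max_std) - 1.0)
# Forward softplus recovers the std, and log(std) is ~0.
std = np.log1p(np.exp(p))
assert np.isclose(np.log(std), 0.0)
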
Example #6
    def test_std_share_network_output_values(self, output_dim, hidden_sizes):
        model = GaussianMLPModel2(output_dim=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  std_share_network=True,
                                  hidden_nonlinearity=None,
                                  std_parameterization='exp',
                                  hidden_w_init=tf.ones_initializer(),
                                  output_w_init=tf.ones_initializer())
        dist = model.build(self.input_var)

        mean, log_std = self.sess.run(
            [dist.loc, tf.math.log(dist.stddev())],
            feed_dict={self.input_var: self.obs})

        expected_mean = np.full([1, output_dim], 5 * np.prod(hidden_sizes))
        expected_log_std = np.full([1, output_dim], 5 * np.prod(hidden_sizes))
        assert np.array_equal(mean, expected_mean)
        assert np.array_equal(log_std, expected_log_std)
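
The expected value 5 * prod(hidden_sizes) is repeated summation: with all-ones kernels, zero biases and no nonlinearity, every hidden unit sums its inputs, so each layer multiplies the running value by its width. A NumPy version of that arithmetic (it assumes self.obs is a batch of all-ones observations of dimension 5, which matches the expected values but is not shown in the snippet):

import numpy as np

hidden_sizes = (3, 6)
value = np.ones((1, 5))  # assumed fixture: one all-ones observation
for size in hidden_sizes:
    # ones-initialized kernel, zero bias, no nonlinearity
    value = value @ np.ones((value.shape[1], size))
output = value @ np.ones((hidden_sizes[-1], 1))  # ones-initialized output layer
assert np.allclose(output, 5 * np.prod(hidden_sizes))  # 5 * 3 * 6 = 90
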
Example #7
    def test_softplus_output_values(self, output_dim, hidden_sizes):
        model = GaussianMLPModel2(output_dim=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  hidden_nonlinearity=None,
                                  std_share_network=False,
                                  adaptive_std=False,
                                  init_std=2,
                                  std_parameterization='softplus',
                                  hidden_w_init=tf.ones_initializer(),
                                  output_w_init=tf.ones_initializer())
        dist = model.build(self.input_var)

        mean, log_std = self.sess.run(
            [dist.loc, tf.math.log(dist.stddev())],
            feed_dict={self.input_var: self.obs})

        expected_mean = np.full([1, output_dim], 5 * np.prod(hidden_sizes))
        expected_std_param = np.full([1, output_dim], np.log(np.exp(2) - 1))
        expected_log_std = np.log(np.log(1. + np.exp(expected_std_param)))
        assert np.array_equal(mean, expected_mean)
        assert np.allclose(log_std, expected_log_std)
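
The expected values encode the softplus initialization: the std parameter starts at the inverse softplus of init_std = 2 (that is, log(exp(2) - 1)), so applying softplus recovers a std of 2 and the expected log std reduces to log(2). A short check of that identity:

import numpy as np

init_std = 2.0
std_param = np.log(np.exp(init_std) - 1.0)     # inverse softplus, as in expected_std_param
log_std = np.log(np.log1p(np.exp(std_param)))  # log(softplus(std_param))
assert np.isclose(log_std, np.log(init_std))   # == log(2)
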
Example #8
    def __init__(self,
                 env_spec,
                 name='GaussianMLPPolicy',
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.tanh,
                 hidden_w_init=tf.initializers.glorot_uniform(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_nonlinearity=None,
                 output_w_init=tf.initializers.glorot_uniform(),
                 output_b_init=tf.zeros_initializer(),
                 learn_std=True,
                 adaptive_std=False,
                 std_share_network=False,
                 init_std=1.0,
                 min_std=1e-6,
                 max_std=None,
                 std_hidden_sizes=(32, 32),
                 std_hidden_nonlinearity=tf.nn.tanh,
                 std_output_nonlinearity=None,
                 std_parameterization='exp',
                 layer_normalization=False):
        if not isinstance(env_spec.action_space, akro.Box):
            raise ValueError('GaussianMLPPolicy only works with '
                             'akro.Box action space, but not {}'.format(
                                 env_spec.action_space))
        super().__init__(name, env_spec)
        self.obs_dim = env_spec.observation_space.flat_dim
        self.action_dim = env_spec.action_space.flat_dim

        self._hidden_sizes = hidden_sizes
        self._hidden_nonlinearity = hidden_nonlinearity
        self._hidden_w_init = hidden_w_init
        self._hidden_b_init = hidden_b_init
        self._output_nonlinearity = output_nonlinearity
        self._output_w_init = output_w_init
        self._output_b_init = output_b_init
        self._learn_std = learn_std
        self._adaptive_std = adaptive_std
        self._std_share_network = std_share_network
        self._init_std = init_std
        self._min_std = min_std
        self._max_std = max_std
        self._std_hidden_sizes = std_hidden_sizes
        self._std_hidden_nonlinearity = std_hidden_nonlinearity
        self._std_output_nonlinearity = std_output_nonlinearity
        self._std_parameterization = std_parameterization
        self._layer_normalization = layer_normalization

        self._f_dist = None
        self._dist = None

        self.model = GaussianMLPModel2(
            output_dim=self.action_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            learn_std=learn_std,
            adaptive_std=adaptive_std,
            std_share_network=std_share_network,
            init_std=init_std,
            min_std=min_std,
            max_std=max_std,
            std_hidden_sizes=std_hidden_sizes,
            std_hidden_nonlinearity=std_hidden_nonlinearity,
            std_output_nonlinearity=std_output_nonlinearity,
            std_parameterization=std_parameterization,
            layer_normalization=layer_normalization,
            name='GaussianMLPModel')
Example #9
    def test_unknown_std_parameterization(self):
        with pytest.raises(ValueError):
            GaussianMLPModel2(output_dim=1, std_parameterization='unknown')
Example #10
    def test_dist(self):
        model = GaussianMLPModel2(output_dim=1)
        dist = model.build(self.input_var)
        assert isinstance(dist, tfp.distributions.MultivariateNormalDiag)
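
For reference, the returned distribution is TensorFlow Probability's diagonal Gaussian, parameterized by a mean vector and a per-dimension scale; a standalone construction with arbitrary values:

import tensorflow_probability as tfp

dist = tfp.distributions.MultivariateNormalDiag(loc=[0.0, 0.0],
                                                scale_diag=[1.0, 2.0])
print(dist.event_shape)  # (2,)
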