def test_adaptive_std_is_pickleable(self, output_dim, hidden_sizes, std_hidden_sizes):
    """An adaptive-std model must reproduce identical outputs after a pickle round trip."""
    obs_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
    model = GaussianMLPModel2(
        output_dim=output_dim,
        hidden_sizes=hidden_sizes,
        std_hidden_sizes=std_hidden_sizes,
        std_share_network=False,
        adaptive_std=True,
        hidden_nonlinearity=None,
        hidden_w_init=tf.ones_initializer(),
        output_w_init=tf.ones_initializer(),
        std_hidden_nonlinearity=None,
        std_hidden_w_init=tf.ones_initializer(),
        std_output_w_init=tf.ones_initializer())
    dist = model.build(obs_ph)

    # Overwrite the mean network's output bias with ones so the pickled
    # copy must carry non-default parameter values across the round trip.
    with tf.compat.v1.variable_scope('GaussianMLPModel2', reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/bias')
        bias.load(tf.ones_like(bias).eval())

    pickled = pickle.dumps(model)
    before = self.sess.run(
        [dist.loc, tf.math.log(dist.stddev())],
        feed_dict={obs_ph: self.obs})

    # Rebuild the unpickled model in a fresh graph/session and compare.
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        obs_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
        restored = pickle.loads(pickled)
        dist2 = restored.build(obs_ph)
        after = sess.run(
            [dist2.loc, tf.math.log(dist2.stddev())],
            feed_dict={obs_ph: self.obs})

    assert np.array_equal(before, after)
def test_std_share_network_shapes(self, output_dim, hidden_sizes):
    """A shared mean/std head must emit 2 * output_dim output units."""
    model = GaussianMLPModel2(output_dim=output_dim,
                              hidden_sizes=hidden_sizes,
                              std_share_network=True)
    model.build(self.input_var)
    with tf.compat.v1.variable_scope(model.name, reuse=True):
        output_kernel = tf.compat.v1.get_variable(
            'dist_params/mean_std_network/output/kernel')
        output_bias = tf.compat.v1.get_variable(
            'dist_params/mean_std_network/output/bias')
        # Mean and log-std are concatenated, hence twice the action dim.
        assert output_kernel.shape[1] == output_dim * 2
        assert output_bias.shape == output_dim * 2
def test_exp_max_std(self, output_dim, hidden_sizes):
    """With exp parameterization, max_std must clip an oversized init_std."""
    model = GaussianMLPModel2(output_dim=output_dim,
                              hidden_sizes=hidden_sizes,
                              std_share_network=False,
                              init_std=10,
                              max_std=1,
                              std_parameterization='exp')
    dist = model.build(self.input_var)
    actual_log_std = self.sess.run(tf.math.log(dist.stddev()),
                                   feed_dict={self.input_var: self.obs})
    # init_std=10 exceeds max_std=1, so the std must be clipped to 1.
    expected = np.full([1, output_dim], np.log(1))
    assert np.allclose(actual_log_std, expected)
def test_without_std_share_network_shapes(self, output_dim, hidden_sizes):
    """Separate mean network and log-std parameter must each match output_dim."""
    model = GaussianMLPModel2(output_dim=output_dim,
                              hidden_sizes=hidden_sizes,
                              std_share_network=False,
                              adaptive_std=False)
    model.build(self.input_var)
    with tf.compat.v1.variable_scope(model.name, reuse=True):
        mean_kernel = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/kernel')
        mean_bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/bias')
        log_std_param = tf.compat.v1.get_variable(
            'dist_params/log_std_network/parameter')
        assert mean_kernel.shape[1] == output_dim
        assert mean_bias.shape == output_dim
        # Non-adaptive std is a single trainable parameter vector.
        assert log_std_param.shape == output_dim
def test_softplus_max_std(self, output_dim, hidden_sizes):
    """With softplus parameterization, max_std must clip an oversized init_std."""
    model = GaussianMLPModel2(output_dim=output_dim,
                              hidden_sizes=hidden_sizes,
                              std_share_network=False,
                              init_std=10,
                              max_std=1,
                              std_parameterization='softplus')
    dist = model.build(self.input_var)
    actual_log_std = self.sess.run(tf.math.log(dist.stddev()),
                                   feed_dict={self.input_var: self.obs})
    expected = np.full([1, output_dim], np.log(1))
    # This test fails just outside of the default absolute tolerance.
    assert np.allclose(actual_log_std, expected, atol=1e-7)
def test_std_share_network_output_values(self, output_dim, hidden_sizes):
    """With all-ones weights and no nonlinearity, mean and log-std are both 5 * prod(hidden_sizes)."""
    model = GaussianMLPModel2(output_dim=output_dim,
                              hidden_sizes=hidden_sizes,
                              std_share_network=True,
                              hidden_nonlinearity=None,
                              std_parameterization='exp',
                              hidden_w_init=tf.ones_initializer(),
                              output_w_init=tf.ones_initializer())
    dist = model.build(self.input_var)
    mean, log_std = self.sess.run(
        [dist.loc, tf.math.log(dist.stddev())],
        feed_dict={self.input_var: self.obs})
    # Each observation entry is 5 and every weight is 1, so a linear
    # network just multiplies by prod(hidden_sizes).
    expected = np.full([1, output_dim], 5 * np.prod(hidden_sizes))
    assert np.array_equal(mean, expected)
    assert np.array_equal(log_std, expected)
def test_softplus_output_values(self, output_dim, hidden_sizes):
    """Softplus std: parameter is inverse-softplus of init_std; output std is softplus of it."""
    model = GaussianMLPModel2(output_dim=output_dim,
                              hidden_sizes=hidden_sizes,
                              hidden_nonlinearity=None,
                              std_share_network=False,
                              adaptive_std=False,
                              init_std=2,
                              std_parameterization='softplus',
                              hidden_w_init=tf.ones_initializer(),
                              output_w_init=tf.ones_initializer())
    dist = model.build(self.input_var)
    mean, log_std = self.sess.run(
        [dist.loc, tf.math.log(dist.stddev())],
        feed_dict={self.input_var: self.obs})
    expected_mean = np.full([1, output_dim], 5 * np.prod(hidden_sizes))
    # inverse-softplus(2) = log(exp(2) - 1); std = softplus(param).
    std_param = np.full([1, output_dim], np.log(np.exp(2) - 1))
    expected_log_std = np.log(np.log(1. + np.exp(std_param)))
    assert np.array_equal(mean, expected_mean)
    assert np.allclose(log_std, expected_log_std)
def __init__(self, env_spec, name='GaussianMLPPolicy', hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.tanh, hidden_w_init=tf.initializers.glorot_uniform(), hidden_b_init=tf.zeros_initializer(), output_nonlinearity=None, output_w_init=tf.initializers.glorot_uniform(), output_b_init=tf.zeros_initializer(), learn_std=True, adaptive_std=False, std_share_network=False, init_std=1.0, min_std=1e-6, max_std=None, std_hidden_sizes=(32, 32), std_hidden_nonlinearity=tf.nn.tanh, std_output_nonlinearity=None, std_parameterization='exp', layer_normalization=False):
    """Initialize a Gaussian MLP policy over a continuous (akro.Box) action space.

    Validates the action space, records every configuration value on the
    instance, and constructs the underlying GaussianMLPModel2.

    Raises:
        ValueError: If the environment's action space is not an akro.Box.
    """
    # This policy parameterizes a Gaussian, so it only supports
    # continuous (Box) action spaces.
    if not isinstance(env_spec.action_space, akro.Box):
        raise ValueError('GaussianMLPPolicy only works with '
                         'akro.Box action space, but not {}'.format(
                             env_spec.action_space))
    super().__init__(name, env_spec)

    self.obs_dim = env_spec.observation_space.flat_dim
    self.action_dim = env_spec.action_space.flat_dim

    # Mean-network configuration.
    self._hidden_sizes = hidden_sizes
    self._hidden_nonlinearity = hidden_nonlinearity
    self._hidden_w_init = hidden_w_init
    self._hidden_b_init = hidden_b_init
    self._output_nonlinearity = output_nonlinearity
    self._output_w_init = output_w_init
    self._output_b_init = output_b_init
    # Std-network configuration.
    self._learn_std = learn_std
    self._adaptive_std = adaptive_std
    self._std_share_network = std_share_network
    self._init_std = init_std
    self._min_std = min_std
    self._max_std = max_std
    self._std_hidden_sizes = std_hidden_sizes
    self._std_hidden_nonlinearity = std_hidden_nonlinearity
    self._std_output_nonlinearity = std_output_nonlinearity
    self._std_parameterization = std_parameterization
    self._layer_normalization = layer_normalization

    # Populated lazily when the policy is built.
    self._f_dist = None
    self._dist = None

    self.model = GaussianMLPModel2(
        output_dim=self.action_dim,
        hidden_sizes=hidden_sizes,
        hidden_nonlinearity=hidden_nonlinearity,
        hidden_w_init=hidden_w_init,
        hidden_b_init=hidden_b_init,
        output_nonlinearity=output_nonlinearity,
        output_w_init=output_w_init,
        output_b_init=output_b_init,
        learn_std=learn_std,
        adaptive_std=adaptive_std,
        std_share_network=std_share_network,
        init_std=init_std,
        min_std=min_std,
        max_std=max_std,
        std_hidden_sizes=std_hidden_sizes,
        std_hidden_nonlinearity=std_hidden_nonlinearity,
        std_output_nonlinearity=std_output_nonlinearity,
        std_parameterization=std_parameterization,
        layer_normalization=layer_normalization,
        name='GaussianMLPModel')
def test_unknown_std_parameterization(self):
    """An unrecognized std_parameterization must raise ValueError at construction."""
    with pytest.raises(ValueError):
        GaussianMLPModel2(output_dim=1, std_parameterization='unknown')
def test_dist(self):
    """build() must return a tfp MultivariateNormalDiag distribution."""
    dist = GaussianMLPModel2(output_dim=1).build(self.input_var)
    assert isinstance(dist, tfp.distributions.MultivariateNormalDiag)