class TestQfDerivedPolicy(TfGraphTestCase):
    """Tests for DiscreteQfDerivedPolicy driven by a SimpleQFunction."""

    def setUp(self):
        super().setUp()
        self.env = TfEnv(DummyDiscreteEnv())
        self.qf = SimpleQFunction(self.env.spec)
        self.policy = DiscreteQfDerivedPolicy(
            env_spec=self.env.spec, qf=self.qf)
        self.sess.run(tf.global_variables_initializer())
        self.env.reset()

    def test_discrete_qf_derived_policy(self):
        """Single and batched actions must lie in the action space."""
        obs, _, _, _ = self.env.step(1)
        action = self.policy.get_action(obs)
        assert self.env.action_space.contains(action)
        actions = self.policy.get_actions([obs])
        for action in actions:
            assert self.env.action_space.contains(action)

    def test_is_pickleable(self):
        """Unpickling into a fresh graph must reproduce the same action."""
        with tf.variable_scope('SimpleQFunction/SimpleMLPModel', reuse=True):
            return_var = tf.get_variable('return_var')
            # assign it to all one
            return_var.load(tf.ones_like(return_var).eval())
        obs, _, _, _ = self.env.step(1)
        action1 = self.policy.get_action(obs)
        p = pickle.dumps(self.policy)
        with tf.Session(graph=tf.Graph()):
            policy_pickled = pickle.loads(p)
            action2 = policy_pickled.get_action(obs)
            assert action1 == action2
def test_is_pickleable(self, obs_dim, action_dim):
    """Pickled DiscreteMLPQFunction must produce identical Q-values."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.q_functions.'
                     'discrete_mlp_q_function.MLPModel'),
                    new=SimpleMLPModel):
        qf = DiscreteMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    with tf.variable_scope(
            'discrete_mlp_q_function/discrete_mlp_q_function', reuse=True):
        return_var = tf.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})
    h_data = pickle.dumps(qf)
    with tf.Session(graph=tf.Graph()) as sess:
        qf_pickled = pickle.loads(h_data)
        input_var = tf.placeholder(tf.float32, shape=(None, ) + obs_dim)
        q_vals = qf_pickled.get_qval_sym(input_var, 'another')
        output2 = sess.run(q_vals, feed_dict={input_var: [obs]})
    assert np.array_equal(output1, output2)
def test_get_action(self, mock_rand, obs_dim, action_dim, filter_dims,
                    filter_sizes, strides, padding, hidden_sizes):
    """Actions are valid and probs are uniform under the mocked models."""
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_cnn_policy.MLPModel'),
                    new=SimpleMLPModel):
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.CNNModel'),
                        new=SimpleCNNModel):
            policy = CategoricalCNNPolicy(env_spec=env.spec,
                                          conv_filters=filter_dims,
                                          conv_filter_sizes=filter_sizes,
                                          conv_strides=strides,
                                          conv_pad=padding,
                                          hidden_sizes=hidden_sizes)
    env.reset()
    obs, _, _, _ = env.step(1)
    sampled_action, dist_info = policy.get_action(obs)
    uniform_prob = np.full(action_dim, 0.5)
    assert env.action_space.contains(sampled_action)
    assert sampled_action == 0
    assert np.array_equal(dist_info['prob'], uniform_prob)
    batch_actions, batch_infos = policy.get_actions([obs, obs, obs])
    for sampled_action, per_sample_prob in zip(batch_actions,
                                               batch_infos['prob']):
        assert env.action_space.contains(sampled_action)
        assert sampled_action == 0
        assert np.array_equal(per_sample_prob, uniform_prob)
def test_is_pickleable(self, mock_rand, obs_dim, action_dim):
    """Pickled CategoricalCNNPolicy must produce identical model outputs."""
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_cnn_policy.MLPModel'),
                    new=SimpleMLPModel):
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.CNNModel'),
                        new=SimpleCNNModel):
            policy = CategoricalCNNPolicy(env_spec=env.spec,
                                          conv_filters=(32, ),
                                          conv_filter_sizes=(3, ),
                                          conv_strides=(1, ),
                                          conv_pad='SAME',
                                          hidden_sizes=(4, ))
    env.reset()
    obs, _, _, _ = env.step(1)
    with tf.compat.v1.variable_scope(
            'CategoricalCNNPolicy/Sequential/MLPModel', reuse=True):
        return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = self.sess.run(policy.model.outputs,
                            feed_dict={policy.model.input: [obs]})
    p = pickle.dumps(policy)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        output2 = sess.run(policy_pickled.model.outputs,
                           feed_dict={policy_pickled.model.input: [obs]})
    assert np.array_equal(output1, output2)
def test_dist_info_sym(self, obs_dim, action_dim, filter_dims, filter_sizes,
                       strides, padding, hidden_sizes):
    """dist_info_sym must emit the mocked uniform distribution.

    Fix: the local holding the observation-space shape used to rebind
    (and shadow) the ``obs_dim`` parameter; it is renamed for clarity.
    """
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_cnn_policy.MLPModel'),
                    new=SimpleMLPModel):
        with mock.patch(('garage.tf.policies.'
                         'categorical_cnn_policy.CNNModel'),
                        new=SimpleCNNModel):
            policy = CategoricalCNNPolicy(env_spec=env.spec,
                                          conv_filters=filter_dims,
                                          conv_filter_sizes=filter_sizes,
                                          conv_strides=strides,
                                          conv_pad=padding,
                                          hidden_sizes=hidden_sizes)
    env.reset()
    obs, _, _, _ = env.step(1)
    expected_prob = np.full(action_dim, 0.5)
    # Distinct name: do not shadow the ``obs_dim`` parameter.
    obs_space_shape = env.spec.observation_space.shape
    state_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, ) + obs_space_shape)
    dist1 = policy.dist_info_sym(state_input, name='policy2')
    prob = self.sess.run(dist1['prob'], feed_dict={state_input: [obs]})
    assert np.array_equal(prob[0], expected_prob)
def test_is_pickleable(self, obs_dim, action_dim):
    """Test if ContinuousMLPPolicy is pickleable"""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'continuous_mlp_policy.MLPModel'),
                    new=SimpleMLPModel):
        policy = ContinuousMLPPolicy(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    with tf.compat.v1.variable_scope('ContinuousMLPPolicy/MLPModel',
                                     reuse=True):
        return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    # Outputs before pickling, with the known variable value baked in.
    output1 = self.sess.run(
        policy.model.outputs,
        feed_dict={policy.model.input: [obs.flatten()]})
    p = pickle.dumps(policy)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        output2 = sess.run(
            policy_pickled.model.outputs,
            feed_dict={policy_pickled.model.input: [obs.flatten()]})
    assert np.array_equal(output1, output2)
def test_is_pickleable(self):
    """Pickled GaussianLSTMPolicyWithModel must give identical means."""
    env = TfEnv(DummyBoxEnv(obs_dim=(1, ), action_dim=(1, )))
    with mock.patch(('garage.tf.policies.'
                     'gaussian_lstm_policy_with_model.GaussianLSTMModel'),
                    new=SimpleGaussianLSTMModel):
        policy = GaussianLSTMPolicyWithModel(
            env_spec=env.spec, state_include_action=False)
    env.reset()
    # NOTE(review): double reset — the second call only grabs an
    # initial observation; confirm this is intentional.
    obs = env.reset()
    with tf.variable_scope(
            'GaussianLSTMPolicyWithModel/GaussianLSTMModel', reuse=True):
        return_var = tf.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = self.sess.run(
        policy.model.networks['default'].mean,
        feed_dict={policy.model.input: [[obs.flatten()], [obs.flatten()]]})
    p = pickle.dumps(policy)
    with tf.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        output2 = sess.run(
            policy_pickled.model.networks['default'].mean,
            feed_dict={
                policy_pickled.model.input: [[obs.flatten()],
                                             [obs.flatten()]]
            })
    assert np.array_equal(output1, output2)
class TestResize(unittest.TestCase):
    """Tests for the Resize observation wrapper."""

    @overrides
    def setUp(self):
        self.width, self.height = 16, 16
        self.env = TfEnv(DummyDiscrete2DEnv())
        resized = Resize(DummyDiscrete2DEnv(),
                         width=self.width,
                         height=self.height)
        self.env_r = TfEnv(resized)

    def test_resize_invalid_environment_type(self):
        """A non-Box observation space must be rejected."""
        with self.assertRaises(ValueError):
            self.env.observation_space = Discrete(64)
            Resize(self.env, width=self.width, height=self.height)

    def test_resize_invalid_environment_shape(self):
        """A non-2D Box observation space must be rejected."""
        with self.assertRaises(ValueError):
            self.env.observation_space = Box(low=0,
                                             high=255,
                                             shape=(4, ),
                                             dtype=np.uint8)
            Resize(self.env, width=self.width, height=self.height)

    def test_resize_output_observation_space(self):
        """The wrapped space advertises the resized shape."""
        expected = (self.width, self.height)
        assert self.env_r.observation_space.shape == expected

    def test_resize_output_reset(self):
        """reset() yields an observation of the resized shape."""
        assert self.env_r.reset().shape == (self.width, self.height)

    def test_resize_output_step(self):
        """step() yields an observation of the resized shape."""
        self.env_r.reset()
        stepped_obs, _, _, _ = self.env_r.step(1)
        assert stepped_obs.shape == (self.width, self.height)
def test_dist_info(self, obs_dim, embedding_dim):
    """dist_info/dist_info_sym return the mocked Gaussian statistics.

    Fix: the local flattened-dimension variable used to rebind (and
    shadow) the ``obs_dim`` parameter; it is renamed for clarity.
    """
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    with mock.patch(('garage.tf.embeddings.'
                     'gaussian_mlp_encoder.GaussianMLPModel'),
                    new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                   output_space=env.spec.action_space)
        embedding = GaussianMLPEncoder(embedding_spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    # Distinct name: do not shadow the ``obs_dim`` parameter.
    obs_flat_dim = env.spec.observation_space.flat_dim
    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, obs_flat_dim))
    dist1_sym = embedding.dist_info_sym(obs_ph, name='p1_sym')
    # flatten output
    expected_mean = [np.full(np.prod(embedding_dim), 0.5)]
    expected_log_std = [np.full(np.prod(embedding_dim), np.log(0.5))]
    prob0 = embedding.dist_info(obs.flatten())
    prob1 = self.sess.run(dist1_sym, feed_dict={obs_ph: [obs.flatten()]})
    assert np.array_equal(prob0['mean'].flatten(), expected_mean[0])
    assert np.array_equal(prob0['log_std'].flatten(), expected_log_std[0])
    assert np.array_equal(prob1['mean'], expected_mean)
    assert np.array_equal(prob1['log_std'], expected_log_std)
def test_is_pickleable(self, obs_dim, action_dim, mock_rand):
    """Pickled CategoricalMLPPolicyWithModel behaves like the original."""
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    expected_prob = np.full(action_dim, 0.5)
    p = pickle.dumps(policy)
    with tf.Session(graph=tf.Graph()):
        policy_pickled = pickle.loads(p)
        action, prob = policy_pickled.get_action(obs)
        assert env.action_space.contains(action)
        assert action == 0
        assert np.array_equal(prob['prob'], expected_prob)
        # dist_info of original and clone must agree.
        prob1 = policy.dist_info([obs.flatten()])
        prob2 = policy_pickled.dist_info([obs.flatten()])
        assert np.array_equal(prob1['prob'], prob2['prob'])
        assert np.array_equal(prob2['prob'][0], expected_prob)
def test_get_action(self, mock_rand, filter_dims, num_filters, strides,
                    padding, hidden_sizes):
    """Sampled actions always lie within the environment action space."""
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscretePixelEnv())
    policy = CategoricalCNNPolicy2(env_spec=env.spec,
                                   filter_dims=filter_dims,
                                   num_filters=num_filters,
                                   strides=strides,
                                   padding=padding,
                                   hidden_sizes=hidden_sizes)
    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, ) +
                                      env.observation_space.shape,
                                      name='obs')
    policy.build(obs_ph)
    env.reset()
    obs, _, _, _ = env.step(1)
    single_action, _ = policy.get_action(obs)
    assert env.action_space.contains(single_action)
    batch_actions, _ = policy.get_actions([obs, obs, obs])
    for sampled in batch_actions:
        assert env.action_space.contains(sampled)
def test_get_action(self, obs_dim, task_num, latent_dim, action_dim):
    """Latent-, task- and observation-conditioned actions are all valid."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num),
                             high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)
    env.reset()
    obs, _, _, _ = env.step(1)
    latent = np.random.random((latent_dim, ))
    task = np.zeros(task_num)
    task[0] = 1
    # Single-sample queries through each of the three entry points.
    act_from_latent, _ = policy.get_action_given_latent(obs, latent)
    act_from_task, _ = policy.get_action_given_task(obs, task)
    act_from_obs, _ = policy.get_action(
        np.concatenate([obs.flatten(), task]))
    assert env.action_space.contains(act_from_latent)
    assert env.action_space.contains(act_from_task)
    assert env.action_space.contains(act_from_obs)
    # Batched queries through the same three entry points.
    obses, latents, tasks = [obs] * 3, [latent] * 3, [task] * 3
    aug_obses = [np.concatenate([obs.flatten(), task])] * 3
    acts_latent, _ = policy.get_actions_given_latents(obses, latents)
    acts_task, _ = policy.get_actions_given_tasks(obses, tasks)
    acts_obs, _ = policy.get_actions(aug_obses)
    for sampled in chain(acts_latent, acts_task, acts_obs):
        assert env.action_space.contains(sampled)
def test_is_pickleable(self, obs_dim, action_dim, mock_rand):
    """Model outputs must survive a pickle round trip."""
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    with tf.variable_scope('CategoricalMLPPolicy/MLPModel', reuse=True):
        return_var = tf.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = self.sess.run(
        policy.model.outputs,
        feed_dict={policy.model.input: [obs.flatten()]})
    p = pickle.dumps(policy)
    with tf.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        output2 = sess.run(
            policy_pickled.model.outputs,
            feed_dict={policy_pickled.model.input: [obs.flatten()]})
    assert np.array_equal(output1, output2)
def test_is_pickleable(self, obs_dim, embedding_dim):
    """Encoder outputs must be identical after a pickle round trip.

    Fix: removed a dead local (``obs_dim = ...flat_dim``) that rebound
    and shadowed the ``obs_dim`` parameter but was never read.
    """
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    with mock.patch(('garage.tf.embeddings.'
                     'gaussian_mlp_encoder.GaussianMLPModel'),
                    new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                   output_space=env.spec.action_space)
        embedding = GaussianMLPEncoder(embedding_spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    with tf.compat.v1.variable_scope(
            'GaussianMLPEncoder/GaussianMLPModel', reuse=True):
        return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = self.sess.run(
        embedding.model.outputs[:-1],
        feed_dict={embedding.model.input: [obs.flatten()]})
    p = pickle.dumps(embedding)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        embedding_pickled = pickle.loads(p)
        output2 = sess.run(
            embedding_pickled.model.outputs[:-1],
            feed_dict={embedding_pickled.model.input: [obs.flatten()]})
    assert np.array_equal(output1, output2)
def test_is_pickleable(self, obs_dim, action_dim):
    """Q-values must be identical after a pickle round trip."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.q_functions.'
                     'continuous_mlp_q_function.MLPMergeModel'),
                    new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    obs = obs.flatten()
    act = np.full(action_dim, 0.5).flatten()
    with tf.compat.v1.variable_scope(
            'ContinuousMLPQFunction/SimpleMLPMergeModel', reuse=True):
        return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = qf.get_qval([obs], [act])
    h_data = pickle.dumps(qf)
    with tf.compat.v1.Session(graph=tf.Graph()):
        qf_pickled = pickle.loads(h_data)
        output2 = qf_pickled.get_qval([obs], [act])
    assert np.array_equal(output1, output2)
def test_get_qval_sym(self, obs_dim, action_dim):
    """get_qval_sym must agree with get_qval and the mocked constant."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.q_functions.'
                     'continuous_mlp_q_function.MLPMergeModel'),
                    new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    obs = obs.flatten()
    act = np.full(action_dim, 0.5).flatten()
    output1 = qf.get_qval([obs], [act])
    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, obs.shape[0]))
    act_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, act.shape[0]))
    q_sym = qf.get_qval_sym(obs_ph, act_ph, 'another')
    output2 = self.sess.run(q_sym,
                            feed_dict={
                                obs_ph: [obs],
                                act_ph: [act]
                            })
    expected = np.full((1, ), 0.5)
    assert np.array_equal(output1, output2)
    assert np.array_equal(output2[0], expected)
def test_get_action(self, obs_dim, action_dim):
    """get_action/get_actions return the mocked deterministic values.

    Bug fix: the batched loop previously asserted against the stale
    single-sample ``prob`` dict instead of the per-sample ``mean`` and
    ``log_std`` values yielded by ``zip``, so the batched distribution
    info was never actually verified.
    """
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'gaussian_mlp_policy_with_model.GaussianMLPModel'),
                    new=SimpleGaussianMLPModel):
        policy = GaussianMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    action, prob = policy.get_action(obs)
    expected_action = np.full(action_dim, 0.75)
    expected_mean = np.full(action_dim, 0.5)
    expected_log_std = np.full(action_dim, 0.5)
    assert env.action_space.contains(action)
    assert np.array_equal(action, expected_action)
    assert np.array_equal(prob['mean'], expected_mean)
    assert np.array_equal(prob['log_std'], expected_log_std)
    actions, probs = policy.get_actions([obs, obs, obs])
    for action, mean, log_std in zip(actions, probs['mean'],
                                     probs['log_std']):
        assert env.action_space.contains(action)
        assert np.array_equal(action, expected_action)
        # Check the per-sample batch values, not the single-sample dict.
        assert np.array_equal(mean, expected_mean)
        assert np.array_equal(log_std, expected_log_std)
def test_is_pickleable(self):
    """GRU policy outputs must survive a pickle round trip."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
    with mock.patch(('garage.tf.policies.'
                     'categorical_gru_policy_with_model.GRUModel'),
                    new=SimpleGRUModel):
        policy = CategoricalGRUPolicyWithModel(env_spec=env.spec,
                                               state_include_action=False)
    env.reset()
    # NOTE(review): double reset — the second call only grabs an
    # initial observation; confirm this is intentional.
    obs = env.reset()
    with tf.variable_scope('CategoricalGRUPolicyWithModel/prob_network',
                           reuse=True):
        return_var = tf.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = self.sess.run(
        policy.model.outputs[0],
        feed_dict={policy.model.input: [[obs.flatten()], [obs.flatten()]]})
    p = pickle.dumps(policy)
    with tf.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        output2 = sess.run(policy_pickled.model.outputs[0],
                           feed_dict={
                               policy_pickled.model.input: [[obs.flatten()],
                                                            [obs.flatten()]]
                           })
    assert np.array_equal(output1, output2)
def test_is_pickleable(self, obs_dim, action_dim):
    """Pickled GaussianMLPPolicyWithModel must give identical outputs.

    Fix: removed a dead local (``obs_dim = ...flat_dim``) that rebound
    and shadowed the ``obs_dim`` parameter but was never read.
    """
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'gaussian_mlp_policy_with_model.GaussianMLPModel'),
                    new=SimpleGaussianMLPModel):
        policy = GaussianMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    with tf.variable_scope('GaussianMLPPolicyWithModel/GaussianMLPModel',
                           reuse=True):
        return_var = tf.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())
    output1 = self.sess.run(
        policy.model.outputs[:-1],
        feed_dict={policy.model.input: [obs.flatten()]})
    p = pickle.dumps(policy)
    with tf.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        output2 = sess.run(
            policy_pickled.model.outputs[:-1],
            feed_dict={policy_pickled.model.input: [obs.flatten()]})
    assert np.array_equal(output1, output2)
def test_output_shape(self, obs_dim, action_dim):
    """Q-value tensor for one observation has shape (1, n_actions)."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.q_functions.'
                     'discrete_mlp_q_function.MLPModel'),
                    new=SimpleMLPModel):
        qf = DiscreteMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    q_out = self.sess.run(qf.q_vals, feed_dict={qf.input: [obs]})
    assert q_out.shape == (1, action_dim)
def test_dist_info(self, obs_dim, action_dim):
    """dist_info returns the uniform probs from the mocked model."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    uniform = np.full(action_dim, 0.5)
    dist = policy.dist_info([obs.flatten()])
    assert np.array_equal(dist['prob'][0], uniform)
def test_output_shape(self, obs_dim, action_dim):
    """Q-value for a single (obs, act) pair has shape (1, 1)."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.q_functions.'
                     'continuous_mlp_q_function.MLPMergeModel'),
                    new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    flat_obs = obs.flatten()
    flat_act = np.full(action_dim, 0.5).flatten()
    q_out = qf.get_qval([flat_obs], [flat_act])
    assert q_out.shape == (1, 1)
def run_task(self, snapshot_config, *_):
    """Set up and train PPO on ``self._env`` inside a local TF runner."""
    # CPU-only session with fixed intra/inter-op thread counts.
    config = tf.ConfigProto(device_count={'GPU': 0},
                            allow_soft_placement=True,
                            intra_op_parallelism_threads=12,
                            inter_op_parallelism_threads=12)
    sess = tf.Session(config=config)
    with LocalTFRunner(snapshot_config=snapshot_config,
                       sess=sess) as runner:
        env = gym.make(self._env)
        env = TfEnv(normalize(env))
        env.reset()
        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=(32, 32),
            hidden_nonlinearity=tf.nn.tanh,
            output_nonlinearity=None,
        )
        baseline = GaussianMLPBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                hidden_sizes=(64, 64),
                use_trust_region=False,
                optimizer=FirstOrderOptimizer,
                optimizer_args=dict(
                    batch_size=32,
                    max_epochs=10,
                    tf_optimizer_args=dict(learning_rate=1e-3),
                ),
            ),
        )
        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            # Same first-order optimizer settings as the baseline.
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                tf_optimizer_args=dict(learning_rate=1e-3),
            ),
        )
        runner.setup(algo, env, sampler_args=dict(n_envs=12))
        runner.train(n_epochs=5, batch_size=2048)
def test_get_embedding(self, obs_dim, embedding_dim):
    """The encoded latent must lie inside the embedding output space."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    spec = InOutSpec(input_space=env.spec.observation_space,
                     output_space=env.spec.action_space)
    encoder = GaussianMLPEncoder(spec)
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 encoder.input_dim))
    encoder.build(task_input)
    env.reset()
    obs, _, _, _ = env.step(1)
    latent, _ = encoder.forward(obs)
    assert env.action_space.contains(latent)
def test_is_pickleable(self):
    """Pickled CategoricalLSTMPolicy2 must give identical logits."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
    obs_var = tf.compat.v1.placeholder(
        tf.float32,
        shape=[None, None, env.observation_space.flat_dim],
        name='obs')
    policy = CategoricalLSTMPolicy2(env_spec=env.spec,
                                    state_include_action=False)
    policy.build(obs_var)
    policy.reset()
    obs = env.reset()
    # Assign the first LSTM weight to all ones before snapshotting.
    policy.model._lstm_cell.weights[0].load(
        tf.ones_like(policy.model._lstm_cell.weights[0]).eval())
    output1 = self.sess.run(
        [policy.distribution.logits],
        feed_dict={policy.model.input: [[obs.flatten()], [obs.flatten()]]})
    p = pickle.dumps(policy)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        # A fresh graph needs a fresh input placeholder to rebuild on.
        obs_var = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None, None, env.observation_space.flat_dim],
            name='obs')
        policy_pickled.build(obs_var)
        output2 = sess.run([policy_pickled.distribution.logits],
                           feed_dict={
                               policy_pickled.model.input: [[obs.flatten()],
                                                            [obs.flatten()]]
                           })  # noqa: E126
    assert np.array_equal(output1, output2)
def test_is_pickleable(self, obs_dim, action_dim):
    """Distribution probs must be identical after a pickle round trip."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    obs_var = tf.compat.v1.placeholder(
        tf.float32,
        shape=[None, env.observation_space.flat_dim],
        name='obs')
    policy = CategoricalMLPPolicy2(env_spec=env.spec)
    policy.build(obs_var)
    obs = env.reset()
    with tf.compat.v1.variable_scope(
            'CategoricalMLPPolicy/CategoricalMLPModel', reuse=True):
        bias = tf.compat.v1.get_variable('mlp/hidden_0/bias')
        # assign it to all one
        bias.load(tf.ones_like(bias).eval())
    output1 = self.sess.run(
        [policy.distribution.probs],
        feed_dict={policy.model.input: [obs.flatten()]})
    p = pickle.dumps(policy)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        # A fresh graph needs a fresh input placeholder to rebuild on.
        obs_var = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None, env.observation_space.flat_dim],
            name='obs')
        policy_pickled.build(obs_var)
        output2 = sess.run(
            [policy_pickled.distribution.probs],
            feed_dict={policy_pickled.model.input: [obs.flatten()]})
    assert np.array_equal(output1, output2)
def test_get_qval_max_pooling(self, filter_dims, num_filters, strides,
                              pool_strides, pool_shapes):
    """Q-values with max pooling match the mocked constant output."""
    env = TfEnv(DummyDiscretePixelEnv())
    obs = env.reset()
    with mock.patch(('garage.tf.models.'
                     'cnn_mlp_merge_model.CNNModelWithMaxPooling'),
                    new=SimpleCNNModelWithMaxPooling):
        with mock.patch(('garage.tf.models.'
                         'cnn_mlp_merge_model.MLPMergeModel'),
                        new=SimpleMLPMergeModel):
            qf = ContinuousCNNQFunction(env_spec=env.spec,
                                        filter_dims=filter_dims,
                                        num_filters=num_filters,
                                        strides=strides,
                                        max_pooling=True,
                                        pool_strides=pool_strides,
                                        pool_shapes=pool_shapes)
    action_dim = env.action_space.shape
    obs, _, _, _ = env.step(1)
    act = np.full(action_dim, 0.5)
    expected_output = np.full((1, ), 0.5)
    outputs = qf.get_qval([obs], [act])
    assert np.array_equal(outputs[0], expected_output)
    # Batched query must yield the same constant for every sample.
    outputs = qf.get_qval([obs, obs, obs], [act, act, act])
    for output in outputs:
        assert np.array_equal(output, expected_output)
def test_dist_info_sym_wrong_input(self):
    """Mismatched obs/prev_action batch sizes must fail at session run."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
    obs_ph = tf.placeholder(tf.float32,
                            shape=(None, None,
                                   env.observation_space.flat_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_gru_policy_with_model.GRUModel'),
                    new=SimpleGRUModel):
        policy = CategoricalGRUPolicyWithModel(env_spec=env.spec,
                                               state_include_action=True)
    policy.reset()
    obs = env.reset()
    # Graph construction itself succeeds; the mismatch only surfaces
    # when tensors are actually fed.
    policy.dist_info_sym(
        obs_var=obs_ph,
        state_info_vars={'prev_action': np.zeros((3, 1, 1))},
        name='p2_sym')
    # observation batch size = 2 but prev_action batch size = 3
    with pytest.raises(tf.errors.InvalidArgumentError):
        self.sess.run(
            policy.model.networks['p2_sym'].input,
            feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})
def test_get_action(self, mock_rand, obs_dim, action_dim, hidden_dim):
    """GRU policy emits action 0 with uniform probs under the mocks."""
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'categorical_gru_policy_with_model.GRUModel'),
                    new=SimpleGRUModel):
        policy = CategoricalGRUPolicyWithModel(env_spec=env.spec,
                                               state_include_action=False)
    policy.reset()
    obs = env.reset()
    uniform_prob = np.full(action_dim, 0.5)
    act, info = policy.get_action(obs)
    assert env.action_space.contains(act)
    assert act == 0
    assert np.array_equal(info['prob'], uniform_prob)
    batch_acts, batch_infos = policy.get_actions([obs])
    for act, per_sample_prob in zip(batch_acts, batch_infos['prob']):
        assert env.action_space.contains(act)
        assert act == 0
        assert np.array_equal(per_sample_prob, uniform_prob)
def test_dist_info_sym_include_action(self, obs_dim, action_dim,
                                      hidden_dim):
    """dist_info_sym with state_include_action returns constant stats."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    obs_ph = tf.placeholder(
        tf.float32, shape=(None, None, env.observation_space.flat_dim))
    with mock.patch(('garage.tf.policies.'
                     'gaussian_gru_policy_with_model.GaussianGRUModel'),
                    new=SimpleGaussianGRUModel):
        policy = GaussianGRUPolicyWithModel(
            env_spec=env.spec, state_include_action=True)
    policy.reset()
    obs = env.reset()
    dist_sym = policy.dist_info_sym(
        obs_var=obs_ph,
        state_info_vars={'prev_action': np.zeros((2, 1) + action_dim)},
        name='p2_sym')
    dist = self.sess.run(
        dist_sym, feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})
    # The mocked model is expected to emit constant 0.5 for both stats.
    assert np.array_equal(dist['mean'], np.full((2, 1) + action_dim, 0.5))
    assert np.array_equal(dist['log_std'],
                          np.full((2, 1) + action_dim, 0.5))