def test_dist_info_sym(self, obs_dim, action_dim, hidden_dim):
    """Run the symbolic distribution graph on a batch of two observations.

    Builds a CategoricalGRUPolicy with GRUModel replaced by SimpleGRUModel
    and asserts that every entry of the resulting ``prob`` tensor is 0.5.

    Args:
        obs_dim: Observation dimension passed to DummyDiscreteEnv.
        action_dim: Action dimension passed to DummyDiscreteEnv.
        hidden_dim: Not used by this test body.

    """
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    flat_dim = env.observation_space.flat_dim
    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, None, flat_dim))

    gru_target = 'metarl.tf.policies.' 'categorical_gru_policy.GRUModel'
    with mock.patch(gru_target, new=SimpleGRUModel):
        policy = CategoricalGRUPolicy(env_spec=env.spec,
                                      state_include_action=False)

    policy.reset()
    flat_obs = env.reset().flatten()

    dist_sym = policy.dist_info_sym(obs_var=obs_ph,
                                    state_info_vars=None,
                                    name='p2_sym')
    # Feed a batch of two sequences, each a single time step long.
    dist = self.sess.run(dist_sym,
                         feed_dict={obs_ph: [[flat_obs], [flat_obs]]})

    expected_prob = np.full((2, 1, action_dim), 0.5)
    assert np.array_equal(dist['prob'], expected_prob)
def test_dist_info_sym_wrong_input(self):
    """Expect InvalidArgumentError when batch sizes of the inputs disagree.

    The policy is built with ``state_include_action=True`` and given a
    ``prev_action`` array whose leading dimension is 3, while the fed
    observation batch has 2 entries.
    """
    env = TfEnv(DummyDiscreteEnv(obs_dim=(1, ), action_dim=1))
    flat_dim = env.observation_space.flat_dim
    obs_ph = tf.compat.v1.placeholder(tf.float32,
                                      shape=(None, None, flat_dim))

    gru_target = 'metarl.tf.policies.' 'categorical_gru_policy.GRUModel'
    with mock.patch(gru_target, new=SimpleGRUModel):
        policy = CategoricalGRUPolicy(env_spec=env.spec,
                                      state_include_action=True)

    policy.reset()
    flat_obs = env.reset().flatten()

    mismatched_state = {'prev_action': np.zeros((3, 1, 1))}
    policy.dist_info_sym(obs_var=obs_ph,
                         state_info_vars=mismatched_state,
                         name='p2_sym')

    # observation batch size = 2 but prev_action batch size = 3
    network_input = policy.model.networks['p2_sym'].input
    with pytest.raises(tf.errors.InvalidArgumentError):
        self.sess.run(network_input,
                      feed_dict={obs_ph: [[flat_obs], [flat_obs]]})
def test_get_action_state_include_action(self, mock_normal, obs_dim,
                                         action_dim, hidden_dim):
    """Check get_action/get_actions outputs with state_include_action=True.

    GaussianLSTMModel is replaced by SimpleGaussianLSTMModel and the
    ``mock_normal`` argument is forced to return 0.5, so the sampled action
    and the reported ``mean``, ``log_std`` and ``prev_action`` can be
    compared against fixed arrays.

    Args:
        mock_normal: Mock object whose return value is set to 0.5
            (presumably a patched random-normal sampler; the patch itself
            is outside this block).
        obs_dim: Observation dimension passed to DummyBoxEnv.
        action_dim: Action dimension passed to DummyBoxEnv.
        hidden_dim: Not used by this test body.

    """
    mock_normal.return_value = 0.5
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))

    lstm_target = ('metarl.tf.policies.'
                   'gaussian_lstm_policy.GaussianLSTMModel')
    with mock.patch(lstm_target, new=SimpleGaussianLSTMModel):
        policy = GaussianLSTMPolicy(env_spec=env.spec,
                                    state_include_action=True)

    # Reference values shared by the single-step and batched checks.
    expected_action = np.full(action_dim, 0.5 * np.exp(0.5) + 0.5)
    expected_mean = np.full(action_dim, 0.5)
    expected_log_std = np.full(action_dim, 0.5)
    expected_prev_action = np.full(action_dim, 0)

    policy.reset()
    obs = env.reset()

    # Single observation.
    action, agent_info = policy.get_action(obs)
    assert env.action_space.contains(action)
    assert np.allclose(action, expected_action, atol=1e-6)
    assert np.array_equal(agent_info['mean'], expected_mean)
    assert np.array_equal(agent_info['log_std'], expected_log_std)
    assert np.array_equal(agent_info['prev_action'], expected_prev_action)

    # Batched observations, starting again from a freshly reset policy.
    policy.reset()
    actions, agent_infos = policy.get_actions([obs])
    per_sample = zip(actions, agent_infos['mean'], agent_infos['log_std'],
                     agent_infos['prev_action'])
    for act, mean, log_std, prev_act in per_sample:
        assert env.action_space.contains(act)
        assert np.allclose(act,
                           np.full(action_dim, expected_action),
                           atol=1e-6)
        assert np.array_equal(mean, expected_mean)
        assert np.array_equal(log_std, expected_log_std)
        assert np.array_equal(prev_act, expected_prev_action)
class TestNormalizedGym:
    """Tests for a normalized ``Pendulum-v0`` environment wrapped in TfEnv."""

    def setup_method(self):
        """Create the normalized, observation-flattening environment."""
        self.env = TfEnv(
            normalize(gym.make('Pendulum-v0'),
                      normalize_reward=True,
                      normalize_obs=True,
                      flatten_obs=True))

    def teardown_method(self):
        """Close the environment created in ``setup_method``."""
        self.env.close()

    def test_does_not_modify_action(self):
        """Stepping the environment must not mutate the caller's action."""
        a = self.env.action_space.sample()
        # Take a real copy: binding a second name to the same array would
        # make the comparison below trivially true even if step() mutated
        # the action in place.
        a_copy = a.copy()
        self.env.reset()
        self.env.step(a)
        # Element-wise comparison reduced explicitly, so the check also
        # works for actions with more than one element.
        assert (a == a_copy).all()

    def test_flatten(self):
        """Observations returned by step() have the observation-space shape."""
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                self.env.render()
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                assert next_obs.shape == self.env.observation_space.low.shape
                if done:
                    break

    def test_unflatten(self):
        """Flattening a step() observation yields ``flat_dim`` elements."""
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                flat_obs = self.env.observation_space.flatten(next_obs)
                # ``shape`` is a tuple; compare it with a 1-tuple rather
                # than with the bare ``flat_dim`` value, which could only
                # compare equal through NumPy scalar broadcasting.
                # Assumes flatten() returns a 1-D array.
                assert flat_obs.shape == (
                    self.env.observation_space.flat_dim, )
                if done:
                    break
class TestDiscreteCNNQFunction(TfGraphTestCase):
    """Tests for DiscreteCNNQFunction with its sub-models patched out.

    Every test replaces the CNN and MLP model classes looked up in
    ``metarl.tf.q_functions.discrete_cnn_q_function`` with the ``Simple*``
    stand-ins before constructing the Q-function.
    """

    def setup_method(self):
        """Create a pixel environment and keep its first observation."""
        super().setup_method()
        self.env = TfEnv(DummyDiscretePixelEnv())
        self.obs = self.env.reset()

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1)),
    ])
    # yapf: enable
    def test_get_action(self, filter_dims, num_filters, strides):
        """Q-values equal 0.5 per action for single and batched inputs."""
        cnn_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.CNNModel')
        mlp_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.MLPModel')
        with mock.patch(cnn_target, new=SimpleCNNModel), \
                mock.patch(mlp_target, new=SimpleMLPModel):
            qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                      filter_dims=filter_dims,
                                      num_filters=num_filters,
                                      strides=strides,
                                      dueling=False)

        expected_output = np.full(self.env.action_space.n, 0.5)

        single = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})
        assert np.array_equal(single[0], expected_output)

        batch = self.sess.run(
            qf.q_vals, feed_dict={qf.input: [self.obs, self.obs, self.obs]})
        for row in batch:
            assert np.array_equal(row, expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3,), (5,), (1,)),
        ((3,), (5,), (2,)),
        ((3, 3), (5, 5), (1, 1)),
    ])
    # yapf: enable
    def test_get_action_dueling(self, filter_dims, num_filters, strides):
        """Same check as test_get_action, with ``dueling=True``."""
        cnn_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.CNNModel')
        dueling_target = ('metarl.tf.q_functions.'
                          'discrete_cnn_q_function.MLPDuelingModel')
        with mock.patch(cnn_target, new=SimpleCNNModel), \
                mock.patch(dueling_target, new=SimpleMLPModel):
            qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                      filter_dims=filter_dims,
                                      num_filters=num_filters,
                                      strides=strides,
                                      dueling=True)

        expected_output = np.full(self.env.action_space.n, 0.5)

        single = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})
        assert np.array_equal(single[0], expected_output)

        batch = self.sess.run(
            qf.q_vals, feed_dict={qf.input: [self.obs, self.obs, self.obs]})
        for row in batch:
            assert np.array_equal(row, expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides, '
                             'pool_strides, pool_shapes', [
        ((3, ), (5, ), (1, ), (1, 1), (1, 1)),  # noqa: E122
        ((3, ), (5, ), (2, ), (2, 2), (2, 2)),
        ((3, 3), (5, 5), (1, 1), (1, 1), (1, 1)),
        ((3, 3), (5, 5), (1, 1), (2, 2), (2, 2))
    ])
    # yapf: enable
    def test_get_action_max_pooling(self, filter_dims, num_filters, strides,
                                    pool_strides, pool_shapes):
        """Same check as test_get_action, with ``max_pooling=True``."""
        pooling_target = ('metarl.tf.q_functions.'
                          'discrete_cnn_q_function.CNNModelWithMaxPooling')
        mlp_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.MLPModel')
        with mock.patch(pooling_target, new=SimpleCNNModelWithMaxPooling), \
                mock.patch(mlp_target, new=SimpleMLPModel):
            qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                      filter_dims=filter_dims,
                                      num_filters=num_filters,
                                      strides=strides,
                                      max_pooling=True,
                                      pool_strides=pool_strides,
                                      pool_shapes=pool_shapes,
                                      dueling=False)

        expected_output = np.full(self.env.action_space.n, 0.5)

        single = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})
        assert np.array_equal(single[0], expected_output)

        batch = self.sess.run(
            qf.q_vals, feed_dict={qf.input: [self.obs, self.obs, self.obs]})
        for row in batch:
            assert np.array_equal(row, expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1))
    ])
    # yapf: enable
    def test_get_qval_sym(self, filter_dims, num_filters, strides):
        """get_qval_sym on a new placeholder matches the default q_vals."""
        cnn_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.CNNModel')
        mlp_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.MLPModel')
        with mock.patch(cnn_target, new=SimpleCNNModel), \
                mock.patch(mlp_target, new=SimpleMLPModel):
            qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                      filter_dims=filter_dims,
                                      num_filters=num_filters,
                                      strides=strides,
                                      dueling=False)

        default_out = self.sess.run(qf.q_vals,
                                    feed_dict={qf.input: [self.obs]})

        obs_dim = self.env.observation_space.shape
        action_dim = self.env.action_space.n

        # Build a second symbolic output on a fresh placeholder.
        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, ) + obs_dim)
        q_vals = qf.get_qval_sym(input_var, 'another')
        sym_out = self.sess.run(q_vals, feed_dict={input_var: [self.obs]})

        expected_output = np.full(action_dim, 0.5)
        assert np.array_equal(default_out, sym_out)
        assert np.array_equal(sym_out[0], expected_output)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1)),
    ])
    # yapf: enable
    def test_is_pickleable(self, filter_dims, num_filters, strides):
        """A pickled and restored Q-function reproduces the same q_vals."""
        cnn_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.CNNModel')
        mlp_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.MLPModel')
        with mock.patch(cnn_target, new=SimpleCNNModel), \
                mock.patch(mlp_target, new=SimpleMLPModel):
            qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                      filter_dims=filter_dims,
                                      num_filters=num_filters,
                                      strides=strides,
                                      dueling=False)

        mlp_scope = 'DiscreteCNNQFunction/Sequential/SimpleMLPModel'
        with tf.compat.v1.variable_scope(mlp_scope, reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
        # assign it to all one
        return_var.load(tf.ones_like(return_var).eval())

        before = self.sess.run(qf.q_vals, feed_dict={qf.input: [self.obs]})

        serialized = pickle.dumps(qf)
        # Restore inside a separate graph/session so nothing is shared
        # with the graph the original Q-function was built in.
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            qf_pickled = pickle.loads(serialized)
            after = sess.run(qf_pickled.q_vals,
                             feed_dict={qf_pickled.input: [self.obs]})

        assert np.array_equal(before, after)

    # yapf: disable
    @pytest.mark.parametrize('filter_dims, num_filters, strides', [
        ((3, ), (5, ), (1, )),
        ((3, ), (5, ), (2, )),
        ((3, 3), (5, 5), (1, 1))
    ])
    # yapf: enable
    def test_clone(self, filter_dims, num_filters, strides):
        """A clone keeps the filter, filter-count and stride settings."""
        cnn_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.CNNModel')
        mlp_target = ('metarl.tf.q_functions.'
                      'discrete_cnn_q_function.MLPModel')
        with mock.patch(cnn_target, new=SimpleCNNModel), \
                mock.patch(mlp_target, new=SimpleMLPModel):
            qf = DiscreteCNNQFunction(env_spec=self.env.spec,
                                      filter_dims=filter_dims,
                                      num_filters=num_filters,
                                      strides=strides,
                                      dueling=False)
            # Clone while the patches are active, since cloning may
            # construct new model instances.
            qf_clone = qf.clone('another_qf')

        assert qf_clone._filter_dims == qf._filter_dims
        assert qf_clone._num_filters == qf._num_filters
        assert qf_clone._strides == qf._strides
def test_normalize_pixel_patch_not_trigger(self):
    """normalize_pixel_batch returns a DummyBoxEnv observation unchanged."""
    box_env = TfEnv(DummyBoxEnv())
    raw_obs = box_env.reset()

    result = normalize_pixel_batch(box_env, raw_obs)

    assert np.array_equal(raw_obs, result)
def test_normalize_pixel_patch(self):
    """normalize_pixel_batch divides a pixel-env observation by 255."""
    pixel_env = TfEnv(DummyDiscretePixelEnv())
    raw_obs = pixel_env.reset()

    scaled = normalize_pixel_batch(pixel_env, raw_obs)

    # Reference result: each entry of the observation divided by 255.
    reference = [row / 255.0 for row in raw_obs]
    assert np.allclose(scaled, reference)