def test_output_shape(self, obs_dim, action_dim):
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.q_functions.'
                     'continuous_mlp_q_function.MLPMergeModel'),
                    new=SimpleMLPMergeModel):
        qf = ContinuousMLPQFunction(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    obs = obs.flatten()
    act = np.full(action_dim, 0.5).flatten()

    obs_ph, act_ph = qf.inputs

    outputs = qf.get_qval([obs], [act])
    assert outputs.shape == (1, 1)
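# A minimal sketch of the pytest parametrization assumed to supply the
# ``obs_dim``/``action_dim``-style arguments used by the test methods in this
# section. ``pytest.mark.parametrize`` is real pytest API; the value grid and
# the example test name below are illustrative assumptions, not the original
# test matrix.
import pytest


@pytest.mark.parametrize('obs_dim, action_dim', [
    ((1, ), (1, )),
    ((2, ), (2, )),
    ((1, 1), (1, 1)),
])
def test_dims_example(obs_dim, action_dim):
    # Placeholder check; the real tests above and below exercise the actual
    # policies and Q-functions with these shapes.
    assert len(obs_dim) >= 1 and len(action_dim) >= 1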
def test_output_shape(self, obs_dim, act_dim, output_dim, hidden_sizes):
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
    nn_module = MLPModule(input_dim=obs_dim + act_dim,
                          output_dim=output_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)

    qf = ContinuousNNQFunction(env_spec, nn_module)
    output = qf.get_qval(obs, act)

    assert output.shape == (1, 1)
def test_param_values(self, obs_dim):
    box_env = TfEnv(DummyBoxEnv(obs_dim=obs_dim))
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_mlp_baseline_with_model.'
                     'GaussianMLPRegressorWithModel'),
                    new=SimpleGaussianMLPRegressor):
        gmb = GaussianMLPBaselineWithModel(env_spec=box_env.spec)
        new_gmb = GaussianMLPBaselineWithModel(
            env_spec=box_env.spec, name='GaussianMLPBaselineWithModel2')

    old_param_values = gmb.get_param_values()
    new_param_values = new_gmb.get_param_values()
    assert not np.array_equal(old_param_values, new_param_values)

    new_gmb.set_param_values(old_param_values)
    new_param_values = new_gmb.get_param_values()
    assert np.array_equal(old_param_values, new_param_values)
def test_get_embedding(self, obs_dim, embedding_dim):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                               output_space=env.spec.action_space)
    embedding = GaussianMLPEncoder(embedding_spec)
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 embedding.input_dim))
    embedding.build(task_input)

    env.reset()
    obs, _, _, _ = env.step(1)

    latent, _ = embedding.forward(obs)
    assert env.action_space.contains(latent)
def test_get_action(self, obs_dim, action_dim, hidden_dim):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                hidden_dim=hidden_dim,
                                state_include_action=False)
    policy.reset()
    obs = env.reset()

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions([obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
def test_output_shape(self, batch_size, hidden_sizes):
    env_spec = GymEnv(DummyBoxEnv()).spec
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = torch.ones(batch_size, obs_dim, dtype=torch.float32)
    act = torch.ones(batch_size, act_dim, dtype=torch.float32)
    qf = ContinuousMLPQFunction(env_spec=env_spec,
                                hidden_nonlinearity=None,
                                hidden_sizes=hidden_sizes,
                                hidden_w_init=nn.init.ones_,
                                output_w_init=nn.init.ones_)
    output = qf(obs, act)

    assert output.shape == (batch_size, 1)
def test_build(self, obs_dim, action_dim):
    env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = GaussianMLPPolicy(env_spec=env.spec)
    obs = env.reset()[0]

    state_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, None,
                                                  policy.input_dim))
    dist_sym = policy.build(state_input, name='dist_sym').dist
    dist_sym2 = policy.build(state_input, name='dist_sym2').dist
    output1 = self.sess.run([dist_sym.loc],
                            feed_dict={state_input: [[obs.flatten()]]})
    output2 = self.sess.run([dist_sym2.loc],
                            feed_dict={state_input: [[obs.flatten()]]})
    assert np.array_equal(output1, output2)
def test_is_pickleable(batch_size):
    env_spec = GymEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    qf = DiscreteMLPQFunction(env_spec=env_spec,
                              hidden_nonlinearity=None,
                              hidden_sizes=(2, 2))
    policy = DiscreteQFArgmaxPolicy(qf, env_spec)

    output1 = policy.get_actions(obs.numpy())[0]

    p = pickle.dumps(policy)
    policy_pickled = pickle.loads(p)
    output2 = policy_pickled.get_actions(obs.numpy())[0]
    assert np.array_equal(output1, output2)
def test_get_actions(self, obs_dim, act_dim, batch_size, hidden_sizes):
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    nn_module = MLPModule(input_dim=obs_dim,
                          output_dim=act_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)

    policy = DeterministicPolicy(env_spec, nn_module)
    expected_output = np.full([batch_size, act_dim],
                              fill_value=obs_dim * np.prod(hidden_sizes),
                              dtype=np.float32)
    assert np.array_equal(policy.get_actions(obs), expected_output)
def test_is_pickleable(self):
    box_env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    cmb = ContinuousMLPBaseline(env_spec=box_env_spec)

    with tf.compat.v1.variable_scope('ContinuousMLPBaseline', reuse=True):
        bias = tf.compat.v1.get_variable('mlp/hidden_0/bias')
        bias.load(tf.ones_like(bias).eval())

    _, _, paths, _ = get_train_test_data()
    result1 = cmb.predict(paths)
    h = pickle.dumps(cmb)

    with tf.compat.v1.Session(graph=tf.Graph()):
        cmb_pickled = pickle.loads(h)
        result2 = cmb_pickled.predict(paths)
        assert np.array_equal(result1, result2)
def test_get_actions(self, batch_size, hidden_sizes):
    env_spec = TfEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    policy = DeterministicMLPPolicy(env_spec=env_spec,
                                    hidden_nonlinearity=None,
                                    hidden_sizes=hidden_sizes,
                                    hidden_w_init=nn.init.ones_,
                                    output_w_init=nn.init.ones_)

    expected_output = np.full([batch_size, act_dim],
                              fill_value=obs_dim * np.prod(hidden_sizes),
                              dtype=np.float32)
    assert np.array_equal(policy.get_actions(obs)[0], expected_output)
def test_log_prob(self):
    """Test log_prob method of the policy."""
    env_spec = MetaRLEnv(DummyBoxEnv())
    init_std = 1.
    obs = torch.Tensor([0, 0, 0, 0]).float()
    action = torch.Tensor([0, 0]).float()

    policy = GaussianMLPPolicy(env_spec=env_spec,
                               hidden_sizes=(1, ),
                               init_std=init_std,
                               hidden_nonlinearity=None,
                               std_parameterization='exp',
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    dist = policy(obs)
    assert torch.allclose(dist.log_prob(action),
                          policy.log_likelihood(obs, action))
def test_is_pickleable(self, batch_size, hidden_sizes):
    env_spec = TfEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    policy = DeterministicMLPPolicy(env_spec=env_spec,
                                    hidden_nonlinearity=None,
                                    hidden_sizes=hidden_sizes,
                                    hidden_w_init=nn.init.ones_,
                                    output_w_init=nn.init.ones_)

    output1 = policy.get_actions(obs)[0]

    p = pickle.dumps(policy)
    policy_pickled = pickle.loads(p)
    output2 = policy_pickled.get_actions(obs)[0]
    assert np.array_equal(output1, output2)
def test_get_action(self, obs_dim, action_dim):
    """Test get_action method."""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = ContinuousMLPPolicy(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions(
        [obs.flatten(), obs.flatten(), obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
def test_param_values(self):
    box_env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    cmb = ContinuousMLPBaseline(env_spec=box_env_spec)
    new_cmb = ContinuousMLPBaseline(env_spec=box_env_spec,
                                    name='ContinuousMLPBaseline2')

    # Manually change the parameters of ContinuousMLPBaseline.
    with tf.compat.v1.variable_scope('ContinuousMLPBaseline', reuse=True):
        bias = tf.compat.v1.get_variable('mlp/hidden_0/bias')
        bias.load(tf.ones_like(bias).eval())

    old_param_values = cmb.get_param_values()
    new_param_values = new_cmb.get_param_values()
    assert not np.array_equal(old_param_values, new_param_values)

    new_cmb.set_param_values(old_param_values)
    new_param_values = new_cmb.get_param_values()
    assert np.array_equal(old_param_values, new_param_values)
def test_forward(hidden_sizes):
    env_spec = GymEnv(DummyBoxEnv()).spec
    obs_dim = env_spec.observation_space.flat_dim
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    qf = DiscreteMLPQFunction(env_spec=env_spec,
                              hidden_nonlinearity=None,
                              hidden_sizes=hidden_sizes,
                              hidden_w_init=nn.init.ones_,
                              output_w_init=nn.init.ones_)
    output = qf(obs)

    expected_output = torch.full([1, 1],
                                 fill_value=obs_dim * np.prod(hidden_sizes),
                                 dtype=torch.float32)
    assert torch.eq(output, expected_output).all()
def test_pickling(self):
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num), high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    pickled = pickle.dumps(policy)
    with tf.compat.v1.variable_scope('resumed'):
        unpickled = pickle.loads(pickled)
        assert hasattr(unpickled, '_f_dist_obs_latent')
        assert hasattr(unpickled, '_f_dist_obs_task')
def setup_method(self):
    with mock.patch('tensorflow.random.normal') as mock_rand:
        mock_rand.return_value = 0.5
        super().setup_method()
        self.box_env = TfEnv(DummyBoxEnv())
        self.policy1 = GaussianMLPPolicy(env_spec=self.box_env,
                                         init_std=1.0,
                                         name='P1')
        self.policy2 = GaussianMLPPolicy(env_spec=self.box_env,
                                         init_std=1.2,
                                         name='P2')
        self.policy3 = GaussianMLPPolicyWithModel(env_spec=self.box_env,
                                                  init_std=1.0,
                                                  name='P3')
        self.policy4 = GaussianMLPPolicyWithModel(env_spec=self.box_env,
                                                  init_std=1.2,
                                                  name='P4')

        self.sess.run(tf.global_variables_initializer())

        for a, b in zip(self.policy3.get_params(),
                        self.policy1.get_params()):
            self.sess.run(tf.assign(b, a))
        for a, b in zip(self.policy4.get_params(),
                        self.policy2.get_params()):
            self.sess.run(tf.assign(b, a))

        self.obs = [self.box_env.reset()]
        self.obs_ph = tf.placeholder(
            tf.float32,
            shape=(None, self.box_env.observation_space.flat_dim))
        self.action_ph = tf.placeholder(
            tf.float32, shape=(None, self.box_env.action_space.flat_dim))

        self.dist1_sym = self.policy1.dist_info_sym(self.obs_ph,
                                                    name='p1_sym')
        self.dist2_sym = self.policy2.dist_info_sym(self.obs_ph,
                                                    name='p2_sym')
        self.dist3_sym = self.policy3.dist_info_sym(self.obs_ph,
                                                    name='p3_sym')
        self.dist4_sym = self.policy4.dist_info_sym(self.obs_ph,
                                                    name='p4_sym')

        assert self.policy1.vectorized == self.policy2.vectorized
        assert self.policy3.vectorized == self.policy4.vectorized
def test_param_values(self):
    box_env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    gmb = GaussianMLPBaseline(env_spec=box_env_spec)
    new_gmb = GaussianMLPBaseline(env_spec=box_env_spec,
                                  name='GaussianMLPBaseline2')

    # Manually change the parameters of GaussianMLPBaseline.
    with tf.compat.v1.variable_scope('GaussianMLPBaseline', reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
        bias.load(tf.ones_like(bias).eval())

    old_param_values = gmb.get_param_values()
    new_param_values = new_gmb.get_param_values()
    assert not np.array_equal(old_param_values, new_param_values)

    new_gmb.set_param_values(old_param_values)
    new_param_values = new_gmb.get_param_values()
    assert np.array_equal(old_param_values, new_param_values)
def test_to(self):
    """Test that the Tanh Gaussian policy can be moved to a device.

    The policy is moved to GPU when one is available, otherwise to CPU.
    """
    env_spec = GymEnv(DummyBoxEnv())
    init_std = 2.

    policy = TanhGaussianMLPPolicy(env_spec=env_spec,
                                   hidden_sizes=(1, ),
                                   init_std=init_std,
                                   hidden_nonlinearity=None,
                                   std_parameterization='exp',
                                   hidden_w_init=nn.init.ones_,
                                   output_w_init=nn.init.ones_)
    if torch.cuda.is_available():
        policy.to(torch.device('cuda:0'))
        assert str(next(policy.parameters()).device) == 'cuda:0'
    else:
        policy.to(None)
        assert str(next(policy.parameters()).device) == 'cpu'
def test_is_pickleable(hidden_sizes):
    env_spec = GymEnv(DummyBoxEnv()).spec
    obs_dim = env_spec.observation_space.flat_dim
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    qf = DiscreteMLPQFunction(env_spec=env_spec,
                              hidden_nonlinearity=None,
                              hidden_sizes=hidden_sizes,
                              hidden_w_init=nn.init.ones_,
                              output_w_init=nn.init.ones_)
    output1 = qf(obs)

    p = pickle.dumps(qf)
    qf_pickled = pickle.loads(p)
    output2 = qf_pickled(obs)

    assert torch.eq(output1, output2).all()
def test_is_pickleable(self, obs_dim, act_dim, batch_size, hidden_sizes):
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    nn_module = MLPModule(input_dim=obs_dim,
                          output_dim=act_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)

    policy = DeterministicPolicy(env_spec, nn_module)
    output1 = policy.get_actions(obs)

    p = pickle.dumps(policy)
    policy_pickled = pickle.loads(p)
    output2 = policy_pickled.get_actions(obs)
    assert np.array_equal(output1, output2)
def test_get_qval(self, obs_dim, act_dim, output_dim, hidden_sizes):
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
    nn_module = MLPModule(input_dim=obs_dim + act_dim,
                          output_dim=output_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)

    qf = ContinuousNNQFunction(env_spec, nn_module)
    output = qf.get_qval(obs, act)
    expected_output = torch.full([1, output_dim],
                                 fill_value=(obs_dim + act_dim) *
                                 np.prod(hidden_sizes),
                                 dtype=torch.float32)
    # Compare elementwise; asserting directly on a multi-element tensor is
    # ambiguous in PyTorch.
    assert torch.eq(output, expected_output).all()
def test_get_embedding(self, obs_dim, embedding_dim):
    env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                               output_space=env.spec.action_space)
    embedding = GaussianMLPEncoder(embedding_spec)
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 embedding.input_dim))
    embedding.build(task_input, name='task_input')

    env.reset()
    obs = env.step(env.action_space.sample()).observation

    latent, _ = embedding.get_latent(obs)
    latents, _ = embedding.get_latents([obs] * 5)
    assert env.action_space.contains(latent)
    for latent in latents:
        assert env.action_space.contains(latent)
def test_is_pickleable(self, obs_dim, action_dim):
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'deterministic_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)

    action1, _ = policy.get_action(obs)

    p = pickle.dumps(policy)
    with tf.Session(graph=tf.Graph()):
        policy_pickled = pickle.loads(p)
        action2, _ = policy_pickled.get_action(obs)
        assert env.action_space.contains(action2)
        assert np.array_equal(action1, action2)
def test_build(self, obs_dim, action_dim):
    """Test build method."""
    env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = ContinuousMLPPolicy(env_spec=env.spec)

    env.reset()
    obs = env.step(1).observation

    obs_dim = env.spec.observation_space.flat_dim
    state_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, obs_dim))
    action_sym = policy.build(state_input, name='action_sym')

    action = self.sess.run(action_sym,
                           feed_dict={state_input: [obs.flatten()]})
    action = policy.action_space.unflatten(action)
    assert env.action_space.contains(action)
def test_is_pickleable(self):
    env = TfEnv(DummyBoxEnv(obs_dim=(1, ), action_dim=(1, )))
    obs_var = tf.compat.v1.placeholder(
        tf.float32,
        shape=[None, None, env.observation_space.flat_dim],
        name='obs')
    policy = GaussianGRUPolicy(env_spec=env.spec,
                               state_include_action=False)
    policy.build(obs_var)

    env.reset()
    obs = env.reset()

    with tf.compat.v1.variable_scope('GaussianGRUPolicy/GaussianGRUModel',
                                     reuse=True):
        param = tf.compat.v1.get_variable(
            'dist_params/log_std_param/parameter')
        # Set the log-std parameter to all ones.
        param.load(tf.ones_like(param).eval())

    output1 = self.sess.run(
        [policy.distribution.loc,
         policy.distribution.stddev()],
        feed_dict={policy.model.input: [[obs.flatten()], [obs.flatten()]]})

    p = pickle.dumps(policy)

    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        obs_var = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None, None, env.observation_space.flat_dim],
            name='obs')
        policy_pickled.build(obs_var)
        # yapf: disable
        output2 = sess.run(
            [
                policy_pickled.distribution.loc,
                policy_pickled.distribution.stddev()
            ],
            feed_dict={
                policy_pickled.model.input: [[obs.flatten()],
                                             [obs.flatten()]]
            })
        # yapf: enable
        assert np.array_equal(output1, output2)
def test_forward(self, hidden_sizes):
    env_spec = TfEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
    qf = ContinuousMLPQFunction(env_spec=env_spec,
                                hidden_nonlinearity=None,
                                hidden_sizes=hidden_sizes,
                                hidden_w_init=nn.init.ones_,
                                output_w_init=nn.init.ones_)
    output = qf(obs, act)

    expected_output = torch.full([1, 1],
                                 fill_value=(obs_dim + act_dim) *
                                 np.prod(hidden_sizes),
                                 dtype=torch.float32)
    assert torch.eq(output, expected_output).all()
def test_get_latent(self):
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num), high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    task_id = 3
    task_onehot = np.zeros(task_num)
    task_onehot[task_id] = 1
    latent, latent_info = policy.get_latent(task_onehot)
    assert latent.shape == (latent_dim, )
    assert latent_info['mean'].shape == (latent_dim, )
    assert latent_info['log_std'].shape == (latent_dim, )
def test_get_action_np(self, hidden_sizes):
    """Test Policy get action function with numpy inputs."""
    env_spec = GymEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = np.ones((obs_dim, ), dtype=np.float32)
    init_std = 2.

    policy = TanhGaussianMLPPolicy(env_spec=env_spec,
                                   hidden_sizes=hidden_sizes,
                                   init_std=init_std,
                                   hidden_nonlinearity=None,
                                   std_parameterization='exp',
                                   hidden_w_init=nn.init.ones_,
                                   output_w_init=nn.init.ones_)

    expected_mean = torch.full((act_dim, ), 1.0, dtype=torch.float)
    action, prob = policy.get_action(obs)
    assert np.allclose(prob['mean'], expected_mean.numpy(), rtol=1e-3)
    assert action.shape == (act_dim, )