def test_is_pickleable(self):
    """Test that predictions are unchanged after pickling."""
    box_env = TfEnv(DummyBoxEnv(obs_dim=(1, )))
    with mock.patch(('garage.tf.baselines.'
                     'continuous_mlp_baseline_with_model.'
                     'ContinuousMLPRegressorWithModel'),
                    new=SimpleMLPRegressor):
        cmb = ContinuousMLPBaselineWithModel(env_spec=box_env.spec)
    obs = {'observations': [np.full(1, 1), np.full(1, 1)]}

    with tf.compat.v1.variable_scope('ContinuousMLPBaselineWithModel',
                                     reuse=True):
        return_var = tf.compat.v1.get_variable('SimpleMLPModel/return_var')
    return_var.load(1.0)

    prediction = cmb.predict(obs)
    h = pickle.dumps(cmb)

    with tf.compat.v1.Session(graph=tf.Graph()):
        cmb_pickled = pickle.loads(h)
        prediction2 = cmb_pickled.predict(obs)
        assert np.array_equal(prediction, prediction2)
def test_is_pickleable(self):
    """Test that the distribution outputs are unchanged after pickling."""
    env = GarageEnv(DummyBoxEnv(obs_dim=(1, ), action_dim=(1, )))
    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                state_include_action=False)
    env.reset()
    obs = env.reset()

    with tf.compat.v1.variable_scope('GaussianLSTMPolicy', reuse=True):
        param = tf.compat.v1.get_variable(
            'dist_params/log_std_param/parameter')
        # assign it to all one
        param.load(tf.ones_like(param).eval())

    state_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, None,
                                                  policy.input_dim))
    dist_sym = policy.build(state_input, name='dist_sym').dist
    output1 = self.sess.run(
        [dist_sym.loc, dist_sym.stddev()],
        feed_dict={state_input: [[obs.flatten()], [obs.flatten()]]})

    p = pickle.dumps(policy)

    # yapf: disable
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        state_input = tf.compat.v1.placeholder(
            tf.float32, shape=(None, None, policy.input_dim))
        dist_sym = policy_pickled.build(state_input, name='dist_sym').dist
        output2 = sess.run(
            [
                dist_sym.loc,
                dist_sym.stddev()
            ],
            feed_dict={
                state_input: [[obs.flatten()], [obs.flatten()]]
            })
        assert np.array_equal(output1, output2)
    # yapf: enable
def test_is_pickleable(self, obs_dim, action_dim):
    """Test that the distribution outputs are unchanged after pickling."""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    obs_var = tf.compat.v1.placeholder(
        tf.float32,
        shape=[None, None, env.observation_space.flat_dim],
        name='obs')
    policy = GaussianMLPPolicy(env_spec=env.spec)

    policy.build(obs_var)
    obs = env.reset()

    with tf.compat.v1.variable_scope('GaussianMLPPolicy/GaussianMLPModel',
                                     reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
        # assign it to all one
        bias.load(tf.ones_like(bias).eval())
    output1 = self.sess.run(
        [policy.distribution.loc,
         policy.distribution.stddev()],
        feed_dict={policy.model.input: [[obs.flatten()]]})

    p = pickle.dumps(policy)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        obs_var = tf.compat.v1.placeholder(
            tf.float32,
            shape=[None, None, env.observation_space.flat_dim],
            name='obs')
        policy_pickled = pickle.loads(p)
        policy_pickled.build(obs_var)
        output2 = sess.run(
            [
                policy_pickled.distribution.loc,
                policy_pickled.distribution.stddev()
            ],
            feed_dict={policy_pickled.model.input: [[obs.flatten()]]})
        assert np.array_equal(output1, output2)
def setUp(self, mock_rand):
    mock_rand.return_value = 0.5
    super().setUp()
    self.box_env = TfEnv(DummyBoxEnv())
    self.policy1 = GaussianMLPPolicy(env_spec=self.box_env,
                                     init_std=1.0,
                                     name='P1')
    self.policy2 = GaussianMLPPolicy(env_spec=self.box_env,
                                     init_std=1.2,
                                     name='P2')
    self.policy3 = GaussianMLPPolicyWithModel(env_spec=self.box_env,
                                              init_std=1.0,
                                              name='P3')
    self.policy4 = GaussianMLPPolicyWithModel(env_spec=self.box_env,
                                              init_std=1.2,
                                              name='P4')

    self.sess.run(tf.global_variables_initializer())

    # Copy the WithModel policies' parameters into the matching original
    # policies so that each pair produces identical outputs.
    for a, b in zip(self.policy3.get_params(), self.policy1.get_params()):
        self.sess.run(tf.assign(b, a))
    for a, b in zip(self.policy4.get_params(), self.policy2.get_params()):
        self.sess.run(tf.assign(b, a))

    self.obs = [self.box_env.reset()]

    self.obs_ph = tf.placeholder(
        tf.float32, shape=(None, self.box_env.observation_space.flat_dim))
    self.action_ph = tf.placeholder(
        tf.float32, shape=(None, self.box_env.action_space.flat_dim))

    self.dist1_sym = self.policy1.dist_info_sym(self.obs_ph, name='p1_sym')
    self.dist2_sym = self.policy2.dist_info_sym(self.obs_ph, name='p2_sym')
    self.dist3_sym = self.policy3.dist_info_sym(self.obs_ph, name='p3_sym')
    self.dist4_sym = self.policy4.dist_info_sym(self.obs_ph, name='p4_sym')

    assert self.policy1.vectorized == self.policy2.vectorized
    assert self.policy3.vectorized == self.policy4.vectorized
def test_get_action(self, hidden_sizes):
    """Test get_action with flat and unflattened observations."""
    unflat_dim = (2, 2)
    env_spec = GymEnv(DummyBoxEnv(obs_dim=unflat_dim))
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = torch.ones(obs_dim, dtype=torch.float32)
    obs_unflat = torch.ones(unflat_dim, dtype=torch.float32)
    obs_np = np.ones(obs_dim, dtype=np.float32)
    obs_np_unflat = np.ones(unflat_dim, dtype=np.float32)
    policy = DeterministicMLPPolicy(env_spec=env_spec,
                                    hidden_nonlinearity=None,
                                    hidden_sizes=hidden_sizes,
                                    hidden_w_init=nn.init.ones_,
                                    output_w_init=nn.init.ones_)

    # With all-ones weights and no nonlinearity, each layer sums its
    # inputs, so the output equals obs_dim * prod(hidden_sizes).
    expected_output = np.full(act_dim,
                              fill_value=obs_dim * np.prod(hidden_sizes),
                              dtype=np.float32)
    assert np.array_equal(policy.get_action(obs)[0], expected_output)
    assert np.array_equal(
        policy.get_action(obs_unflat)[0], expected_output)
    assert np.array_equal(policy.get_action(obs_np)[0], expected_output)
    assert np.array_equal(
        policy.get_action(obs_np_unflat)[0], expected_output)
def test_get_action_state_include_action(self, obs_dim, action_dim,
                                         hidden_dim):
    """Test get_action and get_actions with state_include_action=True."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    obs_var = tf.compat.v1.placeholder(
        tf.float32,
        shape=[
            None, None,
            env.observation_space.flat_dim + np.prod(action_dim)
        ],
        name='obs')
    policy = GaussianLSTMPolicy2(env_spec=env.spec,
                                 hidden_dim=hidden_dim,
                                 state_include_action=True)
    policy.build(obs_var)
    policy.reset()
    obs = env.reset()

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    policy.reset()

    actions, _ = policy.get_actions([obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
def test_is_pickleable(self, batch_size, hidden_sizes):
    """Test if policy is unchanged after pickling."""
    env_spec = GymEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    init_std = 2.

    policy = TanhGaussianMLPPolicy(env_spec=env_spec,
                                   hidden_sizes=hidden_sizes,
                                   init_std=init_std,
                                   hidden_nonlinearity=None,
                                   std_parameterization='exp',
                                   hidden_w_init=nn.init.ones_,
                                   output_w_init=nn.init.ones_)

    output1_action, output1_prob = policy.get_actions(obs)

    p = pickle.dumps(policy)
    policy_pickled = pickle.loads(p)
    output2_action, output2_prob = policy_pickled.get_actions(obs)

    assert np.allclose(output2_prob['mean'],
                       output1_prob['mean'],
                       rtol=1e-3)
    assert output1_action.shape == output2_action.shape
def test_get_action_state_include_action(self, mock_normal, obs_dim,
                                         action_dim, hidden_dim):
    """Test get_action and get_actions with state_include_action=True."""
    mock_normal.return_value = 0.5
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('metarl.tf.policies.'
                     'gaussian_lstm_policy.GaussianLSTMModel'),
                    new=SimpleGaussianLSTMModel):
        policy = GaussianLSTMPolicy(env_spec=env.spec,
                                    state_include_action=True)
        policy.reset()
        obs = env.reset()

        # action = mean + std * noise, with mean = 0.5,
        # std = exp(log_std) = exp(0.5), and the mocked noise = 0.5
        expected_action = np.full(action_dim, 0.5 * np.exp(0.5) + 0.5)

        action, agent_info = policy.get_action(obs)
        assert env.action_space.contains(action)
        assert np.allclose(action, expected_action, atol=1e-6)

        expected_mean = np.full(action_dim, 0.5)
        assert np.array_equal(agent_info['mean'], expected_mean)
        expected_log_std = np.full(action_dim, 0.5)
        assert np.array_equal(agent_info['log_std'], expected_log_std)
        expected_prev_action = np.full(action_dim, 0)
        assert np.array_equal(agent_info['prev_action'],
                              expected_prev_action)

        policy.reset()

        actions, agent_infos = policy.get_actions([obs])
        for action, mean, log_std, prev_action in zip(
                actions, agent_infos['mean'], agent_infos['log_std'],
                agent_infos['prev_action']):
            assert env.action_space.contains(action)
            assert np.allclose(action,
                               np.full(action_dim, expected_action),
                               atol=1e-6)
            assert np.array_equal(mean, expected_mean)
            assert np.array_equal(log_std, expected_log_std)
            assert np.array_equal(prev_action, expected_prev_action)
def test_dist_info_sym_wrong_input(self):
    """Test that dist_info_sym rejects mismatched batch sizes."""
    env = TfEnv(DummyBoxEnv(obs_dim=(1, ), action_dim=(1, )))

    obs_ph = tf.compat.v1.placeholder(
        tf.float32, shape=(None, None, env.observation_space.flat_dim))

    with mock.patch(('metarl.tf.policies.'
                     'gaussian_lstm_policy.GaussianLSTMModel'),
                    new=SimpleGaussianLSTMModel):
        policy = GaussianLSTMPolicy(env_spec=env.spec,
                                    state_include_action=True)

        policy.reset()
        obs = env.reset()

        policy.dist_info_sym(
            obs_var=obs_ph,
            state_info_vars={'prev_action': np.zeros((3, 1, 1))},
            name='p2_sym')
        # observation batch size = 2 but prev_action batch size = 3
        with pytest.raises(tf.errors.InvalidArgumentError):
            self.sess.run(
                policy.model.networks['p2_sym'].input,
                feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})
def test_get_action_sym(self, obs_dim, action_dim):
    """Test get_action_sym method"""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'continuous_mlp_policy.MLPModel'),
                    new=SimpleMLPModel):
        policy = ContinuousMLPPolicy(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)

    obs_dim = env.spec.observation_space.flat_dim
    state_input = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, obs_dim))
    action_sym = policy.get_action_sym(state_input, name='action_sym')

    expected_action = np.full(action_dim, 0.5)

    action = self.sess.run(action_sym,
                           feed_dict={state_input: [obs.flatten()]})
    action = policy.action_space.unflatten(action)

    assert np.array_equal(action, expected_action)
    assert env.action_space.contains(action)
def test_get_action(self, obs_dim, action_dim):
    """Test get_action method"""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'continuous_mlp_policy.MLPModel'),
                    new=SimpleMLPModel):
        policy = ContinuousMLPPolicy(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)

    action, _ = policy.get_action(obs.flatten())

    expected_action = np.full(action_dim, 0.5)

    assert env.action_space.contains(action)
    assert np.array_equal(action, expected_action)

    actions, _ = policy.get_actions(
        [obs.flatten(), obs.flatten(),
         obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
        assert np.array_equal(action, expected_action)
def test_dist_info_sym(self, obs_dim, action_dim, hidden_dim):
    """Test dist_info_sym output values."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))

    obs_ph = tf.placeholder(
        tf.float32, shape=(None, None, env.observation_space.flat_dim))

    with mock.patch(('garage.tf.policies.'
                     'gaussian_gru_policy_with_model.GaussianGRUModel'),
                    new=SimpleGaussianGRUModel):
        policy = GaussianGRUPolicyWithModel(env_spec=env.spec,
                                            state_include_action=False)

        policy.reset()
        obs = env.reset()

        dist_sym = policy.dist_info_sym(obs_var=obs_ph,
                                        state_info_vars=None,
                                        name='p2_sym')
        dist = self.sess.run(
            dist_sym,
            feed_dict={obs_ph: [[obs.flatten()], [obs.flatten()]]})

    assert np.array_equal(dist['mean'], np.full((2, 1) + action_dim, 0.5))
    assert np.array_equal(dist['log_std'],
                          np.full((2, 1) + action_dim, 0.5))
def test_module(self, reward_dim, latent_dim, hidden_sizes, updates):
    """Test all methods."""
    env_spec = TfEnv(DummyBoxEnv())
    latent_space = akro.Box(low=-1,
                            high=1,
                            shape=(latent_dim, ),
                            dtype=np.float32)

    # add latent space to observation space to create a new space
    augmented_obs_space = akro.Tuple(
        (env_spec.observation_space, latent_space))
    augmented_env_spec = EnvSpec(augmented_obs_space, env_spec.action_space)

    obs_dim = int(np.prod(env_spec.observation_space.shape))
    action_dim = int(np.prod(env_spec.action_space.shape))
    encoder_input_dim = obs_dim + action_dim + reward_dim
    encoder_output_dim = latent_dim * 2
    encoder_hidden_sizes = (3, 2, encoder_output_dim)

    context_encoder = RecurrentEncoder(input_dim=encoder_input_dim,
                                       output_dim=encoder_output_dim,
                                       hidden_nonlinearity=None,
                                       hidden_sizes=encoder_hidden_sizes,
                                       hidden_w_init=nn.init.ones_,
                                       output_w_init=nn.init.ones_)

    # policy needs to be able to accept obs_dim + latent_dim as input dim
    policy = GaussianMLPPolicy(env_spec=augmented_env_spec,
                               hidden_sizes=hidden_sizes,
                               hidden_nonlinearity=F.relu,
                               output_nonlinearity=None)

    module = ContextConditionedPolicy(latent_dim=latent_dim,
                                      context_encoder=context_encoder,
                                      policy=policy,
                                      use_ib=True,
                                      use_next_obs=False)

    expected_shape = [1, latent_dim]
    module.reset_belief()
    assert torch.all(torch.eq(module.z_means, torch.zeros(expected_shape)))
    assert torch.all(torch.eq(module.z_vars, torch.ones(expected_shape)))

    module.sample_from_belief()
    assert all([a == b for a, b in zip(module.z.shape, expected_shape)])

    module.detach_z()
    assert module.z.requires_grad is False

    context_dict = {}
    context_dict['observation'] = np.ones(obs_dim)
    context_dict['action'] = np.ones(action_dim)
    context_dict['reward'] = np.ones(reward_dim)
    context_dict['next_observation'] = np.ones(obs_dim)

    for _ in range(updates):
        module.update_context(context_dict)
    assert torch.all(
        torch.eq(module._context, torch.ones(updates, encoder_input_dim)))

    context = torch.randn(1, 1, encoder_input_dim)
    module.infer_posterior(context)
    assert all([a == b for a, b in zip(module.z.shape, expected_shape)])

    t, b = 1, 2
    obs = torch.randn((t, b, obs_dim), dtype=torch.float32)
    policy_output, task_z_out = module.forward(obs, context)
    assert policy_output is not None
    expected_shape = [b, latent_dim]
    assert all([a == b for a, b in zip(task_z_out.shape, expected_shape)])

    obs = torch.randn(obs_dim)
    action = module.get_action(obs)
    assert len(action) == action_dim

    kl_div = module.compute_kl_div()
    assert kl_div != 0
def test_invalid_obs_shape(self, obs_dim):
    box_env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))
    with pytest.raises(ValueError):
        GaussianCNNBaseline(env_spec=box_env.spec)
def test_invalid_env(self):
    env = TfEnv(DummyBoxEnv())
    with pytest.raises(ValueError):
        CategoricalMLPPolicy2(env_spec=env.spec)
def test_baseline(self):
    """Test the baseline initialization."""
    box_env = TfEnv(DummyBoxEnv())
    deterministic_mlp_baseline = DeterministicMLPBaseline(env_spec=box_env)
    gaussian_mlp_baseline = GaussianMLPBaseline(env_spec=box_env)
def test_state_info_specs_with_state_include_action(self):
    env = GarageEnv(DummyBoxEnv(obs_dim=(4, ), action_dim=(4, )))
    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                state_include_action=True)
    assert policy.state_info_specs == [('prev_action', (4, ))]
def setup_method(self):
    self.env = GarageEnv(DummyBoxEnv(obs_dim=(4, 4), action_dim=(2, 2)))
    self.policy = DummyPolicy(self.env.spec)
def test_clone(self):
    env = TfEnv(DummyBoxEnv(obs_dim=(10, ), action_dim=(4, )))
    policy = GaussianMLPPolicy(env_spec=env.spec)
    policy_clone = policy.clone('GaussianMLPPolicyClone')
    assert policy.env_spec == policy_clone.env_spec
def test_clone(self, obs_dim, action_dim, hidden_sizes):
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    qf = ContinuousMLPQFunction(env_spec=env.spec,
                                hidden_sizes=hidden_sizes)
    qf_clone = qf.clone('another_qf')
    assert qf_clone._hidden_sizes == qf._hidden_sizes
def setUp(self):
    super().setUp()
    self.env = TfEnv(DummyBoxEnv())
def setup_method(self):
    with mock.patch('tensorflow.random.normal') as mock_rand:
        mock_rand.return_value = 0.5
        super().setup_method()
        env = TfEnv(DummyBoxEnv(obs_dim=(1, ), action_dim=(1, )))

        self.default_initializer = tf.constant_initializer(1)
        self.default_hidden_nonlinearity = tf.nn.tanh
        self.default_recurrent_nonlinearity = tf.nn.sigmoid
        self.default_output_nonlinearity = None
        self.time_step = 1

        self.policy1 = GaussianGRUPolicy(
            env_spec=env.spec,
            hidden_dim=4,
            hidden_nonlinearity=self.default_hidden_nonlinearity,
            recurrent_nonlinearity=self.default_recurrent_nonlinearity,
            recurrent_w_x_init=self.default_initializer,
            recurrent_w_h_init=self.default_initializer,
            output_nonlinearity=self.default_output_nonlinearity,
            output_w_init=self.default_initializer,
            state_include_action=True,
            name='P1')
        self.policy2 = GaussianGRUPolicy(
            env_spec=env.spec,
            hidden_dim=4,
            hidden_nonlinearity=self.default_hidden_nonlinearity,
            recurrent_nonlinearity=self.default_recurrent_nonlinearity,
            recurrent_w_x_init=self.default_initializer,
            recurrent_w_h_init=self.default_initializer,
            output_nonlinearity=self.default_output_nonlinearity,
            output_w_init=tf.constant_initializer(2),
            state_include_action=True,
            name='P2')

        self.sess.run(tf.compat.v1.global_variables_initializer())

        self.policy3 = GaussianGRUPolicyWithModel(
            env_spec=env.spec,
            hidden_dim=4,
            hidden_nonlinearity=self.default_hidden_nonlinearity,
            hidden_w_init=self.default_initializer,
            recurrent_nonlinearity=self.default_recurrent_nonlinearity,
            recurrent_w_init=self.default_initializer,
            output_nonlinearity=self.default_output_nonlinearity,
            output_w_init=self.default_initializer,
            state_include_action=True,
            name='P3')
        self.policy4 = GaussianGRUPolicyWithModel(
            env_spec=env.spec,
            hidden_dim=4,
            hidden_nonlinearity=self.default_hidden_nonlinearity,
            hidden_w_init=self.default_initializer,
            recurrent_nonlinearity=self.default_recurrent_nonlinearity,
            recurrent_w_init=self.default_initializer,
            output_nonlinearity=self.default_output_nonlinearity,
            output_w_init=tf.constant_initializer(2),
            state_include_action=True,
            name='P4')

        self.policy1.reset()
        self.policy2.reset()
        self.policy3.reset()
        self.policy4.reset()

        self.obs = [env.reset()]
        self.obs = np.concatenate(
            [self.obs for _ in range(self.time_step)], axis=0)

        self.obs_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, None, env.observation_space.flat_dim))
        self.action_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(None, None, env.action_space.flat_dim))

        self.dist1_sym = self.policy1.dist_info_sym(
            obs_var=self.obs_ph,
            state_info_vars={
                'prev_action': np.zeros((2, self.time_step, 1))
            },
            name='p1_sym')
        self.dist2_sym = self.policy2.dist_info_sym(
            obs_var=self.obs_ph,
            state_info_vars={
                'prev_action': np.zeros((2, self.time_step, 1))
            },
            name='p2_sym')
        self.dist3_sym = self.policy3.dist_info_sym(
            obs_var=self.obs_ph,
            state_info_vars={
                'prev_action': np.zeros((2, self.time_step, 1))
            },
            name='p3_sym')
        self.dist4_sym = self.policy4.dist_info_sym(
            obs_var=self.obs_ph,
            state_info_vars={
                'prev_action': np.zeros((2, self.time_step, 1))
            },
            name='p4_sym')
def test_methods():
    """Test PEARLWorker methods."""
    env_spec = GarageEnv(DummyBoxEnv())
    latent_dim = 5
    latent_space = akro.Box(low=-1,
                            high=1,
                            shape=(latent_dim, ),
                            dtype=np.float32)

    # add latent space to observation space to create a new space
    augmented_obs_space = akro.Tuple(
        (env_spec.observation_space, latent_space))
    augmented_env_spec = EnvSpec(augmented_obs_space, env_spec.action_space)

    obs_dim = int(np.prod(env_spec.observation_space.shape))
    action_dim = int(np.prod(env_spec.action_space.shape))
    reward_dim = 1
    encoder_input_dim = obs_dim + action_dim + reward_dim
    encoder_output_dim = latent_dim * 2
    encoder_hidden_sizes = (3, 2, encoder_output_dim)

    context_encoder = MLPEncoder(input_dim=encoder_input_dim,
                                 output_dim=encoder_output_dim,
                                 hidden_nonlinearity=None,
                                 hidden_sizes=encoder_hidden_sizes,
                                 hidden_w_init=nn.init.ones_,
                                 output_w_init=nn.init.ones_)

    policy = TanhGaussianMLPPolicy(env_spec=augmented_env_spec,
                                   hidden_sizes=(3, 5, 7),
                                   hidden_nonlinearity=F.relu,
                                   output_nonlinearity=None)

    context_policy = ContextConditionedPolicy(
        latent_dim=latent_dim,
        context_encoder=context_encoder,
        policy=policy,
        use_information_bottleneck=True,
        use_next_obs=False)

    max_path_length = 20
    worker1 = PEARLWorker(seed=1,
                          max_path_length=max_path_length,
                          worker_number=1)
    worker1.update_agent(context_policy)
    worker1.update_env(env_spec)
    rollouts = worker1.rollout()

    assert rollouts.observations.shape == (max_path_length, obs_dim)
    assert rollouts.actions.shape == (max_path_length, action_dim)
    assert rollouts.rewards.shape == (max_path_length, )

    worker2 = PEARLWorker(seed=1,
                          max_path_length=max_path_length,
                          worker_number=1,
                          deterministic=True,
                          accum_context=True)
    worker2.update_agent(context_policy)
    worker2.update_env(env_spec)
    rollouts = worker2.rollout()

    assert context_policy.context.shape == (1, max_path_length,
                                            encoder_input_dim)
    assert rollouts.observations.shape == (max_path_length, obs_dim)
    assert rollouts.actions.shape == (max_path_length, action_dim)
    assert rollouts.rewards.shape == (max_path_length, )
def test_get_action(self, mock_normal, obs_dim, task_num, latent_dim,
                    action_dim):
    """Test all get_action variants with mocked Gaussian noise."""
    mock_normal.return_value = 0.5
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(
            'garage.tf.policies.'
            'gaussian_mlp_task_embedding_policy.GaussianMLPModel',
            new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec)
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        env.reset()
        obs, _, _, _ = env.step(1)
        latent = np.random.random((latent_dim, ))
        task = np.zeros(task_num)
        task[0] = 1

        action1, prob1 = policy.get_action_given_latent(obs, latent)
        action2, prob2 = policy.get_action_given_task(obs, task)
        action3, prob3 = policy.get_action(
            np.concatenate([obs.flatten(), task]))

        # action = mean + std * noise = 0.5 + 0.5 * 0.5 = 0.75, since the
        # simple model outputs mean = 0.5 and std = 0.5, and the noise is
        # mocked to 0.5
        expected_action = np.full(action_dim, 0.75)
        expected_mean = np.full(action_dim, 0.5)
        expected_log_std = np.full(action_dim, np.log(0.5))

        assert env.action_space.contains(action1)
        assert np.array_equal(action1, expected_action)
        assert np.array_equal(prob1['mean'], expected_mean)
        assert np.array_equal(prob1['log_std'], expected_log_std)

        assert env.action_space.contains(action2)
        assert np.array_equal(action2, expected_action)
        assert np.array_equal(prob2['mean'], expected_mean)
        assert np.array_equal(prob2['log_std'], expected_log_std)

        assert env.action_space.contains(action3)
        assert np.array_equal(action3, expected_action)
        assert np.array_equal(prob3['mean'], expected_mean)
        assert np.array_equal(prob3['log_std'], expected_log_std)

        obses, latents, tasks = [obs] * 3, [latent] * 3, [task] * 3
        aug_obses = [np.concatenate([obs.flatten(), task])] * 3
        action1n, prob1n = policy.get_actions_given_latents(obses, latents)
        action2n, prob2n = policy.get_actions_given_tasks(obses, tasks)
        action3n, prob3n = policy.get_actions(aug_obses)

        for action, mean, log_std in chain(
                zip(action1n, prob1n['mean'], prob1n['log_std']),
                zip(action2n, prob2n['mean'], prob2n['log_std']),
                zip(action3n, prob3n['mean'], prob3n['log_std'])):
            assert env.action_space.contains(action)
            assert np.array_equal(action, expected_action)
            assert np.array_equal(mean, expected_mean)
            assert np.array_equal(log_std, expected_log_std)
def test_state_info_specs(self):
    env = GarageEnv(DummyBoxEnv(obs_dim=(4, ), action_dim=(4, )))
    policy = GaussianLSTMPolicy(env_spec=env.spec,
                                state_include_action=False)
    assert policy.state_info_specs == []
def test_normalize_pixel_batch_not_trigger(self):
    env = TfEnv(DummyBoxEnv())
    obs = env.reset()
    obs_normalized = normalize_pixel_batch(env, obs)
    assert np.array_equal(obs, obs_normalized)
def test_clone(self):
    env = GarageEnv(DummyBoxEnv(obs_dim=(4, ), action_dim=(4, )))
    policy = GaussianLSTMPolicy(env_spec=env.spec)
    policy_clone = policy.clone('GaussianLSTMPolicyClone')
    assert policy_clone.env_spec == policy.env_spec
def setup_method(self):
    super().setup_method()
    self.env = GarageEnv(DummyBoxEnv())
def test_invalid_env(self):
    env = GarageEnv(DummyBoxEnv())
    with pytest.raises(ValueError):
        CategoricalLSTMPolicy(env_spec=env.spec)
def test_dist_info(self, obs_dim, task_num, latent_dim, action_dim):
    """Test dist_info and the dist_info_sym variants."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(
            'garage.tf.policies.'
            'gaussian_mlp_task_embedding_policy.GaussianMLPModel',
            new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec)
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        env.reset()
        obs, _, _, _ = env.step(1)
        task = np.zeros(task_num)
        task[0] = 1
        aug_obs = np.concatenate([obs.flatten(), task])
        latent = np.random.random(latent_dim)

        obs_dim = env.spec.observation_space.flat_dim
        obs_ph = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, obs_dim))
        task_ph = tf.compat.v1.placeholder(tf.float32,
                                           shape=(None, task_num))
        latent_ph = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, latent_dim))
        aug_obs_ph = tf.compat.v1.concat([obs_ph, task_ph], axis=1)

        dist0_sym = policy.dist_info_sym(aug_obs_ph, name='p0_sym')
        dist1_sym = policy.dist_info_sym_given_task(obs_ph,
                                                    task_ph,
                                                    name='p1_sym')
        dist2_sym = policy.dist_info_sym_given_latent(obs_ph,
                                                      latent_ph,
                                                      name='p2_sym')

        # flatten output
        expected_mean = [np.full(np.prod(action_dim), 0.5)]
        expected_log_std = [np.full(np.prod(action_dim), np.log(0.5))]

        prob0 = self.sess.run(dist0_sym,
                              feed_dict={aug_obs_ph: [aug_obs.flatten()]})
        prob1 = self.sess.run(dist1_sym,
                              feed_dict={
                                  obs_ph: [obs.flatten()],
                                  task_ph: [task]
                              })
        prob2 = self.sess.run(dist2_sym,
                              feed_dict={
                                  obs_ph: [obs.flatten()],
                                  latent_ph: [latent]
                              })
        prob3 = policy.dist_info(aug_obs)

        assert np.array_equal(prob0['mean'].flatten(), expected_mean[0])
        assert np.array_equal(prob0['log_std'].flatten(),
                              expected_log_std[0])
        assert np.array_equal(prob1['mean'], expected_mean)
        assert np.array_equal(prob1['log_std'], expected_log_std)
        assert np.array_equal(prob2['mean'], expected_mean)
        assert np.array_equal(prob2['log_std'], expected_log_std)
        assert np.array_equal(prob3['mean'].flatten(), expected_mean[0])
        assert np.array_equal(prob3['log_std'].flatten(),
                              expected_log_std[0])