def test_agent_steps(self):
    """Exercise predict/rollout/train steps of an ``Agent`` with an ICM module.

    Verifies that the intrinsic reward module (ICM) keeps no state during
    prediction, but produces state during rollout and training, and that
    rollout and train steps agree on that state.
    """
    bs = 1
    obs_spec = TensorSpec((10, ))
    act_spec = BoundedTensorSpec((), dtype='int64')
    step = TimeStep(
        observation=obs_spec.zeros(outer_dims=(bs, )),
        prev_action=act_spec.zeros(outer_dims=(bs, )))

    make_actor = functools.partial(
        ActorDistributionNetwork, fc_layer_params=(100, ))
    make_value = functools.partial(ValueNetwork, fc_layer_params=(100, ))

    # TODO: add a goal generator and an entropy target algorithm once they
    # are implemented.
    agent = Agent(
        observation_spec=obs_spec,
        action_spec=act_spec,
        rl_algorithm_cls=functools.partial(
            ActorCriticAlgorithm,
            actor_network_ctor=make_actor,
            value_network_ctor=make_value),
        intrinsic_reward_module=ICMAlgorithm(
            action_spec=act_spec, observation_spec=obs_spec))

    predict_state = agent.get_initial_predict_state(bs)
    rollout_state = agent.get_initial_rollout_state(bs)
    train_state = agent.get_initial_train_state(bs)

    pred_out = agent.predict_step(step, predict_state, epsilon_greedy=0.1)
    # Prediction should not track any intrinsic-reward-module state.
    self.assertEqual(pred_out.state.irm, ())

    rollout_out = agent.rollout_step(step, rollout_state)
    self.assertNotEqual(rollout_out.state.irm, ())

    experience = make_experience(step, rollout_out, rollout_state)
    train_out = agent.train_step(experience, train_state)
    self.assertNotEqual(train_out.state.irm, ())
    # Rollout and training must produce the same ICM state for this step.
    self.assertTensorEqual(rollout_out.state.irm, train_out.state.irm)
def test_discrete_action(self):
    """An untrained inverse model should be maximally uncertain.

    For a discrete action space with 4 actions, the inverse-model loss of a
    freshly initialized ICM should equal the cross entropy of a uniform
    prediction, i.e. ``log(4)``.
    """
    act_spec = BoundedTensorSpec((), dtype=torch.int64, minimum=0, maximum=3)
    icm = ICMAlgorithm(
        action_spec=act_spec,
        observation_spec=self._input_tensor_spec,
        hidden_size=self._hidden_size)
    init_state = self._input_tensor_spec.zeros(outer_dims=(1, ))
    time_step = self._time_step._replace(
        prev_action=act_spec.zeros(outer_dims=(1, )))
    out = icm.train_step(time_step, init_state)
    # the inverse net should predict a uniform distribution
    num_actions = act_spec.maximum - act_spec.minimum + 1
    self.assertTensorClose(
        torch.sum(out.info.loss.extra['inverse_loss']),
        torch.as_tensor(math.log(num_actions)),
        epsilon=1e-4)
def test_discrete_skill_loss(self):
    """An untrained DIAYN discriminator should predict skills uniformly.

    With 4 discrete skills, the discriminator loss of a freshly initialized
    DIAYN should be close to ``log(4)``.
    """
    skill_spec = BoundedTensorSpec((), dtype=torch.int64, minimum=0, maximum=3)
    diayn = DIAYNAlgorithm(
        skill_spec=skill_spec, encoding_net=self._encoding_net)
    num_skills = int(skill_spec.maximum - skill_spec.minimum + 1)
    # One-hot encoding of skill 0; the same tensor doubles as the initial
    # algorithm state.
    skill = torch.nn.functional.one_hot(
        skill_spec.zeros(outer_dims=(1, )), num_skills).to(torch.float32)
    state = skill
    out = diayn.train_step(
        self._time_step._replace(
            observation=[self._time_step.observation, skill]), state)
    # the discriminator should predict a uniform distribution
    self.assertTensorClose(
        torch.sum(out.info.loss),
        torch.as_tensor(math.log(num_skills)),
        epsilon=1e-4)
def test_conditional_vae(self):
    """Test for one dimensional Gaussian, conditioned on a Bernoulli variable.

    Trains a conditional VAE plus a small decoder to reconstruct targets
    that are the inputs shifted by +1 for one class and unshifted for the
    other, then checks the test reconstruction loss is small.
    """
    prior_input_spec = BoundedTensorSpec((), 'int64')
    num_classes = prior_input_spec.maximum - prior_input_spec.minimum + 1

    # Prior network: maps a one-hot class label to latent prior parameters
    # (mean and log-variance, hence 2 * latent_dim outputs).
    z_prior_network = EncodingNetwork(
        TensorSpec((num_classes, )),
        fc_layer_params=(10, ) * 2,
        last_layer_size=2 * self._latent_dim,
        last_activation=math_ops.identity)
    preprocess_network = EncodingNetwork(
        input_tensor_spec=(
            z_prior_network.input_tensor_spec,
            self._input_spec,
            z_prior_network.output_spec,
        ),
        preprocessing_combiner=NestConcat(),
        fc_layer_params=(10, ) * 2,
        last_layer_size=self._latent_dim,
        last_activation=math_ops.identity)

    encoder = vae.VariationalAutoEncoder(
        self._latent_dim,
        preprocess_network=preprocess_network,
        z_prior_network=z_prior_network)
    decoding_layers = FC(self._latent_dim, 1)

    optimizer = torch.optim.Adam(
        list(encoder.parameters()) + list(decoding_layers.parameters()),
        lr=0.1)

    # Training data: the first half of the targets is shifted by +1; the
    # class labels are 0 for the first half and 1 for the second.
    x_train = self._input_spec.randn(outer_dims=(10000, ))
    y_train = x_train.clone()
    y_train[:5000] = y_train[:5000] + 1.0
    pr_train = torch.cat([
        prior_input_spec.zeros(outer_dims=(5000, )),
        prior_input_spec.ones(outer_dims=(5000, ))
    ], dim=0)

    # Test data built the same way, with the class labels pre-one-hotted.
    x_test = self._input_spec.randn(outer_dims=(100, ))
    y_test = x_test.clone()
    y_test[:50] = y_test[:50] + 1.0
    pr_test = torch.cat([
        prior_input_spec.zeros(outer_dims=(50, )),
        prior_input_spec.ones(outer_dims=(50, ))
    ], dim=0)
    pr_test = torch.nn.functional.one_hot(
        pr_test,
        int(z_prior_network.input_tensor_spec.shape[0])).to(torch.float32)

    for _ in range(self._epochs):
        # Reshuffle the training set (inputs, targets and labels together)
        # at the start of every epoch.
        perm = torch.randperm(x_train.shape[0])
        x_train = x_train[perm]
        y_train = y_train[perm]
        pr_train = pr_train[perm]
        for start in range(0, x_train.shape[0], self._batch_size):
            optimizer.zero_grad()
            end = start + self._batch_size
            x_batch = x_train[start:end]
            y_batch = y_train[start:end]
            pr_batch = torch.nn.functional.one_hot(
                pr_train[start:end],
                int(z_prior_network.input_tensor_spec.shape[0])).to(
                    torch.float32)
            alg_step = encoder.train_step([pr_batch, x_batch])
            outputs = decoding_layers(alg_step.output)
            # Weighted reconstruction loss plus the VAE (KL) loss.
            loss = torch.mean(100 * self._loss_f(y_batch - outputs) +
                              alg_step.info.loss)
            loss.backward()
            optimizer.step()

    y_hat_test = decoding_layers(
        encoder.train_step([pr_test, x_test]).output)
    reconstruction_loss = float(torch.mean(self._loss_f(y_test - y_hat_test)))
    print("reconstruction_loss:", reconstruction_loss)
    self.assertLess(reconstruction_loss, 0.05)