Esempio n. 1
0
    def test_agent_steps(self):
        batch_size = 1
        observation_spec = TensorSpec((10, ))
        action_spec = BoundedTensorSpec((), dtype='int64')
        time_step = TimeStep(
            observation=observation_spec.zeros(outer_dims=(batch_size, )),
            prev_action=action_spec.zeros(outer_dims=(batch_size, )))

        actor_net = functools.partial(ActorDistributionNetwork,
                                      fc_layer_params=(100, ))
        value_net = functools.partial(ValueNetwork, fc_layer_params=(100, ))

        # TODO: add a goal generator and an entropy target algorithm once they
        # are implemented.
        agent = Agent(observation_spec=observation_spec,
                      action_spec=action_spec,
                      rl_algorithm_cls=functools.partial(
                          ActorCriticAlgorithm,
                          actor_network_ctor=actor_net,
                          value_network_ctor=value_net),
                      intrinsic_reward_module=ICMAlgorithm(
                          action_spec=action_spec,
                          observation_spec=observation_spec))

        predict_state = agent.get_initial_predict_state(batch_size)
        rollout_state = agent.get_initial_rollout_state(batch_size)
        train_state = agent.get_initial_train_state(batch_size)

        pred_step = agent.predict_step(time_step,
                                       predict_state,
                                       epsilon_greedy=0.1)
        self.assertEqual(pred_step.state.irm, ())

        rollout_step = agent.rollout_step(time_step, rollout_state)
        self.assertNotEqual(rollout_step.state.irm, ())

        exp = make_experience(time_step, rollout_step, rollout_state)

        train_step = agent.train_step(exp, train_state)
        self.assertNotEqual(train_step.state.irm, ())

        self.assertTensorEqual(rollout_step.state.irm, train_step.state.irm)
Esempio n. 2
0
    def test_discrete_action(self):
        action_spec = BoundedTensorSpec((),
                                        dtype=torch.int64,
                                        minimum=0,
                                        maximum=3)
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))

        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)

        # the inverse net should predict a uniform distribution
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(
                math.log(action_spec.maximum - action_spec.minimum + 1)),
            epsilon=1e-4)
Esempio n. 3
0
    def test_discrete_skill_loss(self):
        skill_spec = BoundedTensorSpec((),
                                       dtype=torch.int64,
                                       minimum=0,
                                       maximum=3)
        alg = DIAYNAlgorithm(skill_spec=skill_spec,
                             encoding_net=self._encoding_net)
        skill = state = torch.nn.functional.one_hot(
            skill_spec.zeros(outer_dims=(1, )),
            int(skill_spec.maximum - skill_spec.minimum + 1)).to(torch.float32)

        alg_step = alg.train_step(
            self._time_step._replace(
                observation=[self._time_step.observation, skill]), state)

        # the discriminator should predict a uniform distribution
        self.assertTensorClose(torch.sum(alg_step.info.loss),
                               torch.as_tensor(
                                   math.log(skill_spec.maximum -
                                            skill_spec.minimum + 1)),
                               epsilon=1e-4)
Esempio n. 4
0
    def test_conditional_vae(self):
        """Test for one dimensional Gaussion, conditioned on a Bernoulli variable.
        """
        prior_input_spec = BoundedTensorSpec((), 'int64')

        z_prior_network = EncodingNetwork(
            TensorSpec(
                (prior_input_spec.maximum - prior_input_spec.minimum + 1, )),
            fc_layer_params=(10, ) * 2,
            last_layer_size=2 * self._latent_dim,
            last_activation=math_ops.identity)
        preprocess_network = EncodingNetwork(
            input_tensor_spec=(
                z_prior_network.input_tensor_spec,
                self._input_spec,
                z_prior_network.output_spec,
            ),
            preprocessing_combiner=NestConcat(),
            fc_layer_params=(10, ) * 2,
            last_layer_size=self._latent_dim,
            last_activation=math_ops.identity)

        encoder = vae.VariationalAutoEncoder(
            self._latent_dim,
            preprocess_network=preprocess_network,
            z_prior_network=z_prior_network)
        decoding_layers = FC(self._latent_dim, 1)

        optimizer = torch.optim.Adam(
            list(encoder.parameters()) + list(decoding_layers.parameters()),
            lr=0.1)

        x_train = self._input_spec.randn(outer_dims=(10000, ))
        y_train = x_train.clone()
        y_train[:5000] = y_train[:5000] + 1.0
        pr_train = torch.cat([
            prior_input_spec.zeros(outer_dims=(5000, )),
            prior_input_spec.ones(outer_dims=(5000, ))
        ],
                             dim=0)

        x_test = self._input_spec.randn(outer_dims=(100, ))
        y_test = x_test.clone()
        y_test[:50] = y_test[:50] + 1.0
        pr_test = torch.cat([
            prior_input_spec.zeros(outer_dims=(50, )),
            prior_input_spec.ones(outer_dims=(50, ))
        ],
                            dim=0)
        pr_test = torch.nn.functional.one_hot(
            pr_test,
            int(z_prior_network.input_tensor_spec.shape[0])).to(torch.float32)

        for _ in range(self._epochs):
            idx = torch.randperm(x_train.shape[0])
            x_train = x_train[idx]
            y_train = y_train[idx]
            pr_train = pr_train[idx]
            for i in range(0, x_train.shape[0], self._batch_size):
                optimizer.zero_grad()
                batch = x_train[i:i + self._batch_size]
                y_batch = y_train[i:i + self._batch_size]
                pr_batch = torch.nn.functional.one_hot(
                    pr_train[i:i + self._batch_size],
                    int(z_prior_network.input_tensor_spec.shape[0])).to(
                        torch.float32)
                alg_step = encoder.train_step([pr_batch, batch])
                outputs = decoding_layers(alg_step.output)
                loss = torch.mean(100 * self._loss_f(y_batch - outputs) +
                                  alg_step.info.loss)
                loss.backward()
                optimizer.step()

        y_hat_test = decoding_layers(
            encoder.train_step([pr_test, x_test]).output)
        reconstruction_loss = float(
            torch.mean(self._loss_f(y_test - y_hat_test)))
        print("reconstruction_loss:", reconstruction_loss)
        self.assertLess(reconstruction_loss, 0.05)