Example #1
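    # note: self._input_tensor_spec, self._hidden_size and self._time_step
    # are fixtures assumed to be created in this test case's setUp()
    # (Example #2 below uses the same fixtures)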
    def test_continuous_action(self):
        action_spec = TensorSpec((4, ))
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))

        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)

        # with all-zero inputs, the inverse net should predict a zero action
        # vector that matches the zero prev_action, so the inverse loss is ~0
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(0))
Example #2
    def test_discrete_action(self):
        action_spec = BoundedTensorSpec((),
                                        dtype=torch.int64,
                                        minimum=0,
                                        maximum=3)
        alg = ICMAlgorithm(action_spec=action_spec,
                           observation_spec=self._input_tensor_spec,
                           hidden_size=self._hidden_size)
        state = self._input_tensor_spec.zeros(outer_dims=(1, ))

        alg_step = alg.train_step(
            self._time_step._replace(prev_action=action_spec.zeros(
                outer_dims=(1, ))), state)

        # with all-zero inputs, the inverse net should predict a uniform
        # distribution over the 4 actions, so the cross-entropy inverse loss
        # is -log(1/4) = log(4)
        self.assertTensorClose(
            torch.sum(alg_step.info.loss.extra['inverse_loss']),
            torch.as_tensor(
                math.log(action_spec.maximum - action_spec.minimum + 1)),
            epsilon=1e-4)
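For reference, the value asserted above is just the entropy of a uniform categorical distribution; a quick standalone check (plain Python, no framework assumptions):

import math

num_actions = 3 - 0 + 1       # maximum - minimum + 1 from the spec above
print(math.log(num_actions))  # 1.3862943611198906, the expected inverse loss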
Example #3
    def test_agent_steps(self):
        batch_size = 1
        observation_spec = TensorSpec((10, ))
        action_spec = BoundedTensorSpec((), dtype='int64')
        time_step = TimeStep(
            observation=observation_spec.zeros(outer_dims=(batch_size, )),
            prev_action=action_spec.zeros(outer_dims=(batch_size, )))

        actor_net = functools.partial(ActorDistributionNetwork,
                                      fc_layer_params=(100, ))
        value_net = functools.partial(ValueNetwork, fc_layer_params=(100, ))

        # TODO: add a goal generator and an entropy target algorithm once they
        # are implemented.
        agent = Agent(observation_spec=observation_spec,
                      action_spec=action_spec,
                      rl_algorithm_cls=functools.partial(
                          ActorCriticAlgorithm,
                          actor_network_ctor=actor_net,
                          value_network_ctor=value_net),
                      intrinsic_reward_module=ICMAlgorithm(
                          action_spec=action_spec,
                          observation_spec=observation_spec))

        predict_state = agent.get_initial_predict_state(batch_size)
        rollout_state = agent.get_initial_rollout_state(batch_size)
        train_state = agent.get_initial_train_state(batch_size)

        pred_step = agent.predict_step(time_step,
                                       predict_state,
                                       epsilon_greedy=0.1)
        self.assertEqual(pred_step.state.irm, ())

        rollout_step = agent.rollout_step(time_step, rollout_state)
        self.assertNotEqual(rollout_step.state.irm, ())

        exp = make_experience(time_step, rollout_step, rollout_state)

        train_step = agent.train_step(exp, train_state)
        self.assertNotEqual(train_step.state.irm, ())

        self.assertTensorEqual(rollout_step.state.irm, train_step.state.irm)
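Note the state-flow contract these assertions pin down: predict_step skips the intrinsic reward module entirely (its irm state stays ()), presumably because intrinsic rewards only matter when computing training targets, while rollout_step and train_step both run the ICM on the same inputs and must therefore produce identical irm states.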
Example #4
def create_ac_algorithm(env,
                        actor_fc_layers=(200, 100),
                        value_fc_layers=(200, 100),
                        encoding_conv_layers=(),
                        encoding_fc_layers=(),
                        use_rnns=False,
                        use_icm=False,
                        learning_rate=5e-5,
                        algorithm_class=ActorCriticAlgorithm,
                        loss_class=ActorCriticLoss,
                        debug_summaries=False):
    """Create a simple ActorCriticAlgorithm.

    Args:
        env (TFEnvironment): the environment the algorithm will interact with
        actor_fc_layers (list[int]): sizes of the fc layers of the actor
            network
        value_fc_layers (list[int]): sizes of the fc layers of the value
            network
        encoding_conv_layers (list[int]): parameters of the conv layers of the
            encoding network
        encoding_fc_layers (list[int]): sizes of the fc layers of the encoding
            network
        use_rnns (bool): True if RNNs should be used for the actor and value
            networks
        use_icm (bool): True if an intrinsic curiosity module (ICM) should be
            used
        learning_rate (float): learning rate for the Adam optimizer
        algorithm_class (type): class of the algorithm. Can be
            ActorCriticAlgorithm or PPOAlgorithm
        loss_class (type): class of the loss. The signature of its constructor
            is loss_class(action_spec, debug_summaries)
        debug_summaries (bool): True if debug summaries should be created

    Returns:
        the created algorithm
    """
    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)

    if use_rnns:
        actor_net = ActorDistributionRnnNetwork(
            env.observation_spec(),
            env.action_spec(),
            input_fc_layer_params=actor_fc_layers,
            output_fc_layer_params=None)
        value_net = ValueRnnNetwork(env.observation_spec(),
                                    input_fc_layer_params=value_fc_layers,
                                    output_fc_layer_params=None)
    else:
        actor_net = ActorDistributionNetwork(env.observation_spec(),
                                             env.action_spec(),
                                             fc_layer_params=actor_fc_layers)
        value_net = ValueNetwork(env.observation_spec(),
                                 fc_layer_params=value_fc_layers)

    encoding_net = None
    if encoding_fc_layers or encoding_conv_layers:
        encoding_net = EncodingNetwork(
            input_tensor_spec=env.observation_spec(),
            conv_layer_params=encoding_conv_layers,
            fc_layer_params=encoding_fc_layers)

    icm = None
    if use_icm:
        feature_spec = env.observation_spec()
        if encoding_net:
            # the encoding net ends with fc layers, so its output size is that
            # of the last fc layer (this assumes encoding_fc_layers is
            # non-empty whenever an encoding net is combined with ICM)
            feature_spec = tf.TensorSpec((encoding_fc_layers[-1], ),
                                         dtype=tf.float32)
        icm = ICMAlgorithm(env.action_spec(),
                           feature_spec,
                           encoding_net=encoding_net)

    algorithm = algorithm_class(action_spec=env.action_spec(),
                                actor_network=actor_net,
                                value_network=value_net,
                                intrinsic_curiosity_module=icm,
                                loss_class=loss_class,
                                optimizer=optimizer,
                                debug_summaries=debug_summaries)

    return algorithm
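A minimal usage sketch, assuming a hypothetical make_env() helper that returns a TFEnvironment (any environment constructor from the surrounding codebase would do):

env = make_env()  # hypothetical: stands in for the actual environment factory
algorithm = create_ac_algorithm(env,
                                use_icm=True,
                                encoding_fc_layers=(64, ),
                                learning_rate=1e-4)

With encoding_fc_layers set, an EncodingNetwork is created and handed to ICM, whose feature spec then becomes a 64-dim float vector rather than the raw observation spec.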