Example 1
    def test_basic(self):
        state_dim = 8
        action_dim = 4
        model = GaussianFullyConnectedActor(
            state_dim,
            action_dim,
            sizes=[7, 6],
            activations=["relu", "relu"],
            use_batch_norm=True,
        )
        input = model.input_prototype()
        self.assertEqual((1, state_dim), input.float_features.shape)
        # Batch norm needs more than one example per batch in training mode,
        # so run the forward pass in eval mode instead.
        model.eval()
        action = model(input)
        self.assertEqual((1, action_dim), action.action.shape)
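The test above only asserts shapes. Below is a minimal standalone sketch of the same usage; the reagent.models.actor import path, the stochastic sampling on every forward pass, and the [-1, 1] squashing are assumptions inferred from the snippets on this page (e.g. the squashed_action name and the check_equality=False comment further down), not statements taken from them.

import torch

# Assumed import path for this sketch; adjust to the ReAgent version in use.
from reagent.models.actor import GaussianFullyConnectedActor

torch.manual_seed(0)
actor = GaussianFullyConnectedActor(
    state_dim=8,
    action_dim=4,
    sizes=[7, 6],
    activations=["relu", "relu"],
    use_batch_norm=False,
)
actor.eval()

proto = actor.input_prototype()   # FeatureData-like input exposing .float_features
first = actor(proto)
second = actor(proto)

assert first.action.shape == (1, 4)
# The policy is stochastic, so two forward passes on the same state will
# usually sample different actions.
print(torch.allclose(first.action, second.action))  # usually False
# The action appears to be tanh-squashed into [-1, 1]; the SAC example below
# uses the matching (-1 + 1e-6, 1 - 1e-6) training range.
assert first.action.abs().max() <= 1.0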
Example 2
    def test_get_log_prob(self):
        torch.manual_seed(0)
        state_dim = 8
        action_dim = 4
        model = GaussianFullyConnectedActor(
            state_dim,
            action_dim,
            sizes=[7, 6],
            activations=["relu", "relu"],
            use_batch_norm=False,
        )
        input = model.input_prototype()
        self.assertEqual((1, state_dim), input.float_features.shape)
        action = model(input)
        squashed_action = action.action.detach()
        action_log_prob = model.get_log_prob(input, squashed_action).detach()
        npt.assert_allclose(action.log_prob.detach(), action_log_prob, rtol=1e-4)
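As a small, hedged follow-up to the test above: nothing here requires the scored action to be the one that was just sampled. Assuming get_log_prob simply evaluates the current policy's log-density (which is what the test exercises), any already-squashed action tensor of the right shape can be scored:

# Continues from test_get_log_prob above (model, input and action_dim in scope).
other_actions = torch.rand(1, action_dim) * 2 - 1   # hypothetical actions already inside (-1, 1)
other_log_prob = model.get_log_prob(input, other_actions)
# other_log_prob is the log-density of other_actions under the current policy at
# the states in `input`, the same quantity the assertion above re-computes for
# the sampled action.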
Example 3
    def build_actor(
        self,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
    ) -> ModelBase:
        state_dim = get_num_output_features(
            state_normalization_data.dense_normalization_parameters)
        action_dim = get_num_output_features(
            action_normalization_data.dense_normalization_parameters)
        return GaussianFullyConnectedActor(
            state_dim=state_dim,
            action_dim=action_dim,
            sizes=self.sizes,
            activations=self.activations,
            use_batch_norm=self.use_batch_norm,
            use_layer_norm=self.use_layer_norm,
        )
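This builder is pure dimension bookkeeping: get_num_output_features reports how many dense inputs the normalization parameters map to on each side, and the remaining constructor arguments come straight from the builder's config. A hedged sketch of calling it, reusing only calls already shown on this page:

# `self` stands for the net builder instance that defines build_actor above.
actor = self.build_actor(state_normalization_data, action_normalization_data)
proto = actor.input_prototype()
assert proto.float_features.shape == (
    1,
    get_num_output_features(state_normalization_data.dense_normalization_parameters),
)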
Example 4
    def test_save_load(self):
        state_dim = 8
        action_dim = 4
        model = GaussianFullyConnectedActor(
            state_dim,
            action_dim,
            sizes=[7, 6],
            activations=["relu", "relu"],
            use_batch_norm=False,
        )
        expected_num_params, expected_num_inputs, expected_num_outputs = 6, 1, 1
        # Actor output is stochastic and won't match between PyTorch & Caffe2
        check_save_load(
            self,
            model,
            expected_num_params,
            expected_num_inputs,
            expected_num_outputs,
            check_equality=False,
        )
Example 5
    def build_actor(
        self,
        state_feature_config: rlt.ModelFeatureConfig,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
    ) -> ModelBase:
        state_dim = get_num_output_features(
            state_normalization_data.dense_normalization_parameters)
        action_dim = get_num_output_features(
            action_normalization_data.dense_normalization_parameters)
        input_dim = state_dim
        embedding_dim = self.embedding_dim

        embedding_concat = None
        if embedding_dim is not None:
            embedding_concat = models.EmbeddingBagConcat(
                state_dim=state_dim,
                model_feature_config=state_feature_config,
                embedding_dim=embedding_dim,
            )
            input_dim = embedding_concat.output_dim

        gaussian_fc_actor = GaussianFullyConnectedActor(
            state_dim=input_dim,
            action_dim=action_dim,
            sizes=self.sizes,
            activations=self.activations,
            use_batch_norm=self.use_batch_norm,
            use_layer_norm=self.use_layer_norm,
            use_l2_normalization=self.use_l2_normalization,
        )

        if embedding_dim is None:
            return gaussian_fc_actor

        assert embedding_concat is not None
        return models.Sequential(  # type: ignore
            embedding_concat,
            rlt.TensorFeatureData(),
            gaussian_fc_actor,
        )
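When embedding_dim is set, the builder returns a three-stage pipeline instead of the bare actor: models.EmbeddingBagConcat pools the ID-list features described by state_feature_config and concatenates them with the dense state features, rlt.TensorFeatureData re-wraps the resulting plain tensor into the FeatureData-style input the actor expects, and the actor itself is sized to embedding_concat.output_dim rather than state_dim. A rough, assumed illustration of the dimension flow (not a documented contract):

# Hypothetical numbers: state_dim = 8 dense features, 3 ID-list features,
# embedding_dim = 16. The pooled-and-concatenated width would then be roughly
# 8 + 3 * 16 = 56, so the actor's first fully connected layer sees
# embedding_concat.output_dim = 56 inputs instead of state_dim = 8.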
Example 6
def get_sac_trainer(
    env: OpenAIGymEnvironment,
    rl_parameters: RLParameters,
    trainer_parameters: SACTrainerParameters,
    critic_training: FeedForwardParameters,
    actor_training: FeedForwardParameters,
    sac_value_training: Optional[FeedForwardParameters],
    use_gpu: bool,
) -> SACTrainer:
    assert rl_parameters == trainer_parameters.rl
    state_dim = get_num_output_features(env.normalization)
    action_dim = get_num_output_features(env.normalization_action)
    q1_network = FullyConnectedParametricDQN(state_dim, action_dim,
                                             critic_training.layers,
                                             critic_training.activations)
    q2_network = None
    # TODO:
    # if trainer_parameters.use_2_q_functions:
    #     q2_network = FullyConnectedParametricDQN(
    #         state_dim,
    #         action_dim,
    #         critic_training.layers,
    #         critic_training.activations,
    #     )
    value_network = None
    if sac_value_training:
        value_network = FullyConnectedNetwork(
            [state_dim] + sac_value_training.layers + [1],
            sac_value_training.activations + ["linear"],
        )
    actor_network = GaussianFullyConnectedActor(state_dim, action_dim,
                                                actor_training.layers,
                                                actor_training.activations)

    min_action_range_tensor_training = torch.full((1, action_dim), -1 + 1e-6)
    max_action_range_tensor_training = torch.full((1, action_dim), 1 - 1e-6)
    min_action_range_tensor_serving = (
        torch.from_numpy(env.action_space.low).float().unsqueeze(dim=0)  # type: ignore
    )
    max_action_range_tensor_serving = (
        torch.from_numpy(env.action_space.high).float().unsqueeze(dim=0)  # type: ignore
    )

    if use_gpu:
        q1_network.cuda()
        if q2_network:
            q2_network.cuda()
        if value_network:
            value_network.cuda()
        actor_network.cuda()

        min_action_range_tensor_training = min_action_range_tensor_training.cuda()
        max_action_range_tensor_training = max_action_range_tensor_training.cuda()
        min_action_range_tensor_serving = min_action_range_tensor_serving.cuda()
        max_action_range_tensor_serving = max_action_range_tensor_serving.cuda()

    return SACTrainer(
        q1_network,
        actor_network,
        trainer_parameters,
        use_gpu=use_gpu,
        value_network=value_network,
        q2_network=q2_network,
        min_action_range_tensor_training=min_action_range_tensor_training,
        max_action_range_tensor_training=max_action_range_tensor_training,
        min_action_range_tensor_serving=min_action_range_tensor_serving,
        max_action_range_tensor_serving=max_action_range_tensor_serving,
    )
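A note on the four range tensors built above: the training range is the open interval a tanh-squashed actor can actually emit (hence the 1e-6 margins on ±1), while the serving range is read off the Gym action space so that squashed actions can be mapped back into environment units. A hedged sketch of that affine rescaling; the trainer's actual implementation may differ:

def rescale_action(squashed: torch.Tensor, low: torch.Tensor, high: torch.Tensor) -> torch.Tensor:
    # Map an action in [-1, 1] (training range) to [low, high] (serving range).
    return low + (squashed + 1.0) * (high - low) / 2.0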
Example 7
    def get_sac_trainer(
        self,
        env,
        use_gpu,
        use_2_q_functions=False,
        logged_action_uniform_prior=True,
        constrain_action_sum=False,
        use_value_network=True,
        use_alpha_optimizer=True,
        entropy_temperature=None,
    ):
        q_network_params = FeedForwardParameters(layers=[128, 64],
                                                 activations=["relu", "relu"])
        value_network_params = FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"])
        actor_network_params = FeedForwardParameters(
            layers=[128, 64], activations=["relu", "relu"])

        state_dim = get_num_output_features(env.normalization)
        action_dim = get_num_output_features(
            env.normalization_continuous_action)
        q1_network = FullyConnectedParametricDQN(state_dim, action_dim,
                                                 q_network_params.layers,
                                                 q_network_params.activations)
        q2_network = None
        if use_2_q_functions:
            q2_network = FullyConnectedParametricDQN(
                state_dim,
                action_dim,
                q_network_params.layers,
                q_network_params.activations,
            )
        if constrain_action_sum:
            actor_network = DirichletFullyConnectedActor(
                state_dim,
                action_dim,
                actor_network_params.layers,
                actor_network_params.activations,
            )
        else:
            actor_network = GaussianFullyConnectedActor(
                state_dim,
                action_dim,
                actor_network_params.layers,
                actor_network_params.activations,
            )

        value_network = None
        if use_value_network:
            value_network = FullyConnectedNetwork(
                [state_dim] + value_network_params.layers + [1],
                value_network_params.activations + ["linear"],
            )

        if use_gpu:
            q1_network.cuda()
            if q2_network:
                q2_network.cuda()
            if value_network:
                value_network.cuda()
            actor_network.cuda()

        parameters = SACTrainerParameters(
            rl=RLParameters(gamma=DISCOUNT, target_update_rate=0.5),
            minibatch_size=self.minibatch_size,
            q_network_optimizer=OptimizerParameters(),
            value_network_optimizer=OptimizerParameters(),
            actor_network_optimizer=OptimizerParameters(),
            alpha_optimizer=OptimizerParameters() if use_alpha_optimizer else None,
            entropy_temperature=entropy_temperature,
            logged_action_uniform_prior=logged_action_uniform_prior,
        )

        return SACTrainer(
            q1_network,
            actor_network,
            parameters,
            use_gpu=use_gpu,
            value_network=value_network,
            q2_network=q2_network,
        )
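In this helper the actor family is chosen by constrain_action_sum: GaussianFullyConnectedActor emits independently squashed components, whereas DirichletFullyConnectedActor is used when the action components must be non-negative and sum to one, which a Dirichlet policy produces by construction. A hedged usage sketch (the env fixture and flag values are placeholders):

# Inside the same test class:
trainer = self.get_sac_trainer(env, use_gpu=False, use_2_q_functions=True)
simplex_trainer = self.get_sac_trainer(env, use_gpu=False, constrain_action_sum=True)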