Exemple #1
0
    def test_actor_wrapper(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        action_normalization_parameters = {
            i: _cont_action_norm()
            for i in range(101, 105)
        }
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        postprocessor = Postprocessor(action_normalization_parameters, False)

        # Test with FullyConnectedActor to make behavior deterministic
        actor = models.FullyConnectedActor(
            state_dim=len(state_normalization_parameters),
            action_dim=len(action_normalization_parameters),
            sizes=[16],
            activations=["relu"],
        )
        actor_with_preprocessor = ActorWithPreprocessor(
            actor, state_preprocessor, postprocessor)
        wrapper = ActorPredictorWrapper(actor_with_preprocessor)
        input_prototype = actor_with_preprocessor.input_prototype()
        action = wrapper(*input_prototype)
        self.assertEqual(action.shape,
                         (1, len(action_normalization_parameters)))

        expected_output = postprocessor(
            actor(rlt.FeatureData(
                state_preprocessor(*input_prototype[0]))).action)
        self.assertTrue((expected_output == action).all())
    def build_serving_module(
        self,
        actor: ModelBase,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """
        state_normalization_parameters = (
            state_normalization_data.dense_normalization_parameters)
        action_normalization_parameters = (
            action_normalization_data.dense_normalization_parameters)
        assert state_normalization_parameters is not None
        assert action_normalization_parameters is not None

        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          use_gpu=False)
        postprocessor = Postprocessor(action_normalization_parameters,
                                      use_gpu=False)
        actor_with_preprocessor = ActorWithPreprocessor(
            actor.cpu_model().eval(), state_preprocessor, postprocessor)
        action_features = Preprocessor(action_normalization_parameters,
                                       use_gpu=False).sorted_features
        return ActorPredictorWrapper(actor_with_preprocessor, action_features)
 def build_ranking_serving_module(
     self,
     actor: ModelBase,
     state_normalization_data: NormalizationData,
     candidate_normalization_data: NormalizationData,
     num_candidates: int,
     action_normalization_data: NormalizationData,
 ) -> torch.nn.Module:
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters,
         use_gpu=False)
     candidate_preprocessor = Preprocessor(
         candidate_normalization_data.dense_normalization_parameters,
         use_gpu=False)
     postprocessor = Postprocessor(
         action_normalization_data.dense_normalization_parameters,
         use_gpu=False)
     actor_with_preprocessor = RankingActorWithPreprocessor(
         model=actor.cpu_model().eval(),
         state_preprocessor=state_preprocessor,
         candidate_preprocessor=candidate_preprocessor,
         num_candidates=num_candidates,
         action_postprocessor=postprocessor,
     )
     action_features = Preprocessor(
         action_normalization_data.dense_normalization_parameters,
         use_gpu=False).sorted_features
     return RankingActorPredictorWrapper(actor_with_preprocessor,
                                         action_features)
    def build_serving_module(
        self,
        actor: ModelBase,
        state_feature_config: rlt.ModelFeatureConfig,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
        serve_mean_policy: bool = False,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """

        state_preprocessor = Preprocessor(
            state_normalization_data.dense_normalization_parameters,
            use_gpu=False)
        postprocessor = Postprocessor(
            action_normalization_data.dense_normalization_parameters,
            use_gpu=False)
        actor_with_preprocessor = ActorWithPreprocessor(
            actor.cpu_model().eval(),
            state_preprocessor,
            state_feature_config,
            postprocessor,
            serve_mean_policy=serve_mean_policy,
        )
        action_features = Preprocessor(
            action_normalization_data.dense_normalization_parameters,
            use_gpu=False).sorted_features
        return ActorPredictorWrapper(actor_with_preprocessor,
                                     state_feature_config, action_features)
Exemple #5
0
    def test_do_not_preprocess(self):
        normalization_parameters = {
            i: NormalizationParameters(feature_type=DO_NOT_PREPROCESS)
            for i in range(1, 5)
        }
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        x = torch.randn(3, 4)
        presence = torch.ones_like(x, dtype=torch.uint8)
        y = postprocessor(preprocessor(x, presence))
        npt.assert_allclose(x, y)
Exemple #6
0
 def get_actor_predictor(self, trainer, environment):
     state_preprocessor = Preprocessor(environment.normalization, False)
     postprocessor = Postprocessor(
         environment.normalization_continuous_action, False)
     actor_with_preprocessor = ActorWithPreprocessor(
         trainer.actor_network.cpu_model().eval(), state_preprocessor,
         postprocessor)
     serving_module = ActorPredictorWrapper(actor_with_preprocessor)
     predictor = ActorTorchPredictor(
         serving_module,
         sort_features_by_normalization(
             environment.normalization_continuous_action)[0],
     )
     return predictor
Exemple #7
0
    def test_continuous_action(self):
        normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS_ACTION,
                                       min_value=-5.0 * i,
                                       max_value=10.0 * i)
            for i in range(1, 5)
        }
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        x = torch.rand(3, 4) * torch.tensor([15, 30, 45, 60]) + torch.tensor(
            [-5, -10, -15, -20])
        presence = torch.ones_like(x, dtype=torch.uint8)
        y = postprocessor(preprocessor(x, presence))
        npt.assert_allclose(x, y, rtol=1e-4)