Example #1
    def test_actor_wrapper(self):
        # Continuous state features 1-4 and continuous action features 101-104
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        action_normalization_parameters = {
            i: _cont_action_norm() for i in range(101, 105)
        }
        state_preprocessor = Preprocessor(state_normalization_parameters, False)
        postprocessor = Postprocessor(action_normalization_parameters, False)

        # Test with FullyConnectedActor to make behavior deterministic
        actor = FullyConnectedActor(
            state_dim=len(state_normalization_parameters),
            action_dim=len(action_normalization_parameters),
            sizes=[16],
            activations=["relu"],
        )
        actor_with_preprocessor = ActorWithPreprocessor(
            actor, state_preprocessor, postprocessor
        )
        wrapper = ActorPredictorWrapper(actor_with_preprocessor)
        input_prototype = actor_with_preprocessor.input_prototype()
        action = wrapper(*input_prototype)
        # One prototype row in, one postprocessed action row out
        self.assertEqual(action.shape, (1, len(action_normalization_parameters)))

        # Reference result: run preprocess -> actor -> postprocess by hand and
        # check that it matches the wrapper's output exactly
        expected_output = postprocessor(
            actor(
                rlt.PreprocessedState.from_tensor(
                    state_preprocessor(*input_prototype[0])
                )
            ).action
        )
        self.assertTrue((expected_output == action).all())
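
A follow-up note, not part of the original test: if ActorPredictorWrapper behaves as a torch.jit.ScriptModule (an assumption; the snippet itself does not show it), a natural next step after the assertion above is to serialize the wrapper and reload it for inference. The sketch below reuses wrapper and input_prototype from the test; the file name is hypothetical.

    import torch

    # Assumption: `wrapper` is a TorchScript module and can be saved directly.
    torch.jit.save(wrapper, "actor_serving_module.pt")  # hypothetical file name

    loaded = torch.jit.load("actor_serving_module.pt")
    # The reloaded module keeps the same calling convention as the wrapper above.
    action = loaded(*input_prototype)
    assert action.shape == (1, 4)  # four continuous action features, ids 101-104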
Example #2
    def get_actor_predictor(self, trainer, environment):
        # Preprocess raw state features; postprocess the actor's output back
        # into the environment's continuous action space
        state_preprocessor = Preprocessor(environment.normalization, False)
        postprocessor = Postprocessor(
            environment.normalization_continuous_action, False
        )
        actor_with_preprocessor = ActorWithPreprocessor(
            trainer.actor_network.cpu_model().eval(),
            state_preprocessor,
            postprocessor,
        )
        serving_module = ActorPredictorWrapper(actor_with_preprocessor)
        # sort_features_by_normalization(...)[0] returns the action feature ids
        # in their canonical sorted order
        predictor = ActorTorchPredictor(
            serving_module,
            sort_features_by_normalization(
                environment.normalization_continuous_action
            )[0],
        )
        return predictor
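
For context, a minimal smoke test of the serving module built inside get_actor_predictor, assuming the same calling convention as in Example #1 (the wrapper is invoked on the output of ActorWithPreprocessor.input_prototype()); this check is not part of the original snippet.

    # Hypothetical check mirroring Example #1: one prototype row in,
    # one postprocessed action row out.
    input_prototype = actor_with_preprocessor.input_prototype()
    action = serving_module(*input_prototype)
    assert action.shape[0] == 1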