def test_actor_wrapper(self):
    """Check ActorPredictorWrapper against a manually chained forward pass.

    Builds a FullyConnectedActor wrapped with a state Preprocessor and an
    action Postprocessor, feeds the wrapper its own input prototype, and
    asserts that:

    * the output has shape ``(1, number_of_action_parameters)``;
    * the output equals the result of calling preprocessor, actor and
      postprocessor by hand on the same input.
    """
    state_norm = {idx: _cont_norm() for idx in range(1, 5)}
    action_norm = {idx: _cont_action_norm() for idx in range(101, 105)}

    preprocess = Preprocessor(state_norm, False)
    postprocess = Postprocessor(action_norm, False)

    # Test with FullyConnectedActor to make behavior deterministic
    actor = FullyConnectedActor(
        state_dim=len(state_norm),
        action_dim=len(action_norm),
        sizes=[16],
        activations=["relu"],
    )
    wrapped_actor = ActorWithPreprocessor(actor, preprocess, postprocess)
    wrapper = ActorPredictorWrapper(wrapped_actor)

    prototype = wrapped_actor.input_prototype()
    action = wrapper(*prototype)
    self.assertEqual(action.shape, (1, len(action_norm)))

    # Recompute the output step by step, bypassing the wrapper, using only
    # the first element of the prototype as the preprocessor's arguments.
    preprocessed = preprocess(*prototype[0])
    actor_output = actor(rlt.PreprocessedState.from_tensor(preprocessed))
    expected_output = postprocess(actor_output.action)

    self.assertTrue((expected_output == action).all())
def get_actor_predictor(self, trainer, environment):
    """Build an ActorTorchPredictor around the trainer's actor network.

    The actor network is taken from ``trainer.actor_network`` via
    ``cpu_model().eval()``, combined with a state Preprocessor built from
    ``environment.normalization`` and an action Postprocessor built from
    ``environment.normalization_continuous_action``, and then wrapped in an
    ActorPredictorWrapper.

    Args:
        trainer: Object exposing ``actor_network``.
        environment: Object exposing ``normalization`` and
            ``normalization_continuous_action``.

    Returns:
        The constructed ActorTorchPredictor.
    """
    preprocess = Preprocessor(environment.normalization, False)
    postprocess = Postprocessor(
        environment.normalization_continuous_action, False
    )

    cpu_actor = trainer.actor_network.cpu_model().eval()
    wrapped_actor = ActorWithPreprocessor(cpu_actor, preprocess, postprocess)
    serving_module = ActorPredictorWrapper(wrapped_actor)

    # Only the first element of the helper's result is passed on.
    # NOTE(review): presumably this is the sorted list of action feature
    # ids -- confirm against sort_features_by_normalization.
    sorted_action_features = sort_features_by_normalization(
        environment.normalization_continuous_action
    )
    return ActorTorchPredictor(serving_module, sorted_action_features[0])