def test_actor_wrapper(self):
    """Check ActorPredictorWrapper against a manual run of its components.

    Builds a FullyConnectedActor wrapped with a state Preprocessor and an
    action Postprocessor, feeds the wrapper its own input prototype, and
    asserts that the result has shape ``(1, number_of_actions)`` and equals
    the output obtained by chaining preprocessor, actor and postprocessor
    explicitly.
    """
    state_norm = {i: _cont_norm() for i in range(1, 5)}
    action_norm = {i: _cont_action_norm() for i in range(101, 105)}
    num_actions = len(action_norm)

    state_preprocessor = Preprocessor(state_norm, False)
    postprocessor = Postprocessor(action_norm, False)

    # Test with FullyConnectedActor to make behavior deterministic
    actor = FullyConnectedActor(
        state_dim=len(state_norm),
        action_dim=num_actions,
        sizes=[16],
        activations=["relu"],
    )
    wrapped_actor = ActorWithPreprocessor(actor, state_preprocessor, postprocessor)
    wrapper = ActorPredictorWrapper(wrapped_actor)

    input_prototype = wrapped_actor.input_prototype()
    action = wrapper(*input_prototype)
    self.assertEqual(action.shape, (1, num_actions))

    # Recompute the action one stage at a time, bypassing the wrapper.
    preprocessed_state = state_preprocessor(*input_prototype[0])
    actor_output = actor(rlt.PreprocessedState.from_tensor(preprocessed_state))
    expected_output = postprocessor(actor_output.action)

    # Exact (not approximate) equality is required between the two paths.
    self.assertTrue((expected_output == action).all())
def build_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: NormalizationData,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    The actor is moved to its CPU model, put in eval mode, and combined with
    a state Preprocessor and an action Postprocessor (both built with
    ``use_gpu=False``) inside an ActorWithPreprocessor. The result is wrapped
    in an ActorPredictorWrapper together with the ``sorted_features`` of a
    Preprocessor built from the action normalization parameters.

    Args:
        actor: model providing ``cpu_model()``.
        state_normalization_data: source of the state
            ``dense_normalization_parameters``.
        action_normalization_data: source of the action
            ``dense_normalization_parameters``.

    Raises:
        AssertionError: if either ``dense_normalization_parameters`` is None.
    """
    state_params = state_normalization_data.dense_normalization_parameters
    action_params = action_normalization_data.dense_normalization_parameters
    assert state_params is not None
    assert action_params is not None

    state_preprocessor = Preprocessor(state_params, use_gpu=False)
    postprocessor = Postprocessor(action_params, use_gpu=False)

    eval_actor = actor.cpu_model().eval()
    actor_with_preprocessor = ActorWithPreprocessor(
        eval_actor, state_preprocessor, postprocessor
    )

    # A throwaway Preprocessor is built only to read its sorted_features.
    action_preprocessor = Preprocessor(action_params, use_gpu=False)
    action_features = action_preprocessor.sorted_features

    return ActorPredictorWrapper(actor_with_preprocessor, action_features)
def test_do_not_preprocess(self):
    """Round-trip DO_NOT_PREPROCESS features through pre- and postprocessing.

    Four features (ids 1..4) are declared as DO_NOT_PREPROCESS. A random
    3x4 input with every value marked present is passed through the
    Preprocessor and then the Postprocessor; the result must be close to
    the original input.
    """
    normalization_parameters = {
        feature_id: NormalizationParameters(feature_type=DO_NOT_PREPROCESS)
        for feature_id in range(1, 5)
    }
    preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
    postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

    x = torch.randn(3, 4)
    # All-ones mask: every entry of x is flagged as present.
    presence = torch.ones_like(x, dtype=torch.uint8)

    preprocessed = preprocessor(x, presence)
    y = postprocessor(preprocessed)

    npt.assert_allclose(x, y)
def get_actor_predictor(self, trainer, environment):
    """Build an ActorTorchPredictor from a trainer's actor network.

    The actor network's CPU model (in eval mode) is combined with a state
    Preprocessor built from ``environment.normalization`` and a
    Postprocessor built from ``environment.normalization_continuous_action``,
    then wrapped in an ActorPredictorWrapper.

    Args:
        trainer: object exposing ``actor_network.cpu_model()``.
        environment: object exposing ``normalization`` and
            ``normalization_continuous_action``.

    Returns:
        An ActorTorchPredictor around the serving module.
    """
    state_preprocessor = Preprocessor(environment.normalization, False)
    postprocessor = Postprocessor(
        environment.normalization_continuous_action, False
    )

    eval_actor = trainer.actor_network.cpu_model().eval()
    serving_module = ActorPredictorWrapper(
        ActorWithPreprocessor(eval_actor, state_preprocessor, postprocessor)
    )

    # Only the first element of the helper's result is passed on
    # (presumably the sorted action feature ids -- confirm against the helper).
    sorted_action_features = sort_features_by_normalization(
        environment.normalization_continuous_action
    )[0]

    return ActorTorchPredictor(serving_module, sorted_action_features)
def test_continuous_action(self):
    """Round-trip CONTINUOUS_ACTION features through pre- and postprocessing.

    Feature ``i`` (for i in 1..4) is declared with ``min_value = -5 * i``
    and ``max_value = 10 * i``. A random 3x4 input whose column ``i`` is
    drawn from that feature's range is passed through the Preprocessor and
    then the Postprocessor; the result must match the input within
    ``rtol=1e-5``.
    """
    normalization_parameters = {
        i: NormalizationParameters(
            feature_type=CONTINUOUS_ACTION,
            min_value=-5.0 * i,
            max_value=10.0 * i,
        )
        for i in range(1, 5)
    }
    preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
    postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

    # Per-column width (max - min) and lower bound of each feature's range.
    span = torch.tensor([15, 30, 45, 60])
    lower_bound = torch.tensor([-5, -10, -15, -20])
    x = torch.rand(3, 4) * span + lower_bound

    # All-ones mask: every entry of x is flagged as present.
    presence = torch.ones_like(x, dtype=torch.uint8)

    preprocessed = preprocessor(x, presence)
    y = postprocessor(preprocessed)

    npt.assert_allclose(x, y, rtol=1e-5)