Beispiel #1
0
 def build_serving_module(
     self,
     seq_len: int,
     synthetic_reward_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_normalization_data: Optional[NormalizationData] = None,
     discrete_action_names: Optional[List[str]] = None,
 ) -> torch.nn.Module:
     """
     Build the serving module for a synthetic reward network.

     A state ``Preprocessor`` is always constructed first from the dense
     normalization parameters of ``state_normalization_data``.

     If ``discrete_action_names`` is None or empty, an action
     ``Preprocessor`` is built from ``action_normalization_data`` (asserted
     to be non-None) and the network returned by ``export_mlp()`` -- after
     ``.cpu().eval()`` -- is wrapped, together with ``seq_len`` and both
     preprocessors, in a ``SyntheticRewardPredictorWrapper``.

     Otherwise the discrete case is not implemented yet and a scripted
     ``torch.nn.Linear(1, 1)`` is returned as a placeholder.
     """
     state_prep = Preprocessor(
         state_normalization_data.dense_normalization_parameters
     )

     if discrete_action_names:
         # TODO add Discrete Single Step Synthetic Reward Predictor
         return torch.jit.script(torch.nn.Linear(1, 1))

     # Parametric-action path: action normalization data is mandatory here.
     assert action_normalization_data is not None
     action_prep = Preprocessor(
         action_normalization_data.dense_normalization_parameters
     )
     # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
     #  function.
     exported_net = synthetic_reward_network.export_mlp().cpu().eval()
     return SyntheticRewardPredictorWrapper(
         seq_len, state_prep, action_prep, exported_net
     )
Beispiel #2
0
 def build_serving_module(
     self,
     synthetic_reward_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_normalization_data: Optional[NormalizationData] = None,
     discrete_action_names: Optional[List[str]] = None,
 ) -> torch.nn.Module:
     """
     Build the serving module for a single-step synthetic reward network.

     A state ``Preprocessor`` is always constructed first from the dense
     normalization parameters of ``state_normalization_data``.

     If ``discrete_action_names`` is None or empty, an action
     ``Preprocessor`` is built from ``action_normalization_data`` (asserted
     to be non-None); the network returned by ``export_mlp()`` -- after
     ``.cpu().eval()`` -- is combined with both preprocessors in a
     ``ParametricDqnWithPreprocessor``, which is then wrapped in a
     ``ParametricSingleStepSyntheticRewardPredictorWrapper``.

     Raises:
         NotImplementedError: if ``discrete_action_names`` is non-empty.
     """
     state_prep = Preprocessor(
         state_normalization_data.dense_normalization_parameters
     )

     if discrete_action_names:
         raise NotImplementedError(
             "Discrete Single Step Synthetic Reward Predictor has not been implemented"
         )

     # Parametric-action path: action normalization data is mandatory here.
     assert action_normalization_data is not None
     action_prep = Preprocessor(
         action_normalization_data.dense_normalization_parameters
     )
     # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
     #  function.
     exported_net = synthetic_reward_network.export_mlp().cpu().eval()
     reward_net_with_prep = ParametricDqnWithPreprocessor(
         exported_net, state_prep, action_prep
     )
     return ParametricSingleStepSyntheticRewardPredictorWrapper(
         reward_net_with_prep
     )