def build_ranking_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    candidate_normalization_data: NormalizationData,
    num_candidates: int,
    action_normalization_data: NormalizationData,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module for a ranking actor.

    State, candidate, and action features each get their own normalization
    stage built from the corresponding NormalizationData; the actor model is
    moved to CPU and set to eval mode before wrapping.
    """
    preprocess_state = Preprocessor(
        state_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    preprocess_candidates = Preprocessor(
        candidate_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    postprocess_actions = Postprocessor(
        action_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    wrapped_actor = RankingActorWithPreprocessor(
        model=actor.cpu_model().eval(),
        state_preprocessor=preprocess_state,
        candidate_preprocessor=preprocess_candidates,
        num_candidates=num_candidates,
        action_postprocessor=postprocess_actions,
    )
    # A throwaway Preprocessor over the action parameters yields the sorted
    # action feature ids expected by the wrapper.
    sorted_action_features = Preprocessor(
        action_normalization_data.dense_normalization_parameters, use_gpu=False
    ).sorted_features
    return RankingActorPredictorWrapper(wrapped_actor, sorted_action_features)
Example #2
0
    def build_serving_module(
        self,
        actor: ModelBase,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module.

        The actor (CPU, eval mode) is wrapped with a state preprocessor in
        front and an action postprocessor behind it.
        """
        state_params = state_normalization_data.dense_normalization_parameters
        action_params = action_normalization_data.dense_normalization_parameters
        assert state_params is not None
        assert action_params is not None

        preprocessor = Preprocessor(state_params, use_gpu=False)
        postprocessor = Postprocessor(action_params, use_gpu=False)
        wrapped_actor = ActorWithPreprocessor(
            actor.cpu_model().eval(), preprocessor, postprocessor
        )
        # A throwaway Preprocessor over the action parameters yields the
        # sorted action feature ids expected by the wrapper.
        sorted_action_features = Preprocessor(
            action_params, use_gpu=False
        ).sorted_features
        return ActorPredictorWrapper(wrapped_actor, sorted_action_features)
Example #3
0
def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    """
    Build the example input tuple used to JIT-trace a model that may take
    sparse (id-list / id-score-list) state features.

    Feature names found on the model's input prototype are mapped to feature
    ids via ``state_feature_config.name2id``. When the prototype carries no
    sparse features, placeholder dicts keyed by a dummy id (42) are used so
    the JIT tracer can still infer the dict types.

    Returns a 1-tuple holding an ``rlt.ServingFeatureData``.
    """
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()
    # Terrible hack to make JIT tracing works. Python dict doesn't have type
    # so we need to insert something so JIT tracer can infer the type.
    state_id_list_features = {
        42: (torch.zeros(1, dtype=torch.long), torch.tensor([], dtype=torch.long))
    }
    state_id_score_list_features = {
        42: (
            torch.zeros(1, dtype=torch.long),
            torch.tensor([], dtype=torch.long),
            torch.tensor([], dtype=torch.float),
        )
    }
    if isinstance(model_prototype, rlt.FeatureData):
        if model_prototype.id_list_features:
            state_id_list_features = {
                name2id[k]: v for k, v in model_prototype.id_list_features.items()
            }
        if model_prototype.id_score_list_features:
            state_id_score_list_features = {
                name2id[k]: v for k, v in model_prototype.id_score_list_features.items()
            }

    # Renamed from `input` to avoid shadowing the builtin.
    serving_input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=state_id_list_features,
        id_score_list_features=state_id_score_list_features,
    )
    return (serving_input,)
    def build_serving_module(
        self,
        actor: ModelBase,
        state_feature_config: rlt.ModelFeatureConfig,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
        serve_mean_policy: bool = False,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module.

        ``serve_mean_policy`` is forwarded verbatim to ActorWithPreprocessor.
        """
        state_params = state_normalization_data.dense_normalization_parameters
        action_params = action_normalization_data.dense_normalization_parameters

        wrapped_actor = ActorWithPreprocessor(
            actor.cpu_model().eval(),
            Preprocessor(state_params, use_gpu=False),
            state_feature_config,
            Postprocessor(action_params, use_gpu=False),
            serve_mean_policy=serve_mean_policy,
        )
        # A throwaway Preprocessor over the action parameters yields the
        # sorted action feature ids expected by the wrapper.
        sorted_action_features = Preprocessor(
            action_params, use_gpu=False
        ).sorted_features
        return ActorPredictorWrapper(
            wrapped_actor, state_feature_config, sorted_action_features
        )
Example #5
0
 def build_serving_module(
     self,
     seq_len: int,
     synthetic_reward_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_normalization_data: Optional[NormalizationData] = None,
     discrete_action_names: Optional[List[str]] = None,
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module.

     Only the parametric-action case is implemented; for discrete actions a
     trivially scripted placeholder module is returned for now.
     """
     if discrete_action_names:
         # TODO add Discrete Single Step Synthetic Reward Predictor
         return torch.jit.script(torch.nn.Linear(1, 1))

     assert action_normalization_data is not None
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters
     )
     action_preprocessor = Preprocessor(
         action_normalization_data.dense_normalization_parameters
     )
     # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
     #  function.
     reward_mlp = synthetic_reward_network.export_mlp().cpu().eval()
     return SyntheticRewardPredictorWrapper(
         seq_len,
         state_preprocessor,
         action_preprocessor,
         reward_mlp,
     )
Example #6
0
def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    """
    Build the example input tuple used to JIT-trace a model that may take
    sparse (id-list / id-score-list) state features.

    Feature names found on the model's input prototype are mapped to feature
    ids via ``state_feature_config.name2id``. When the prototype carries no
    sparse features, the module-level FAKE_* placeholder dicts are used so
    the JIT tracer can still infer the dict types.

    Returns a 1-tuple holding an ``rlt.ServingFeatureData``.
    """
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()
    # Terrible hack to make JIT tracing works. Python dict doesn't have type
    # so we need to insert something so JIT tracer can infer the type.
    state_id_list_features = FAKE_STATE_ID_LIST_FEATURES
    state_id_score_list_features = FAKE_STATE_ID_SCORE_LIST_FEATURES
    if isinstance(model_prototype, rlt.FeatureData):
        if model_prototype.id_list_features:
            state_id_list_features = {
                name2id[k]: v
                for k, v in model_prototype.id_list_features.items()
            }
        if model_prototype.id_score_list_features:
            state_id_score_list_features = {
                name2id[k]: v
                for k, v in model_prototype.id_score_list_features.items()
            }

    # Renamed from `input` to avoid shadowing the builtin.
    serving_input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=state_id_list_features,
        id_score_list_features=state_id_score_list_features,
    )
    return (serving_input,)
Example #7
0
 def build_serving_module(
     self,
     synthetic_reward_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_normalization_data: Optional[NormalizationData] = None,
     discrete_action_names: Optional[List[str]] = None,
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module.

     Only parametric (continuous) actions are supported; passing discrete
     action names raises NotImplementedError.
     """
     if discrete_action_names:
         raise NotImplementedError(
             "Discrete Single Step Synthetic Reward Predictor has not been implemented"
         )

     assert action_normalization_data is not None
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters
     )
     action_preprocessor = Preprocessor(
         action_normalization_data.dense_normalization_parameters
     )
     wrapped_network = ParametricDqnWithPreprocessor(
         # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
         #  function.
         synthetic_reward_network.export_mlp().cpu().eval(),
         state_preprocessor,
         action_preprocessor,
     )
     return ParametricSingleStepSyntheticRewardPredictorWrapper(wrapped_network)
Example #8
0
 def build_serving_module(
     self,
     q_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_names: List[str],
     state_feature_config: rlt.ModelFeatureConfig,
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module wrapping the discrete-action
     Q-network (CPU, eval mode) behind a state preprocessor.
     """
     # use_gpu passed by keyword for consistency with the other builders
     # in this file (previously a bare positional `False`).
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters, use_gpu=False
     )
     dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
         q_network.cpu_model().eval(), state_preprocessor
     )
     return DiscreteDqnPredictorWrapper(
         dqn_with_preprocessor, action_names, state_feature_config
     )
Example #9
0
 def build_serving_module(
     self,
     q_network: ModelBase,
     state_normalization_parameters: Dict[int, NormalizationParameters],
     action_normalization_parameters: Dict[int, NormalizationParameters],
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module for a parametric-action DQN:
     state and action features are preprocessed separately, then fed to the
     CPU/eval copy of the Q-network.
     """
     # use_gpu passed by keyword for consistency with the other builders
     # in this file (previously a bare positional `False`).
     state_preprocessor = Preprocessor(state_normalization_parameters, use_gpu=False)
     action_preprocessor = Preprocessor(
         action_normalization_parameters, use_gpu=False
     )
     dqn_with_preprocessor = ParametricDqnWithPreprocessor(
         q_network.cpu_model().eval(), state_preprocessor, action_preprocessor
     )
     return ParametricDqnPredictorWrapper(
         dqn_with_preprocessor=dqn_with_preprocessor
     )
    def build_serving_module(
        self,
        actor: ModelBase,
        state_normalization_data: NormalizationData,
        action_feature_ids: List[int],
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module.

        The actor (CPU, eval mode) is wrapped with a state preprocessor;
        the given action feature ids are attached to the wrapper.
        """
        preprocessor = Preprocessor(
            state_normalization_data.dense_normalization_parameters, use_gpu=False
        )
        wrapped_actor = ActorWithPreprocessor(actor.cpu_model().eval(), preprocessor)
        return ActorPredictorWrapper(wrapped_actor, action_feature_ids)
Example #11
0
 def build_binary_difference_scorer(
     self,
     q_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_names: List[str],
     state_feature_config: rlt.ModelFeatureConfig,
 ) -> torch.nn.Module:
     """
     Returns softmax(1) - softmax(0)

     Only defined for binary action spaces, hence the two-action assert.
     """
     assert len(action_names) == 2
     # use_gpu passed by keyword for consistency with the other builders
     # in this file (previously a bare positional `False`).
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters, use_gpu=False
     )
     binary_difference_scorer_with_preprocessor = (
         BinaryDifferenceScorerWithPreprocessor(
             q_network.cpu_model().eval(), state_preprocessor, state_feature_config
         )
     )
     return BinaryDifferenceScorerPredictorWrapper(
         binary_difference_scorer_with_preprocessor, state_feature_config
     )
Example #12
0
 def build_serving_module(
     self,
     q_network: ModelBase,
     state_normalization_parameters: Dict[int, NormalizationParameters],
     action_names: List[str],
     state_feature_config: rlt.ModelFeatureConfig,
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module.

     The Q-network (CPU, eval mode) is chained with a `_Mean()` head via
     torch.nn.Sequential before being wrapped with the state preprocessor.
     """
     # use_gpu passed by keyword for consistency with the other builders
     # in this file (previously a bare positional `False`).
     state_preprocessor = Preprocessor(
         state_normalization_parameters, use_gpu=False
     )
     dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
         torch.nn.Sequential(  # type: ignore
             q_network.cpu_model().eval(), _Mean()
         ),
         state_preprocessor,
     )
     return DiscreteDqnPredictorWrapper(
         dqn_with_preprocessor, action_names, state_feature_config
     )
Example #13
0
    def __init__(
        self,
        *,
        shared_network: ModelBase,
        advantage_network: ModelBase,
        value_network: ModelBase,
    ) -> None:
        """
        Dueling Q-Network Architecture: https://arxiv.org/abs/1511.06581

        Keyword-only constructor wiring together the shared trunk and the
        advantage/value heads; `_check_connection` validates the wiring.
        """
        super().__init__()
        self.shared_network = shared_network
        input_prototype = shared_network.input_prototype()
        # Message is a plain string: the former f-string had no placeholders
        # (ruff F541); the string value is unchanged.
        assert isinstance(
            input_prototype, rlt.FeatureData
        ), "shared_network should expect FeatureData as input"
        self.advantage_network = advantage_network
        self.value_network = value_network

        _check_connection(self)
        self._name = "unnamed"
Example #14
0
    def __init__(
        self,
        *,
        shared_network: ModelBase,
        advantage_network: ModelBase,
        value_network: ModelBase,
    ) -> None:
        """
        Dueling Q-Network Architecture: https://arxiv.org/abs/1511.06581
        """
        super().__init__()
        # Sanity-check the advantage head: its prototype must be a 2-tuple
        # whose first element carries only dense float features.
        proto = advantage_network.input_prototype()
        assert isinstance(proto, tuple) and len(proto) == 2
        assert proto[0].has_float_features_only

        self.shared_network = shared_network
        self.advantage_network = advantage_network
        self.value_network = value_network

        _check_connection(self)
        self._name = "unnamed"