def build_ranking_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    candidate_normalization_data: NormalizationData,
    num_candidates: int,
    action_normalization_data: NormalizationData,
) -> torch.nn.Module:
    state_preprocessor = Preprocessor(
        state_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    candidate_preprocessor = Preprocessor(
        candidate_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    postprocessor = Postprocessor(
        action_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    actor_with_preprocessor = RankingActorWithPreprocessor(
        model=actor.cpu_model().eval(),
        state_preprocessor=state_preprocessor,
        candidate_preprocessor=candidate_preprocessor,
        num_candidates=num_candidates,
        action_postprocessor=postprocessor,
    )
    action_features = Preprocessor(
        action_normalization_data.dense_normalization_parameters, use_gpu=False
    ).sorted_features
    return RankingActorPredictorWrapper(actor_with_preprocessor, action_features)

def build_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: NormalizationData,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_normalization_parameters = (
        state_normalization_data.dense_normalization_parameters
    )
    action_normalization_parameters = (
        action_normalization_data.dense_normalization_parameters
    )
    assert state_normalization_parameters is not None
    assert action_normalization_parameters is not None

    state_preprocessor = Preprocessor(state_normalization_parameters, use_gpu=False)
    postprocessor = Postprocessor(action_normalization_parameters, use_gpu=False)
    actor_with_preprocessor = ActorWithPreprocessor(
        actor.cpu_model().eval(), state_preprocessor, postprocessor
    )
    action_features = Preprocessor(
        action_normalization_parameters, use_gpu=False
    ).sorted_features
    return ActorPredictorWrapper(actor_with_preprocessor, action_features)

def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()
    # Terrible hack to make JIT tracing work: an empty Python dict carries no
    # type information, so we insert a dummy entry so the JIT tracer can infer
    # the key and value types.
    state_id_list_features = {
        42: (torch.zeros(1, dtype=torch.long), torch.tensor([], dtype=torch.long))
    }
    state_id_score_list_features = {
        42: (
            torch.zeros(1, dtype=torch.long),
            torch.tensor([], dtype=torch.long),
            torch.tensor([], dtype=torch.float),
        )
    }
    if isinstance(model_prototype, rlt.FeatureData):
        if model_prototype.id_list_features:
            state_id_list_features = {
                name2id[k]: v for k, v in model_prototype.id_list_features.items()
            }
        if model_prototype.id_score_list_features:
            state_id_score_list_features = {
                name2id[k]: v
                for k, v in model_prototype.id_score_list_features.items()
            }

    input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=state_id_list_features,
        id_score_list_features=state_id_score_list_features,
    )
    return (input,)

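# A minimal illustration (an assumption for exposition, not code from this repo) of
# the typing problem the dummy "42" entry above works around: torch.jit.trace infers
# container types from the example values it is given, so an empty dict leaves the
# element type unknown, while a dict seeded with one well-typed entry pins it to
# Dict[int, Tuple[Tensor, Tensor]] (presumably an offsets/values pair for a jagged
# id-list feature).
from typing import Dict, Tuple

import torch

dummy_id_list_prototype: Dict[int, Tuple[torch.Tensor, torch.Tensor]] = {
    42: (torch.zeros(1, dtype=torch.long), torch.tensor([], dtype=torch.long))
}
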
def build_serving_module(
    self,
    actor: ModelBase,
    state_feature_config: rlt.ModelFeatureConfig,
    state_normalization_data: NormalizationData,
    action_normalization_data: NormalizationData,
    serve_mean_policy: bool = False,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_preprocessor = Preprocessor(
        state_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    postprocessor = Postprocessor(
        action_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    actor_with_preprocessor = ActorWithPreprocessor(
        actor.cpu_model().eval(),
        state_preprocessor,
        state_feature_config,
        postprocessor,
        serve_mean_policy=serve_mean_policy,
    )
    action_features = Preprocessor(
        action_normalization_data.dense_normalization_parameters, use_gpu=False
    ).sorted_features
    return ActorPredictorWrapper(
        actor_with_preprocessor, state_feature_config, action_features
    )

def build_serving_module(
    self,
    seq_len: int,
    synthetic_reward_network: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: Optional[NormalizationData] = None,
    discrete_action_names: Optional[List[str]] = None,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_preprocessor = Preprocessor(
        state_normalization_data.dense_normalization_parameters
    )
    if not discrete_action_names:
        assert action_normalization_data is not None
        action_preprocessor = Preprocessor(
            action_normalization_data.dense_normalization_parameters
        )
        return SyntheticRewardPredictorWrapper(
            seq_len,
            state_preprocessor,
            action_preprocessor,
            # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
            #  function.
            synthetic_reward_network.export_mlp().cpu().eval(),
        )
    else:
        # TODO: add a Discrete Single Step Synthetic Reward Predictor.
        # Until then, return a trivially scripted placeholder module.
        return torch.jit.script(torch.nn.Linear(1, 1))

def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()
    # Terrible hack to make JIT tracing work: an empty Python dict carries no
    # type information, so we insert dummy entries so the JIT tracer can infer
    # the key and value types.
    state_id_list_features = FAKE_STATE_ID_LIST_FEATURES
    state_id_score_list_features = FAKE_STATE_ID_SCORE_LIST_FEATURES
    if isinstance(model_prototype, rlt.FeatureData):
        if model_prototype.id_list_features:
            state_id_list_features = {
                name2id[k]: v for k, v in model_prototype.id_list_features.items()
            }
        if model_prototype.id_score_list_features:
            state_id_score_list_features = {
                name2id[k]: v
                for k, v in model_prototype.id_score_list_features.items()
            }

    input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=state_id_list_features,
        id_score_list_features=state_id_score_list_features,
    )
    return (input,)

def build_serving_module(
    self,
    synthetic_reward_network: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: Optional[NormalizationData] = None,
    discrete_action_names: Optional[List[str]] = None,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_preprocessor = Preprocessor(
        state_normalization_data.dense_normalization_parameters
    )
    if not discrete_action_names:
        assert action_normalization_data is not None
        action_preprocessor = Preprocessor(
            action_normalization_data.dense_normalization_parameters
        )
        synthetic_reward_with_preprocessor = ParametricDqnWithPreprocessor(
            # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
            #  function.
            synthetic_reward_network.export_mlp().cpu().eval(),
            state_preprocessor,
            action_preprocessor,
        )
        return ParametricSingleStepSyntheticRewardPredictorWrapper(
            synthetic_reward_with_preprocessor
        )
    else:
        raise NotImplementedError(
            "Discrete Single Step Synthetic Reward Predictor has not been implemented"
        )

def build_serving_module(
    self,
    q_network: ModelBase,
    state_normalization_data: NormalizationData,
    action_names: List[str],
    state_feature_config: rlt.ModelFeatureConfig,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_preprocessor = Preprocessor(
        state_normalization_data.dense_normalization_parameters, False
    )
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        q_network.cpu_model().eval(), state_preprocessor
    )
    return DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names, state_feature_config
    )

def build_serving_module(
    self,
    q_network: ModelBase,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_normalization_parameters: Dict[int, NormalizationParameters],
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_preprocessor = Preprocessor(state_normalization_parameters, False)
    action_preprocessor = Preprocessor(action_normalization_parameters, False)
    dqn_with_preprocessor = ParametricDqnWithPreprocessor(
        q_network.cpu_model().eval(), state_preprocessor, action_preprocessor
    )
    return ParametricDqnPredictorWrapper(dqn_with_preprocessor=dqn_with_preprocessor)

def build_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    action_feature_ids: List[int],
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_preprocessor = Preprocessor(
        state_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    actor_with_preprocessor = ActorWithPreprocessor(
        actor.cpu_model().eval(), state_preprocessor
    )
    return ActorPredictorWrapper(actor_with_preprocessor, action_feature_ids)

def build_binary_difference_scorer(
    self,
    q_network: ModelBase,
    state_normalization_data: NormalizationData,
    action_names: List[str],
    state_feature_config: rlt.ModelFeatureConfig,
) -> torch.nn.Module:
    """
    Returns softmax(1) - softmax(0)
    """
    assert len(action_names) == 2
    state_preprocessor = Preprocessor(
        state_normalization_data.dense_normalization_parameters, False
    )
    binary_difference_scorer_with_preprocessor = (
        BinaryDifferenceScorerWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor, state_feature_config
        )
    )
    return BinaryDifferenceScorerPredictorWrapper(
        binary_difference_scorer_with_preprocessor, state_feature_config
    )

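# Illustrative arithmetic only (a sketch of the documented "softmax(1) - softmax(0)"
# semantics, not the wrapper's actual code): with exactly two actions, the scorer
# returns the probability gap between action 1 and action 0 under a softmax over
# the two Q-values.
import torch

q_values = torch.tensor([[0.2, 1.5]])            # (batch, 2) Q-values
probs = torch.softmax(q_values, dim=1)
binary_difference = probs[:, 1] - probs[:, 0]    # softmax(1) - softmax(0)
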
def build_serving_module(
    self,
    q_network: ModelBase,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_names: List[str],
    state_feature_config: rlt.ModelFeatureConfig,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module
    """
    state_preprocessor = Preprocessor(state_normalization_parameters, False)
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
        torch.nn.Sequential(  # type: ignore
            q_network.cpu_model().eval(), _Mean()
        ),
        state_preprocessor,
    )
    return DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names, state_feature_config
    )

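# The _Mean module composed with the Q-network above is not shown in this snippet.
# A minimal sketch of what such a reduction module could look like (an assumption,
# not the repo's implementation): it collapses a per-action value distribution
# (e.g. the quantiles of a distributional Q-network) into a single expected
# Q-value per action, so the standard DQN wrapper can rank actions.
import torch

class MeanOverLastDim(torch.nn.Module):  # hypothetical stand-in for _Mean
    def forward(self, q_dist: torch.Tensor) -> torch.Tensor:
        # q_dist: (batch, num_actions, num_atoms) -> (batch, num_actions)
        return q_dist.mean(dim=2)
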
def __init__(
    self,
    *,
    shared_network: ModelBase,
    advantage_network: ModelBase,
    value_network: ModelBase,
) -> None:
    """
    Dueling Q-Network Architecture: https://arxiv.org/abs/1511.06581
    """
    super().__init__()
    self.shared_network = shared_network
    input_prototype = shared_network.input_prototype()
    assert isinstance(
        input_prototype, rlt.FeatureData
    ), "shared_network should expect FeatureData as input"
    self.advantage_network = advantage_network
    self.value_network = value_network
    _check_connection(self)
    self._name = "unnamed"

def __init__(
    self,
    *,
    shared_network: ModelBase,
    advantage_network: ModelBase,
    value_network: ModelBase,
) -> None:
    """
    Dueling Q-Network Architecture: https://arxiv.org/abs/1511.06581
    """
    super().__init__()
    advantage_network_input = advantage_network.input_prototype()
    assert (
        isinstance(advantage_network_input, tuple)
        and len(advantage_network_input) == 2
    )
    assert advantage_network_input[0].has_float_features_only
    self.shared_network = shared_network
    self.advantage_network = advantage_network
    self.value_network = value_network
    _check_connection(self)
    self._name = "unnamed"

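# For reference, a self-contained sketch of the dueling combination from the cited
# paper (https://arxiv.org/abs/1511.06581), written in plain PyTorch: it illustrates
# the decomposition Q(s, a) = V(s) + A(s, a) - mean_a A(s, a), and is not the
# forward() of the class above.
import torch

shared = torch.nn.Linear(4, 16)      # stands in for shared_network
advantage = torch.nn.Linear(16, 3)   # stands in for advantage_network (one output per action)
value = torch.nn.Linear(16, 1)       # stands in for value_network (scalar state value)

def dueling_q_values(state: torch.Tensor) -> torch.Tensor:
    h = torch.relu(shared(state))
    a = advantage(h)
    v = value(h)
    # Subtracting the mean advantage keeps the V/A decomposition identifiable.
    return v + a - a.mean(dim=1, keepdim=True)

q = dueling_q_values(torch.randn(2, 4))  # -> shape (2, 3)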