def build_ranking_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    candidate_normalization_data: NormalizationData,
    num_candidates: int,
    action_normalization_data: NormalizationData,
) -> torch.nn.Module:
    """
    Build the predictor wrapper used to serve a ranking actor.

    Args:
        actor: model whose ``cpu_model().eval()`` result is served.
        state_normalization_data: provides the dense normalization
            parameters for the state preprocessor.
        candidate_normalization_data: provides the dense normalization
            parameters for the candidate preprocessor.
        num_candidates: forwarded unchanged to
            ``RankingActorWithPreprocessor``.
        action_normalization_data: provides the dense normalization
            parameters for the action postprocessor and for the action
            feature list.

    Returns:
        A ``RankingActorPredictorWrapper`` around the actor and its
        pre/postprocessors.
    """
    state_prep = Preprocessor(
        state_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    candidate_prep = Preprocessor(
        candidate_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    action_params = action_normalization_data.dense_normalization_parameters
    action_postprocessor = Postprocessor(action_params, use_gpu=False)

    serving_actor = actor.cpu_model().eval()
    ranking_actor = RankingActorWithPreprocessor(
        model=serving_actor,
        state_preprocessor=state_prep,
        candidate_preprocessor=candidate_prep,
        num_candidates=num_candidates,
        action_postprocessor=action_postprocessor,
    )

    # A Preprocessor is built here only to read its ``sorted_features``.
    sorted_action_features = Preprocessor(
        action_params, use_gpu=False
    ).sorted_features
    return RankingActorPredictorWrapper(ranking_actor, sorted_action_features)
def build_serving_module(
    self,
    synthetic_reward_network: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: Optional[NormalizationData] = None,
    discrete_action_names: Optional[List[str]] = None,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    Only the parametric (continuous-action) case is handled: when
    ``discrete_action_names`` is non-empty a ``NotImplementedError`` is
    raised, otherwise ``action_normalization_data`` must not be ``None``.
    """
    state_prep = Preprocessor(
        state_normalization_data.dense_normalization_parameters
    )

    if discrete_action_names:
        raise NotImplementedError(
            "Discrete Single Step Synthetic Reward Predictor has not been implemented"
        )

    assert action_normalization_data is not None
    action_prep = Preprocessor(
        action_normalization_data.dense_normalization_parameters
    )
    # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
    #  function.
    exported_mlp = synthetic_reward_network.export_mlp().cpu().eval()
    reward_with_preprocessor = ParametricDqnWithPreprocessor(
        exported_mlp,
        state_prep,
        action_prep,
    )
    return ParametricSingleStepSyntheticRewardPredictorWrapper(
        reward_with_preprocessor
    )
def test_parametric_wrapper(self):
    """
    The parametric DQN predictor wrapper must return the name list
    ``["Q"]`` and the same value as calling the critic directly on the
    preprocessed prototype input.
    """
    state_norm = {fid: _cont_norm() for fid in range(1, 5)}
    action_norm = {fid: _cont_norm() for fid in range(5, 9)}
    state_prep = Preprocessor(state_norm, False)
    action_prep = Preprocessor(action_norm, False)

    critic = models.FullyConnectedCritic(
        state_dim=len(state_norm),
        action_dim=len(action_norm),
        sizes=[16],
        activations=["relu"],
    )
    critic_with_prep = ParametricDqnWithPreprocessor(
        critic,
        state_preprocessor=state_prep,
        action_preprocessor=action_prep,
    )
    wrapper = ParametricDqnPredictorWrapper(critic_with_prep)

    prototype = critic_with_prep.input_prototype()
    names, q_value = wrapper(*prototype)
    self.assertEqual(names, ["Q"])
    self.assertEqual(q_value.shape, (1, 1))

    # Reference value: run the preprocessors and the critic by hand.
    state_features = rlt.FeatureData(state_prep(*prototype[0]))
    action_features = rlt.FeatureData(action_prep(*prototype[1]))
    expected = critic(state_features, action_features)
    self.assertTrue((expected == q_value).all())
def build_serving_module(
    self,
    actor: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: NormalizationData,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    Both normalization-data objects must carry non-``None`` dense
    normalization parameters (checked with ``assert``).
    """
    state_params = state_normalization_data.dense_normalization_parameters
    action_params = action_normalization_data.dense_normalization_parameters
    assert state_params is not None
    assert action_params is not None

    state_prep = Preprocessor(state_params, use_gpu=False)
    action_post = Postprocessor(action_params, use_gpu=False)
    wrapped_actor = ActorWithPreprocessor(
        actor.cpu_model().eval(), state_prep, action_post
    )

    # A Preprocessor is built here only to read its ``sorted_features``.
    return ActorPredictorWrapper(
        wrapped_actor,
        Preprocessor(action_params, use_gpu=False).sorted_features,
    )
def _test_synthetic_reward_net_builder_continuous_actions(
    self, builder: SyntheticRewardNetBuilder
):
    """
    Check that a builder's network, its exported MLP and its serving
    wrapper agree on continuous-action input.

    Steps:
    1. use the net builder to create a synthetic reward network
    2. export the synthetic reward network
    3. use the builder to create a predictor wrapper
    4. create raw input and preprocessed input
    5. compare the results of:
        a. the synthetic reward network on preprocessed input
        b. the exported network on preprocessed input
        c. the predictor wrapper on raw input
    """
    state_norm = _create_norm(STATE_DIM)
    action_norm = _create_norm(ACTION_DIM, offset=STATE_DIM)
    state_prep = Preprocessor(state_norm.dense_normalization_parameters)
    action_prep = Preprocessor(action_norm.dense_normalization_parameters)

    reward_net = builder.build_synthetic_reward_network(
        state_norm,
        action_normalization_data=action_norm,
    ).eval()

    raw_input = _create_input()
    preprocessed = _create_preprocessed_input(raw_input, state_prep, action_prep)
    net_output = reward_net(preprocessed).predicted_reward
    assert net_output.shape == (BATCH_SIZE, 1)

    # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a function.
    exported_net = reward_net.export_mlp().cpu().eval()
    exported_output = exported_net(
        preprocessed.state.float_features, preprocessed.action
    )

    predictor = builder.build_serving_module(
        SEQ_LEN,
        reward_net,
        state_norm,
        action_normalization_data=action_norm,
    )
    self.assertIsInstance(predictor, SyntheticRewardPredictorWrapper)

    for batch_idx in range(BATCH_SIZE):
        # State and action features of one batch element, joined on dim 1.
        features = torch.cat(
            (
                raw_input.state.float_features[:, batch_idx, :],
                raw_input.action[:, batch_idx, :],
            ),
            dim=1,
        )
        presence = torch.ones(SEQ_LEN, STATE_DIM + ACTION_DIM)
        prediction = predictor((features, presence))
        if IS_FB_ENVIRONMENT:
            prediction = prediction[1][2]

        npt.assert_array_almost_equal(
            prediction, exported_output[batch_idx], decimal=4
        )
        # The sum over the trailing ``valid_step`` entries must equal the
        # network's predicted reward for this batch element.
        npt.assert_almost_equal(
            torch.sum(prediction[-raw_input.valid_step[batch_idx]:]),
            net_output[batch_idx],
            decimal=4,
        )
def build_serving_module(
    self,
    actor: ModelBase,
    state_feature_config: rlt.ModelFeatureConfig,
    state_normalization_data: NormalizationData,
    action_normalization_data: NormalizationData,
    serve_mean_policy: bool = False,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    ``state_feature_config`` is passed to both ``ActorWithPreprocessor``
    and ``ActorPredictorWrapper``; ``serve_mean_policy`` is forwarded
    unchanged to ``ActorWithPreprocessor``.
    """
    state_prep = Preprocessor(
        state_normalization_data.dense_normalization_parameters, use_gpu=False
    )
    action_params = action_normalization_data.dense_normalization_parameters
    action_post = Postprocessor(action_params, use_gpu=False)

    wrapped_actor = ActorWithPreprocessor(
        actor.cpu_model().eval(),
        state_prep,
        state_feature_config,
        action_post,
        serve_mean_policy=serve_mean_policy,
    )

    # A Preprocessor is built here only to read its ``sorted_features``.
    sorted_action_features = Preprocessor(
        action_params, use_gpu=False
    ).sorted_features
    return ActorPredictorWrapper(
        wrapped_actor, state_feature_config, sorted_action_features
    )
def build_serving_module(
    self,
    seq_len: int,
    synthetic_reward_network: ModelBase,
    state_normalization_data: NormalizationData,
    action_normalization_data: Optional[NormalizationData] = None,
    discrete_action_names: Optional[List[str]] = None,
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    When ``discrete_action_names`` is non-empty, a scripted
    ``torch.nn.Linear(1, 1)`` is returned as a stand-in (see the TODO
    below). Otherwise ``action_normalization_data`` must not be ``None``
    and a ``SyntheticRewardPredictorWrapper`` is returned.
    """
    state_prep = Preprocessor(
        state_normalization_data.dense_normalization_parameters
    )

    if discrete_action_names:
        # TODO add Discrete Single Step Synthetic Reward Predictor
        return torch.jit.script(torch.nn.Linear(1, 1))

    assert action_normalization_data is not None
    action_prep = Preprocessor(
        action_normalization_data.dense_normalization_parameters
    )
    # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
    #  function.
    exported_mlp = synthetic_reward_network.export_mlp().cpu().eval()
    return SyntheticRewardPredictorWrapper(
        seq_len,
        state_prep,
        action_prep,
        exported_mlp,
    )
def save_models(self, path: str):
    """
    Save the trainer and a TorchScript serving module under ``path``.

    Two files are written into the user-expanded directory, both stamped
    with the current time rounded to whole seconds:
    ``trainer_<time>.pt`` (via ``save_model_to_file``) and
    ``model_<time>.torchscript`` (via ``self.save_torchscript_model``).

    Args:
        path: output directory; a leading ``~`` is expanded.
    """
    export_time = round(time.time())
    output_path = os.path.expanduser(path)
    pytorch_output_path = os.path.join(
        output_path, "trainer_{}.pt".format(export_time)
    )
    # Join onto the expanded directory (not the raw ``path``) so that both
    # artifacts are written to the same location when ``path`` contains "~".
    torchscript_output_path = os.path.join(
        output_path, "model_{}.torchscript".format(export_time)
    )

    state_preprocessor = Preprocessor(self.state_normalization, False)
    action_preprocessor = Preprocessor(self.action_normalization, False)
    # pyre-fixme[16]: `ParametricDqnWorkflow` has no attribute `trainer`.
    q_network = self.trainer.q_network
    dqn_with_preprocessor = ParametricDqnWithPreprocessor(
        q_network.cpu_model().eval(), state_preprocessor, action_preprocessor
    )
    serving_module = ParametricDqnPredictorWrapper(
        dqn_with_preprocessor=dqn_with_preprocessor
    )

    logger.info("Saving PyTorch trainer to {}".format(pytorch_output_path))
    # pyre-fixme[16]: `ParametricDqnWorkflow` has no attribute `trainer`.
    save_model_to_file(self.trainer, pytorch_output_path)
    # pyre-fixme[16]: `ParametricDqnWorkflow` has no attribute
    #  `save_torchscript_model`.
    self.save_torchscript_model(serving_module, torchscript_output_path)
def get_predictor(self, trainer, environment):
    """
    Build a ``ParametricDqnTorchPredictor`` for
    ``self.current_predictor_network``.

    The state and action preprocessors come from
    ``environment.normalization`` and ``environment.normalization_action``.
    ``trainer`` is accepted but not used by this method.
    """
    state_prep = Preprocessor(environment.normalization, False)
    action_prep = Preprocessor(environment.normalization_action, False)
    cpu_q_network = self.current_predictor_network.cpu_model().eval()
    wrapped_dqn = ParametricDqnWithPreprocessor(
        cpu_q_network, state_prep, action_prep
    )
    return ParametricDqnTorchPredictor(
        ParametricDqnPredictorWrapper(dqn_with_preprocessor=wrapped_dqn)
    )
def _test_seq2slate_model_with_preprocessor(
    self, model: str, output_arch: Seq2SlateOutputArch
):
    """
    Compare a ``Seq2SlateWithPreprocessor`` module against its traced or
    scripted version, on the prototype input and on the input returned by
    ``change_cand_size_slate_ranking(..., 20)``.

    Only ``model == "transformer"`` is supported; any other value raises
    ``NotImplementedError``.
    """
    state_norm = {fid: _cont_norm() for fid in range(1, 5)}
    candidate_norm = {fid: _cont_norm() for fid in range(101, 106)}
    state_prep = Preprocessor(state_norm, False)
    candidate_prep = Preprocessor(candidate_norm, False)
    candidate_size = 10
    slate_size = 4

    if model != "transformer":
        raise NotImplementedError(f"model type {model} is unknown")
    seq2slate = Seq2SlateTransformerNet(
        state_dim=len(state_norm),
        candidate_dim=len(candidate_norm),
        num_stacked_layers=2,
        num_heads=2,
        dim_model=10,
        dim_feedforward=10,
        max_src_seq_len=candidate_size,
        max_tgt_seq_len=slate_size,
        output_arch=output_arch,
        temperature=0.5,
    )

    eager_module = Seq2SlateWithPreprocessor(
        seq2slate, state_prep, candidate_prep, greedy=True
    )
    prototype = eager_module.input_prototype()

    if eager_module.can_be_traced():
        jit_module = torch.jit.trace(
            eager_module,
            eager_module.input_prototype(),
        )
    else:
        jit_module = torch.jit.script(eager_module)

    def _check_same_output(inputs):
        # Run the eager module first, then the JIT module, and compare.
        expected = eager_module(*inputs)
        actual = jit_module(*inputs)
        self.verify_results(expected, actual)

    _check_same_output(prototype)

    # Test if scripted model can handle variable lengths of input
    prototype = change_cand_size_slate_ranking(prototype, 20)
    _check_same_output(prototype)
def test_discrete_wrapper(self):
    """
    The discrete DQN predictor wrapper must return the configured action
    names and the same q-values as calling the DQN directly on the
    preprocessed prototype state.
    """
    feature_ids = range(1, 5)
    state_norm = {fid: _cont_norm() for fid in feature_ids}
    state_prep = Preprocessor(state_norm, False)
    num_actions = 2
    dqn = models.FullyConnectedDQN(
        state_dim=len(state_norm),
        action_dim=num_actions,
        sizes=[16],
        activations=["relu"],
    )
    float_infos = [
        rlt.FloatFeatureInfo(feature_id=fid, name=f"feat_{fid}")
        for fid in feature_ids
    ]
    feature_config = rlt.ModelFeatureConfig(float_feature_infos=float_infos)
    dqn_with_prep = DiscreteDqnWithPreprocessor(dqn, state_prep, feature_config)
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapper(
        dqn_with_prep, action_names, feature_config
    )

    prototype = dqn_with_prep.input_prototype()[0]
    returned_names, q_values = wrapper(prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, 2))

    # Reference value: preprocess the prototype's float features by hand.
    preprocessed_state = state_prep(*prototype.float_features_with_presence)
    expected = dqn(rlt.FeatureData(preprocessed_state))
    self.assertTrue((expected == q_values).all())
def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    """
    Build a one-element tuple holding a ``rlt.ServingFeatureData``
    prototype for ``model``.

    Float features come from ``state_preprocessor.input_prototype()``.
    If the model's own prototype is a ``rlt.FeatureData`` with non-empty
    id-list / id-score-list features, those are re-keyed through
    ``state_feature_config.name2id``; otherwise placeholder dicts keyed
    by 42 are used.
    """
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()

    # Terrible hack to make JIT tracing works. Python dict doesn't have type
    # so we need to insert something so JIT tracer can infer the type.
    id_list = {
        42: (torch.zeros(1, dtype=torch.long), torch.tensor([], dtype=torch.long))
    }
    id_score_list = {
        42: (
            torch.zeros(1, dtype=torch.long),
            torch.tensor([], dtype=torch.long),
            torch.tensor([], dtype=torch.float),
        )
    }

    if isinstance(model_prototype, rlt.FeatureData):
        proto_id_list = model_prototype.id_list_features
        if proto_id_list:
            id_list = {
                name2id[name]: value for name, value in proto_id_list.items()
            }
        proto_id_score_list = model_prototype.id_score_list_features
        if proto_id_score_list:
            id_score_list = {
                name2id[name]: value
                for name, value in proto_id_score_list.items()
            }

    serving_input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=id_list,
        id_score_list_features=id_score_list,
    )
    return (serving_input,)
def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    """
    Build a one-element tuple holding a ``rlt.ServingFeatureData``
    prototype for ``model``.

    Float features come from ``state_preprocessor.input_prototype()``.
    If the model's own prototype is a ``rlt.FeatureData`` with non-empty
    id-list / id-score-list features, those are re-keyed through
    ``state_feature_config.name2id``; otherwise the ``FAKE_*``
    placeholder constants are used.
    """
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()

    # Terrible hack to make JIT tracing works. Python dict doesn't have type
    # so we need to insert something so JIT tracer can infer the type.
    id_list = FAKE_STATE_ID_LIST_FEATURES
    id_score_list = FAKE_STATE_ID_SCORE_LIST_FEATURES

    if isinstance(model_prototype, rlt.FeatureData):
        proto_id_list = model_prototype.id_list_features
        if proto_id_list:
            id_list = {
                name2id[name]: value for name, value in proto_id_list.items()
            }
        proto_id_score_list = model_prototype.id_score_list_features
        if proto_id_score_list:
            id_score_list = {
                name2id[name]: value
                for name, value in proto_id_score_list.items()
            }

    serving_input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=id_list,
        id_score_list_features=id_score_list,
    )
    return (serving_input,)
def build_batch_preprocessor(self) -> BatchPreprocessor:
    """
    Return a ``DiscreteDqnBatchPreprocessor`` whose state preprocessor is
    built from ``self.state_normalization_parameters`` and ``self.use_gpu``.
    """
    state_preprocessor = Preprocessor(
        normalization_parameters=self.state_normalization_parameters,
        use_gpu=self.use_gpu,
    )
    return DiscreteDqnBatchPreprocessor(state_preprocessor=state_preprocessor)
def test_preprocessing_network(self):
    """
    Compare ``Preprocessor`` output with ``NumpyFeatureProcessor`` output
    for every feature returned by ``read_data()``.

    Each feature is normalized on its own with parameters from
    ``normalization.identify_parameter``. The two results must be close
    within a tolerance of 0.01 (0.5 for the Box-Cox feature).
    """
    feature_value_map = read_data()

    normalization_parameters = {}
    name_preprocessed_blob_map = {}

    for feature_name, feature_values in feature_value_map.items():
        normalization_parameters[feature_name] = normalization.identify_parameter(
            feature_name,
            feature_values,
            feature_type=self._feature_type_override(feature_name),
        )
        # Mark one entry as missing so the presence mask built below
        # contains a False entry.
        feature_values[0] = MISSING_VALUE

        preprocessor = Preprocessor(
            {feature_name: normalization_parameters[feature_name]}, False
        )
        feature_values_matrix = torch.from_numpy(np.expand_dims(feature_values, -1))
        normalized_feature_values = preprocessor(
            feature_values_matrix, (feature_values_matrix != MISSING_VALUE)
        )
        name_preprocessed_blob_map[feature_name] = normalized_feature_values.numpy()

    test_features = NumpyFeatureProcessor.preprocess(
        feature_value_map, normalization_parameters
    )

    for feature_name in feature_value_map:
        normalized_features = name_preprocessed_blob_map[feature_name]
        if feature_name != ENUM_FEATURE_ID:
            normalized_features = np.squeeze(normalized_features, -1)

        tolerance = 0.01
        if feature_name == BOXCOX_FEATURE_ID:
            # At the limit, boxcox has some numerical instability
            tolerance = 0.5

        actual = normalized_features.flatten()
        expected = test_features[feature_name].flatten()
        # Compute the element-wise comparison once and reuse it for both
        # the assertion and the mismatch report.
        is_close = np.isclose(actual, expected, rtol=tolerance, atol=tolerance)
        non_matching = np.where(np.logical_not(is_close))
        self.assertTrue(
            np.all(is_close),
            "{} does not match: {} \n!=\n {}".format(
                feature_name,
                actual[non_matching],
                expected[non_matching],
            ),
        )
def test_actor_wrapper(self):
    """
    The actor predictor wrapper must return the same action as running the
    state preprocessor, the actor and the postprocessor by hand on the
    prototype input.
    """
    state_norm = {fid: _cont_norm() for fid in range(1, 5)}
    action_norm = {fid: _cont_action_norm() for fid in range(101, 105)}
    state_prep = Preprocessor(state_norm, False)
    action_post = Postprocessor(action_norm, False)

    # Test with FullyConnectedActor to make behavior deterministic
    actor = models.FullyConnectedActor(
        state_dim=len(state_norm),
        action_dim=len(action_norm),
        sizes=[16],
        activations=["relu"],
    )
    actor_with_prep = ActorWithPreprocessor(actor, state_prep, action_post)
    wrapper = ActorPredictorWrapper(actor_with_prep)

    prototype = actor_with_prep.input_prototype()
    action = wrapper(*prototype)
    self.assertEqual(action.shape, (1, len(action_norm)))

    preprocessed_state = rlt.FeatureData(state_prep(*prototype[0]))
    expected = action_post(actor(preprocessed_state).action)
    self.assertTrue((expected == action).all())
def test_discrete_wrapper_with_id_list_none(self):
    """
    The id-list variant of the discrete DQN wrapper must return the
    configured action names and the same q-values as calling the DQN
    directly on the preprocessed prototype state.
    """
    state_norm = {fid: _cont_norm() for fid in range(1, 5)}
    state_prep = Preprocessor(state_norm, False)
    num_actions = 2
    dqn = FullyConnectedDQN(
        state_dim=len(state_norm),
        action_dim=num_actions,
        sizes=[16],
        activations=["relu"],
    )
    dqn_with_prep = DiscreteDqnWithPreprocessorWithIdList(dqn, state_prep)
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapperWithIdList(dqn_with_prep, action_names)

    prototype = dqn_with_prep.input_prototype()
    returned_names, q_values = wrapper(*prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, 2))

    # Reference value: preprocess the first prototype element by hand.
    preprocessed_state = rlt.PreprocessedState.from_tensor(
        state_prep(*prototype[0])
    )
    expected = dqn(preprocessed_state).q_values
    self.assertTrue((expected == q_values).all())
def test_discrete_wrapper_with_id_list(self):
    """
    With an id-list feature configured, the discrete DQN wrapper must
    return the configured action names and the same q-values as calling
    the model directly on the preprocessed float features plus the
    prototype's id-list features re-keyed by feature name.
    """
    state_norm = {fid: _cont_norm() for fid in range(1, 5)}
    state_prep = Preprocessor(state_norm, False)
    num_actions = 2

    float_infos = [
        rlt.FloatFeatureInfo(name=str(fid), feature_id=fid)
        for fid in range(1, 5)
    ]
    id_list_configs = [
        rlt.IdListFeatureConfig(
            name="A", feature_id=10, id_mapping_name="A_mapping"
        )
    ]
    feature_config = rlt.ModelFeatureConfig(
        float_feature_infos=float_infos,
        id_list_feature_configs=id_list_configs,
        id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
    )

    embedding_concat = models.EmbeddingBagConcat(
        state_dim=len(state_norm),
        model_feature_config=feature_config,
        embedding_dim=8,
    )
    dqn = models.Sequential(
        embedding_concat,
        rlt.TensorFeatureData(),
        models.FullyConnectedDQN(
            embedding_concat.output_dim,
            action_dim=num_actions,
            sizes=[16],
            activations=["relu"],
        ),
    )

    dqn_with_prep = DiscreteDqnWithPreprocessor(dqn, state_prep, feature_config)
    action_names = ["L", "R"]
    wrapper = DiscreteDqnPredictorWrapper(
        dqn_with_prep, action_names, feature_config
    )

    prototype = dqn_with_prep.input_prototype()[0]
    returned_names, q_values = wrapper(prototype)
    self.assertEqual(action_names, returned_names)
    self.assertEqual(q_values.shape, (1, 2))

    # The prototype keys id-list features by feature id; the model is
    # called with them keyed by feature name.
    id_to_name = {
        cfg.feature_id: cfg.name for cfg in feature_config.id_list_feature_configs
    }
    id_list_by_name = {
        id_to_name[fid]: value
        for fid, value in prototype.id_list_features.items()
    }
    float_with_presence = prototype.float_features_with_presence
    expected = dqn(
        rlt.FeatureData(
            float_features=state_prep(*float_with_presence),
            id_list_features=id_list_by_name,
        )
    )
    self.assertTrue((expected == q_values).all())
def build_serving_module(
    self,
    q_network: ModelBase,
    state_normalization_parameters: Dict[int, NormalizationParameters],
    action_normalization_parameters: Dict[int, NormalizationParameters],
) -> torch.nn.Module:
    """
    Returns a TorchScript predictor module

    The result of ``q_network.cpu_model().eval()`` is combined with state
    and action preprocessors and wrapped in a
    ``ParametricDqnPredictorWrapper``.
    """
    state_prep = Preprocessor(state_normalization_parameters, False)
    action_prep = Preprocessor(action_normalization_parameters, False)
    cpu_q_network = q_network.cpu_model().eval()
    wrapped_dqn = ParametricDqnWithPreprocessor(
        cpu_q_network, state_prep, action_prep
    )
    return ParametricDqnPredictorWrapper(dqn_with_preprocessor=wrapped_dqn)
def build_batch_preprocessor(self) -> BatchPreprocessor:
    """
    Return a ``DiscreteDqnBatchPreprocessor`` for this model.

    The state preprocessor uses the dense normalization parameters of
    ``self.state_normalization_data``; the number of actions is the length
    of ``self.model_manager.action_names``.
    """
    dense_params = self.state_normalization_data.dense_normalization_parameters
    state_preprocessor = Preprocessor(dense_params)
    num_actions = len(self.model_manager.action_names)
    return DiscreteDqnBatchPreprocessor(
        num_actions=num_actions,
        state_preprocessor=state_preprocessor,
    )
def _test_seq2reward_with_preprocessor(self, plan_short_sequence):
    """
    Check the q-values produced by the seq2reward serving modules when
    they are backed by the fake networks.

    Args:
        plan_short_sequence: when truthy, test
            ``Seq2RewardPlanShortSeqWithPreprocessor`` (with a fake step
            prediction network); otherwise test
            ``Seq2RewardWithPreprocessor``.
    """
    state_dim = 4
    action_dim = 2
    seq_len = 3
    reward_model = FakeSeq2RewardNetwork()
    state_norm = {
        fid: NormalizationParameters(
            feature_type=DO_NOT_PREPROCESS, mean=0.0, stddev=1.0
        )
        for fid in range(1, state_dim)
    }
    state_prep = Preprocessor(state_norm, False)

    if plan_short_sequence:
        step_model = FakeStepPredictionNetwork(seq_len)
        serving_model = Seq2RewardPlanShortSeqWithPreprocessor(
            reward_model,
            step_model,
            state_prep,
            seq_len,
            action_dim,
        )
    else:
        serving_model = Seq2RewardWithPreprocessor(
            reward_model,
            state_prep,
            seq_len,
            action_dim,
        )

    prototype = rlt.ServingFeatureData(
        float_features_with_presence=state_prep.input_prototype(),
        id_list_features=FAKE_STATE_ID_LIST_FEATURES,
        id_score_list_features=FAKE_STATE_ID_SCORE_LIST_FEATURES,
    )
    q_values = serving_model(prototype)

    if plan_short_sequence:
        # When planning for 1, 2, and 3 steps ahead,
        # the expected q values are respectively:
        # [0, 1], [1, 11], [11, 111]
        # Weighting the expected q values by predicted step
        # probabilities [0.33, 0.33, 0.33], we have [4, 41]
        expected_q_values = torch.tensor([[4.0, 41.0]])
    else:
        expected_q_values = torch.tensor([[11.0, 111.0]])
    assert torch.all(expected_q_values == q_values)
def build_batch_preprocessor(self) -> BatchPreprocessor:
    """
    Return a ``DiscreteDqnBatchPreprocessor`` configured from this object.

    It receives the number of entries in ``self.action_names``, a state
    preprocessor built from ``self.state_normalization_parameters``, and
    ``self.use_gpu``.
    """
    num_actions = len(self.action_names)
    state_preprocessor = Preprocessor(
        normalization_parameters=self.state_normalization_parameters,
        use_gpu=self.use_gpu,
    )
    return DiscreteDqnBatchPreprocessor(
        num_actions=num_actions,
        state_preprocessor=state_preprocessor,
        use_gpu=self.use_gpu,
    )
def __init__(
    self,
    model_params: ContinuousActionModelParameters,
    state_normalization: Dict[int, NormalizationParameters],
    action_normalization: Dict[int, NormalizationParameters],
    use_gpu: bool,
    use_all_avail_gpus: bool,
):
    """
    Set up the Parametric DQN workflow.

    Stores the parameters on the instance, creates the trainer from them,
    applies ``update_model_for_warm_start``, builds an ``Evaluator``, and
    passes a ``ParametricDqnBatchPreprocessor``, the trainer, the evaluator
    and the training minibatch size to the parent constructor.

    Raises:
        AssertionError: if the warm-started trainer is not exactly a
            ``ParametricDQNTrainer``.
    """
    logger.info("Running Parametric DQN workflow with params:")
    logger.info(model_params)
    self.model_params = model_params
    self.state_normalization = state_normalization
    self.action_normalization = action_normalization

    fresh_trainer = create_parametric_dqn_trainer_from_params(
        model_params,
        state_normalization,
        action_normalization,
        use_gpu=use_gpu,
        use_all_avail_gpus=use_all_avail_gpus,
    )
    trainer = update_model_for_warm_start(fresh_trainer)
    assert (
        type(trainer) == ParametricDQNTrainer
    ), "Warm started wrong model type: " + str(type(trainer))

    evaluator = Evaluator(
        None,
        model_params.rl.gamma,
        trainer,
        metrics_to_score=trainer.metrics_to_score,
    )

    batch_preprocessor = ParametricDqnBatchPreprocessor(
        Preprocessor(state_normalization, use_gpu),
        Preprocessor(action_normalization, use_gpu),
    )
    # pyre-fixme[19]: Expected 0 positional arguments.
    super().__init__(
        batch_preprocessor,
        trainer,
        evaluator,
        model_params.training.minibatch_size,
    )
def build_batch_preprocessor(self) -> BatchPreprocessor:
    """
    Return a ``DiscreteDqnBatchPreprocessor`` configured from this object.

    It receives the number of entries in ``self.action_names``, a state
    preprocessor built from ``self.state_normalization_parameters``, and
    ``self.use_gpu``.
    """
    # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
    num_actions = len(self.action_names)
    state_preprocessor = Preprocessor(
        normalization_parameters=self.state_normalization_parameters,
        use_gpu=self.use_gpu,
    )
    return DiscreteDqnBatchPreprocessor(
        num_actions=num_actions,
        state_preprocessor=state_preprocessor,
        use_gpu=self.use_gpu,
    )
def __call__(self, data):
    """
    Preprocess the entries of ``data`` named in ``self.keys``, in place.

    Each selected entry is unpacked as a ``(value, presence)`` pair; both
    parts are moved to ``self.device`` with ``.to`` and passed to the
    preprocessor, and the result is stored back under the same key. The
    ``Preprocessor`` is created on the first call and cached on the
    instance.

    Returns:
        The same ``data`` object that was passed in.
    """
    if self._preprocessor is None:
        self._preprocessor = Preprocessor(
            self.normalization_parameters, device=self.device
        )

    for key in self.keys:
        value, presence = data[key]
        value_on_device = value.to(self.device)
        presence_on_device = presence.to(self.device)
        data[key] = self._preprocessor(value_on_device, presence_on_device)
    return data
def test_quantile_boundary_logic(self):
    """Test quantile logic when a feature value equals a quantile boundary."""
    quantile_params = NormalizationParameters(
        feature_type="QUANTILE",
        boxcox_lambda=None,
        boxcox_shift=None,
        mean=0,
        stddev=1,
        possible_values=None,
        quantiles=[0.0, 80.0, 100.0],
        min_value=0.0,
        max_value=100.0,
    )
    # Every input value sits exactly on one of the quantile boundaries.
    boundary_values = torch.tensor([[0.0], [80.0], [100.0]])
    preprocessor = Preprocessor({1: quantile_params}, False)

    actual = preprocessor._preprocess_QUANTILE(
        0, boundary_values.float(), [quantile_params]
    )

    expected = torch.tensor([[0.0], [0.5], [1.0]])
    self.assertTrue(np.all(np.isclose(actual, expected)))
def test_do_not_preprocess(self):
    """
    With ``DO_NOT_PREPROCESS`` features, preprocessing followed by
    postprocessing must give back the input.
    """
    params = {
        fid: NormalizationParameters(feature_type=DO_NOT_PREPROCESS)
        for fid in range(1, 5)
    }
    preprocessor = Preprocessor(params, use_gpu=False)
    postprocessor = Postprocessor(params, use_gpu=False)

    values = torch.randn(3, 4)
    all_present = torch.ones_like(values, dtype=torch.uint8)
    preprocessed = preprocessor(values, all_present)
    round_tripped = postprocessor(preprocessed)
    npt.assert_allclose(values, round_tripped)
def test_predictor_torch_export(self):
    """Verify that q-values before model export equal q-values after
    model export. Meant to catch issues with export logic."""
    environment = Gridworld()
    samples = Samples(
        mdp_ids=["0"],
        sequence_numbers=[0],
        sequence_number_ordinals=[1],
        states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
        actions=["D"],
        action_probabilities=[0.5],
        rewards=[0],
        possible_actions=[["R", "D"]],
        next_states=[{5: 1.0}],
        next_actions=["U"],
        terminals=[False],
        possible_next_actions=[["R", "U", "D"]],
    )
    tdps = environment.preprocess_samples(samples, 1)
    assert len(tdps) == 1, "Invalid number of data pages"

    trainer = self.get_trainer(environment, {}, False, False, False)
    # Named ``state_input`` rather than ``input`` to avoid shadowing the
    # builtin.
    state_input = rlt.FeatureData(tdps[0].states)

    pre_export_q_values = trainer.q_network(state_input).detach().numpy()

    preprocessor = Preprocessor(environment.normalization, False)
    cpu_q_network = trainer.q_network.cpu_model()
    cpu_q_network.eval()
    dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
    serving_module = DiscreteDqnPredictorWrapper(
        dqn_with_preprocessor, action_names=environment.ACTIONS
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        buf = export_module_to_buffer(serving_module)
        tmp_path = os.path.join(tmpdirname, "model")
        # The ``with`` block closes the file on exit, so no explicit
        # ``close()`` is needed.
        with open(tmp_path, "wb") as f:
            f.write(buf.getvalue())
        predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))

    post_export_q_values = predictor.predict([samples.states[0]])

    for i, action in enumerate(environment.ACTIONS):
        self.assertAlmostEqual(
            float(pre_export_q_values[0][i]),
            float(post_export_q_values[0][action]),
            places=4,
        )
def get_actor_predictor(self, trainer, environment):
    """
    Build an ``ActorTorchPredictor`` for ``trainer.actor_network``.

    The state preprocessor uses ``environment.normalization``; the action
    postprocessor uses ``environment.normalization_continuous_action``.
    The predictor also receives the first element returned by
    ``sort_features_by_normalization`` for the continuous-action
    normalization.
    """
    state_prep = Preprocessor(environment.normalization, False)
    action_post = Postprocessor(
        environment.normalization_continuous_action, False
    )
    cpu_actor = trainer.actor_network.cpu_model().eval()
    wrapped_actor = ActorWithPreprocessor(cpu_actor, state_prep, action_post)
    serving_module = ActorPredictorWrapper(wrapped_actor)

    sorted_action_features = sort_features_by_normalization(
        environment.normalization_continuous_action
    )[0]
    return ActorTorchPredictor(serving_module, sorted_action_features)
def test_continuous_action(self):
    """
    With ``CONTINUOUS_ACTION`` features, preprocessing followed by
    postprocessing must give back the input (relative tolerance 1e-4).
    """
    params = {
        fid: NormalizationParameters(
            feature_type=CONTINUOUS_ACTION,
            min_value=-5.0 * fid,
            max_value=10.0 * fid,
        )
        for fid in range(1, 5)
    }
    preprocessor = Preprocessor(params, use_gpu=False)
    postprocessor = Postprocessor(params, use_gpu=False)

    # Column i is drawn from [-5*i, 10*i), the min/max range configured above.
    values = torch.rand(3, 4) * torch.tensor([15, 30, 45, 60]) + torch.tensor(
        [-5, -10, -15, -20]
    )
    all_present = torch.ones_like(values, dtype=torch.uint8)
    preprocessed = preprocessor(values, all_present)
    round_tripped = postprocessor(preprocessed)
    npt.assert_allclose(values, round_tripped, rtol=1e-4)