Code Example #1
 def build_ranking_serving_module(
     self,
     actor: ModelBase,
     state_normalization_data: NormalizationData,
     candidate_normalization_data: NormalizationData,
     num_candidates: int,
     action_normalization_data: NormalizationData,
 ) -> torch.nn.Module:
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters,
         use_gpu=False)
     candidate_preprocessor = Preprocessor(
         candidate_normalization_data.dense_normalization_parameters,
         use_gpu=False)
     postprocessor = Postprocessor(
         action_normalization_data.dense_normalization_parameters,
         use_gpu=False)
     actor_with_preprocessor = RankingActorWithPreprocessor(
         model=actor.cpu_model().eval(),
         state_preprocessor=state_preprocessor,
         candidate_preprocessor=candidate_preprocessor,
         num_candidates=num_candidates,
         action_postprocessor=postprocessor,
     )
     action_features = Preprocessor(
         action_normalization_data.dense_normalization_parameters,
         use_gpu=False).sorted_features
     return RankingActorPredictorWrapper(actor_with_preprocessor,
                                         action_features)
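
Every builder in these examples follows the same shape: wrap the eval-mode CPU model between a Preprocessor and (for actors) a Postprocessor, then hand the combined module to a predictor wrapper for TorchScript export. Below is a minimal self-contained sketch of that pattern in plain PyTorch; ScaleByStats and ServingWrapper are illustrative stand-ins, not ReAgent classes.

import torch


class ScaleByStats(torch.nn.Module):
    """Illustrative stand-in for a dense-feature preprocessor."""

    def __init__(self, mean: torch.Tensor, std: torch.Tensor):
        super().__init__()
        self.register_buffer("mean", mean)
        self.register_buffer("std", std)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return (x - self.mean) / self.std


class ServingWrapper(torch.nn.Module):
    """Preprocess -> model in one scriptable serving module."""

    def __init__(self, preprocessor: torch.nn.Module, model: torch.nn.Module):
        super().__init__()
        self.preprocessor = preprocessor
        self.model = model

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(self.preprocessor(x))


model = torch.nn.Linear(4, 2).eval()
wrapper = ServingWrapper(ScaleByStats(torch.zeros(4), torch.ones(4)), model)
serving_module = torch.jit.script(wrapper)  # ready for export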
Code Example #2
 def build_serving_module(
     self,
     synthetic_reward_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_normalization_data: Optional[NormalizationData] = None,
     discrete_action_names: Optional[List[str]] = None,
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module
     """
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters
     )
     if not discrete_action_names:
         assert action_normalization_data is not None
         action_preprocessor = Preprocessor(
             action_normalization_data.dense_normalization_parameters
         )
         synthetic_reward_with_preprocessor = ParametricDqnWithPreprocessor(
             # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
             #  function.
             synthetic_reward_network.export_mlp().cpu().eval(),
             state_preprocessor,
             action_preprocessor,
         )
         return ParametricSingleStepSyntheticRewardPredictorWrapper(
             synthetic_reward_with_preprocessor
         )
     else:
         raise NotImplementedError(
             "Discrete Single Step Synthetic Reward Predictor has not been implemented"
         )
Code Example #3
    def test_parametric_wrapper(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        action_normalization_parameters = {
            i: _cont_norm()
            for i in range(5, 9)
        }
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_preprocessor = Preprocessor(action_normalization_parameters,
                                           False)
        dqn = models.FullyConnectedCritic(
            state_dim=len(state_normalization_parameters),
            action_dim=len(action_normalization_parameters),
            sizes=[16],
            activations=["relu"],
        )
        dqn_with_preprocessor = ParametricDqnWithPreprocessor(
            dqn,
            state_preprocessor=state_preprocessor,
            action_preprocessor=action_preprocessor,
        )
        wrapper = ParametricDqnPredictorWrapper(dqn_with_preprocessor)

        input_prototype = dqn_with_preprocessor.input_prototype()
        output_action_names, q_value = wrapper(*input_prototype)
        self.assertEqual(output_action_names, ["Q"])
        self.assertEqual(q_value.shape, (1, 1))

        expected_output = dqn(
            rlt.FeatureData(state_preprocessor(*input_prototype[0])),
            rlt.FeatureData(action_preprocessor(*input_prototype[1])),
        )
        self.assertTrue((expected_output == q_value).all())
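
The core invariant these wrapper tests assert is that packaging a network for serving does not change its outputs. A tiny sketch of the same check against a scripted module, using only core PyTorch:

import torch

net = torch.nn.Linear(4, 1).eval()
scripted = torch.jit.script(net)

x = torch.randn(1, 4)
# The exported module must reproduce the eager outputs exactly.
assert torch.equal(net(x), scripted(x))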
Code Example #4
    def build_serving_module(
        self,
        actor: ModelBase,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """
        state_normalization_parameters = (
            state_normalization_data.dense_normalization_parameters)
        action_normalization_parameters = (
            action_normalization_data.dense_normalization_parameters)
        assert state_normalization_parameters is not None
        assert action_normalization_parameters is not None

        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          use_gpu=False)
        postprocessor = Postprocessor(action_normalization_parameters,
                                      use_gpu=False)
        actor_with_preprocessor = ActorWithPreprocessor(
            actor.cpu_model().eval(), state_preprocessor, postprocessor)
        action_features = Preprocessor(action_normalization_parameters,
                                       use_gpu=False).sorted_features
        return ActorPredictorWrapper(actor_with_preprocessor, action_features)
Code Example #5
    def _test_synthetic_reward_net_builder_continuous_actions(
            self, builder: SyntheticRewardNetBuilder):
        """
        This test does the following steps:
        1. create a net builder
        2. use the net builder to create a synthetic reward network
        3. export the synthetic reward network
        4. use the exported network to create a predictor wrapper
        5. create raw input and preprocessed inputs
        6. check that the results of the following match:
            a. synthetic reward network on preprocessed input
            b. export network on preprocessed input
            c. predictor wrapper on raw input
        """
        state_normalization_data = _create_norm(STATE_DIM)
        action_normalization_data = _create_norm(ACTION_DIM, offset=STATE_DIM)
        state_preprocessor = Preprocessor(
            state_normalization_data.dense_normalization_parameters)
        action_preprocessor = Preprocessor(
            action_normalization_data.dense_normalization_parameters)
        reward_net = builder.build_synthetic_reward_network(
            state_normalization_data,
            action_normalization_data=action_normalization_data,
        ).eval()
        input = _create_input()
        preprocessed_input = _create_preprocessed_input(
            input, state_preprocessor, action_preprocessor)
        output = reward_net(preprocessed_input).predicted_reward
        assert output.shape == (BATCH_SIZE, 1)

        # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a function.
        export_net = reward_net.export_mlp().cpu().eval()
        export_output = export_net(preprocessed_input.state.float_features,
                                   preprocessed_input.action)
        predictor_wrapper = builder.build_serving_module(
            SEQ_LEN,
            reward_net,
            state_normalization_data,
            action_normalization_data=action_normalization_data,
        )
        self.assertIsInstance(predictor_wrapper,
                              SyntheticRewardPredictorWrapper)
        for i in range(BATCH_SIZE):
            input_to_predictor = torch.cat(
                (input.state.float_features[:, i, :], input.action[:, i, :]),
                dim=1)
            input_to_predictor_presence = torch.ones(SEQ_LEN,
                                                     STATE_DIM + ACTION_DIM)
            predictor_output = predictor_wrapper(
                (input_to_predictor, input_to_predictor_presence))
            if IS_FB_ENVIRONMENT:
                predictor_output = predictor_output[1][2]
            npt.assert_array_almost_equal(predictor_output,
                                          export_output[i],
                                          decimal=4)
            npt.assert_almost_equal(
                torch.sum(predictor_output[-input.valid_step[i]:]),
                output[i],
                decimal=4,
            )
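
The last assertion sums the predictor's per-step rewards over only the final valid_step[i] steps of the sequence. A small illustration of that negative-index slice:

import torch

# With per-step rewards over a length-4 sequence and valid_step = 2,
# only the last two steps contribute to the total.
per_step_reward = torch.tensor([0.1, 0.2, 0.3, 0.4])
valid_step = 2
print(per_step_reward[-valid_step:].sum())  # tensor(0.7000)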
Code Example #6
    def build_serving_module(
        self,
        actor: ModelBase,
        state_feature_config: rlt.ModelFeatureConfig,
        state_normalization_data: NormalizationData,
        action_normalization_data: NormalizationData,
        serve_mean_policy: bool = False,
    ) -> torch.nn.Module:
        """
        Returns a TorchScript predictor module
        """

        state_preprocessor = Preprocessor(
            state_normalization_data.dense_normalization_parameters,
            use_gpu=False)
        postprocessor = Postprocessor(
            action_normalization_data.dense_normalization_parameters,
            use_gpu=False)
        actor_with_preprocessor = ActorWithPreprocessor(
            actor.cpu_model().eval(),
            state_preprocessor,
            state_feature_config,
            postprocessor,
            serve_mean_policy=serve_mean_policy,
        )
        action_features = Preprocessor(
            action_normalization_data.dense_normalization_parameters,
            use_gpu=False).sorted_features
        return ActorPredictorWrapper(actor_with_preprocessor,
                                     state_feature_config, action_features)
Code Example #7
 def build_serving_module(
     self,
     seq_len: int,
     synthetic_reward_network: ModelBase,
     state_normalization_data: NormalizationData,
     action_normalization_data: Optional[NormalizationData] = None,
     discrete_action_names: Optional[List[str]] = None,
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module
     """
     state_preprocessor = Preprocessor(
         state_normalization_data.dense_normalization_parameters)
     if not discrete_action_names:
         assert action_normalization_data is not None
         action_preprocessor = Preprocessor(
             action_normalization_data.dense_normalization_parameters)
         return SyntheticRewardPredictorWrapper(
             seq_len,
             state_preprocessor,
             action_preprocessor,
             # pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a
             #  function.
             synthetic_reward_network.export_mlp().cpu().eval(),
         )
     else:
         # TODO add Discrete Single Step Synthetic Reward Predictor
         return torch.jit.script(torch.nn.Linear(1, 1))
Code Example #8
    def save_models(self, path: str):
        export_time = round(time.time())
        output_path = os.path.expanduser(path)
        pytorch_output_path = os.path.join(output_path,
                                           "trainer_{}.pt".format(export_time))
        torchscript_output_path = os.path.join(
            output_path, "model_{}.torchscript".format(export_time))

        state_preprocessor = Preprocessor(self.state_normalization, False)
        action_preprocessor = Preprocessor(self.action_normalization, False)
        # pyre-fixme[16]: `ParametricDqnWorkflow` has no attribute `trainer`.
        q_network = self.trainer.q_network
        dqn_with_preprocessor = ParametricDqnWithPreprocessor(
            q_network.cpu_model().eval(), state_preprocessor,
            action_preprocessor)
        serving_module = ParametricDqnPredictorWrapper(
            dqn_with_preprocessor=dqn_with_preprocessor)
        logger.info("Saving PyTorch trainer to {}".format(pytorch_output_path))
        save_model_to_file(self.trainer, pytorch_output_path)
        # pyre-fixme[16]: `ParametricDqnWorkflow` has no attribute
        #  `save_torchscript_model`.
        self.save_torchscript_model(serving_module, torchscript_output_path)
Code Example #9
File: test_gridworld_td3.py Project: zwcdp/ReAgent
 def get_predictor(self, trainer, environment):
     state_preprocessor = Preprocessor(environment.normalization, False)
     action_preprocessor = Preprocessor(environment.normalization_action,
                                        False)
     q_network = self.current_predictor_network
     dqn_with_preprocessor = ParametricDqnWithPreprocessor(
         q_network.cpu_model().eval(), state_preprocessor,
         action_preprocessor)
     serving_module = ParametricDqnPredictorWrapper(
         dqn_with_preprocessor=dqn_with_preprocessor)
     predictor = ParametricDqnTorchPredictor(serving_module)
     return predictor
Code Example #10
    def _test_seq2slate_model_with_preprocessor(
            self, model: str, output_arch: Seq2SlateOutputArch):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        candidate_normalization_parameters = {
            i: _cont_norm()
            for i in range(101, 106)
        }
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        candidate_preprocessor = Preprocessor(
            candidate_normalization_parameters, False)
        candidate_size = 10
        slate_size = 4

        seq2slate = None
        if model == "transformer":
            seq2slate = Seq2SlateTransformerNet(
                state_dim=len(state_normalization_parameters),
                candidate_dim=len(candidate_normalization_parameters),
                num_stacked_layers=2,
                num_heads=2,
                dim_model=10,
                dim_feedforward=10,
                max_src_seq_len=candidate_size,
                max_tgt_seq_len=slate_size,
                output_arch=output_arch,
                temperature=0.5,
            )
        else:
            raise NotImplementedError(f"model type {model} is unknown")

        seq2slate_with_preprocessor = Seq2SlateWithPreprocessor(
            seq2slate, state_preprocessor, candidate_preprocessor, greedy=True)
        input_prototype = seq2slate_with_preprocessor.input_prototype()

        if seq2slate_with_preprocessor.can_be_traced():
            seq2slate_with_preprocessor_jit = torch.jit.trace(
                seq2slate_with_preprocessor,
                seq2slate_with_preprocessor.input_prototype(),
            )
        else:
            seq2slate_with_preprocessor_jit = torch.jit.script(
                seq2slate_with_preprocessor)
        expected_output = seq2slate_with_preprocessor(*input_prototype)
        jit_output = seq2slate_with_preprocessor_jit(*input_prototype)
        self.verify_results(expected_output, jit_output)

        # Test if scripted model can handle variable lengths of input
        input_prototype = change_cand_size_slate_ranking(input_prototype, 20)
        expected_output = seq2slate_with_preprocessor(*input_prototype)
        jit_output = seq2slate_with_preprocessor_jit(*input_prototype)
        self.verify_results(expected_output, jit_output)
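
The branch on can_be_traced() reflects a general TorchScript rule: torch.jit.trace records one concrete execution path, so a module whose forward contains data-dependent control flow must go through torch.jit.script instead. A minimal sketch of the distinction, with an illustrative Flip module:

import torch


class Flip(torch.nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Data-dependent branch: tracing would bake in whichever path the
        # example input happens to take, so this module must be scripted.
        if bool(x.sum() > 0):
            return x
        return -x


scripted = torch.jit.script(Flip())
assert torch.equal(scripted(torch.tensor([-2.0])), torch.tensor([2.0]))

# A branch-free module is safe to trace.
traced = torch.jit.trace(torch.nn.ReLU(), torch.randn(3))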
Code Example #11
    def test_discrete_wrapper(self):
        ids = range(1, 5)
        state_normalization_parameters = {i: _cont_norm() for i in ids}
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_dim = 2
        dqn = models.FullyConnectedDQN(
            state_dim=len(state_normalization_parameters),
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
        )
        state_feature_config = rlt.ModelFeatureConfig(float_feature_infos=[
            rlt.FloatFeatureInfo(feature_id=i, name=f"feat_{i}") for i in ids
        ])
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            dqn, state_preprocessor, state_feature_config)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapper(dqn_with_preprocessor,
                                              action_names,
                                              state_feature_config)
        input_prototype = dqn_with_preprocessor.input_prototype()[0]
        output_action_names, q_values = wrapper(input_prototype)
        self.assertEqual(action_names, output_action_names)
        self.assertEqual(q_values.shape, (1, 2))

        state_with_presence = input_prototype.float_features_with_presence
        expected_output = dqn(
            rlt.FeatureData(state_preprocessor(*state_with_presence)))
        self.assertTrue((expected_output == q_values).all())
Code Example #12
def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()
    # Terrible hack to make JIT tracing work. Python dicts are untyped, so we
    # insert a placeholder entry so the JIT tracer can infer the element type.
    state_id_list_features = {
        42: (torch.zeros(1, dtype=torch.long), torch.tensor([], dtype=torch.long))
    }
    state_id_score_list_features = {
        42: (
            torch.zeros(1, dtype=torch.long),
            torch.tensor([], dtype=torch.long),
            torch.tensor([], dtype=torch.float),
        )
    }
    if isinstance(model_prototype, rlt.FeatureData):
        if model_prototype.id_list_features:
            state_id_list_features = {
                name2id[k]: v for k, v in model_prototype.id_list_features.items()
            }
        if model_prototype.id_score_list_features:
            state_id_score_list_features = {
                name2id[k]: v for k, v in model_prototype.id_score_list_features.items()
            }

    input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=state_id_list_features,
        id_score_list_features=state_id_score_list_features,
    )
    return (input,)
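
The placeholder entry keyed by 42 exists purely for type inference: the tracer types a dict by its contents, so an empty dict would leave Dict[int, Tuple[Tensor, Tensor]] unrecoverable. Under torch.jit.script the type can be stated explicitly instead, as in this small sketch (count_ids is a hypothetical consumer, not a ReAgent function):

import torch
from typing import Dict, Tuple


@torch.jit.script
def count_ids(features: Dict[int, Tuple[torch.Tensor, torch.Tensor]]) -> int:
    # With an explicit annotation, even an empty dict is well typed;
    # a traced function would need a non-empty example input instead.
    total = 0
    for key in features.keys():
        offsets, ids = features[key]
        total += ids.numel()
    return total


empty: Dict[int, Tuple[torch.Tensor, torch.Tensor]] = {}
print(count_ids(empty))  # 0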
Code Example #13
File: predictor_wrapper.py Project: j-jiafei/ReAgent
def sparse_input_prototype(
    model: ModelBase,
    state_preprocessor: Preprocessor,
    state_feature_config: rlt.ModelFeatureConfig,
):
    name2id = state_feature_config.name2id
    model_prototype = model.input_prototype()
    # Terrible hack to make JIT tracing work. Python dicts are untyped, so we
    # insert a placeholder entry so the JIT tracer can infer the element type.
    state_id_list_features = FAKE_STATE_ID_LIST_FEATURES
    state_id_score_list_features = FAKE_STATE_ID_SCORE_LIST_FEATURES
    if isinstance(model_prototype, rlt.FeatureData):
        if model_prototype.id_list_features:
            state_id_list_features = {
                name2id[k]: v
                for k, v in model_prototype.id_list_features.items()
            }
        if model_prototype.id_score_list_features:
            state_id_score_list_features = {
                name2id[k]: v
                for k, v in model_prototype.id_score_list_features.items()
            }

    input = rlt.ServingFeatureData(
        float_features_with_presence=state_preprocessor.input_prototype(),
        id_list_features=state_id_list_features,
        id_score_list_features=state_id_score_list_features,
    )
    return (input,)
Code Example #14
 def build_batch_preprocessor(self) -> BatchPreprocessor:
     return DiscreteDqnBatchPreprocessor(
         state_preprocessor=Preprocessor(
             normalization_parameters=self.state_normalization_parameters,
             use_gpu=self.use_gpu,
         )
     )
Code Example #15
    def test_preprocessing_network(self):
        feature_value_map = read_data()

        normalization_parameters = {}
        name_preprocessed_blob_map = {}

        for feature_name, feature_values in feature_value_map.items():
            normalization_parameters[
                feature_name] = normalization.identify_parameter(
                    feature_name,
                    feature_values,
                    feature_type=self._feature_type_override(feature_name),
                )
            # Set one entry to MISSING_VALUE to test missing-value handling.
            feature_values[0] = MISSING_VALUE

            preprocessor = Preprocessor(
                {feature_name: normalization_parameters[feature_name]}, False)
            feature_values_matrix = torch.from_numpy(
                np.expand_dims(feature_values, -1))
            normalized_feature_values = preprocessor(
                feature_values_matrix,
                (feature_values_matrix != MISSING_VALUE))
            name_preprocessed_blob_map[
                feature_name] = normalized_feature_values.numpy()

        test_features = NumpyFeatureProcessor.preprocess(
            feature_value_map, normalization_parameters)

        for feature_name in feature_value_map:
            normalized_features = name_preprocessed_blob_map[feature_name]
            if feature_name != ENUM_FEATURE_ID:
                normalized_features = np.squeeze(normalized_features, -1)

            tolerance = 0.01
            if feature_name == BOXCOX_FEATURE_ID:
                # At the limit, boxcox has some numerical instability
                tolerance = 0.5
            non_matching = np.where(
                np.logical_not(
                    np.isclose(
                        normalized_features.flatten(),
                        test_features[feature_name].flatten(),
                        rtol=tolerance,
                        atol=tolerance,
                    )))
            self.assertTrue(
                np.all(
                    np.isclose(
                        normalized_features.flatten(),
                        test_features[feature_name].flatten(),
                        rtol=tolerance,
                        atol=tolerance,
                    )),
                "{} does not match: {} \n!=\n {}".format(
                    feature_name,
                    normalized_features.flatten()[non_matching],
                    test_features[feature_name].flatten()[non_matching],
                ),
            )
Code Example #16
    def test_actor_wrapper(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        action_normalization_parameters = {
            i: _cont_action_norm()
            for i in range(101, 105)
        }
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        postprocessor = Postprocessor(action_normalization_parameters, False)

        # Test with FullyConnectedActor to make behavior deterministic
        actor = models.FullyConnectedActor(
            state_dim=len(state_normalization_parameters),
            action_dim=len(action_normalization_parameters),
            sizes=[16],
            activations=["relu"],
        )
        actor_with_preprocessor = ActorWithPreprocessor(
            actor, state_preprocessor, postprocessor)
        wrapper = ActorPredictorWrapper(actor_with_preprocessor)
        input_prototype = actor_with_preprocessor.input_prototype()
        action = wrapper(*input_prototype)
        self.assertEqual(action.shape,
                         (1, len(action_normalization_parameters)))

        expected_output = postprocessor(
            actor(rlt.FeatureData(
                state_preprocessor(*input_prototype[0]))).action)
        self.assertTrue((expected_output == action).all())
Code Example #17
    def test_discrete_wrapper_with_id_list_none(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_dim = 2
        dqn = FullyConnectedDQN(
            state_dim=len(state_normalization_parameters),
            action_dim=action_dim,
            sizes=[16],
            activations=["relu"],
        )
        dqn_with_preprocessor = DiscreteDqnWithPreprocessorWithIdList(
            dqn, state_preprocessor)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapperWithIdList(
            dqn_with_preprocessor, action_names)
        input_prototype = dqn_with_preprocessor.input_prototype()
        output_action_names, q_values = wrapper(*input_prototype)
        self.assertEqual(action_names, output_action_names)
        self.assertEqual(q_values.shape, (1, 2))

        expected_output = dqn(
            rlt.PreprocessedState.from_tensor(
                state_preprocessor(*input_prototype[0]))).q_values
        self.assertTrue((expected_output == q_values).all())
Code Example #18
    def test_discrete_wrapper_with_id_list(self):
        state_normalization_parameters = {i: _cont_norm() for i in range(1, 5)}
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)
        action_dim = 2
        state_feature_config = rlt.ModelFeatureConfig(
            float_feature_infos=[
                rlt.FloatFeatureInfo(name=str(i), feature_id=i)
                for i in range(1, 5)
            ],
            id_list_feature_configs=[
                rlt.IdListFeatureConfig(name="A",
                                        feature_id=10,
                                        id_mapping_name="A_mapping")
            ],
            id_mapping_config={"A_mapping": rlt.IdMapping(ids=[0, 1, 2])},
        )
        embedding_concat = models.EmbeddingBagConcat(
            state_dim=len(state_normalization_parameters),
            model_feature_config=state_feature_config,
            embedding_dim=8,
        )
        dqn = models.Sequential(
            embedding_concat,
            rlt.TensorFeatureData(),
            models.FullyConnectedDQN(
                embedding_concat.output_dim,
                action_dim=action_dim,
                sizes=[16],
                activations=["relu"],
            ),
        )

        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(
            dqn, state_preprocessor, state_feature_config)
        action_names = ["L", "R"]
        wrapper = DiscreteDqnPredictorWrapper(dqn_with_preprocessor,
                                              action_names,
                                              state_feature_config)
        input_prototype = dqn_with_preprocessor.input_prototype()[0]
        output_action_names, q_values = wrapper(input_prototype)
        self.assertEqual(action_names, output_action_names)
        self.assertEqual(q_values.shape, (1, 2))

        feature_id_to_name = {
            config.feature_id: config.name
            for config in state_feature_config.id_list_feature_configs
        }
        state_id_list_features = {
            feature_id_to_name[k]: v
            for k, v in input_prototype.id_list_features.items()
        }
        state_with_presence = input_prototype.float_features_with_presence
        expected_output = dqn(
            rlt.FeatureData(
                float_features=state_preprocessor(*state_with_presence),
                id_list_features=state_id_list_features,
            ))
        self.assertTrue((expected_output == q_values).all())
Code Example #19
 def build_serving_module(
     self,
     q_network: ModelBase,
     state_normalization_parameters: Dict[int, NormalizationParameters],
     action_normalization_parameters: Dict[int, NormalizationParameters],
 ) -> torch.nn.Module:
     """
     Returns a TorchScript predictor module
     """
     state_preprocessor = Preprocessor(state_normalization_parameters, False)
     action_preprocessor = Preprocessor(action_normalization_parameters, False)
     dqn_with_preprocessor = ParametricDqnWithPreprocessor(
         q_network.cpu_model().eval(), state_preprocessor, action_preprocessor
     )
     return ParametricDqnPredictorWrapper(
         dqn_with_preprocessor=dqn_with_preprocessor
     )
Code Example #20
 def build_batch_preprocessor(self) -> BatchPreprocessor:
     state_preprocessor = Preprocessor(
         self.state_normalization_data.dense_normalization_parameters,
     )
     return DiscreteDqnBatchPreprocessor(
         num_actions=len(self.model_manager.action_names),
         state_preprocessor=state_preprocessor,
     )
Code Example #21
    def _test_seq2reward_with_preprocessor(self, plan_short_sequence):
        state_dim = 4
        action_dim = 2
        seq_len = 3
        model = FakeSeq2RewardNetwork()
        state_normalization_parameters = {
            i: NormalizationParameters(feature_type=DO_NOT_PREPROCESS,
                                       mean=0.0,
                                       stddev=1.0)
            for i in range(1, state_dim)
        }
        state_preprocessor = Preprocessor(state_normalization_parameters,
                                          False)

        if plan_short_sequence:
            step_prediction_model = FakeStepPredictionNetwork(seq_len)
            model_with_preprocessor = Seq2RewardPlanShortSeqWithPreprocessor(
                model,
                step_prediction_model,
                state_preprocessor,
                seq_len,
                action_dim,
            )
        else:
            model_with_preprocessor = Seq2RewardWithPreprocessor(
                model,
                state_preprocessor,
                seq_len,
                action_dim,
            )
        input_prototype = rlt.ServingFeatureData(
            float_features_with_presence=state_preprocessor.input_prototype(),
            id_list_features=FAKE_STATE_ID_LIST_FEATURES,
            id_score_list_features=FAKE_STATE_ID_SCORE_LIST_FEATURES,
        )
        q_values = model_with_preprocessor(input_prototype)
        if plan_short_sequence:
            # When planning for 1, 2, and 3 steps ahead,
            # the expected q values are respectively:
            # [0, 1], [1, 11], [11, 111]
            # Weighting the expected q values by predicted step
            # probabilities [0.33, 0.33, 0.33], we have [4, 41]
            expected_q_values = torch.tensor([[4.0, 41.0]])
        else:
            expected_q_values = torch.tensor([[11.0, 111.0]])
        assert torch.all(expected_q_values == q_values)
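
The expected values in the plan_short_sequence branch follow directly from the comment in the test: with uniform step probabilities of 1/3, the expectation is the mean of the per-horizon q-values, (0 + 1 + 11) / 3 = 4 and (1 + 11 + 111) / 3 = 41. A quick check of that arithmetic:

import torch

# Per-horizon q-values for planning 1, 2, and 3 steps ahead; uniform
# step probabilities make the weighted sum a plain mean.
q_per_horizon = torch.tensor([[0.0, 1.0], [1.0, 11.0], [11.0, 111.0]])
print(q_per_horizon.mean(dim=0))  # tensor([ 4., 41.])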
Code Example #22
 def build_batch_preprocessor(self) -> BatchPreprocessor:
     return DiscreteDqnBatchPreprocessor(
         num_actions=len(self.action_names),
         state_preprocessor=Preprocessor(
             normalization_parameters=self.state_normalization_parameters,
             use_gpu=self.use_gpu,
         ),
         use_gpu=self.use_gpu,
     )
Code Example #23
    def __init__(
        self,
        model_params: ContinuousActionModelParameters,
        state_normalization: Dict[int, NormalizationParameters],
        action_normalization: Dict[int, NormalizationParameters],
        use_gpu: bool,
        use_all_avail_gpus: bool,
    ):
        logger.info("Running Parametric DQN workflow with params:")
        logger.info(model_params)
        self.model_params = model_params
        self.state_normalization = state_normalization
        self.action_normalization = action_normalization

        trainer = create_parametric_dqn_trainer_from_params(
            model_params,
            state_normalization,
            action_normalization,
            use_gpu=use_gpu,
            use_all_avail_gpus=use_all_avail_gpus,
        )
        trainer = update_model_for_warm_start(trainer)
        assert (type(trainer) == ParametricDQNTrainer
                ), "Warm started wrong model type: " + str(type(trainer))

        evaluator = Evaluator(
            None,
            model_params.rl.gamma,
            trainer,
            metrics_to_score=trainer.metrics_to_score,
        )

        # pyre-fixme[19]: Expected 0 positional arguments.
        super().__init__(
            ParametricDqnBatchPreprocessor(
                Preprocessor(state_normalization, use_gpu),
                Preprocessor(action_normalization, use_gpu),
            ),
            trainer,
            evaluator,
            model_params.training.minibatch_size,
        )
Code Example #24
File: discrete_dqn_base.py Project: zhaonann/ReAgent
 def build_batch_preprocessor(self) -> BatchPreprocessor:
     return DiscreteDqnBatchPreprocessor(
         # pyre-fixme[16]: `DiscreteDQNBase` has no attribute `action_names`.
         num_actions=len(self.action_names),
         state_preprocessor=Preprocessor(
             normalization_parameters=self.state_normalization_parameters,
             use_gpu=self.use_gpu,
         ),
         use_gpu=self.use_gpu,
     )
Code Example #25
File: transforms.py Project: dwtcourses/ReAgent
    def __call__(self, data):
        if self._preprocessor is None:
            self._preprocessor = Preprocessor(self.normalization_parameters,
                                              device=self.device)

        for k in self.keys:
            value, presence = data[k]
            data[k] = self._preprocessor(value.to(self.device),
                                         presence.to(self.device))

        return data
Code Example #26
    def test_quantile_boundary_logic(self):
        """Test quantile logic when feaure value == quantile boundary."""
        input = torch.tensor([[0.0], [80.0], [100.0]])
        norm_params = NormalizationParameters(
            feature_type="QUANTILE",
            boxcox_lambda=None,
            boxcox_shift=None,
            mean=0,
            stddev=1,
            possible_values=None,
            quantiles=[0.0, 80.0, 100.0],
            min_value=0.0,
            max_value=100.0,
        )
        preprocessor = Preprocessor({1: norm_params}, False)
        output = preprocessor._preprocess_QUANTILE(0, input.float(), [norm_params])

        expected_output = torch.tensor([[0.0], [0.5], [1.0]])

        self.assertTrue(np.all(np.isclose(output, expected_output)))
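
The expected output is consistent with treating the QUANTILE transform, at least at the boundaries, as piecewise-linear interpolation of the raw value onto evenly spaced positions of the quantile boundaries. A sketch of that reading with numpy (illustrative, not ReAgent's implementation):

import numpy as np

quantiles = [0.0, 80.0, 100.0]
positions = np.linspace(0.0, 1.0, num=len(quantiles))  # [0.0, 0.5, 1.0]
print(np.interp([0.0, 80.0, 100.0], quantiles, positions))  # [0.  0.5 1. ]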
Code Example #27
    def test_do_not_preprocess(self):
        normalization_parameters = {
            i: NormalizationParameters(feature_type=DO_NOT_PREPROCESS)
            for i in range(1, 5)
        }
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        x = torch.randn(3, 4)
        presence = torch.ones_like(x, dtype=torch.uint8)
        y = postprocessor(preprocessor(x, presence))
        npt.assert_allclose(x, y)
Code Example #28
    def test_predictor_torch_export(self):
        """Verify that q-values before model export equal q-values after
        model export. Meant to catch issues with export logic."""
        environment = Gridworld()
        samples = Samples(
            mdp_ids=["0"],
            sequence_numbers=[0],
            sequence_number_ordinals=[1],
            states=[{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0, 15: 1.0, 24: 1.0}],
            actions=["D"],
            action_probabilities=[0.5],
            rewards=[0],
            possible_actions=[["R", "D"]],
            next_states=[{5: 1.0}],
            next_actions=["U"],
            terminals=[False],
            possible_next_actions=[["R", "U", "D"]],
        )
        tdps = environment.preprocess_samples(samples, 1)
        assert len(tdps) == 1, "Invalid number of data pages"

        trainer = self.get_trainer(environment, {}, False, False, False)
        input = rlt.FeatureData(tdps[0].states)

        pre_export_q_values = trainer.q_network(input).detach().numpy()

        preprocessor = Preprocessor(environment.normalization, False)
        cpu_q_network = trainer.q_network.cpu_model()
        cpu_q_network.eval()
        dqn_with_preprocessor = DiscreteDqnWithPreprocessor(cpu_q_network, preprocessor)
        serving_module = DiscreteDqnPredictorWrapper(
            dqn_with_preprocessor, action_names=environment.ACTIONS
        )

        with tempfile.TemporaryDirectory() as tmpdirname:
            buf = export_module_to_buffer(serving_module)
            tmp_path = os.path.join(tmpdirname, "model")
            with open(tmp_path, "wb") as f:
                f.write(buf.getvalue())
                f.close()
                predictor = DiscreteDqnTorchPredictor(torch.jit.load(tmp_path))

        post_export_q_values = predictor.predict([samples.states[0]])

        for i, action in enumerate(environment.ACTIONS):
            self.assertAlmostEqual(
                float(pre_export_q_values[0][i]),
                float(post_export_q_values[0][action]),
                places=4,
            )
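
export_module_to_buffer followed by torch.jit.load is the round trip this test exercises. The same pattern can be reproduced with only core PyTorch, without touching disk:

import io

import torch

module = torch.jit.script(torch.nn.Linear(4, 2).eval())
buf = io.BytesIO()
torch.jit.save(module, buf)   # serialize the scripted module to a buffer
buf.seek(0)
reloaded = torch.jit.load(buf)

x = torch.randn(1, 4)
# Outputs before and after export must match, as the test above asserts.
assert torch.equal(module(x), reloaded(x))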
Code Example #29
File: test_gridworld_td3.py Project: zwcdp/ReAgent
 def get_actor_predictor(self, trainer, environment):
     state_preprocessor = Preprocessor(environment.normalization, False)
     postprocessor = Postprocessor(
         environment.normalization_continuous_action, False)
     actor_with_preprocessor = ActorWithPreprocessor(
         trainer.actor_network.cpu_model().eval(), state_preprocessor,
         postprocessor)
     serving_module = ActorPredictorWrapper(actor_with_preprocessor)
     predictor = ActorTorchPredictor(
         serving_module,
         sort_features_by_normalization(
             environment.normalization_continuous_action)[0],
     )
     return predictor
Code Example #30
    def test_continuous_action(self):
        normalization_parameters = {
            i: NormalizationParameters(feature_type=CONTINUOUS_ACTION,
                                       min_value=-5.0 * i,
                                       max_value=10.0 * i)
            for i in range(1, 5)
        }
        preprocessor = Preprocessor(normalization_parameters, use_gpu=False)
        postprocessor = Postprocessor(normalization_parameters, use_gpu=False)

        x = torch.rand(3, 4) * torch.tensor([15, 30, 45, 60]) + torch.tensor(
            [-5, -10, -15, -20])
        presence = torch.ones_like(x, dtype=torch.uint8)
        y = postprocessor(preprocessor(x, presence))
        npt.assert_allclose(x, y, rtol=1e-4)
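
CONTINUOUS_ACTION features are affinely scaled into a bounded interval on the way in and unscaled on the way out, which is why the round trip recovers x up to a small tolerance. A sketch of the idea, assuming a plain affine map onto [-1, 1] (ReAgent's actual scaling reserves a small epsilon margin at the boundaries):

import torch

min_v, max_v = -5.0, 10.0
x = torch.rand(3) * (max_v - min_v) + min_v

scaled = 2.0 * (x - min_v) / (max_v - min_v) - 1.0         # preprocess
restored = (scaled + 1.0) / 2.0 * (max_v - min_v) + min_v  # postprocess
assert torch.allclose(x, restored, rtol=1e-4, atol=1e-5)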