Example #1
def test_diagnostics(default_model_storage: ModelStorage,
                     default_execution_context: ExecutionContext):
    domain = Domain.from_yaml(DOMAIN_YAML)
    policy = TEDPolicy(
        TEDPolicy.get_default_config(),
        default_model_storage,
        Resource("TEDPolicy"),
        default_execution_context,
    )
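    # A minimal greeting tracker used both to train the policy and to predict on.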
    GREET_RULE = DialogueStateTracker.from_events(
        "greet rule",
        evts=[
            UserUttered(intent={"name": GREET_INTENT_NAME}),
            ActionExecuted(UTTER_GREET_ACTION),
            ActionExecuted(ACTION_LISTEN_NAME),
            UserUttered(intent={"name": GREET_INTENT_NAME}),
            ActionExecuted(ACTION_LISTEN_NAME),
        ],
    )
    precomputations = None
    policy.train([GREET_RULE], domain, precomputations)
    prediction = policy.predict_action_probabilities(
        GREET_RULE,
        domain,
        precomputations,
    )

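    # TED exposes the transformer's attention weights through the prediction's diagnostic data.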
    assert prediction.diagnostic_data
    assert "attention_weights" in prediction.diagnostic_data
    assert isinstance(prediction.diagnostic_data.get("attention_weights"),
                      np.ndarray)
Example #2
    def test_ignore_action_unlikely_intent(
        self,
        trained_policy: TEDPolicy,
        default_domain: Domain,
        tracker_events_with_action: List[Event],
        tracker_events_without_action: List[Event],
    ):
        precomputations = None
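        # Both trackers contain the same dialogue, except that the first one also
        # includes an action_unlikely_intent event, which the policy should ignore.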
        tracker_with_action = DialogueStateTracker.from_events(
            "test 1", evts=tracker_events_with_action)
        tracker_without_action = DialogueStateTracker.from_events(
            "test 2", evts=tracker_events_without_action)
        prediction_with_action = trained_policy.predict_action_probabilities(
            tracker_with_action,
            default_domain,
            precomputations,
        )
        prediction_without_action = trained_policy.predict_action_probabilities(
            tracker_without_action,
            default_domain,
            precomputations,
        )

        # If the weights didn't change, then both trackers
        # should result in the same prediction.
        assert (prediction_with_action.probabilities ==
                prediction_without_action.probabilities)
Example #3
    def test_normalization(
        self,
        trained_policy: TEDPolicy,
        tracker: DialogueStateTracker,
        default_domain: Domain,
        monkeypatch: MonkeyPatch,
    ):
        precomputations = None
        # first check the output is what we expect
        prediction = trained_policy.predict_action_probabilities(
            tracker,
            default_domain,
            precomputations,
        )
        assert not prediction.is_end_to_end_prediction
        # count number of non-zero confidences
        assert (sum([
            confidence > 0 for confidence in prediction.probabilities
        ]) == trained_policy.config[RANKING_LENGTH])
        # check that the norm is still 1
        assert sum(prediction.probabilities) == pytest.approx(1)

        # also check our function is called
        mock = Mock()
        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
        trained_policy.predict_action_probabilities(
            tracker,
            default_domain,
            precomputations,
        )

        mock.normalize.assert_called_once()
Example #4
def trained_ted(
    tmp_path_factory: TempPathFactory, moodbot_domain_path: Path,
) -> TEDPolicyGraphComponent:
    training_files = "data/test_moodbot/data/stories.yml"
    domain = Domain.load(moodbot_domain_path)
    trackers = training.load_data(str(training_files), domain)
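    # Create the TED graph component with its default config, overriding EPOCHS to 1
    # for a quick training run.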
    policy = TEDPolicyGraphComponent.create(
        {**TEDPolicyGraphComponent.get_default_config(), EPOCHS: 1},
        LocalModelStorage.create(tmp_path_factory.mktemp("storage")),
        Resource("ted"),
        ExecutionContext(GraphSchema({})),
    )
    policy.train(trackers, domain)

    return policy
Example #5
    def test_label_data_assembly(self, trained_policy: TEDPolicy,
                                 default_domain: Domain):
        state_featurizer = trained_policy.featurizer.state_featurizer
        encoded_all_labels = state_featurizer.encode_all_labels(
            default_domain, precomputations=None)

        attribute_data, _ = model_data_utils.convert_to_data_format(
            encoded_all_labels)
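        # Assemble the label data the same way TED does internally and inspect its signature.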
        assembled_label_data = trained_policy._assemble_label_data(
            attribute_data, default_domain)
        assembled_label_data_signature = assembled_label_data.get_signature()

        assert list(assembled_label_data_signature.keys()) == [
            f"{LABEL}_{ACTION_NAME}",
            f"{LABEL}",
        ]
        assert assembled_label_data.num_examples == default_domain.num_actions
        assert list(assembled_label_data_signature[f"{LABEL}_{ACTION_NAME}"].
                    keys()) == [
                        MASK,
                        SENTENCE,
                    ]
        assert list(assembled_label_data_signature[LABEL].keys()) == [IDS]
        assert (assembled_label_data_signature[f"{LABEL}_{ACTION_NAME}"]
                [SENTENCE][0].units == default_domain.num_actions)
Example #6
    def _config(
            self,
            config_override: Optional[Dict[Text, Any]] = None
    ) -> Dict[Text, Any]:
        config_override = config_override or {}
        return {
            **TEDPolicy.get_default_config(),
            **config_override,
        }
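
For context, a minimal sketch of how a _config helper like this is typically consumed by the surrounding test collection: the merged dict is passed to create_policy alongside the standard fixtures. The create_policy signature and fixture names are taken from the later examples in this listing; the EPOCHS override value is purely illustrative.

# Sketch only (would live inside a test method of the collection);
# the EPOCHS override below is hypothetical.
policy = self.create_policy(
    None,  # no explicit featurizer config
    model_storage=model_storage,
    resource=resource,
    execution_context=execution_context,
    config=self._config({EPOCHS: 1}),
)
assert policy.config[EPOCHS] == 1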
Example #7
    def _config(
            self,
            config_override: Optional[Dict[Text, Any]] = None
    ) -> Dict[Text, Any]:
        config_override = config_override or {}
        return {
            **TEDPolicy.get_default_config(),
            POLICY_MAX_HISTORY: self.max_history,
            **config_override,
        }
Example #8
    def _config(
            self,
            config_override: Optional[Dict[Text, Any]] = None
    ) -> Dict[Text, Any]:
        config_override = config_override or {}
        return {
            **TEDPolicy.get_default_config(),
            MODEL_CONFIDENCE: LINEAR_NORM,
            **config_override,
        }
Example #9
    def _config(
            self,
            config_override: Optional[Dict[Text, Any]] = None
    ) -> Dict[Text, Any]:
        config_override = config_override or {}
        return {
            **TEDPolicy.get_default_config(),
            SCALE_LOSS: False,
            EVAL_NUM_EXAMPLES: 4,
            **config_override,
        }
Example #10
    def _config(
            self,
            config_override: Optional[Dict[Text, Any]] = None
    ) -> Dict[Text, Any]:
        config_override = config_override or {}
        return {
            **TEDPolicy.get_default_config(),
            KEY_RELATIVE_ATTENTION: True,
            VALUE_RELATIVE_ATTENTION: True,
            MAX_RELATIVE_POSITION: 5,
            **config_override,
        }
Example #11
    def test_gen_batch(self, trained_policy: TEDPolicy, default_domain: Domain,
                       stories_path: Path):
        training_trackers = tests.core.test_policies.train_trackers(
            default_domain, stories_path, augmentation_factor=0)
        precomputations = None
        training_data, label_ids, entity_tags = trained_policy._featurize_for_training(
            training_trackers,
            default_domain,
            precomputations,
        )

        _, all_labels = trained_policy._create_label_data(
            default_domain, precomputations)
        model_data = trained_policy._create_model_data(training_data,
                                                       label_ids, entity_tags,
                                                       all_labels)
        batch_size = 2
        data_generator = RasaBatchDataGenerator(model_data,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                batch_strategy="sequence")
        iterator = iter(data_generator)
        # model data keys were sorted, so the order is alphabetical
        (
            (
                batch_action_name_mask,
                _,
                _,
                batch_action_name_sentence_shape,
                batch_dialogue_length,
                batch_entities_mask,
                _,
                _,
                batch_entities_sentence_shape,
                batch_intent_mask,
                _,
                _,
                batch_intent_sentence_shape,
                batch_label_ids,
                batch_slots_mask,
                _,
                _,
                batch_slots_sentence_shape,
            ),
            _,
        ) = next(iterator)

        assert (batch_label_ids.shape[0] == batch_size
                and batch_dialogue_length.shape[0] == batch_size)
        # batch and dialogue dimensions are NOT combined for masks
        assert (batch_slots_mask.shape[0] == batch_size
                and batch_intent_mask.shape[0] == batch_size
                and batch_entities_mask.shape[0] == batch_size
                and batch_action_name_mask.shape[0] == batch_size)
        # some features might be "fake", so their sequence length is `0`
        seq_len = max([
            batch_intent_sentence_shape[1],
            batch_action_name_sentence_shape[1],
            batch_entities_sentence_shape[1],
            batch_slots_sentence_shape[1],
        ])
        assert (batch_intent_sentence_shape[1] == seq_len
                or batch_intent_sentence_shape[1] == 0)
        assert (batch_action_name_sentence_shape[1] == seq_len
                or batch_action_name_sentence_shape[1] == 0)
        assert (batch_entities_sentence_shape[1] == seq_len
                or batch_entities_sentence_shape[1] == 0)
        assert (batch_slots_sentence_shape[1] == seq_len
                or batch_slots_sentence_shape[1] == 0)

        data_generator = RasaBatchDataGenerator(model_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                batch_strategy="balanced")
        iterator = iter(data_generator)

        (
            (
                batch_action_name_mask,
                _,
                _,
                batch_action_name_sentence_shape,
                batch_dialogue_length,
                batch_entities_mask,
                _,
                _,
                batch_entities_sentence_shape,
                batch_intent_mask,
                _,
                _,
                batch_intent_sentence_shape,
                batch_label_ids,
                batch_slots_mask,
                _,
                _,
                batch_slots_sentence_shape,
            ),
            _,
        ) = next(iterator)

        assert (batch_label_ids.shape[0] == batch_size
                and batch_dialogue_length.shape[0] == batch_size)
        # some features might be "fake", so their sequence length is `0`
        seq_len = max([
            batch_intent_sentence_shape[1],
            batch_action_name_sentence_shape[1],
            batch_entities_sentence_shape[1],
            batch_slots_sentence_shape[1],
        ])
        assert (batch_intent_sentence_shape[1] == seq_len
                or batch_intent_sentence_shape[1] == 0)
        assert (batch_action_name_sentence_shape[1] == seq_len
                or batch_action_name_sentence_shape[1] == 0)
        assert (batch_entities_sentence_shape[1] == seq_len
                or batch_entities_sentence_shape[1] == 0)
        assert (batch_slots_sentence_shape[1] == seq_len
                or batch_slots_sentence_shape[1] == 0)
Example #12
class TestTEDPolicy(PolicyTestCollection):
    @staticmethod
    def _policy_class_to_test() -> Type[TEDPolicy]:
        return TEDPolicy

    def test_train_model_checkpointing(self, tmp_path: Path):
        checkpoint_dir = get_checkpoint_dir_path(tmp_path)
        assert not checkpoint_dir.is_dir()

        train_core(
            domain="data/test_domains/default.yml",
            stories="data/test_yaml_stories/stories_defaultdomain.yml",
            train_path=str(tmp_path),
            output=str(tmp_path),
            config="data/test_config/config_ted_policy_model_checkpointing.yml",
        )
        assert checkpoint_dir.is_dir()

    def test_doesnt_checkpoint_with_no_checkpointing(self, tmp_path: Path):
        checkpoint_dir = get_checkpoint_dir_path(tmp_path)
        assert not checkpoint_dir.is_dir()

        train_core(
            domain="data/test_domains/default.yml",
            stories="data/test_yaml_stories/stories_defaultdomain.yml",
            train_path=str(tmp_path),
            output=str(tmp_path),
            config=
            "data/test_config/config_ted_policy_no_model_checkpointing.yml",
        )
        assert not checkpoint_dir.is_dir()

    def test_doesnt_checkpoint_with_zero_eval_num_examples(
            self, tmp_path: Path):
        checkpoint_dir = get_checkpoint_dir_path(tmp_path)
        assert not checkpoint_dir.is_dir()
        config_file = "config_ted_policy_model_checkpointing_zero_eval_num_examples.yml"
        with pytest.warns(UserWarning) as warning:
            train_core(
                domain="data/test_domains/default.yml",
                stories="data/test_yaml_stories/stories_defaultdomain.yml",
                train_path=str(tmp_path),
                output=str(tmp_path),
                config=f"data/test_config/{config_file}",
            )
        warn_text = (
            f"You have opted to save the best model, but the value of "
            f"'{EVAL_NUM_EXAMPLES}' is not greater than 0. No checkpoint model will be "
            f"saved.")
        assert not checkpoint_dir.is_dir()
        assert len([w for w in warning if warn_text in str(w.message)]) == 1

    @pytest.mark.parametrize(
        "should_finetune, epoch_override, expected_epoch_value",
        [
            (
                True,
                TEDPolicy.get_default_config()[EPOCHS] + 1,
                TEDPolicy.get_default_config()[EPOCHS] + 1,
            ),
            (
                False,
                TEDPolicy.get_default_config()[EPOCHS] + 1,
                TEDPolicy.get_default_config()[EPOCHS],
            ),  # trained_policy uses default epochs during training
        ],
    )
    def test_epoch_override_when_loaded(
        self,
        trained_policy: TEDPolicy,
        should_finetune: bool,
        epoch_override: int,
        expected_epoch_value: int,
        resource: Resource,
        model_storage: ModelStorage,
        execution_context: ExecutionContext,
    ):
        execution_context.is_finetuning = should_finetune
        loaded_policy = trained_policy.__class__.load(
            {
                **self._config(), EPOCH_OVERRIDE: epoch_override
            },
            model_storage,
            resource,
            execution_context,
        )

        assert loaded_policy.config[EPOCHS] == expected_epoch_value

    def test_train_fails_with_checkpoint_zero_eval_num_epochs(
            self, tmp_path: Path):
        checkpoint_dir = get_checkpoint_dir_path(tmp_path)
        assert not checkpoint_dir.is_dir()
        config_file = "config_ted_policy_model_checkpointing_zero_every_num_epochs.yml"
        with pytest.raises(InvalidConfigException):
            with pytest.warns(UserWarning) as warning:
                train_core(
                    domain="data/test_domains/default.yml",
                    stories="data/test_yaml_stories/stories_defaultdomain.yml",
                    train_path=str(tmp_path),
                    output=str(tmp_path),
                    config=f"data/test_config/{config_file}",
                )
        warn_text = (
            f"You have opted to save the best model, but the value of "
            f"'{EVAL_NUM_EPOCHS}' is not -1 or greater than 0. Training will fail."
        )
        assert len([w for w in warning if warn_text in str(w.message)]) == 1
        assert not checkpoint_dir.is_dir()

    def test_training_with_no_intent(
        self,
        featurizer: Optional[TrackerFeaturizer],
        default_domain: Domain,
        tmp_path: Path,
        caplog: LogCaptureFixture,
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ):
        stories = tmp_path / "stories.yml"
        stories.write_text("""
            version: "2.0"
            stories:
            - story: test path
              steps:
              - action: utter_greet
            """)
        policy = self.create_policy(
            featurizer=featurizer,
            model_storage=model_storage,
            resource=resource,
            execution_context=execution_context,
        )
        import tests.core.test_policies

        training_trackers = tests.core.test_policies.train_trackers(
            default_domain, str(stories), augmentation_factor=20)
        with pytest.raises(RasaException) as e:
            policy.train(training_trackers,
                         default_domain,
                         precomputations=None)

        assert "No user features specified. Cannot train 'TED' model." == str(
            e.value)

    def test_similarity_type(self, trained_policy: TEDPolicy):
        assert trained_policy.config[SIMILARITY_TYPE] == "inner"

    def test_ranking_length(self, trained_policy: TEDPolicy):
        assert trained_policy.config[RANKING_LENGTH] == 10

    def test_normalization(
        self,
        trained_policy: TEDPolicy,
        tracker: DialogueStateTracker,
        default_domain: Domain,
        monkeypatch: MonkeyPatch,
    ):
        precomputations = None
        # first check the output is what we expect
        prediction = trained_policy.predict_action_probabilities(
            tracker,
            default_domain,
            precomputations,
        )
        assert not prediction.is_end_to_end_prediction
        # count number of non-zero confidences
        assert (sum([
            confidence > 0 for confidence in prediction.probabilities
        ]) == trained_policy.config[RANKING_LENGTH])
        # check that the norm is still 1
        assert sum(prediction.probabilities) == pytest.approx(1)

        # also check our function is called
        mock = Mock()
        monkeypatch.setattr(train_utils, "normalize", mock.normalize)
        trained_policy.predict_action_probabilities(
            tracker,
            default_domain,
            precomputations,
        )

        mock.normalize.assert_called_once()

    def test_label_data_assembly(self, trained_policy: TEDPolicy,
                                 default_domain: Domain):
        state_featurizer = trained_policy.featurizer.state_featurizer
        encoded_all_labels = state_featurizer.encode_all_labels(
            default_domain, precomputations=None)

        attribute_data, _ = model_data_utils.convert_to_data_format(
            encoded_all_labels)
        assembled_label_data = trained_policy._assemble_label_data(
            attribute_data, default_domain)
        assembled_label_data_signature = assembled_label_data.get_signature()

        assert list(assembled_label_data_signature.keys()) == [
            f"{LABEL}_{ACTION_NAME}",
            f"{LABEL}",
        ]
        assert assembled_label_data.num_examples == default_domain.num_actions
        assert list(assembled_label_data_signature[f"{LABEL}_{ACTION_NAME}"].
                    keys()) == [
                        MASK,
                        SENTENCE,
                    ]
        assert list(assembled_label_data_signature[LABEL].keys()) == [IDS]
        assert (assembled_label_data_signature[f"{LABEL}_{ACTION_NAME}"]
                [SENTENCE][0].units == default_domain.num_actions)

    def test_gen_batch(self, trained_policy: TEDPolicy, default_domain: Domain,
                       stories_path: Path):
        training_trackers = tests.core.test_policies.train_trackers(
            default_domain, stories_path, augmentation_factor=0)
        precomputations = None
        training_data, label_ids, entity_tags = trained_policy._featurize_for_training(
            training_trackers,
            default_domain,
            precomputations,
        )

        _, all_labels = trained_policy._create_label_data(
            default_domain, precomputations)
        model_data = trained_policy._create_model_data(training_data,
                                                       label_ids, entity_tags,
                                                       all_labels)
        batch_size = 2
        data_generator = RasaBatchDataGenerator(model_data,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                batch_strategy="sequence")
        iterator = iter(data_generator)
        # model data keys were sorted, so the order is alphabetical
        (
            (
                batch_action_name_mask,
                _,
                _,
                batch_action_name_sentence_shape,
                batch_dialogue_length,
                batch_entities_mask,
                _,
                _,
                batch_entities_sentence_shape,
                batch_intent_mask,
                _,
                _,
                batch_intent_sentence_shape,
                batch_label_ids,
                batch_slots_mask,
                _,
                _,
                batch_slots_sentence_shape,
            ),
            _,
        ) = next(iterator)

        assert (batch_label_ids.shape[0] == batch_size
                and batch_dialogue_length.shape[0] == batch_size)
        # batch and dialogue dimensions are NOT combined for masks
        assert (batch_slots_mask.shape[0] == batch_size
                and batch_intent_mask.shape[0] == batch_size
                and batch_entities_mask.shape[0] == batch_size
                and batch_action_name_mask.shape[0] == batch_size)
        # some features might be "fake", so their sequence length is `0`
        seq_len = max([
            batch_intent_sentence_shape[1],
            batch_action_name_sentence_shape[1],
            batch_entities_sentence_shape[1],
            batch_slots_sentence_shape[1],
        ])
        assert (batch_intent_sentence_shape[1] == seq_len
                or batch_intent_sentence_shape[1] == 0)
        assert (batch_action_name_sentence_shape[1] == seq_len
                or batch_action_name_sentence_shape[1] == 0)
        assert (batch_entities_sentence_shape[1] == seq_len
                or batch_entities_sentence_shape[1] == 0)
        assert (batch_slots_sentence_shape[1] == seq_len
                or batch_slots_sentence_shape[1] == 0)

        data_generator = RasaBatchDataGenerator(model_data,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                batch_strategy="balanced")
        iterator = iter(data_generator)

        (
            (
                batch_action_name_mask,
                _,
                _,
                batch_action_name_sentence_shape,
                batch_dialogue_length,
                batch_entities_mask,
                _,
                _,
                batch_entities_sentence_shape,
                batch_intent_mask,
                _,
                _,
                batch_intent_sentence_shape,
                batch_label_ids,
                batch_slots_mask,
                _,
                _,
                batch_slots_sentence_shape,
            ),
            _,
        ) = next(iterator)

        assert (batch_label_ids.shape[0] == batch_size
                and batch_dialogue_length.shape[0] == batch_size)
        # some features might be "fake", so their sequence length is `0`
        seq_len = max([
            batch_intent_sentence_shape[1],
            batch_action_name_sentence_shape[1],
            batch_entities_sentence_shape[1],
            batch_slots_sentence_shape[1],
        ])
        assert (batch_intent_sentence_shape[1] == seq_len
                or batch_intent_sentence_shape[1] == 0)
        assert (batch_action_name_sentence_shape[1] == seq_len
                or batch_action_name_sentence_shape[1] == 0)
        assert (batch_entities_sentence_shape[1] == seq_len
                or batch_entities_sentence_shape[1] == 0)
        assert (batch_slots_sentence_shape[1] == seq_len
                or batch_slots_sentence_shape[1] == 0)

    @pytest.mark.parametrize(
        "tracker_events_with_action, tracker_events_without_action",
        [
            (
                [
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="hello", intent={"name": "greet"}),
                    ActionExecuted(ACTION_UNLIKELY_INTENT_NAME),
                ],
                [
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="hello", intent={"name": "greet"}),
                ],
            ),
            (
                [
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="hello", intent={"name": "greet"}),
                    EntitiesAdded(entities=[
                        {
                            "entity": "name",
                            "value": "Peter"
                        },
                    ]),
                    ActionExecuted(ACTION_UNLIKELY_INTENT_NAME),
                    ActionExecuted("utter_greet"),
                ],
                [
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="hello", intent={"name": "greet"}),
                    EntitiesAdded(entities=[
                        {
                            "entity": "name",
                            "value": "Peter"
                        },
                    ]),
                    ActionExecuted("utter_greet"),
                ],
            ),
            (
                [
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="hello", intent={"name": "greet"}),
                    ActionExecuted(ACTION_UNLIKELY_INTENT_NAME),
                    ActionExecuted("some_form"),
                    ActiveLoop("some_form"),
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="default", intent={"name": "default"}),
                    ActionExecuted(ACTION_UNLIKELY_INTENT_NAME),
                ],
                [
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="hello", intent={"name": "greet"}),
                    ActionExecuted(ACTION_UNLIKELY_INTENT_NAME),
                    ActionExecuted("some_form"),
                    ActiveLoop("some_form"),
                    ActionExecuted(ACTION_LISTEN_NAME),
                    UserUttered(text="default", intent={"name": "default"}),
                ],
            ),
        ],
    )
    def test_ignore_action_unlikely_intent(
        self,
        trained_policy: TEDPolicy,
        default_domain: Domain,
        tracker_events_with_action: List[Event],
        tracker_events_without_action: List[Event],
    ):
        precomputations = None
        tracker_with_action = DialogueStateTracker.from_events(
            "test 1", evts=tracker_events_with_action)
        tracker_without_action = DialogueStateTracker.from_events(
            "test 2", evts=tracker_events_without_action)
        prediction_with_action = trained_policy.predict_action_probabilities(
            tracker_with_action,
            default_domain,
            precomputations,
        )
        prediction_without_action = trained_policy.predict_action_probabilities(
            tracker_without_action,
            default_domain,
            precomputations,
        )

        # If the weights didn't change, then both trackers
        # should result in the same prediction.
        assert (prediction_with_action.probabilities ==
                prediction_without_action.probabilities)

    @pytest.mark.parametrize(
        "featurizer_config, tracker_featurizer, state_featurizer",
        [
            (None, MaxHistoryTrackerFeaturizer(), SingleStateFeaturizer),
            ([], MaxHistoryTrackerFeaturizer(), SingleStateFeaturizer),
        ],
    )
    def test_empty_featurizer_configs(
        self,
        featurizer_config: Optional[Dict[Text, Any]],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        tracker_featurizer: MaxHistoryTrackerFeaturizer,
        state_featurizer: Type[SingleStateFeaturizer],
    ):
        featurizer_config_override = ({
            "featurizer": featurizer_config
        } if featurizer_config else {})
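        # An empty featurizer config should fall back to the default
        # MaxHistoryTrackerFeaturizer with a SingleStateFeaturizer.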
        policy = self.create_policy(
            None,
            model_storage=model_storage,
            resource=resource,
            execution_context=execution_context,
            config=self._config(featurizer_config_override),
        )

        featurizer = policy.featurizer
        assert isinstance(featurizer, tracker_featurizer.__class__)

        if featurizer_config:
            expected_max_history = featurizer_config[0].get(POLICY_MAX_HISTORY)
        else:
            expected_max_history = self._config().get(POLICY_MAX_HISTORY)

        assert featurizer.max_history == expected_max_history

        assert isinstance(featurizer.state_featurizer, state_featurizer)