Example #1
0
def test_train_val_split(model_data: RasaModelData):
    """Check that split() preserves keys/sub-keys/dtypes and sizes the halves 3/2."""
    train_split, val_split = model_data.split(2, 42)

    # Every top-level key and sub-key must survive the split in both halves.
    for key, sub_map in model_data.items():
        assert len(train_split.get(key)) == len(sub_map)
        assert len(val_split.get(key)) == len(sub_map)
        for sub_key, feature_arrays in sub_map.items():
            assert len(train_split.get(key, sub_key)) == len(feature_arrays)
            assert len(val_split.get(key, sub_key)) == len(feature_arrays)
            # dtypes of the (possibly nested) feature arrays must be unchanged
            for idx, arr in enumerate(feature_arrays):
                train_arr = train_split.get(key, sub_key)[idx]
                if isinstance(arr[0], list):
                    assert arr[0][0].dtype == train_arr[0][0].dtype
                else:
                    assert arr[0].dtype == train_arr[0].dtype

    # Train half keeps 3 examples per array, validation half keeps 2.
    for sub_map in train_split.values():
        for feature_arrays in sub_map.values():
            assert all(np.array(arr).shape[0] == 3 for arr in feature_arrays)

    for sub_map in val_split.values():
        for feature_arrays in sub_map.values():
            assert all(np.array(arr).shape[0] == 2 for arr in feature_arrays)
Example #2
0
    def _load_model_class(
        cls,
        tf_model_file: Text,
        model_data_example: RasaModelData,
        label_data: RasaModelData,
        entity_tag_specs: List[EntityTagSpec],
        config: Dict[Text, Any],
        finetune_mode: bool = False,
    ) -> "RasaModel":
        """Restore the concrete model class from a persisted TF model file.

        At prediction time only TEXT-related features are required, so the
        loader is handed a reduced copy of the data example containing just
        those feature names.
        """
        text_only_features = {
            name: feats
            for name, feats in model_data_example.items()
            if TEXT in name
        }
        predict_data_example = RasaModelData(
            label_key=model_data_example.label_key, data=text_only_features
        )

        model_class = cls.model_class(config[USE_TEXT_AS_LABEL])
        return model_class.load(
            tf_model_file,
            model_data_example,
            predict_data_example,
            data_signature=model_data_example.get_signature(),
            label_data=label_data,
            entity_tag_specs=entity_tag_specs,
            config=copy.deepcopy(config),
            finetune_mode=finetune_mode,
        )
Example #3
0
    def load(cls, path: Text) -> "TEDPolicy":
        """Loads a policy from the storage.

        **Needs to load its featurizer**

        Args:
            path: Directory the policy was persisted to.

        Returns:
            The restored `TEDPolicy`. If no data example was persisted, a
            policy carrying only the featurizer (no trained model) is
            returned.

        Raises:
            Exception: If `path` does not exist.
        """
        # Use pathlib consistently: build the Path once and check existence
        # on it (the original mixed `os.path.exists` with pathlib).
        model_path = Path(path)

        if not model_path.exists():
            raise Exception(f"Failed to load TED policy model. Path "
                            f"'{os.path.abspath(path)}' doesn't exist.")

        tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

        featurizer = TrackerFeaturizer.load(path)

        # Without a persisted data example the TF model cannot be rebuilt;
        # return a policy that only carries the featurizer.
        if not (model_path /
                f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
            return cls(featurizer=featurizer)

        loaded_data = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl")
        label_data = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl")
        meta = io_utils.pickle_load(model_path /
                                    f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
        priority = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl")

        model_data_example = RasaModelData(label_key=LABEL_IDS,
                                           data=loaded_data)
        meta = train_utils.update_similarity_type(meta)

        model = TED.load(
            str(tf_model_file),
            model_data_example,
            data_signature=model_data_example.get_signature(),
            config=meta,
            max_history_tracker_featurizer_used=isinstance(
                featurizer, MaxHistoryTrackerFeaturizer),
            label_data=label_data,
        )

        # build the graph for prediction: only DIALOGUE features are needed
        # at inference time
        predict_data_example = RasaModelData(
            label_key=LABEL_IDS,
            data={
                feature_name: features
                for feature_name, features in model_data_example.items()
                if DIALOGUE in feature_name
            },
        )
        model.build_for_predict(predict_data_example)

        return cls(featurizer=featurizer,
                   priority=priority,
                   model=model,
                   **meta)
Example #4
0
def test_train_val_split(model_data: RasaModelData):
    """Verify split() keeps keys and dtypes and yields 3/2 examples per half."""
    train_split, val_split = model_data.split(2, 42)

    for key, feature_arrays in model_data.items():
        # both halves must keep every array under this key
        assert len(train_split.get(key)) == len(feature_arrays)
        assert len(val_split.get(key)) == len(feature_arrays)
        for idx, arr in enumerate(feature_arrays):
            assert arr[0].dtype == train_split.get(key)[idx][0].dtype

    # Train half keeps 3 examples per array, validation half keeps 2.
    for feature_arrays in train_split.values():
        assert all(arr.shape[0] == 3 for arr in feature_arrays)

    for feature_arrays in val_split.values():
        assert all(arr.shape[0] == 2 for arr in feature_arrays)
Example #5
0
 def _construct_model_initialization_data(
     cls, loaded_data: Dict[Text, Dict[Text, List[FeatureArray]]]
 ) -> Tuple[RasaModelData, RasaModelData]:
     """Build the (training, prediction) data examples from loaded features.

     The prediction example keeps only the feature names listed in
     PREDICTION_FEATURES, which removes any label features that may be
     present in the loaded data.
     """
     model_data_example = RasaModelData(
         label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
     )

     prediction_data = {
         name: feats
         for name, feats in model_data_example.items()
         if name in PREDICTION_FEATURES
     }
     predict_data_example = RasaModelData(
         label_key=LABEL_KEY,
         label_sub_key=LABEL_SUB_KEY,
         data=prediction_data,
     )

     return model_data_example, predict_data_example
Example #6
0
    def _load_model(
        cls,
        index_label_id_mapping: Dict[int, Text],
        index_tag_id_mapping: Dict[int, Text],
        label_data: RasaModelData,
        meta: Dict[Text, Any],
        data_example: Dict[Text, List[np.ndarray]],
        model_dir: Text,
    ):
        """Restore the trained model from disk and prepare its prediction graph."""
        tf_model_file = os.path.join(model_dir, meta.get("file") + ".tf_model")

        # A label key is only needed when the model classifies intents.
        label_key = LABEL_IDS if meta[INTENT_CLASSIFICATION] else None
        model_data_example = RasaModelData(label_key=label_key, data=data_example)

        model = cls.model_class(meta).load(
            tf_model_file,
            model_data_example,
            data_signature=model_data_example.get_signature(),
            label_data=label_data,
            index_label_id_mapping=index_label_id_mapping,
            index_tag_id_mapping=index_tag_id_mapping,
            config=meta,
        )

        # build the graph for prediction: only TEXT features are needed
        text_only_features = {
            name: feats
            for name, feats in model_data_example.items()
            if TEXT in name
        }
        predict_data_example = RasaModelData(
            label_key=label_key, data=text_only_features
        )
        model.build_for_predict(predict_data_example)

        return model
Example #7
0
    def load(cls, path: Union[Text, Path]) -> "TEDPolicy":
        """Loads a policy from the storage.
        **Needs to load its featurizer**
        """
        model_path = Path(path)

        if not model_path.exists():
            logger.error(
                f"Failed to load TED policy model. Path "
                f"'{model_path.absolute()}' doesn't exist."
            )
            # NOTE: returns None (no exception) when the path is missing.
            return

        tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

        featurizer = TrackerFeaturizer.load(path)

        # Without a persisted data example the TF model cannot be rebuilt.
        data_example_file = model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl"
        if not data_example_file.is_file():
            return cls(featurizer=featurizer)

        loaded_data = io_utils.pickle_load(data_example_file)
        label_data = RasaModelData(
            data=io_utils.pickle_load(
                model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl"
            )
        )
        zero_state_features = io_utils.pickle_load(
            model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl"
        )
        meta = train_utils.update_similarity_type(
            io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
        )
        priority = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl"
        )

        model_data_example = RasaModelData(
            label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
        )

        model = TED.load(
            str(tf_model_file),
            model_data_example,
            data_signature=model_data_example.get_signature(),
            config=meta,
            max_history_tracker_featurizer_used=isinstance(
                featurizer, MaxHistoryTrackerFeaturizer
            ),
            label_data=label_data,
        )

        # build the graph for prediction
        wanted_features = STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [DIALOGUE]
        predict_data_example = RasaModelData(
            label_key=LABEL_KEY,
            label_sub_key=LABEL_SUB_KEY,
            data={
                name: feats
                for name, feats in model_data_example.items()
                if name in wanted_features
            },
        )
        model.build_for_predict(predict_data_example)

        return cls(
            featurizer=featurizer,
            priority=priority,
            model=model,
            zero_state_features=zero_state_features,
            **meta,
        )