Example #1
    def _load_from_files(cls, meta: Dict[Text, Any], model_dir: Text):
        file_name = meta.get("file")

        model_dir = Path(model_dir)

        data_example = io_utils.pickle_load(model_dir /
                                            f"{file_name}.data_example.pkl")
        label_data = io_utils.pickle_load(model_dir /
                                          f"{file_name}.label_data.pkl")
        index_label_id_mapping = io_utils.json_unpickle(
            model_dir / f"{file_name}.index_label_id_mapping.pkl")
        index_tag_id_mapping = io_utils.json_unpickle(
            model_dir / f"{file_name}.index_tag_id_mapping.pkl")

        # jsonpickle converts dictionary keys to strings
        index_label_id_mapping = {
            int(key): value
            for key, value in index_label_id_mapping.items()
        }
        if index_tag_id_mapping is not None:
            index_tag_id_mapping = {
                int(key): value
                for key, value in index_tag_id_mapping.items()
            }

        return (
            index_label_id_mapping,
            index_tag_id_mapping,
            label_data,
            meta,
            data_example,
        )
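
The dict comprehensions above exist because jsonpickle serializes to JSON, and JSON object keys are always strings, so integer keys must be restored by hand after decoding. A minimal round-trip sketch:

    import jsonpickle

    encoded = jsonpickle.encode({0: "greet", 1: "goodbye"})
    decoded = jsonpickle.decode(encoded)
    assert set(decoded) == {"0", "1"}  # integer keys came back as strings

    # restore the integer keys, as the comprehensions above do
    restored = {int(key): value for key, value in decoded.items()}
    assert set(restored) == {0, 1}
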
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        **kwargs: Any,
    ) -> SklearnIntentClassifier:
        """Loads trained component (see parent class for full docstring)."""
        from sklearn.preprocessing import LabelEncoder

        try:
            with model_storage.read_from(resource) as model_dir:
                file_name = cls.__name__
                classifier_file = model_dir / f"{file_name}_classifier.pkl"

                if classifier_file.exists():
                    classifier = io_utils.json_unpickle(classifier_file)

                    encoder_file = model_dir / f"{file_name}_encoder.pkl"
                    classes = io_utils.json_unpickle(encoder_file)
                    encoder = LabelEncoder()
                    encoder.classes_ = classes

                    return cls(config, model_storage, resource, classifier,
                               encoder)
        except ValueError:
            logger.debug(
                f"Failed to load '{cls.__name__}' from model storage. Resource "
                f"'{resource.name}' doesn't exist.")
        return cls(config, model_storage, resource)
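
The `load` above degrades gracefully: a missing resource is logged at debug level and an untrained instance is returned instead of raising. A framework-free sketch of that pattern (`Classifier` here is illustrative, not a Rasa API; `pickle` stands in for `io_utils.json_unpickle`):

    import logging
    import pickle
    from pathlib import Path
    from typing import Any, Optional

    logger = logging.getLogger(__name__)

    class Classifier:
        """Illustrative stand-in for a trainable component."""

        def __init__(self, model: Optional[Any] = None) -> None:
            self.model = model

        @classmethod
        def load(cls, model_dir: Path) -> "Classifier":
            try:
                with (model_dir / "classifier.pkl").open("rb") as f:
                    return cls(model=pickle.load(f))
            except (FileNotFoundError, ValueError):
                logger.debug("No trained model found; returning an untrained instance.")
                return cls()
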
Example #3
    def load(cls, path: Text) -> "TEDPolicy":
        """Loads a policy from the storage.

        **Needs to load its featurizer**
        """

        if not os.path.exists(path):
            raise Exception(f"Failed to load TED policy model. Path "
                            f"'{os.path.abspath(path)}' doesn't exist.")

        model_path = Path(path)
        tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

        featurizer = TrackerFeaturizer.load(path)

        if not (model_path /
                f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
            return cls(featurizer=featurizer)

        loaded_data = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl")
        label_data = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl")
        meta = io_utils.pickle_load(model_path /
                                    f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
        priority = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl")

        model_data_example = RasaModelData(label_key=LABEL_IDS,
                                           data=loaded_data)
        meta = train_utils.update_similarity_type(meta)

        model = TED.load(
            str(tf_model_file),
            model_data_example,
            data_signature=model_data_example.get_signature(),
            config=meta,
            max_history_tracker_featurizer_used=isinstance(
                featurizer, MaxHistoryTrackerFeaturizer),
            label_data=label_data,
        )

        # build the graph for prediction
        predict_data_example = RasaModelData(
            label_key=LABEL_IDS,
            data={
                feature_name: features
                for feature_name, features in model_data_example.items()
                if DIALOGUE in feature_name
            },
        )
        model.build_for_predict(predict_data_example)

        return cls(featurizer=featurizer,
                   priority=priority,
                   model=model,
                   **meta)
Example #4
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Text,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["CountVectorsFeaturizer"] = None,
        should_finetune: bool = False,
        **kwargs: Any,
    ) -> "CountVectorsFeaturizer":
        """Loads trained component (see parent class for full docstring)."""
        file_name = meta.get("file")
        featurizer_file = os.path.join(model_dir, file_name)

        if not os.path.exists(featurizer_file):
            return cls(meta)

        vocabulary = io_utils.json_unpickle(featurizer_file)

        share_vocabulary = meta["use_shared_vocab"]

        if share_vocabulary:
            vectorizers = cls._create_shared_vocab_vectorizers(
                meta, vocabulary=vocabulary)
        else:
            vectorizers = cls._create_independent_vocab_vectorizers(
                meta, vocabulary=vocabulary)

        ftr = cls(meta, vectorizers, should_finetune)

        # make sure the vocabulary has been loaded correctly
        for attribute in vectorizers:
            ftr.vectorizers[attribute]._validate_vocabulary()

        return ftr
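
`_validate_vocabulary()` is a private scikit-learn helper: it checks the injected vocabulary mapping and sets `fixed_vocabulary_`, which is why the loader calls it as a sanity check. A minimal sketch of rebuilding a `CountVectorizer` from a persisted `{token: column_index}` mapping (assuming the `_create_*_vocab_vectorizers` factories pass the vocabulary through like this):

    from sklearn.feature_extraction.text import CountVectorizer

    vectorizer = CountVectorizer(vocabulary={"hello": 0, "world": 1})
    vectorizer._validate_vocabulary()  # raises ValueError if the mapping is malformed
    assert vectorizer.fixed_vocabulary_

    # no fit() needed: the vocabulary defines the feature columns
    matrix = vectorizer.transform(["hello world hello"])
    assert matrix.toarray().tolist() == [[2, 1]]
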
Example #5
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Optional[Text] = None,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["ResponseSelector"] = None,
        **kwargs: Any,
    ) -> "ResponseSelector":
        """Loads the trained model from the provided directory."""

        model = super().load(
            meta, model_dir, model_metadata, cached_component, **kwargs
        )
        if model == cls(component_config=meta):
            model.retrieval_intent_mapping = {}
            return model  # pytype: disable=bad-return-type

        file_name = meta.get("file")
        model_dir = Path(model_dir)

        retrieval_intent_mapping = io_utils.json_unpickle(
            model_dir / f"{file_name}.retrieval_intent_mapping.pkl"
        )

        model.retrieval_intent_mapping = retrieval_intent_mapping

        return model  # pytype: disable=bad-return-type
Example #6
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Optional[Text] = None,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["CountVectorsFeaturizer"] = None,
        **kwargs: Any,
    ) -> "CountVectorsFeaturizer":

        file_name = meta.get("file")
        featurizer_file = os.path.join(model_dir, file_name)

        if not os.path.exists(featurizer_file):
            return cls(meta)

        vocabulary = io_utils.json_unpickle(featurizer_file)

        share_vocabulary = meta["use_shared_vocab"]

        if share_vocabulary:
            vectorizers = cls._create_shared_vocab_vectorizers(
                meta, vocabulary=vocabulary)
        else:
            vectorizers = cls._create_independent_vocab_vectorizers(
                meta, vocabulary=vocabulary)

        return cls(meta, vectorizers)
Example #7
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Optional[Text] = None,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["SklearnIntentClassifier"] = None,
        **kwargs: Any,
    ) -> "SklearnIntentClassifier":
        class_encoder_file = os.path.join(model_dir, meta.get("class_encoder"))
        if os.path.exists(class_encoder_file):
            class_encoder = io_utils.json_unpickle(class_encoder_file)
            return cls(meta, class_encoder)
        else:
            return cls(meta)
Example #8
    def load(cls,
             meta: Dict[Text, Any],
             model_dir=None,
             model_metadata=None,
             cached_component=None,
             **kwargs):
        file_name = meta.get("classifier_file")
        classifier_file = os.path.join(model_dir, file_name)
        # note: returns the raw unpickled classifier, not a component instance
        return json_unpickle(classifier_file)
Example #9
    def load(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
        **kwargs: Any,
    ) -> CountVectorsFeaturizer:
        """Loads trained component (see parent class for full docstring)."""
        try:
            with model_storage.read_from(resource) as model_dir:
                featurizer_file = model_dir / "vocabularies.pkl"
                vocabulary = io_utils.json_unpickle(featurizer_file)

                share_vocabulary = config["use_shared_vocab"]

                if share_vocabulary:
                    vectorizers = cls._create_shared_vocab_vectorizers(
                        config, vocabulary=vocabulary
                    )
                else:
                    vectorizers = cls._create_independent_vocab_vectorizers(
                        config, vocabulary=vocabulary
                    )

                oov_words = rasa.shared.utils.io.read_json_file(
                    model_dir / "oov_words.json"
                )

                ftr = cls(
                    config,
                    model_storage,
                    resource,
                    execution_context,
                    vectorizers=vectorizers,
                    oov_token=config["OOV_token"],
                    oov_words=oov_words,
                )

                # make sure the vocabulary has been loaded correctly
                for attribute in vectorizers:
                    ftr.vectorizers[attribute]._validate_vocabulary()

                return ftr

        except (ValueError, FileNotFoundError, FileIOException):
            logger.debug(
                f"Failed to load `{cls.__class__.__name__}` from model storage. "
                f"Resource '{resource.name}' doesn't exist."
            )
            return cls(
                config=config,
                model_storage=model_storage,
                resource=resource,
                execution_context=execution_context,
            )
Example #10
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Optional[Text] = None,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["SklearnIntentClassifier"] = None,
        **kwargs: Any,
    ) -> "SklearnIntentClassifier":
        from sklearn.preprocessing import LabelEncoder

        classifier_file = os.path.join(model_dir, meta.get("classifier"))
        encoder_file = os.path.join(model_dir, meta.get("encoder"))

        if os.path.exists(classifier_file):
            classifier = io_utils.json_unpickle(classifier_file)
            classes = io_utils.json_unpickle(encoder_file)
            encoder = LabelEncoder()
            encoder.classes_ = classes
            return cls(meta, classifier, encoder)
        else:
            return cls(meta)
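
Examples #1, #10 and #11 persist only the encoder's `classes_` array and rebuild a fresh `LabelEncoder` at load time. That works because `classes_` is the encoder's entire fitted state:

    import numpy as np
    from sklearn.preprocessing import LabelEncoder

    original = LabelEncoder().fit(["greet", "goodbye", "greet"])
    classes = original.classes_  # array(["goodbye", "greet"]), sorted

    # rebuild from the persisted classes alone, as the loaders above do
    restored = LabelEncoder()
    restored.classes_ = classes
    assert restored.transform(["greet"]).tolist() == [1]
    assert restored.inverse_transform(np.array([0])).tolist() == ["goodbye"]
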
Example #11
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Text,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["SklearnIntentClassifier"] = None,
        **kwargs: Any,
    ) -> "SklearnIntentClassifier":
        """Loads trained component (see parent class for full docstring)."""
        from sklearn.preprocessing import LabelEncoder

        classifier_file = os.path.join(model_dir, meta["classifier"])
        encoder_file = os.path.join(model_dir, meta["encoder"])

        if os.path.exists(classifier_file):
            classifier = io_utils.json_unpickle(classifier_file)
            classes = io_utils.json_unpickle(encoder_file)
            encoder = LabelEncoder()
            encoder.classes_ = classes
            return cls(meta, classifier, encoder)
        else:
            return cls(meta)
Example #12
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Optional[Text] = None,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["LexicalSyntacticFeaturizer"] = None,
        **kwargs: Any,
    ) -> "LexicalSyntacticFeaturizer":

        file_name = meta.get("file")

        feature_to_idx_file = Path(model_dir) / f"{file_name}.feature_to_idx_dict.pkl"
        feature_to_idx_dict = io_utils.json_unpickle(feature_to_idx_file)

        return LexicalSyntacticFeaturizer(meta, feature_to_idx_dict=feature_to_idx_dict)
Example #13
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Text,
        model_metadata: Optional[Metadata] = None,
        cached_component: Optional["LexicalSyntacticFeaturizer"] = None,
        **kwargs: Any,
    ) -> "LexicalSyntacticFeaturizer":
        """Loads trained component (see parent class for full docstring)."""
        file_name = meta.get("file")

        feature_to_idx_file = Path(
            model_dir) / f"{file_name}.feature_to_idx_dict.pkl"
        feature_to_idx_dict = io_utils.json_unpickle(feature_to_idx_file)

        return LexicalSyntacticFeaturizer(
            meta, feature_to_idx_dict=feature_to_idx_dict)
Example #14
    def _load_model_utilities(cls, model_path: Path) -> Dict[Text, Any]:
        """Loads model's utility attributes.

        Args:
            model_path: Path from which to load the model utilities.
        """
        tf_model_file = model_path / f"{cls._metadata_filename()}.tf_model"
        loaded_data = io_utils.pickle_load(
            model_path / f"{cls._metadata_filename()}.data_example.pkl")
        label_data = io_utils.pickle_load(
            model_path / f"{cls._metadata_filename()}.label_data.pkl")
        fake_features = io_utils.pickle_load(
            model_path / f"{cls._metadata_filename()}.fake_features.pkl")
        label_data = RasaModelData(data=label_data)
        meta = io_utils.pickle_load(model_path /
                                    f"{cls._metadata_filename()}.meta.pkl")
        priority = io_utils.json_unpickle(
            model_path / f"{cls._metadata_filename()}.priority.pkl")
        entity_tag_specs = rasa.shared.utils.io.read_json_file(
            model_path / f"{cls._metadata_filename()}.entity_tag_specs.json")
        entity_tag_specs = [
            EntityTagSpec(
                tag_name=tag_spec["tag_name"],
                ids_to_tags={
                    int(key): value
                    for key, value in tag_spec["ids_to_tags"].items()
                },
                tags_to_ids={
                    key: int(value)
                    for key, value in tag_spec["tags_to_ids"].items()
                },
                num_tags=tag_spec["num_tags"],
            ) for tag_spec in entity_tag_specs
        ]

        return {
            "tf_model_file": tf_model_file,
            "loaded_data": loaded_data,
            "fake_features": fake_features,
            "label_data": label_data,
            "meta": meta,
            "priority": priority,
            "entity_tag_specs": entity_tag_specs,
        }
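
The `int(key)` / `int(value)` conversions above address the same pitfall as Example #1: the tag specs are stored as JSON, and JSON object keys are always strings, so the integer keys of `ids_to_tags` must be restored by hand:

    import json

    ids_to_tags = {0: "O", 1: "B-city", 2: "I-city"}
    round_tripped = json.loads(json.dumps(ids_to_tags))
    assert list(round_tripped) == ["0", "1", "2"]  # keys became strings

    restored = {int(key): value for key, value in round_tripped.items()}
    assert restored == ids_to_tags
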
Example #15
    def load(cls, path: Union[Text, Path]) -> Optional["TEDPolicy"]:
        """Loads a policy from the storage.

        **Needs to load its featurizer**
        """
        model_path = Path(path)

        if not model_path.exists():
            logger.error(
                f"Failed to load TED policy model. Path "
                f"'{model_path.absolute()}' doesn't exist."
            )
            return None

        tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

        featurizer = TrackerFeaturizer.load(path)

        if not (model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
            return cls(featurizer=featurizer)

        loaded_data = io_utils.pickle_load(
            model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl"
        )
        label_data = io_utils.pickle_load(
            model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl"
        )
        zero_state_features = io_utils.pickle_load(
            model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl"
        )
        label_data = RasaModelData(data=label_data)
        meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
        priority = io_utils.json_unpickle(
            model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl"
        )

        model_data_example = RasaModelData(
            label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
        )
        meta = train_utils.update_similarity_type(meta)

        model = TED.load(
            str(tf_model_file),
            model_data_example,
            data_signature=model_data_example.get_signature(),
            config=meta,
            max_history_tracker_featurizer_used=isinstance(
                featurizer, MaxHistoryTrackerFeaturizer
            ),
            label_data=label_data,
        )

        # build the graph for prediction
        predict_data_example = RasaModelData(
            label_key=LABEL_KEY,
            label_sub_key=LABEL_SUB_KEY,
            data={
                feature_name: features
                for feature_name, features in model_data_example.items()
                if feature_name
                in STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [DIALOGUE]
            },
        )
        model.build_for_predict(predict_data_example)

        return cls(
            featurizer=featurizer,
            priority=priority,
            model=model,
            zero_state_features=zero_state_features,
            **meta,
        )