def from_path(cls, path, **shared):
        """Loads a :class:`CRFSlotFiller` instance from a path

        The data at the given path must have been generated using
        :func:`~CRFSlotFiller.persist`
        """
        path = Path(path)
        model_path = path / "slot_filler.json"
        if not model_path.exists():
            raise LoadingError("Missing slot filler model file: %s" %
                               model_path.name)

        with model_path.open(encoding="utf8") as f:
            model = json.load(f)

        slot_filler_config = cls.config_type.from_dict(model["config"])
        slot_filler = cls(config=slot_filler_config, **shared)
        slot_filler.language = model["language_code"]
        slot_filler.intent = model["intent"]
        slot_filler.slot_name_mapping = model["slot_name_mapping"]
        crf_model_file = model["crf_model_file"]
        if crf_model_file is not None:
            crf = _crf_model_from_path(path / crf_model_file)
            slot_filler.crf_model = crf
        return slot_filler
Esempio n. 2
0
    def from_path(cls, path, **shared):
        path = Path(path)
        model_path = path / "vectorizer.json"
        if not model_path.exists():
            raise LoadingError("Missing vectorizer model file: %s" %
                               model_path.name)

        with model_path.open(encoding="utf8") as f:
            vectorizer_dict = json.load(f)
        config = vectorizer_dict.pop("config")

        self = cls(config, **shared)
        self._language = vectorizer_dict["language_code"]
        self._word_pairs = None

        builtin_entity_scope = vectorizer_dict["builtin_entity_scope"]
        if builtin_entity_scope is not None:
            builtin_entity_scope = set(builtin_entity_scope)
        self.builtin_entity_scope = builtin_entity_scope

        if vectorizer_dict["word_pairs"]:
            self._word_pairs = {
                tuple(p): int(i)
                for i, p in iteritems(vectorizer_dict["word_pairs"])
            }
        return self
Esempio n. 3
0
    def from_path(cls, path, **shared):
        path = Path(path)

        model_path = path / "featurizer.json"
        if not model_path.exists():
            raise LoadingError("Missing featurizer model file: %s" %
                               model_path.name)
        with model_path.open("r", encoding="utf-8") as f:
            featurizer_dict = json.load(f)

        featurizer_config = featurizer_dict["config"]
        featurizer = cls(featurizer_config, **shared)

        featurizer.language = featurizer_dict["language_code"]

        tfidf_vectorizer = featurizer_dict["tfidf_vectorizer"]
        if tfidf_vectorizer:
            vectorizer_path = path / featurizer_dict["tfidf_vectorizer"]
            tfidf_vectorizer = TfidfVectorizer.from_path(
                vectorizer_path, **shared)
        featurizer.tfidf_vectorizer = tfidf_vectorizer

        cooccurrence_vectorizer = featurizer_dict["cooccurrence_vectorizer"]
        if cooccurrence_vectorizer:
            vectorizer_path = path / featurizer_dict["cooccurrence_vectorizer"]
            cooccurrence_vectorizer = CooccurrenceVectorizer.from_path(
                vectorizer_path, **shared)
        featurizer.cooccurrence_vectorizer = cooccurrence_vectorizer

        return featurizer
Esempio n. 4
0
    def load_from_path(cls, unit_path, unit_name=None, **shared):
        """Load a :class:`ProcessingUnit` from a persisted processing unit
        directory

        Args:
            unit_path (str or :class:`pathlib.Path`): path to the persisted
                processing unit
            unit_name (str, optional): Name of the processing unit to load.
                By default, the unit name is assumed to be stored in a
                "metadata.json" file located in the directory at unit_path.

        Raises:
            LoadingError: when unit_name is None and no metadata file is found
                in the processing unit directory
        """
        unit_path = Path(unit_path)
        if unit_name is None:
            metadata_path = unit_path / "metadata.json"
            if not metadata_path.exists():
                raise LoadingError(
                    "Missing metadata for processing unit at path %s" %
                    str(unit_path))
            with metadata_path.open(encoding="utf8") as f:
                metadata = json.load(f)
            unit_name = metadata["unit_name"]
        unit = cls.by_name(unit_name)
        return unit.from_path(unit_path, **shared)
Esempio n. 5
0
    def from_path(cls, path, **shared):
        """Loads a :class:`SnipsNLUEngine` instance from a directory path

        The data at the given path must have been generated using
        :func:`~SnipsNLUEngine.persist`

        Args:
            path (str): The path where the nlu engine is stored

        Raises:
            LoadingError: when some files are missing
            IncompatibleModelError: when trying to load an engine model which
                is not compatible with the current version of the lib
        """
        directory_path = Path(path)
        model_path = directory_path / "nlu_engine.json"
        if not model_path.exists():
            raise LoadingError("Missing nlu engine model file: %s" %
                               model_path.name)

        with model_path.open(encoding="utf8") as f:
            model = json.load(f)
        model_version = model.get("model_version")
        if model_version is None or model_version != __model_version__:
            raise IncompatibleModelError(model_version)

        dataset_metadata = model["dataset_metadata"]
        if shared.get(RESOURCES) is None and dataset_metadata is not None:
            language = dataset_metadata["language_code"]
            resources_dir = directory_path / "resources" / language
            if resources_dir.is_dir():
                resources = load_resources_from_dir(resources_dir)
                shared[RESOURCES] = resources

        if shared.get(BUILTIN_ENTITY_PARSER) is None:
            path = model["builtin_entity_parser"]
            if path is not None:
                parser_path = directory_path / path
                shared[BUILTIN_ENTITY_PARSER] = BuiltinEntityParser.from_path(
                    parser_path)

        if shared.get(CUSTOM_ENTITY_PARSER) is None:
            path = model["custom_entity_parser"]
            if path is not None:
                parser_path = directory_path / path
                shared[CUSTOM_ENTITY_PARSER] = CustomEntityParser.from_path(
                    parser_path)

        config = cls.config_type.from_dict(model["config"])
        nlu_engine = cls(config=config, **shared)
        nlu_engine.dataset_metadata = dataset_metadata
        intent_parsers = []
        for parser_idx, parser_name in enumerate(model["intent_parsers"]):
            parser_config = config.intent_parsers_configs[parser_idx]
            intent_parser_path = directory_path / parser_name
            intent_parser = IntentParser.load_from_path(
                intent_parser_path, parser_config.unit_name, **shared)
            intent_parsers.append(intent_parser)
        nlu_engine.intent_parsers = intent_parsers
        return nlu_engine
    def from_path(cls, path, **shared):
        """Loads a :class:`DeterministicIntentParser` instance from a path

        The data at the given path must have been generated using
        :func:`~DeterministicIntentParser.persist`
        """
        path = Path(path)
        model_path = path / "intent_parser.json"
        if not model_path.exists():
            raise LoadingError(
                "Missing deterministic intent parser metadata file: %s" %
                model_path.name)

        with model_path.open(encoding="utf8") as f:
            metadata = json.load(f)
        return cls.from_dict(metadata, **shared)
    def from_path(cls, path, **shared):
        import numpy as np
        import scipy.sparse as sp
        from sklearn.feature_extraction.text import (TfidfTransformer,
                                                     TfidfVectorizer as
                                                     SklearnTfidfVectorizer)

        path = Path(path)

        model_path = path / "vectorizer.json"
        if not model_path.exists():
            raise LoadingError("Missing vectorizer model file: %s" %
                               model_path.name)
        with model_path.open("r", encoding="utf-8") as f:
            vectorizer_dict = json.load(f)

        vectorizer = cls(vectorizer_dict["config"], **shared)
        vectorizer._language = vectorizer_dict["language_code"]

        builtin_entity_scope = vectorizer_dict["builtin_entity_scope"]
        if builtin_entity_scope is not None:
            builtin_entity_scope = set(builtin_entity_scope)
        vectorizer.builtin_entity_scope = builtin_entity_scope

        vectorizer_ = vectorizer_dict["vectorizer"]
        if vectorizer_:
            vocab = vectorizer_["vocab"]
            idf_diag_data = vectorizer_["idf_diag"]
            idf_diag_data = np.array(idf_diag_data)

            idf_diag_shape = (len(idf_diag_data), len(idf_diag_data))
            row = list(range(idf_diag_shape[0]))
            col = list(range(idf_diag_shape[0]))
            idf_diag = sp.csr_matrix((idf_diag_data, (row, col)),
                                     shape=idf_diag_shape)

            tfidf_transformer = TfidfTransformer()
            tfidf_transformer._idf_diag = idf_diag

            vectorizer_ = SklearnTfidfVectorizer(
                tokenizer=lambda x: tokenize_light(x, vectorizer._language))
            vectorizer_.vocabulary_ = vocab

            vectorizer_._tfidf = tfidf_transformer

        vectorizer._tfidf_vectorizer = vectorizer_
        return vectorizer
Esempio n. 8
0
    def from_path(cls, path, **shared):
        """Loads a :class:`LogRegIntentClassifier` instance from a path

        The data at the given path must have been generated using
        :func:`~LogRegIntentClassifier.persist`
        """
        import numpy as np
        from sklearn.linear_model import SGDClassifier

        path = Path(path)
        model_path = path / "intent_classifier.json"
        if not model_path.exists():
            raise LoadingError("Missing intent classifier model file: %s"
                               % model_path.name)

        with model_path.open(encoding="utf8") as f:
            model_dict = json.load(f)

        # Create the classifier
        config = LogRegIntentClassifierConfig.from_dict(model_dict["config"])
        intent_classifier = cls(config=config, **shared)
        intent_classifier.intent_list = model_dict['intent_list']

        # Create the underlying SGD classifier
        sgd_classifier = None
        coeffs = model_dict['coeffs']
        intercept = model_dict['intercept']
        t_ = model_dict["t_"]
        if coeffs is not None and intercept is not None:
            sgd_classifier = SGDClassifier(**LOG_REG_ARGS)
            sgd_classifier.coef_ = np.array(coeffs)
            sgd_classifier.intercept_ = np.array(intercept)
            sgd_classifier.t_ = t_
        intent_classifier.classifier = sgd_classifier

        # Add the featurizer
        featurizer = model_dict['featurizer']
        if featurizer is not None:
            featurizer_path = path / featurizer
            intent_classifier.featurizer = Featurizer.from_path(
                featurizer_path, **shared)

        return intent_classifier
    def from_path(cls, path, **shared):
        """Loads a :class:`ProbabilisticIntentParser` instance from a path

        The data at the given path must have been generated using
        :func:`~ProbabilisticIntentParser.persist`
        """
        path = Path(path)
        model_path = path / "intent_parser.json"
        if not model_path.exists():
            raise LoadingError(
                "Missing probabilistic intent parser model file: %s" %
                model_path.name)

        with model_path.open(encoding="utf8") as f:
            model = json.load(f)

        config = cls.config_type.from_dict(model["config"])
        parser = cls(config=config, **shared)
        classifier = None
        intent_classifier_path = path / "intent_classifier"
        if intent_classifier_path.exists():
            classifier_unit_name = config.intent_classifier_config.unit_name
            classifier = IntentClassifier.load_from_path(
                intent_classifier_path, classifier_unit_name, **shared)

        slot_fillers = dict()
        slot_filler_unit_name = config.slot_filler_config.unit_name
        for slot_filler_conf in model["slot_fillers"]:
            intent = slot_filler_conf["intent"]
            slot_filler_path = path / slot_filler_conf["slot_filler_name"]
            slot_filler = SlotFiller.load_from_path(slot_filler_path,
                                                    slot_filler_unit_name,
                                                    **shared)
            slot_fillers[intent] = slot_filler

        parser.intent_classifier = classifier
        parser.slot_fillers = slot_fillers
        return parser