Exemple #1
0
    def from_dict(cls, obj_dict):
        """Build a :class:`Featurizer` from its :obj:`dict` representation

        The input is expected to be the output of :func:`~Featurizer.to_dict`
        """
        language_code = obj_dict['language_code']
        featurizer_config = FeaturizerConfig.from_dict(obj_dict["config"])
        vectorizer = _deserialize_tfidf_vectorizer(
            obj_dict["tfidf_vectorizer"], language_code, featurizer_config)

        # Rebuild every mapped value as a set
        serialized_names = obj_dict['entity_utterances_to_feature_names']
        utterances_to_feature_names = dict()
        for utterance, names in iteritems(serialized_names):
            utterances_to_feature_names[utterance] = set(names)

        return cls(
            language=language_code,
            tfidf_vectorizer=vectorizer,
            pvalue_threshold=obj_dict['pvalue_threshold'],
            entity_utterances_to_feature_names=utterances_to_feature_names,
            best_features=obj_dict['best_features'],
            config=featurizer_config,
            unknown_words_replacement_string=obj_dict[
                "unknown_words_replacement_string"]
        )
Exemple #2
0
    def test_featurizer_config(self):
        # Round trip: a config built from a dict must serialize back to it

        # Given
        expected_dict = {"sublinear_tf": True}

        # When
        roundtrip_dict = FeaturizerConfig.from_dict(expected_dict).to_dict()

        # Then
        self.assertDictEqual(expected_dict, roundtrip_dict)
Exemple #3
0
    def test_featurizer_config(self):
        # Round trip: a config built from a dict must serialize back to it

        # Given
        expected_dict = {
            "sublinear_tf": True,
            "pvalue_threshold": 0.4,
            "word_clusters_name": None
        }

        # When
        featurizer_config = FeaturizerConfig.from_dict(expected_dict)
        roundtrip_dict = featurizer_config.to_dict()

        # Then
        self.assertDictEqual(expected_dict, roundtrip_dict)
Exemple #4
0
    def test_featurizer_config(self):
        # Round trip: a config built from a dict (including the nested
        # vectorizer configs) must serialize back to that same dict

        # Given
        tfidf_config = TfidfVectorizerConfig()
        cooccurrence_config = CooccurrenceVectorizerConfig()
        expected_dict = {
            "unit_name": "featurizer",
            "pvalue_threshold": 0.2,
            "added_cooccurrence_feature_ratio": 0.2,
            "tfidf_vectorizer_config": tfidf_config.to_dict(),
            "cooccurrence_vectorizer_config": cooccurrence_config.to_dict()
        }

        # When
        roundtrip_dict = FeaturizerConfig.from_dict(expected_dict).to_dict()

        # Then
        self.assertDictEqual(expected_dict, roundtrip_dict)
Exemple #5
0
    def from_dict(cls, obj_dict, **shared):
        """Build a :class:`Featurizer` from its :obj:`dict` representation

        The input is expected to be the output of :func:`~Featurizer.to_dict`.
        The builtin and custom entity parsers are looked up in ``shared``;
        a parser which is not provided is passed on as ``None``.
        """
        language_code = obj_dict["language_code"]
        featurizer_config = FeaturizerConfig.from_dict(obj_dict["config"])
        vectorizer = _deserialize_tfidf_vectorizer(
            obj_dict["tfidf_vectorizer"],
            language_code,
            featurizer_config.sublinear_tf
        )
        return cls(
            language=language_code,
            tfidf_vectorizer=vectorizer,
            best_features=obj_dict["best_features"],
            config=featurizer_config,
            unknown_words_replacement_string=obj_dict[
                "unknown_words_replacement_string"],
            builtin_entity_parser=shared.get(BUILTIN_ENTITY_PARSER),
            custom_entity_parser=shared.get(CUSTOM_ENTITY_PARSER)
        )
Exemple #6
0
    def from_dict(cls, obj_dict):
        """Build a :class:`Featurizer` from its :obj:`dict` representation

        The input is expected to be the output of :func:`~Featurizer.to_dict`
        """
        language_code = obj_dict['language_code']
        featurizer_config = FeaturizerConfig.from_dict(obj_dict["config"])
        vectorizer = _deserialize_tfidf_vectorizer(
            obj_dict["tfidf_vectorizer"],
            language_code,
            featurizer_config.sublinear_tf
        )
        # Rebuild every mapped value as a set
        utterances_to_feature_names = dict(
            (utterance, set(names))
            for utterance, names in iteritems(
                obj_dict['entity_utterances_to_feature_names'])
        )
        return cls(
            language=language_code,
            tfidf_vectorizer=vectorizer,
            entity_utterances_to_feature_names=utterances_to_feature_names,
            best_features=obj_dict['best_features'],
            config=featurizer_config,
            unknown_words_replacement_string=obj_dict[
                "unknown_words_replacement_string"]
        )