Exemple #1
0
    def load(self,
             dir_path='data/models/sequenceLabelling/',
             weight_file=DEFAULT_WEIGHT_FILE_NAME):
        """Reload the model configuration, embeddings, preprocessor and weights.

        Everything is read from the sub-directory of ``dir_path`` named after
        ``self.model_config.model_name``.

        Args:
            dir_path: Parent directory containing one sub-directory per model.
            weight_file: Name of the weights file inside the model directory.
        """
        # The configuration is located using the model name known *before*
        # the reload; every later path uses the name from the reloaded config.
        config_path = os.path.join(dir_path, self.model_config.model_name,
                                   CONFIG_FILE_NAME)
        self.model_config = ModelConfig.load(config_path)

        embeddings_name = self.model_config.embeddings_name
        if embeddings_name is None:
            self.embeddings = None
            self.model_config.word_embedding_size = 0
        else:
            # Do not use cache in 'prediction/production' mode
            self.embeddings = Embeddings(
                embeddings_name,
                resource_registry=self.registry,
                use_ELMo=self.model_config.use_ELMo,
                use_cache=False)
            self.model_config.word_embedding_size = self.embeddings.embed_size

        model_dir = os.path.join(dir_path, self.model_config.model_name)
        self.p = Preprocessor.load(
            os.path.join(model_dir, PROCESSOR_FILE_NAME))

        # Build the architecture without pretrained weights; the weights are
        # loaded from ``weight_file`` just below.
        self.model = get_model(
            self.model_config,
            self.p,
            ntags=len(self.p.vocab_tag),
            load_pretrained_weights=False,
            local_path=model_dir)

        weights_path = os.path.join(model_dir, weight_file)
        print("load weights from", weights_path)
        self.model.load(filepath=weights_path)
        self.model.print_summary()
Exemple #2
0
    def test_serialize_to_json(self, tmp_path):
        """Save a fitted Preprocessor, load it back, and compare attributes.

        Every attribute of the reloaded instance must equal the original's;
        the nested ``feature_preprocessor`` is compared attribute by attribute.
        """
        feature_preprocessor = FeaturesPreprocessor(features_indices=[1])
        feature_preprocessor.fit([[
            [FEATURE_VALUE_1, FEATURE_VALUE_2],
            [FEATURE_VALUE_1, FEATURE_VALUE_3],
            [FEATURE_VALUE_1, FEATURE_VALUE_4],
        ]])

        original = Preprocessor(feature_preprocessor=feature_preprocessor)
        original.fit([['Word1']], [['label1']])

        json_path = os.path.join(str(tmp_path), "serialised.json")
        original.save(file_path=json_path)
        restored = Preprocessor.load(json_path)

        assert restored is not None
        assert restored.feature_preprocessor is not None

        original_attrs = vars(original)
        for name, restored_value in vars(restored).items():
            if name != 'feature_preprocessor':
                assert restored_value == original_attrs[name]
                continue
            # The nested preprocessor is compared through its own attributes
            # rather than with a single equality check on the object.
            original_nested = vars(original_attrs[name])
            for sub_name, restored_sub_value in vars(restored_value).items():
                assert restored_sub_value == original_nested[sub_name]
Exemple #3
0
    def test_load_withUmmappedVariable_shouldIgnore(self, preprocessor2: str):
        """Loading the ``preprocessor2`` fixture gives a 70-entry char vocabulary.

        NOTE(review): judging by the test name, the fixture presumably contains
        a field with no matching attribute that ``load`` must ignore; confirm
        against the fixture definition.
        """
        loaded = Preprocessor.load(preprocessor2)
        char_vocab_size = len(loaded.vocab_char)

        assert char_vocab_size == 70
Exemple #4
0
    def test_load_example(self, preprocessor1):
        """Loading the ``preprocessor1`` fixture gives a 70-entry char vocabulary."""
        loaded = Preprocessor.load(preprocessor1)
        char_vocab_size = len(loaded.vocab_char)

        assert char_vocab_size == 70