Example #1
0
 def test_should_be_able_to_inverse_transform_label(self):
     """Round-trip check: inverse_transform should recover the fitted labels."""
     p = Preprocessor()
     tokens = [['Word1']]
     labels = [['label1']]
     _, encoded = p.fit_transform(tokens, labels)
     assert p.inverse_transform(encoded[0]) == labels[0]
Example #2
0
 def test_should_transform_unseen_label(self):
     """A label not seen during fit should be encoded as index 0."""
     p = Preprocessor(return_lengths=False, padding=False)
     fit_tokens, fit_labels = [['Word1']], [['label1']]
     fitted = p.fit(fit_tokens, fit_labels)
     _, encoded = fitted.transform([['Word1', 'Word1']],
                                   [['label1', 'label2']])
     assert encoded == [[1, 0]]
Example #3
0
    def load(self,
             dir_path='data/models/sequenceLabelling/',
             weight_file=DEFAULT_WEIGHT_FILE_NAME):
        """Restore model config, embeddings, preprocessor and weights from disk.

        Args:
            dir_path: root directory containing one sub-directory per model.
            weight_file: file name of the saved weights inside the model dir.
        """
        # Resolve the config location using the model name currently held by
        # self.model_config; the reloaded config may carry a different name,
        # which is what all subsequent paths use.
        config_dir = os.path.join(dir_path, self.model_config.model_name)
        self.model_config = ModelConfig.load(
            os.path.join(config_dir, CONFIG_FILE_NAME))

        if self.model_config.embeddings_name is None:
            self.embeddings = None
            self.model_config.word_embedding_size = 0
        else:
            # Load embeddings; caching is disabled in prediction/production mode.
            self.embeddings = Embeddings(self.model_config.embeddings_name,
                                         resource_registry=self.registry,
                                         use_ELMo=self.model_config.use_ELMo,
                                         use_cache=False)
            self.model_config.word_embedding_size = self.embeddings.embed_size

        # Paths below are based on the (possibly updated) reloaded model name.
        model_dir = os.path.join(dir_path, self.model_config.model_name)
        self.p = Preprocessor.load(os.path.join(model_dir, PROCESSOR_FILE_NAME))
        self.model = get_model(self.model_config,
                               self.p,
                               ntags=len(self.p.vocab_tag),
                               load_pretrained_weights=False,
                               local_path=model_dir)
        weights_path = os.path.join(model_dir, weight_file)
        print("load weights from", weights_path)
        self.model.load(filepath=weights_path)
        self.model.print_summary()
Example #4
0
 def test_should_fit_single_word_dataset(self):
     """Fitting a one-token / one-label dataset should populate every vocab."""
     p = Preprocessor()
     tokens, labels = [['Word1']], [['label1']]
     tokens_enc, labels_enc = p.fit_transform(tokens, labels)
     LOGGER.debug('vocab_char: %s', p.vocab_char)
     LOGGER.debug('vocab_case: %s', p.vocab_case)
     LOGGER.debug('vocab_tag: %s', p.vocab_tag)
     LOGGER.debug('X_transformed: %s', tokens_enc)
     LOGGER.debug('y_transformed: %s', labels_enc)
     assert all(c in p.vocab_char for c in 'Word1')
     expected_cases = {'numeric', 'allLower', 'allUpper', 'initialUpper'}
     assert all(case in p.vocab_case for case in expected_cases)
     assert 'label1' in p.vocab_tag
     assert len(tokens_enc) == 1
     assert len(labels_enc) == 1
Example #5
0
    def test_serialize_to_json(self, tmp_path):
        """Save/load round-trip should preserve the whole Preprocessor state,
        including the nested feature preprocessor's attributes."""
        features_preprocessor = FeaturesPreprocessor(features_indices=[1])
        features_batch = [[[FEATURE_VALUE_1, FEATURE_VALUE_2],
                           [FEATURE_VALUE_1, FEATURE_VALUE_3],
                           [FEATURE_VALUE_1, FEATURE_VALUE_4]]]
        features_preprocessor.fit(features_batch)
        original = Preprocessor(feature_preprocessor=features_preprocessor)
        original.fit([['Word1']], [['label1']])

        json_path = os.path.join(str(tmp_path), "serialised.json")
        original.save(file_path=json_path)

        restored = Preprocessor.load(json_path)

        assert restored is not None
        assert restored.feature_preprocessor is not None
        original_state = original.__dict__
        restored_state = restored.__dict__
        for key in restored_state.keys():
            if key == 'feature_preprocessor':
                # Compare the nested preprocessor attribute-by-attribute.
                nested_restored = restored_state[key].__dict__
                nested_original = original_state[key].__dict__
                for sub_key in nested_restored.keys():
                    assert nested_restored[sub_key] == nested_original[sub_key]
            else:
                assert restored_state[key] == original_state[key]
Example #6
0
    def test_load_withUmmappedVariable_shouldIgnore(self, preprocessor2: str):
        """Loading a serialised preprocessor with an unmapped variable should
        ignore it and still restore the character vocabulary."""
        loaded = Preprocessor.load(preprocessor2)
        assert len(loaded.vocab_char) == 70
Example #7
0
    def test_load_example(self, preprocessor1):
        """A serialised example preprocessor should load with its full
        character vocabulary intact."""
        loaded = Preprocessor.load(preprocessor1)
        assert len(loaded.vocab_char) == 70
Example #8
0
 def test_should_fit_empty_dataset(self):
     """Fitting on empty token and label lists should not raise."""
     Preprocessor().fit([], [])
Example #9
0
 def test_should_be_able_to_instantiate_with_default_values(self):
     """Constructing a Preprocessor with no arguments should not raise."""
     Preprocessor()