Example #1
0
    def test_serialize_to_json(self, tmp_path):
        """Save a fitted Preprocessor to JSON, load it back, and compare attributes.

        Every attribute found on the loaded instance must equal the matching
        attribute on the original; the nested ``feature_preprocessor`` is
        compared attribute by attribute rather than as a whole object.
        """
        feature_rows = [[
            [FEATURE_VALUE_1, FEATURE_VALUE_2],
            [FEATURE_VALUE_1, FEATURE_VALUE_3],
            [FEATURE_VALUE_1, FEATURE_VALUE_4],
        ]]
        features_preprocessor = FeaturesPreprocessor(features_indices=[1])
        features_preprocessor.fit(feature_rows)

        original = Preprocessor(feature_preprocessor=features_preprocessor)
        original.fit([['Word1']], [['label1']])

        json_path = os.path.join(str(tmp_path), "serialised.json")
        original.save(file_path=json_path)
        restored = Preprocessor.load(json_path)

        assert restored is not None
        assert restored.feature_preprocessor is not None

        expected_attrs = vars(original)
        for attr_name, restored_value in vars(restored).items():
            if attr_name != 'feature_preprocessor':
                assert restored_value == expected_attrs[attr_name]
                continue
            # Compare the nested preprocessor field by field, driven by the
            # attributes present on the restored instance.
            for nested_name, nested_value in vars(restored_value).items():
                assert nested_value == vars(expected_attrs[attr_name])[nested_name]
Example #2
0
 def test_should_transform_unseen_label(self):
     """Transform labels that include one absent from the fitting data.

     The preprocessor is fitted with only 'label1'; the test expects
     'label1' to be encoded as 1 and the unseen 'label2' as 0.
     """
     train_tokens, train_labels = [['Word1']], [['label1']]
     test_tokens = [['Word1', 'Word1']]
     test_labels = [['label1', 'label2']]

     fitted = Preprocessor(return_lengths=False, padding=False).fit(
         train_tokens, train_labels)
     transformed = fitted.transform(test_tokens, test_labels)

     # Only the encoded labels (second element) are checked here.
     _, encoded_labels = transformed
     assert encoded_labels == [[1, 0]]
Example #3
0
 def test_should_fit_empty_dataset(self):
     """Fit a default Preprocessor on empty inputs; passes if nothing raises."""
     no_tokens, no_labels = [], []
     Preprocessor().fit(no_tokens, no_labels)