Ejemplo n.º 1
0
 def test_should_fit_single_value_feature(self):
     """Fit and transform a batch holding one token with one feature value.

     Expects exactly one feature index after fitting and a transformed
     output that ``all_close`` accepts as matching ``[[[1]]]``.
     """
     single_value_batch = [[[FEATURE_VALUE_1]]]
     features_preprocessor = FeaturesPreprocessor()
     transformed = features_preprocessor.fit_transform(single_value_batch)
     assert len(features_preprocessor.features_indices) == 1
     assert all_close(transformed, [[[1]]])
Ejemplo n.º 2
0
 def test_should_serialize_features_indices_input_preprocessor(self):
     """Round-trip a word preprocessor with a fitted features preprocessor via JSON.

     The preprocessor is converted with ``get_preprocessor_json``, dumped to a
     JSON string, parsed again and restored with ``get_preprocessor_for_json``.
     The restored objects must have the expected types and the restored
     features preprocessor must expose the same vocabulary size, indices and
     value-to-index map as the original.
     """
     original_features = DelftFeaturesPreprocessor(features_indices=[0])
     original_features.fit(SAMPLE_FEATURES)
     word_preprocessor = DelftWordPreprocessor(
         feature_preprocessor=original_features)

     # Log the original state so a failing comparison is easy to diagnose.
     LOGGER.debug('original features_vocabulary_size: %s',
                  original_features.features_vocabulary_size)
     LOGGER.debug('original features_indices: %s',
                  original_features.features_indices)
     LOGGER.debug('original features_map_to_index: %s',
                  original_features.features_map_to_index)

     # Go through an actual JSON string to prove the data is serialisable.
     preprocessor_json = get_preprocessor_json(word_preprocessor)
     output_json = json.dumps(preprocessor_json)
     LOGGER.debug('output_json: %s', output_json)
     parsed_json = json.loads(output_json)
     restored = get_preprocessor_for_json(parsed_json)

     LOGGER.debug('type: %s', type(restored))
     assert isinstance(restored, DelftWordPreprocessor)

     restored_features = restored.feature_preprocessor
     LOGGER.debug('type: %s', type(restored_features))
     assert isinstance(restored_features, DelftFeaturesPreprocessor)

     for attribute_name in (
             'features_vocabulary_size',
             'features_indices',
             'features_map_to_index'):
         assert (getattr(restored_features, attribute_name) ==
                 getattr(original_features, attribute_name))
Ejemplo n.º 3
0
    def test_serialize_to_json(self, tmp_path):
        """Round-trip a fitted word preprocessor through ``save`` and ``load``.

        The reloaded instance must exist and carry a feature preprocessor.
        Every attribute present on the reloaded objects must equal the
        attribute of the same name on the originals; the nested feature
        preprocessor is compared attribute by attribute.
        """
        feature_preprocessor = FeaturesPreprocessor(features_indices=[1])
        feature_preprocessor.fit([[
            [FEATURE_VALUE_1, FEATURE_VALUE_2],
            [FEATURE_VALUE_1, FEATURE_VALUE_3],
            [FEATURE_VALUE_1, FEATURE_VALUE_4],
        ]])
        original = Preprocessor(feature_preprocessor=feature_preprocessor)
        original.fit([['Word1']], [['label1']])

        json_path = os.path.join(str(tmp_path), "serialised.json")
        original.save(file_path=json_path)
        restored = Preprocessor.load(json_path)

        assert restored is not None
        assert restored.feature_preprocessor is not None

        original_attrs = vars(original)
        for name, restored_value in vars(restored).items():
            if name != 'feature_preprocessor':
                assert restored_value == original_attrs[name]
                continue
            # The nested preprocessor is compared through its own attributes.
            original_nested_attrs = vars(original_attrs[name])
            for nested_name, nested_value in vars(restored_value).items():
                assert nested_value == original_nested_attrs[nested_name]
Ejemplo n.º 4
0
 def test_should_fit_single_multiple_value_features(self):
     """Fit and transform one sequence of two tokens with one feature each.

     Expects one feature index, two transformed tokens in the first
     sequence, and a transformed output equal to ``[[[1], [2]]]``.
     """
     two_token_batch = [[[FEATURE_VALUE_1], [FEATURE_VALUE_2]]]
     expected = np.asarray([[[1], [2]]])
     features_preprocessor = FeaturesPreprocessor()
     transformed = features_preprocessor.fit_transform(two_token_batch)
     assert len(features_preprocessor.features_indices) == 1
     assert len(transformed[0]) == 2
     assert np.array_equal(transformed, expected)
Ejemplo n.º 5
0
 def test_should_select_features(self):
     """Fit and transform with ``features_indices=[1]`` on two-column tokens.

     Column 0 holds the same value for every token while column 1 differs;
     the transformed output must be accepted by ``all_close`` as matching
     ``[[[1], [2], [3]]]`` and exactly one feature index must be configured.
     """
     tokens = [
         [FEATURE_VALUE_1, second_value]
         for second_value in (FEATURE_VALUE_2, FEATURE_VALUE_3, FEATURE_VALUE_4)
     ]
     features_preprocessor = FeaturesPreprocessor(features_indices=[1])
     transformed = features_preprocessor.fit_transform([tokens])
     assert len(features_preprocessor.features_indices) == 1
     assert all_close(transformed, [[[1], [2], [3]]])
def get_features_preprocessor(
        model_config: ModelConfig,
        features: np.array = None) -> T_FeaturesPreprocessor:
    """Create the features preprocessor selected by ``model_config``.

    Args:
        model_config: configuration providing ``use_features``,
            ``use_features_indices_input``, ``features_indices``,
            ``features_vocabulary_size`` and ``continuous_features_indices``.
        features: the available features; only checked for being ``None``.

    Returns:
        ``None`` when features are disabled in the configuration or no
        features were passed in. Otherwise a ``FeaturesPreprocessor`` when
        ``use_features_indices_input`` is set, else a
        ``ScienceBeamFeaturesPreprocessor``.
    """
    # Both "no preprocessor" cases only differ in the message that is logged.
    skip_reason = None
    if not model_config.use_features:
        skip_reason = 'features not enabled'
    elif features is None:
        skip_reason = 'no features available'
    if skip_reason is not None:
        LOGGER.info(skip_reason)
        return None

    if not model_config.use_features_indices_input:
        LOGGER.info(
            'using feature indices=%s', model_config.features_indices
        )
        return ScienceBeamFeaturesPreprocessor(
            features_indices=model_config.features_indices,
            continuous_features_indices=model_config.continuous_features_indices
        )

    LOGGER.info(
        'using feature indices as input, features_indices=%s, features_vocab_size=%s',
        model_config.features_indices, model_config.features_vocabulary_size
    )
    return FeaturesPreprocessor(
        features_indices=model_config.features_indices,
        features_vocabulary_size=model_config.features_vocabulary_size
    )
Ejemplo n.º 7
0
 def test_should_fit_empty_dataset(self):
     """Fitting a default preprocessor on an empty list must not raise."""
     empty_features_batch = []
     FeaturesPreprocessor().fit(empty_features_batch)
Ejemplo n.º 8
0
 def test_should_be_able_to_instantiate_with_default_values(self):
     """Constructing the preprocessor without arguments must not raise."""
     _ = FeaturesPreprocessor()
Ejemplo n.º 9
0
 def test_should_transform_unseen_to_zero(self):
     """Transform a feature value that was not part of the fitted data.

     After fitting on ``FEATURE_VALUE_1`` only, transforming
     ``FEATURE_VALUE_2`` must give an output that ``all_close`` accepts as
     matching ``[[[0]]]``.
     """
     seen_batch = [[[FEATURE_VALUE_1]]]
     unseen_batch = [[[FEATURE_VALUE_2]]]
     features_preprocessor = FeaturesPreprocessor()
     features_preprocessor.fit(seen_batch)
     transformed = features_preprocessor.transform(unseen_batch)
     assert all_close(transformed, [[[0]]])