Example #1
0
def test_feature_engineering(tmp_dir):
    """FeatureEngineering run via run_preprocessor gives a tf.data.Dataset."""
    structured = common.generate_structured_data(dtype='dataset')

    # Column metadata is assigned directly on the preprocessor instance.
    engineer = preprocessor_module.FeatureEngineering()
    engineer.column_names = common.COLUMN_NAMES_FROM_NUMPY
    engineer.column_types = common.COLUMN_TYPES_FROM_NUMPY

    companion = common.generate_data(dtype='dataset')
    result = run_preprocessor(
        engineer, structured, companion, tf.float32, tmp_dir)

    assert isinstance(result, tf.data.Dataset)
Example #2
0
 def build_feature_engineering(self, hp, input_node):
     """Optionally wrap ``input_node`` with a FeatureEngineering preprocessor.

     Uses ``self.feature_engineering`` when it is set; when it is ``None``
     the decision is taken from the ``'feature_engineering'`` choice on
     ``hp``. Returns ``input_node`` untouched when the step is disabled,
     otherwise the result of calling the preprocessor on it.
     """
     enabled = self.feature_engineering
     if enabled is None:
         # TODO: If False, use plain label encoding.
         enabled = hp.Choice('feature_engineering', [True], default=True)

     if not enabled:
         return input_node
     return preprocessor.FeatureEngineering()(input_node)
Example #3
0
def test_feature_engineering():
    """FeatureEngineering run via run_preprocessor gives a tf.data.Dataset."""
    structured = common.generate_structured_data(dtype='dataset')

    # Column metadata is supplied through a StructuredDataInput node
    # attached to the preprocessor.
    engineer = preprocessor_module.FeatureEngineering()
    engineer.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)

    companion = common.generate_data(dtype='dataset')
    result = run_preprocessor(engineer, structured, companion, tf.float32)

    assert isinstance(result, tf.data.Dataset)
Example #4
0
def test_feature_engineering_fix_keyerror():
    """Fit, finalize, reload config, then transform with FeatureEngineering.

    The preprocessor is updated with every element, finalized, and its
    config is round-tripped through ``get_config``/``set_config`` before
    ``transform`` is applied both eagerly and through ``Dataset.map``.
    NOTE(review): presumably a regression test for a KeyError raised after
    the config round-trip -- confirm against the original issue.
    """
    samples = tf.data.Dataset.from_tensor_slices(structured_data(100))

    engineer = preprocessor.FeatureEngineering()
    engineer.input_node = ak.StructuredDataInput(
        column_names=COLUMN_NAMES_FROM_NUMPY,
        column_types=COLUMN_TYPES_FROM_NUMPY)
    engineer.set_hp(kerastuner.HyperParameters())

    # Fitting pass over every element, then finalize.
    for row in samples:
        engineer.update(row)
    engineer.finalize()

    # Feed the preprocessor's own config back into it.
    config = engineer.get_config()
    engineer.set_config(config)

    # Eager transform of every element.
    for row in samples:
        engineer.transform(row)

    def py_transform(element):
        # Route transform through tf.py_function so it can be used in map.
        return tf.py_function(
            engineer.transform, inp=[element], Tout=(tf.float64, ))

    mapped = samples.map(py_transform)
    assert isinstance(mapped, tf.data.Dataset)