def test_feature_engineering(tmp_dir):
    """Smoke-test ``FeatureEngineering`` configured via column attributes.

    The column names and types are assigned directly on the preprocessor,
    which is then passed to ``run_preprocessor`` together with a structured
    dataset, a second generated dataset, the ``tf.float32`` dtype and
    ``tmp_dir``. The only check is that the result is a ``tf.data.Dataset``.
    """
    structured = common.generate_structured_data(dtype='dataset')

    engineer = preprocessor_module.FeatureEngineering()
    engineer.column_names = common.COLUMN_NAMES_FROM_NUMPY
    engineer.column_types = common.COLUMN_TYPES_FROM_NUMPY

    result = run_preprocessor(
        engineer,
        structured,
        common.generate_data(dtype='dataset'),
        tf.float32,
        tmp_dir,
    )

    assert isinstance(result, tf.data.Dataset)
def build_feature_engineering(self, hp, input_node):
    """Optionally apply a ``FeatureEngineering`` preprocessor to a node.

    Args:
        hp: Hyperparameter container; its ``Choice`` method is consulted
            only when ``self.feature_engineering`` is ``None``.
        input_node: The node to (possibly) pass through the preprocessor.

    Returns:
        ``input_node`` unchanged when feature engineering is disabled,
        otherwise the result of calling ``preprocessor.FeatureEngineering()``
        on ``input_node``.
    """
    use_feature_engineering = self.feature_engineering
    if use_feature_engineering is None:
        # No explicit setting on the instance: defer to the hyperparameter.
        # TODO: If False, use plain label encoding.
        use_feature_engineering = hp.Choice(
            'feature_engineering', [True], default=True)

    if not use_feature_engineering:
        return input_node
    return preprocessor.FeatureEngineering()(input_node)
def test_feature_engineering():
    """Smoke-test ``FeatureEngineering`` configured via its input node.

    The column names and types are supplied through an
    ``ak.StructuredDataInput`` assigned to ``input_node``. The preprocessor
    is then passed to ``run_preprocessor`` and the only check is that the
    result is a ``tf.data.Dataset``.
    """
    structured = common.generate_structured_data(dtype='dataset')

    engineer = preprocessor_module.FeatureEngineering()
    engineer.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY,
    )

    result = run_preprocessor(
        engineer,
        structured,
        common.generate_data(dtype='dataset'),
        tf.float32,
    )

    assert isinstance(result, tf.data.Dataset)
def test_feature_engineering_fix_keyerror():
    """Drive ``FeatureEngineering`` through its full lifecycle by hand.

    Steps, in order: feed every element to ``update``, call ``finalize``,
    round-trip the config through ``get_config``/``set_config``, call
    ``transform`` eagerly on every element, and finally map ``transform``
    over the dataset via ``tf.py_function``.

    NOTE(review): judging by the name this is a regression test for a
    ``KeyError``; the triggering condition is not visible here -- confirm
    against the original issue.
    """
    records = structured_data(100)
    dataset = tf.data.Dataset.from_tensor_slices(records)

    engineer = preprocessor.FeatureEngineering()
    engineer.input_node = ak.StructuredDataInput(
        column_names=COLUMN_NAMES_FROM_NUMPY,
        column_types=COLUMN_TYPES_FROM_NUMPY,
    )
    engineer.set_hp(kerastuner.HyperParameters())

    # Fitting pass over the data.
    for row in dataset:
        engineer.update(row)
    engineer.finalize()

    # Round-trip the config before transforming anything.
    engineer.set_config(engineer.get_config())

    # Eager pass: every element must transform without raising.
    for row in dataset:
        engineer.transform(row)

    def apply_transform(element):
        # Wrap the Python-level transform so it can run inside Dataset.map.
        return tf.py_function(
            engineer.transform, inp=[element], Tout=(tf.float64, ))

    mapped = dataset.map(apply_transform)
    assert isinstance(mapped, tf.data.Dataset)