def test_preprocessing():
    """End-to-end preprocess() on a multi-input, multi-branch hypermodel graph."""
    input_shape = (33, )
    output_shape = (1, )
    # Two feature datasets zipped against a single target dataset.
    features_a = common.generate_data(num_instances=100, shape=input_shape,
                                      dtype='dataset')
    features_b = common.generate_data(num_instances=100, shape=input_shape,
                                      dtype='dataset')
    targets = common.generate_data(num_instances=100, shape=output_shape,
                                   dtype='dataset')
    dataset = tf.data.Dataset.zip(((features_a, features_b), targets))

    # Branches one and three share the first input's normalization output;
    # branch two hangs off the second input.
    first_input = ak.Input(shape=input_shape)
    normalized = ak.Normalization()(first_input)
    branch_one = ak.DenseBlock()(normalized)
    branch_three = ak.DenseBlock()(ak.Normalization()(normalized))
    second_input = ak.Input(shape=input_shape)
    branch_two = ak.DenseBlock()(ak.Normalization()(second_input))
    merged = ak.Merge()([branch_one, branch_two, branch_three])
    head = ak.RegressionHead()(merged)

    graph = graph_module.HyperBuiltGraphHyperModel(
        [first_input, second_input], head)
    graph.preprocess(hp=kerastuner.HyperParameters(),
                     dataset=dataset,
                     validation_data=dataset,
                     fit=True)
def test_normalize():
    """Normalization preprocessor returns a tf.data.Dataset."""
    raw = common.generate_data(dtype='dataset')
    processed = run_preprocessor(preprocessor_module.Normalization(),
                                 raw,
                                 common.generate_data(dtype='dataset'),
                                 dtype=tf.float32)
    assert isinstance(processed, tf.data.Dataset)
def test_text_regressor(tmp_dir):
    """TextRegressor fits raw IMDB text and predicts one value per sample."""
    (train_x, train_y), (test_x, test_y) = common.imdb_raw()
    # Swap the sentiment labels for continuous regression targets.
    train_y = common.generate_data(num_instances=train_y.shape[0], shape=(1, ))
    test_y = common.generate_data(num_instances=test_y.shape[0], shape=(1, ))
    regressor = ak.TextRegressor(directory=tmp_dir,
                                 max_trials=2,
                                 seed=common.SEED)
    regressor.fit(train_x, train_y, epochs=1,
                  validation_data=(test_x, test_y))
    assert regressor.predict(test_x).shape == (len(test_x), 1)
def test_augment():
    """ImageAugmentation preprocessor returns a tf.data.Dataset."""
    source = common.generate_data(dtype='dataset')
    augmented = run_preprocessor(
        preprocessor.ImageAugmentation(seed=common.SEED),
        source,
        common.generate_data(dtype='dataset'),
        tf.float32)
    assert isinstance(augmented, tf.data.Dataset)
def test_lgbm_regressor():
    """LightGBMBlock wired as a regressor runs through the preprocessor path."""
    features = common.generate_data(11, (32, ), dtype='dataset')
    targets = common.generate_data(11, (1, ), dtype='dataset')
    block = preprocessor_module.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor_module.LightGBMRegressor(
        seed=common.SEED)
    transformed = run_preprocessor(block, features, targets, tf.float32)
    assert isinstance(transformed, tf.data.Dataset)
def test_image_input_with_illegal_dim():
    """ImageInput rejects flat (rank-1) feature vectors with a ValueError."""
    data = common.generate_data(shape=(32, ))
    image_input = node.ImageInput()
    with pytest.raises(ValueError) as info:
        image_input.fit(data)
        data = image_input.transform(data)
    assert 'Expect the data to ImageInput to have 3' in str(info.value)
def test_io_api(tmp_dir):
    """AutoModel IO API trains on mixed image/text/structured inputs with
    regression and classification heads."""
    # train_y / test_x / test_y from mnist are overwritten by the imdb
    # unpacking below; only image_x and text_x are consumed.
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = common.imdb_raw()

    num_instances = 20
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = common.generate_structured_data(
        num_instances=num_instances)
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=['mae']),
            ak.ClassificationHead(loss='categorical_crossentropy',
                                  metrics=['accuracy'])
        ],
        directory=tmp_dir,
        max_trials=2,
        seed=common.SEED)
    automodel.fit([image_x, text_x, structured_data_x],
                  [regression_y, classification_y],
                  epochs=2,
                  validation_split=0.2)
def test_image_classifier(tmp_dir):
    """ImageClassifier trains, predicts (n, 10) and exports a Keras model."""
    images = common.generate_data(num_instances=100, shape=(32, 32, 3))
    labels = common.generate_one_hot_labels(num_instances=100, num_classes=10)
    classifier = ak.ImageClassifier(directory=tmp_dir,
                                    max_trials=2,
                                    seed=common.SEED)
    classifier.fit(images, labels, epochs=1, validation_split=0.2)
    exported = classifier.export_model()
    assert classifier.predict(images).shape == (len(images), 10)
    assert isinstance(exported, tf.keras.Model)
def test_image_input_with_three_dim():
    """A rank-2 input comes out of ImageInput with a trailing channel axis."""
    data = common.generate_data(shape=(32, 32))
    image_input = node.ImageInput()
    data = image_input.transform(data)
    assert isinstance(data, tf.data.Dataset)
    # Inspect only the first element of the dataset.
    for sample in data:
        assert sample.shape == (32, 32, 1)
        break
def test_feature_engineering(tmp_dir):
    """FeatureEngineering with column metadata set directly yields a dataset."""
    # NOTE(review): another test in this source shares the name
    # test_feature_engineering; if both live in one module, only the later
    # definition is collected by pytest -- confirm they come from separate
    # files.
    structured = common.generate_structured_data(dtype='dataset')
    feature = preprocessor_module.FeatureEngineering()
    feature.column_names = common.COLUMN_NAMES_FROM_NUMPY
    feature.column_types = common.COLUMN_TYPES_FROM_NUMPY
    transformed = run_preprocessor(feature,
                                   structured,
                                   common.generate_data(dtype='dataset'),
                                   tf.float32,
                                   tmp_dir)
    assert isinstance(transformed, tf.data.Dataset)
def test_lgbm_classifier():
    """LightGBMBlock wired as a classifier runs through the preprocessor path."""
    # NOTE(review): a second test named test_lgbm_classifier appears later in
    # this source; duplicate names shadow each other within one module --
    # confirm the two live in separate files.
    features = common.generate_data(11, (32,), dtype='dataset')
    labels = common.generate_one_hot_labels(11, dtype='dataset')
    block = preprocessor.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor.LightGBMClassifier(seed=common.SEED)
    transformed = run_preprocessor(block, features, labels, tf.float32)
    assert isinstance(transformed, tf.data.Dataset)
def test_ngram():
    """TextToNgramVector converts raw sentences into a tf.data.Dataset."""
    texts = ['The cat sat on the mat.',
             'The dog sat on the log.',
             'Dogs and cats living together.']
    corpus = tf.data.Dataset.from_tensor_slices(texts)
    vectorized = run_preprocessor(preprocessor.TextToNgramVector(),
                                  corpus,
                                  common.generate_data(dtype='dataset'),
                                  tf.float32)
    assert isinstance(vectorized, tf.data.Dataset)
def test_feature_engineering():
    """FeatureEngineering with metadata via StructuredDataInput yields a dataset."""
    # NOTE(review): duplicates an earlier test name in this source; within a
    # single module the earlier definition would be shadowed -- confirm these
    # belong to separate files.
    structured = common.generate_structured_data(dtype='dataset')
    feature = preprocessor_module.FeatureEngineering()
    feature.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    transformed = run_preprocessor(feature,
                                   structured,
                                   common.generate_data(dtype='dataset'),
                                   tf.float32)
    assert isinstance(transformed, tf.data.Dataset)
def test_functional_api(tmp_dir):
    """Multi-modal GraphAutoModel built with the functional API trains.

    Fix: the Normalization/ImageAugmentation chain was computed into
    ``output`` but never consumed -- the ResNet and Xception branches read
    the raw ``image_input`` instead, leaving the preprocessing as dead
    code.  Both image branches now consume the augmented ``output``.
    """
    # Prepare the data.
    num_instances = 20
    # train_y / test_x / test_y are overwritten by each successive
    # unpacking; only the *_x arrays are consumed.
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = common.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = common.dataframe_numpy()
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1, ))

    # Build model and train.
    # Image branch: normalize + augment, then two backbones merged.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))

    # Structured-data branch.
    structured_data_input = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_CSV,
        column_types=common.COLUMN_TYPES_FROM_CSV)
    structured_data_output = ak.FeatureEngineering()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    # Text branch: sequence path and n-gram path merged.
    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.EmbeddingBlock()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))
    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.GraphAutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_dir,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        seed=common.SEED)
    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=2)
def test_structured_data_from_numpy_regressor(tmp_dir):
    """StructuredDataRegressor on numpy data predicts shape (n_test, 1)."""
    num_data = 500
    num_train = 400
    features = common.generate_structured_data(num_data)
    x_train, x_test = features[:num_train], features[num_train:]
    targets = common.generate_data(num_instances=num_data, shape=(1, ))
    y_train, y_test = targets[:num_train], targets[num_train:]
    regressor = ak.StructuredDataRegressor(directory=tmp_dir,
                                           max_trials=1,
                                           seed=common.SEED)
    # NOTE(review): the training split is reused as validation data --
    # presumably intentional here; confirm.
    regressor.fit(x_train, y_train, epochs=2,
                  validation_data=(x_train, y_train))
    assert regressor.predict(x_test).shape == (len(y_test), 1)
def test_lgbm_classifier():
    """Multiclass LightGBM classifier produces width-3 output rows."""
    features = common.generate_data(100, (32, ), dtype='dataset')
    labels = common.generate_one_hot_labels(100, num_classes=3,
                                            dtype='dataset')
    block = preprocessor_module.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor_module.LightGBMClassifier(
        seed=common.SEED)
    block.lightgbm_block.num_classes = 3
    transformed = run_preprocessor(block, features, labels, tf.float32)
    # Check only the first element's shape.
    for (row, ) in transformed:
        assert row.shape == (3, )
        break
    assert isinstance(transformed, tf.data.Dataset)
def test_lgbm_classifier_two_classes():
    """Binary LightGBM classifier keeps a single output column.

    Fix: ``num_classes`` was set to 11 -- an apparent copy-paste of the
    instance count.  The labels are drawn from {0, 1}, so the block is
    configured for 2 classes, matching the test's name and the (1,)
    output-shape assertion.
    """
    features = common.generate_data(11, (32, ), dtype='dataset')
    labels = tf.data.Dataset.from_tensor_slices(
        np.random.randint(0, 2, 11).reshape(-1, 1))
    block = preprocessor_module.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor_module.LightGBMClassifier(
        seed=common.SEED)
    block.lightgbm_block.num_classes = 2
    transformed = run_preprocessor(block, features, labels, tf.float32)
    # Binary classification keeps a single probability column per instance.
    for (row, ) in transformed:
        assert row.shape == (1, )
        break
    assert isinstance(transformed, tf.data.Dataset)
def test_structured_regressor(init, fit, tmp_dir):
    """StructuredDataRegressor wiring: fit() reaches the mocked internals.

    Fixes:
    - ``tmp_dir`` was referenced inside the body without being declared as
      a fixture parameter, which raises NameError at run time; it is now a
      parameter (after the mock arguments, per pytest/mock convention).
    - The targets were generated with ``num_instances=100`` while the
      features held ``num_data`` (500) rows; both now use ``num_data``.
    """
    num_data = 500
    train_x = common.generate_structured_data(num_data)
    train_y = common.generate_data(num_instances=num_data, shape=(1,))
    clf = ak.StructuredDataRegressor(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        directory=tmp_dir,
        max_trials=1,
        seed=common.SEED)
    clf.fit(train_x, train_y, epochs=2, validation_data=(train_x, train_y))
    # The mocked constructor and fit must both have been invoked.
    assert init.called
    assert fit.called
def test_sequence():
    """TextToIntSequence encodes sentences into length-6 int64 sequences."""
    texts = ['The cat sat on the mat.',
             'The dog sat on the log.',
             'Dogs and cats living together.']
    corpus = tf.data.Dataset.from_tensor_slices(texts)
    encoded = run_preprocessor(preprocessor_module.TextToIntSequence(),
                               corpus,
                               common.generate_data(dtype='dataset'),
                               tf.int64)
    assert isinstance(encoded, tf.data.Dataset)
    # Check only the first element's shape.
    for (seq,) in encoded:
        assert seq.shape == (6,)
        break
def test_add_early_stopping(_2, get_trials, _1, _, run_trial, tmp_dir):
    """RandomSearch passes EarlyStopping to the first run_trial call only.

    Asserts that the first recorded run_trial call receives exactly one
    EarlyStopping callback, and the second receives an empty callback list.
    """
    trial = kerastuner.engine.trial.Trial()
    trial.hyperparameters = kerastuner.HyperParameters()
    get_trials.return_value = [trial]

    input_shape = (32,)
    num_instances = 100
    num_classes = 10
    features = common.generate_data(num_instances=num_instances,
                                    shape=input_shape,
                                    dtype='dataset')
    labels = common.generate_one_hot_labels(num_instances=num_instances,
                                            num_classes=num_classes,
                                            dtype='dataset')

    # Minimal graph: input -> dense -> classification head.
    input_node = ak.Input(shape=input_shape)
    output_node = input_node
    output_node = ak.DenseBlock()(output_node)
    output_node = ak.ClassificationHead(
        num_classes=num_classes,
        output_shape=(num_classes,))(output_node)
    hypermodel = ak.hypermodel.graph.HyperBuiltGraphHyperModel(
        input_node, output_node)

    tuner = ak.tuner.RandomSearch(hypermodel=hypermodel,
                                  objective='val_loss',
                                  max_trials=1,
                                  directory=tmp_dir,
                                  seed=common.SEED)
    tuner.search(x=tf.data.Dataset.zip((features, labels)),
                 validation_data=(features, labels),
                 epochs=20,
                 callbacks=[])

    _, first_kwargs = run_trial.call_args_list[0]
    search_callbacks = first_kwargs['callbacks']
    assert len(search_callbacks) == 1
    assert isinstance(search_callbacks[0], tf.keras.callbacks.EarlyStopping)

    _, second_kwargs = run_trial.call_args_list[1]
    assert len(second_kwargs['callbacks']) == 0
def test_image_regressor(tmp_dir):
    """ImageRegressor trains on random images and predicts shape (n, 1)."""
    images = common.generate_data(num_instances=100, shape=(32, 32, 3))
    targets = common.generate_data(num_instances=100, shape=(1, ))
    regressor = ak.ImageRegressor(directory=tmp_dir,
                                  max_trials=2,
                                  seed=common.SEED)
    regressor.fit(images, targets, epochs=1, validation_split=0.2)
    assert regressor.predict(images).shape == (len(images), 1)
def test_text_input_with_illegal_dim():
    """TextInput rejects rank-2 data with a ValueError."""
    data = common.generate_data(shape=(32,))
    text_input = node.TextInput()
    with pytest.raises(ValueError) as info:
        data = text_input.transform(data)
    assert 'Expect the data to TextInput to have 1' in str(info.value)
def test_image_input():
    """ImageInput fit + transform on default generated data yields a dataset."""
    data = common.generate_data()
    image_input = node.ImageInput()
    image_input.fit(data)
    assert isinstance(image_input.transform(data), tf.data.Dataset)