def test_dataset_and_y(tuner_fn, tmp_path):
    """Pass a tf.data.Dataset as x together with a separate y dataset.

    The scenario is handed to ``dataset_error`` (defined outside this
    block) along with the message fragment "Expected y to be None".

    ``tuner_fn`` is not referenced in the body; presumably it is injected
    by a patch decorator or fixture outside this view -- TODO confirm.
    """
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))

    # Two-input x dataset and a two-output y dataset, supplied separately.
    x = tf.data.Dataset.from_tensor_slices((features, features))
    y = tf.data.Dataset.from_tensor_slices((targets, targets))

    validation_elements = ((features,), (targets, targets))
    val_dataset = tf.data.Dataset.from_tensor_slices(validation_elements)

    dataset_error(x, y, val_dataset, "Expected y to be None", tmp_path)
def test_data_io_consistency_validation(tuner_fn, tmp_path):
    """Use validation data whose x has fewer inputs than the training x.

    The training dataset carries two inputs and two outputs, while the
    validation dataset carries a single input. The scenario is handed to
    ``dataset_error`` (defined outside this block) with the message
    fragment "Expected x in validation_data to have".

    ``tuner_fn`` is not referenced in the body; presumably it is injected
    by a patch decorator or fixture outside this view -- TODO confirm.
    """
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))

    training_elements = ((features, features), (targets, targets))
    dataset = tf.data.Dataset.from_tensor_slices(training_elements)

    # Only one input here, against two in the training dataset.
    validation_elements = ((features,), (targets, targets))
    val_dataset = tf.data.Dataset.from_tensor_slices(validation_elements)

    expected_message = "Expected x in validation_data to have"
    dataset_error(dataset, None, val_dataset, expected_message, tmp_path)
def test_image_regressor(tmp_path):
    """Fit, export and predict with ``ak.ImageRegressor`` on generated data.

    Asserts that the predictions have one row per training image and a
    single column.
    """
    images = test_utils.generate_data(num_instances=320, shape=(32, 32, 3))
    targets = test_utils.generate_data(num_instances=320, shape=(1,))

    regressor = ak.ImageRegressor(
        directory=tmp_path,
        max_trials=2,
        seed=test_utils.SEED,
    )
    regressor.fit(images, targets, epochs=1, validation_split=0.2)
    regressor.export_model()

    predictions = regressor.predict(images)
    assert predictions.shape == (len(images), 1)
def test_single_input_predict_doesnt_crash(tuner_fn, tmp_path):
    """Fit a single-input/output auto model on a dataset, then predict.

    The test has no assertions; it passes as long as ``fit`` and
    ``predict`` complete without raising.

    ``tuner_fn`` is not referenced in the body; presumably it is injected
    by a patch decorator or fixture outside this view -- TODO confirm.
    """
    auto_model = get_single_io_auto_model(tmp_path)
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))

    # The same dataset serves as both training and validation data.
    train_dataset = tf.data.Dataset.from_tensor_slices((features, targets))
    auto_model.fit(train_dataset, None, epochs=2, validation_data=train_dataset)

    # A second dataset built from the same arrays is used for prediction.
    predict_dataset = tf.data.Dataset.from_tensor_slices((features, targets))
    auto_model.predict(predict_dataset)
def test_img_reg_fit_call_auto_model_fit(fit, tmp_path):
    """Check that ``ak.ImageRegressor.fit`` invokes the ``fit`` test double.

    ``fit`` is presumably a ``unittest.mock`` object patched in by a
    decorator outside this view (likely standing in for the auto model's
    ``fit``) -- TODO confirm against the decorator.
    """
    auto_model = ak.ImageRegressor(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(
        x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=test_utils.generate_data(num_instances=100, shape=(1,)),
    )

    # ``called`` is the real Mock flag. ``is_called`` would be an
    # auto-created child mock, which is always truthy, so the previous
    # assertion could never fail.
    assert fit.called
def test_img_seg_fit_call_auto_model_fit(fit, tmp_path):
    """Check that ``ImageSegmenter.fit`` invokes the ``fit`` test double.

    ``fit`` is presumably a ``unittest.mock`` object patched in by a
    decorator outside this view (likely standing in for the auto model's
    ``fit``) -- TODO confirm against the decorator.
    """
    auto_model = ak.tasks.image.ImageSegmenter(
        directory=tmp_path, seed=test_utils.SEED
    )

    auto_model.fit(
        x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=test_utils.generate_data(num_instances=100, shape=(32, 32)),
    )

    # ``called`` is the real Mock flag. ``is_called`` would be an
    # auto-created child mock, which is always truthy, so the previous
    # assertion could never fail.
    assert fit.called
def test_single_nested_dataset_doesnt_crash(tuner_fn, tmp_path):
    """Fit an AutoModel on a dataset whose x is wrapped in a 1-tuple.

    The test has no assertions; it passes as long as ``fit`` completes
    without raising.

    ``tuner_fn`` is not referenced in the body; presumably it is injected
    by a patch decorator or fixture outside this view -- TODO confirm.
    """
    auto_model = ak.AutoModel(
        ak.ImageInput(),
        ak.RegressionHead(),
        directory=tmp_path,
        max_trials=2,
        overwrite=False,
    )
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))

    # x is nested as ``(features,)`` while y is passed bare.
    nested_elements = ((features,), targets)
    dataset = tf.data.Dataset.from_tensor_slices(nested_elements)

    auto_model.fit(dataset, epochs=2)
def test_image_blocks(tmp_path):
    """Fit an AutoModel built from a two-branch image block graph.

    Graph: ImageInput -> Normalization -> ImageAugmentation, then a
    ResNetBlock (version "v2") branch and an XceptionBlock branch, merged
    and fed to a ClassificationHead. The test has no assertions; it
    passes as long as ``fit`` completes without raising.
    """
    num_instances = 10
    x_train = test_utils.generate_data(num_instances=num_instances, shape=(28, 28))
    y_train = np.random.randint(0, 10, num_instances)

    # Shared preprocessing trunk.
    image_input = ak.ImageInput()
    normalized = ak.Normalization()(image_input)
    augmented = ak.ImageAugmentation()(normalized)

    # Two parallel feature extractors over the same augmented input.
    resnet_branch = ak.ResNetBlock(version="v2")(augmented)
    xception_branch = ak.XceptionBlock()(augmented)

    merged = ak.Merge()((resnet_branch, xception_branch))
    classification = ak.ClassificationHead()(merged)

    automodel = ak.AutoModel(
        inputs=image_input,
        outputs=classification,
        directory=tmp_path,
        max_trials=1,
        seed=test_utils.SEED,
    )

    # Training data doubles as validation data.
    automodel.fit(
        x_train,
        y_train,
        validation_data=(x_train, y_train),
        epochs=1,
    )
def test_img_clf_fit_call_auto_model_fit(fit, tmp_path):
    """Check that ``ak.ImageClassifier.fit`` invokes the ``fit`` test double.

    ``fit`` is presumably a ``unittest.mock`` object patched in by a
    decorator outside this view (likely standing in for the auto model's
    ``fit``) -- TODO confirm against the decorator.
    """
    auto_model = ak.ImageClassifier(directory=tmp_path, seed=test_utils.SEED)

    auto_model.fit(
        x=test_utils.generate_data(num_instances=100, shape=(32, 32, 3)),
        y=test_utils.generate_one_hot_labels(num_instances=100, num_classes=10),
    )

    # ``called`` is the real Mock flag. ``is_called`` would be an
    # auto-created child mock, which is always truthy, so the previous
    # assertion could never fail.
    assert fit.called
def test_text_regressor(tmp_path):
    """Fit, export and predict with ``ak.TextRegressor`` on generated text.

    The training data is reused as validation and prediction data.
    Asserts that the predictions have one row per input text and a single
    column.
    """
    texts = test_utils.generate_text_data(num_instances=300)
    targets = test_utils.generate_data(num_instances=300, shape=(1,))

    regressor = ak.TextRegressor(
        directory=tmp_path,
        max_trials=2,
        seed=test_utils.SEED,
    )
    regressor.fit(texts, targets, epochs=1, validation_data=(texts, targets))
    regressor.export_model()

    predictions = regressor.predict(texts)
    assert predictions.shape == (len(texts), 1)
def test_structured_reg_fit_call_auto_model_fit(fit, tmp_path):
    """Check that ``StructuredDataRegressor.fit`` invokes the ``fit`` double.

    ``fit`` is presumably a ``unittest.mock`` object patched in by a
    decorator outside this view (likely standing in for the auto model's
    ``fit``) -- TODO confirm against the decorator.
    """
    auto_model = ak.StructuredDataRegressor(
        directory=tmp_path, seed=test_utils.SEED
    )

    # ``np.unicode`` was only a deprecated alias of the builtin ``str`` and
    # is gone from recent NumPy releases; ``str`` gives the same dtype.
    csv_rows = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)

    auto_model.fit(
        x=csv_rows[:100],
        y=test_utils.generate_data(num_instances=100, shape=(1,)),
    )

    # ``called`` is the real Mock flag. ``is_called`` would be an
    # auto-created child mock, which is always truthy, so the previous
    # assertion could never fail.
    assert fit.called
def test_structured_data_regressor(tmp_path):
    """Fit, export and predict with ``ak.StructuredDataRegressor``.

    The first 500 CSV rows are split into 400 training rows and the
    remainder for prediction. Asserts that the predictions have one row
    per held-out sample and a single column.
    """
    num_data = 500
    num_train = 400

    # ``np.unicode`` was only a deprecated alias of the builtin ``str`` and
    # is gone from recent NumPy releases; ``str`` gives the same dtype.
    data = pd.read_csv(test_utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:num_data]
    x_train, x_test = data[:num_train], data[num_train:]

    y = test_utils.generate_data(num_instances=num_data, shape=tuple())
    y_train, y_test = y[:num_train], y[num_train:]

    clf = ak.StructuredDataRegressor(
        directory=tmp_path, max_trials=2, seed=test_utils.SEED
    )
    # The training split is also used as validation data.
    clf.fit(x_train, y_train, epochs=11, validation_data=(x_train, y_train))
    clf.export_model()

    assert clf.predict(x_test).shape == (len(y_test), 1)
def test_image_classifier(tmp_path):
    """Fit, export, evaluate and predict with ``ak.ImageClassifier``.

    The classifier is built with a ``tf.distribute.MirroredStrategy``.
    Images are generated with shape (32, 32), i.e. without an explicit
    channel axis. Asserts the prediction shape (one row per image, ten
    columns) and that the exported model is a ``tf.keras.Model``.
    """
    images = test_utils.generate_data(num_instances=320, shape=(32, 32))
    labels = test_utils.generate_one_hot_labels(num_instances=320, num_classes=10)

    classifier = ak.ImageClassifier(
        directory=tmp_path,
        max_trials=2,
        seed=test_utils.SEED,
        distribution_strategy=tf.distribute.MirroredStrategy(),
    )
    classifier.fit(images, labels, epochs=1, validation_split=0.2)

    exported = classifier.export_model()
    classifier.evaluate(images, labels)

    predictions = classifier.predict(images)
    assert predictions.shape == (len(images), 10)
    assert isinstance(exported, tf.keras.Model)
def test_text_and_structured_data(tmp_path):
    """Fit a two-input, two-output AutoModel on text plus structured data.

    Inputs are a text input and a structured-data input; outputs are a
    regression head and a classification head over the merged features.
    The search uses ``ak.Hyperband``. The test has no assertions; it
    passes as long as ``fit`` completes without raising.
    """
    # --- Data -----------------------------------------------------------
    num_instances = 80
    x_text = test_utils.generate_text_data(num_instances)
    x_structured_data = pd.read_csv(test_utils.TRAIN_CSV_PATH)[:num_instances]
    y_classification = test_utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    y_regression = test_utils.generate_data(
        num_instances=num_instances, shape=(1,)
    )

    # --- Structured-data branch -----------------------------------------
    structured_data_input = ak.StructuredDataInput()
    structured_numeric = ak.CategoricalToNumerical()(structured_data_input)
    structured_features = ak.DenseBlock()(structured_numeric)

    # --- Text branch: an int-sequence path and an n-gram path -----------
    text_input = ak.TextInput()

    sequence_path = ak.TextToIntSequence()(text_input)
    sequence_path = ak.Embedding()(sequence_path)
    sequence_path = ak.ConvBlock(separable=True)(sequence_path)
    sequence_path = ak.SpatialReduction()(sequence_path)

    ngram_path = ak.TextToNgramVector()(text_input)
    ngram_path = ak.DenseBlock()(ngram_path)

    text_features = ak.Merge()((sequence_path, ngram_path))

    # --- Heads over the merged representation ---------------------------
    merged_features = ak.Merge()((structured_features, text_features))
    regression_head = ak.RegressionHead()(merged_features)
    classification_head = ak.ClassificationHead()(merged_features)

    # --- Model and training ---------------------------------------------
    automodel = ak.AutoModel(
        inputs=[text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_head, classification_head],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=test_utils.SEED,
    )

    # Input and target tuples follow the order of ``inputs`` / ``outputs``.
    automodel.fit(
        (x_text, x_structured_data),
        (y_regression, y_classification),
        validation_split=0.2,
        epochs=1,
    )
def test_timeseries_forecaster(tmp_path):
    """Fit, export, evaluate and predict with ``ak.TimeseriesForecaster``.

    Asserts that both ``predict`` and ``fit_and_predict`` return
    ``predict_until - predict_from + 1`` rows with a single column, and
    that the exported model is a ``tf.keras.Model``.
    """
    lookback = 2
    predict_from = 1
    predict_until = 10
    # The same expected shape applies to both prediction calls below.
    expected_shape = (predict_until - predict_from + 1, 1)

    # NOTE: x is generated with 100 instances and y with 80, as in the
    # original test.
    train_x = test_utils.generate_data_with_categorical(num_instances=100)
    train_y = test_utils.generate_data(num_instances=80, shape=(1,))

    forecaster = ak.TimeseriesForecaster(
        lookback=lookback,
        directory=tmp_path,
        predict_from=predict_from,
        predict_until=predict_until,
        max_trials=2,
        seed=test_utils.SEED,
    )
    forecaster.fit(
        train_x, train_y, epochs=1, validation_data=(train_x, train_y)
    )
    exported = forecaster.export_model()
    forecaster.evaluate(train_x, train_y)

    predictions = forecaster.predict(train_x)
    assert predictions.shape == expected_shape

    refit_predictions = forecaster.fit_and_predict(
        train_x, train_y, epochs=1, validation_split=0.2
    )
    assert refit_predictions.shape == expected_shape

    assert isinstance(exported, tf.keras.Model)
def test_data_io_consistency_output(tuner_fn, tmp_path):
    """Use a dataset with two inputs but only a single output.

    The same dataset is passed as training and validation data to
    ``dataset_error`` (defined outside this block) with the message
    fragment "Expected y to have".

    ``tuner_fn`` is not referenced in the body; presumably it is injected
    by a patch decorator or fixture outside this view -- TODO confirm.
    """
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))

    # Two inputs paired with a 1-tuple of outputs.
    elements = ((features, features), (targets,))
    dataset = tf.data.Dataset.from_tensor_slices(elements)

    dataset_error(dataset, None, dataset, "Expected y to have", tmp_path)
def test_multi_io_with_tf_dataset_doesnt_crash(tuner_fn, tmp_path):
    """Fit a multi-input/output auto model on a two-in, two-out dataset.

    The test has no assertions; it passes as long as ``fit`` completes
    without raising.

    ``tuner_fn`` is not referenced in the body; presumably it is injected
    by a patch decorator or fixture outside this view -- TODO confirm.
    """
    auto_model = get_multi_io_auto_model(tmp_path)
    features = test_utils.generate_data()
    targets = test_utils.generate_data(shape=(1,))

    elements = ((features, features), (targets, targets))
    dataset = tf.data.Dataset.from_tensor_slices(elements)

    auto_model.fit(dataset, epochs=2)
def test_image_input_adapter_transform_to_dataset():
    """Check that ``ImageAdapter.adapt`` returns a ``tf.data.Dataset``."""
    images = test_utils.generate_data()
    adapter = input_adapters.ImageAdapter()

    adapted = adapter.adapt(images, batch_size=32)

    assert isinstance(adapted, tf.data.Dataset)