def test_io_api(tmp_path): num_instances = 100 (image_x, train_y), (test_x, test_y) = mnist.load_data() (text_x, train_y), (test_x, test_y) = utils.imdb_raw(num_instances=num_instances) image_x = image_x[:num_instances] text_x = text_x[:num_instances] structured_data_x = utils.generate_structured_data( num_instances=num_instances) classification_y = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. automodel = ak.AutoModel( inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()], outputs=[ ak.RegressionHead(metrics=['mae']), ak.ClassificationHead(loss='categorical_crossentropy', metrics=['accuracy']) ], directory=tmp_path, max_trials=2, seed=utils.SEED) automodel.fit([image_x, text_x, structured_data_x], [regression_y, classification_y], epochs=1, validation_split=0.2)
def test_text_classifier(tmp_path): (train_x, train_y), (test_x, test_y) = utils.imdb_raw() clf = ak.TextClassifier(directory=tmp_path, max_trials=2, seed=utils.SEED, metrics=['accuracy'], objective='accuracy') clf.fit(train_x, train_y, epochs=2, validation_data=(test_x, test_y)) clf.export_model() assert clf.predict(test_x).shape == (len(test_x), 1) assert clf.tuner._get_best_trial_epochs() == 2
def test_text_regressor(tmp_path): (train_x, train_y), (test_x, test_y) = utils.imdb_raw() train_y = utils.generate_data(num_instances=train_y.shape[0], shape=(1, )) test_y = utils.generate_data(num_instances=test_y.shape[0], shape=(1, )) clf = ak.TextRegressor(directory=tmp_path, max_trials=2, seed=utils.SEED) clf.fit(train_x, train_y, epochs=1, validation_data=(test_x, test_y)) clf.export_model() assert clf.predict(test_x).shape == (len(test_x), 1)
def test_functional_api(tmp_path): # Prepare the data. num_instances = 80 (image_x, train_y), (test_x, test_y) = mnist.load_data() (text_x, train_y), (test_x, test_y) = utils.imdb_raw() (structured_data_x, train_y), (test_x, test_y) = utils.dataframe_numpy() image_x = image_x[:num_instances] text_x = text_x[:num_instances] structured_data_x = structured_data_x[:num_instances] classification_y = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. image_input = ak.ImageInput() output = ak.Normalization()(image_input) output = ak.ImageAugmentation()(output) outputs1 = ak.ResNetBlock(version='next')(output) outputs2 = ak.XceptionBlock()(output) image_output = ak.Merge()((outputs1, outputs2)) structured_data_input = ak.StructuredDataInput() structured_data_output = ak.CategoricalToNumerical()(structured_data_input) structured_data_output = ak.DenseBlock()(structured_data_output) text_input = ak.TextInput() outputs1 = ak.TextToIntSequence()(text_input) outputs1 = ak.Embedding()(outputs1) outputs1 = ak.ConvBlock(separable=True)(outputs1) outputs1 = ak.SpatialReduction()(outputs1) outputs2 = ak.TextToNgramVector()(text_input) outputs2 = ak.DenseBlock()(outputs2) text_output = ak.Merge()((outputs1, outputs2)) merged_outputs = ak.Merge()( (structured_data_output, image_output, text_output)) regression_outputs = ak.RegressionHead()(merged_outputs) classification_outputs = ak.ClassificationHead()(merged_outputs) automodel = ak.AutoModel( inputs=[image_input, text_input, structured_data_input], directory=tmp_path, outputs=[regression_outputs, classification_outputs], max_trials=2, tuner=ak.Hyperband, seed=utils.SEED) automodel.fit((image_x, text_x, structured_data_x), (regression_y, classification_y), validation_split=0.2, epochs=1)
def test_text_and_structured_data(tmp_path): # Prepare the data. num_instances = 80 (x_text, y_train), (x_test, y_test) = utils.imdb_raw() x_structured_data = pd.read_csv(utils.TRAIN_CSV_PATH) x_text = x_text[:num_instances] x_structured_data = x_structured_data[:num_instances] y_classification = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) y_regression = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. structured_data_input = ak.StructuredDataInput() structured_data_output = ak.CategoricalToNumerical()(structured_data_input) structured_data_output = ak.DenseBlock()(structured_data_output) text_input = ak.TextInput() outputs1 = ak.TextToIntSequence()(text_input) outputs1 = ak.Embedding()(outputs1) outputs1 = ak.ConvBlock(separable=True)(outputs1) outputs1 = ak.SpatialReduction()(outputs1) outputs2 = ak.TextToNgramVector()(text_input) outputs2 = ak.DenseBlock()(outputs2) text_output = ak.Merge()((outputs1, outputs2)) merged_outputs = ak.Merge()((structured_data_output, text_output)) regression_outputs = ak.RegressionHead()(merged_outputs) classification_outputs = ak.ClassificationHead()(merged_outputs) automodel = ak.AutoModel( inputs=[text_input, structured_data_input], directory=tmp_path, outputs=[regression_outputs, classification_outputs], max_trials=2, tuner=ak.Hyperband, seed=utils.SEED, ) automodel.fit( (x_text, x_structured_data), (y_regression, y_classification), validation_split=0.2, epochs=1, )
def test_preprocessing_adapt_with_text_vec(): class MockLayer(preprocessing.TextVectorization): def adapt(self, *args, **kwargs): super().adapt(*args, **kwargs) self.is_called = True (x_train, y_train), (x_test, y_test) = utils.imdb_raw() dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32) layer1 = MockLayer(max_tokens=5000, output_mode="int", output_sequence_length=40) model = tf.keras.models.Sequential() model.add(tf.keras.Input(shape=(1,), dtype=tf.string)) model.add(layer1) model.add(tf.keras.layers.Embedding(50001, 10)) model.add(tf.keras.layers.Dense(1)) tuner_module.AutoTuner.adapt(model, dataset) assert layer1.is_called
def test_io_api(tmp_path): num_instances = 20 (image_x, train_y), (test_x, test_y) = mnist.load_data() (text_x, train_y), (test_x, test_y) = utils.imdb_raw(num_instances=num_instances) image_x = image_x[:num_instances] text_x = text_x[:num_instances] structured_data_x = (pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype( np.unicode)[:num_instances]) classification_y = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. automodel = ak.AutoModel( inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()], outputs=[ ak.RegressionHead(metrics=["mae"]), ak.ClassificationHead(loss="categorical_crossentropy", metrics=["accuracy"]), ], directory=tmp_path, max_trials=2, tuner=ak.RandomSearch, seed=utils.SEED, ) automodel.fit( [image_x, text_x, structured_data_x], [regression_y, classification_y], epochs=1, validation_split=0.2, batch_size=4, )
def test_imdb_accuracy_over_84(tmp_path): (x_train, y_train), (x_test, y_test) = utils.imdb_raw(num_instances=None) clf = ak.TextClassifier(max_trials=2, directory=tmp_path) clf.fit(x_train, y_train, epochs=2) accuracy = clf.evaluate(x_test, y_test)[1] assert accuracy >= 0.84
def test_text_classifier(tmp_dir): (train_x, train_y), (test_x, test_y) = utils.imdb_raw() clf = ak.TextClassifier(directory=tmp_dir, max_trials=2, seed=utils.SEED) clf.fit(train_x, train_y, epochs=1, validation_data=(test_x, test_y)) clf.export_model() assert clf.predict(test_x).shape == (len(test_x), 1)