def test_io_api(tmp_path):
    """Fit a multi-input, multi-output ``ak.AutoModel`` on small data.

    Image, text and structured-data inputs are fed to an ``ak.AutoModel``
    that has a regression head and a classification head, and a short
    ``fit`` is run. The test contains no assertions: it passes when the
    calls complete without raising.

    Args:
        tmp_path: Directory passed to ``ak.AutoModel`` as ``directory``
            (presumably the pytest ``tmp_path`` fixture).
    """
    num_instances = 20
    image_x = utils.generate_data(num_instances=num_instances, shape=(28, 28))
    text_x = utils.generate_text_data(num_instances=num_instances)
    image_x = image_x[:num_instances]

    # ``np.unicode`` was only an alias of the builtin ``str``; it was
    # deprecated in NumPy 1.20 and later removed, so use ``str`` directly.
    structured_data_x = (
        pd.read_csv(utils.TRAIN_CSV_PATH)
        .to_numpy()
        .astype(str)[:num_instances]
    )

    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    regression_y = utils.generate_data(num_instances=num_instances, shape=(1,))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=["mae"]),
            ak.ClassificationHead(
                loss="categorical_crossentropy", metrics=["accuracy"]
            ),
        ],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=utils.SEED,
    )
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=4,
    )
def test_text_regressor(tmp_path):
    """Train an ``ak.TextRegressor`` briefly, export it, check predictions.

    The same generated data is used for training, validation and
    prediction; the prediction is expected to have one column per sample.

    Args:
        tmp_path: Directory passed to ``ak.TextRegressor`` as ``directory``.
    """
    sample_count = 300
    texts = utils.generate_text_data(num_instances=sample_count)
    targets = utils.generate_data(num_instances=sample_count, shape=(1,))

    regressor = ak.TextRegressor(
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED,
    )
    regressor.fit(
        texts,
        targets,
        epochs=1,
        validation_data=(texts, targets),
    )
    regressor.export_model()

    predictions = regressor.predict(texts)
    assert predictions.shape == (len(texts), 1)
def test_text_and_structured_data(tmp_path):
    """Fit a hand-wired AutoModel that merges text and structured data.

    The graph has a structured-data branch and two text branches (an
    integer-sequence branch and an n-gram branch). All branches are merged
    and feed both a regression head and a classification head. The test
    has no assertions; it passes when ``fit`` completes without raising.

    Args:
        tmp_path: Directory passed to ``ak.AutoModel`` as ``directory``.
    """
    # Prepare the data.
    num_instances = 80
    text_samples = utils.generate_text_data(num_instances)
    table_samples = pd.read_csv(utils.TRAIN_CSV_PATH)[:num_instances]
    class_targets = utils.generate_one_hot_labels(
        num_instances=num_instances,
        num_classes=3,
    )
    value_targets = utils.generate_data(
        num_instances=num_instances,
        shape=(1,),
    )

    # Structured-data branch.
    table_node = ak.StructuredDataInput()
    table_branch = ak.CategoricalToNumerical()(table_node)
    table_branch = ak.DenseBlock()(table_branch)

    # Text branch 1: integer sequence -> embedding -> separable conv.
    text_node = ak.TextInput()
    sequence_branch = ak.TextToIntSequence()(text_node)
    sequence_branch = ak.Embedding()(sequence_branch)
    sequence_branch = ak.ConvBlock(separable=True)(sequence_branch)
    sequence_branch = ak.SpatialReduction()(sequence_branch)

    # Text branch 2: n-gram vector -> dense.
    ngram_branch = ak.TextToNgramVector()(text_node)
    ngram_branch = ak.DenseBlock()(ngram_branch)

    # Merge the two text branches, then merge with the structured branch.
    text_branch = ak.Merge()((sequence_branch, ngram_branch))
    combined = ak.Merge()((table_branch, text_branch))

    regression_head = ak.RegressionHead()(combined)
    classification_head = ak.ClassificationHead()(combined)

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[text_node, table_node],
        outputs=[regression_head, classification_head],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )
    automodel.fit(
        (text_samples, table_samples),
        (value_targets, class_targets),
        validation_split=0.2,
        epochs=1,
    )
def test_text_classifier(tmp_path):
    """Train an ``ak.TextClassifier``, export it, and check its outputs.

    Verifies that predictions have one column per sample and that the
    tuner's best-trial epoch count does not exceed the epochs requested.

    Args:
        tmp_path: Directory passed to ``ak.TextClassifier`` as ``directory``.
    """
    sample_count = 320
    epoch_limit = 2

    texts = utils.generate_text_data(num_instances=sample_count)
    labels = np.random.randint(0, 2, sample_count)

    classifier = ak.TextClassifier(
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED,
        metrics=["accuracy"],
        objective="accuracy",
    )
    # The training data is reused as validation data.
    classifier.fit(
        texts,
        labels,
        epochs=epoch_limit,
        validation_data=(texts, labels),
        batch_size=6,
    )
    classifier.export_model()

    predictions = classifier.predict(texts)
    assert predictions.shape == (len(texts), 1)
    assert classifier.tuner._get_best_trial_epochs() <= epoch_limit
def test_preprocessing_adapt_with_text_vec():
    """Check that ``AutoTuner.adapt`` calls ``adapt`` on a text-vectorization layer.

    A ``TextVectorization`` subclass records when its ``adapt`` method runs.
    It is placed in a small sequential model, and the test asserts that
    the flag is set after ``tuner_module.AutoTuner.adapt(model, dataset)``.
    """

    class MockLayer(preprocessing.TextVectorization):
        # Delegates to the real ``adapt`` and then records that it ran.
        def adapt(self, *args, **kwargs):
            super().adapt(*args, **kwargs)
            self.is_called = True

    texts = utils.generate_text_data()
    labels = np.random.randint(0, 2, (100,))
    batches = tf.data.Dataset.from_tensor_slices((texts, labels)).batch(32)

    vectorizer = MockLayer(
        max_tokens=5000,
        output_mode="int",
        output_sequence_length=40,
    )

    pipeline = tf.keras.models.Sequential()
    pipeline.add(tf.keras.Input(shape=(1,), dtype=tf.string))
    pipeline.add(vectorizer)
    pipeline.add(tf.keras.layers.Embedding(50001, 10))
    pipeline.add(tf.keras.layers.Dense(1))

    tuner_module.AutoTuner.adapt(pipeline, batches)

    assert vectorizer.is_called