예제 #1
0
def test_io_api(tmp_path):
    """Fit a multi-input, multi-output ``ak.AutoModel`` built via the IO API.

    The model takes image, text and structured-data inputs and has a
    regression head plus a classification head.  The test makes no explicit
    assertion: it passes when ``fit`` completes without raising.

    Args:
        tmp_path: Directory handed to ``AutoModel`` as its working directory.
    """
    num_instances = 20
    image_x = utils.generate_data(num_instances=num_instances, shape=(28, 28))
    text_x = utils.generate_text_data(num_instances=num_instances)

    image_x = image_x[:num_instances]
    # ``np.unicode`` was merely an alias of the builtin ``str``; it was
    # deprecated in NumPy 1.20 and removed in 1.24.  ``str`` yields the same
    # unicode dtype on every NumPy version.
    structured_data_x = (
        pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:num_instances]
    )
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(),
                ak.TextInput(),
                ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=["mae"]),
            ak.ClassificationHead(loss="categorical_crossentropy",
                                  metrics=["accuracy"]),
        ],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=utils.SEED,
    )
    # Inputs and targets are passed in the same order as the ``inputs`` and
    # ``outputs`` lists above.
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=4,
    )
예제 #2
0
def test_text_regressor(tmp_path):
    """Fit, export and predict with ``ak.TextRegressor`` on generated text.

    The training data doubles as the validation/test data.  The prediction
    must have one row per test sample and a single column.

    Args:
        tmp_path: Directory handed to the regressor as its working directory.
    """
    sample_count = 300
    texts = utils.generate_text_data(num_instances=sample_count)
    targets = utils.generate_data(num_instances=sample_count, shape=(1,))

    regressor = ak.TextRegressor(
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED,
    )
    # Validate on the very same data that is used for training.
    regressor.fit(texts, targets, epochs=1, validation_data=(texts, targets))
    regressor.export_model()

    predictions = regressor.predict(texts)
    assert predictions.shape == (len(texts), 1)
예제 #3
0
def test_text_and_structured_data(tmp_path):
    """Fit a hand-wired functional ``ak.AutoModel`` on text + structured data.

    The graph has a structured-data branch and a text branch (itself made of
    a sequence path and an n-gram path), merged and fed into a regression
    head and a classification head.  The test makes no explicit assertion:
    it passes when ``fit`` completes without raising.

    Args:
        tmp_path: Directory handed to ``AutoModel`` as its working directory.
    """
    # --- Data -------------------------------------------------------------
    num_instances = 80
    text_features = utils.generate_text_data(num_instances)
    table_features = pd.read_csv(utils.TRAIN_CSV_PATH)[:num_instances]

    class_targets = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3
    )
    value_targets = utils.generate_data(num_instances=num_instances, shape=(1,))

    # --- Structured-data branch --------------------------------------------
    table_node = ak.StructuredDataInput()
    table_branch = ak.CategoricalToNumerical()(table_node)
    table_branch = ak.DenseBlock()(table_branch)

    # --- Text branch: sequence path + n-gram path ----------------------------
    text_node = ak.TextInput()
    sequence_path = ak.TextToIntSequence()(text_node)
    sequence_path = ak.Embedding()(sequence_path)
    sequence_path = ak.ConvBlock(separable=True)(sequence_path)
    sequence_path = ak.SpatialReduction()(sequence_path)
    ngram_path = ak.TextToNgramVector()(text_node)
    ngram_path = ak.DenseBlock()(ngram_path)
    text_branch = ak.Merge()((sequence_path, ngram_path))

    # --- Join both branches and attach the heads -----------------------------
    joined = ak.Merge()((table_branch, text_branch))
    regression_head = ak.RegressionHead()(joined)
    classification_head = ak.ClassificationHead()(joined)

    automodel = ak.AutoModel(
        inputs=[text_node, table_node],
        outputs=[regression_head, classification_head],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )

    # Inputs and targets follow the order of ``inputs`` / ``outputs`` above.
    automodel.fit(
        (text_features, table_features),
        (value_targets, class_targets),
        validation_split=0.2,
        epochs=1,
    )
예제 #4
0
def test_text_classifier(tmp_path):
    """Fit, export and predict with ``ak.TextClassifier`` on generated text.

    Labels are random integers drawn from {0, 1}; the training data doubles
    as the validation/test data.  Checks the prediction shape and that the
    tuner's best-trial epoch count does not exceed the epochs requested.

    Args:
        tmp_path: Directory handed to the classifier as its working directory.
    """
    sample_count = 320
    max_epochs = 2

    texts = utils.generate_text_data(num_instances=sample_count)
    labels = np.random.randint(0, 2, sample_count)

    classifier = ak.TextClassifier(
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED,
        metrics=["accuracy"],
        objective="accuracy",
    )
    # Validate on the very same data that is used for training.
    classifier.fit(
        texts,
        labels,
        epochs=max_epochs,
        validation_data=(texts, labels),
        batch_size=6,
    )
    classifier.export_model()

    predictions = classifier.predict(texts)
    assert predictions.shape == (len(texts), 1)
    assert classifier.tuner._get_best_trial_epochs() <= max_epochs
예제 #5
0
def test_preprocessing_adapt_with_text_vec():
    """Check that ``AutoTuner.adapt`` calls ``adapt`` on a text-vectorization layer.

    A ``TextVectorization`` subclass records when its ``adapt`` method runs.
    It is placed in a small sequential model, ``AutoTuner.adapt`` is invoked
    on that model with a batched dataset, and the recorded flag is asserted.
    """

    class MockLayer(preprocessing.TextVectorization):
        """``TextVectorization`` that flags when ``adapt`` has been invoked."""

        def adapt(self, *args, **kwargs):
            # Run the real adaptation first, then record the call.
            super().adapt(*args, **kwargs)
            self.is_called = True

    texts = utils.generate_text_data()
    labels = np.random.randint(0, 2, (100, ))
    batches = tf.data.Dataset.from_tensor_slices((texts, labels)).batch(32)

    vectorizer = MockLayer(
        max_tokens=5000,
        output_mode="int",
        output_sequence_length=40,
    )

    # String input -> vectorizer -> embedding -> single-unit dense output.
    network = tf.keras.models.Sequential()
    network.add(tf.keras.Input(shape=(1, ), dtype=tf.string))
    network.add(vectorizer)
    network.add(tf.keras.layers.Embedding(50001, 10))
    network.add(tf.keras.layers.Dense(1))

    tuner_module.AutoTuner.adapt(network, batches)

    assert vectorizer.is_called