Example no. 1
def test_cast_to_int32_return_int32():
    dataset = utils.generate_one_hot_labels(100, 10, "dataset")
    dataset = dataset.map(lambda x: tf.cast(x, tf.uint8))
    dataset = common.CastToInt32().transform(dataset)
    for data in dataset:
        assert data.dtype == tf.int32
        break
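
Note: every example in this list relies on the test helper utils.generate_one_hot_labels(num_instances, num_classes, dtype). Its implementation is not reproduced here; the sketch below only illustrates behaviour consistent with the calls in these examples (dtype "np" yielding a NumPy one-hot array, dtype "dataset" yielding a batched tf.data.Dataset) and may differ from the actual AutoKeras test utility.

import numpy as np
import tensorflow as tf


def generate_one_hot_labels(num_instances=100, num_classes=10, dtype="np"):
    # Draw random integer class ids and one-hot encode them.
    labels = np.random.randint(num_classes, size=num_instances)
    data = np.eye(num_classes)[labels]
    if dtype == "np":
        return data
    if dtype == "dataset":
        # Batched form consumed by the analyser/adapter examples below.
        return tf.data.Dataset.from_tensor_slices(data).batch(32)
    raise ValueError("Unsupported dtype: {}".format(dtype))
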
Example no. 2
def test_io_api(tmp_path):
    num_instances = 20
    image_x = utils.generate_data(num_instances=num_instances, shape=(28, 28))
    text_x = utils.generate_text_data(num_instances=num_instances)

    image_x = image_x[:num_instances]
    structured_data_x = (pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(
        str)[:num_instances])
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(),
                ak.TextInput(),
                ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=["mae"]),
            ak.ClassificationHead(loss="categorical_crossentropy",
                                  metrics=["accuracy"]),
        ],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=utils.SEED,
    )
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=4,
    )
Example no. 3
def test_io_api(tmp_path):
    num_instances = 100
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x,
                        test_y) = utils.imdb_raw(num_instances=num_instances)

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = utils.generate_structured_data(
        num_instances=num_instances)
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(),
                ak.TextInput(),
                ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=['mae']),
            ak.ClassificationHead(loss='categorical_crossentropy',
                                  metrics=['accuracy'])
        ],
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED)
    automodel.fit([image_x, text_x, structured_data_x],
                  [regression_y, classification_y],
                  epochs=1,
                  validation_split=0.2)
Example no. 4
def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path):
    auto_model = ak.StructuredDataClassifier(directory=tmp_path, seed=utils.SEED)

    auto_model.fit(
        x=utils.generate_structured_data(num_instances=100),
        y=utils.generate_one_hot_labels(num_instances=100, num_classes=3))

    assert fit.is_called
Example no. 5
def test_clf_head_transform_df_to_dataset():
    adapter = output_adapter.ClassificationHeadAdapter(name="a")

    y = adapter.fit_transform(
        pd.DataFrame(utils.generate_one_hot_labels(dtype="np", num_classes=10))
    )

    assert isinstance(y, tf.data.Dataset)
Example no. 6
def test_img_clf_fit_call_auto_model_fit(fit, tmp_path):
    auto_model = ak.ImageClassifier(directory=tmp_path, seed=utils.SEED)

    auto_model.fit(x=utils.generate_data(num_instances=100, shape=(32, 32, 3)),
                   y=utils.generate_one_hot_labels(num_instances=100,
                                                   num_classes=10))

    assert fit.is_called
Example no. 7
def test_infer_ten_classes():
    adapter = output_adapter.ClassificationHeadAdapter(name="a")

    adapter.fit_transform(
        utils.generate_one_hot_labels(dtype="dataset", num_classes=10)
    )

    assert adapter.num_classes == 10
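
The class-inference tests above and below (Examples no. 7 and no. 10) check that the number of classes is recovered from one-hot encoded targets. Independent of AutoKeras internals, the class count of a batched one-hot tf.data.Dataset can be read off the last axis of a sample batch; the helper below (infer_num_classes is a hypothetical name, not an AutoKeras API) is a minimal sketch of that idea.

import tensorflow as tf


def infer_num_classes(dataset):
    # For one-hot encoded labels the size of the last axis
    # equals the number of classes.
    for batch in dataset.take(1):
        return int(batch.shape[-1])
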
Example no. 8
def test_image_classifier(tmp_path):
    train_x = utils.generate_data(num_instances=100, shape=(32, 32, 3))
    train_y = utils.generate_one_hot_labels(num_instances=100, num_classes=10)
    clf = ak.ImageClassifier(directory=tmp_path, max_trials=2, seed=utils.SEED)
    clf.fit(train_x, train_y, epochs=1, validation_split=0.2)
    keras_model = clf.export_model()
    clf.evaluate(train_x, train_y)
    assert clf.predict(train_x).shape == (len(train_x), 10)
    assert isinstance(keras_model, tf.keras.Model)
Example no. 9
def test_clf_head_one_hot_shape_error():
    adapter = output_adapter.ClassificationHeadAdapter(name="a", num_classes=9)

    with pytest.raises(ValueError) as info:
        adapter.fit_transform(
            utils.generate_one_hot_labels(dtype="np", num_classes=10)
        )

    assert "Expect one hot encoded labels to have shape" in str(info.value)
Example no. 10
def test_infer_ten_classes():
    analyser = output_analysers.ClassificationAnalyser(name="a")
    dataset = utils.generate_one_hot_labels(dtype="dataset", num_classes=10)

    for data in dataset:
        analyser.update(data)
    analyser.finalize()

    assert analyser.num_classes == 10
Example no. 11
def test_structured_clf_fit_call_auto_model_fit(fit, tmp_path):
    auto_model = ak.StructuredDataClassifier(directory=tmp_path,
                                             seed=utils.SEED)

    auto_model.fit(
        x=pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:100],
        y=utils.generate_one_hot_labels(num_instances=100, num_classes=3),
    )

    assert fit.is_called
Example no. 12
def test_clf_head_one_hot_shape_error():
    analyser = output_analysers.ClassificationAnalyser(name="a", num_classes=9)
    dataset = tf.data.Dataset.from_tensor_slices(
        utils.generate_one_hot_labels(dtype="np", num_classes=10)).batch(32)

    with pytest.raises(ValueError) as info:
        for data in dataset:
            analyser.update(data)
        analyser.finalize()

    assert "Expect the target data for a to have shape" in str(info.value)
Example no. 13
def test_functional_api(tmp_path):
    # Prepare the data.
    num_instances = 80
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = utils.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = utils.dataframe_numpy()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))

    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.Embedding()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED)

    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=1)
Example no. 14
def test_structured_data_classifier(tmp_path):
    num_data = 500
    num_train = 400
    data = pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(str)[:num_data]
    x_train, x_test = data[:num_train], data[num_train:]
    y = utils.generate_one_hot_labels(num_instances=num_data, num_classes=3)
    y_train, y_test = y[:num_train], y[num_train:]
    clf = ak.StructuredDataClassifier(
        directory=tmp_path, max_trials=1, seed=utils.SEED
    )
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    clf.export_model()
    assert clf.predict(x_test).shape == (len(y_test), 3)
Example no. 15
def test_structured_data_from_numpy_classifier(tmp_path):
    num_data = 500
    num_train = 400
    data = utils.generate_structured_data(num_data)
    x_train, x_test = data[:num_train], data[num_train:]
    y = utils.generate_one_hot_labels(num_instances=num_data, num_classes=3)
    y_train, y_test = y[:num_train], y[num_train:]
    clf = ak.StructuredDataClassifier(directory=tmp_path,
                                      max_trials=1,
                                      seed=utils.SEED)
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    clf.export_model()
    assert clf.predict(x_test).shape == (len(y_test), 3)
Example no. 16
def test_structured_classifier(init, fit, tmp_dir):
    num_data = 500
    train_x = utils.generate_structured_data(num_data)
    train_y = utils.generate_one_hot_labels(num_instances=num_data, num_classes=3)

    clf = structured_data.StructuredDataClassifier(
        column_names=utils.COLUMN_NAMES_FROM_NUMPY,
        directory=tmp_dir,
        max_trials=1,
        seed=utils.SEED)
    clf.fit(train_x, train_y, epochs=2, validation_data=(train_x, train_y))

    assert init.called
    assert fit.called
Example no. 17
def test_clf_head_hpps_with_uint8_contain_cast_to_int32():
    dataset = utils.generate_one_hot_labels(100, 10, "dataset")
    dataset = dataset.map(lambda x: tf.cast(x, tf.uint8))
    head = head_module.ClassificationHead(shape=(8, ))
    analyser = head.get_analyser()
    for data in dataset:
        analyser.update(data)
    analyser.finalize()
    head.config_from_analyser(analyser)

    assert any([
        isinstance(hpp, hyper_preprocessors.DefaultHyperPreprocessor)
        and isinstance(hpp.preprocessor, preprocessors.CastToInt32)
        for hpp in head.get_hyper_preprocessors()
    ])
Example no. 18
def test_text_and_structured_data(tmp_path):
    # Prepare the data.
    num_instances = 80
    (x_text, y_train), (x_test, y_test) = utils.imdb_raw()
    x_structured_data = pd.read_csv(utils.TRAIN_CSV_PATH)

    x_text = x_text[:num_instances]
    x_structured_data = x_structured_data[:num_instances]
    y_classification = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    y_regression = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.Embedding()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()((structured_data_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )

    automodel.fit(
        (x_text, x_structured_data),
        (y_regression, y_classification),
        validation_split=0.2,
        epochs=1,
    )
Example no. 19
def test_infer_num_classes():
    y = utils.generate_one_hot_labels(dtype='dataset')
    adapter = output_adapter.ClassificationHeadAdapter(name='a')
    y = adapter.fit(y)
    assert adapter.num_classes == 10
Example no. 20
def test_tf_dataset():
    y = utils.generate_one_hot_labels(dtype='dataset')
    head = output_adapter.ClassificationHeadAdapter(name='a')
    y = head.fit_transform(y)
    assert isinstance(head.transform(y), tf.data.Dataset)