def test_io_api(tmp_dir):
    """Smoke-test AutoModel with multi-modal inputs and multi-head outputs.

    Trains for two epochs on image + text + structured features with one
    regression head and one classification head.
    """
    # Only the raw training features are used; the dataset-provided labels
    # and test splits are deliberately discarded (labels are generated
    # below).  The original code unpacked them into names that were then
    # silently overwritten by the second unpacking.
    (image_x, _), (_, _) = mnist.load_data()
    (text_x, _), (_, _) = common.imdb_raw()

    num_instances = 20
    # Keep the run tiny so the test stays fast.
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = common.generate_structured_data(
        num_instances=num_instances)
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(),
                ak.TextInput(),
                ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=['mae']),
            ak.ClassificationHead(loss='categorical_crossentropy',
                                  metrics=['accuracy'])
        ],
        directory=tmp_dir,
        max_trials=2,
        seed=common.SEED)
    automodel.fit([image_x, text_x, structured_data_x],
                  [regression_y, classification_y],
                  epochs=2,
                  validation_split=0.2)
# --- Example 2 ---
def test_structured_data_input_col_type_without_name():
    """Giving column types without column names must raise a ValueError."""
    features = common.generate_structured_data(500)
    with pytest.raises(ValueError) as info:
        structured_input = node.StructuredDataInput(
            column_types=common.COLUMN_TYPES_FROM_NUMPY)
        structured_input.fit(features)
    # Exact message is part of the contract being tested.
    assert str(info.value) == 'Column names must be specified.'
# --- Example 3 ---
def test_feature_engineering(tmp_dir):
    """FeatureEngineering over a dataset should produce a tf.data.Dataset."""
    raw_dataset = common.generate_structured_data(dtype='dataset')
    preprocessor = preprocessor_module.FeatureEngineering()
    # Column metadata is attached directly to the preprocessor instance.
    preprocessor.column_names = common.COLUMN_NAMES_FROM_NUMPY
    preprocessor.column_types = common.COLUMN_TYPES_FROM_NUMPY
    processed = run_preprocessor(preprocessor,
                                 raw_dataset,
                                 common.generate_data(dtype='dataset'),
                                 tf.float32,
                                 tmp_dir)
    assert isinstance(processed, tf.data.Dataset)
def test_structured_data_assembler():
    """Assembler fed row-by-row should attach a StructuredDataBlock."""
    rows = tf.data.Dataset.from_tensor_slices(
        common.generate_structured_data())
    assembler = meta_model.StructuredDataAssembler()
    # Feed the assembler one record at a time, as it would see them online.
    for row in rows:
        assembler.update(row)

    structured_input = node.StructuredDataInput()
    assembler.assemble(structured_input)
    assert isinstance(structured_input.out_blocks[0], ak.StructuredDataBlock)
# --- Example 5 ---
def test_feature_engineering():
    """FeatureEngineering with an explicit input node yields a tf.data.Dataset."""
    raw_dataset = common.generate_structured_data(dtype='dataset')
    preprocessor = preprocessor_module.FeatureEngineering()
    # Column metadata is supplied via a pre-configured input node here,
    # rather than set directly on the preprocessor.
    preprocessor.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    processed = run_preprocessor(preprocessor,
                                 raw_dataset,
                                 common.generate_data(dtype='dataset'),
                                 tf.float32)
    assert isinstance(processed, tf.data.Dataset)
def test_structured_data_assembler():
    """Assembler with known column names should infer the column types."""
    rows = tf.data.Dataset.from_tensor_slices(
        common.generate_structured_data())
    assembler = meta_model.StructuredDataAssembler(
        column_names=common.COLUMN_NAMES_FROM_NUMPY)
    # Stream every record through the assembler so it can infer types.
    for row in rows:
        assembler.update(row)

    structured_input = node.StructuredDataInput()
    assembler.assemble(structured_input)
    assert structured_input.column_types == common.COLUMN_TYPES_FROM_NUMPY
# --- Example 7 ---
def test_structured_data_from_numpy_classifier(tmp_dir):
    """End-to-end classifier fit/predict on numpy structured data."""
    num_data, num_train = 500, 400
    features = common.generate_structured_data(num_data)
    labels = common.generate_one_hot_labels(num_instances=num_data,
                                            num_classes=3)
    # Simple head/tail split into train and held-out test portions.
    x_train, x_test = features[:num_train], features[num_train:]
    y_train, y_test = labels[:num_train], labels[num_train:]
    clf = ak.StructuredDataClassifier(directory=tmp_dir,
                                      max_trials=1,
                                      seed=common.SEED)
    # NOTE(review): validation_data reuses the training split — presumably
    # intentional for a cheap smoke test.
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    # Predictions must be one row per test instance, one column per class.
    assert clf.predict(x_test).shape == (len(y_test), 3)
# --- Example 8 ---
def test_structured_data_from_numpy_regressor(tmp_dir):
    """End-to-end regressor fit/predict on numpy structured data."""
    num_data, num_train = 500, 400
    features = common.generate_structured_data(num_data)
    targets = common.generate_data(num_instances=num_data, shape=(1, ))
    # Simple head/tail split into train and held-out test portions.
    x_train, x_test = features[:num_train], features[num_train:]
    y_train, y_test = targets[:num_train], targets[num_train:]
    clf = ak.StructuredDataRegressor(directory=tmp_dir,
                                     max_trials=1,
                                     seed=common.SEED)
    # NOTE(review): validation_data reuses the training split — presumably
    # intentional for a cheap smoke test.
    clf.fit(x_train, y_train, epochs=2, validation_data=(x_train, y_train))
    # One scalar prediction per held-out instance.
    assert clf.predict(x_test).shape == (len(y_test), 1)
def test_structured_regressor(init, fit, tmp_dir):
    """Regressor with explicit column names calls the mocked init/fit.

    Fixes two defects in the original:
    - ``tmp_dir`` was used but never declared as a parameter, which is a
      NameError at runtime; every sibling test receives it as a pytest
      fixture, so it is added here (backward-compatible: pytest injects it).
    - ``train_y`` was generated with a hard-coded 100 instances while
      ``train_x`` has ``num_data`` (500) rows, an x/y length mismatch.
    """
    num_data = 500
    train_x = common.generate_structured_data(num_data)
    # Label count must match the number of feature rows.
    train_y = common.generate_data(num_instances=num_data, shape=(1,))

    clf = ak.StructuredDataRegressor(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        directory=tmp_dir,
        max_trials=1,
        seed=common.SEED)
    clf.fit(train_x, train_y, epochs=2, validation_data=(train_x, train_y))

    # init/fit are mock objects patched in by the test decorators.
    assert init.called
    assert fit.called
# --- Example 10 ---
def test_structured_classifier(init, fit, tmp_dir):
    """Classifier with explicit column names calls the mocked init/fit.

    Fixes a defect in the original: ``tmp_dir`` was used but never declared
    as a parameter, which is a NameError at runtime; every sibling test
    receives it as a pytest fixture, so it is added here (backward-compatible:
    pytest injects it).
    """
    num_data = 500
    train_x = common.generate_structured_data(num_data)
    train_y = common.generate_one_hot_labels(num_instances=num_data,
                                             num_classes=3)

    clf = ak.StructuredDataClassifier(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        directory=tmp_dir,
        max_trials=1,
        seed=common.SEED)
    clf.fit(train_x, train_y, epochs=2, validation_data=(train_x, train_y))

    # init/fit are mock objects patched in by the test decorators.
    assert init.called
    assert fit.called
# --- Example 11 ---
def test_structured_data_input(tmp_dir):
    """GraphAutoModel built from an explicit structured-data input node."""
    num_data = 500
    x_train = common.generate_structured_data(num_data)
    # Integer class ids in [0, 3); the head is configured for 3 classes.
    y_train = np.random.randint(0, 3, num_data)

    input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    # Functional-style graph: input -> structured block -> classification head.
    graph_output = ak.StructuredDataBlock()(input_node)
    graph_output = ak.ClassificationHead(
        loss='categorical_crossentropy',
        metrics=['accuracy'])(graph_output)

    auto_model = ak.GraphAutoModel(input_node,
                                   graph_output,
                                   directory=tmp_dir,
                                   max_trials=1)
    auto_model.fit(x_train,
                   y_train,
                   epochs=1,
                   validation_data=(x_train, y_train))
    # Ensure the trained model can run inference on the same data.
    auto_model.predict(x_train)