Example 1
def test_io_api(tmp_path):
    num_instances = 100
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x,
                        test_y) = utils.imdb_raw(num_instances=num_instances)

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = utils.generate_structured_data(
        num_instances=num_instances)
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(),
                ak.TextInput(),
                ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=['mae']),
            ak.ClassificationHead(loss='categorical_crossentropy',
                                  metrics=['accuracy'])
        ],
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED)
    automodel.fit([image_x, text_x, structured_data_x],
                  [regression_y, classification_y],
                  epochs=1,
                  validation_split=0.2)
Example 2

import os

import autokeras as ak
import tensorflow as tf
from sklearn.metrics import mean_absolute_error


def applyAutoKeras(X_train, y_train, X_test, y_test, SavePath,
                   max_trials=100, epochs=300, useSavedModels=True):

    if not useSavedModels or not os.path.isdir(SavePath+"/keras_auto_model/best_model/"):
        input_node = ak.StructuredDataInput()
        output_node = ak.DenseBlock()(input_node)
        #output_node = ak.ConvBlock()(output_node)
        output_node = ak.RegressionHead()(output_node)
        AKRegressor = ak.AutoModel(
            inputs=input_node,
            outputs=output_node,
            max_trials=max_trials,
            overwrite=True,
            tuner="bayesian",
            project_name=SavePath+"/keras_auto_model"
        )
        print(" X_train shape: {0}\n y_train shape: {1}\n X_test shape: {2}\n y_test shape: {3}".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
        AKRegressor.fit(x=X_train, y=y_train[:,0],epochs=epochs,verbose=1, batch_size=int(X_train.shape[0]/10), shuffle=False, use_multiprocessing=True)
        AKRegressor.export_model()  # the best model is also saved under the project directory
    else:
        AKRegressor = tf.keras.models.load_model(SavePath+"/keras_auto_model/best_model/")
        
    y_hat = AKRegressor.predict(X_test)
    print("AUTOKERAS - Score: ")
    print("MAE: %.4f" % mean_absolute_error(y_test[:,0], y_hat))
        
    return y_hat
Example 3
def test_io_api(tmp_path):
    num_instances = 20
    image_x = utils.generate_data(num_instances=num_instances, shape=(28, 28))
    text_x = utils.generate_text_data(num_instances=num_instances)

    image_x = image_x[:num_instances]
    structured_data_x = (pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(
        str)[:num_instances])  # np.unicode was removed in NumPy 1.24; str is equivalent
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(),
                ak.TextInput(),
                ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=["mae"]),
            ak.ClassificationHead(loss="categorical_crossentropy",
                                  metrics=["accuracy"]),
        ],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=utils.SEED,
    )
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=4,
    )
Example 4
def test_feature_engineering():
    dataset = common.generate_structured_data(dtype='dataset')
    feature = preprocessor_module.FeatureEngineering()
    feature.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    new_dataset = run_preprocessor(feature, dataset,
                                   common.generate_data(dtype='dataset'),
                                   tf.float32)
    assert isinstance(new_dataset, tf.data.Dataset)
Example 5
    def train():
        house_dataset = fetch_california_housing()
        df = pd.DataFrame(np.concatenate(
            (house_dataset.data, house_dataset.target.reshape(-1, 1)), axis=1),
                          columns=house_dataset.feature_names + ['Price'])
        train_size = int(df.shape[0] * 0.9)
        df[:train_size].to_csv('train.csv', index=False)
        df[train_size:].to_csv('eval.csv', index=False)
        train_file_path = 'train.csv'
        test_file_path = 'eval.csv'

        # x_train as pandas.DataFrame, y_train as pandas.Series
        x_train = pd.read_csv(train_file_path)
        print(type(x_train))  # pandas.DataFrame
        y_train = x_train.pop('Price')
        print(type(y_train))  # pandas.Series

        # You can also use pandas.DataFrame for y_train.
        y_train = pd.DataFrame(y_train)
        print(type(y_train))  # pandas.DataFrame

        # You can also use numpy.ndarray for x_train and y_train.
        x_train = x_train.to_numpy().astype(str)  # np.unicode was removed in NumPy 1.24
        y_train = y_train.to_numpy()
        print(type(x_train))  # numpy.ndarray
        print(type(y_train))  # numpy.ndarray

        # Preparing testing data.
        x_test = pd.read_csv(test_file_path)
        y_test = x_test.pop('Price')

        # Initialize the structured data regressor.
        input_node = ak.StructuredDataInput()
        output_node = ak.StructuredDataBlock(
            categorical_encoding=True)(input_node)
        output_node = ak.RegressionHead()(output_node)
        reg = ak.AutoModel(inputs=input_node,
                           outputs=output_node,
                           overwrite=True,
                           max_trials=3)

        # Feed the structured data regressor with training data.
        reg.fit(
            x_train,
            y_train,
            # Split the training data and use the last 15% as validation data.
            validation_split=0.15,
            epochs=10)

        # Predict with the best model.
        predicted_y = reg.predict(test_file_path)

        # Evaluate the best model with testing data.
        print(reg.evaluate(test_file_path, 'Price'))
Example 6
def test_functional_api(tmp_dir):
    # Prepare the data.
    num_instances = 20
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = common.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = common.dataframe_numpy()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1, ))

    # Build model and train.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    # Feed the normalized/augmented output, not the raw image_input,
    # so the preprocessing blocks above are actually used.
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))

    structured_data_input = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_CSV,
        column_types=common.COLUMN_TYPES_FROM_CSV)
    structured_data_output = ak.FeatureEngineering()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.EmbeddingBlock()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.GraphAutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_dir,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        seed=common.SEED)

    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=2)
Example 7
def test_functional_api(tmp_path):
    # Prepare the data.
    num_instances = 80
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = utils.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = utils.dataframe_numpy()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))

    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.Embedding()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED)

    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=1)
Example 8
def test_structured_data_block():
    block = wrapper.StructuredDataBlock()
    block.num_heads = 1
    block.column_names = ['0', '1']
    block.column_types = {
        '0': adapters.CATEGORICAL,
        '1': adapters.CATEGORICAL,
    }
    hp = kerastuner.HyperParameters()

    output = block.build(hp, ak.StructuredDataInput(shape=(2, )).build())

    assert isinstance(output, tf.Tensor)
Example 9
    def train():
        TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
        TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

        train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
        test_file_path = tf.keras.utils.get_file("eval.csv", TEST_DATA_URL)

        # x_train as pandas.DataFrame, y_train as pandas.Series
        x_train = pd.read_csv(train_file_path)
        print(type(x_train))  # pandas.DataFrame
        y_train = x_train.pop('survived')
        print(type(y_train))  # pandas.Series

        # You can also use pandas.DataFrame for y_train.
        y_train = pd.DataFrame(y_train)
        print(type(y_train))  # pandas.DataFrame

        # You can also use numpy.ndarray for x_train and y_train.
        x_train = x_train.to_numpy().astype(str)  # np.unicode was removed in NumPy 1.24
        y_train = y_train.to_numpy()
        print(type(x_train))  # numpy.ndarray
        print(type(y_train))  # numpy.ndarray

        # Preparing testing data.
        x_test = pd.read_csv(test_file_path)
        y_test = x_test.pop('survived')

        # Initialize the structured data classifier.
        input_node = ak.StructuredDataInput()
        output_node = ak.StructuredDataBlock(
            categorical_encoding=True)(input_node)
        output_node = ak.ClassificationHead()(output_node)
        clf = ak.AutoModel(inputs=input_node,
                           outputs=output_node,
                           overwrite=True,
                           max_trials=3)

        # Feed the structured data classifier with training data.
        clf.fit(
            x_train,
            y_train,
            # Split the training data and use the last 15% as validation data.
            validation_split=0.15,
            epochs=10)

        # Predict with the best model.
        predicted_y = clf.predict(test_file_path)

        # Evaluate the best model with testing data.
        print(clf.evaluate(test_file_path, 'survived'))
Example 10
def test_multi_model():

    context = an.AutoMLPipeline(
        an.MultiModel(
            inputs=[ak.ImageInput(), ak.StructuredDataInput()],
            outputs=[
                ak.RegressionHead(metrics=["mae"]),
                ak.ClassificationHead(loss="categorical_crossentropy",
                                      metrics=["accuracy"]),
            ],
            overwrite=True,
            max_trials=2,
        ))
    context.run_automl()
    assert context.return_automl["model"] is not None
Example 11

def test_text_and_structured_data(tmp_path):
    # Prepare the data.
    num_instances = 80
    (x_text, y_train), (x_test, y_test) = utils.imdb_raw()
    x_structured_data = pd.read_csv(utils.TRAIN_CSV_PATH)

    x_text = x_text[:num_instances]
    x_structured_data = x_structured_data[:num_instances]
    y_classification = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    y_regression = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.Embedding()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()((structured_data_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )

    automodel.fit(
        (x_text, x_structured_data),
        (y_regression, y_classification),
        validation_split=0.2,
        epochs=1,
    )
Example 12
def test_feature_engineering_fix_keyerror():
    data = structured_data(100)
    dataset = tf.data.Dataset.from_tensor_slices(data)
    feature = preprocessor.FeatureEngineering()
    feature.input_node = ak.StructuredDataInput(
        column_names=COLUMN_NAMES_FROM_NUMPY,
        column_types=COLUMN_TYPES_FROM_NUMPY)
    feature.set_hp(kerastuner.HyperParameters())
    for x in dataset:
        feature.update(x)
    feature.finalize()
    feature.set_config(feature.get_config())
    for a in dataset:
        feature.transform(a)

    def map_func(x):
        return tf.py_function(feature.transform, inp=[x], Tout=(tf.float64, ))

    new_dataset = dataset.map(map_func)
    assert isinstance(new_dataset, tf.data.Dataset)
Example 13
def test_structured_data_input(tmp_dir):
    num_data = 500
    data = common.structured_data(num_data)
    x_train = data
    y = np.random.randint(0, 3, num_data)
    y_train = y

    input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    output_node = input_node
    output_node = ak.StructuredDataBlock()(output_node)
    output_node = ak.ClassificationHead(loss='categorical_crossentropy',
                                        metrics=['accuracy'])(output_node)

    auto_model = ak.GraphAutoModel(input_node,
                                   output_node,
                                   directory=tmp_dir,
                                   max_trials=1)
    auto_model.fit(x_train,
                   y_train,
                   epochs=1,
                   validation_data=(x_train, y_train))
    auto_model.predict(x_train)
Example 14
# Generate regression targets.
regression_target = np.random.rand(num_instances, 1).astype(np.float32)
# Generate classification labels of five classes.
classification_target = np.random.randint(5, size=num_instances)
"""
## Build and Train the Model
Then we initialize the multi-modal and multi-task model with
[AutoModel](/auto_model/#automodel-class).
Since this is just a demo, we keep `max_trials` and `epochs` small.
"""

import autokeras as ak
# Initialize the multi-modal, multi-task model with multiple inputs and outputs.
model = ak.AutoModel(inputs=[ak.ImageInput(),
                             ak.StructuredDataInput()],
                     outputs=[
                         ak.RegressionHead(metrics=['mae']),
                         ak.ClassificationHead(loss='categorical_crossentropy',
                                               metrics=['accuracy'])
                     ],
                     overwrite=True,
                     max_trials=2)
# Fit the model with prepared data.
model.fit([image_data, structured_data],
          [regression_target, classification_target],
          epochs=3)
"""
## Validation Data
By default, AutoKeras uses the last 20% of the training data as validation data.
As shown in the example below, you can use `validation_split` to specify the percentage.
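The following is a minimal sketch of that usage, reusing `model` and the data
arrays defined above.
"""

model.fit([image_data, structured_data],
          [regression_target, classification_target],
          # Reserve the last 15% of the training data for validation.
          validation_split=0.15,
          epochs=3)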
Example 15
x_test = x_test[:data_slice]
y_test = y_test[:data_slice]
x_image = x_train.reshape(x_train.shape + (1,))
x_test = x_test.reshape(x_test.shape + (1,))

x_structured = np.random.rand(x_train.shape[0], 100)
y_regression = np.random.rand(x_train.shape[0], 1)
y_classification = y_classification.reshape(-1, 1)

# Build model and train.
inputs = ak.ImageInput(shape=(28, 28, 1))
outputs1 = ak.ResNetBlock(version='next')(inputs)
outputs2 = ak.XceptionBlock()(inputs)
image_outputs = ak.Merge()((outputs1, outputs2))

structured_inputs = ak.StructuredDataInput()
structured_outputs = ak.DenseBlock()(structured_inputs)
merged_outputs = ak.Merge()((structured_outputs, image_outputs))

classification_outputs = ak.ClassificationHead()(merged_outputs)
regression_outputs = ak.RegressionHead()(merged_outputs)
automodel = ak.GraphAutoModel(inputs=[inputs, structured_inputs],
                              outputs=[regression_outputs,
                                       classification_outputs])

automodel.fit((x_image, x_structured),
              (y_regression, y_classification),
              # trials=100,
              validation_split=0.2,
              epochs=200,
              callbacks=[tf.keras.callbacks.EarlyStopping()])
Example 16
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

print(X_train.shape)
print(y_train.shape)
print(y_test.shape)
print(y_val.shape)

print(X_train[1])
print(y_train[1])
# In[27]:

id_input = ak.StructuredDataInput()
id_den = ak.CategoricalToNumerical()(id_input)
id_den = ak.Embedding()(id_den)

x_input = ak.Input()
layer = ak.DenseBlock()(x_input)

mer = ak.Merge()([id_den, layer])
output_node = ak.RegressionHead(metrics=['mae'])(mer)

# In[28]:

# auto_model = ak.AutoModel( inputs= x_input,
#                            #project_name="categorical_model",
#                            outputs = output_node,
#                            objective="loss",
Example 17

reg.fit(
    x_train,
    y_train,
    # Use your own validation set.
    validation_data=(x_val, y_val),
    epochs=10,
)
"""
## Customized Search Space
For advanced users, you may customize your search space by using
[AutoModel](/auto_model/#automodel-class) instead of
[StructuredDataRegressor](/structured_data_regressor). You can configure the
[StructuredDataBlock](/block/#structureddatablock-class) for some high-level
configurations, e.g., `categorical_encoding` for whether to use the
[CategoricalToNumerical](/block/#categoricaltonumerical-class). You can also
leave these arguments unspecified, letting the different choices be tuned
automatically. See the following example for details.
"""

input_node = ak.StructuredDataInput()
output_node = ak.StructuredDataBlock(categorical_encoding=True)(input_node)
output_node = ak.RegressionHead()(output_node)
reg = ak.AutoModel(inputs=input_node,
                   outputs=output_node,
                   overwrite=True,
                   max_trials=3)
reg.fit(x_train, y_train, epochs=10)
"""
The usage of [AutoModel](/auto_model/#automodel-class) is similar to the
[functional API](https://www.tensorflow.org/guide/keras/functional) of Keras.
Basically, you are building a graph whose edges are blocks and whose nodes are
intermediate outputs of blocks. You add an edge from `input_node` to
`output_node` with `output_node = ak.[some_block]([block_args])(input_node)`.

You can even use more fine-grained blocks to customize the search space
further.
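The sketch below is one such finer-grained variant: it spells out the
[CategoricalToNumerical](/block/#categoricaltonumerical-class) and
[DenseBlock](/block/#denseblock-class) steps instead of the all-in-one
`StructuredDataBlock`, reusing `x_train` and `y_train` from above.
"""

input_node = ak.StructuredDataInput()
output_node = ak.CategoricalToNumerical()(input_node)
output_node = ak.DenseBlock()(output_node)
output_node = ak.RegressionHead()(output_node)
reg = ak.AutoModel(inputs=input_node,
                   outputs=output_node,
                   overwrite=True,
                   max_trials=3)
reg.fit(x_train, y_train, epochs=10)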
Example 18
# Generate regression targets.
regression_target = np.random.rand(num_instances, 1).astype(np.float32)
# Generate classification labels of five classes.
classification_target = np.random.randint(5, size=num_instances)

"""
## Build and Train the Model
Then we initialize the multi-modal and multi-task model with 
[AutoModel](/auto_model/#automodel-class).
"""

import autokeras as ak
# Initialize the multi-modal, multi-task model with multiple inputs and outputs.
model = ak.AutoModel(
    inputs=[ak.ImageInput(), ak.StructuredDataInput()],
    outputs=[
        ak.RegressionHead(metrics=['mae']),
        ak.ClassificationHead(loss='categorical_crossentropy', metrics=['accuracy'])
    ],
    max_trials=10)
# Fit the model with prepared data.
model.fit(
    [image_data, structured_data],
    [regression_target, classification_target],
    epochs=10)

"""
## Validation Data
By default, AutoKeras uses the last 20% of the training data as validation data.
As shown in the example below, you can use `validation_split` to specify the
percentage. You can also pass your own validation set via `validation_data`
instead of splitting it from the training data.
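The sketch below shows the `validation_data` variant, reusing `model` and the
arrays defined above (the 80/20 split index is an assumption for illustration).
"""

split = int(num_instances * 0.8)
model.fit(
    [image_data[:split], structured_data[:split]],
    [regression_target[:split], classification_target[:split]],
    # Use your own validation set instead of a split ratio.
    validation_data=(
        [image_data[split:], structured_data[split:]],
        [regression_target[split:], classification_target[split:]],
    ),
    epochs=10)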
Example 19
import numpy as np
import autokeras as ak
from tensorflow.keras.datasets import mnist

(x_train, y_classification), (x_test, y_test) = mnist.load_data()
data_slice = 20
x_train = x_train[:data_slice]
print(x_train.dtype)
y_classification = y_classification[:data_slice]
x_test = x_test[:data_slice]
y_test = y_test[:data_slice]
x_train = x_train.astype(np.float64)
x_test = x_test.astype(np.float64)
print(x_train.dtype)
# x_image = np.reshape(x_train, (200, 28, 28, 1))
# x_test = np.reshape(x_test, (200, 28, 28, 1))
x_image = x_train.reshape(x_train.shape + (1,))
x_test = x_test.reshape(x_test.shape + (1,))
x_structured = np.random.rand(x_train.shape[0], 100)
y_regression = np.random.rand(x_train.shape[0], 1)
y_classification = y_classification.reshape(-1, 1)
# y_classification = np.reshape(y_classification, (-1, 1))
# Build model and train.
automodel = ak.AutoModel(
    inputs=[ak.ImageInput(),
            ak.StructuredDataInput()],
    outputs=[ak.RegressionHead(metrics=['mae']),
             ak.ClassificationHead(loss='categorical_crossentropy',
                                   metrics=['accuracy'])])
automodel.fit([x_image, x_structured],
              [y_regression, y_classification],
              validation_split=0.2)