Example #1
def test_functional_api(tmp_dir):
    # Prepare the data.
    num_instances = 20
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = common.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = common.dataframe_numpy()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1, ))

    # Build model and train.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))

    structured_data_input = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_CSV,
        column_types=common.COLUMN_TYPES_FROM_CSV)
    structured_data_output = ak.FeatureEngineering()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.EmbeddingBlock()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.GraphAutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_dir,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        seed=common.SEED)

    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=2)
Example #2
def test_functional_api(tmp_path):
    # Prepare the data.
    num_instances = 80
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = utils.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = utils.dataframe_numpy()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))

    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.Embedding()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED)

    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=1)
Example #3
def test_text_and_structured_data(tmp_path):
    # Prepare the data.
    num_instances = 80
    (x_text, y_train), (x_test, y_test) = utils.imdb_raw()
    x_structured_data = pd.read_csv(utils.TRAIN_CSV_PATH)

    x_text = x_text[:num_instances]
    x_structured_data = x_structured_data[:num_instances]
    y_classification = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    y_regression = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))

    # Build model and train.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.Embedding()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()((structured_data_output, text_output))

    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )

    automodel.fit(
        (x_text, x_structured_data),
        (y_regression, y_classification),
        validation_split=0.2,
        epochs=1,
    )
Example #4
clf.fit(x_train, y_train, epochs=2)
"""
The usage of [AutoModel](/auto_model/#automodel-class) is similar to the
[functional API](https://www.tensorflow.org/guide/keras/functional) of Keras.
Basically, you are building a graph whose edges are blocks and whose nodes are the
intermediate outputs of blocks. An edge from `input_node` to `output_node` is added
with `output_node = ak.[some_block]([block_args])(input_node)`.

You can even use more fine-grained blocks to customize the search space further,
as in the following example.
"""

import autokeras as ak

input_node = ak.TextInput()
output_node = ak.TextToIntSequence()(input_node)
output_node = ak.Embedding()(output_node)
# Use separable Conv layers in Keras.
output_node = ak.ConvBlock(separable=True)(output_node)
output_node = ak.ClassificationHead()(output_node)
clf = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=1)
clf.fit(x_train, y_train, epochs=2)
"""
## Data Format
The AutoKeras TextClassifier is quite flexible for the data format.

For the text, the input data should be one-dimensional.
For the classification labels, AutoKeras accepts both plain labels, i.e. strings or
integers, and one-hot encoded labels, i.e. vectors of 0s and 1s.

We also support using [tf.data.Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) format for the training data.
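Here is a minimal sketch; the review texts and labels below are made up for
illustration, and the same arrays are shown both as plain NumPy input and
wrapped in a `tf.data.Dataset`.
"""

import numpy as np
import tensorflow as tf
import autokeras as ak

# Illustrative data: a one-dimensional array of raw strings with plain integer labels.
x_train = np.array(["The movie was great!", "The movie was terrible."] * 10)
y_train = np.array([1, 0] * 10)

# Plain NumPy arrays can be passed directly.
clf = ak.TextClassifier(overwrite=True, max_trials=1)
clf.fit(x_train, y_train, epochs=2)

# The same data wrapped as a batched tf.data.Dataset of (text, label) pairs also works.
train_set = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(4)
clf.fit(train_set, epochs=2)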
Example #5
# Find the best model for the given training data
reg.fit(text_inputs, media_success_outputs)

# Predict with the chosen model:
predict_y = reg.predict(text_inputs)

"""
If your text source has a larger vocabulary (number of distinct words), you may
need to create a custom pipeline in AutoKeras to increase the `max_tokens`
parameter.
"""

text_input = (df.Title + " " + df.Headline).to_numpy(dtype="str")

# text input and tokenization
input_node = ak.TextInput()
output_node = ak.TextToIntSequence(max_tokens=20000)(input_node)

# regression output
output_node = ak.RegressionHead()(output_node)

# initialize AutoKeras and find the best model
reg = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=15)
reg.fit(text_input, media_success_outputs)

"""
Measure the performance of the regressor (the call below reuses the training
inputs; in practice, evaluate on an independent test set):
"""

print(reg.evaluate(text_input, media_success_outputs))
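
"""
A sketch of doing the same with a held-out split; scikit-learn's `train_test_split`
and the 80/20 ratio are illustrative choices, not part of the original pipeline.
"""

from sklearn.model_selection import train_test_split

# Hold out 20% of the data before searching, then evaluate only on the unseen part.
x_fit, x_holdout, y_fit, y_holdout = train_test_split(
    text_input, media_success_outputs, test_size=0.2, random_state=42)

reg.fit(x_fit, y_fit)
print(reg.evaluate(x_holdout, y_holdout))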