def test_io_api(tmp_path): num_instances = 20 image_x = utils.generate_data(num_instances=num_instances, shape=(28, 28)) text_x = utils.generate_text_data(num_instances=num_instances) image_x = image_x[:num_instances] structured_data_x = (pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype( np.unicode)[:num_instances]) classification_y = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. automodel = ak.AutoModel( inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()], outputs=[ ak.RegressionHead(metrics=["mae"]), ak.ClassificationHead(loss="categorical_crossentropy", metrics=["accuracy"]), ], directory=tmp_path, max_trials=2, tuner=ak.RandomSearch, seed=utils.SEED, ) automodel.fit( [image_x, text_x, structured_data_x], [regression_y, classification_y], epochs=1, validation_split=0.2, batch_size=4, )
def test_io_api(tmp_path): num_instances = 100 (image_x, train_y), (test_x, test_y) = mnist.load_data() (text_x, train_y), (test_x, test_y) = utils.imdb_raw(num_instances=num_instances) image_x = image_x[:num_instances] text_x = text_x[:num_instances] structured_data_x = utils.generate_structured_data( num_instances=num_instances) classification_y = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. automodel = ak.AutoModel( inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()], outputs=[ ak.RegressionHead(metrics=['mae']), ak.ClassificationHead(loss='categorical_crossentropy', metrics=['accuracy']) ], directory=tmp_path, max_trials=2, seed=utils.SEED) automodel.fit([image_x, text_x, structured_data_x], [regression_y, classification_y], epochs=1, validation_split=0.2)
def test_text_block(): block = wrapper.TextBlock() hp = kerastuner.HyperParameters() block.build(hp, ak.TextInput(shape=(1, )).build()) assert utils.name_in_hps('vectorizer', hp)
def test_text_block(): block = hyperblock_module.TextBlock() block.set_state(block.get_state()) hp = kerastuner.HyperParameters() block.build(hp, ak.TextInput()) assert common.name_in_hps('vectorizer', hp)
def test_text_classifier_tuner0(): tf.keras.backend.clear_session() input_node = ak.TextInput(shape=(1, )) output_node = ak.TextBlock()(input_node) output_node = ak.ClassificationHead(loss='categorical_crossentropy', output_shape=(10, ))(output_node) graph = graph_module.Graph(input_node, output_node) check_initial_hp(task_specific.TEXT_CLASSIFIER[0], graph)
def test_functional_api(tmp_dir): # Prepare the data. num_instances = 20 (image_x, train_y), (test_x, test_y) = mnist.load_data() (text_x, train_y), (test_x, test_y) = common.imdb_raw() (structured_data_x, train_y), (test_x, test_y) = common.dataframe_numpy() image_x = image_x[:num_instances] text_x = text_x[:num_instances] structured_data_x = structured_data_x[:num_instances] classification_y = common.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = common.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. image_input = ak.ImageInput() output = ak.Normalization()(image_input) output = ak.ImageAugmentation()(output) outputs1 = ak.ResNetBlock(version='next')(image_input) outputs2 = ak.XceptionBlock()(image_input) image_output = ak.Merge()((outputs1, outputs2)) structured_data_input = ak.StructuredDataInput( column_names=common.COLUMN_NAMES_FROM_CSV, column_types=common.COLUMN_TYPES_FROM_CSV) structured_data_output = ak.FeatureEngineering()(structured_data_input) structured_data_output = ak.DenseBlock()(structured_data_output) text_input = ak.TextInput() outputs1 = ak.TextToIntSequence()(text_input) outputs1 = ak.EmbeddingBlock()(outputs1) outputs1 = ak.ConvBlock(separable=True)(outputs1) outputs1 = ak.SpatialReduction()(outputs1) outputs2 = ak.TextToNgramVector()(text_input) outputs2 = ak.DenseBlock()(outputs2) text_output = ak.Merge()((outputs1, outputs2)) merged_outputs = ak.Merge()( (structured_data_output, image_output, text_output)) regression_outputs = ak.RegressionHead()(merged_outputs) classification_outputs = ak.ClassificationHead()(merged_outputs) automodel = ak.GraphAutoModel( inputs=[image_input, text_input, structured_data_input], directory=tmp_dir, outputs=[regression_outputs, classification_outputs], max_trials=2, seed=common.SEED) automodel.fit((image_x, text_x, structured_data_x), (regression_y, classification_y), validation_split=0.2, epochs=2)
def test_functional_api(tmp_path): # Prepare the data. num_instances = 80 (image_x, train_y), (test_x, test_y) = mnist.load_data() (text_x, train_y), (test_x, test_y) = utils.imdb_raw() (structured_data_x, train_y), (test_x, test_y) = utils.dataframe_numpy() image_x = image_x[:num_instances] text_x = text_x[:num_instances] structured_data_x = structured_data_x[:num_instances] classification_y = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) regression_y = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. image_input = ak.ImageInput() output = ak.Normalization()(image_input) output = ak.ImageAugmentation()(output) outputs1 = ak.ResNetBlock(version='next')(output) outputs2 = ak.XceptionBlock()(output) image_output = ak.Merge()((outputs1, outputs2)) structured_data_input = ak.StructuredDataInput() structured_data_output = ak.CategoricalToNumerical()(structured_data_input) structured_data_output = ak.DenseBlock()(structured_data_output) text_input = ak.TextInput() outputs1 = ak.TextToIntSequence()(text_input) outputs1 = ak.Embedding()(outputs1) outputs1 = ak.ConvBlock(separable=True)(outputs1) outputs1 = ak.SpatialReduction()(outputs1) outputs2 = ak.TextToNgramVector()(text_input) outputs2 = ak.DenseBlock()(outputs2) text_output = ak.Merge()((outputs1, outputs2)) merged_outputs = ak.Merge()( (structured_data_output, image_output, text_output)) regression_outputs = ak.RegressionHead()(merged_outputs) classification_outputs = ak.ClassificationHead()(merged_outputs) automodel = ak.AutoModel( inputs=[image_input, text_input, structured_data_input], directory=tmp_path, outputs=[regression_outputs, classification_outputs], max_trials=2, tuner=ak.Hyperband, seed=utils.SEED) automodel.fit((image_x, text_x, structured_data_x), (regression_y, classification_y), validation_split=0.2, epochs=1)
def train(): # Load the integer sequence the IMDB dataset with Keras. index_offset = 3 # word index offset (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000, index_from=index_offset) y_train = y_train.reshape(-1, 1) y_test = y_test.reshape(-1, 1) # Prepare the dictionary of index to word. word_to_id = imdb.get_word_index() word_to_id = {k: (v + index_offset) for k, v in word_to_id.items()} word_to_id["<PAD>"] = 0 word_to_id["<START>"] = 1 word_to_id["<UNK>"] = 2 id_to_word = {value: key for key, value in word_to_id.items()} # Convert the word indices to words. x_train = list( map(lambda sentence: ' '.join(id_to_word[i] for i in sentence), x_train)) x_test = list( map(lambda sentence: ' '.join(id_to_word[i] for i in sentence), x_test)) x_train = np.array(x_train, dtype=np.str) x_test = np.array(x_test, dtype=np.str) # Initialize the text regressor. input_node = ak.TextInput() output_node = ak.TextBlock(vectorizer='ngram')(input_node) output_node = ak.RegressionHead()(output_node) reg = ak.AutoModel(inputs=input_node, outputs=output_node, overwrite=True, max_trials=1) reg.fit(x_train, y_train, epochs=2) # Feed the text regressor with training data. reg.fit( x_train, y_train, # Split the training data and use the last 15% as validation data. validation_split=0.15) # Predict with the best model. predicted_y = reg.predict(x_test) # Evaluate the best model with testing data. print(reg.evaluate(x_test, y_test))
def test_text_and_structured_data(tmp_path): # Prepare the data. num_instances = 80 (x_text, y_train), (x_test, y_test) = utils.imdb_raw() x_structured_data = pd.read_csv(utils.TRAIN_CSV_PATH) x_text = x_text[:num_instances] x_structured_data = x_structured_data[:num_instances] y_classification = utils.generate_one_hot_labels( num_instances=num_instances, num_classes=3) y_regression = utils.generate_data(num_instances=num_instances, shape=(1, )) # Build model and train. structured_data_input = ak.StructuredDataInput() structured_data_output = ak.CategoricalToNumerical()(structured_data_input) structured_data_output = ak.DenseBlock()(structured_data_output) text_input = ak.TextInput() outputs1 = ak.TextToIntSequence()(text_input) outputs1 = ak.Embedding()(outputs1) outputs1 = ak.ConvBlock(separable=True)(outputs1) outputs1 = ak.SpatialReduction()(outputs1) outputs2 = ak.TextToNgramVector()(text_input) outputs2 = ak.DenseBlock()(outputs2) text_output = ak.Merge()((outputs1, outputs2)) merged_outputs = ak.Merge()((structured_data_output, text_output)) regression_outputs = ak.RegressionHead()(merged_outputs) classification_outputs = ak.ClassificationHead()(merged_outputs) automodel = ak.AutoModel( inputs=[text_input, structured_data_input], directory=tmp_path, outputs=[regression_outputs, classification_outputs], max_trials=2, tuner=ak.Hyperband, seed=utils.SEED, ) automodel.fit( (x_text, x_structured_data), (y_regression, y_classification), validation_split=0.2, epochs=1, )
For advanced users, you may customize your search space by using [AutoModel](/auto_model/#automodel-class) instead of [TextClassifier](/text_classifier). You can configure the [TextBlock](/block/#textblock-class) for some high-level configurations, e.g., `vectorizer` for the type of text vectorization method to use. You can use 'sequence', which uses [TextToInteSequence](/preprocessor/#texttointsequence-class) to convert the words to integers and use [Embedding](/block/#embedding-class) for embedding the integer sequences, or you can use 'ngram', which uses [TextToNgramVector](/preprocessor/#texttongramvector-class) to vectorize the sentences. You can also do not specify these arguments, which would leave the different choices to be tuned automatically. See the following example for detail. """ import autokeras as ak input_node = ak.TextInput() output_node = ak.TextBlock(vectorizer='ngram')(input_node) output_node = ak.ClassificationHead()(output_node) clf = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=1) clf.fit(x_train, y_train, epochs=2) """ The usage of [AutoModel](/auto_model/#automodel-class) is similar to the [functional API](https://www.tensorflow.org/guide/keras/functional) of Keras. Basically, you are building a graph, whose edges are blocks and the nodes are intermediate outputs of blocks. To add an edge from `input_node` to `output_node` with `output_node = ak.[some_block]([block_args])(input_node)`. You can even also use more fine grained blocks to customize the search space even further. See the following example. """