def test_io_api(tmp_path):
    """Smoke-test the multi-modal, multi-task AutoModel IO API for one epoch.

    Uses MNIST images, raw IMDB text and generated structured data as the
    three inputs, with one regression and one classification head.
    """
    num_instances = 100
    # FIX: the original unpacked mnist labels into train_y/test_x/test_y and
    # immediately overwrote them with the imdb unpack — none of those names
    # were ever used.  Discard them explicitly instead of shadowing.
    (image_x, _), (_, _) = mnist.load_data()
    (text_x, _), (_, _) = utils.imdb_raw(num_instances=num_instances)
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = utils.generate_structured_data(
        num_instances=num_instances)
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1,))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=['mae']),
            ak.ClassificationHead(loss='categorical_crossentropy',
                                  metrics=['accuracy'])
        ],
        directory=tmp_path,
        max_trials=2,
        seed=utils.SEED)
    automodel.fit([image_x, text_x, structured_data_x],
                  [regression_y, classification_y],
                  epochs=1,
                  validation_split=0.2)
def applyAutoKeras(X_train, y_train, X_test, y_test, SavePath,
                   max_trials=100, epochs=300, useSavedModels=True):
    """Fit (or reload) an AutoKeras structured-data regressor and predict.

    Trains a DenseBlock+RegressionHead AutoModel on the first target column
    of ``y_train`` unless a previously exported model exists under
    ``SavePath + "/keras_auto_model/best_model/"`` and ``useSavedModels`` is
    true, in which case that model is loaded instead.

    Returns the predictions for ``X_test`` and prints the MAE against the
    first column of ``y_test``.
    """
    best_model_dir = SavePath + "/keras_auto_model/best_model/"
    if not useSavedModels or not os.path.isdir(best_model_dir):
        input_node = ak.StructuredDataInput()
        output_node = ak.DenseBlock()(input_node)
        # output_node = ak.ConvBlock()(output_node)
        output_node = ak.RegressionHead()(output_node)
        AKRegressor = ak.AutoModel(
            inputs=input_node,
            outputs=output_node,
            max_trials=max_trials,
            overwrite=True,
            tuner="bayesian",
            project_name=SavePath + "/keras_auto_model"
        )
        print(" X_train shape: {0}\n y_train shape: {1}\n X_test shape: {2}\n y_test shape: {3}".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
        # FIX: guard against batch_size == 0 when X_train has fewer than 10 rows.
        batch_size = max(1, int(X_train.shape[0] / 10))
        AKRegressor.fit(x=X_train, y=y_train[:, 0], epochs=epochs, verbose=1,
                        batch_size=batch_size,
                        shuffle=False, use_multiprocessing=True)
        # FIX: export_model() returns the best pipeline; it was previously
        # discarded, so the best_model directory the reload branch expects was
        # never written.  Save it explicitly so useSavedModels=True works.
        AKRegressor.export_model().save(best_model_dir)
    else:
        AKRegressor = tf.keras.models.load_model(best_model_dir)
    y_hat = AKRegressor.predict(X_test)
    print("AUTOKERAS - Score: ")
    print("MAE: %.4f" % mean_absolute_error(y_test[:, 0], y_hat))
    return y_hat
def test_io_api(tmp_path):
    """Smoke-test the IO API with generated image/text data plus CSV-backed
    structured data, training a two-head AutoModel for one epoch."""
    num_instances = 20
    image_x = utils.generate_data(num_instances=num_instances,
                                  shape=(28, 28))
    text_x = utils.generate_text_data(num_instances=num_instances)
    image_x = image_x[:num_instances]
    # FIX: np.unicode was deprecated in NumPy 1.20 and removed in 1.24; the
    # builtin str selects the same NumPy unicode dtype.
    structured_data_x = (pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(
        str)[:num_instances])
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1,))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=["mae"]),
            ak.ClassificationHead(loss="categorical_crossentropy",
                                  metrics=["accuracy"]),
        ],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=utils.SEED,
    )
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=4,
    )
def test_feature_engineering():
    """The FeatureEngineering preprocessor should yield a tf.data.Dataset."""
    source = common.generate_structured_data(dtype='dataset')
    engineering = preprocessor_module.FeatureEngineering()
    engineering.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    transformed = run_preprocessor(engineering,
                                   source,
                                   common.generate_data(dtype='dataset'),
                                   tf.float32)
    assert isinstance(transformed, tf.data.Dataset)
def train():
    """Train an AutoKeras structured-data regressor on California housing.

    Writes train/eval CSVs from sklearn's fetched dataset, demonstrates the
    accepted input types (DataFrame, Series, ndarray, CSV path), fits a
    custom StructuredDataBlock pipeline and evaluates on the eval CSV.
    """
    house_dataset = fetch_california_housing()
    df = pd.DataFrame(np.concatenate(
        (house_dataset.data, house_dataset.target.reshape(-1, 1)),
        axis=1),
        columns=house_dataset.feature_names + ['Price'])
    train_size = int(df.shape[0] * 0.9)
    df[:train_size].to_csv('train.csv', index=False)
    df[train_size:].to_csv('eval.csv', index=False)
    train_file_path = 'train.csv'
    test_file_path = 'eval.csv'

    # x_train as pandas.DataFrame, y_train as pandas.Series
    x_train = pd.read_csv(train_file_path)
    print(type(x_train))  # pandas.DataFrame
    y_train = x_train.pop('Price')
    print(type(y_train))  # pandas.Series

    # You can also use pandas.DataFrame for y_train.
    y_train = pd.DataFrame(y_train)
    print(type(y_train))  # pandas.DataFrame

    # You can also use numpy.ndarray for x_train and y_train.
    # FIX: np.unicode was removed in NumPy 1.24; builtin str selects the same
    # NumPy unicode dtype.
    x_train = x_train.to_numpy().astype(str)
    y_train = y_train.to_numpy()
    print(type(x_train))  # numpy.ndarray
    print(type(y_train))  # numpy.ndarray

    # Preparing testing data.
    x_test = pd.read_csv(test_file_path)
    y_test = x_test.pop('Price')

    # Initialize the structured data regressor.
    input_node = ak.StructuredDataInput()
    output_node = ak.StructuredDataBlock(
        categorical_encoding=True)(input_node)
    output_node = ak.RegressionHead()(output_node)
    reg = ak.AutoModel(inputs=input_node,
                       outputs=output_node,
                       overwrite=True,
                       max_trials=3)
    # Feed the structured data regressor with training data.
    reg.fit(
        x_train,
        y_train,
        # Split the training data and use the last 15% as validation data.
        validation_split=0.15,
        epochs=10)
    # Predict with the best model.
    predicted_y = reg.predict(test_file_path)
    # Evaluate the best model with testing data.
    print(reg.evaluate(test_file_path, 'Price'))
def test_functional_api(tmp_dir):
    """Fit a GraphAutoModel built from image, text and structured branches."""
    # Prepare the data.
    num_instances = 20
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = common.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = common.dataframe_numpy()
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1,))

    # Build model and train.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    # FIX: the normalization/augmentation chain above was dead code — both
    # vision blocks previously consumed `image_input` directly.  Feed them
    # the preprocessed `output` so the chain is actually part of the graph.
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))

    structured_data_input = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_CSV,
        column_types=common.COLUMN_TYPES_FROM_CSV)
    structured_data_output = ak.FeatureEngineering()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.EmbeddingBlock()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))
    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.GraphAutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_dir,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        seed=common.SEED)
    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=2)
def test_functional_api(tmp_path):
    """Build a three-input, two-head graph with the functional API and fit it."""
    # Prepare the data.
    sample_count = 80
    (image_x, _), (_, _) = mnist.load_data()
    (text_x, _), (_, _) = utils.imdb_raw()
    (structured_data_x, _), (_, _) = utils.dataframe_numpy()
    image_x = image_x[:sample_count]
    text_x = text_x[:sample_count]
    structured_data_x = structured_data_x[:sample_count]
    classification_y = utils.generate_one_hot_labels(
        num_instances=sample_count, num_classes=3)
    regression_y = utils.generate_data(num_instances=sample_count,
                                       shape=(1,))

    # Image branch: normalize, augment, then two vision backbones merged.
    image_input = ak.ImageInput()
    augmented = ak.ImageAugmentation()(ak.Normalization()(image_input))
    image_output = ak.Merge()((ak.ResNetBlock(version='next')(augmented),
                               ak.XceptionBlock()(augmented)))

    # Structured branch: categorical encoding followed by dense layers.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.DenseBlock()(
        ak.CategoricalToNumerical()(structured_data_input))

    # Text branch: a sequence path and an n-gram path, merged.
    text_input = ak.TextInput()
    seq_path = ak.TextToIntSequence()(text_input)
    seq_path = ak.Embedding()(seq_path)
    seq_path = ak.ConvBlock(separable=True)(seq_path)
    seq_path = ak.SpatialReduction()(seq_path)
    ngram_path = ak.DenseBlock()(ak.TextToNgramVector()(text_input))
    text_output = ak.Merge()((seq_path, ngram_path))

    # Join all three branches and attach both task heads.
    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))
    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED)
    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=1)
def test_structured_data_block():
    """StructuredDataBlock.build returns a tf.Tensor for two categorical columns."""
    columns = ['0', '1']
    block = wrapper.StructuredDataBlock()
    block.num_heads = 1
    block.column_names = columns
    block.column_types = {name: adapters.CATEGORICAL for name in columns}
    hp = kerastuner.HyperParameters()
    input_tensor = ak.StructuredDataInput(shape=(2,)).build()
    built = block.build(hp, input_tensor)
    assert isinstance(built, tf.Tensor)
def train():
    """Train an AutoKeras structured-data classifier on the Titanic dataset.

    Downloads the TensorFlow-hosted Titanic CSVs, demonstrates the accepted
    input types (DataFrame, Series, ndarray, CSV path), fits a custom
    StructuredDataBlock pipeline and evaluates on the eval CSV.
    """
    TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
    TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

    train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
    test_file_path = tf.keras.utils.get_file("eval.csv", TEST_DATA_URL)

    # x_train as pandas.DataFrame, y_train as pandas.Series
    x_train = pd.read_csv(train_file_path)
    print(type(x_train))  # pandas.DataFrame
    y_train = x_train.pop('survived')
    print(type(y_train))  # pandas.Series

    # You can also use pandas.DataFrame for y_train.
    y_train = pd.DataFrame(y_train)
    print(type(y_train))  # pandas.DataFrame

    # You can also use numpy.ndarray for x_train and y_train.
    # FIX: np.unicode was removed in NumPy 1.24; builtin str selects the same
    # NumPy unicode dtype.
    x_train = x_train.to_numpy().astype(str)
    y_train = y_train.to_numpy()
    print(type(x_train))  # numpy.ndarray
    print(type(y_train))  # numpy.ndarray

    # Preparing testing data.
    x_test = pd.read_csv(test_file_path)
    y_test = x_test.pop('survived')

    # Initialize the structured data classifier.
    input_node = ak.StructuredDataInput()
    output_node = ak.StructuredDataBlock(
        categorical_encoding=True)(input_node)
    output_node = ak.ClassificationHead()(output_node)
    clf = ak.AutoModel(inputs=input_node,
                       outputs=output_node,
                       overwrite=True,
                       max_trials=3)
    # Feed the structured data classifier with training data.
    clf.fit(
        x_train,
        y_train,
        # Split the training data and use the last 15% as validation data.
        validation_split=0.15,
        epochs=10)
    # Predict with the best model.
    predicted_y = clf.predict(test_file_path)
    # Evaluate the best model with testing data.
    print(clf.evaluate(test_file_path, 'survived'))
def test_multi_model():
    """Running an AutoMLPipeline with a MultiModel should yield a model."""
    context = an.AutoMLPipeline(
        an.MultiModel(
            inputs=[ak.ImageInput(), ak.StructuredDataInput()],
            outputs=[
                ak.RegressionHead(metrics=["mae"]),
                ak.ClassificationHead(loss="categorical_crossentropy",
                                      metrics=["accuracy"]),
            ],
            overwrite=True,
            max_trials=2,
        ))
    context.run_automl()
    # FIX: compare against None with identity, not equality (PEP 8); `!=`
    # invokes __ne__, which arbitrary model objects may override.
    assert context.return_automl["model"] is not None
def test_text_and_structured_data(tmp_path):
    """Fit a two-input (text + tabular), two-head AutoModel for one epoch."""
    # Prepare the data.
    sample_count = 80
    (x_text, _), (_, _) = utils.imdb_raw()
    x_structured_data = pd.read_csv(utils.TRAIN_CSV_PATH)
    x_text = x_text[:sample_count]
    x_structured_data = x_structured_data[:sample_count]
    y_classification = utils.generate_one_hot_labels(
        num_instances=sample_count, num_classes=3)
    y_regression = utils.generate_data(num_instances=sample_count,
                                       shape=(1,))

    # Structured branch: categorical encoding followed by dense layers.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.DenseBlock()(
        ak.CategoricalToNumerical()(structured_data_input))

    # Text branch: a sequence path and an n-gram path, merged.
    text_input = ak.TextInput()
    seq_path = ak.TextToIntSequence()(text_input)
    seq_path = ak.Embedding()(seq_path)
    seq_path = ak.ConvBlock(separable=True)(seq_path)
    seq_path = ak.SpatialReduction()(seq_path)
    ngram_path = ak.DenseBlock()(ak.TextToNgramVector()(text_input))
    text_output = ak.Merge()((seq_path, ngram_path))

    # Join both branches and attach the two task heads.
    merged_outputs = ak.Merge()((structured_data_output, text_output))
    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED,
    )
    automodel.fit(
        (x_text, x_structured_data),
        (y_regression, y_classification),
        validation_split=0.2,
        epochs=1,
    )
def test_feature_engineering_fix_keyerror():
    """FeatureEngineering survives a full update/finalize/transform cycle
    and can be mapped over a dataset (regression test for a KeyError)."""
    raw = structured_data(100)
    ds = tf.data.Dataset.from_tensor_slices(raw)
    fe = preprocessor.FeatureEngineering()
    fe.input_node = ak.StructuredDataInput(
        column_names=COLUMN_NAMES_FROM_NUMPY,
        column_types=COLUMN_TYPES_FROM_NUMPY)
    fe.set_hp(kerastuner.HyperParameters())
    # First pass: accumulate statistics, then freeze them.
    for batch in ds:
        fe.update(batch)
    fe.finalize()
    # Round-trip the config to mimic serialization before transforming.
    fe.set_config(fe.get_config())
    # Second pass: transform every element eagerly.
    for batch in ds:
        fe.transform(batch)

    def to_tensor(batch):
        return tf.py_function(fe.transform, inp=[batch], Tout=(tf.float64,))

    mapped = ds.map(to_tensor)
    assert isinstance(mapped, tf.data.Dataset)
def test_structured_data_input(tmp_dir):
    """GraphAutoModel trains and predicts on labeled structured data."""
    num_data = 500
    # Inline the intermediate `data`/`y` bindings from the original.
    x_train = common.structured_data(num_data)
    y_train = np.random.randint(0, 3, num_data)
    input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    head = ak.StructuredDataBlock()(input_node)
    head = ak.ClassificationHead(loss='categorical_crossentropy',
                                 metrics=['accuracy'])(head)
    auto_model = ak.GraphAutoModel(input_node,
                                   head,
                                   directory=tmp_dir,
                                   max_trials=1)
    auto_model.fit(x_train,
                   y_train,
                   epochs=1,
                   validation_data=(x_train, y_train))
    auto_model.predict(x_train)
# Generate regression targets. regression_target = np.random.rand(num_instances, 1).astype(np.float32) # Generate classification labels of five classes. classification_target = np.random.randint(5, size=num_instances) """ ## Build and Train the Model Then we initialize the multi-modal and multi-task model with [AutoModel](/auto_model/#automodel-class). Since this is just a demo, we use small amount of `max_trials` and `epochs`. """ import autokeras as ak # Initialize the multi with multiple inputs and outputs. model = ak.AutoModel(inputs=[ak.ImageInput(), ak.StructuredDataInput()], outputs=[ ak.RegressionHead(metrics=['mae']), ak.ClassificationHead(loss='categorical_crossentropy', metrics=['accuracy']) ], overwrite=True, max_trials=2) # Fit the model with prepared data. model.fit([image_data, structured_data], [regression_target, classification_target], epochs=3) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage.
# NOTE(review): this fragment assumes `x_train`, `x_test`, `y_test`,
# `y_classification` and `data_slice` are defined earlier in the script —
# confirm against the surrounding code.
x_test = x_test[:data_slice]
y_test = y_test[:data_slice]
# Append a channel axis for grayscale images: (n, 28, 28) -> (n, 28, 28, 1).
x_image = x_train.reshape(x_train.shape + (1,))
x_test = x_test.reshape(x_test.shape + (1,))
# Synthetic structured features and regression targets aligned row-for-row
# with the image samples.
x_structured = np.random.rand(x_train.shape[0], 100)
y_regression = np.random.rand(x_train.shape[0], 1)
y_classification = y_classification.reshape(-1, 1)

# Build model and train.
inputs = ak.ImageInput(shape=(28, 28, 1))
# Two vision backbones on the same input, merged into one image branch.
outputs1 = ak.ResNetBlock(version='next')(inputs)
outputs2 = ak.XceptionBlock()(inputs)
image_outputs = ak.Merge()((outputs1, outputs2))
# Structured branch: dense layers over the tabular features.
structured_inputs = ak.StructuredDataInput()
structured_outputs = ak.DenseBlock()(structured_inputs)
# Join both branches and attach one head per task.
merged_outputs = ak.Merge()((structured_outputs, image_outputs))
classification_outputs = ak.ClassificationHead()(merged_outputs)
regression_outputs = ak.RegressionHead()(merged_outputs)
automodel = ak.GraphAutoModel(inputs=[inputs, structured_inputs],
                              outputs=[regression_outputs,
                                       classification_outputs])
automodel.fit((x_image, x_structured),
              (y_regression, y_classification),
              # trials=100,
              validation_split=0.2,
              epochs=200,
              callbacks=[tf.keras.callbacks.EarlyStopping()])
from sklearn.model_selection import train_test_split

# NOTE(review): `x` and `y` come from earlier notebook cells not visible here.
X_train, X_test, y_train, y_test = train_test_split(x, y,
                                                    test_size=0.33,
                                                    random_state=42)
print(X_train.shape)
print(y_train.shape)
print(y_test.shape)
# NOTE(review): `y_val` is not defined anywhere in this fragment — presumably
# created in an earlier cell; as shown this line raises NameError. Verify.
print(y_val.shape)
print(X_train[1])
print(y_train[1])

# In[27]:

# Categorical branch: numerically encode categorical columns, then embed.
id_input = ak.StructuredDataInput()
id_den = ak.CategoricalToNumerical()(id_input)
id_den = ak.Embedding()(id_den)
# Dense branch over the generic input; both branches merge into one
# regression head.
x_input = ak.Input()
layer = ak.DenseBlock()(x_input)
mer = ak.Merge()([id_den, layer])
output_node = ak.RegressionHead(metrics=['mae'])(mer)

# In[28]:

# auto_model = ak.AutoModel( inputs= x_input,
# #project_name="categorical_model",
# outputs = output_node,
# objective="loss",
# NOTE(review): this fragment opens mid-call — the trailing arguments below
# belong to a fit(...) invocation started before this excerpt.
validation_data=(x_val, y_val),
    epochs=10,
)
"""
## Customized Search Space
For advanced users, you may customize your search space by using
[AutoModel](/auto_model/#automodel-class) instead of
[StructuredDataRegressor](/structured_data_regressor). You can configure the
[StructuredDataBlock](/block/#structureddatablock-class) for some high-level
configurations, e.g., `categorical_encoding` for whether to use the
[CategoricalToNumerical](/block/#categoricaltonumerical-class). You can also do
not specify these arguments, which would leave the different choices to be
tuned automatically. See the following example for detail.
"""

# Custom search space: explicit StructuredDataBlock + RegressionHead graph.
input_node = ak.StructuredDataInput()
output_node = ak.StructuredDataBlock(categorical_encoding=True)(input_node)
output_node = ak.RegressionHead()(output_node)

reg = ak.AutoModel(inputs=input_node,
                   outputs=output_node,
                   overwrite=True,
                   max_trials=3)
# NOTE(review): `x_train`/`y_train` are prepared earlier in the tutorial.
reg.fit(x_train, y_train, epochs=10)

"""
The usage of [AutoModel](/auto_model/#automodel-class) is similar to the
[functional API](https://www.tensorflow.org/guide/keras/functional) of Keras.
Basically, you are building a graph, whose edges are blocks and the nodes are
intermediate outputs of blocks. To add an edge from `input_node` to
`output_node` with `output_node = ak.[some_block]([block_args])(input_node)`.
You can even also use more fine grained blocks to customize the search space
# Generate regression targets. regression_target = np.random.rand(num_instances, 1).astype(np.float32) # Generate classification labels of five classes. classification_target = np.random.randint(5, size=num_instances) """ ## Build and Train the Model Then we initialize the multi-modal and multi-task model with [AutoModel](/auto_model/#automodel-class). """ import autokeras as ak # Initialize the multi with multiple inputs and outputs. model = ak.AutoModel( inputs=[ak.ImageInput(), ak.StructuredDataInput()], outputs=[ ak.RegressionHead(metrics=['mae']), ak.ClassificationHead(loss='categorical_crossentropy', metrics=['accuracy']) ], max_trials=10) # Fit the model with prepared data. model.fit( [image_data, structured_data], [regression_target, classification_target], epochs=10) """ ## Validation Data By default, AutoKeras use the last 20% of training data as validation data. As shown in the example below, you can use `validation_split` to specify the percentage.
# Demo script: multi-modal (image + synthetic structured) and multi-task
# (regression + classification) AutoModel on a tiny MNIST slice.
(x_train, y_classification), (x_test, y_test) = mnist.load_data()

# Work on a small slice so the demo runs quickly.
data_slice = 20
x_train = x_train[:data_slice]
print(x_train.dtype)
y_classification = y_classification[:data_slice]
x_test = x_test[:data_slice]
y_test = y_test[:data_slice]
x_train = x_train.astype(np.float64)
x_test = x_test.astype(np.float64)
print(x_train.dtype)

# Append a channel axis for grayscale images: (n, 28, 28) -> (n, 28, 28, 1).
# (CLEANUP: removed the commented-out np.reshape variants and a duplicated
# x_structured/y_regression assignment that lived inside a dead
# triple-quoted string.)
x_image = x_train.reshape(x_train.shape + (1,))
x_test = x_test.reshape(x_test.shape + (1,))

# Synthetic structured features and regression targets aligned row-for-row
# with the image samples.
x_structured = np.random.rand(x_train.shape[0], 100)
y_regression = np.random.rand(x_train.shape[0], 1)
y_classification = y_classification.reshape(-1, 1)

# Build model and train.
automodel = ak.AutoModel(
    inputs=[ak.ImageInput(), ak.StructuredDataInput()],
    outputs=[ak.RegressionHead(metrics=['mae']),
             ak.ClassificationHead(loss='categorical_crossentropy',
                                   metrics=['accuracy'])])
automodel.fit([x_image, x_structured],
              [y_regression, y_classification],
              validation_split=0.2)