def test_invalid_tuner_name_error(tmp_path):
    """An unrecognized tuner name must raise a descriptive ValueError."""
    with pytest.raises(ValueError) as excinfo:
        ak.AutoModel(
            ak.ImageInput(),
            ak.RegressionHead(),
            directory=tmp_path,
            tuner="unknown",
        )
    assert "Expected the tuner argument to be one of" in str(excinfo.value)
def main():
    """End-to-end training script: load the epidemic dataframes, fit an
    AutoKeras dense regressor, then save the model, the validation data,
    and a scatter plot of the results."""
    # Training and validation dataframes for deaths / recoveries / cases.
    deaths, recoveries, cases, deaths_validation, recoveries_validation, cases_validation = get_dfs()
    X, y = get_ds(deaths, recoveries, cases)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    # Validation frames serialized separately for later evaluation.
    data = convert_dfs(deaths_validation, recoveries_validation, cases_validation)
    column_names = []  # NOTE(review): assigned but never used in this function
    # Three stacked DenseBlocks feeding a regression head.
    model_input = ak.Input()
    model_output = ak.blocks.basic.DenseBlock()(model_input)
    model_output = ak.blocks.basic.DenseBlock()(model_output)
    model_output = ak.blocks.basic.DenseBlock()(model_output)
    model_output = ak.RegressionHead()(model_output)
    trainer = ak.AutoModel(inputs=model_input, outputs=model_output)
    trainer.fit(X_train, y_train)
    # Export the best found Keras model and persist artifacts.
    model = trainer.export_model()
    y_fit = model.predict(X_test)
    tf.keras.models.save_model(model, 'model.tf')
    with open('data.json', 'wt') as f:
        json.dump(data, f)
    print(model.evaluate(X_test, y_test))
    # Scatter of true vs. predicted targets.
    plt.scatter(y_test, y_fit)
    plt.savefig('results.png')
def test_auto_model_max_trial_field_as_specified(tmp_path):
    """max_trials passed to the constructor is stored on the instance."""
    model = ak.AutoModel(
        ak.ImageInput(),
        ak.RegressionHead(),
        directory=tmp_path,
        max_trials=10,
    )
    assert model.max_trials == 10
def test_io_api(tmp_path):
    """Multi-modal IO API: image, text and structured inputs feeding
    regression and classification heads."""
    num_instances = 100
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = utils.imdb_raw(
        num_instances=num_instances)
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = utils.generate_structured_data(
        num_instances=num_instances)
    # Synthetic labels replace the ones loaded above.
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))
    # Build model and train.
    input_nodes = [ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()]
    output_heads = [
        ak.RegressionHead(metrics=['mae']),
        ak.ClassificationHead(loss='categorical_crossentropy',
                              metrics=['accuracy']),
    ]
    model = ak.AutoModel(inputs=input_nodes,
                         outputs=output_heads,
                         directory=tmp_path,
                         max_trials=2,
                         seed=utils.SEED)
    model.fit([image_x, text_x, structured_data_x],
              [regression_y, classification_y],
              epochs=1,
              validation_split=0.2)
def applyAutoKeras(X_train, y_train, X_test, y_test, SavePath, max_trials=100, epochs=300, useSavedModels = True):
    """Fit (or reload) an AutoKeras structured-data regressor and return
    its predictions on X_test.

    Only the first target column (y_*[:, 0]) is used for training and
    scoring. When useSavedModels is True and a previously saved model
    exists under SavePath, that model is loaded instead of re-searching.
    """
    if not useSavedModels or not os.path.isdir(SavePath+"/keras_auto_model/best_model/"):
        # Structured input -> DenseBlock -> regression head search space.
        input_node = ak.StructuredDataInput()
        output_node = ak.DenseBlock()(input_node)
        #output_node = ak.ConvBlock()(output_node)
        output_node = ak.RegressionHead()(output_node)
        # Bayesian-optimization tuner; trials are written under SavePath.
        AKRegressor = ak.AutoModel(
            inputs=input_node,
            outputs=output_node,
            max_trials=max_trials,
            overwrite=True,
            tuner="bayesian",
            project_name=SavePath+"/keras_auto_model"
        )
        print(" X_train shape: {0}\n y_train shape: {1}\n X_test shape: {2}\n y_test shape: {3}".format(X_train.shape, y_train.shape, X_test.shape, y_test.shape))
        # batch_size is a tenth of the training set; shuffle disabled,
        # presumably because rows are time-ordered — TODO confirm.
        AKRegressor.fit(x=X_train, y=y_train[:,0],epochs=epochs,verbose=1, batch_size=int(X_train.shape[0]/10), shuffle=False, use_multiprocessing=True)
        # NOTE(review): the return value of export_model() is discarded and
        # never saved explicitly — verify the reload branch below actually
        # finds a model at .../best_model/.
        AKRegressor.export_model()
    else:
        AKRegressor = tf.keras.models.load_model(SavePath+"/keras_auto_model/best_model/")
    y_hat = AKRegressor.predict(X_test)
    print("AUTOKERAS - Score: ")
    print("MAE: %.4f" % mean_absolute_error(y_test[:,0], y_hat))
    return y_hat
def train():
    """Search a ResNet-only image regressor on MNIST and report results."""
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Restrict the search space: plain ResNet architectures, no dataset
    # normalization, no data augmentation.
    input_node = ak.ImageInput()
    block_output = ak.ImageBlock(
        block_type="resnet",
        normalize=False,
        augment=False,
    )(input_node)
    head_output = ak.RegressionHead()(block_output)
    regressor = ak.AutoModel(inputs=input_node,
                             outputs=head_output,
                             overwrite=True,
                             max_trials=1)
    # Hold out the final 15% of the training data for validation.
    regressor.fit(x_train, y_train, validation_split=0.15, epochs=2)
    # Predict with the best model, then evaluate it on the test split.
    predictions = regressor.predict(x_test)
    print(predictions)
    print(regressor.evaluate(x_test, y_test))
def test_image_blocks(tmp_path):
    """ResNet and Xception branches can be merged into one classifier."""
    num_instances = 10
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train[:num_instances]
    y_regression = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))
    # Two parallel backbone branches over the same augmented input.
    inputs = ak.ImageInput()
    normalized = ak.Normalization()(inputs)
    augmented = ak.ImageAugmentation()(normalized)
    resnet_branch = ak.ResNetBlock(version="v2")(augmented)
    xception_branch = ak.XceptionBlock()(augmented)
    merged = ak.Merge()((resnet_branch, xception_branch))
    head = ak.ClassificationHead()(merged)
    model = ak.AutoModel(
        inputs=inputs,
        outputs=head,
        directory=tmp_path,
        max_trials=1,
        seed=utils.SEED,
    )
    model.fit(x_train,
              y_regression,
              validation_data=(x_train, y_regression),
              epochs=1)
def build_model(self):
    """Build an AutoModel from the project's ``base_model.py`` script.

    The script is expected to define ``input_node`` and ``output_node``.

    Returns:
        ak.AutoModel wired to the nodes defined by the script.
    """
    base_model_path = path_join(self.model_path, 'base_model.py')
    # Bug fix: in Python 3, exec() inside a function cannot create local
    # variables that are visible afterwards, so the original bare
    # `exec(open(...).read())` left input_node/output_node undefined
    # (NameError). Execute the script in an explicit namespace instead,
    # and close the file deterministically.
    namespace = {}
    with open(base_model_path) as f:
        # NOTE(review): exec of file contents — only safe for trusted,
        # project-controlled scripts.
        exec(f.read(), namespace)
    model = ak.AutoModel(inputs=namespace['input_node'],
                         outputs=namespace['output_node'],
                         max_trials=1)
    return model
def test_io_api(tmp_path):
    """IO API with image, text and structured inputs searched by the
    RandomSearch tuner."""
    num_instances = 20
    image_x = utils.generate_data(num_instances=num_instances, shape=(28, 28))
    text_x = utils.generate_text_data(num_instances=num_instances)
    image_x = image_x[:num_instances]
    # Fix: np.unicode was removed in NumPy 2.0 (deprecated since 1.20);
    # the builtin `str` produces the same unicode dtype on all versions.
    structured_data_x = (pd.read_csv(utils.TRAIN_CSV_PATH).to_numpy().astype(
        str)[:num_instances])
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                      shape=(1, ))
    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=["mae"]),
            ak.ClassificationHead(loss="categorical_crossentropy",
                                  metrics=["accuracy"]),
        ],
        directory=tmp_path,
        max_trials=2,
        tuner=ak.RandomSearch,
        seed=utils.SEED,
    )
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=1,
        validation_split=0.2,
        batch_size=4,
    )
def test_evaluate(tuner_fn, tmp_dir):
    """evaluate() routes through the mocked tuner's best model."""
    graph = mock.Mock()
    graph.preprocess.return_value = (mock.Mock(), mock.Mock())
    mocked_tuner_class = tuner_fn.return_value
    mocked_tuner = mocked_tuner_class.return_value
    mocked_tuner.get_best_model.return_value = (graph, mock.Mock())
    features = np.random.rand(100, 32)
    targets = np.random.rand(100, 1)
    # Dense regression graph built with the functional API.
    input_node = ak.Input()
    output_node = ak.DenseBlock()(input_node)
    output_node = ak.RegressionHead()(output_node)
    model = ak.AutoModel(input_node,
                         output_node,
                         directory=tmp_dir,
                         max_trials=1)
    model.fit(features,
              targets,
              epochs=1,
              validation_data=(features, targets))
    model.evaluate(features, targets)
    assert tuner_fn.called
    assert mocked_tuner_class.called
    assert mocked_tuner.get_best_model.called
def get_multi_io_auto_model(tmp_path):
    """Return an AutoModel with two image inputs and two regression heads."""
    inputs = [ak.ImageInput(), ak.ImageInput()]
    outputs = [ak.RegressionHead(), ak.RegressionHead()]
    return ak.AutoModel(inputs,
                        outputs,
                        directory=tmp_path,
                        max_trials=2,
                        overwrite=False)
def test_image_blocks(tmp_path):
    """Merged ResNet/Xception branches train on random image data."""
    num_instances = 10
    x_train = test_utils.generate_data(num_instances=num_instances,
                                       shape=(28, 28))
    y_train = np.random.randint(0, 10, num_instances)
    # Two parallel backbone branches over the same augmented input.
    image_input = ak.ImageInput()
    augmented = ak.ImageAugmentation()(ak.Normalization()(image_input))
    branch_a = ak.ResNetBlock(version="v2")(augmented)
    branch_b = ak.XceptionBlock()(augmented)
    head = ak.ClassificationHead()(ak.Merge()((branch_a, branch_b)))
    model = ak.AutoModel(
        inputs=image_input,
        outputs=head,
        directory=tmp_path,
        max_trials=1,
        seed=test_utils.SEED,
    )
    model.fit(x_train, y_train, validation_data=(x_train, y_train), epochs=1)
def io_api():
    """Train an MNIST classifier via the IO API and return its evaluation
    metrics on the test split."""
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    classifier = ak.AutoModel(ak.ImageInput(),
                              ak.ClassificationHead(),
                              seed=5,
                              max_trials=3)
    classifier.fit(x_train, y_train, validation_split=0.2)
    return classifier.evaluate(x_test, y_test)
def test_predict_tuple_x_and_tuple_y_predict_doesnt_crash(tuner_fn, tmp_path):
    """predict() accepts a dataset whose x and y are one-element tuples."""
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_path)
    x = (np.random.rand(100, 32, 32, 3), )
    y = (np.random.rand(100, 1), )
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    model.fit(dataset)
    model.predict(dataset)
def test_auto_model_project_name_field_as_specified(tmp_path):
    """project_name passed to the constructor is stored on the instance."""
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_path,
                         project_name="auto_model")
    assert model.project_name == "auto_model"
def test_auto_model_basic(_, tmp_dir):
    """Smoke test: an image->regression AutoModel fits without errors."""
    features = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_dir,
                         max_trials=2)
    model.fit(features, targets, epochs=2, validation_split=0.2)
def train_ak():
    """Train an AutoKeras image classifier on the JPEGs found under
    config.database_path, evaluate it, and export it as an .h5 model."""
    image_count = len(list(config.database_path.glob('**/*.jpg')))
    print("# of images found:", image_count)
    list_ds = tf.data.Dataset.list_files(str(config.database_path / '*/*.jpg'),
                                         shuffle=False)
    list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)
    # Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
    AUTOTUNE = tf.data.experimental.AUTOTUNE
    train_ds = list_ds.map(utils.process_path, num_parallel_calls=AUTOTUNE)
    # Materializes the whole dataset into numpy arrays (fits in memory).
    features = np.array([list(x[0].numpy()) for x in list(train_ds)])
    labels = np.array([x[1].numpy() for x in list(train_ds)])
    input_node = ak.ImageInput()
    output_node = ak.Normalization()(input_node)
    # NOTE(review): rotation_factor/zoom_factor are passed False where a
    # numeric factor appears to be expected — confirm against the
    # ImageAugmentation API.
    output_node = ak.ImageAugmentation(horizontal_flip=False,
                                       vertical_flip=False,
                                       rotation_factor=False,
                                       zoom_factor=False)(output_node)
    output_node = ak.ClassificationHead()(output_node)
    clf = ak.AutoModel(inputs=input_node,
                       outputs=output_node,
                       overwrite=True,
                       max_trials=config.max_trials,
                       directory=config.outpath_mpii)
    # Feed the tensorflow Dataset to the classifier.
    # The first `split` samples train, the remainder validates.
    split = config.split
    x_val = features[split:]
    y_val = labels[split:]
    x_train = features[:split]
    y_train = labels[:split]
    clf.fit(x_train,
            y_train,
            validation_data=(x_val, y_val),
            epochs=config.epochs)
    # Predict with the best model.
    #predicted_y = clf.predict(x_val)
    #print(predicted_y)
    # Evaluate the best model with testing data.
    print(clf.evaluate(x_val, y_val))
    # Export as a Keras Model.
    model = clf.export_model()
    print(
        type(model))  # <class 'tensorflow.python.keras.engine.training.Model'>
    model.save(config.output_path + "model_ak_imgClsf.h5")
    return 0
def dataset_error(x, y, validation_data, message, tmp_path):
    """Assert that fitting a two-input/two-output AutoModel with the given
    data raises a ValueError whose text contains `message`."""
    model = ak.AutoModel(
        [ak.ImageInput(), ak.ImageInput()],
        [ak.RegressionHead(), ak.RegressionHead()],
        directory=tmp_path,
        max_trials=2,
        overwrite=False,
    )
    with pytest.raises(ValueError) as excinfo:
        model.fit(x, y, epochs=2, validation_data=validation_data)
    assert message in str(excinfo.value)
def test_auto_model_predict(tuner_fn, tmp_path):
    """predict() works after fit() and goes through the tuner factory."""
    features = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_path,
                         max_trials=2)
    model.fit(features, targets, epochs=2, validation_split=0.2)
    model.predict(features)
    assert tuner_fn.called
def create_image_regressor(self):
    """Return a conv+dense image regressor searched over 10 trials."""
    image_input = ak.ImageInput()
    features = ak.ConvBlock()(image_input)
    features = ak.DenseBlock()(features)
    head = ak.RegressionHead()(features)
    return ak.AutoModel(inputs=image_input, outputs=head, max_trials=10)
def train():
    """Search a structured-data regressor on the California housing data.

    Writes train.csv/eval.csv splits, demonstrates DataFrame, Series and
    ndarray inputs, then fits, predicts, and evaluates straight from the
    CSV file path.
    """
    house_dataset = fetch_california_housing()
    df = pd.DataFrame(np.concatenate(
        (house_dataset.data, house_dataset.target.reshape(-1, 1)), axis=1),
        columns=house_dataset.feature_names + ['Price'])
    # 90/10 train/eval split persisted as CSV.
    train_size = int(df.shape[0] * 0.9)
    df[:train_size].to_csv('train.csv', index=False)
    df[train_size:].to_csv('eval.csv', index=False)
    train_file_path = 'train.csv'
    test_file_path = 'eval.csv'
    # x_train as pandas.DataFrame, y_train as pandas.Series
    x_train = pd.read_csv(train_file_path)
    print(type(x_train))  # pandas.DataFrame
    y_train = x_train.pop('Price')
    print(type(y_train))  # pandas.Series
    # You can also use pandas.DataFrame for y_train.
    y_train = pd.DataFrame(y_train)
    print(type(y_train))  # pandas.DataFrame
    # You can also use numpy.ndarray for x_train and y_train.
    # Fix: np.unicode was removed in NumPy 2.0 (deprecated since 1.20);
    # the builtin `str` yields the same unicode dtype on every version.
    x_train = x_train.to_numpy().astype(str)
    y_train = y_train.to_numpy()
    print(type(x_train))  # numpy.ndarray
    print(type(y_train))  # numpy.ndarray
    # Preparing testing data.
    x_test = pd.read_csv(test_file_path)
    y_test = x_test.pop('Price')
    # Initialize the structured data regressor.
    input_node = ak.StructuredDataInput()
    output_node = ak.StructuredDataBlock(
        categorical_encoding=True)(input_node)
    output_node = ak.RegressionHead()(output_node)
    reg = ak.AutoModel(inputs=input_node,
                       outputs=output_node,
                       overwrite=True,
                       max_trials=3)
    # Feed the structured data regressor with training data.
    reg.fit(
        x_train,
        y_train,
        # Split the training data and use the last 15% as validation data.
        validation_split=0.15,
        epochs=10)
    # Predict with the best model (features read straight from the CSV).
    predicted_y = reg.predict(test_file_path)
    # Evaluate the best model with testing data.
    print(reg.evaluate(test_file_path, 'Price'))
def functional_api():
    """Assemble an MNIST classifier with the functional API and return its
    evaluation result on the test split."""
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Normalization -> conv -> spatial reduction -> dense -> classification.
    input_node = ak.ImageInput()
    x = ak.Normalization()(input_node)
    x = ak.ConvBlock()(x)
    x = ak.SpatialReduction()(x)
    x = ak.DenseBlock()(x)
    x = ak.ClassificationHead()(x)
    clf = ak.AutoModel(input_node, x, seed=5, max_trials=3)
    clf.fit(x_train, y_train, validation_split=0.2)
    return clf.evaluate(x_test, y_test)
def test_functional_api(tmp_path):
    """Functional-API graph combining image, text and structured-data
    branches into shared regression + classification heads, searched with
    the Hyperband tuner."""
    # Prepare the data.
    num_instances = 80
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = utils.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = utils.dataframe_numpy()
    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    # Synthetic labels; the train_y/test_y loaded above are discarded.
    classification_y = utils.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = utils.generate_data(num_instances=num_instances,
                                       shape=(1, ))
    # Build model and train.
    # Image branch: 'next' (ResNeXt-style) and Xception towers, merged.
    image_input = ak.ImageInput()
    output = ak.Normalization()(image_input)
    output = ak.ImageAugmentation()(output)
    outputs1 = ak.ResNetBlock(version='next')(output)
    outputs2 = ak.XceptionBlock()(output)
    image_output = ak.Merge()((outputs1, outputs2))
    # Structured-data branch: categorical encoding then dense layers.
    structured_data_input = ak.StructuredDataInput()
    structured_data_output = ak.CategoricalToNumerical()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)
    # Text branch: int-sequence/embedding tower and n-gram tower, merged.
    # (outputs1/outputs2 are deliberately reused for the new towers.)
    text_input = ak.TextInput()
    outputs1 = ak.TextToIntSequence()(text_input)
    outputs1 = ak.Embedding()(outputs1)
    outputs1 = ak.ConvBlock(separable=True)(outputs1)
    outputs1 = ak.SpatialReduction()(outputs1)
    outputs2 = ak.TextToNgramVector()(text_input)
    outputs2 = ak.DenseBlock()(outputs2)
    text_output = ak.Merge()((outputs1, outputs2))
    # All three branches feed both heads.
    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))
    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.AutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_path,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        tuner=ak.Hyperband,
        seed=utils.SEED)
    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=1)
def test_auto_model_basic(tmp_dir):
    """Fit then predict; predictions come back with shape (n, 1)."""
    features = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100)
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_dir,
                         max_trials=2)
    model.fit(features, targets, epochs=2, validation_split=0.2)
    predictions = model.predict(features)
    assert predictions.shape == (100, 1)
def test_no_validation_data_nor_split_error(tmp_path):
    """fit() with neither validation data nor a split must raise."""
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_path)
    features = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    with pytest.raises(ValueError) as excinfo:
        model.fit(x=features, y=targets, validation_split=0)
    assert "Either validation_data or a non-zero" in str(excinfo.value)
def test_single_nested_dataset_doesnt_crash(tuner_fn, tmp_path):
    """A dataset whose x is a one-element tuple is handled by fit()."""
    model = ak.AutoModel(
        ak.ImageInput(),
        ak.RegressionHead(),
        directory=tmp_path,
        max_trials=2,
        overwrite=False,
    )
    features = utils.generate_data()
    labels = utils.generate_data(shape=(1,))
    nested = tf.data.Dataset.from_tensor_slices(((features,), labels))
    model.fit(nested, epochs=2)
def test_final_fit_concat(tuner_fn, tmp_dir):
    """Fitting with a validation split marks the dataset as split and asks
    the tuner to refit on the validation data."""
    mocked_tuner_class = tuner_fn.return_value
    features = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_dir,
                         max_trials=2)
    model.fit(features, targets, epochs=2, validation_split=0.2)
    assert model._split_dataset
    first_call_kwargs = mocked_tuner_class.call_args_list[0][1]
    assert first_call_kwargs['fit_on_val_data']
def test_single_nested_dataset(tuner_fn, tmp_path):
    """After fitting a nested dataset, every adapter has inferred a shape."""
    model = ak.AutoModel(ak.ImageInput(),
                         ak.RegressionHead(),
                         directory=tmp_path,
                         max_trials=2,
                         overwrite=False)
    features = utils.generate_data()
    labels = utils.generate_data(shape=(1, ))
    nested = tf.data.Dataset.from_tensor_slices(((features, ), labels))
    model.fit(nested, epochs=2)
    for adapter in model._input_adapters + model._output_adapters:
        assert adapter.shape is not None
def CreateSupergraph(output_dir, hp_tuner):
    """Build the fixed conv->dense supergraph used for the autoML search."""
    input_node = ak.Input()
    conv = ak.ConvBlock(num_blocks=1,
                        num_layers=3,
                        max_pooling=True,
                        dropout=0)(input_node)
    dense = ak.DenseBlock(dropout=0)(conv)
    head = ak.ClassificationHead(num_classes=4, metrics=['accuracy'])(dense)
    return ak.AutoModel(inputs=input_node,
                        outputs=head,
                        max_trials=3,
                        directory=output_dir,
                        project_name="autoML",
                        tuner=hp_tuner,
                        seed=123)
def train():
    """Search a structured-data classifier on the Titanic survival data.

    Downloads the train/eval CSVs, demonstrates DataFrame, Series and
    ndarray inputs, then fits, predicts, and evaluates straight from the
    CSV file path.
    """
    TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
    TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
    train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
    test_file_path = tf.keras.utils.get_file("eval.csv", TEST_DATA_URL)
    # x_train as pandas.DataFrame, y_train as pandas.Series
    x_train = pd.read_csv(train_file_path)
    print(type(x_train))  # pandas.DataFrame
    y_train = x_train.pop('survived')
    print(type(y_train))  # pandas.Series
    # You can also use pandas.DataFrame for y_train.
    y_train = pd.DataFrame(y_train)
    print(type(y_train))  # pandas.DataFrame
    # You can also use numpy.ndarray for x_train and y_train.
    # Fix: np.unicode was removed in NumPy 2.0 (deprecated since 1.20);
    # the builtin `str` yields the same unicode dtype on every version.
    x_train = x_train.to_numpy().astype(str)
    y_train = y_train.to_numpy()
    print(type(x_train))  # numpy.ndarray
    print(type(y_train))  # numpy.ndarray
    # Preparing testing data.
    x_test = pd.read_csv(test_file_path)
    y_test = x_test.pop('survived')
    # Initialize the structured data classifier.
    input_node = ak.StructuredDataInput()
    output_node = ak.StructuredDataBlock(
        categorical_encoding=True)(input_node)
    output_node = ak.ClassificationHead()(output_node)
    clf = ak.AutoModel(inputs=input_node,
                       outputs=output_node,
                       overwrite=True,
                       max_trials=3)
    # Feed the structured data classifier with training data.
    clf.fit(
        x_train,
        y_train,
        # Split the training data and use the last 15% as validation data.
        validation_split=0.15,
        epochs=10)
    # Predict with the best model (features read straight from the CSV).
    predicted_y = clf.predict(test_file_path)
    # Evaluate the best model with testing data.
    print(clf.evaluate(test_file_path, 'survived'))