def train(config):
    """Train an Auto-PyTorch classifier on pickled data.

    ``config`` must provide ``pkl_file`` (path to a pickle holding a dict
    with keys "array" -> feature matrix and "cluster" -> raw labels),
    ``test_ratio`` (train/test split fraction) and ``num_iter``
    (number of AutoNet iterations).
    """
    # The context manager closes the file; the original also called
    # fp.close() redundantly inside the with-block.
    with open(config.pkl_file, "rb") as fp:
        data = pickle.load(fp)
    X = data["array"]
    y = data["cluster"]

    print("Encoder cluster -> label ")
    encoder = LabelEncoder()
    encoder.fit(y)
    label = encoder.transform(y)
    num_classes = len(np.unique(label))  # NOTE: computed but unused below
    # sklearn >= 0.24 made these parameters keyword-only; the original
    # positional call raises TypeError there.
    weight = compute_class_weight(class_weight="balanced",
                                  classes=np.unique(label),
                                  y=label)  # NOTE: unused — TODO pass to fit?

    X_train, X_test, y_train, y_test = train_test_split(
        X, label, test_size=config.test_ratio, shuffle=True)
    print("X train : ({},{})".format(X_train.shape[0], X_train.shape[1]))
    print("X test : ({},{})".format(X_test.shape[0], X_test.shape[1]))

    print("Build Model")
    model = AutoNetClassification(log_level='info',
                                  cuda=False,
                                  dataset_name="VKH_10X",
                                  shuffle=True,
                                  num_iterations=config.num_iter,
                                  budget_type='epochs',
                                  min_budget=100,
                                  max_budget=10000,
                                  result_logger_dir="./logger",
                                  cross_validator="k_fold",
                                  cross_validator_args={"n_splits": 5})
    print("Training")
    model.fit(X_train, y_train, validation_split=0.2)
    return
def train_autopytorch(X_train, X_test, y_train, y_test, mtype,
                      common_name_model, problemtype, classes,
                      default_featurenames, transform_model, settings,
                      model_session):
    """Fit an Auto-PyTorch model and save it to disk.

    ``mtype`` selects the task: 'c' for classification, 'r' for regression.
    Returns ``(model_name, model_dir, files)`` where ``files`` lists the
    model pickle plus the JSON artifacts autoPyTorch writes.
    """
    # name model
    model_name = common_name_model + '.pickle'
    files = []

    if mtype == 'c':
        from autoPyTorch import AutoNetClassification
        autonet = AutoNetClassification(log_level='debug',
                                        max_runtime=900,
                                        min_budget=50,
                                        max_budget=150)
        autonet.fit(X_train, y_train, validation_split=0.30)
        print(autonet.predict(X_test).flatten())
    elif mtype == 'r':
        from autoPyTorch import AutoNetRegression
        autonet = AutoNetRegression(log_level='debug',
                                    max_runtime=900,
                                    min_budget=50,
                                    max_budget=150)
        autonet.fit(X_train, y_train)
        print(autonet.predict(X_test).flatten())
    else:
        # Original fell through with 'autonet' undefined and crashed
        # with NameError at torch.save; fail fast with a clear message.
        raise ValueError("mtype must be 'c' or 'r', got %r" % (mtype,))

    print('saving model -->')
    torch.save(autonet, model_name)

    # get model directory
    files.append(model_name)
    files.append('configs.json')
    files.append('results.json')
    model_dir = os.getcwd()

    return model_name, model_dir, files
def create_model(max_batch):
    """Build an AutoNetClassification configured for a time-budgeted
    hyperband search; ``max_batch`` caps the batch-size search range.

    Relies on module-level ``preset`` and ``save_output_to`` — the latter
    is wiped so each run starts with a clean result-logger directory.
    """
    search_space_updates = HyperparameterSearchSpaceUpdates()
    # TODO: this still runs out of memory and wastes resources
    search_space_updates.append(node_name="CreateImageDataLoader",
                                hyperparameter="batch_size",
                                log=False,
                                value_range=[2, max_batch])
    # ignore_errors: without it the first run crashes with
    # FileNotFoundError because the output directory does not exist yet.
    shutil.rmtree(save_output_to, ignore_errors=True)
    autonet = AutoNetClassification(
        preset,
        # hyperparameter_search_space_updates=search_space_updates,
        min_workers=2,
        # dataloader_worker=4,
        # global_results_dir="results",
        # keep_only_incumbent_checkpoints=False,
        log_level="info",
        budget_type="time",
        # save_checkpoints=True,
        result_logger_dir=save_output_to,
        min_budget=200,
        max_budget=600,
        num_iterations=1,
        # images_shape=[channels, input_size[0], input_size[1]],
        optimizer=["adam", "adamw", "sgd", "rmsprop"],
        algorithm="hyperband",
        optimize_metric="balanced_accuracy",
        additional_metrics=["pac_metric"],
        lr_scheduler=["cosine_annealing", "cyclic", "step", "adapt",
                      "plateau", "alternating_cosine", "exponential"],
        networks=['mlpnet', 'shapedmlpnet', 'resnet', 'shapedresnet'],
        # also available: 'densenet_flexible', 'resnet152', 'darts'
        use_tensorboard_logger=True,
        cuda=True
    )
    return autonet
def fit(self, data_manager):
    """Instantiate the AutoNet flavor matching the data manager's problem
    type, wire in a test-result log function and the standard metric set,
    and return the instance wrapped in a dict.
    """
    # Map each supported problem type to its AutoNet class.
    dispatch = {
        ProblemType.FeatureRegression: AutoNetRegression,
        ProblemType.FeatureMultilabel: AutoNetMultilabel,
        ProblemType.FeatureClassification: AutoNetClassification,
    }
    autonet_cls = dispatch.get(data_manager.problem_type)
    if autonet_cls is None:
        raise ValueError('Problem type ' + str(data_manager.problem_type) + ' is not defined')
    autonet = autonet_cls()

    # Log test-set performance during the search.
    log_selector = autonet.pipeline[autonet_nodes.LogFunctionsSelector.get_name()]
    log_selector.add_log_function(
        'test_result',
        test_result(autonet, data_manager.X_test, data_manager.Y_test))

    # Register the full metric catalogue on the pipeline's metric selector.
    metrics = autonet.pipeline[autonet_nodes.MetricSelector.get_name()]
    for metric_name in ('pac_metric', 'balanced_accuracy', 'mean_distance',
                        'multilabel_accuracy', 'auc_metric', 'accuracy'):
        metrics.add_metric(metric_name, getattr(autonet_metrics, metric_name))

    return {'autonet': autonet}
additional_metrices = []  # spelling kept: name is referenced in the fit() call below

""" TEST CASE 4: Openml, missing values """
if TEST_CASE == 4:
    dm.read_data("openml:188", is_classification=True)
    metric = "accuracy"
    additional_metrices = []

""" TEST CASE 5: MNIST """
if TEST_CASE == 5:
    dm.read_data(os.path.join(dataset_dir, "classification/phpnBqZGZ.csv"), is_classification=True)
    metric = "accuracy"
    additional_metrices = []

# Generate autonet — TEST_CASE 3 is presumably the multilabel dataset; verify upstream
autonet = AutoNetClassification() if TEST_CASE != 3 else AutoNetMultilabel()

# add metrics and test_result to pipeline
autonet.pipeline[autonet_nodes.LogFunctionsSelector.get_name()].add_log_function('test_result', test_result(autonet, dm.X_test, dm.Y_test))
metrics = autonet.pipeline[autonet_nodes.MetricSelector.get_name()]
metrics.add_metric('pac_metric', autonet_metrics.pac_metric)
metrics.add_metric('auc_metric', autonet_metrics.auc_metric)
metrics.add_metric('accuracy', autonet_metrics.accuracy)

# Fit autonet using train data
# NOTE(review): this excerpt is truncated — the fit(...) call continues
# past the end of the visible chunk.
res = autonet.fit(min_budget=300,
                  max_budget=900,
                  max_runtime=1800,
                  budget_type='time',
                  normalization_strategies=['maxabs'],
                  train_metric=metric,
                  additional_metrics=additional_metrices,
                  cv_splits=3,
# Apply the scaler fitted earlier (outside this excerpt) to both splits.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Init custom search space (disabled)
# search_space_updates = HyperparameterSearchSpaceUpdates()
# search_space_updates.append(node_name="CreateDataLoader",
#                             hyperparameter="batch_size",
#                             value_range=[32],
#                             log=False)

# Init autonet
# autoPyTorch = AutoNetClassification(hyperparameter_search_space_updates=search_space_updates,  # config
autoPyTorch = AutoNetClassification(  # "full_cs",  # config
    networks=["resnet"],
    # torch_num_threads=2,
    log_level='info',
    budget_type='epochs',
    min_budget=5,
    max_budget=20,
    num_iterations=100,
    cuda=True,
    use_pynisher=False)

# fit — the test split doubles as the validation set here
autoPyTorch.fit(X_train=X_train,
                Y_train=y_train,
                X_valid=X_test,
                Y_valid=y_test,
                optimize_metric="auc_metric",
                loss_modules=["cross_entropy", "cross_entropy_weighted"])

# predict
y_pred = autoPyTorch.predict(X_test)

# check
# Load the optdigits benchmark from the repo's datasets directory.
dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'datasets'))
dm.read_data(os.path.join(dataset_dir, "classification/dataset_28_optdigits.csv"),
             is_classification=True)
# 5620 samples, 10 classes, 65 features ---> 98% validation accuracy

# Track memory usage for the duration of the fit; stopped in `finally`
# so the logger is shut down even if fit() raises.
mem_logger = MemoryLogger()
mem_logger.start()

try:
    autonet = AutoNetClassification(early_stopping_patience=15,
                                    budget_type='epochs',
                                    min_budget=1,
                                    max_budget=9,
                                    num_iterations=1,
                                    log_level='error')
    # NOTE(review): X_valid/Y_valid are dm.X_train/dm.Y_train, which
    # presumably overlap the training data dm.X/dm.Y — confirm intended.
    res = autonet.fit(X_train=dm.X,
                      Y_train=dm.Y,
                      X_valid=dm.X_train,
                      Y_valid=dm.Y_train,
                      categorical_features=dm.categorical_features)
    print(res)
finally:
    mem_logger.stop()
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os
import sys

sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))

from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

# A custom data manager works too: just call fit() with your own
# train/valid numpy matrices.
data_manager = DataManager()
data_manager.generate_classification(num_classes=3, num_features=21, num_samples=1500)

# Every parameter has a default; the tiny budgets below just keep this
# example fast.
classifier = AutoNetClassification(budget_type='epochs',
                                   min_budget=1,
                                   max_budget=9,
                                   num_iterations=1,
                                   log_level='info')
results = classifier.fit(X_train=data_manager.X,
                         Y_train=data_manager.Y,
                         X_valid=data_manager.X_train,
                         Y_valid=data_manager.Y_train)
print(results)
print("Score:", classifier.score(X_test=data_manager.X_train, Y_test=data_manager.Y_train))
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os
import sys

sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))

from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

# A custom data manager works too: just call fit() with your own
# train/valid numpy matrices.
data_manager = DataManager()
data_manager.generate_classification(num_classes=3, num_features=21, num_samples=1500)

# Every parameter has a default; the tiny budgets below just keep this
# example fast.  "tiny_cs" selects the small preset config space.
model = AutoNetClassification("tiny_cs",
                              budget_type='epochs',
                              min_budget=1,
                              max_budget=9,
                              num_iterations=1,
                              log_level='debug',
                              use_pynisher=False)
fit_result = model.fit(X_train=data_manager.X,
                       Y_train=data_manager.Y,
                       cross_validator="k_fold",
                       cross_validator_args={"n_splits": 3})
print(fit_result)
print("Score:", model.score(X_test=data_manager.X_train, Y_test=data_manager.Y_train))
is_classification=True)  # closes a read_data(...) call begun before this excerpt
metric = "pac_metric"
additional_metrices = []  # spelling kept: name is referenced in the fit() call below

""" TEST CASE 4: Openml, missing values """
if TEST_CASE == 4:
    dm.read_data("openml:188", is_classification=True)
    metric = "accuracy"
    additional_metrices = []

""" TEST CASE 5: MNIST """
if TEST_CASE == 5:
    dm.read_data("openml:40996", is_classification=True)
    metric = "accuracy"
    additional_metrices = []

# Generate autonet — TEST_CASE 3 is presumably the multilabel dataset; verify upstream
autonet = AutoNetClassification() if TEST_CASE != 3 else AutoNetMultilabel()

# add metrics and test_result to pipeline
autonet.pipeline[autonet_nodes.LogFunctionsSelector.get_name()].add_log_function(
    'test_result', test_result(autonet, dm.X_test, dm.Y_test))

# Fit autonet using train data
# NOTE(review): this excerpt is truncated — the fit(...) call continues
# past the end of the visible chunk.
res = autonet.fit(min_budget=300,
                  max_budget=900,
                  max_runtime=1800,
                  budget_type='time',
                  normalization_strategies=['maxabs'],
                  train_metric=metric,
                  additional_metrics=additional_metrices,
                  cross_validator='stratified_k_fold',
from autoPyTorch import AutoNetClassification # Other imports for later usage import openml import json autonet = AutoNetClassification(config_preset="tiny_cs", result_logger_dir="logs/") # Get the current configuration as dict current_configuration = autonet.get_current_autonet_config() hyperparameter_search_space = autonet.get_hyperparameter_search_space() task = openml.tasks.get_task(task_id=31) X, y = task.get_X_and_y() ind_train, ind_test = task.get_train_test_split_indices() X_train, Y_train = X[ind_train], y[ind_train] X_test, Y_test = X[ind_test], y[ind_test] autonet = AutoNetClassification(config_preset="tiny_cs", result_logger_dir="logs/") # Fit (note that the settings are for demonstration, you might need larger budgets) results_fit = autonet.fit(X_train=X_train, Y_train=Y_train, validation_split=0.3, max_runtime=300, min_budget=60, max_budget=100, refit=True) # Save fit results as json with open("logs/results_fit.json", "w") as file: json.dump(results_fit, file) # See how the random configuration performs (often it just predicts 0) score = autonet.score(X_test=X_test, Y_test=Y_test) pred = autonet.predict(X=X_test)
# Test logging autonet_config["additional_logs"] = [ test_predictions_for_ensemble.__name__, test_result_ens.__name__ ] # Initialize (ensemble) if args.ensemble_setting == "ensemble": print("Using ensembles!") ensemble_config = get_ensemble_config() autonet_config = {**autonet_config, **ensemble_config} autonet = AutoNetEnsemble(AutoNetClassification, config_preset="full_cs", **autonet_config) elif args.ensemble_setting == "normal": autonet = AutoNetClassification(config_preset="full_cs", **autonet_config) # Test logging cont. autonet.pipeline[LogFunctionsSelector.get_name()].add_log_function( name=test_predictions_for_ensemble.__name__, log_function=test_predictions_for_ensemble(autonet, X_test, y_test), loss_transform=False) autonet.pipeline[LogFunctionsSelector.get_name()].add_log_function( name=test_result_ens.__name__, log_function=test_result_ens(autonet, X_test, y_test)) autonet.pipeline[BaselineTrainer.get_name()].add_test_data(X_test) print(autonet.get_current_autonet_config()) fit_results = autonet.fit(X_train, y_train,
# Iris flower dataset from sklearn
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html#sklearn.datasets.load_iris
import sklearn.model_selection
import sklearn.datasets
# Explicit import: accuracy_score is used below, and importing
# sklearn.model_selection does not guarantee sklearn.metrics is bound.
import sklearn.metrics

X, y = sklearn.datasets.load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = \
    sklearn.model_selection.train_test_split(X, y, random_state=1)

# Use automated machine learning
from autoPyTorch import AutoNetClassification

# initialize autopytorch ("tiny_cs" = small preset config space)
autoPyTorch = AutoNetClassification("tiny_cs",
                                    log_level='info',
                                    max_runtime=900,
                                    min_budget=30,
                                    max_budget=90,
                                    cuda=True,
                                    use_pynisher=False)

# Train on the dataset
autoPyTorch.fit(x_train, y_train, validation_split=0.3)

# Check accuracy against the held-out test data
y_pred = autoPyTorch.predict(x_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_pred))
import openml
from pprint import pprint

from autoPyTorch import AutoNetClassification
from sklearn.metrics import accuracy_score

# get OpenML task by its ID
task = openml.tasks.get_task(task_id=32)
X, y = task.get_X_and_y()
ind_train, ind_test = task.get_train_test_split_indices()

# run Auto-PyTorch on the task's official train split
classifier = AutoNetClassification("tiny_cs",  # config preset
                                   log_level='info',
                                   max_runtime=300,
                                   min_budget=30,
                                   max_budget=90)
classifier.fit(X[ind_train], y[ind_train], validation_split=0.3)

# predict on the task's official test split
predictions = classifier.predict(X[ind_test])
print("Accuracy score", accuracy_score(y[ind_test], predictions))

# print network configuration
pprint(classifier.fit_result["optimized_hyperparameter_config"])
is_classification=True)  # closes a read_data(...) call begun before this excerpt
# 4601 samples, 2 classes, 58 features

if TEST_CASE == 8:
    dm.read_data(os.path.join(dataset_dir, "classification/dataset_32_pendigits.csv"),
                 is_classification=True)

if TEST_CASE == 9:
    dm.read_data(os.path.join(dataset_dir, "classification/php4fATLZ.csv"),
                 is_classification=True)

if TEST_CASE == 10:
    dm.read_data(os.path.join(dataset_dir, "classification/phpnBqZGZ.csv"),
                 is_classification=True)

# Small epoch budgets keep this test-case run fast.
autonet = AutoNetClassification(budget_type='epochs',
                                min_budget=1,
                                max_budget=9,
                                num_iterations=1,
                                log_level='info')

res = autonet.fit(X_train=dm.X_train,
                  Y_train=dm.Y_train,
                  early_stopping_patience=3,
                  # validation_split=0.3,
                  categorical_features=dm.categorical_features)
print(res)