Beispiel #1
0
def train(config):
    """Train an Auto-PyTorch classifier on data loaded from a pickle file.

    Args:
        config: object with attributes
            - pkl_file: path to a pickle containing {"array": X, "cluster": y}
            - test_ratio: fraction of samples held out as the test split
            - num_iter: number of search iterations for AutoNet
    """
    # NOTE(review): pickle.load is unsafe on untrusted files -- only use
    # with trusted data.
    with open(config.pkl_file, "rb") as fp:
        data = pickle.load(fp)
    # (the `with` block closes the file; the old explicit fp.close() was
    # redundant and has been removed)

    X = data["array"]
    y = data["cluster"]

    print("Encoder cluster -> label ")
    encoder = LabelEncoder()
    encoder.fit(y)
    label = encoder.transform(y)
    num_classes = len(np.unique(label))
    # Bug fix: compute_class_weight takes keyword-only arguments in
    # scikit-learn >= 0.24; the old positional call raises a TypeError.
    weight = compute_class_weight(class_weight="balanced",
                                  classes=np.unique(label),
                                  y=label)
    # NOTE(review): `num_classes` and `weight` are computed but never used
    # below -- kept for parity with the original; confirm whether they
    # should be passed to the model.

    X_train, X_test, y_train, y_test = train_test_split(
        X, label, test_size=config.test_ratio, shuffle=True)
    print("X train : ({},{})".format(X_train.shape[0], X_train.shape[1]))
    print("X test : ({},{})".format(X_test.shape[0], X_test.shape[1]))

    print("Build Model")
    model = AutoNetClassification(log_level='info',
            cuda=False,
            dataset_name="VKH_10X",
            shuffle=True,
            num_iterations=config.num_iter,
            budget_type='epochs',
            min_budget=100,
            max_budget=10000,
            result_logger_dir="./logger",
            cross_validator="k_fold",
            cross_validator_args={"n_splits": 5})
    print("Training")
    model.fit(X_train, y_train, validation_split=0.2)
    return
Beispiel #2
0
def train_autopytorch(X_train,X_test,y_train,y_test,mtype,common_name_model,problemtype,classes,default_featurenames,transform_model,settings,model_session):
	"""Fit an Auto-PyTorch model ('c' = classification, 'r' = regression),
	save it with torch.save, and return (model_name, model_dir, files).

	Raises:
		ValueError: if mtype is neither 'c' nor 'r'.
	"""
	# name model
	model_name = common_name_model + '.pickle'
	files = list()

	if mtype == 'c':
		from autoPyTorch import AutoNetClassification
		autonet = AutoNetClassification(log_level='debug', max_runtime=900, min_budget=50, max_budget=150)
		autonet.fit(X_train, y_train, validation_split=0.30)
		print(autonet.predict(X_test).flatten())
	elif mtype == 'r':
		from autoPyTorch import AutoNetRegression
		autonet = AutoNetRegression(log_level='debug', max_runtime=900, min_budget=50, max_budget=150)
		autonet.fit(X_train, y_train)
		print(autonet.predict(X_test).flatten())
	else:
		# Bug fix: previously an unknown mtype fell through to torch.save
		# with `autonet` undefined, raising a confusing NameError.
		raise ValueError("mtype must be 'c' (classification) or 'r' (regression), got %r" % (mtype,))

	print('saving model -->')
	torch.save(autonet, model_name)

	# get model directory; configs/results are written to the cwd by AutoNet
	files.append(model_name)
	files.append('configs.json')
	files.append('results.json')
	model_dir = os.getcwd()

	return model_name, model_dir, files
Beispiel #3
0
# Generate autonet
# Build either a standard or a multi-label classifier depending on TEST_CASE,
# wire extra metrics and a test-set log function into the pipeline, then fit
# and score. NOTE(review): TEST_CASE, dm, metric, additional_metrices,
# test_result, autonet_nodes and autonet_metrics are defined elsewhere --
# this fragment does not run standalone.
autonet = AutoNetClassification() if TEST_CASE != 3 else AutoNetMultilabel()

# add metrics and test_result to pipeline
# Register a log function that evaluates the current model on the test split;
# it is activated below via additional_logs=["test_result"].
autonet.pipeline[autonet_nodes.LogFunctionsSelector.get_name()].add_log_function('test_result', test_result(autonet, dm.X_test, dm.Y_test))
metrics = autonet.pipeline[autonet_nodes.MetricSelector.get_name()]
metrics.add_metric('pac_metric', autonet_metrics.pac_metric)
metrics.add_metric('auc_metric', autonet_metrics.auc_metric)
metrics.add_metric('accuracy', autonet_metrics.accuracy)

# Fit autonet using train data
# Time-based budget: up to 900s per configuration, 1800s for the whole search;
# full_eval_each_epoch=True forces all metrics/logs to run every epoch.
res = autonet.fit(min_budget=300,
                  max_budget=900, max_runtime=1800, budget_type='time',
                  normalization_strategies=['maxabs'],
                  train_metric=metric,
                  additional_metrics=additional_metrices,
                  cv_splits=3,
                  preprocessors=["truncated_svd"],
                  log_level="debug",
                  X_train=dm.X_train,
                  Y_train=dm.Y_train,
                  X_valid=dm.X_valid,
                  Y_valid=dm.Y_valid,
                  categorical_features=dm.categorical_features,
                  additional_logs=["test_result"],
                  full_eval_each_epoch=True)

# Calculate quality metrics using validation data.
autonet.score(dm.X_test, dm.Y_test)
print(res)
Beispiel #4
0
 #                             value_range=[32],
 #                             log=False)
 #Init autonet
 # autoPyTorch = AutoNetClassification(hyperparameter_search_space_updates=search_space_updates,  # config
 # NOTE(review): the commented lines above are the tail of a truncated
 # hyperparameter search-space update; kept verbatim.
 autoPyTorch = AutoNetClassification(
     # "full_cs",  # config
     networks=["resnet"],  # restrict the architecture search to ResNets
     # torch_num_threads=2,
     log_level='info',
     budget_type='epochs',
     min_budget=5,
     max_budget=20,
     num_iterations=100,
     cuda=True,
     use_pynisher=False)
 #fit
 # Uses an explicit validation set (here: the test split) instead of
 # validation_split, and searches over two candidate loss modules.
 autoPyTorch.fit(X_train=X_train,
                 Y_train=y_train,
                 X_valid=X_test,
                 Y_valid=y_test,
                 optimize_metric="auc_metric",
                 loss_modules=["cross_entropy", "cross_entropy_weighted"])
 #predict
 y_pred = autoPyTorch.predict(X_test)
 #check
 # NOTE(review): `le` (label encoder) and `sklearn.metrics` must be provided
 # by surrounding code not shown in this fragment.
 print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_pred))
 print("Confusion matrix",
       sklearn.metrics.confusion_matrix(y_test, y_pred, labels=le.classes_))
 print(autoPyTorch)
 # get_pytorch_model() presumably returns the trained torch model of the
 # incumbent configuration -- confirm against the Auto-PyTorch docs.
 pytorch_model = autoPyTorch.get_pytorch_model()
 print(pytorch_model)
# Load the optdigits dataset and run a minimal AutoNet search while logging
# memory usage. NOTE(review): DataManager, MemoryLogger and
# AutoNetClassification are imported elsewhere; this fragment is not
# standalone.
dm = DataManager(verbose=1)
dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'datasets'))


dm.read_data(os.path.join(dataset_dir, "classification/dataset_28_optdigits.csv"), is_classification=True)
# 5620 samples, 10 classes, 65 features      --->    98% validation accuracy




mem_logger = MemoryLogger()
mem_logger.start()

try:
    # Tiny budget: a single iteration with at most 9 epochs per config.
    autonet = AutoNetClassification(early_stopping_patience=15, budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='error')

    # NOTE(review): the train split is reused as the validation set here --
    # fine for a memory-profiling demo, not for honest model selection.
    res = autonet.fit(X_train=dm.X,
                        Y_train=dm.Y,
                        X_valid=dm.X_train,
                        Y_valid=dm.Y_train,
                        categorical_features=dm.categorical_features)
    print(res)

finally:
    # Always stop the memory logger, even if fit raises.
    mem_logger.stop()

Beispiel #6
0
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os, sys

sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))
from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

# Build a synthetic 3-class dataset; any numpy train/valid matrices would
# work here -- you can also write your own data manager.
data_manager = DataManager()
data_manager.generate_classification(num_classes=3, num_features=21, num_samples=1500)

# Every parameter has a default; the tiny epoch budget below just keeps this
# demo fast.
classifier = AutoNetClassification(budget_type='epochs',
                                   min_budget=1,
                                   max_budget=9,
                                   num_iterations=1,
                                   log_level='info')

fit_result = classifier.fit(X_train=data_manager.X,
                            Y_train=data_manager.Y,
                            X_valid=data_manager.X_train,
                            Y_valid=data_manager.Y_train)

print(fit_result)
print("Score:", classifier.score(X_test=data_manager.X_train, Y_test=data_manager.Y_train))
Beispiel #7
0
__author__ = "Max Dippel, Michael Burkart and Matthias Urban"
__version__ = "0.0.1"
__license__ = "BSD"

import os, sys
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "..")))
from autoPyTorch import AutoNetClassification
from autoPyTorch.data_management.data_manager import DataManager

# Generate synthetic classification data; substitute your own numpy
# matrices via fit(...) if desired.
dm = DataManager()
dm.generate_classification(num_classes=3, num_features=21, num_samples=1500)

# "tiny_cs" preset plus a minimal epoch budget -- quick smoke-test settings.
net = AutoNetClassification("tiny_cs", budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='debug', use_pynisher=False)

# Use 3-fold cross-validation instead of an explicit validation set.
outcome = net.fit(X_train=dm.X, Y_train=dm.Y, cross_validator="k_fold", cross_validator_args={"n_splits": 3})

print(outcome)
print("Score:", net.score(X_test=dm.X_train, Y_test=dm.Y_train))
Beispiel #8
0
# Generate autonet
# Like the earlier pipeline example, but uses stratified k-fold
# cross-validation instead of an explicit validation split.
# NOTE(review): TEST_CASE, dm, metric, additional_metrices, test_result and
# autonet_nodes are defined elsewhere -- this fragment is not standalone.
autonet = AutoNetClassification() if TEST_CASE != 3 else AutoNetMultilabel()

# add metrics and test_result to pipeline
# Log test-set performance during training (activated via additional_logs).
autonet.pipeline[
    autonet_nodes.LogFunctionsSelector.get_name()].add_log_function(
        'test_result', test_result(autonet, dm.X_test, dm.Y_test))

# Fit autonet using train data
res = autonet.fit(min_budget=300,
                  max_budget=900,
                  max_runtime=1800,
                  budget_type='time',
                  normalization_strategies=['maxabs'],
                  train_metric=metric,
                  additional_metrics=additional_metrices,
                  # 3-fold stratified CV replaces X_valid/Y_valid (both None)
                  cross_validator='stratified_k_fold',
                  cross_validator_args={'n_splits': 3},
                  preprocessors=["truncated_svd"],
                  log_level="debug",
                  X_train=dm.X_train,
                  Y_train=dm.Y_train,
                  X_valid=None,
                  Y_valid=None,
                  categorical_features=dm.categorical_features,
                  additional_logs=["test_result"])

# Calculate quality metrics using validation data.
autonet.score(dm.X_test, dm.Y_test)
print(res)
# Get the current configuration as dict
# NOTE(review): `autonet` here still refers to the instance from the
# preceding example; these two calls only introspect it.
current_configuration = autonet.get_current_autonet_config()
hyperparameter_search_space = autonet.get_hyperparameter_search_space()

# Fetch OpenML task 31, fit with the "tiny_cs" preset on its official
# train split, then evaluate on the held-out test indices.
task = openml.tasks.get_task(task_id=31)
X, y = task.get_X_and_y()
ind_train, ind_test = task.get_train_test_split_indices()
X_train, Y_train = X[ind_train], y[ind_train]
X_test, Y_test = X[ind_test], y[ind_test]
autonet = AutoNetClassification(config_preset="tiny_cs",
                                result_logger_dir="logs/")
# Fit (note that the settings are for demonstration, you might need larger budgets)
results_fit = autonet.fit(X_train=X_train,
                          Y_train=Y_train,
                          validation_split=0.3,
                          max_runtime=300,
                          min_budget=60,
                          max_budget=100,
                          refit=True)
# Save fit results as json
# NOTE(review): `json` is used here but no import is visible in this
# fragment -- confirm it is imported at file level.
with open("logs/results_fit.json", "w") as file:
    json.dump(results_fit, file)
# See how the random configuration performs (often it just predicts 0)
score = autonet.score(X_test=X_test, Y_test=Y_test)
pred = autonet.predict(X=X_test)
print("Model prediction:", pred[0:10])
print("Accuracy score", score)
pytorch_model = autonet.get_pytorch_model()
print(pytorch_model)
# Load fit results as json
with open("logs/results_fit.json") as file:
# Iris flower dataset from sklearn
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html#sklearn.datasets.load_iris
import sklearn.model_selection
import sklearn.datasets
# Bug fix: sklearn.metrics is used below but was never imported; importing a
# sibling submodule (model_selection) does not make sklearn.metrics available.
import sklearn.metrics

X, y = sklearn.datasets.load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X, y, random_state=1)
# Use automated machine learning
from autoPyTorch import AutoNetClassification
# initialize autopytorch
autoPyTorch = AutoNetClassification("tiny_cs",
                                    log_level='info',
                                    max_runtime=900,
                                    min_budget=30,
                                    max_budget=90,
                                    cuda=True,
                                    use_pynisher=False)
# Train on the dataset
autoPyTorch.fit(x_train, y_train, validation_split=0.3)

# Check accuracy on the held-out test data
y_pred = autoPyTorch.predict(x_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_pred))
Beispiel #11
0
import openml
from pprint import pprint
from autoPyTorch import AutoNetClassification
from sklearn.metrics import accuracy_score

# Fetch the OpenML task and its canonical train/test split.
benchmark = openml.tasks.get_task(task_id=32)
features, labels = benchmark.get_X_and_y()
train_idx, test_idx = benchmark.get_train_test_split_indices()

# Configure Auto-PyTorch with the small "tiny_cs" search space.
classifier = AutoNetClassification(
    "tiny_cs",  # config preset
    log_level='info',
    max_runtime=300,
    min_budget=30,
    max_budget=90)

classifier.fit(features[train_idx], labels[train_idx], validation_split=0.3)

# Evaluate on the held-out indices.
predictions = classifier.predict(features[test_idx])

print("Accuracy score", accuracy_score(labels[test_idx], predictions))

# Show the incumbent network configuration.
pprint(classifier.fit_result["optimized_hyperparameter_config"])
                 is_classification=True)
    # 4601 samples, 2 classes, 58 features

# Pick the dataset for the selected test case (the values are mutually
# exclusive, so an elif chain is equivalent to separate ifs).
if TEST_CASE == 8:
    dm.read_data(
        os.path.join(dataset_dir, "classification/dataset_32_pendigits.csv"),
        is_classification=True)
elif TEST_CASE == 9:
    dm.read_data(
        os.path.join(dataset_dir, "classification/php4fATLZ.csv"),
        is_classification=True)
elif TEST_CASE == 10:
    dm.read_data(
        os.path.join(dataset_dir, "classification/phpnBqZGZ.csv"),
        is_classification=True)

# Minimal search budget: one iteration, at most 9 epochs per configuration.
autonet = AutoNetClassification(budget_type='epochs',
                                min_budget=1,
                                max_budget=9,
                                num_iterations=1,
                                log_level='info')

# Fit with early stopping; no explicit validation data is supplied.
res = autonet.fit(
    X_train=dm.X_train,
    Y_train=dm.Y_train,
    early_stopping_patience=3,
    # validation_split=0.3,
    categorical_features=dm.categorical_features)

print(res)