def test_add_classifier(self):
    """A registered add-on classifier must appear in the pipeline search space."""
    registry = classification_components._addons.components
    self.assertEqual(len(registry), 0)
    classification_components.add_classifier(DummyClassifier)
    self.assertEqual(len(registry), 1)
    search_space = SimpleClassificationPipeline().get_hyperparameter_search_space()
    self.assertIn('DummyClassifier', str(search_space))
    # Clean up the module-level registry so other tests see it empty again.
    del registry['DummyClassifier']
def test_add_classifier(self):
    """Adding a classifier add-on exposes it via the hyperparameter search space."""
    self.assertEqual(
        len(classification_components._addons.components), 0)

    classification_components.add_classifier(DummyClassifier)

    self.assertEqual(
        len(classification_components._addons.components), 1)
    cs = SimpleClassificationPipeline().get_hyperparameter_search_space()
    self.assertIn('DummyClassifier', str(cs))

    # Restore the shared registry to its pristine state.
    del classification_components._addons.components['DummyClassifier']
# -*- encoding: utf-8 -*-
"""Script: train a DeepFeedNet model through auto-sklearn's AutoML entry point."""
import numpy as np

from autosklearn.pipeline.components.classification import add_classifier
import time
import autosklearn.automl as autosk
from autosklearn.constants import BINARY_CLASSIFICATION
from component import DeepFeedNet

# BUG FIX: the original read `dataset_dir =` with no right-hand side — a
# SyntaxError. Point this at the directory holding train.npy / train_labels.npy.
dataset_dir = './data/'  # TODO(review): restore the original dataset path

# Load our training data
X_train = np.load(dataset_dir + 'train.npy')
y_train = np.load(dataset_dir + 'train_labels.npy')

# Register the custom network as a classifier component before building AutoML.
add_classifier(DeepFeedNet.DeepFeedNet)

ensemble_size = 1  # NOTE(review): unused below; the kwarg passes ensemble_size=0

# Create model
modl = autosk.AutoML(
    time_left_for_this_task=600,
    per_run_time_limit=90,
    delete_tmp_folder_after_terminate=False,
    tmp_dir='tmp/autosk_tmp',
    output_dir='tmp/autosk_out',
    log_dir='tmp/autosk_log',
    include_estimators=['DeepFeedNet'],
    include_preprocessors=['NoPreprocessing'],
    ensemble_size=0,
    ensemble_nbest=0,
    initial_configurations_via_metalearning=0,
    seed=50,
    ml_memory_limit=2048,
    metadata_directory=None,
)  # NOTE(review): source was truncated here — further kwargs may be missing
import datetime
import math
import multiprocessing
import multiprocessing.pool
import time
from multiprocessing import Queue

import pandas as pd
from six.moves import queue
from sklearn.utils import check_X_y

from autosklearn.classification import AutoSklearnClassifier
from autosklearn.pipeline.components import classification

import gc

gc.enable()

# Register both logistic-regression variants as auto-sklearn components.
classification.add_classifier(LogisticRegressionSK)
classification.add_classifier(LogisticRegressionSMAC)

# An objective function maps an optuna-style Trial to a scalar score.
ObjectiveFuncType = Callable[[trial_module.Trial], float]


def _name_estimators(estimators):
    """Generate names for estimators.

    Mirrors sklearn.pipeline._name_estimators: each estimator is named by its
    lowercased class name; duplicated classes get a "-<i>" suffix so the names
    are unique.

    :param estimators: iterable of estimator instances.
    :return: list of (name, estimator) tuples.
    """
    names = [type(estimator).__name__.lower() for estimator in estimators]
    namecount = defaultdict(int)
    for est, name in zip(estimators, names):
        namecount[name] += 1
    # Names that occur only once need no numeric suffix.
    for k, v in list(six.iteritems(namecount)):
        if v == 1:
            del namecount[k]
    # NOTE(review): the source was truncated after `if v == 1:`; the remainder
    # is reconstructed from sklearn's canonical _name_estimators implementation.
    for i in reversed(range(len(estimators))):
        name = names[i]
        if name in namecount:
            names[i] += "-%d" % namecount[name]
            namecount[name] -= 1
    return list(zip(names, estimators))
def evaluate_ml_algorithm(dataset, algo, run_id, obj_metric, time_limit=600,
                          seed=1, task_type=None):
    """Evaluate a single algorithm on one dataset via auto-sklearn and pickle the scores.

    Restricts auto-sklearn's search to `algo` (registering the custom component
    for 'lightgbm' / 'logistic_regression'), fits on the training split with a
    holdout resampling strategy, then refits and scores on the test split.
    Results are dumped to `<save_dir>/<dataset>-<algo>-<metric>-<run>-<limit>.pkl`.

    :param dataset: dataset identifier understood by load_train_test_data.
    :param algo: algorithm key; 'lightgbm' and 'logistic_regression' are
        special-cased to register add-on classifiers first.
    :param run_id: integer tag used for logging and the output filename.
    :param obj_metric: metric label used only in logging / the output filename.
    :param time_limit: overall auto-sklearn budget in seconds.
    :param seed: auto-sklearn RNG seed.
    :param task_type: incoming task type; refined to BINARY_CLS/MULTICLASS_CLS
        when it falls in CLS_TASKS.
    """
    # Map the algo key to auto-sklearn's include_estimators list, registering
    # the corresponding add-on component where needed.
    if algo == 'lightgbm':
        _algo = ['LightGBM']
        add_classifier(LightGBM)
    elif algo == 'logistic_regression':
        _algo = ['Logistic_Regression']
        add_classifier(Logistic_Regression)
    else:
        _algo = [algo]
    print('EVALUATE-%s-%s-%s: run_id=%d' % (dataset, algo, obj_metric, run_id))
    train_data, test_data = load_train_test_data(dataset, task_type=task_type)
    if task_type in CLS_TASKS:
        # Binary vs. multiclass is decided from the number of distinct labels.
        task_type = BINARY_CLS if len(set(
            train_data.data[1])) == 2 else MULTICLASS_CLS
        # NOTE(review): the collapsed source makes it ambiguous whether this
        # print was inside the if-block; placed here as the likeliest original.
        print(set(train_data.data[1]))
    # Reload as multiclass to obtain raw features plus their declared types.
    raw_data, test_raw_data = load_train_test_data(dataset,
                                                   task_type=MULTICLASS_CLS)
    X, y = raw_data.data
    X_test, y_test = test_raw_data.data
    feat_type = [
        'Categorical' if _type == CATEGORICAL else 'Numerical'
        for _type in raw_data.feature_types
    ]
    from autosklearn.metrics import balanced_accuracy as balanced_acc
    automl = AutoSklearnClassifier(
        time_left_for_this_task=int(time_limit),
        per_run_time_limit=180,
        n_jobs=1,
        include_estimators=_algo,
        initial_configurations_via_metalearning=0,
        ensemble_memory_limit=16384,
        ml_memory_limit=16384,
        # tmp_folder='/var/folders/0t/mjph32q55hd10x3qr_kdd2vw0000gn/T/autosklearn_tmp',
        ensemble_size=1,
        seed=int(seed),
        resampling_strategy='holdout',
        resampling_strategy_arguments={'train_size': 0.67})
    automl.fit(X.copy(), y.copy(), feat_type=feat_type, metric=balanced_acc)
    model_desc = automl.show_models()
    str_stats = automl.sprint_statistics()
    valid_results = automl.cv_results_['mean_test_score']
    print('Eval num: %d' % (len(valid_results)))
    # Best holdout score across all evaluated configurations.
    validation_score = np.max(valid_results)
    # Test performance.
    automl.refit(X.copy(), y.copy())
    predictions = automl.predict(X_test)
    test_score = balanced_accuracy_score(y_test, predictions)
    # Print statistics about the auto-sklearn run such as number of
    # iterations, number of models failed with a time out.
    print(str_stats)
    print(model_desc)
    print('Validation Accuracy:', validation_score)
    print("Test Accuracy :", test_score)
    # Persist the run summary; save_dir is expected to be defined at module
    # level elsewhere in this file.
    save_path = save_dir + '%s-%s-%s-%d-%d.pkl' % (
        dataset, algo, obj_metric, run_id, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, algo, validation_score, test_score, task_type], f)