def get_online_obj(algo_name, dataset, data_dir, n_jobs=1):
    """Build an objective function for tuning `algo_name` on `dataset`.

    Loads the dataset, makes a stratified 80/20 train/test split, and
    returns a callable mapping a configuration to 1 - balanced accuracy
    on the held-out test split (lower is better).
    """
    model_class = model_class_dict[algo_name]
    # Make the mindware package importable relative to the working dir.
    sys.path.append('../mindware')
    from mindware.datasets.utils import load_data

    features, labels, _ = load_data(dataset, data_dir=data_dir,
                                    datanode_returned=False,
                                    preprocess=True, task_type=0)
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, stratify=labels, random_state=1)

    def objective_function(config):
        # Fit an estimator with the candidate hyper-parameters and score
        # it on the fixed test split; fixed random_state keeps runs comparable.
        params = config.get_dictionary()
        estimator = model_class(**params, n_jobs=n_jobs, random_state=47)
        estimator.fit(x_train, y_train)
        predictions = estimator.predict(x_test)
        return 1 - balanced_accuracy_score(y_test, predictions)

    return objective_function
def check_datasets(datasets):
    """Verify that every name in `datasets` can be loaded; raise otherwise.

    NOTE(review): relies on a module-level `data_dir` free variable —
    confirm it is defined before this function is called.
    """
    for name in datasets:
        try:
            load_data(name, data_dir=data_dir, datanode_returned=False,
                      preprocess=True, task_type=0)
        except Exception:
            # Surface the full traceback before signalling the failure.
            print(traceback.format_exc())
            raise ValueError('Dataset - %s load error!' % name)
def get_meta_learning_configs(X, y, task_type, dataset_name='default',
                              metric='accuracy', num_cfgs=5):
    """Retrieve `num_cfgs` warm-start configurations from auto-sklearn's
    meta-learning resources for the given data and task.

    If `X` or `y` is None, the dataset is loaded by `dataset_name`.
    Returns the list of suggested configurations.
    """
    if X is None or y is None:
        X, y, _ = load_data(dataset_name)

    backend = create(temporary_directory=None,
                     output_directory=None,
                     delete_tmp_folder_after_terminate=False,
                     delete_output_folder_after_terminate=False,
                     shared_mode=True)
    data_manager = XYDataManager(X, y, None, None, task_type, None, dataset_name)

    config_space = pipeline.get_configuration_space(
        data_manager.info,
        include_estimators=None, exclude_estimators=None,
        include_preprocessors=None, exclude_preprocessors=None)

    stopwatch = StopWatch()
    stopwatch.start_task(os.path.basename(data_manager.name))

    def reset_data_manager(max_mem=None):
        # Intentional no-op: the data manager is injected directly below.
        pass

    automlsmbo = AutoMLSMBO(
        config_space=config_space,
        dataset_name=dataset_name,
        backend=backend,
        total_walltime_limit=1e5,
        func_eval_time_limit=1e5,
        memory_limit=1e5,
        metric=metric,
        watcher=stopwatch,
        metadata_directory='components/meta_learning/meta_resource',
        num_metalearning_cfgs=num_cfgs)
    # Bypass SMBO's own data handling and hand it our manager directly.
    automlsmbo.reset_data_manager = reset_data_manager
    automlsmbo.task = task_type
    automlsmbo.datamanager = data_manager
    return automlsmbo.get_metalearning_suggestions()
import os
import sys
import argparse

sys.path.append(os.getcwd())
from mindware.datasets.utils import load_data

# Default set of benchmark datasets exercised by this smoke-test script.
dataset_set = 'diabetes,spectf,credit,ionosphere,lymphography,pc4,' \
              'messidor_features,winequality_red,winequality_white,splice,spambase,amazon_employee'

parser = argparse.ArgumentParser()
parser.add_argument('--datasets', type=str, default=dataset_set)
args = parser.parse_args()

# Load each requested dataset and echo it to confirm it is readable.
for dataset in args.datasets.split(','):
    print(load_data(dataset, datanode_returned=True))
import os import sys sys.path.append(os.getcwd()) from autosklearn.smbo import AutoMLSMBO from autosklearn.constants import * from autosklearn.data.xy_data_manager import XYDataManager from autosklearn.util.backend import create from autosklearn.util import pipeline, StopWatch from mindware.datasets.utils import load_data dataset_name = 'diabetes' X, y, _ = load_data(dataset_name) def get_meta_learning_configs(X, y, task_type, dataset_name, metric='accuracy', num_cfgs=5): backend = create(temporary_directory=None, output_directory=None, delete_tmp_folder_after_terminate=False, delete_output_folder_after_terminate=False, shared_mode=True) dm = XYDataManager(X, y, None, None, task_type, None, dataset_name) configuration_space = pipeline.get_configuration_space(dm.info, include_estimators=None, exclude_estimators=None, include_preprocessors=None, exclude_preprocessors=None) watcher = StopWatch()
y_valid_pred = model.predict(x_valid) y_test_pred = model.predict(x_test) valid_perf = 1 - balanced_accuracy_score(y_valid, y_valid_pred) test_perf = 1 - balanced_accuracy_score(y_test, y_test_pred) return valid_perf, test_perf check_datasets(datasets) cs = model_class.get_cs() for dataset in datasets: # load dataset x, y, _ = load_data(dataset, data_dir=data_dir, datanode_returned=False, preprocess=True, task_type=0) # 6:2:2 x_used, x_test, y_used, y_test = train_test_split(x, y, test_size=0.2, stratify=y, random_state=1) x_train, x_valid, y_train, y_valid = train_test_split(x_used, y_used, test_size=0.25, stratify=y_used, random_state=1) # make dirs