def tpe_objective_function(config):
     """Train the estimator built from ``config`` and return its negated score.

     The estimator is the second element of the pair returned by
     ``get_estimator(config)``. It is fitted on ``train_data.data`` and scored
     on ``test_data.data`` with the ``'bal_acc'`` metric. ``train_data`` and
     ``test_data`` are not parameters; they are resolved from the enclosing
     scope.

     The score is negated before it is returned -- presumably because the
     caller minimizes this objective (confirm against the optimizer in use).
     """
     scorer = get_metric('bal_acc')
     _unused, model = get_estimator(config)

     train_features, train_labels = train_data.data
     test_features, test_labels = test_data.data

     model.fit(train_features, train_labels)
     score = scorer(model, test_features, test_labels)
     return -score
    def __init__(self, estimator, metric, task_type,
                 evaluation_strategy, **evaluation_params):
        """Store the estimator, the resolved metric and the evaluation settings.

        Args:
            estimator: Kept as-is on ``self.estimator``.
            metric: Metric identifier. It is resolved through ``get_metric``
                (stored as ``self.metric``) and also kept verbatim as
                ``self.metric_name``.
            task_type: Must be a member of ``TASK_TYPES``. It is only
                validated here; the visible code does not store it.
            evaluation_strategy: Strategy name; ``'holdout'`` gets extra
                handling at the end of this method.
            **evaluation_params: Extra keyword settings, kept as a dict on
                ``self.evaluation_params``.

        Raises:
            ValueError: If ``task_type`` is not in ``TASK_TYPES``.
        """
        self.estimator = estimator
        # Reject unknown task types before resolving the metric.
        if task_type not in TASK_TYPES:
            raise ValueError('Unsupported task type: %s' % task_type)
        self.metric = get_metric(metric)
        self.metric_name = metric
        self.evaluation_strategy = evaluation_strategy
        self.evaluation_params = evaluation_params

        if self.evaluation_strategy == 'holdout':
            if 'train_size' not in self.evaluation_params:
                # NOTE(review): this statement only reads a key that the guard
                # above has just shown to be absent, so it raises KeyError. It
                # looks like a default assignment whose right-hand side was
                # cut off -- confirm the intended value against the original
                # source before relying on the 'holdout' path.
                self.evaluation_params['train_size']
Exemple #3
0
    def __init__(self,
                 task_type=CLASSIFICATION,
                 optimizer_type='eval_base',
                 metric='acc',
                 trans_set=None,
                 time_budget=None,
                 maximum_evaluation_num=None,
                 time_limit_per_trans=600,
                 mem_limit_per_trans=1024,
                 fe_enabled=True,
                 evaluator=None,
                 debug=False,
                 seed=1,
                 tmp_directory='logs',
                 logging_config=None,
                 model_id=None,
                 task_id='Default'):
        """Store the configuration and make sure the working directory exists.

        Args:
            task_type: Task identifier, stored on ``self.task_type``.
            optimizer_type: Name of the optimizer to use. Only the name is
                stored here; ``self.optimizer`` itself starts as ``None``.
            metric: Metric identifier, resolved through ``get_metric`` and
                stored on ``self.metric``.
            trans_set: Stored as-is (presumably the allowed transformations --
                confirm against the callers).
            time_budget: Stored as-is.
            maximum_evaluation_num: Stored as-is.
            time_limit_per_trans: Per-transformation time limit; the unit is
                not visible here (presumably seconds -- confirm).
            mem_limit_per_trans: Per-transformation memory limit; the unit is
                not visible here (presumably MB -- confirm).
            fe_enabled: Flag stored on ``self.fe_enabled``.
            evaluator: Stored as-is.
            debug: Accepted for compatibility; not used by this constructor.
            seed: Stored on ``self._seed``.
            tmp_directory: Directory that is created when it does not exist.
            logging_config: Stored as-is.
            model_id: Stored as-is.
            task_id: Stored and passed to ``self._get_logger``.

        Raises:
            FileExistsError: If ``tmp_directory`` exists but is not a
                directory.
        """
        self.fe_enabled = fe_enabled
        self.trans_set = trans_set
        self.maximum_evaluation_num = maximum_evaluation_num
        self.time_budget = time_budget
        self.time_limit_per_trans = time_limit_per_trans
        self.mem_limit_per_trans = mem_limit_per_trans
        self.optimizer_type = optimizer_type
        self.evaluator = evaluator
        self.optimizer = None

        self.metric = get_metric(metric)
        self.task_type = task_type
        self.task_id = task_id
        self.model_id = model_id
        self._seed = seed
        self.tmp_directory = tmp_directory
        self.logging_config = logging_config
        self._logger = self._get_logger(task_id)

        # Set up backend.
        # exist_ok=True replaces the former exists()-then-makedirs() pair,
        # which could fail when two processes created the directory at the
        # same time.
        os.makedirs(self.tmp_directory, exist_ok=True)

        # For data preprocessing.
        self.uninformative_columns, self.uninformative_idx = list(), list()
        self.variance_selector = None
        self.onehot_encoder = None
        self.label_encoder = None
Exemple #4
0
    cs.add_conditions(aug_space.get_conditions())

    for estimator_id in algorithm_candidates:
        sub_cs = get_model_config_space(estimator_id,
                                        include_estimator=False,
                                        include_aug=False)
        parent_hyperparameter = {
            'parent': estimator_choice,
            'value': estimator_id
        }
        cs.add_configuration_space(estimator_id,
                                   sub_cs,
                                   parent_hyperparameter=parent_hyperparameter)

    return cs


# Dataset location.
dataset = 'cifar10'
data_dir = 'data/img_datasets/%s/' % dataset

# Search space over the two candidate networks, then the image data itself.
cs = get_pipeline_config_space(['resnet34', 'mobilenet'])
image_data = ImageDataset(data_path=data_dir, train_val_split=True)

# Build the evaluator around the default configuration and run it once on a
# freshly requested default configuration.
hpo_evaluator = DLEvaluator(
    cs.get_default_configuration(),
    IMG_CLS,
    scorer=get_metric('acc'),
    dataset=image_data,
    device='cuda',
    image_size=32,
    seed=1,
)
hpo_evaluator(cs.get_default_configuration())
    def __init__(self,
                 node_list,
                 node_index,
                 task_type,
                 timestamp,
                 fe_config_space: ConfigurationSpace,
                 cash_config_space: ConfigurationSpace,
                 data: DataNode,
                 fixed_config=None,
                 trial_num=0,
                 time_limit=None,
                 metric='acc',
                 optimizer='smac',
                 ensemble_method='ensemble_selection',
                 ensemble_size=50,
                 per_run_time_limit=300,
                 output_dir="logs",
                 dataset_name='default_dataset',
                 eval_type='holdout',
                 resampling_params=None,
                 n_jobs=1,
                 seed=1):
        """Record the tree position, run settings and initial search state.

        Args:
            node_list: Tree setting, stored as-is.
            node_index: Tree setting, stored as-is.
            task_type: Stored; when it is in ``CLS_TASKS`` the data is also
                checked with ``is_imbalanced_dataset``.
            timestamp: Stored as-is.
            fe_config_space: Stored as-is.
            cash_config_space: Stored as-is.
            data: The result of ``data.copy_()`` is kept as
                ``self.original_data``.
            fixed_config: Stored as-is.
            trial_num: Stored as-is.
            time_limit: Stored as-is.
            metric: Metric identifier, resolved through ``get_metric``.
            optimizer: Stored as-is.
            ensemble_method: Stored as-is.
            ensemble_size: Stored as-is.
            per_run_time_limit: Stored as-is.
            output_dir: Stored as-is.
            dataset_name: Stored and used in the logger name.
            eval_type: Stored as-is.
            resampling_params: Stored as-is.
            n_jobs: Stored as-is.
            seed: Stored as-is.
        """
        # Position inside the tree.
        self.node_index = node_index
        self.node_list = node_list

        # Budgets, start time and logger.
        self.trial_num = trial_num
        self.dataset_name = dataset_name
        self.per_run_time_limit = per_run_time_limit
        self.time_limit = time_limit
        self.start_time = time.time()
        self.logger = get_logger('Soln-ml: %s' % dataset_name)

        # Task description and search spaces.
        self.task_type = task_type
        self.timestamp = timestamp
        self.eval_type = eval_type
        self.resampling_params = resampling_params
        self.fixed_config = fixed_config
        self.fe_config_space = fe_config_space
        self.cash_config_space = cash_config_space
        self.original_data = data.copy_()
        self.metric = get_metric(metric)

        # Optimizer / ensemble / runtime options.
        self.optimizer = optimizer
        self.ensemble_size = ensemble_size
        self.ensemble_method = ensemble_method
        self.output_dir = output_dir
        self.seed = seed
        self.n_jobs = n_jobs

        # Search state: nothing evaluated yet, so the best score is -inf.
        self.incumbent = None
        self.incumbent_perf = -float("INF")
        self.eval_dict = {}
        self.timeout_flag = False
        self.early_stop_flag = False

        # The imbalance check is only run for classification tasks.
        self.if_imbal = (
            is_imbalanced_dataset(self.original_data)
            if self.task_type in CLS_TASKS
            else False
        )

        self.es = None
Exemple #6
0
    def __init__(self, time_limit=300,
                 dataset_name='default_name',
                 amount_of_resource=None,
                 task_type=None,
                 metric='bal_acc',
                 include_algorithms=None,
                 include_preprocessors=None,
                 optimizer='smac',
                 ensemble_method='ensemble_selection',
                 enable_meta_algorithm_selection=True,
                 enable_fe=True,
                 per_run_time_limit=150,
                 ensemble_size=50,
                 evaluation='holdout',
                 resampling_params=None,
                 output_dir="logs",
                 logging_config=None,
                 random_state=1,
                 n_jobs=1):
        """Store the run configuration and pick the candidate algorithms.

        Args:
            time_limit: Stored as-is.
            dataset_name: Stored and passed to ``self._get_logger``.
            amount_of_resource: Stored; ``None`` is replaced by ``int(1e8)``.
            task_type: Stored; also selects the default algorithm list when
                ``include_algorithms`` is ``None``.
            metric: Metric identifier, kept as ``self.metric_id`` and resolved
                through ``get_metric``.
            include_algorithms: Explicit algorithm list; when given it is used
                as-is and ``task_type`` is not checked.
            include_preprocessors: Stored; any non-``None`` value turns meta
                algorithm selection off.
            optimizer: Stored as-is.
            ensemble_method: Stored; must be ``None`` or in ``ensemble_list``.
            enable_meta_algorithm_selection: Stored unless overridden by
                ``include_preprocessors``.
            enable_fe: Stored as-is.
            per_run_time_limit: Stored as-is.
            ensemble_size: Stored as-is.
            evaluation: Stored as ``self.evaluation_type``.
            resampling_params: Stored as-is.
            output_dir: Stored as-is.
            logging_config: Stored as-is.
            random_state: Stored as ``self.seed``.
            n_jobs: Stored as-is.

        Raises:
            ValueError: If no algorithms are given and ``task_type`` is
                ``IMG_CLS``/``TEXT_CLS`` or is in neither ``CLS_TASKS`` nor
                ``RGS_TASKS``; or if ``ensemble_method`` is not supported.
        """
        # Keep both the metric identifier and the resolved metric.
        self.metric_id = metric
        self.metric = get_metric(self.metric_id)

        # Run settings; the logger is created once these are stored.
        self.output_dir = output_dir
        self.logging_config = logging_config
        self.seed = random_state
        self.time_limit = time_limit
        self.per_run_time_limit = per_run_time_limit
        self.dataset_name = dataset_name
        self.logger = self._get_logger(self.dataset_name)

        # Evaluation and search options.
        self.task_type = task_type
        self.evaluation_type = evaluation
        self.resampling_params = resampling_params
        self.include_preprocessors = include_preprocessors
        self.optimizer = optimizer
        self.ensemble_method = ensemble_method
        self.ensemble_size = ensemble_size
        self.enable_fe = enable_fe
        self.n_jobs = n_jobs
        self.solver = None

        if amount_of_resource is None:
            amount_of_resource = int(1e8)
        self.amount_of_resource = amount_of_resource

        # Timing bookkeeping.
        self.global_start_time = time.time()
        self.eval_time = None
        self.total_time = None

        # Meta learning is disabled whenever preprocessors are specified.
        self.enable_meta_algorithm_selection = (
            False if self.include_preprocessors is not None
            else enable_meta_algorithm_selection
        )

        # Candidate algorithms: explicit list first, otherwise by task type.
        if include_algorithms is not None:
            self.include_algorithms = include_algorithms
        elif task_type in CLS_TASKS:
            if task_type in [IMG_CLS, TEXT_CLS]:
                raise ValueError('Please use AutoDL module, instead of AutoML.')
            self.include_algorithms = list(classification_algorithms)
        elif task_type in RGS_TASKS:
            self.include_algorithms = list(regression_algorithms)
        else:
            raise ValueError("Unknown task type %s" % task_type)

        ensemble_supported = ensemble_method is None or ensemble_method in ensemble_list
        if not ensemble_supported:
            raise ValueError("%s is not supported for ensemble!" % ensemble_method)