Ejemplo n.º 1
0
def train(resource_num, params, data_node):
    """Evaluate *params* on a data fraction and report a minimization loss.

    ``resource_num`` (a budget out of 27) is scaled into a downsample ratio
    for ``partial_validation``.  Returns a dict whose ``'loss'`` entry is
    ``1 - score`` (lower is better); a failed evaluation yields ``+inf``.
    """
    print(resource_num, params)
    t0 = time.time()
    resource_num = resource_num * 1.0 / 27

    # Parse the raw data node with the current configuration.
    parsed_node = tmp_bo._parse(data_node['data_node'], params)
    X_train, y_train = parsed_node.data

    config_dict = params.copy()
    init_params, fit_params = {}, {}
    # Inject class-balancing parameters when the parsed node requests it.
    if parsed_node.enable_balance == 1:
        init_params, fit_params = get_fit_params(y_train, params['estimator'])
        config_dict.update(init_params)

    classifier_id, clf = get_estimator(config_dict)

    try:
        score = partial_validation(clf, balanced_accuracy_scorer, X_train, y_train, resource_num,
                                   test_size=0.2,
                                   random_state=1,
                                   if_stratify=True,
                                   onehot=None,
                                   fit_params=fit_params)
    except Exception as err:
        # Best-effort evaluation: a failure becomes the worst possible score.
        print(err)
        score = -np.inf

    print(resource_num, params, -score, time.time() - t0)
    # Turn it into a minimization problem.
    return {'loss': 1 - score, 'early_stop': False, 'lc_info': []}
Ejemplo n.º 2
0
    def __call__(self, config, **kwargs):
        """Evaluate *config* and return ``-score`` as a minimization loss.

        Supports the 'cv', 'holdout' and 'partial' resampling strategies.
        On evaluation failure the score is set to ``-inf`` so the returned
        loss is ``+inf`` (worst), except for the 'fe' evaluator, which
        re-raises the underlying error.

        :param config: configuration to evaluate; falls back to
            ``self.hpo_config`` when None.
        :param kwargs: may carry ``data_node`` and ``data_subsample_ratio``.
        :raises ValueError: if this evaluator has no name, or the
            resampling strategy is unknown.
        """
        start_time = time.time()
        if self.name is None:
            raise ValueError('This evaluator has no name/type!')
        assert self.name in ['hpo', 'fe']

        # Prepare configuration.
        np.random.seed(self.seed)
        config = config if config is not None else self.hpo_config

        downsample_ratio = kwargs.get('data_subsample_ratio', 1.0)
        # Prepare data node (caller-supplied node takes precedence).
        data_node = kwargs.get('data_node', self.data_node)

        X_train, y_train = data_node.data

        config_dict = config.get_dictionary().copy()
        # Prepare training and initial params for classifier.
        init_params, fit_params = {}, {}
        if data_node.enable_balance == 1:
            init_params, fit_params = self.get_fit_params(
                y_train, config['estimator'])
            for key, val in init_params.items():
                config_dict[key] = val

        if data_node.data_balance == 1:
            fit_params['data_balance'] = True

        classifier_id, clf = get_estimator(config_dict)

        # Fit the one-hot encoder lazily; only threshold scorers need it.
        if self.onehot_encoder is None:
            self.onehot_encoder = OneHotEncoder(categories='auto')
            y = np.reshape(y_train, (len(y_train), 1))
            self.onehot_encoder.fit(y)
        onehot = self.onehot_encoder if isinstance(
            self.scorer, _ThresholdScorer) else None

        try:
            if self.resampling_strategy == 'cv':
                if self.resampling_params is None or 'folds' not in self.resampling_params:
                    folds = 5
                else:
                    folds = self.resampling_params['folds']
                score = cross_validation(
                    clf,
                    self.scorer,
                    X_train,
                    y_train,
                    n_fold=folds,
                    random_state=self.seed,
                    if_stratify=True,
                    onehot=onehot,
                    fit_params=fit_params)
            elif self.resampling_strategy == 'holdout':
                if self.resampling_params is None or 'test_size' not in self.resampling_params:
                    test_size = 0.33
                else:
                    test_size = self.resampling_params['test_size']
                score = holdout_validation(
                    clf,
                    self.scorer,
                    X_train,
                    y_train,
                    test_size=test_size,
                    random_state=self.seed,
                    if_stratify=True,
                    onehot=onehot,
                    fit_params=fit_params)
            elif self.resampling_strategy == 'partial':
                if self.resampling_params is None or 'test_size' not in self.resampling_params:
                    test_size = 0.33
                else:
                    test_size = self.resampling_params['test_size']
                score = partial_validation(
                    clf,
                    self.scorer,
                    X_train,
                    y_train,
                    downsample_ratio,
                    test_size=test_size,
                    random_state=self.seed,
                    if_stratify=True,
                    onehot=onehot,
                    fit_params=fit_params)
            else:
                raise ValueError('Invalid resampling strategy: %s!' %
                                 self.resampling_strategy)
        except Exception as e:
            if self.name == 'fe':
                raise e
            self.logger.info('%s-evaluator: %s' % (self.name, str(e)))
            # BUG FIX: was `score = np.inf`, which made the returned loss
            # `-score` equal to -inf, i.e. failed configurations looked
            # *optimal* to the minimizer.  Failure must be the worst score.
            score = -np.inf

        self.logger.debug(
            '%d-Evaluation<%s> | Score: %.4f | Time cost: %.2f seconds | Shape: %s'
            % (self.eval_id, classifier_id, self.scorer._sign * score,
               time.time() - start_time, X_train.shape))
        self.eval_id += 1

        # Turn it into a minimization problem.
        return -score
Ejemplo n.º 3
0
    def __call__(self, config, **kwargs):
        """Evaluate *config* on the (optionally downsampled) data.

        Parses ``self.data_node`` with the configuration, trains the
        resulting classifier under the configured resampling strategy, and
        returns ``-score`` so the caller can minimize.  A failed evaluation
        yields ``score = -inf`` and therefore a loss of ``+inf``.

        :param config: configuration (ConfigSpace-style) to evaluate.
        :param kwargs: may carry ``resource_ratio`` for partial validation.
        :raises ValueError: for an unknown resampling strategy.
        """
        # NOTE(review): the seed is pinned to 1 on every call, overwriting
        # any seed set at construction time — confirm this is intended.
        self.seed = 1
        downsample_ratio = kwargs.get('resource_ratio', 1.0)
        # Parse the raw data node with the current configuration.
        data_node = self.tmp_bo._parse(self.data_node, config)

        X_train, y_train = data_node.data

        config_dict = config.get_dictionary().copy()
        # Prepare training and initial params for classifier.
        init_params, fit_params = {}, {}
        if data_node.enable_balance == 1:
            init_params, fit_params = self.get_fit_params(
                y_train, config['estimator'])
            for key, val in init_params.items():
                config_dict[key] = val

        if data_node.data_balance == 1:
            fit_params['data_balance'] = True

        classifier_id, clf = get_estimator(config_dict)

        # Fit the one-hot encoder lazily; only threshold scorers need it.
        if self.onehot_encoder is None:
            self.onehot_encoder = OneHotEncoder(categories='auto')
            y = np.reshape(y_train, (len(y_train), 1))
            self.onehot_encoder.fit(y)

        try:
            if 'cv' in self.resampling_strategy:
                if self.resampling_params is None or 'folds' not in self.resampling_params:
                    folds = 5
                else:
                    folds = self.resampling_params['folds']
                score = cross_validation(
                    clf,
                    self.scorer,
                    X_train,
                    y_train,
                    n_fold=folds,
                    random_state=self.seed,
                    if_stratify=True,
                    onehot=self.onehot_encoder if isinstance(
                        self.scorer, _ThresholdScorer) else None,
                    fit_params=fit_params)
            elif 'holdout' in self.resampling_strategy:
                if self.resampling_params is None or 'test_size' not in self.resampling_params:
                    test_size = 0.33
                else:
                    test_size = self.resampling_params['test_size']
                score = holdout_validation(
                    clf,
                    self.scorer,
                    X_train,
                    y_train,
                    test_size=test_size,
                    random_state=self.seed,
                    if_stratify=True,
                    onehot=self.onehot_encoder if isinstance(
                        self.scorer, _ThresholdScorer) else None,
                    fit_params=fit_params)
            elif 'partial' in self.resampling_strategy:
                if self.resampling_params is None or 'test_size' not in self.resampling_params:
                    test_size = 0.33
                else:
                    test_size = self.resampling_params['test_size']
                score = partial_validation(
                    clf,
                    self.scorer,
                    X_train,
                    y_train,
                    downsample_ratio,
                    test_size=test_size,
                    random_state=self.seed,
                    if_stratify=True,
                    onehot=self.onehot_encoder if isinstance(
                        self.scorer, _ThresholdScorer) else None,
                    fit_params=fit_params)
            else:
                raise ValueError('Invalid resampling strategy: %s!' %
                                 self.resampling_strategy)
        except Exception as e:
            # Best-effort: log and treat the failure as the worst score.
            self.logger.info('evaluator: %s' % (str(e)))
            score = -np.inf
        self.eval_id += 1

        # Turn it into a minimization problem.
        # (Removed: a dead `return_dict` local that was populated but never
        # returned, plus commented-out logging and its unused timer.)
        return -score
Ejemplo n.º 4
0
    def __call__(self, config, **kwargs):
        """Evaluate a regression *config*; return ``-score`` (minimization).

        Supports the 'cv', 'holdout' and 'partial' resampling strategies.
        On failure returns ``+inf`` — the worst possible loss — unless this
        is the 'fe' evaluator, which re-raises the underlying error.

        :param config: configuration to evaluate; falls back to
            ``self.hpo_config`` when None.
        :param kwargs: may carry ``data_node`` and ``resource_ratio``.
        :raises ValueError: if this evaluator has no name, or the
            resampling strategy is unknown.
        """
        start_time = time.time()

        if self.name is None:
            raise ValueError('This evaluator has no name/type!')
        assert self.name in ['hpo', 'fe']

        # Prepare configuration.
        np.random.seed(self.seed)
        config = config if config is not None else self.hpo_config

        downsample_ratio = kwargs.get('resource_ratio', 1.0)

        # Prepare data node (caller-supplied node takes precedence).
        if 'data_node' in kwargs:
            data_node = kwargs['data_node']
        else:
            data_node = self.data_node

        X_train, y_train = data_node.data

        config_dict = config.get_dictionary().copy()
        regressor_id, reg = get_estimator(config_dict)
        try:
            if self.resampling_strategy == 'cv':
                if self.resampling_params is None or 'folds' not in self.resampling_params:
                    folds = 5
                else:
                    folds = self.resampling_params['folds']
                score = cross_validation(reg,
                                         self.scorer,
                                         X_train,
                                         y_train,
                                         n_fold=folds,
                                         random_state=self.seed,
                                         if_stratify=False)
            elif self.resampling_strategy == 'holdout':
                if self.resampling_params is None or 'test_size' not in self.resampling_params:
                    test_size = 0.33
                else:
                    test_size = self.resampling_params['test_size']
                score = holdout_validation(reg,
                                           self.scorer,
                                           X_train,
                                           y_train,
                                           test_size=test_size,
                                           random_state=self.seed,
                                           if_stratify=False)
            elif self.resampling_strategy == 'partial':
                if self.resampling_params is None or 'test_size' not in self.resampling_params:
                    test_size = 0.33
                else:
                    test_size = self.resampling_params['test_size']
                score = partial_validation(reg,
                                           self.scorer,
                                           X_train,
                                           y_train,
                                           downsample_ratio,
                                           test_size=test_size,
                                           random_state=self.seed,
                                           if_stratify=False)
            else:
                raise ValueError('Invalid resampling strategy: %s!' %
                                 self.resampling_strategy)
        except Exception as e:
            if self.name == 'fe':
                raise e
            self.logger.info('%s-evaluator: %s' % (self.name, str(e)))
            # BUG FIX: was `return -np.inf`.  The success path returns
            # `-score` as a minimization loss, so a failed run must return
            # +inf (worst); returning -inf made failing configurations
            # appear optimal to the minimizer.
            return np.inf

        self.logger.debug(
            '%d-Evaluation<%s> | Score: %.4f | Time cost: %.2f seconds | Shape: %s'
            % (self.eval_id, regressor_id, self.scorer._sign * score,
               time.time() - start_time, X_train.shape))
        self.eval_id += 1

        # Turn it into a minimization problem.
        # (Removed: a dead `return_dict` local populated but never returned.)
        return -score