Example #1
# Imports assumed from an older auto-sklearn layout; adjust to your tree.
from autosklearn.constants import (BINARY_CLASSIFICATION,
                                   MULTICLASS_CLASSIFICATION,
                                   MULTILABEL_CLASSIFICATION)
from autosklearn.metalearning.mismbo import calc_meta_features


def _calculate_metafeatures(data_feat_type, data_info_task, basename,
                            metalearning_cnt, x_train, y_train, watcher,
                            logger):
    """Compute dataset metafeatures for metalearning; returns None when
    metalearning is disabled or the task is not a classification task."""
    # == Calculate metafeatures
    task_name = 'CalculateMetafeatures'
    watcher.start_task(task_name)
    categorical = [feat_type.lower() == 'categorical'
                   for feat_type in data_feat_type]

    if metalearning_cnt <= 0:
        result = None
    elif data_info_task in \
            [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
        logger.info('Start calculating metafeatures for %s' % basename)
        result = calc_meta_features(x_train,
                                    y_train,
                                    categorical=categorical,
                                    dataset_name=basename)
    else:
        result = None
        logger.info('Metafeatures not calculated')
    watcher.stop_task(task_name)
    logger.info(
        'Calculating Metafeatures (categorical attributes) took %5.2f sec' %
        watcher.wall_elapsed(task_name))
    return result
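
A minimal driver for the helper above, sketched under assumptions: the metafeature helpers and task constants come from the imports shown earlier, the watcher only needs start_task/stop_task/wall_elapsed (stubbed here rather than pulled from auto-sklearn), and the toy data, feature types, and dataset name are invented for illustration.

import logging
import time

import numpy as np


class StubWatch:
    """Minimal stand-in for auto-sklearn's StopWatch interface."""

    def __init__(self):
        self._start = {}
        self._elapsed = {}

    def start_task(self, name):
        self._start[name] = time.time()

    def stop_task(self, name):
        self._elapsed[name] = time.time() - self._start[name]

    def wall_elapsed(self, name):
        return self._elapsed[name]


logging.basicConfig(level=logging.INFO)

# Toy data: 100 samples, 4 numerical features, 3 classes (all invented).
x_train = np.random.rand(100, 4)
y_train = np.random.randint(0, 3, size=100)

result = _calculate_metafeatures(
    data_feat_type=['Numerical'] * 4,
    data_info_task=MULTICLASS_CLASSIFICATION,
    basename='demo-dataset',
    metalearning_cnt=25,  # > 0 enables the metafeature computation
    x_train=x_train,
    y_train=y_train,
    watcher=StubWatch(),
    logger=logging.getLogger('metafeatures-demo'),
)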
Example #2

    # Method from a unittest.TestCase in auto-sklearn's metalearning tests;
    # the metric constants and helper functions come from that test module.
    def test_metalearning(self):
        dataset_name = 'digits'

        initial_challengers = {
            ACC_METRIC: "--initial-challengers \" "
                        "-balancing:strategy 'weighting' "
                        "-classifier:__choice__ 'proj_logit'",
            AUC_METRIC: "--initial-challengers \" "
                        "-balancing:strategy 'none' "
                        "-classifier:__choice__ 'random_forest'",
            BAC_METRIC: "--initial-challengers \" "
                        "-balancing:strategy 'weighting' "
                        "-classifier:__choice__ 'proj_logit'",
            F1_METRIC: "--initial-challengers \" "
                       "-balancing:strategy 'weighting' "
                       "-classifier:__choice__ 'proj_logit'",
            PAC_METRIC: "--initial-challengers \" "
                        "-balancing:strategy 'none' "
                        "-classifier:__choice__ 'random_forest'"
        }

        for metric in initial_challengers:
            configuration_space = get_configuration_space(
                {
                    'metric': metric,
                    'task': MULTICLASS_CLASSIFICATION,
                    'is_sparse': False
                },
                include_preprocessors=['no_preprocessing'])

            X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
            categorical = [False] * X_train.shape[1]

            meta_features_label = calc_meta_features(X_train, Y_train,
                                                     categorical, dataset_name)
            meta_features_encoded_label = calc_meta_features_encoded(X_train,
                                                                     Y_train,
                                                                     categorical,
                                                                     dataset_name)
            initial_configuration_strings_for_smac = \
                create_metalearning_string_for_smac_call(
                    meta_features_label,
                    meta_features_encoded_label,
                    configuration_space, dataset_name, metric,
                    MULTICLASS_CLASSIFICATION, False, 1, None)

            print(metric)
            print(initial_configuration_strings_for_smac[0])
            self.assertTrue(
                initial_configuration_strings_for_smac[0].startswith(
                    initial_challengers[metric]))
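
The assertion checks only a prefix of the generated SMAC call string: everything up to the classifier choice must match the expected challenger, while the hyperparameter tail may vary between runs. A toy illustration of that prefix check, with an entirely invented tail:

expected = ("--initial-challengers \" "
            "-balancing:strategy 'weighting' "
            "-classifier:__choice__ 'proj_logit'")
generated = expected + " -proj_logit:max_epochs '20'\""  # hypothetical tail
assert generated.startswith(expected)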
Example #4
def _calculate_metafeatures(
    data_feat_type, data_info_task, basename, metalearning_cnt, x_train, y_train, watcher, logger
):
    # == Calculate metafeatures
    task_name = "CalculateMetafeatures"
    watcher.start_task(task_name)
    categorical = [feat_type.lower() == "categorical" for feat_type in data_feat_type]

    if metalearning_cnt <= 0:
        result = None
    elif data_info_task in [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION, MULTILABEL_CLASSIFICATION]:
        logger.info("Start calculating metafeatures for %s" % basename)
        result = calc_meta_features(x_train, y_train, categorical=categorical, dataset_name=basename)
    else:
        result = None
        logger.info("Metafeatures not calculated")
    watcher.stop_task(task_name)
    logger.info("Calculating Metafeatures (categorical attributes) took %5.2f" % watcher.wall_elapsed(task_name))
    return result