Ejemplo n.º 1
0
def calculate_metafeatures(task_id):
    """Compute and merge both metafeature sets for the task ``task_id``.

    The task is loaded via ``load_task``. Its task-type string is mapped
    onto one of the constants ``BINARY_CLASSIFICATION``,
    ``MULTICLASS_CLASSIFICATION`` or ``REGRESSION`` before being handed to
    ``_calculate_metafeatures`` and ``_calculate_metafeatures_encoded``.

    Returns the object produced by ``_calculate_metafeatures``, after its
    ``metafeature_values`` has been updated in place with the
    ``metafeature_values`` of the encoded result.
    """
    # The test split is part of the loaded tuple but is not used here.
    (X_train, y_train, _X_test, _y_test,
     cat, task_type, dataset_name) = load_task(task_id)
    stopwatch = StopWatch()

    # Anything that is not 'classification' is treated as regression;
    # classification is binary only when exactly two distinct labels occur.
    if task_type != 'classification':
        task_type = REGRESSION
    elif len(np.unique(y_train)) == 2:
        task_type = BINARY_CLASSIFICATION
    else:
        task_type = MULTICLASS_CLASSIFICATION

    # Keyword arguments common to both helper calls; only the name of the
    # task-type keyword differs between them.
    shared_kwargs = dict(
        x_train=X_train,
        y_train=y_train,
        data_feat_type=cat,
        basename=dataset_name,
        logger_=logger,
        watcher=stopwatch,
    )

    merged = _calculate_metafeatures(
        data_info_task=task_type, **shared_kwargs)
    encoded = _calculate_metafeatures_encoded(
        task=task_type, **shared_kwargs)

    # Fold the encoded values into the first result and hand that back.
    merged.metafeature_values.update(encoded.metafeature_values)
    return merged
Ejemplo n.º 2
0
    def test_metalearning(self):
        """Check the first metalearning suggestion for every listed metric.

        For the 'diabetes' data (``REGRESSION``) and the 'digits' data
        (``MULTICLASS_CLASSIFICATION``), and for each metric key listed
        below, the first string returned by ``suggest_via_metalearning``
        must start with the expected ``--initial-challengers`` prefix.
        """
        # Expected prefixes for the classification metrics. Several metrics
        # share the same expectation, so each distinct string is named once.
        weighting_proj_logit = (
            "--initial-challengers \" "
            "-balancing:strategy 'weighting' "
            "-classifier:__choice__ 'proj_logit'"
        )
        weighting_liblinear_svc = (
            "--initial-challengers \" "
            "-balancing:strategy 'weighting' "
            "-classifier:__choice__ 'liblinear_svc'"
        )
        unbalanced_random_forest = (
            "--initial-challengers \" "
            "-balancing:strategy 'none' "
            "-classifier:__choice__ 'random_forest'"
        )
        classification_expectations = {
            "ACC_METRIC": weighting_proj_logit,
            "AUC_METRIC": weighting_liblinear_svc,
            "BAC_METRIC": weighting_proj_logit,
            "F1_METRIC": weighting_proj_logit,
            "PAC_METRIC": unbalanced_random_forest,
        }

        # Both regression metrics expect the same prefix.
        mean_imputed_random_forest = (
            "--initial-challengers \" "
            "-imputation:strategy 'mean' "
            "-one_hot_encoding:minimum_fraction '0.01' "
            "-one_hot_encoding:use_minimum_fraction 'True' "
            "-preprocessor:__choice__ 'no_preprocessing' "
            "-regressor:__choice__ 'random_forest'"
        )
        regression_expectations = {
            "A_METRIC": mean_imputed_random_forest,
            "R2_METRIC": mean_imputed_random_forest,
        }

        # (dataset name, task constant, expected prefix per metric)
        scenarios = (
            ('diabetes', REGRESSION, regression_expectations),
            ('digits', MULTICLASS_CLASSIFICATION,
             classification_expectations),
        )

        for dataset_name, task, expectations in scenarios:
            for metric, expected_prefix in expectations.items():
                dataset_info = {
                    'metric': metric,
                    'task': task,
                    'is_sparse': False,
                }
                configuration_space = get_configuration_space(
                    dataset_info,
                    include_preprocessors=['no_preprocessing'])

                X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
                # Every column is flagged as non-categorical.
                n_columns = X_train.shape[1]
                categorical = [False] * n_columns

                label_metafeatures = _calculate_metafeatures(
                    X_train, Y_train, categorical, dataset_name, task)
                encoded_metafeatures = _calculate_metafeatures_encoded(
                    X_train, Y_train, categorical, dataset_name, task)

                suggestions = suggest_via_metalearning(
                    label_metafeatures,
                    encoded_metafeatures,
                    configuration_space, dataset_name, metric,
                    task, False, 1, None)
                first_suggestion = suggestions[0]

                print(metric)
                print(first_suggestion)
                self.assertTrue(
                    first_suggestion.startswith(expected_prefix))