Beispiel #1
0
def get_data(data_id, randomstate=42):
    """Load an OpenML dataset and run the labelled metafeature calculation.

    The dataset is obtained through ``openml.datasets.get_dataset`` and read
    in array format with its default target attribute.  It is then split
    with ``train_size=0.6``, stratified on the target and seeded with
    ``randomstate``; only the training part is passed on to
    ``calculate_all_metafeatures_with_labels``.

    Note: the result of the metafeature calculation is not captured, so
    this function returns ``None``.
    """
    openml_dataset = openml.datasets.get_dataset(dataset_id=data_id)
    target_name = openml_dataset.default_target_attribute

    features, labels, categorical_indicator, _ = openml_dataset.get_data(
        dataset_format="array",
        target=target_name,
    )

    # The held-out part of the split is not used by this function.
    train_features, _, train_labels, _ = (
        sklearn.model_selection.train_test_split(
            features,
            labels,
            train_size=0.6,
            stratify=labels,
            random_state=randomstate,
        )
    )

    calculate_all_metafeatures_with_labels(
        train_features,
        train_labels,
        categorical=categorical_indicator,
        dataset_name='data',
    )
Beispiel #2
0
def _calculate_metafeatures(data_feat_type, data_info_task, basename,
                            x_train, y_train, watcher, logger):
    """Compute labelled metafeatures for a dataset and time the step.

    :param data_feat_type: iterable of feature-type strings; an entry equal
        to ``'categorical'`` (case-insensitive) marks a categorical column.
    :param data_info_task: task identifier, compared against the task
        constants available at module level.
    :param basename: dataset name, used for logging and passed on as
        ``dataset_name``.
    :param x_train: training features.
    :param y_train: training targets.
    :param watcher: timer object providing ``start_task``, ``stop_task``
        and ``wall_elapsed``.
    :param logger: logger used for progress messages.
    :return: the metafeature result with every entry whose ``type_`` is not
        ``'METAFEATURE'`` removed, or ``None`` for an unsupported task.
    """
    timer_key = 'CalculateMetafeatures'
    watcher.start_task(timer_key)

    is_categorical = [kind.lower() == 'categorical'
                      for kind in data_feat_type]

    # Classification and regression use different exclusion lists.
    if data_info_task in CLASSIFICATION_TASKS:
        excluded = EXCLUDE_META_FEATURES_CLASSIFICATION
    else:
        excluded = EXCLUDE_META_FEATURES_REGRESSION

    supported_tasks = [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION,
                       MULTILABEL_CLASSIFICATION, REGRESSION]

    if data_info_task not in supported_tasks:
        result = None
        logger.info('Metafeatures not calculated')
    else:
        logger.info('Start calculating metafeatures for %s', basename)
        result = calculate_all_metafeatures_with_labels(
            x_train,
            y_train,
            categorical=is_categorical,
            dataset_name=basename,
            dont_calculate=excluded,
        )
        # Drop everything that is not a real metafeature, mutating the
        # result's dictionary in place.
        non_metafeature_keys = [
            name
            for name, entry in result.metafeature_values.items()
            if entry.type_ != 'METAFEATURE'
        ]
        for name in non_metafeature_keys:
            del result.metafeature_values[name]

    watcher.stop_task(timer_key)
    elapsed = watcher.wall_elapsed(timer_key)
    logger.info(
        'Calculating Metafeatures (categorical attributes) took %5.2f',
        elapsed)
    return result
Beispiel #3
0
def _calculate_metafeatures__(data_feat_type, data_info_task, basename,
                              x_train, y_train):
    """Compute labelled metafeatures for a dataset.

    :param data_feat_type: iterable of feature-type strings; an entry equal
        to ``'categorical'`` (case-insensitive) marks a categorical column.
    :param data_info_task: task identifier, compared against the task
        constants available at module level.
    :param basename: dataset name, passed on as ``dataset_name``.
    :param x_train: training features.
    :param y_train: training targets.
    :return: the metafeature result with every entry whose ``type_`` is not
        ``'METAFEATURE'`` removed, or ``None`` when the task is not one of
        the supported classification/regression tasks.
    """
    categorical = [
        feat_type.lower() == 'categorical' for feat_type in data_feat_type
    ]

    # Classification and regression use different exclusion lists.
    EXCLUDE_META_FEATURES = EXCLUDE_META_FEATURES_CLASSIFICATION \
        if data_info_task in CLASSIFICATION_TASKS else EXCLUDE_META_FEATURES_REGRESSION

    if data_info_task not in [
            MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION,
            MULTILABEL_CLASSIFICATION, REGRESSION
    ]:
        return None

    result = calculate_all_metafeatures_with_labels(
        x_train,
        y_train,
        categorical=categorical,
        dataset_name=basename,
        dont_calculate=EXCLUDE_META_FEATURES,
    )
    # Iterate over a snapshot of the keys because entries are deleted from
    # the dictionary while looping.
    for key in list(result.metafeature_values.keys()):
        if result.metafeature_values[key].type_ != 'METAFEATURE':
            del result.metafeature_values[key]
    return result
Beispiel #4
0
def _calculate_metafeatures(data_feat_type, data_info_task, basename,
                            x_train, y_train, watcher, logger):
    """Run the labelled metafeature calculation under a named timer.

    The timer ``'CalculateMetafeatures'`` is started on ``watcher`` before
    any work happens and stopped right before the elapsed wall time is
    logged.

    :param data_feat_type: iterable of feature-type strings; columns whose
        type equals ``'categorical'`` (case-insensitive) are flagged.
    :param data_info_task: task identifier checked against the module-level
        task constants.
    :param basename: dataset name for logging and for ``dataset_name``.
    :param x_train: training features.
    :param y_train: training targets.
    :param watcher: object exposing ``start_task``, ``stop_task`` and
        ``wall_elapsed``.
    :param logger: logger receiving the progress messages.
    :return: the filtered metafeature result, or ``None`` if the task is
        not one of the supported ones.
    """
    label = 'CalculateMetafeatures'
    watcher.start_task(label)

    flags = []
    for feat_type in data_feat_type:
        flags.append(feat_type.lower() == 'categorical')

    if data_info_task in CLASSIFICATION_TASKS:
        skip_list = EXCLUDE_META_FEATURES_CLASSIFICATION
    else:
        skip_list = EXCLUDE_META_FEATURES_REGRESSION

    task_is_supported = data_info_task in [
        MULTICLASS_CLASSIFICATION,
        BINARY_CLASSIFICATION,
        MULTILABEL_CLASSIFICATION,
        REGRESSION,
    ]

    result = None
    if task_is_supported:
        logger.info('Start calculating metafeatures for %s', basename)
        result = calculate_all_metafeatures_with_labels(
            x_train, y_train,
            categorical=flags,
            dataset_name=basename,
            dont_calculate=skip_list)
        # Snapshot the items first: entries are removed while looping.
        for key, entry in list(result.metafeature_values.items()):
            if entry.type_ != 'METAFEATURE':
                del result.metafeature_values[key]
    else:
        logger.info('Metafeatures not calculated')

    watcher.stop_task(label)
    logger.info(
        'Calculating Metafeatures (categorical attributes) took %5.2f',
        watcher.wall_elapsed(label))
    return result
Beispiel #5
0
def data2features(X_train, y_train, categorical_indicator):
    """Build a single-row array of metafeature values for a dataset.

    The metafeatures are computed with
    ``calculate_all_metafeatures_with_labels`` and then read out in the
    order given by the module-level ``metafeature_names_new``.

    :param X_train: training features.
    :param y_train: training targets.
    :param categorical_indicator: passed on as ``categorical``.
    :return: array of shape ``(1, len(metafeature_names_new))``; positions
        whose metafeature is missing or cannot be stored as a number stay
        at ``0``.
    """
    metafeatures = calculate_all_metafeatures_with_labels(
        X_train, y_train, categorical=categorical_indicator,
        dataset_name='data')

    metafeature_values = np.zeros((1, len(metafeature_names_new)))
    for column, name in enumerate(metafeature_names_new):
        try:
            metafeature_values[0, column] = metafeatures[name].value
        except (KeyError, AttributeError, TypeError, ValueError):
            # Best effort: a metafeature that was not computed or whose
            # value is not a plain number keeps the default of 0.  Only
            # lookup/conversion errors are ignored so that interrupts and
            # unrelated failures are no longer silently swallowed.
            continue
    return metafeature_values
Beispiel #6
0
def calc_meta_features(X_train, Y_train, categorical, dataset_name):
    """
    Compute the labelled metafeatures of a training set.

    The dataset name is suffixed with the module-level ``SENTINEL`` and the
    metafeatures listed in ``EXCLUDE_META_FUTURES`` are skipped.

    :param X_train: training features
    :param Y_train: training targets
    :param categorical: forwarded unchanged as the third positional argument
    :param dataset_name: base name of the dataset (``SENTINEL`` is appended)
    :return: whatever ``calculate_all_metafeatures_with_labels`` returns
    """
    tagged_name = dataset_name + SENTINEL
    meta_features = calculate_all_metafeatures_with_labels(
        X_train,
        Y_train,
        categorical,
        tagged_name,
        dont_calculate=EXCLUDE_META_FUTURES,
    )
    return meta_features
Beispiel #7
0
def calc_meta_features(X_train, Y_train, categorical, dataset_name):
    """
    Thin wrapper around ``calculate_all_metafeatures_with_labels``.

    Appends the module-level ``SENTINEL`` to the dataset name and excludes
    the metafeatures named in ``EXCLUDE_META_FUTURES``.

    :param X_train: training features
    :param Y_train: training targets
    :param categorical: passed through as the third positional argument
    :param dataset_name: dataset name before the ``SENTINEL`` suffix
    :return: the result of ``calculate_all_metafeatures_with_labels``
    """
    positional = (X_train, Y_train, categorical, dataset_name + SENTINEL)
    return calculate_all_metafeatures_with_labels(
        *positional, dont_calculate=EXCLUDE_META_FUTURES)
def calculate_metafeatures(task_id):
    """Compute all metafeatures for one task and merge them into one result.

    Two passes are made over the training data loaded by ``load_task``:

    1. ``calculate_all_metafeatures_with_labels`` on the raw data, with
       every column flagged as non-categorical.
    2. ``calculate_all_metafeatures_encoded_labels`` on the one-hot encoded
       data, run under a 3072 MB memory limit enforced by ``pynisher``.

    If the second pass hits the memory limit, placeholder entries with a
    ``NaN`` value are generated for every name in
    ``metafeatures.npy_metafeatures``; the elapsed time is split evenly
    between them.

    :param task_id: identifier passed to ``load_task`` and used as the
        dataset name of the metafeature results.
    :return: the result of the first pass, updated in place with the
        metafeature values of the second pass.
    """
    print(task_id)
    X_train, y_train, _, _, cat = load_task(task_id)
    categorical = [c == 'categorical' for c in cat]

    _metafeatures_labels = metafeatures.calculate_all_metafeatures_with_labels(
        X_train, y_train, [False] * X_train.shape[1], task_id)

    X_train, _ = perform_one_hot_encoding(scipy.sparse.issparse(X_train),
                                          categorical, [X_train])
    X_train = X_train[0]
    # After one-hot encoding no column is treated as categorical any more.
    categorical = [False] * X_train.shape[1]

    start_time = time.time()
    obj = pynisher.enforce_limits(mem_in_mb=3072)(
        metafeatures.calculate_all_metafeatures_encoded_labels)
    _metafeatures_encoded_labels = obj(X_train, y_train, categorical, task_id)
    end_time = time.time()

    # NOTE(review): only the memory-limit case is handled here; if the
    # limited call fails for another reason its return value is presumably
    # not a metafeature object and the update below would fail - confirm.
    if obj.exit_status == pynisher.MemorylimitException:
        # During the conversion of the dataset (rescaling, etc...), it can
        # happen that we run out of memory.
        _metafeatures_encoded_labels = \
            metafeature.DatasetMetafeatures(task_id, dict())

        # Attribute an equal share of the elapsed time to each placeholder.
        metafeature_calculation_time = (end_time - start_time) / \
            len(metafeatures.npy_metafeatures)

        for metafeature_name in metafeatures.npy_metafeatures:
            if metafeature_name in metafeatures.metafeatures.functions:
                type_ = "METAFEATURE"
            else:
                type_ = "HELPERFUNCTION"
            # np.nan instead of np.NaN: the capitalised alias was removed
            # in NumPy 2.0.
            _metafeatures_encoded_labels.metafeature_values[metafeature_name] = \
                metafeature.MetaFeatureValue(metafeature_name, type_, 0, 0,
                                             np.nan, metafeature_calculation_time,
                                             "Memory error during dataset scaling.")

    mf = _metafeatures_labels
    mf.metafeature_values.update(
        _metafeatures_encoded_labels.metafeature_values)

    return mf
Beispiel #9
0
def calculate_metafeatures(profile, basename, x_train, y_train):
    """Compute labelled metafeatures using column info from ``profile``.

    The categorical indicator is read from the ``is_cat`` column of the raw
    profile, restricted to rows that are neither dropped, nor the target,
    nor of column type ``'datetime'``.  The exclusion list depends on
    whether the profile reports a categorical target.

    :param profile: object providing ``has_categorical_target()`` and
        ``get_raw_profile()``.
    :param basename: dataset name, passed on as ``dataset_name``.
    :param x_train: training features.
    :param y_train: training targets.
    :return: the metafeature result with every entry whose ``type_`` is not
        ``'METAFEATURE'`` removed.
    """
    is_class = profile.has_categorical_target()
    pf = profile.get_raw_profile()

    not_dropped = ~pf['drop']
    not_target = ~pf['target']
    not_datetime = ~(pf['col_type'] == 'datetime')
    feature_rows = not_dropped & not_target & not_datetime
    categorical = pf.loc[feature_rows, 'is_cat'].values

    excluded = (EXCLUDE_META_FEATURES_CLASSIFICATION if is_class
                else EXCLUDE_META_FEATURES_REGRESSION)

    logger.info('Start calculating metafeatures')
    result = calculate_all_metafeatures_with_labels(
        x_train,
        y_train,
        categorical=categorical,
        dataset_name=basename,
        dont_calculate=excluded,
    )

    # Collect the keys first, then delete in place, so the dictionary is
    # never modified while it is being iterated.
    unwanted = [
        name
        for name, entry in result.metafeature_values.items()
        if entry.type_ != 'METAFEATURE'
    ]
    for name in unwanted:
        del result.metafeature_values[name]
    return result
Beispiel #10
0
    def calculate_metafeatures(self, data_manager, dataset_name):
        """
        Compute the metafeatures of the data manager's training set.

        Delegates to ``calculate_all_metafeatures_with_labels`` and returns
        its result after removing every entry whose ``type_`` is not
        ``'METAFEATURE'``.  For a task that is not one of the supported
        classification/regression tasks, ``None`` is returned instead.
        """
        task = data_manager.info['task']

        is_categorical = [
            feat_type.lower() == 'categorical'
            for feat_type in data_manager.feat_type
        ]

        # Classification and regression use different exclusion lists.
        if task in ask_const.CLASSIFICATION_TASKS:
            excluded = EXCLUDE_META_FEATURES_CLASSIFICATION
        else:
            excluded = EXCLUDE_META_FEATURES_REGRESSION

        supported_tasks = [
            ask_const.MULTICLASS_CLASSIFICATION,
            ask_const.BINARY_CLASSIFICATION,
            ask_const.MULTILABEL_CLASSIFICATION,
            ask_const.REGRESSION,
        ]
        if task not in supported_tasks:
            return None

        result = calculate_all_metafeatures_with_labels(
            data_manager.data['X_train'],
            data_manager.data['Y_train'],
            categorical=is_categorical,
            dataset_name=dataset_name,
            dont_calculate=excluded,
        )

        # Snapshot the items first: entries are removed while looping.
        for name, entry in list(result.metafeature_values.items()):
            if entry.type_ != 'METAFEATURE':
                del result.metafeature_values[name]

        return result
Beispiel #11
0
# Ask suggest_via_metalearning for a result for the dataset entry
# '198_a_metric'.  meta_base, metric and task are defined outside this
# excerpt.
# NOTE(review): the trailing positional arguments (False, 1) are presumably
# a sparse-data flag and the number of configurations to return - confirm
# against the signature of suggest_via_metalearning.
res = suggest_via_metalearning(meta_base,'198_a_metric',metric,task,False,1)  


# Inspect what came back: the value itself, its type and its length.
print(res)
print(type(res))
print(len(res))

from autosklearn.metalearning.metafeatures.metafeatures import \
    calculate_all_metafeatures_with_labels, \
    calculate_all_metafeatures_encoded_labels, subsets
    
# Load the train/test split; get_dataset and dataset_name are defined
# outside this excerpt.
X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
print(Y_train)
# One False flag per column of X_train, i.e. no feature is flagged as
# categorical.
categorical = [False] * X_train.shape[1]

# First variant of the metafeature calculation ("with labels"); the result
# is printed for inspection.
meta_features_label = calculate_all_metafeatures_with_labels(
                    X_train, Y_train, categorical, dataset_name)
print(meta_features_label)

# Second variant ("encoded labels"), called with the same arguments.
meta_features_encoded_label = calculate_all_metafeatures_encoded_labels(
                    X_train, Y_train, categorical, dataset_name)
print(meta_features_encoded_label)
#configuration_space = get_configuration_space(
#                {
#                 'metric': metric,
#                 'task': task,
#                 'is_sparse': False
#                },

#include_preprocessors=['no_preprocessing'])
#X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
#categorical = [False] * X_train.shape[1]