def get_feature_vector(dataset, dataset_id, data_dir='./', task_type=None):
    # Compute the meta-features of a dataset and return them as a plain list,
    # ordered by the sorted meta-feature names so that vectors are comparable.
    feature_dict = calculate_metafeatures(dataset,
                                          dataset_id,
                                          data_dir,
                                          task_type=task_type)
    sorted_keys = sorted(feature_dict.keys())
    return [feature_dict[key] for key in sorted_keys]
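A minimal usage sketch; the dataset name, id, and data directory below are illustrative assumptions, and calculate_metafeatures must be importable from the surrounding project:

# Hypothetical call: all argument values here are placeholders.
meta_vec = get_feature_vector('spambase', dataset_id=0, data_dir='./data/',
                              task_type=None)
print(len(meta_vec))  # number of meta-features, ordered by sorted feature name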
def prepare_meta_dataset(meta_dir,
                         metric,
                         total_resource,
                         rep,
                         buildin_datasets,
                         buildin_algorithms,
                         task_type=None):
    X, Y = list(), list()
    sorted_keys = None
    include_datasets = list()
    for _dataset in buildin_datasets:
        print('Processing', _dataset)
        # Calculate meta-features for this dataset.
        try:
            feature_dict = calculate_metafeatures(_dataset,
                                                  task_type=task_type)
        except Exception as e:
            print(e)
            continue
        if sorted_keys is None:
            sorted_keys = sorted(feature_dict.keys())
        meta_instance = [feature_dict[key] for key in sorted_keys]
        X.append(meta_instance)

        # Load the (partial) performance records of the built-in algorithms on this dataset.
        scores = fetch_algorithm_runs(meta_dir, _dataset, metric,
                                      total_resource, rep, buildin_algorithms)
        Y.append(scores)
        include_datasets.append(_dataset)

    return X, Y, include_datasets
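A hedged usage sketch; the directory, metric name, resource and repetition values, and algorithm identifiers below are illustrative assumptions rather than values taken from the original code:

# Illustrative only: every concrete value below is an assumption.
meta_dir = './meta_resource/'
X, Y, used_datasets = prepare_meta_dataset(
    meta_dir,
    metric='acc',                                         # assumed metric identifier
    total_resource=20,                                    # assumed resource budget
    rep=3,                                                # assumed number of repetitions
    buildin_datasets=['spambase', 'optdigits'],           # names from the dataset list in Example #6
    buildin_algorithms=['random_forest', 'libsvm_svc'],   # assumed algorithm names
    task_type=None)
# X holds one meta-feature vector per dataset, Y the corresponding algorithm
# scores, and used_datasets the datasets that were processed successfully.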
Example #3
def fetch_algorithm_set(self, dataset, datanode=None):
    # Look up a pre-computed meta-feature vector for the dataset; if none is
    # available, compute the meta-features from the given data node instead.
    input_vector = get_feature_vector(dataset, task_type=self.task_type)
    if input_vector is None:
        input_dict = calculate_metafeatures(dataset=datanode,
                                            task_type=self.task_type)
        sorted_keys = sorted(input_dict.keys())
        input_vector = [input_dict[key] for key in sorted_keys]
    # Rank the candidate algorithms by predicted score, best first.
    preds = self.predict(input_vector)
    idxs = np.argsort(-preds)
    return [self.algorithms[idx] for idx in idxs]
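The final ranking relies on numpy's argsort over negated predictions (highest score first); a tiny self-contained illustration of that idiom, with made-up names and scores:

import numpy as np

algorithms = ['random_forest', 'libsvm_svc', 'adaboost']  # illustrative names
preds = np.array([0.61, 0.78, 0.55])                      # illustrative predicted scores

idxs = np.argsort(-preds)                 # indices sorted by descending prediction
print([algorithms[idx] for idx in idxs])  # ['libsvm_svc', 'random_forest', 'adaboost']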
Example #4
import os
import pickle


def get_feature_vector(dataset, dataset_id, data_dir='./', task_type=None):
    # Reuse a cached meta-feature vector if one has been pickled alongside the
    # package; otherwise compute the meta-features and return them sorted by name.
    meta_dir = os.path.dirname(__file__) + '/../meta_resource/'
    dataset_meta_feat_filename = meta_dir + 'meta_feature_dataset_%s.pkl' % dataset
    if os.path.exists(dataset_meta_feat_filename):
        with open(dataset_meta_feat_filename, 'rb') as f:
            feature_vec = pickle.load(f)
        return feature_vec
    else:
        feature_dict = calculate_metafeatures(dataset,
                                              dataset_id,
                                              data_dir,
                                              task_type=task_type)
        sorted_keys = sorted(feature_dict.keys())
        return [feature_dict[key] for key in sorted_keys]
Example #5
def prepare_meta_dataset(meta_dir,
                         metric,
                         total_resource,
                         rep,
                         buildin_datasets,
                         buildin_algorithms,
                         task_type=None):
    X, Y = list(), list()
    sorted_keys = None
    include_datasets = list()
    for _dataset in buildin_datasets:
        print('Processing', _dataset)
        dataset_meta_feat_filename = meta_dir + 'meta_feature_dataset_%s.pkl' % _dataset
        if os.path.exists(dataset_meta_feat_filename):
            with open(dataset_meta_feat_filename, 'rb') as f:
                meta_instance = pickle.load(f)
        else:
            # Cache miss: calculate meta-features for this dataset and pickle them.
            try:
                feature_dict = calculate_metafeatures(_dataset,
                                                      task_type=task_type)
            except Exception as e:
                print(e)
                continue
            if sorted_keys is None:
                sorted_keys = sorted(feature_dict.keys())
            meta_instance = [feature_dict[key] for key in sorted_keys]
            with open(dataset_meta_feat_filename, 'wb') as f:
                pickle.dump(meta_instance, f)
        X.append(meta_instance)

        # Load the (partial) performance records of the built-in algorithms on this dataset.
        scores = fetch_algorithm_runs(meta_dir, _dataset, metric,
                                      total_resource, rep, buildin_algorithms)
        Y.append(scores)
        include_datasets.append(_dataset)

    return X, Y, include_datasets
Example #6
import numpy as np
import pickle as pk

# calculate_metafeatures and MULTICLASS_CLS come from the surrounding project
# (module path not shown in this excerpt).

datasets = [
    # ... (earlier entries of the list are omitted in this excerpt)
    'analcatdata_supreme', 'splice', 'abalone', 'spambase',
    'winequality_white', 'waveform-5000(1)', 'waveform-5000(2)',
    'page-blocks(1)', 'page-blocks(2)', 'cpu_act', 'optdigits', 'satimage',
    'wind', 'musk', 'delta_ailerons', 'bank32nh', 'mushroom', 'puma8NH'
]

task_ids = list()
X = list()
result = dict()
sorted_keys = None

for _dataset in datasets:
    print('Creating embedding for dataset - %s.' % _dataset)
    # Calculate meta-features for this dataset.
    try:
        feature_dict = calculate_metafeatures(_dataset,
                                              task_type=MULTICLASS_CLS)
    except Exception as e:
        print(e)
        continue
    sorted_keys = sorted(feature_dict.keys())
    meta_instance = [feature_dict[key] for key in sorted_keys]

    X.append(meta_instance)
    task_ids.append(_dataset)

result['task_ids'] = task_ids
result['dataset_embedding'] = np.array(X)
result['meta_feature_names'] = sorted_keys

with open('dataset_metafeatures.pkl', 'wb') as f:
    pk.dump(result, f)
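
For completeness, a small sketch of reading the pickled result back; the file name and dictionary keys match the dump above, and the rest is plain pickle/numpy usage:

import pickle as pk

with open('dataset_metafeatures.pkl', 'rb') as f:
    result = pk.load(f)

print(result['task_ids'])                  # dataset names, in processing order
print(result['dataset_embedding'].shape)   # (n_datasets, n_meta_features)
print(result['meta_feature_names'][:5])    # first few meta-feature names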