import os
import pickle

import numpy as np

# Project-internal helpers such as calculate_metafeatures and
# fetch_algorithm_runs are assumed importable from the surrounding package.


def get_feature_vector(dataset, dataset_id=None, data_dir='./', task_type=None):
    # Compute the metafeatures of `dataset` and flatten them into a vector
    # ordered by the sorted feature names, so vectors are comparable across
    # datasets. dataset_id is optional; some callers pass only the dataset name.
    feature_dict = calculate_metafeatures(dataset, dataset_id, data_dir, task_type=task_type)
    sorted_keys = sorted(feature_dict.keys())
    return [feature_dict[key] for key in sorted_keys]
def prepare_meta_dataset(meta_dir, metric, total_resource, rep, buildin_datasets,
                         buildin_algorithms, task_type=None):
    X, Y = list(), list()
    sorted_keys = None
    include_datasets = list()
    for _dataset in buildin_datasets:
        print('Processing', _dataset)
        # Calculate metafeatures for the dataset; skip it if extraction fails.
        try:
            feature_dict = calculate_metafeatures(_dataset, task_type=task_type)
        except Exception as e:
            print(e)
            continue
        if sorted_keys is None:
            sorted_keys = sorted(feature_dict.keys())
        meta_instance = [feature_dict[key] for key in sorted_keys]
        X.append(meta_instance)
        # Load the recorded performance of each algorithm on this dataset.
        scores = fetch_algorithm_runs(meta_dir, _dataset, metric, total_resource,
                                      rep, buildin_algorithms)
        Y.append(scores)
        include_datasets.append(_dataset)
    return X, Y, include_datasets
def fetch_algorithm_set(self, dataset, datanode=None):
    # Look up (or compute) the metafeature vector for the dataset name.
    input_vector = get_feature_vector(dataset, task_type=self.task_type)
    if input_vector is None:
        # Fallback: derive the metafeatures directly from the data node
        # when no vector could be produced for the dataset name.
        input_dict = calculate_metafeatures(dataset=datanode, task_type=self.task_type)
        sorted_keys = sorted(input_dict.keys())
        input_vector = [input_dict[key] for key in sorted_keys]
    # Predict per-algorithm performance and rank algorithms best-first.
    preds = self.predict(input_vector)
    idxs = np.argsort(-preds)
    return [self.algorithms[idx] for idx in idxs]
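# A hypothetical usage sketch: `advisor` stands for an instance of the class
# that defines fetch_algorithm_set (the class itself is not shown here), with
# a trained self.predict and a self.algorithms list. The dataset name is a
# placeholder.
#
#     ranked = advisor.fetch_algorithm_set('spambase')
#     print('Try these algorithms first:', ranked[:5])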
def get_feature_vector(dataset, dataset_id=None, data_dir='./', task_type=None):
    # Cached variant: reuse a previously pickled metafeature vector when one
    # exists under meta_resource/, otherwise compute it from scratch.
    meta_dir = os.path.dirname(__file__) + '/../meta_resource/'
    dataset_meta_feat_filename = meta_dir + 'meta_feature_dataset_%s.pkl' % dataset
    if os.path.exists(dataset_meta_feat_filename):
        with open(dataset_meta_feat_filename, 'rb') as f:
            feature_vec = pickle.load(f)
        return feature_vec
    else:
        feature_dict = calculate_metafeatures(dataset, dataset_id, data_dir, task_type=task_type)
        sorted_keys = sorted(feature_dict.keys())
        return [feature_dict[key] for key in sorted_keys]
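# A minimal usage sketch for the cached lookup above. The dataset name is
# illustrative; on a cache miss this path requires calculate_metafeatures to
# accept the dataset name, as prepare_meta_dataset does above.
if __name__ == '__main__':
    vec = get_feature_vector('spambase', task_type=None)
    print('Metafeature vector length:', len(vec))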
def prepare_meta_dataset(meta_dir, metric, total_resource, rep, buildin_datasets,
                         buildin_algorithms, task_type=None):
    X, Y = list(), list()
    sorted_keys = None
    include_datasets = list()
    for _dataset in buildin_datasets:
        print('Processing', _dataset)
        dataset_meta_feat_filename = meta_dir + 'meta_feature_dataset_%s.pkl' % _dataset
        if os.path.exists(dataset_meta_feat_filename):
            # Reuse the cached metafeature vector.
            with open(dataset_meta_feat_filename, 'rb') as f:
                meta_instance = pickle.load(f)
        else:
            # Calculate metafeatures for the dataset; skip it if extraction fails.
            try:
                feature_dict = calculate_metafeatures(_dataset, task_type=task_type)
            except Exception as e:
                print(e)
                continue
            if sorted_keys is None:
                sorted_keys = sorted(feature_dict.keys())
            meta_instance = [feature_dict[key] for key in sorted_keys]
            # Cache the vector so later runs can skip recomputation.
            with open(dataset_meta_feat_filename, 'wb') as f:
                pickle.dump(meta_instance, f)
        X.append(meta_instance)
        # Load the recorded performance of each algorithm on this dataset.
        scores = fetch_algorithm_runs(meta_dir, _dataset, metric, total_resource,
                                      rep, buildin_algorithms)
        Y.append(scores)
        include_datasets.append(_dataset)
    return X, Y, include_datasets
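# One way to consume (X, Y): a minimal sketch, assuming scikit-learn is
# available (an assumption; the learner the project actually uses is not
# shown here). Each row of X is a dataset's metafeature vector and each
# column of Y holds one algorithm's observed scores, so a simple
# meta-learner is one regressor per algorithm.
def train_meta_learners(X, Y):
    import numpy as np
    from sklearn.ensemble import RandomForestRegressor
    X, Y = np.array(X), np.array(Y)
    models = list()
    for col in range(Y.shape[1]):
        reg = RandomForestRegressor(n_estimators=100, random_state=1)
        reg.fit(X, Y[:, col])
        models.append(reg)
    return models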
import numpy as np
import pickle as pk

# MULTICLASS_CLS and calculate_metafeatures are assumed importable from the
# surrounding package.

datasets = [
    'analcatdata_supreme', 'splice', 'abalone', 'spambase', 'winequality_white',
    'waveform-5000(1)', 'waveform-5000(2)', 'page-blocks(1)', 'page-blocks(2)',
    'cpu_act', 'optdigits', 'satimage', 'wind', 'musk', 'delta_ailerons',
    'bank32nh', 'mushroom', 'puma8NH'
]
task_ids = list()
X = list()
result = dict()
sorted_keys = None
for _dataset in datasets:
    print('Creating embedding for dataset - %s.' % _dataset)
    # Calculate metafeatures for the dataset; skip it if extraction fails.
    try:
        feature_dict = calculate_metafeatures(_dataset, task_type=MULTICLASS_CLS)
    except Exception as e:
        print(e)
        continue
    sorted_keys = sorted(feature_dict.keys())
    meta_instance = [feature_dict[key] for key in sorted_keys]
    X.append(meta_instance)
    task_ids.append(_dataset)

# Persist the embedding matrix together with the task ids and feature names.
result['task_ids'] = task_ids
result['dataset_embedding'] = np.array(X)
result['meta_feature_names'] = sorted_keys
with open('dataset_metafeatures.pkl', 'wb') as f:
    pk.dump(result, f)
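# A minimal sketch of consuming the pickle written above: rank the stored
# datasets by Euclidean distance in metafeature space to a query embedding.
# The query below (the first stored dataset) is illustrative.
with open('dataset_metafeatures.pkl', 'rb') as f:
    meta = pk.load(f)
embeddings = meta['dataset_embedding']
query = embeddings[0]
dists = np.linalg.norm(embeddings - query, axis=1)
nearest = np.argsort(dists)[:5]
print('Closest datasets:', [meta['task_ids'][i] for i in nearest])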