def test_exclude_metafeature_groups(self):
    """Ensure no metafeature belonging to an excluded group is computed.

    For each dataset, a random subset of metafeature groups is excluded
    from computation; the test then verifies correctness of whatever was
    computed and that nothing from the excluded groups leaked through.
    """
    SUBSET_LENGTH = 3
    test_failures = {}
    test_name = inspect.stack()[0][3]
    for dataset_filename, dataset in self.datasets.items():
        # Pick a random subset of group names to exclude.
        excluded_groups = random.sample(
            [group.value for group in consts.MetafeatureGroup],
            SUBSET_LENGTH
        )
        computed_mfs = Metafeatures().compute(
            X=dataset["X"],
            Y=dataset["Y"],
            column_types=dataset["column_types"],
            seed=CORRECTNESS_SEED,
            exclude_groups=excluded_groups,
        )
        known_metafeatures = dataset["known_metafeatures"]
        required_checks = [
            (
                self._check_correctness,
                [computed_mfs, known_metafeatures, dataset_filename],
            )
        ]
        test_failures.update(self._perform_checks(required_checks))

        # Every metafeature id that belongs to any excluded group.
        excluded_ids = {
            mf_id
            for group in excluded_groups
            for mf_id in Metafeatures.list_metafeatures(group)
        }
        if not excluded_ids.isdisjoint(computed_mfs.keys()):
            self.fail('Metafeatures computed an excluded metafeature')
    self._report_test_failures(test_failures, test_name)
def test_request_metafeatures(self):
    """Verify compute returns exactly the explicitly requested metafeatures.

    For each dataset, a random subset of metafeature ids is requested;
    the computed results are checked for correctness and the returned id
    set must match the requested id set exactly.
    """
    SUBSET_LENGTH = 20
    test_failures = {}
    test_name = inspect.stack()[0][3]
    for dataset_filename, dataset in self.datasets.items():
        requested_ids = random.sample(Metafeatures.IDS, SUBSET_LENGTH)
        computed_mfs = Metafeatures().compute(
            X=dataset["X"],
            Y=dataset["Y"],
            seed=CORRECTNESS_SEED,
            metafeature_ids=requested_ids,
            column_types=dataset["column_types"],
        )
        known_metafeatures = dataset["known_metafeatures"]
        required_checks = [
            (
                self._check_correctness,
                [computed_mfs, known_metafeatures, dataset_filename],
            )
        ]
        test_failures.update(self._perform_checks(required_checks))
        # The returned keys must be exactly the requested ids — no more,
        # no fewer.
        self.assertEqual(
            set(requested_ids),
            set(computed_mfs.keys()),
            "Compute did not return requested metafeatures",
        )
    self._report_test_failures(test_failures, test_name)
def test_exclude_metafeatures(self):
    """Ensure explicitly excluded metafeature ids are never computed.

    For each dataset, a random subset of metafeature ids is passed via
    ``exclude``; the computed results are checked for correctness and
    none of the excluded ids may appear in the output.
    """
    SUBSET_LENGTH = 20
    test_failures = {}
    test_name = inspect.stack()[0][3]
    for dataset_filename, dataset in self.datasets.items():
        metafeature_ids = random.sample(Metafeatures.IDS, SUBSET_LENGTH)
        computed_mfs = Metafeatures().compute(
            X=dataset["X"],
            Y=dataset["Y"],
            seed=CORRECTNESS_SEED,
            exclude=metafeature_ids,
            column_types=dataset["column_types"],
        )
        known_metafeatures = dataset["known_metafeatures"]
        required_checks = [
            (
                self._check_correctness,
                [computed_mfs, known_metafeatures, dataset_filename],
            )
        ]
        test_failures.update(self._perform_checks(required_checks))
        if any(mf_id in computed_mfs.keys() for mf_id in metafeature_ids):
            # self.fail is the idiomatic unconditional failure (and matches
            # test_exclude_metafeature_groups) — not assertTrue(False, ...).
            self.fail("Metafeatures computed an excluded metafeature")
    self._report_test_failures(test_failures, test_name)
def compute_dataset_metafeatures():
    """Interactively (re)compute and save known metafeatures per dataset.

    For every dataset listed in the metadata file, prompt the user whether
    to recompute its metafeatures ("n" skips). In verbose mode ("v"),
    print a diff — unchanged / deleted / new / updated — of the freshly
    computed metafeatures against the currently saved ones. Finally, ask
    whether to overwrite the saved metafeatures file with the new values.
    """
    # Use a context manager so the metadata file handle is not leaked.
    with open(METADATA_PATH, "r") as metadata_file:
        metadata = json.load(metadata_file)
    for dataset_metadata in metadata:
        dataset_filename = dataset_metadata["filename"]
        choice = None
        while choice not in ["y", "v", "n"]:
            choice = input(dataset_filename + " [(y)es, (v)erbose, (n)o]: ")
        if choice == "n":
            continue
        X, Y, column_types = read_dataset(dataset_metadata)
        start_time = time.time()
        computed_mfs = Metafeatures().compute(
            X=X, Y=Y, column_types=column_types, seed=CORRECTNESS_SEED
        )
        run_time = time.time() - start_time
        if choice == "v":
            # Diff the new results against the saved ("known") metafeatures.
            known_mf_path = get_dataset_metafeatures_path(dataset_filename)
            with open(known_mf_path, 'r') as fp:
                known_mfs = json.load(fp)
            new_mfs = {}
            deleted_mfs = {}
            updated_mfs = {}
            same_mfs = {}
            all_mf_names = set(computed_mfs.keys()) | set(known_mfs.keys())
            for mf in all_mf_names:
                if mf not in known_mfs.keys():
                    new_mfs[mf] = computed_mfs[mf]
                elif mf not in computed_mfs.keys():
                    deleted_mfs[mf] = known_mfs[mf]
                elif is_close(
                    computed_mfs[mf]['value'], known_mfs[mf]['value']
                ):
                    same_mfs[mf] = computed_mfs[mf]
                else:
                    updated_mfs[mf] = {
                        'known': known_mfs[mf],
                        'computed': computed_mfs[mf]
                    }
            print('UNCHANGED METAFEATURES')
            print(json.dumps(same_mfs, sort_keys=True, indent=4))
            print('DELETED METAFEATURES')
            print(json.dumps(deleted_mfs, sort_keys=True, indent=4))
            print('NEW METAFEATURES')
            print(json.dumps(new_mfs, sort_keys=True, indent=4))
            print('UPDATED METAFEATURES')
            print(json.dumps(updated_mfs, sort_keys=True, indent=4))
        print("Runtime: " + str(run_time))
        choice = None
        while choice not in ["y", "n"]:
            choice = input(
                f"Update {dataset_filename} metafeatures? [(y)es, (n)o]: ")
        if choice == "y":
            mf_file_path = get_dataset_metafeatures_path(dataset_filename)
            with open(mf_file_path, 'w') as fp:
                json.dump(computed_mfs, fp, sort_keys=True, indent=4)