import shutil
from multiprocessing import Pool

import pytest

import hpobench
from hpobench.util.data_manager import NASBench_201Data


def test_nasbench_201_init():
    data_manager = NASBench_201Data(dataset='cifar100')
    assert len(data_manager.files) == 3
    assert all([file.startswith('NAS-Bench') for file in data_manager.files])

    with pytest.raises(AssertionError):
        NASBench_201Data(dataset='Non_existing_dataset')

    assert data_manager._save_dir == hpobench.config_file.data_dir / "nasbench_201"
    assert data_manager._save_dir.exists()
def test_nasbench_201_get_metrics():
    metrics = NASBench_201Data.get_metrics()
    assert metrics == ['train_acc1es', 'train_losses', 'train_times',
                       'valid_acc1es', 'valid_times', 'valid_losses',
                       'test_acc1es', 'test_times', 'test_losses']
def test_nasbench_201_load():
    shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201", ignore_errors=True)

    data_manager = NASBench_201Data(dataset='cifar100')
    data = data_manager.load()

    assert len(data) == 3
    assert (hpobench.config_file.data_dir / "nasbench_201").exists()
    assert len(list((hpobench.config_file.data_dir / "nasbench_201").glob('*.json'))) == 3
    assert not (hpobench.config_file.data_dir / "nasbench_201_data_v1.3.zip").exists()

    data_manager.data = None

    data_manager = NASBench_201Data(dataset='cifar100')
    data = data_manager.load()
    assert len(data) == 3
def test_nasbench_201_load_v1_2():
    # Loader test for the older v1.2 release, which shipped one pickle file
    # per (dataset, seed, metric) combination instead of a single json file
    # per data set.
    shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201", ignore_errors=True)

    data_manager = NASBench_201Data(dataset='cifar100')
    data = data_manager.load()

    assert len(data) == len(list(NASBench_201Data.get_seeds_metrics()))
    assert len(data) == 3 * len(NASBench_201Data.get_metrics())
    assert (hpobench.config_file.data_dir / "nasbench_201").exists()
    assert len(list((hpobench.config_file.data_dir / "nasbench_201").glob('*.pkl'))) == 108
    assert not (hpobench.config_file.data_dir / "nasbench_201_data_v1.2.zip").exists()

    data_manager.data = None

    data_manager = NASBench_201Data(dataset='cifar100')
    data = data_manager.load()
    assert len(data) == 3 * len(NASBench_201Data.get_metrics())
def __init__(self, dataset: str,
             rng: Union[np.random.RandomState, int, None] = None, **kwargs):
    """
    Benchmark interface to the NASBench201 Benchmarks. The NASBench201 contains
    results for architectures on 4 different data sets.

    We have split the "api" file from NASBench201 into separate files per data
    set. The original "api" file contains all data sets, but loading this
    single file took too much RAM.

    We recommend not calling this base class directly but using the correct
    subclass below.

    The parameter ``dataset`` indicates which data set was used for training.

    For each data set the metrics 'train_acc1es', 'train_losses',
    'train_times', 'eval_acc1es', 'eval_times', 'eval_losses' are available.
    However, the data sets report them on different data splits
    (train, train + valid, test, valid or test + valid).

    Note:
    - The parameter epoch is 0-indexed!
    - In the original data, the training splits are always marked with the key
      'train', but different identifiers are used to refer to the available
      evaluation splits. We report them in the tables below.

    The following table shows the mapping from data set and metric to the used
    split.

    |-------------------|---------------|-----------------------------------|
    | Data set          | train_*       | eval_* (key in orig. data)        |
    |-------------------|---------------|-----------------------------------|
    | 'cifar10-valid'   | train         | valid (x-valid)                   |
    | 'cifar10'         | train + valid | test (ori-test)                   |
    | 'cifar100'        | train         | valid + test (ori-test)           |
    | 'ImageNet16-120'  | train         | valid + test (ori-test)           |
    |-------------------|---------------|-----------------------------------|

    Some further remarks:
    - cifar10-valid is trained on the train split and tested on the validation
      split.
    - cifar10 is trained on the train *and* validation split and tested on the
      test split.
    - The train metrics are dictionaries with epochs (e.g. 0, 1, 2) as keys and
      the metric as value. The evaluation metrics, however, have the
      identifiers as keys, e.g. ori-test@0, with 0 indicating the epoch.

      Also, each data set (except for cifar10) reports values for all 200
      epochs for a metric on the specified split (see the first table) and a
      single value on the 200th epoch for the other splits. The second table
      shows the available identifiers for each data set.

    |-------------------|------------------------------|
    | Data set          | eval_*: values for epochs    |
    |-------------------|------------------------------|
    | 'cifar10-valid'   | x-valid:  0-199              |
    |                   | ori-test: 199                |
    | 'cifar10'         | ori-test: 0-199              |
    | 'cifar100'        | ori-test: 0-199              |
    |                   | x-valid:  199                |
    |                   | x-test:   199                |
    | 'ImageNet16-120'  | ori-test: 0-199              |
    |                   | x-valid:  199                |
    |                   | x-test:   199                |
    |-------------------|------------------------------|

    Parameters
    ----------
    dataset : str
        One of cifar10-valid, cifar10, cifar100, ImageNet16-120.
    rng : np.random.RandomState, int, None
        Random seed for the benchmark's random state.
    """
    super(NasBench201BaseBenchmark, self).__init__(rng=rng)

    data_manager = NASBench_201Data(dataset=dataset)
    self.data = data_manager.load()

    self.config_to_structure = NasBench201BaseBenchmark.config_to_structure_func(
        max_nodes=MAX_NODES)
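# To make the key conventions from the docstring above concrete, here is a
# minimal, hedged sketch. The two dictionaries below are hypothetical
# stand-ins for single metric entries obtained via `data_manager.load()`;
# only the key formats themselves (plain epoch keys for train metrics,
# '<split>@<epoch>' keys for evaluation metrics) are taken from the
# documentation above, not the surrounding container layout.
train_losses = {0: 2.31, 1: 1.87, 199: 0.42}
eval_acc1es = {'ori-test@0': 12.4, 'x-valid@199': 70.9, 'ori-test@199': 71.8}

final_train_loss = train_losses[199]           # epochs are 0-indexed
final_test_acc = eval_acc1es['ori-test@199']   # the split id is part of the key

# Collect all epochs reported for a given evaluation split.
ori_test_epochs = sorted(int(key.split('@')[1])
                         for key in eval_acc1es if key.startswith('ori-test@'))
assert ori_test_epochs == [0, 199]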
def __init__(self, dataset: str,
             rng: Union[np.random.RandomState, int, None] = None, **kwargs):
    """
    Benchmark interface to the NASBench201 Benchmarks. The NASBench201 contains
    results for architectures on 4 different data sets.

    We have split the "api" file from NASBench201 into separate files per data
    set. The original "api" file contains all data sets, but loading this
    single file took too much RAM.

    We recommend not calling this base class directly but using the correct
    subclass below.

    The parameter ``dataset`` indicates which data set was used for training.

    For each data set the metrics 'train_acc1es', 'train_losses',
    'train_times', 'eval_acc1es', 'eval_times', 'eval_losses' are available.
    However, the data sets report them on different data splits
    (train, train + valid, test, valid or test + valid).

    We summarize all information about the data sets in the following tables.

    Dataset         Metric    Avail. Epochs  Explanation            Returned by HPOBENCH
    ------------------------------------------------------------------------------------
    cifar10-valid   train     [0-199]        training set
    cifar10-valid   x-valid   [0-199]        validation set         objective function
    cifar10-valid   x-test
    cifar10-valid   ori-test  199            test set               objective function test
    cifar100        train     [0-199]        training set
    cifar100        x-valid   199            validation set
    cifar100        x-test    199            test set               objective function test
    cifar100        ori-test  [0-199]        validation + test set  objective function
    ImageNet16-120  train     [0-199]        training set
    ImageNet16-120  x-valid   199            validation set
    ImageNet16-120  x-test    199            test set               objective function test
    ImageNet16-120  ori-test  [0-199]        validation + test set  objective function

    We have also extracted the incumbents per split. We report the incumbent
    accuracy and loss performance
    i) by taking the maximum value across all seeds and configurations
    ii) averaged across the three available seeds

                              i) The best possible incumbents (NO AVG!)                ii) The "average" incumbent
    Dataset         Metric    (Index of Arch, Accuracy)  (Index, Loss)                 (Index of Arch, Accuracy)  (Index, Loss)
    ----------------------------------------------------------------------------------------------------------------------------------------------------------
    cifar10-valid   train     (258, 100.0)               (2778, 0.001179278278425336)  (10154, 100)               (2778, 0.0013082386429297428)
    cifar10-valid   x-valid   (6111, 91.71999999023437)  (14443, 0.3837750501537323)   (6111, 91.60666665039064)  (3888, 0.3894046771335602)
    cifar10-valid   x-test
    cifar10-valid   ori-test  (14174, 91.65)             (3385, 0.3850496160507202)    (1459, 91.52333333333333)  (3385, 0.3995230517864227)
    cifar100        train     (9930, 99.948)             (9930, 0.012630240231156348)  (9930, 99.93733333333334)  (9930, 0.012843489621082942)
    cifar100        x-valid   (13714, 73.71999998779297) (13934, 1.1490126512527465)   (9930, 73.4933333577474)   (7361, 1.1600867895126343)
    cifar100        x-test    (1459, 74.28000004882813)  (15383, 1.1427113876342774)   (9930, 73.51333332112631)  (7337, 1.1747569534301758)
    cifar100        ori-test  (9930, 73.88)              (13706, 1.1610547459602356)   (9930, 73.50333333333333)  (7361, 1.1696554500579834)
    ImageNet16-120  train     (9930, 73.2524719841793)   (9930, 0.9490517352046979)    (9930, 73.22918040138735)  (9930, 0.9524298415108582)
    ImageNet16-120  x-valid   (13778, 47.39999985758463) (10721, 2.0826991437276203)   (10676, 46.73333327229818) (10721, 2.0915397168795264)
    ImageNet16-120  x-test    (857, 48.03333317057292)   (12887, 2.0940088628133138)   (857, 47.31111100599501)   (11882, 2.106453532218933)
    ImageNet16-120  ori-test  (857, 47.083333353678384)  (11882, 2.0950548852284747)   (857, 46.8444444647895)    (11882, 2.1028235816955565)

    Note:
    - The parameter epoch is 0-indexed!
    - In the original data, the training splits are always marked with the key
      'train', but different identifiers are used to refer to the available
      evaluation splits. We report them in the tables above.
    - We exclude the data set cifar10 from this benchmark.

    Some further remarks:
    - cifar10-valid is trained on the train split and tested on the validation
      split.
    - The train metrics are dictionaries with epochs (e.g. 0, 1, 2) as keys and
      the metric as value. The evaluation metrics, however, have the
      identifiers as keys, e.g. ori-test@0, with 0 indicating the epoch.
      Also, each data set reports values for all 200 epochs for a metric on
      the specified split and a single value on the 200th epoch for the other
      splits.

    Parameters
    ----------
    dataset : str
        One of cifar10-valid, cifar100, ImageNet16-120.
    rng : np.random.RandomState, int, None
        Random seed for the benchmark's random state.
    """  # noqa: E501
    super(NasBench201BaseBenchmark, self).__init__(rng=rng)

    data_manager = NASBench_201Data(dataset=dataset)
    self.dataset = dataset
    self.data = data_manager.load()

    self.config_to_structure = NasBench201BaseBenchmark.config_to_structure_func(
        max_nodes=MAX_NODES)
def _load_nasbench_201_data(_):
    # Top-level helper so that multiprocessing can pickle it.
    return NASBench_201Data(dataset='cifar100').load()


def test_nasbench_201_load_thread_safe():
    shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201", ignore_errors=True)

    # Let three workers download and load the data concurrently. The iterable
    # passed to `map` must be non-empty, otherwise the workers are never
    # invoked and the test trivially passes.
    with Pool(3) as pool:
        pool.map(_load_nasbench_201_data, range(3))
def test_nasbench_201_get_files():
    files = NASBench_201Data.get_files_per_dataset(dataset='cifar10')
    assert len(files) == 27
    assert all([file.startswith('nb201_cifar10') for file in files])