예제 #1
0
def test_nasbench_201_init():
    """Check the state of a freshly constructed ``NASBench_201Data`` manager.

    Verifies the number and naming of the registered files, that an unknown
    data set name is rejected with an ``AssertionError``, and that the save
    directory lives below the configured data directory and exists.
    """
    expected_save_dir = hpobench.config_file.data_dir / "nasbench_201"

    manager = NASBench_201Data(dataset='cifar100')

    # Three files are expected and each has to carry the NAS-Bench prefix.
    assert len(manager.files) == 3
    for file_name in manager.files:
        assert file_name.startswith('NAS-Bench')

    # An unsupported data set name must be refused during construction.
    with pytest.raises(AssertionError):
        NASBench_201Data(dataset='Non_existing_dataset')

    assert manager._save_dir == expected_save_dir
    assert manager._save_dir.exists()
예제 #2
0
def test_nasbench_201_get_metrics():
    """``NASBench_201Data.get_metrics`` must return the nine metric names in this exact order."""
    expected_metrics = [
        'train_acc1es',
        'train_losses',
        'train_times',
        'valid_acc1es',
        'valid_times',
        'valid_losses',
        'test_acc1es',
        'test_times',
        'test_losses',
    ]

    assert NASBench_201Data.get_metrics() == expected_metrics
예제 #3
0
def test_nasbench_201_load():
    """Load the cifar100 data into an emptied data directory, then load it a second time.

    After the first load the ``nasbench_201`` directory must exist and contain
    exactly three json files, and the zip archive must not be present in the
    data directory. A second manager must return three entries as well.
    """
    base_dir = hpobench.config_file.data_dir
    benchmark_dir = base_dir / "nasbench_201"
    archive_path = base_dir / "nasbench_201_data_v1.3.zip"

    # Remove the directory first so the load cannot rely on files from an earlier run.
    shutil.rmtree(benchmark_dir, ignore_errors=True)

    first_manager = NASBench_201Data(dataset='cifar100')
    first_result = first_manager.load()

    assert len(first_result) == 3
    assert benchmark_dir.exists()
    json_files = list(benchmark_dir.glob('*.json'))
    assert len(json_files) == 3
    assert not archive_path.exists()

    # NOTE(review): presumably clears the data held by the first manager before
    # loading again - confirm against NASBench_201Data.
    first_manager.data = None

    second_manager = NASBench_201Data(dataset='cifar100')
    second_result = second_manager.load()
    assert len(second_result) == 3
예제 #4
0
def test_nasbench_201_load():
    """Load the cifar100 data into an emptied data directory, then load it a second time.

    The loaded data must hold one entry per element yielded by
    ``get_seeds_metrics`` which has to equal three times the number of
    metrics. The ``nasbench_201`` directory must contain 108 pkl files and the
    zip archive must not be present in the data directory afterwards.
    """
    base_dir = hpobench.config_file.data_dir
    benchmark_dir = base_dir / "nasbench_201"
    archive_path = base_dir / "nasbench_201_data_v1.2.zip"

    # Remove the directory first so the load cannot rely on files from an earlier run.
    shutil.rmtree(benchmark_dir, ignore_errors=True)

    first_manager = NASBench_201Data(dataset='cifar100')
    first_result = first_manager.load()

    seeds_and_metrics = list(NASBench_201Data.get_seeds_metrics())
    assert len(first_result) == len(seeds_and_metrics)
    assert len(first_result) == 3 * len(NASBench_201Data.get_metrics())
    assert benchmark_dir.exists()
    pickle_files = list(benchmark_dir.glob('*.pkl'))
    assert len(pickle_files) == 108
    assert not archive_path.exists()

    # NOTE(review): presumably clears the data held by the first manager before
    # loading again - confirm against NASBench_201Data.
    first_manager.data = None

    second_manager = NASBench_201Data(dataset='cifar100')
    second_result = second_manager.load()
    assert len(second_result) == 3 * len(NASBench_201Data.get_metrics())
예제 #5
0
    def __init__(self,
                 dataset: str,
                 rng: Union[np.random.RandomState, int, None] = None,
                 **kwargs):
        """
        Set up the benchmark interface for one data set of the NASBench201 benchmarks.

        NASBench201 contains results for architectures on 4 different data sets.
        The original NASBench201 "api" file bundles all of them, but loading that single file took too much RAM.
        The "api" file has therefore been split into separate files per data set.

        It is recommended to use the correct subclass instead of calling this base class directly.

        The parameter ``dataset`` indicates which data set was used for training.

        For each data set the metrics
        'train_acc1es', 'train_losses', 'train_times', 'eval_acc1es', 'eval_times', 'eval_losses' are available.
        However, the data sets report them on different data splits (train, train + valid, test, valid or test+valid).

        Note:
        - The parameter epoch is 0 indexed!
        - In the original data, the training splits are always marked with the key 'train', whereas different
          identifiers are used for the available evaluation splits. They are listed in the first table as well.

        The first table maps each data set and metric to the split that was used.

        |-------------------|---------------|-----------------------------------|
        | Data set          | train_*       | eval_*        (key in orig. data) |
        |-------------------|---------------|-----------------------------------|
        | 'cifar10-valid'   | train         | valid         (x-valid)           |
        | 'cifar10'         | train + valid | test          (ori-test)          |
        | 'cifar100'        | train         | valid + test  (ori-test)          |
        | 'ImageNet16-120'  | train         | valid + test  (ori-test)          |
        |-------------------|---------------|-----------------------------------|

        Some further remarks:
        - cifar10-valid is trained on the train split and tested on the validation split.
        - cifar10 is trained on the train *and* validation split and tested on the test split.
        - The train metrics are dictionaries with epochs (e.g. 0, 1, 2) as key and the metric as value.
          The evaluation metrics, however, have as key the identifiers, e.g. ori-test@0, with 0 indicating the epoch.
          Also, each data set (except for cifar10) reports values for all 200 epochs for a metric on the specified
          split (see first table) and a single value on the 200th epoch for the other splits.
          The second table shows the available identifiers for each data set.

        |-------------------|------------------------------|
        | Data set          | eval*:   values for epochs   |
        |-------------------|------------------------------|
        | 'cifar10-valid'   | x-valid:  0-199              |
        |                   | ori-test: 199                |
        | 'cifar10'         | ori-test: 0-199              |
        | 'cifar100'        | ori-test: 0-199              |
        |                   | x-valid:  199                |
        |                   | x-test:   199                |
        | 'ImageNet16-120'  | ori-test: 0-199              |
        |                   | x-valid:  199                |
        |                   | x-test:   199                |
        |-------------------|------------------------------|

        Parameters
        ----------
        dataset : str
            One of cifar10-valid, cifar10, cifar100, ImageNet16-120.
        rng : np.random.RandomState, int, None
            Random seed for the benchmark's random state.
        kwargs
            Accepted but not used by this constructor.
        """

        super(NasBench201BaseBenchmark, self).__init__(rng=rng)

        # Load the data of the requested data set through its data manager.
        self.data = NASBench_201Data(dataset=dataset).load()

        structure_factory = NasBench201BaseBenchmark.config_to_structure_func
        self.config_to_structure = structure_factory(max_nodes=MAX_NODES)
예제 #6
0
    def __init__(self,
                 dataset: str,
                 rng: Union[np.random.RandomState, int, None] = None,
                 **kwargs):
        """
        Set up the benchmark interface for one data set of the NASBench201 benchmarks.

        NASBench201 contains results for architectures on 4 different data sets.
        The original NASBench201 "api" file bundles all of them, but loading that single file took too much RAM.
        The "api" file has therefore been split into separate files per data set.

        It is recommended to use the correct subclass instead of calling this base class directly.

        The parameter ``dataset`` indicates which data set was used for training.

        For each data set the metrics
        'train_acc1es', 'train_losses', 'train_times', 'eval_acc1es', 'eval_times', 'eval_losses' are available.
        However, the data sets report them on different data splits (train, train + valid, test, valid or test+valid).

        All information about the data sets is summarized in the following tables.

        Data set        Metric      Avail.Epochs    Explanation             returned by HPOBENCH
        ----------------------------------------------------------------------------------------
        cifar10-valid   train       [0-199]         training set
        cifar10-valid   x-valid     [0-199]         validation set          objective function
        cifar10-valid   x-test
        cifar10-valid   ori-test    199             test set                objective function test

        cifar100        train       [0-199]         training set
        cifar100        x-valid     199             validation set
        cifar100        x-test      199             test set                objective function test
        cifar100        ori-test    [0-199]         validation + test set   objective function

        ImageNet16-120  train       [0-199]         training set
        ImageNet16-120  x-valid     199             validation set
        ImageNet16-120  x-test      199             test set                objective function test
        ImageNet16-120  ori-test    [0-199]         validation + test set   objective function


        The incumbents per split have been extracted as well. The incumbent accuracy and loss performance is reported
        i) by taking the maximum value across all seeds and configurations
        ii) averaged across the three available seeds

                                    i) The best possible incumbents (NO AVG!)                       ii) The "average" incumbent
        Data set        Metric      (Index of Arch, Accuracy)       (Index, Loss)                   (Index of Arch, Accuracy)       (Index, Loss)
        ----------------------------------------------------------------------------------------------------------------------------------------------------------
        cifar10-valid   train       (258, 100.0)                    (2778, 0.001179278278425336)    (10154, 100)                    (2778, 0.0013082386429297428)
        cifar10-valid   x-valid     (6111, 91.71999999023437)       (14443, 0.3837750501537323)     (6111, 91.60666665039064)       (3888, 0.3894046771335602)
        cifar10-valid   x-test
        cifar10-valid   ori-test    (14174, 91.65)                  (3385, 0.3850496160507202)      (1459, 91.52333333333333)       (3385, 0.3995230517864227)

        cifar100        train       (9930, 99.948)                  (9930, 0.012630240231156348)    (9930, 99.93733333333334)       (9930, 0.012843489621082942)
        cifar100        x-valid     (13714, 73.71999998779297)      (13934, 1.1490126512527465)     (9930, 73.4933333577474)        (7361, 1.1600867895126343)
        cifar100        x-test      (1459, 74.28000004882813)       (15383, 1.1427113876342774)     (9930, 73.51333332112631)       (7337, 1.1747569534301758)
        cifar100        ori-test    (9930, 73.88)                   (13706, 1.1610547459602356)     (9930, 73.50333333333333)       (7361, 1.1696554500579834)

        ImageNet16-120  train       (9930, 73.2524719841793)        (9930, 0.9490517352046979)      (9930, 73.22918040138735)       (9930, 0.9524298415108582)
        ImageNet16-120  x-valid     (13778, 47.39999985758463)      (10721, 2.0826991437276203)     (10676, 46.73333327229818)      (10721, 2.0915397168795264)
        ImageNet16-120  x-test      (857, 48.03333317057292)        (12887, 2.0940088628133138)     (857, 47.31111100599501)        (11882, 2.106453532218933)
        ImageNet16-120  ori-test    (857, 47.083333353678384)       (11882, 2.0950548852284747)     (857, 46.8444444647895)         (11882, 2.1028235816955565)


        Note:
        - The parameter epoch is 0 indexed!
        - In the original data, the training splits are always marked with the key 'train', whereas different
          identifiers are used for the available evaluation splits. They are listed in the tables above.
        - The data set cifar10 is excluded from this benchmark.

        Some further remarks:
        - cifar10-valid is trained on the train split and tested on the validation split.
        - The train metrics are dictionaries with epochs (e.g. 0, 1, 2) as key and the metric as value.
          The evaluation metrics, however, have as key the identifiers, e.g. ori-test@0, with 0 indicating the epoch.
          Also, each data set reports values for all 200 epochs for a metric on the specified split
          and a single value on the 200th epoch for the other splits.

        Parameters
        ----------
        dataset : str
            One of cifar10-valid, cifar100, ImageNet16-120 (cifar10 is excluded from this benchmark, see the notes).
        rng : np.random.RandomState, int, None
            Random seed for the benchmark's random state.
        kwargs
            Accepted but not used by this constructor.
        """  # noqa: E501

        super(NasBench201BaseBenchmark, self).__init__(rng=rng)

        manager = NASBench_201Data(dataset=dataset)

        # Remember which data set this benchmark instance serves, then load its data.
        self.dataset = dataset
        self.data = manager.load()

        structure_factory = NasBench201BaseBenchmark.config_to_structure_func
        self.config_to_structure = structure_factory(max_nodes=MAX_NODES)
예제 #7
0
def _load_nasbench_201_cifar100(_):
    """Load the cifar100 data in a pool worker and return the number of loaded entries.

    The single positional argument is the item handed over by ``pool.map`` and
    is ignored. The helper lives at module level because a process pool has to
    pickle the mapped callable, which is not possible for a lambda. Only the
    length is returned so that the loaded data itself is not sent back to the
    parent.
    """
    return len(NASBench_201Data(dataset='cifar100').load())


def test_nasbench_201_load_thread_safe():
    """Load the cifar100 data from several pool workers at the same time.

    The data directory is removed first, so every worker starts without
    previously stored files. The previous version mapped over an empty list,
    so the loader was never executed and the test could not fail.
    """
    shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201",
                  ignore_errors=True)

    num_workers = 3
    with Pool(num_workers) as pool:
        # One item per worker so that every worker runs the loader.
        entry_counts = pool.map(_load_nasbench_201_cifar100, range(num_workers))

    assert len(entry_counts) == num_workers
    # Every worker must end up with the same amount of loaded data.
    assert len(set(entry_counts)) == 1
예제 #8
0
def test_nasbench_201_get_files():
    """``get_files_per_dataset`` must list 27 files for cifar10, all prefixed with ``nb201_cifar10``."""
    cifar10_files = NASBench_201Data.get_files_per_dataset(dataset='cifar10')

    assert len(cifar10_files) == 27
    assert all(name.startswith('nb201_cifar10') for name in cifar10_files)