Example #1
# Imports assumed from the surrounding NNI source file (module paths may differ by NNI version);
# `inject_item` is a helper defined elsewhere in the same file.
import argparse
import json
import os

import tqdm

from nni.nas.benchmarks.utils import load_benchmark
from nni.nas.benchmarks.nds import NdsTrialConfig, NdsTrialStats, NdsIntermediateStats


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', help='Path to extracted NDS data dir.')
    args = parser.parse_args()

    sweep_list = [
        'Amoeba.json', 'Amoeba_in.json', 'DARTS.json', 'DARTS_fix-w-d.json',
        'DARTS_in.json', 'DARTS_lr-wd.json', 'DARTS_lr-wd_in.json',
        'ENAS.json', 'ENAS_fix-w-d.json', 'ENAS_in.json', 'NASNet.json',
        'NASNet_in.json', 'PNAS.json', 'PNAS_fix-w-d.json', 'PNAS_in.json',
        'ResNeXt-A.json', 'ResNeXt-A_in.json', 'ResNeXt-B.json',
        'ResNeXt-B_in.json', 'ResNet-B.json', 'ResNet.json',
        'ResNet_lr-wd.json', 'ResNet_lr-wd_in.json', 'ResNet_reruns.json',
        'ResNet_rng1.json', 'ResNet_rng2.json', 'ResNet_rng3.json',
        'Vanilla.json', 'Vanilla_lr-wd.json', 'Vanilla_lr-wd_in.json',
        'Vanilla_reruns.json', 'Vanilla_rng1.json', 'Vanilla_rng2.json',
        'Vanilla_rng3.json'
    ]

    db = load_benchmark('nds')

    with db:
        db.create_tables([NdsTrialConfig, NdsTrialStats, NdsIntermediateStats])
        for json_idx, json_file in enumerate(sweep_list, start=1):
            if 'fix-w-d' in json_file:
                generator = 'fix_w_d'
            elif 'lr-wd' in json_file:
                generator = 'tune_lr_wd'
            else:
                generator = 'random'
            if '_in' in json_file:
                dataset = 'imagenet'
            else:
                dataset = 'cifar10'
            # The proposer name is the filename stem before the first underscore,
            # e.g. 'DARTS_lr-wd_in.json' -> 'darts'.
            proposer = json_file.split('.')[0].split('_')[0].lower()
            with open(os.path.join(args.input_dir, json_file), 'r') as f:
                data = json.load(f)
            if 'top' in data and 'mid' in data:
                for t in tqdm.tqdm(data['top'],
                                   desc='[{}/{}] Processing {} (top)'.format(
                                       json_idx, len(sweep_list), json_file)):
                    inject_item(db, t, proposer, dataset, generator)
                for t in tqdm.tqdm(data['mid'],
                                   desc='[{}/{}] Processing {} (mid)'.format(
                                       json_idx, len(sweep_list), json_file)):
                    inject_item(db, t, proposer, dataset, generator)
            else:
                for job in tqdm.tqdm(data,
                                     desc='[{}/{}] Processing {}'.format(
                                         json_idx, len(sweep_list),
                                         json_file)):
                    inject_item(db, job, proposer, dataset, generator)
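
All three metadata fields are derived purely from filename conventions. As a standalone illustration of those rules (`parse_sweep_name` is a hypothetical helper, not part of the original script):

def parse_sweep_name(json_file):
    """Derive (proposer, generator, dataset) from an NDS sweep filename."""
    if 'fix-w-d' in json_file:
        generator = 'fix_w_d'        # fixed width/depth sweep
    elif 'lr-wd' in json_file:
        generator = 'tune_lr_wd'     # learning-rate/weight-decay sweep
    else:
        generator = 'random'         # random sampling sweep
    dataset = 'imagenet' if '_in' in json_file else 'cifar10'
    proposer = json_file.split('.')[0].split('_')[0].lower()
    return proposer, generator, dataset


assert parse_sweep_name('DARTS_lr-wd_in.json') == ('darts', 'tune_lr_wd', 'imagenet')
assert parse_sweep_name('ResNeXt-A.json') == ('resnext-a', 'random', 'cifar10')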
Example #2
# Imports assumed from the surrounding NNI source file (module paths may differ by NNI version):
import argparse

from tqdm import tqdm
from nasbench import api  # the official NAS-Bench-101 package

from nni.nas.benchmarks.utils import load_benchmark
from nni.nas.benchmarks.nasbench101 import (
    Nb101TrialConfig, Nb101TrialStats, Nb101IntermediateStats)
from nni.nas.benchmarks.nasbench101.graph_util import (
    nasbench_format_to_architecture_repr, hash_module)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_file',
        help='Path to the file to be converted, e.g., nasbench_full.tfrecord')
    args = parser.parse_args()
    nasbench = api.NASBench(args.input_file)

    db = load_benchmark('nasbench101')
    with db:
        db.create_tables(
            [Nb101TrialConfig, Nb101TrialStats, Nb101IntermediateStats])
        for hashval in tqdm(nasbench.hash_iterator(),
                            desc='Dumping data into database'):
            metadata, metrics = nasbench.get_metrics_from_hash(hashval)
            num_vertices, architecture = nasbench_format_to_architecture_repr(
                metadata['module_adjacency'], metadata['module_operations'])
            assert hashval == hash_module(architecture, num_vertices)
            # NAS-Bench-101 trains every architecture under four epoch budgets,
            # each repeated with three seeds.
            for epochs in [4, 12, 36, 108]:
                trial_config = Nb101TrialConfig.create(
                    arch=architecture,
                    num_vertices=num_vertices,
                    hash=hashval,
                    num_epochs=epochs)

                for seed in range(3):
                    cur = metrics[epochs][seed]
                    trial = Nb101TrialStats.create(
                        config=trial_config,
                        train_acc=cur['final_train_accuracy'] * 100,
                        valid_acc=cur['final_validation_accuracy'] * 100,
                        test_acc=cur['final_test_accuracy'] * 100,
                        parameters=metadata['trainable_parameters'] / 1e6,
                        training_time=cur['final_training_time'] * 60)
                    for t in ['halfway', 'final']:
                        Nb101IntermediateStats.create(
                            trial=trial,
                            current_epoch=epochs // 2 if t == 'halfway' else epochs,
                            training_time=cur[t + '_training_time'],
                            train_acc=cur[t + '_train_accuracy'] * 100,
                            valid_acc=cur[t + '_validation_accuracy'] * 100,
                            test_acc=cur[t + '_test_accuracy'] * 100)
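
Once populated, the tables can be read back with ordinary peewee queries. A minimal sketch, assuming the database generated above and the same schema classes; the query itself is illustrative, not part of the original script:

from peewee import fn

with db:
    # Average final test accuracy across the three seeds of each 108-epoch config,
    # then show the five best configurations.
    top = (Nb101TrialStats
           .select(Nb101TrialConfig.hash, fn.AVG(Nb101TrialStats.test_acc).alias('test_acc'))
           .join(Nb101TrialConfig)
           .where(Nb101TrialConfig.num_epochs == 108)
           .group_by(Nb101TrialStats.config)
           .order_by(fn.AVG(Nb101TrialStats.test_acc).desc())
           .limit(5))
    for row in top:
        print(row.config.hash, row.test_acc)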
Example #3
# Imports assumed from the surrounding NNI source file (module paths may differ by NNI version);
# `proxy` is the lazily-initialized peewee database proxy shared by the schema classes.
import functools

from peewee import fn
from playhouse.shortcuts import model_to_dict

from nni.nas.benchmarks.utils import load_benchmark
from nni.nas.benchmarks.nds import NdsTrialConfig, NdsTrialStats, proxy


def query_nds_trial_stats(model_family, proposer, generator, model_spec, cell_spec, dataset,
                          num_epochs=None, reduction=None, include_intermediates=False):
    """
    Query trial stats of NDS given conditions.

    Parameters
    ----------
    model_family : str or None
        If str, can be one of the model families available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`.
        Otherwise a wildcard.
    proposer : str or None
        If str, can be one of the proposers available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`. Otherwise a wildcard.
    generator : str or None
        If str, can be one of the generators available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`. Otherwise a wildcard.
    model_spec : dict or None
        If specified, can be one of the model specs available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`.
        Otherwise a wildcard.
    cell_spec : dict or None
        If specified, can be one of the cell specs available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`.
        Otherwise a wildcard.
    dataset : str or None
        If str, can be one of the datasets available in :class:`nni.nas.benchmark.nds.NdsTrialConfig`. Otherwise a wildcard.
    num_epochs : int or None
        If int, matching results will be returned. Otherwise a wildcard.
    reduction : str or None
        If 'none' or None, all trial stats will be returned directly.
        If 'mean', fields in trial stats will be averaged given the same trial config.
    include_intermediates : boolean
        If true, intermediate results will be returned.

    Returns
    -------
    generator of dict
        A generator of :class:`nni.nas.benchmark.nds.NdsTrialStats` objects,
        where each of them has been converted into a dict.
    """

    # Lazily connect to the NDS benchmark database on first use.
    if proxy.obj is None:
        proxy.initialize(load_benchmark('nds'))

    fields = []
    if reduction == 'none':
        reduction = None
    if reduction == 'mean':
        for field_name in NdsTrialStats._meta.sorted_field_names:
            if field_name not in ['id', 'config', 'seed']:
                fields.append(fn.AVG(getattr(NdsTrialStats, field_name)).alias(field_name))
    elif reduction is None:
        fields.append(NdsTrialStats)
    else:
        raise ValueError('Unsupported reduction: \'%s\'' % reduction)
    query = NdsTrialStats.select(*fields, NdsTrialConfig).join(NdsTrialConfig)
    conditions = []
    # Build one equality condition per non-None argument; `locals()` fetches each
    # argument's value by name.
    for field_name in ['model_family', 'proposer', 'generator', 'model_spec', 'cell_spec',
                       'dataset', 'num_epochs']:
        if locals()[field_name] is not None:
            conditions.append(getattr(NdsTrialConfig, field_name) == locals()[field_name])
    if conditions:
        query = query.where(functools.reduce(lambda a, b: a & b, conditions))
    if reduction is not None:
        query = query.group_by(NdsTrialStats.config)
    for trial in query:
        if include_intermediates:
            data = model_to_dict(trial)
            # exclude 'trial' from intermediates as it is already available in data
            data['intermediates'] = [
                {k: v for k, v in model_to_dict(t).items() if k != 'trial'} for t in trial.intermediates
            ]
            yield data
        else:
            yield model_to_dict(trial)
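
A hypothetical call, with illustrative argument values ('darts', 'random' and 'cifar10' all come straight from the sweep files in Example #1; the full sets of choices are defined on `NdsTrialConfig`):

# Hypothetical usage: per-config averages for randomly generated DARTS cells on CIFAR-10.
first = next(query_nds_trial_stats(None, 'darts', 'random', None, None, 'cifar10',
                                   reduction='mean'))
print(sorted(first.keys()))  # the averaged stat fields, plus the shared 'config' dict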
Example #4
# Imports assumed from the surrounding NNI source file (module paths may differ by NNI version);
# `proxy` is the lazily-initialized peewee database proxy shared by the schema classes.
import functools

from peewee import fn
from playhouse.shortcuts import model_to_dict

from nni.nas.benchmarks.utils import load_benchmark
from nni.nas.benchmarks.nasbench101.graph_util import hash_module, infer_num_vertices
from nni.nas.benchmarks.nasbench101 import Nb101TrialConfig, Nb101TrialStats, proxy


def query_nb101_trial_stats(arch, num_epochs, isomorphism=True,
                            reduction=None, include_intermediates=False):
    """
    Query trial stats of NAS-Bench-101 given conditions.

    Parameters
    ----------
    arch : dict or None
        If a dict, it is in the format that is described in
        :class:`nni.nas.benchmark.nasbench101.Nb101TrialConfig`. Only matching trial stats
        will be returned. If None, all architectures in the database will be matched.
    num_epochs : int or None
        If int, matching results will be returned. Otherwise a wildcard.
    isomorphism : boolean
        Whether to match essentially-equivalent architectures, i.e., architectures with
        the same graph-invariant hash value.
    reduction : str or None
        If 'none' or None, all trial stats will be returned directly.
        If 'mean', fields in trial stats will be averaged given the same trial config.
    include_intermediates : boolean
        If true, intermediate results will be returned.

    Returns
    -------
    generator of dict
        A generator of :class:`nni.nas.benchmark.nasbench101.Nb101TrialStats` objects,
        where each of them has been converted into a dict.
    """

    # Lazily connect to the NAS-Bench-101 benchmark database on first use.
    if proxy.obj is None:
        proxy.initialize(load_benchmark('nasbench101'))

    fields = []
    if reduction == 'none':
        reduction = None
    if reduction == 'mean':
        for field_name in Nb101TrialStats._meta.sorted_field_names:
            if field_name not in ['id', 'config']:
                fields.append(fn.AVG(getattr(Nb101TrialStats, field_name)).alias(field_name))
    elif reduction is None:
        fields.append(Nb101TrialStats)
    else:
        raise ValueError('Unsupported reduction: \'%s\'' % reduction)
    query = Nb101TrialStats.select(*fields, Nb101TrialConfig).join(Nb101TrialConfig)
    conditions = []
    if arch is not None:
        if isomorphism:
            num_vertices = infer_num_vertices(arch)
            conditions.append(
                Nb101TrialConfig.hash == hash_module(arch, num_vertices))
        else:
            conditions.append(Nb101TrialConfig.arch == arch)
    if num_epochs is not None:
        conditions.append(Nb101TrialConfig.num_epochs == num_epochs)
    if conditions:
        query = query.where(functools.reduce(lambda a, b: a & b, conditions))
    if reduction is not None:
        query = query.group_by(Nb101TrialStats.config)
    for trial in query:
        if include_intermediates:
            data = model_to_dict(trial)
            # exclude 'trial' from intermediates as it is already available in data
            data['intermediates'] = [
                {k: v for k, v in model_to_dict(t).items() if k != 'trial'}
                for t in trial.intermediates
            ]
            yield data
        else:
            yield model_to_dict(trial)
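
A hypothetical call along the same lines, matching every architecture at the 108-epoch budget written by the conversion script in Example #2 and averaging over seeds:

# Hypothetical usage: best configuration by mean test accuracy over the three seeds.
best = max(query_nb101_trial_stats(None, 108, reduction='mean'),
           key=lambda t: t['test_acc'])
print(best['config']['hash'], best['test_acc'])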
Example #5
# Imports assumed from the surrounding NNI source file (module paths may differ by NNI version);
# `parse_arch_str` is a helper defined elsewhere in the same file that converts a
# NAS-Bench-201 architecture string into the dict representation stored in the database.
import argparse

import torch
import tqdm

from nni.nas.benchmarks.utils import load_benchmark
from nni.nas.benchmarks.nasbench201 import (
    Nb201TrialConfig, Nb201TrialStats, Nb201IntermediateStats)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'input_file',
        help='Path to the file to be converted, e.g., NAS-Bench-201-v1_1-096897.pth.')
    args = parser.parse_args()
    # Split names used as (train, valid, test, ori-test) when indexing the raw
    # NAS-Bench-201 records for each dataset.
    dataset_split = {
        'cifar10-valid': ['train', 'x-valid', 'ori-test', 'ori-test'],
        'cifar10': ['train', 'ori-test', 'ori-test', 'ori-test'],
        'cifar100': ['train', 'x-valid', 'x-test', 'ori-test'],
        'imagenet16-120': ['train', 'x-valid', 'x-test', 'ori-test'],
    }

    db = load_benchmark('nasbench201')

    with db:
        db.create_tables(
            [Nb201TrialConfig, Nb201TrialStats, Nb201IntermediateStats])
        print('Loading NAS-Bench-201 pickle...')
        nb201_data = torch.load(args.input_file)
        print('Dumping architectures...')
        for arch_str in nb201_data['meta_archs']:
            arch_json = parse_arch_str(arch_str)
            for epochs in [12, 200]:
                for dataset in Nb201TrialConfig.dataset.choices:
                    Nb201TrialConfig.create(arch=arch_json,
                                            num_epochs=epochs,
                                            dataset=dataset,
                                            num_channels=16,
                                            num_cells=5)
        for arch_info in tqdm.tqdm(nb201_data['arch2infos'].values(),
                                   desc='Processing architecture statistics'):
            for epochs_verb, d in arch_info.items():
                # 'less' denotes the 12-epoch training budget; anything else is the
                # full 200-epoch budget.
                epochs = 12 if epochs_verb == 'less' else 200
                arch_json = parse_arch_str(d['arch_str'])
                for (dataset, seed), r in d['all_results'].items():
                    sp = dataset_split[dataset.lower()]
                    data_parsed = {
                        'train_acc': r['train_acc1es'][epochs - 1],
                        'valid_acc': r['eval_acc1es']['{}@{}'.format(sp[1], epochs - 1)],
                        'test_acc': r['eval_acc1es']['{}@{}'.format(sp[2], epochs - 1)],
                        'ori_test_acc': r['eval_acc1es']['{}@{}'.format(sp[3], epochs - 1)],
                        'train_loss': r['train_losses'][epochs - 1],
                        'valid_loss': r['eval_losses']['{}@{}'.format(sp[1], epochs - 1)],
                        'test_loss': r['eval_losses']['{}@{}'.format(sp[2], epochs - 1)],
                        'ori_test_loss': r['eval_losses']['{}@{}'.format(sp[3], epochs - 1)],
                        'parameters': r['params'],
                        'flops': r['flop'],
                        'latency': r['latency'][0],
                        'training_time': r['train_times'][epochs - 1] * epochs,
                        'valid_evaluation_time': r['eval_times']['{}@{}'.format(sp[1], epochs - 1)],
                        'test_evaluation_time': r['eval_times']['{}@{}'.format(sp[2], epochs - 1)],
                        'ori_test_evaluation_time': r['eval_times']['{}@{}'.format(sp[3], epochs - 1)],
                    }
                    config = Nb201TrialConfig.get(
                        (Nb201TrialConfig.num_epochs == epochs)
                        & (Nb201TrialConfig.arch == arch_json)
                        & (Nb201TrialConfig.dataset == dataset.lower()))
                    trial_stats = Nb201TrialStats.create(config=config,
                                                         seed=seed,
                                                         **data_parsed)
                    intermediate_stats = []
                    for epoch in range(epochs):
                        data_parsed = {
                            'train_acc': r['train_acc1es'].get(epoch),
                            'valid_acc': r['eval_acc1es'].get('{}@{}'.format(sp[1], epoch)),
                            'test_acc': r['eval_acc1es'].get('{}@{}'.format(sp[2], epoch)),
                            'ori_test_acc': r['eval_acc1es'].get('{}@{}'.format(sp[3], epoch)),
                            'train_loss': r['train_losses'].get(epoch),
                            'valid_loss': r['eval_losses'].get('{}@{}'.format(sp[1], epoch)),
                            'test_loss': r['eval_losses'].get('{}@{}'.format(sp[2], epoch)),
                            'ori_test_loss': r['eval_losses'].get('{}@{}'.format(sp[3], epoch)),
                        }
                        # Skip epochs for which no metric was recorded at all.
                        if all(v is None for v in data_parsed.values()):
                            continue
                        data_parsed.update(current_epoch=epoch + 1, trial=trial_stats)
                        intermediate_stats.append(data_parsed)
                    Nb201IntermediateStats.insert_many(intermediate_stats).execute(db)
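
Reading the generated tables back follows the same peewee pattern as the query helpers in Examples #3 and #4. A minimal sketch, assuming `db` is the connection returned by load_benchmark('nasbench201') after this script has run; the query is illustrative, not part of the original script:

with db:
    # First few 200-epoch CIFAR-100 trials, joined with their configs.
    query = (Nb201TrialStats
             .select(Nb201TrialStats, Nb201TrialConfig)
             .join(Nb201TrialConfig)
             .where((Nb201TrialConfig.dataset == 'cifar100')
                    & (Nb201TrialConfig.num_epochs == 200)))
    for trial in query.limit(3):
        print(trial.config.arch, trial.seed, trial.test_acc, trial.latency)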