Esempio n. 1
0
def account_one_arch(arch_index, arch_str, checkpoints, datasets, dataloader_dict):
  """Gather the results of one architecture over all seeds and datasets.

  Each checkpoint is loaded onto the CPU and its seed is parsed from the file
  name (the text after the last '-' and before the next '.').  For every
  requested dataset a ResultsCount is filled and registered in an ArchResults.

  Args:
    arch_index: index of the architecture (stored and used in messages).
    arch_str: architecture string, also used to rebuild the network.
    checkpoints: iterable of path objects exposing ``.name``.
    datasets: dataset names that must be present in every checkpoint.
    dataloader_dict: loaders keyed by '<dataset>@valid' / '<dataset>@test';
      only read for 'cifar100' and 'ImageNet16-120'.

  Returns:
    The populated ArchResults object.

  Raises:
    AssertionError: a dataset is missing from a checkpoint or its training
      is not flagged as finished.
    ValueError: the dataset name is not one of the supported ones.
  """
  summary = ArchResults(arch_index, arch_str)

  for ckpt_path in checkpoints:
    ckpt = torch.load(ckpt_path, map_location='cpu')
    seed_text = ckpt_path.name.split('-')[-1].split('.')[0]
    for dataset in datasets:
      assert dataset in ckpt, 'Can not find {:} in arch-{:} from {:}'.format(dataset, arch_index, ckpt_path)
      results = ckpt[dataset]
      assert results['finish-train'], 'This {:} arch seed={:} does not finish train on {:} ::: {:}'.format(arch_index, seed_text, dataset, ckpt_path)
      arch_config = {
        'channel': results['channel'],
        'num_cells': results['num_cells'],
        'arch_str': arch_str,
        'class_num': results['config']['class_num'],
      }
      record = ResultsCount(dataset, results['net_state_dict'], results['train_acc1es'], results['train_losses'],
                            results['param'], results['flop'], arch_config, seed_text, results['total_epoch'], None)

      if dataset == 'cifar10-valid':
        record.update_eval('x-valid', results['valid_acc1es'], results['valid_losses'])
      elif dataset == 'cifar10':
        record.update_eval('ori-test', results['valid_acc1es'], results['valid_losses'])
      elif dataset in ('cifar100', 'ImageNet16-120'):
        record.update_eval('ori-test', results['valid_acc1es'], results['valid_losses'])
        # Rebuild the trained network so it can be re-evaluated on the
        # separate valid / test loaders.
        net_config = dict2config({'name': 'infer.tiny',
                                  'C': arch_config['channel'],
                                  'N': arch_config['num_cells'],
                                  'genotype': CellStructure.str2structure(arch_config['arch_str']),
                                  'num_classes': arch_config['class_num']}, None)
        network = get_cell_based_tiny_net(net_config)
        network.load_state_dict(record.get_net_param())
        network = network.cuda()
        final_epoch = results['total_epoch'] - 1
        for split, tag in (('valid', 'x-valid'), ('test', 'x-test')):
          loss, top1, top5, latencies = pure_evaluate(dataloader_dict['{:}@{:}'.format(dataset, split)], network)
          record.update_eval(tag, {final_epoch: top1}, {final_epoch: loss})
        # The latencies recorded are those of the last (test) evaluation.
        record.update_latency(latencies)
      else:
        raise ValueError('invalid dataset name : {:}'.format(dataset))
      summary.update(dataset, int(seed_text), record)
  return summary
Esempio n. 2
0
def evaluate_one_shot(model, xloader, api, cal_mode, seed=111):
    """Correlate one-shot estimates with benchmark accuracies.

    All architectures are enumerated and shuffled with ``seed``.  For each
    one, two estimates are computed and compared (Pearson correlation via
    ``np.corrcoef``) against the CIFAR-10 validation / test accuracies
    returned by ``api``:

    * the sum of the log-softmax architecture parameters of its chosen ops;
    * the accuracy of the shared-weight model on a single batch of
      ``xloader`` with the architecture activated through
      ``model.set_cal_mode("dynamic", arch)``.

    Args:
        model: one-shot network exposing ``arch_parameters``, ``op_names``,
            ``max_nodes``, ``edge2index`` and ``set_cal_mode``.
        xloader: iterable yielding ``(inputs, targets)`` batches; it is
            restarted whenever it is exhausted.
        api: benchmark API providing ``query_index_by_arch`` and
            ``get_more_info``.
        cal_mode: flag forwarded to ``model.train``.
        seed: seed used to shuffle the architectures.

    Returns:
        Tuple ``(archs, probs, accuracies)`` in the shuffled order.
    """
    print(
        "This is an old version of codes to use NAS-Bench-API, and should be modified to align with the new version. Please contact me for more details if you use this function."
    )
    # Snapshot the state dict so it can be restored before returning.
    weights = deepcopy(model.state_dict())
    model.train(cal_mode)
    with torch.no_grad():
        logits = nn.functional.log_softmax(model.arch_parameters, dim=-1)
        archs = CellStructure.gen_all(model.op_names, model.max_nodes, False)
        probs, accuracies, gt_accs_10_valid, gt_accs_10_test = [], [], [], []
        loader_iter = iter(xloader)
        random.seed(seed)
        random.shuffle(archs)
        for arch in archs:
            arch_index = api.query_index_by_arch(arch)
            metrics = api.get_more_info(arch_index, "cifar10-valid", None,
                                        False, False)
            gt_accs_10_valid.append(metrics["valid-accuracy"])
            metrics = api.get_more_info(arch_index, "cifar10", None, False,
                                        False)
            gt_accs_10_test.append(metrics["test-accuracy"])
            select_logits = []
            for i, node_info in enumerate(arch.nodes):
                for op, xin in node_info:
                    node_str = "{:}<-{:}".format(i + 1, xin)
                    op_index = model.op_names.index(op)
                    select_logits.append(logits[model.edge2index[node_str],
                                                op_index])
            cur_prob = sum(select_logits).item()
            probs.append(cur_prob)
        cor_prob_valid = np.corrcoef(probs, gt_accs_10_valid)[0, 1]
        cor_prob_test = np.corrcoef(probs, gt_accs_10_test)[0, 1]
        print(
            "{:} correlation for probabilities : {:.6f} on CIFAR-10 validation and {:.6f} on CIFAR-10 test"
            .format(time_string(), cor_prob_valid, cor_prob_test))

        for idx, arch in enumerate(archs):
            model.set_cal_mode("dynamic", arch)
            try:
                inputs, targets = next(loader_iter)
            except StopIteration:
                # Only loader exhaustion restarts the iterator; any other
                # failure (worker crash, Ctrl-C, ...) must propagate.
                loader_iter = iter(xloader)
                inputs, targets = next(loader_iter)
            _, outputs = model(inputs.cuda())
            _, preds = torch.max(outputs, dim=-1)
            correct = (preds == targets.cuda()).float()
            accuracies.append(correct.mean().item())
            if idx != 0 and (idx % 500 == 0 or idx + 1 == len(archs)):
                cor_accs_valid = np.corrcoef(accuracies,
                                             gt_accs_10_valid[:idx + 1])[0, 1]
                cor_accs_test = np.corrcoef(accuracies,
                                            gt_accs_10_test[:idx + 1])[0, 1]
                print(
                    "{:} {:05d}/{:05d} mode={:5s}, correlation : accs={:.5f} for CIFAR-10 valid, {:.5f} for CIFAR-10 test."
                    .format(time_string(), idx, len(archs),
                            "Train" if cal_mode else "Eval", cor_accs_valid,
                            cor_accs_test))
    model.load_state_dict(weights)
    return archs, probs, accuracies
Esempio n. 3
0
def create_result_count(used_seed, dataset, arch_config, results,
                        dataloader_dict):
    """Build a ResultsCount from one dataset entry of a checkpoint.

    The network is rebuilt from ``arch_config`` and loaded with the stored
    parameters.  Entries containing 'train_times' (the new layout) are copied
    directly; older entries are completed by re-evaluating the network on the
    loaders in ``dataloader_dict``.

    Args:
        used_seed: seed forwarded to ResultsCount.
        dataset: dataset name of the entry.
        arch_config: mapping with 'channel', 'num_cells', 'arch_str' and
            'class_num'.
        results: the checkpoint entry for ``dataset``.
        dataloader_dict: loaders keyed by '<dataset>@<split>'; only read for
            old-layout entries.

    Returns:
        The filled ResultsCount.

    Raises:
        ValueError: an old-layout entry has an unsupported dataset name.
    """
    xresult = ResultsCount(dataset, results['net_state_dict'],
                           results['train_acc1es'], results['train_losses'],
                           results['param'], results['flop'], arch_config,
                           used_seed, results['total_epoch'], None)

    net_config = dict2config(
        {
            'name': 'infer.tiny',
            'C': arch_config['channel'],
            'N': arch_config['num_cells'],
            'genotype': CellStructure.str2structure(arch_config['arch_str']),
            'num_classes': arch_config['class_num']
        }, None)
    network = get_cell_based_tiny_net(net_config)
    network.load_state_dict(xresult.get_net_param())

    if 'train_times' in results:  # new version
        xresult.update_train_info(results['train_acc1es'],
                                  results['train_acc5es'],
                                  results['train_losses'],
                                  results['train_times'])
        xresult.update_eval(results['valid_acc1es'], results['valid_losses'],
                            results['valid_times'])
        return xresult

    def run_split(source, split):
        # Evaluate on the '<source>@<split>' loader; top-5 is not needed.
        loss, top1, _top5, latencies = pure_evaluate(
            dataloader_dict['{:}@{:}'.format(source, split)], network.cuda())
        return loss, top1, latencies

    def at_last_epoch(value):
        # Single-entry mapping keyed by the index of the final epoch.
        return {results['total_epoch'] - 1: value}

    if dataset == 'cifar10-valid':
        xresult.update_OLD_eval('x-valid', results['valid_acc1es'],
                                results['valid_losses'])
        loss, top1, latencies = run_split('cifar10', 'test')
        xresult.update_OLD_eval('ori-test', at_last_epoch(top1),
                                at_last_epoch(loss))
        xresult.update_latency(latencies)
    elif dataset == 'cifar10':
        xresult.update_OLD_eval('ori-test', results['valid_acc1es'],
                                results['valid_losses'])
        # Only the latencies of this evaluation are recorded.
        _, _, latencies = run_split(dataset, 'test')
        xresult.update_latency(latencies)
    elif dataset in ('cifar100', 'ImageNet16-120'):
        xresult.update_OLD_eval('ori-test', results['valid_acc1es'],
                                results['valid_losses'])
        for split, tag in (('valid', 'x-valid'), ('test', 'x-test')):
            loss, top1, latencies = run_split(dataset, split)
            xresult.update_OLD_eval(tag, at_last_epoch(top1),
                                    at_last_epoch(loss))
        # Latencies come from the last (test) evaluation.
        xresult.update_latency(latencies)
    else:
        raise ValueError('invalid dataset name : {:}'.format(dataset))
    return xresult
Esempio n. 4
0
def generate_meta_info(save_dir, max_node, divide=40):
  """Generate the benchmark meta file and the accompanying job scripts.

  All architectures of the 'nas-bench-201' cell search space are enumerated
  and shuffled with a fixed seed; a fixed-seed train/valid split of 50000
  indices is created; both are saved to 'meta-node-<max_node>.pth'.  Then
  three text files listing one command per chunk of architectures are
  written into ``save_dir``.

  Args:
    save_dir: output directory (created if missing).
    max_node: number of nodes per cell.
    divide: desired number of chunks the architectures are split into.

  Raises:
    AssertionError: the seeded shuffles do not reproduce the expected probe
      values, or the meta file already exists.
  """
  aa_nas_bench_ss = get_search_spaces('cell', 'nas-bench-201')
  archs = CellStructure.gen_all(aa_nas_bench_ss, max_node, False)
  print ('There are {:} archs vs {:}.'.format(len(archs), len(aa_nas_bench_ss) ** ((max_node-1)*max_node/2)))

  random.seed( 88 ) # please do not change this line for reproducibility
  random.shuffle( archs )
  # fixed probes that verify the seeded shuffle is reproducible
  assert archs[0  ].tostr() == '|avg_pool_3x3~0|+|nor_conv_1x1~0|skip_connect~1|+|nor_conv_1x1~0|skip_connect~1|skip_connect~2|', 'please check the 0-th architecture : {:}'.format(archs[0])
  assert archs[9  ].tostr() == '|avg_pool_3x3~0|+|none~0|none~1|+|skip_connect~0|none~1|nor_conv_3x3~2|', 'please check the 9-th architecture : {:}'.format(archs[9])
  assert archs[123].tostr() == '|avg_pool_3x3~0|+|avg_pool_3x3~0|nor_conv_1x1~1|+|none~0|avg_pool_3x3~1|nor_conv_3x3~2|', 'please check the 123-th architecture : {:}'.format(archs[123])
  total_arch = len(archs)

  num = 50000
  indexes_5W = list(range(num))
  random.seed( 1021 )
  random.shuffle( indexes_5W )
  train_split = sorted( set(indexes_5W[:num//2]) )
  valid_split = sorted( set(indexes_5W[num//2:]) )
  assert len(train_split) + len(valid_split) == num
  assert train_split[0] == 0 and train_split[10] == 26 and train_split[111] == 203 and valid_split[0] == 1 and valid_split[10] == 18 and valid_split[111] == 242, '{:} {:} {:} - {:} {:} {:}'.format(train_split[0], train_split[10], train_split[111], valid_split[0], valid_split[10], valid_split[111])
  splits = {num: {'train': train_split, 'valid': valid_split} }

  info = {'archs' : [x.tostr() for x in archs],
          'total' : total_arch,
          'max_node' : max_node,
          'splits': splits}

  save_dir = Path(save_dir)
  save_dir.mkdir(parents=True, exist_ok=True)
  save_name = save_dir / 'meta-node-{:}.pth'.format(max_node)
  assert not save_name.exists(), '{:} already exist'.format(save_name)
  torch.save(info, save_name)
  print ('save the meta file into {:}'.format(save_name))

  # Chunk size is at least one: with fewer architectures than `divide` the
  # integer division yields 0, and range() rejects a zero step.
  gaps = max(1, total_arch // divide)
  chunks = [(start, min(start+gaps, total_arch)) for start in range(0, total_arch, gaps)]

  script_name_full = save_dir / 'BENCH-201-N{:}.opt-full.script'.format(max_node)
  script_name_less = save_dir / 'BENCH-201-N{:}.opt-less.script'.format(max_node)
  # Context managers guarantee both files are closed even if a write fails.
  with open(str(script_name_full), 'w') as full_file, open(str(script_name_less), 'w') as less_file:
    for start, xend in chunks:
      full_file.write('bash ./scripts-search/NAS-Bench-201/train-models.sh 0 {:5d} {:5d} -1 \'777 888 999\'\n'.format(start, xend-1))
      less_file.write('bash ./scripts-search/NAS-Bench-201/train-models.sh 1 {:5d} {:5d} -1 \'777 888 999\'\n'.format(start, xend-1))
  print ('save the training script into {:} and {:}'.format(script_name_full, script_name_less))

  script_name = save_dir / 'meta-node-{:}.cal-script.txt'.format(max_node)
  macro = 'OMP_NUM_THREADS=6 CUDA_VISIBLE_DEVICES=0'
  with open(str(script_name), 'w') as cfile:
    for start, xend in chunks:
      cfile.write('{:} python exps/NAS-Bench-201/statistics.py --mode cal --target_dir {:06d}-{:06d}-C16-N5\n'.format(macro, start, xend-1))
  print ('save the post-processing script into {:}'.format(script_name))
Esempio n. 5
0
def evaluate_one_shot(model, xloader, api, cal_mode, seed=111):
    """Correlate one-shot estimates with benchmark accuracies.

    All architectures are enumerated and shuffled with ``seed``.  For each
    one, two estimates are computed and compared (Pearson correlation via
    ``np.corrcoef``) against the CIFAR-10 validation / test accuracies
    returned by ``api``:

    * the sum of the log-softmax architecture parameters of its chosen ops;
    * the accuracy of the shared-weight model on a single batch of
      ``xloader`` with the architecture activated through
      ``model.set_cal_mode('dynamic', arch)``.

    Args:
        model: one-shot network exposing ``arch_parameters``, ``op_names``,
            ``max_nodes``, ``edge2index`` and ``set_cal_mode``.
        xloader: iterable yielding ``(inputs, targets)`` batches; it is
            restarted whenever it is exhausted.
        api: benchmark API providing ``query_index_by_arch`` and
            ``get_more_info``.
        cal_mode: flag forwarded to ``model.train``.
        seed: seed used to shuffle the architectures.

    Returns:
        Tuple ``(archs, probs, accuracies)`` in the shuffled order.
    """
    # Snapshot the state dict so it can be restored before returning.
    weights = deepcopy(model.state_dict())
    model.train(cal_mode)
    with torch.no_grad():
        logits = nn.functional.log_softmax(model.arch_parameters, dim=-1)
        archs = CellStructure.gen_all(model.op_names, model.max_nodes, False)
        probs, accuracies, gt_accs_10_valid, gt_accs_10_test = [], [], [], []
        loader_iter = iter(xloader)
        random.seed(seed)
        random.shuffle(archs)
        for arch in archs:
            arch_index = api.query_index_by_arch(arch)
            metrics = api.get_more_info(arch_index, 'cifar10-valid', None,
                                        False, False)
            gt_accs_10_valid.append(metrics['valid-accuracy'])
            metrics = api.get_more_info(arch_index, 'cifar10', None, False,
                                        False)
            gt_accs_10_test.append(metrics['test-accuracy'])
            select_logits = []
            for i, node_info in enumerate(arch.nodes):
                for op, xin in node_info:
                    node_str = '{:}<-{:}'.format(i + 1, xin)
                    op_index = model.op_names.index(op)
                    select_logits.append(logits[model.edge2index[node_str],
                                                op_index])
            cur_prob = sum(select_logits).item()
            probs.append(cur_prob)
        cor_prob_valid = np.corrcoef(probs, gt_accs_10_valid)[0, 1]
        cor_prob_test = np.corrcoef(probs, gt_accs_10_test)[0, 1]
        print(
            '{:} correlation for probabilities : {:.6f} on CIFAR-10 validation and {:.6f} on CIFAR-10 test'
            .format(time_string(), cor_prob_valid, cor_prob_test))

        for idx, arch in enumerate(archs):
            model.set_cal_mode('dynamic', arch)
            try:
                inputs, targets = next(loader_iter)
            except StopIteration:
                # Only loader exhaustion restarts the iterator; any other
                # failure (worker crash, Ctrl-C, ...) must propagate.
                loader_iter = iter(xloader)
                inputs, targets = next(loader_iter)
            _, outputs = model(inputs.cuda())
            _, preds = torch.max(outputs, dim=-1)
            correct = (preds == targets.cuda()).float()
            accuracies.append(correct.mean().item())
            if idx != 0 and (idx % 500 == 0 or idx + 1 == len(archs)):
                cor_accs_valid = np.corrcoef(accuracies,
                                             gt_accs_10_valid[:idx + 1])[0, 1]
                cor_accs_test = np.corrcoef(accuracies,
                                            gt_accs_10_test[:idx + 1])[0, 1]
                print(
                    '{:} {:05d}/{:05d} mode={:5s}, correlation : accs={:.5f} for CIFAR-10 valid, {:.5f} for CIFAR-10 test.'
                    .format(time_string(), idx, len(archs),
                            'Train' if cal_mode else 'Eval', cor_accs_valid,
                            cor_accs_test))
    model.load_state_dict(weights)
    return archs, probs, accuracies
Esempio n. 6
0
 def generate_arch(self, actions):
     """Turn per-edge operation indices into a CellStructure.

     For every edge 'i<-j' (1 <= i < max_nodes, 0 <= j < i) the edge's
     position is looked up in ``self.edge2index``; ``actions`` at that
     position indexes ``self.search_space`` to obtain the operation name.
     """
     def chosen_op(dst, src):
         slot = self.edge2index['{:}<-{:}'.format(dst, src)]
         return self.search_space[actions[slot]]

     return CellStructure([
         tuple((chosen_op(dst, src), src) for src in range(dst))
         for dst in range(1, self.max_nodes)
     ])
Esempio n. 7
0
 def config2structure(config):
     """Convert an edge->operation mapping into a CellStructure.

     ``config`` is indexed with keys of the form 'i<-j' for every edge with
     1 <= i < max_nodes and 0 <= j < i; ``max_nodes`` comes from the
     enclosing scope.
     """
     return CellStructure([
         tuple((config["{:}<-{:}".format(dst, src)], src)
               for src in range(dst))
         for dst in range(1, max_nodes)
     ])
Esempio n. 8
0
 def random_architecture():
   """Sample a cell by drawing one operation per edge uniformly at random.

   Uses ``max_nodes`` and ``op_names`` from the enclosing scope.  One
   ``random.choice`` call is made per edge, iterating target nodes
   1..max_nodes-1 and, for each, source nodes 0..i-1.
   """
   # The edge label string built by the previous version was never used.
   genotypes = [
     tuple((random.choice(op_names), j) for j in range(i))
     for i in range(1, max_nodes)
   ]
   return CellStructure(genotypes)
Esempio n. 9
0
def get_an_arch(op_name='nor_conv_3x3', max_nodes=4):
    """Return a cell whose every edge uses the same operation.

    Args:
        op_name: operation placed on each edge (default 'nor_conv_3x3').
        max_nodes: number of nodes in the cell; node i (1 <= i < max_nodes)
            receives one edge from each node j < i (default 4).

    Returns:
        A CellStructure; with the defaults it is identical to the
        previously hard-coded 4-node, all-'nor_conv_3x3' cell.
    """
    # The edge label string built by the previous version was never used.
    genotypes = [
        tuple((op_name, j) for j in range(i)) for i in range(1, max_nodes)
    ]
    return CellStructure(genotypes)
Esempio n. 10
0
def main(save_dir: Path, workers: int, datasets: List[Text], xpaths: List[Text],
         splits: List[int], seeds: List[int], nets: List[str], opt_config: Dict[Text, Any],
         to_evaluate_indexes: tuple, cover_mode: bool, arch_config: Dict[Text, Any]):
  """Train/evaluate the selected architectures for every seed.

  For each index in ``to_evaluate_indexes`` and each seed, the architecture
  string ``nets[index]`` is evaluated with ``evaluate_all_datasets`` and the
  result saved to ``save_dir / 'arch-<index>-seed-<seed>.pth'``.  An existing
  result file is removed and recomputed when ``cover_mode`` is true, and
  skipped otherwise.  Progress and a time estimate are written to a logger
  located under ``save_dir / 'logs'``.
  """
  log_dir = save_dir / 'logs'
  log_dir.mkdir(parents=True, exist_ok=True)
  logger = Logger(str(log_dir), os.getpid(), False)

  logger.log('xargs : seeds      = {:}'.format(seeds))
  logger.log('xargs : cover_mode = {:}'.format(cover_mode))
  logger.log('-' * 100)
  num_jobs, num_nets = len(to_evaluate_indexes), len(nets)
  range_text = 'Start evaluating range =: {:06d} - {:06d}'.format(min(to_evaluate_indexes), max(to_evaluate_indexes))
  total_text = '({:} in total) / {:06d} with cover-mode={:}'.format(num_jobs, num_nets, cover_mode)
  logger.log(range_text + total_text)
  for position, (dataset, xpath, split) in enumerate(zip(datasets, xpaths, splits)):
    logger.log(
      '--->>> Evaluate {:}/{:} : dataset={:9s}, path={:}, split={:}'.format(position, len(datasets), dataset, xpath, split))
  logger.log('--->>> optimization config : {:}'.format(opt_config))

  dashes, stars, star_line = '-' * 15, '*' * 10, '*' * 100
  start_time, epoch_time = time.time(), AverageMeter()
  for i, index in enumerate(to_evaluate_indexes):
    arch = nets[index]
    logger.log('\n{:} evaluate {:06d}/{:06d} ({:06d}/{:06d})-th arch [seeds={:}] {:}'.format(
      time_string(), i, num_jobs, index, num_nets, seeds, dashes))
    logger.log('{:} {:} {:}'.format(dashes, arch, dashes))

    # test this arch on different datasets with different seeds
    skipped_any = False
    for seed in seeds:
      to_save_name = save_dir / 'arch-{:06d}-seed-{:04d}.pth'.format(index, seed)
      if to_save_name.exists():
        if not cover_mode:
          logger.log('Find existing file : {:}, skip this evaluation'.format(to_save_name))
          skipped_any = True
          continue
        logger.log('Find existing file : {:}, remove it before evaluation'.format(to_save_name))
        os.remove(str(to_save_name))
      results = evaluate_all_datasets(CellStructure.str2structure(arch),
                                      datasets, xpaths, splits, opt_config, seed,
                                      arch_config, workers, logger)
      torch.save(results, to_save_name)
      logger.log('\n{:} evaluate {:06d}/{:06d} ({:06d}/{:06d})-th arch [seeds={:}] ===>>> {:}'.format(
        time_string(), i, num_jobs, index, num_nets, seeds, to_save_name))

    # measure elapsed time; iterations with a skipped seed do not feed the
    # running average used for the remaining-time estimate
    if not skipped_any:
      epoch_time.update(time.time() - start_time)
    start_time = time.time()
    remaining = convert_secs2time(epoch_time.avg * (num_jobs - i - 1), True)
    need_time = 'Time Left: {:}'.format(remaining)
    logger.log('This arch costs : {:}'.format(convert_secs2time(epoch_time.val, True)))
    progress = '{:06d}/{:06d} ({:06d}/{:06d})-th done, left {:}'.format(i, num_jobs, index, num_nets, need_time)
    logger.log('{:}'.format(star_line))
    logger.log('{:}   {:74s}   {:}'.format(stars, progress, stars))
    logger.log('{:}'.format(star_line))

  logger.close()
Esempio n. 11
0
def traverse_net(max_node):
  """Return all 'nats-bench' cell architectures as strings in a fixed order.

  The architectures are enumerated, shuffled with seed 88 and three known
  positions are checked to confirm that the shuffle is reproducible.

  Raises:
    AssertionError: one of the probed positions holds an unexpected cell.
  """
  search_space = get_search_spaces('cell', 'nats-bench')
  archs = CellStructure.gen_all(search_space, max_node, False)
  expected_total = len(search_space) ** ((max_node-1)*max_node/2)
  print ('There are {:} archs vs {:}.'.format(len(archs), expected_total))

  random.seed( 88 ) # please do not change this line for reproducibility
  random.shuffle( archs )
  probes = (
    (0,   '|avg_pool_3x3~0|+|nor_conv_1x1~0|skip_connect~1|+|nor_conv_1x1~0|skip_connect~1|skip_connect~2|'),
    (9,   '|avg_pool_3x3~0|+|none~0|none~1|+|skip_connect~0|none~1|nor_conv_3x3~2|'),
    (123, '|avg_pool_3x3~0|+|avg_pool_3x3~0|nor_conv_1x1~1|+|none~0|avg_pool_3x3~1|nor_conv_3x3~2|'),
  )
  for position, expected in probes:
    assert archs[position].tostr() == expected, 'please check the {:}-th architecture : {:}'.format(position, archs[position])
  return [arch.tostr() for arch in archs]
Esempio n. 12
0
 def genotype(self):
     """Derive the cell that takes the arg-max operation on every edge.

     For each edge 'i<-j' the row of ``self.arch_parameters`` selected via
     ``self.edge2index`` is read without gradient tracking, and the
     operation at its arg-max position in ``self.search_space`` is chosen.
     """
     cell = []
     with torch.no_grad():
         for dst in range(1, self.max_nodes):
             incoming = []
             for src in range(dst):
                 edge = self.edge2index['{:}<-{:}'.format(dst, src)]
                 best = self.arch_parameters[edge].argmax().item()
                 incoming.append((self.search_space[best], src))
             cell.append(tuple(incoming))
     return CellStructure(cell)
Esempio n. 13
0
def train_single_model(save_dir, workers, datasets, xpaths, splits, use_less, seeds, model_str, arch_config):
  """Train and evaluate one architecture for every seed, with result caching.

  ``model_str`` is first looked up in ``CellArchitectures``; otherwise it is
  parsed with ``CellStructure.str2structure``.  For each seed the result of
  ``evaluate_all_datasets`` is saved to '<save_dir>/specifics/<tag>/seed-XXXX.pth'
  and reused if that file already exists.  A summary of the last epoch of
  every dataset is logged.

  Raises:
    AssertionError: CUDA is unavailable or the architecture uses an
      operation outside the 'full' cell search space.
    ValueError: ``model_str`` is neither a known name nor parseable; the
      original parsing error is attached as ``__cause__``.
  """
  assert torch.cuda.is_available(), 'CUDA is not available.'
  torch.backends.cudnn.enabled   = True
  torch.backends.cudnn.deterministic = True
  #torch.backends.cudnn.benchmark = True
  torch.set_num_threads( workers )
  
  save_dir = Path(save_dir) / 'specifics' / '{:}-{:}-{:}-{:}'.format('LESS' if use_less else 'FULL', model_str, arch_config['channel'], arch_config['num_cells'])
  logger   = Logger(str(save_dir), 0, False)
  if model_str in CellArchitectures:
    arch   = CellArchitectures[model_str]
    logger.log('The model string is found in pre-defined architecture dict : {:}'.format(model_str))
  else:
    try:
      arch = CellStructure.str2structure(model_str)
    except Exception as err:
      # Catch only ordinary errors (not KeyboardInterrupt/SystemExit) and
      # keep the parsing failure as the cause for easier debugging.
      raise ValueError('Invalid model string : {:}. It can not be found or parsed.'.format(model_str)) from err
  assert arch.check_valid_op(get_search_spaces('cell', 'full')), '{:} has the invalid op.'.format(arch)
  logger.log('Start train-evaluate {:}'.format(arch.tostr()))
  logger.log('arch_config : {:}'.format(arch_config))

  start_time, seed_time = time.time(), AverageMeter()
  for _is, seed in enumerate(seeds):
    logger.log('\nThe {:02d}/{:02d}-th seed is {:} ----------------------<.>----------------------'.format(_is, len(seeds), seed))
    to_save_name = save_dir / 'seed-{:04d}.pth'.format(seed)
    if to_save_name.exists():
      logger.log('Find the existing file {:}, directly load!'.format(to_save_name))
      checkpoint = torch.load(to_save_name)
    else:
      logger.log('Does not find the existing file {:}, train and evaluate!'.format(to_save_name))
      checkpoint = evaluate_all_datasets(arch, datasets, xpaths, splits, use_less, seed, arch_config, workers, logger)
      torch.save(checkpoint, to_save_name)
    # log information
    logger.log('{:}'.format(checkpoint['info']))
    all_dataset_keys = checkpoint['all_dataset_keys']
    for dataset_key in all_dataset_keys:
      logger.log('\n{:} dataset : {:} {:}'.format('-'*15, dataset_key, '-'*15))
      dataset_info = checkpoint[dataset_key]
      #logger.log('Network ==>\n{:}'.format( dataset_info['net_string'] ))
      logger.log('Flops = {:} MB, Params = {:} MB'.format(dataset_info['flop'], dataset_info['param']))
      logger.log('config : {:}'.format(dataset_info['config']))
      logger.log('Training State (finish) = {:}'.format(dataset_info['finish-train']))
      last_epoch = dataset_info['total_epoch'] - 1
      train_acc1es, train_acc5es = dataset_info['train_acc1es'], dataset_info['train_acc5es']
      valid_acc1es, valid_acc5es = dataset_info['valid_acc1es'], dataset_info['valid_acc5es']
      logger.log('Last Info : Train = Acc@1 {:.2f}% Acc@5 {:.2f}% Error@1 {:.2f}%, Test = Acc@1 {:.2f}% Acc@5 {:.2f}% Error@1 {:.2f}%'.format(train_acc1es[last_epoch], train_acc5es[last_epoch], 100-train_acc1es[last_epoch], valid_acc1es[last_epoch], valid_acc5es[last_epoch], 100-valid_acc1es[last_epoch]))
    # measure elapsed time
    seed_time.update(time.time() - start_time)
    start_time = time.time()
    need_time = 'Time Left: {:}'.format( convert_secs2time(seed_time.avg * (len(seeds)-_is-1), True) )
    logger.log('\n<<<***>>> The {:02d}/{:02d}-th seed is {:} <finish> other procedures need {:}'.format(_is, len(seeds), seed, need_time))
  logger.close()
Esempio n. 14
0
def generate_meta_info(save_dir, max_node, divide=40):
    """Create and save the benchmark meta file.

    The 'nas-bench-201' cell architectures are enumerated and shuffled with
    seed 88; 50000 indices are shuffled with seed 1021 and halved into sorted
    train / valid index lists.  Both are stored, together with the totals,
    in ``<save_dir>/meta-node-<max_node>.pth``.  ``divide`` is accepted but
    not used by this function.

    Raises:
        AssertionError: the seeded shuffles do not reproduce the expected
            probe values, or the meta file already exists.
    """
    search_space = get_search_spaces('cell', 'nas-bench-201')
    archs = CellStructure.gen_all(search_space, max_node, False)
    expected_total = len(search_space)**((max_node - 1) * max_node / 2)
    print('There are {:} archs vs {:}.'.format(len(archs), expected_total))

    random.seed(88)  # please do not change this line for reproducibility
    random.shuffle(archs)
    # Known positions of the seeded shuffle, used as a reproducibility check.
    probes = (
        (0, '|avg_pool_3x3~0|+|nor_conv_1x1~0|skip_connect~1|+|nor_conv_1x1~0|skip_connect~1|skip_connect~2|'),
        (9, '|avg_pool_3x3~0|+|none~0|none~1|+|skip_connect~0|none~1|nor_conv_3x3~2|'),
        (123, '|avg_pool_3x3~0|+|avg_pool_3x3~0|nor_conv_1x1~1|+|none~0|avg_pool_3x3~1|nor_conv_3x3~2|'),
    )
    for position, expected in probes:
        assert archs[position].tostr() == expected, \
            'please check the {:}-th architecture : {:}'.format(
                position, archs[position])
    total_arch = len(archs)

    num = 50000
    half = num // 2
    shuffled = list(range(num))
    random.seed(1021)
    random.shuffle(shuffled)
    train_split = sorted(set(shuffled[:half]))
    valid_split = sorted(set(shuffled[half:]))
    assert len(train_split) + len(valid_split) == num
    observed = (train_split[0], train_split[10], train_split[111],
                valid_split[0], valid_split[10], valid_split[111])
    assert observed == (0, 26, 203, 1, 18, 242), \
        '{:} {:} {:} - {:} {:} {:}'.format(*observed)

    info = {
        'archs': [arch.tostr() for arch in archs],
        'total': total_arch,
        'max_node': max_node,
        'splits': {num: {'train': train_split, 'valid': valid_split}},
    }

    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)
    save_name = save_dir / 'meta-node-{:}.pth'.format(max_node)
    assert not save_name.exists(), '{:} already exist'.format(save_name)
    torch.save(info, save_name)
    print('save the meta file into {:}'.format(save_name))
Esempio n. 15
0
def check_unique_arch(meta_file):
    """Print how many architectures of a benchmark file are valid / unique.

    The architecture strings of ``API(str(meta_file))`` are parsed into
    CellStructure objects.  Uniqueness is counted three times, grouping the
    architectures by ``to_unique_str(consider_zero)`` with ``consider_zero``
    set to None, False and True.
    """
    api = API(str(meta_file))
    arch_strs = deepcopy(api.meta_archs)
    xarchs = [CellStructure.str2structure(x) for x in arch_strs]

    def get_unique_matrix(archs, consider_zero):
        # Returns (same-group boolean matrix, group id per arch, #groups).
        unique_strs = [arch.to_unique_str(consider_zero) for arch in archs]
        print("{:} create unique-string ({:}/{:}) done".format(
            time_string(), len(set(unique_strs)), len(unique_strs)))
        groups = dict()
        for index, xstr in enumerate(unique_strs):
            groups.setdefault(xstr, list()).append(index)
        # sm_matrix[i, j] is True when archs i and j share a unique string.
        sm_matrix = torch.eye(len(archs)).bool()
        for members in groups.values():
            for i in members:
                for j in members:
                    sm_matrix[i, j] = True
        unique_ids = [-1] * len(archs)
        unique_num = 0
        for row in range(len(unique_ids)):
            if unique_ids[row] >= 0:
                continue  # already labelled as part of an earlier group
            for nghb in sm_matrix[row].nonzero().view(-1).tolist():
                assert unique_ids[nghb] == -1, "impossible"
                unique_ids[nghb] = unique_num
            unique_num += 1
        return sm_matrix, unique_ids, unique_num

    valid_count = sum(arch.check_valid() for arch in xarchs)
    print("There are {:} valid-archs".format(valid_count))
    for consider_zero, description in ((None, "considering nothing"),
                                       (False, "not considering zero"),
                                       (True, "considering zero")):
        sm_matrix, uniqueIDs, unique_num = get_unique_matrix(
            xarchs, consider_zero)
        print("{:} There are {:} unique architectures ({:}).".format(
            time_string(), unique_num, description))
Esempio n. 16
0
def test_issue_81_82(api):
    """Run the API queries related to issues 81 and 82 and print the results.

    The function only checks that the calls succeed; it performs no
    assertions on the returned values.
    """
    dataset = 'cifar10-valid'
    arch_str = '|nor_conv_3x3~0|+|skip_connect~0|nor_conv_3x3~1|+|skip_connect~0|none~1|nor_conv_3x3~2|'

    # The hp='12' answer is not inspected; the call itself is the check.
    api.query_by_index(0, dataset, hp='12')
    results = api.query_by_index(0, dataset, hp='200')
    print(list(results.keys()))
    for split in ('valid', 'x-valid'):
        print(results[888].get_eval(split))

    # Likewise, only the call is exercised here.
    api.get_more_info(index=0,
                      dataset=dataset,
                      iepoch=11,
                      hp='200',
                      is_random=False)

    # Query by architecture: first by string, then by parsed structure.
    print(api.query_by_arch(arch_str, '200'))
    structure = CellStructure.str2structure(arch_str)
    print(api.query_by_arch(structure, '200'))
Esempio n. 17
0
def get_all_archs(operations):
    """Enumerate every 4-node cell allowed by per-edge operation choices.

    Args:
        operations: mapping from an edge ``(i, j)`` (1 <= i < 4, 0 <= j < i)
            to the iterable of operations permitted on that edge.

    Returns:
        A list of CellStructure objects, one per combination of choices over
        the six edges, in the order (1,0), (2,0), (2,1), (3,0), (3,1), (3,2).
    """
    edges = [(dst, src) for dst in range(1, 4) for src in range(dst)]

    partial = []
    for dst, src in edges:
        if not partial:
            # Nothing accumulated yet: start one combination per choice.
            partial = [[(func, src)] for func in operations[(dst, src)]]
        else:
            # Extend every accumulated prefix by every choice of this edge.
            partial = [
                prefix + [(func, src)]
                for prefix in partial
                for func in operations[(dst, src)]
            ]

    # Regroup the six flat edge entries by their target node (1, 2, 3).
    return [
        CellStructure([[ops[0]], [ops[1], ops[2]], [ops[3], ops[4], ops[5]]])
        for ops in partial
    ]
Esempio n. 18
0
def main(save_dir, workers, datasets, xpaths, splits, use_less, srange, arch_index, seeds, cover_mode, meta_info, arch_config):
  """Evaluate a range of architectures and save one checkpoint per (architecture, seed).

  Checkpoints go below ``save_dir`` into a sub-directory named after ``srange``
  and the ``channel`` / ``num_cells`` entries of ``arch_config`` (suffixed with
  ``-LESS`` when ``use_less`` is true).  When ``arch_index`` is -1 every index
  in the closed interval ``srange`` is evaluated, otherwise only ``arch_index``.
  For a checkpoint that already exists: with ``cover_mode`` it is removed and
  recomputed, without it that seed is skipped.
  """
  assert torch.cuda.is_available(), 'CUDA is not available.'
  torch.backends.cudnn.enabled = True
  torch.backends.cudnn.deterministic = True
  torch.set_num_threads(workers)

  assert len(srange) == 2 and 0 <= srange[0] <= srange[1], 'invalid srange : {:}'.format(srange)
  range_lo, range_hi = srange[0], srange[1]

  dir_template = '{:06d}-{:06d}-C{:}-N{:}-LESS' if use_less else '{:06d}-{:06d}-C{:}-N{:}'
  sub_dir = Path(save_dir) / dir_template.format(range_lo, range_hi, arch_config['channel'], arch_config['num_cells'])
  logger = Logger(str(sub_dir), 0, False)

  all_archs = meta_info['archs']
  assert range_hi < meta_info['total'], 'invalid range : {:}-{:} vs. {:}'.format(range_lo, range_hi, meta_info['total'])
  assert arch_index == -1 or range_lo <= arch_index <= range_hi, 'invalid range : {:} vs. {:} vs. {:}'.format(range_lo, arch_index, range_hi)
  to_evaluate_indexes = list(range(range_lo, range_hi + 1)) if arch_index == -1 else [arch_index]

  for label, value in (('seeds     ', seeds), ('arch_index', arch_index), ('cover_mode', cover_mode)):
    logger.log('xargs : {:} = {:}'.format(label, value))
  logger.log('-'*100)

  logger.log('Start evaluating range =: {:06d} vs. {:06d} vs. {:06d} / {:06d} with cover-mode={:}'.format(range_lo, arch_index, range_hi, meta_info['total'], cover_mode))
  for position, (dataset, xpath, split) in enumerate(zip(datasets, xpaths, splits)):
    logger.log('--->>> Evaluate {:}/{:} : dataset={:9s}, path={:}, split={:}'.format(position, len(datasets), dataset, xpath, split))
  logger.log('--->>> architecture config : {:}'.format(arch_config))

  dashes = '-'*15
  num_to_evaluate = len(to_evaluate_indexes)
  epoch_time = AverageMeter()
  start_time = time.time()
  for i, index in enumerate(to_evaluate_indexes):
    arch = all_archs[index]
    logger.log('\n{:} evaluate {:06d}/{:06d} ({:06d}/{:06d})-th architecture [seeds={:}] {:}'.format(dashes, i, num_to_evaluate, index, meta_info['total'], seeds, dashes))
    logger.log('{:} {:} {:}'.format(dashes, arch, dashes))

    # Train/evaluate this architecture once per seed on all datasets.
    skipped_a_seed = False
    for seed in seeds:
      to_save_name = sub_dir / 'arch-{:06d}-seed-{:04d}.pth'.format(index, seed)
      if to_save_name.exists():
        if not cover_mode:
          logger.log('Find existing file : {:}, skip this evaluation'.format(to_save_name))
          skipped_a_seed = True
          continue
        logger.log('Find existing file : {:}, remove it before evaluation'.format(to_save_name))
        os.remove(str(to_save_name))
      results = evaluate_all_datasets(
        CellStructure.str2structure(arch),
        datasets, xpaths, splits, use_less, seed,
        arch_config, workers, logger,
      )
      torch.save(results, to_save_name)
      logger.log('{:} --evaluate-- {:06d}/{:06d} ({:06d}/{:06d})-th seed={:} done, save into {:}'.format(dashes, i, num_to_evaluate, index, meta_info['total'], seed, to_save_name))

    # Only architectures for which no seed was skipped feed the time average.
    if not skipped_a_seed:
      epoch_time.update(time.time() - start_time)
    start_time = time.time()
    remaining = num_to_evaluate - i - 1
    need_time = 'Time Left: {:}'.format(convert_secs2time(epoch_time.avg * remaining, True))
    logger.log('This arch costs : {:}'.format(convert_secs2time(epoch_time.val, True)))
    progress = '{:06d}/{:06d} ({:06d}/{:06d})-th done, left {:}'.format(i, num_to_evaluate, index, meta_info['total'], need_time)
    logger.log('{:}'.format('*'*100))
    logger.log('{:}   {:74s}   {:}'.format('*'*10, progress, '*'*10))
    logger.log('{:}'.format('*'*100))

  logger.close()
Esempio n. 19
0
def main(xargs):
    """Run a 'GDAS' architecture search on CIFAR-10 with TensorFlow.

    The CIFAR-10 training images are shuffled and split by alternating indexes
    into two halves: one half drives the weight update, the other drives the
    architecture-parameter update.  After every epoch the running
    losses/accuracies, the current genotype and the architecture parameters
    are printed.

    Args:
        xargs: namespace providing ``channel``, ``num_cells``, ``max_nodes``,
            ``w_lr``, ``w_weight_decay``, ``w_momentum``,
            ``arch_learning_rate``, ``arch_weight_decay``, ``epochs``,
            ``tau_max`` and ``tau_min``.
    """
    cifar10 = tf.keras.datasets.cifar10

    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train, x_test = x_train.astype('float32'), x_test.astype('float32')

    # Shuffle the training indexes and split them into two interleaved halves:
    # even positions for the weight step, odd positions for the alpha step.
    all_indexes = list(range(x_train.shape[0]))
    random.shuffle(all_indexes)
    s_train_idxs, s_valid_idxs = all_indexes[::2], all_indexes[1::2]
    search_train_x, search_train_y = x_train[s_train_idxs], y_train[
        s_train_idxs]
    search_valid_x, search_valid_y = x_train[s_valid_idxs], y_train[
        s_valid_idxs]

    # Use tf.data; each element carries a train sample and a valid sample.
    search_ds = tf.data.Dataset.from_tensor_slices(
        (search_train_x, search_train_y, search_valid_x, search_valid_y))
    search_ds = search_ds.map(pre_process).shuffle(1000).batch(64)

    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

    # Create an instance of the model
    config = dict2config(
        {
            'name': 'GDAS',
            'C': xargs.channel,
            'N': xargs.num_cells,
            'max_nodes': xargs.max_nodes,
            'num_classes': 10,
            'space': 'nas-bench-102',
            'affine': True
        }, None)
    model = get_cell_based_tiny_net(config)

    # Loss and the two optimizers (network weights / architecture parameters).
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    w_optimizer = SGDW(learning_rate=xargs.w_lr,
                       weight_decay=xargs.w_weight_decay,
                       momentum=xargs.w_momentum,
                       nesterov=True)
    a_optimizer = AdamW(learning_rate=xargs.arch_learning_rate,
                        weight_decay=xargs.arch_weight_decay,
                        beta_1=0.5,
                        beta_2=0.999,
                        epsilon=1e-07)

    # metrics
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')
    valid_loss = tf.keras.metrics.Mean(name='valid_loss')
    valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='valid_accuracy')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='test_accuracy')
    epoch_metrics = (train_loss, train_accuracy, valid_loss, valid_accuracy,
                     test_loss, test_accuracy)

    @tf.function
    def search_step(train_images, train_labels, valid_images, valid_labels,
                    tf_tau):
        # optimize weights
        with tf.GradientTape() as tape:
            predictions = model(train_images, tf_tau, True)
            w_loss = loss_object(train_labels, predictions)
        net_w_param = model.get_weights()
        gradients = tape.gradient(w_loss, net_w_param)
        w_optimizer.apply_gradients(zip(gradients, net_w_param))
        train_loss(w_loss)
        train_accuracy(train_labels, predictions)
        # optimize alphas
        with tf.GradientTape() as tape:
            predictions = model(valid_images, tf_tau, True)
            a_loss = loss_object(valid_labels, predictions)
        net_a_param = model.get_alphas()
        gradients = tape.gradient(a_loss, net_a_param)
        a_optimizer.apply_gradients(zip(gradients, net_a_param))
        valid_loss(a_loss)
        valid_accuracy(valid_labels, predictions)

    # TEST (not invoked below: the per-epoch test loop is disabled)
    @tf.function
    def test_step(images, labels):
        predictions = model(images)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    print(
        '{:} start searching with {:} epochs ({:} batches per epoch).'.format(
            time_string(), xargs.epochs,
            tf.data.experimental.cardinality(search_ds).numpy()))

    # Number of tau decay steps; clamped so a single-epoch run keeps tau_max
    # instead of dividing by zero.
    tau_steps = max(xargs.epochs - 1, 1)

    for epoch in range(xargs.epochs):
        # Reset every metric at the start of the epoch.  The validation
        # metrics must be included: they are the ones updated in search_step
        # and reported below, and would otherwise accumulate across epochs.
        for metric in epoch_metrics:
            metric.reset_states()
        # Linear decay of tau from tau_max (first epoch) to tau_min (last).
        cur_tau = xargs.tau_max - (xargs.tau_max -
                                   xargs.tau_min) * epoch / tau_steps
        tf_tau = tf.cast(cur_tau, dtype=tf.float32, name='tau')

        for trn_imgs, trn_labels, val_imgs, val_labels in search_ds:
            search_step(trn_imgs, trn_labels, val_imgs, val_labels, tf_tau)
        genotype = model.genotype()
        genotype = CellStructure(genotype)

        #for test_images, test_labels in test_ds:
        #  test_step(test_images, test_labels)

        template = '{:} Epoch {:03d}/{:03d}, Train-Loss: {:.3f}, Train-Accuracy: {:.2f}%, Valid-Loss: {:.3f}, Valid-Accuracy: {:.2f}% | tau={:.3f}'
        print(
            template.format(time_string(), epoch + 1, xargs.epochs,
                            train_loss.result(),
                            train_accuracy.result() * 100, valid_loss.result(),
                            valid_accuracy.result() * 100, cur_tau))
        print('{:} genotype : {:}\n{:}\n'.format(time_string(), genotype,
                                                 model.get_np_alphas()))
Esempio n. 20
0
def create_result_count(
    used_seed: int,
    dataset: Text,
    arch_config: Dict[Text, Any],
    results: Dict[Text, Any],
    dataloader_dict: Dict[Text, Any],
) -> ResultsCount:
    """Build a ``ResultsCount`` for one (dataset, seed) entry of a checkpoint.

    Checkpoints that contain ``"train_times"`` (the new format) already carry
    all train/eval statistics, which are copied over directly.  For older
    checkpoints the network is rebuilt from its saved weights and re-evaluated
    on the loaders in ``dataloader_dict`` (indexed by ``"<dataset>@<split>"``)
    to fill in the missing evaluation entries and the latency.

    Raises:
        ValueError: old-format checkpoint with an unsupported ``dataset``.
    """
    xresult = ResultsCount(
        dataset,
        results["net_state_dict"],
        results["train_acc1es"],
        results["train_losses"],
        results["param"],
        results["flop"],
        arch_config,
        used_seed,
        results["total_epoch"],
        None,
    )
    inference_spec = {
        "name": "infer.tiny",
        "C": arch_config["channel"],
        "N": arch_config["num_cells"],
        "genotype": CellStructure.str2structure(arch_config["arch_str"]),
        "num_classes": arch_config["class_num"],
    }
    net_config = dict2config(inference_spec, None)

    # New-format checkpoint: everything needed is already recorded.
    if "train_times" in results:
        xresult.update_train_info(
            results["train_acc1es"],
            results["train_acc5es"],
            results["train_losses"],
            results["train_times"],
        )
        xresult.update_eval(
            results["valid_acc1es"],
            results["valid_losses"],
            results["valid_times"],
        )
        return xresult

    # Old-format checkpoint: restore the network and evaluate what is missing.
    network = get_cell_based_tiny_net(net_config)
    network.load_state_dict(xresult.get_net_param())

    def _evaluate_on(name, split):
        # Run the restored network on the "<name>@<split>" loader.
        loader = dataloader_dict["{:}@{:}".format(name, split)]
        return pure_evaluate(loader, network.cuda())

    def _at_last_epoch(value):
        # Single-entry dict keyed by the index of the final epoch.
        return {results["total_epoch"] - 1: value}

    if dataset == "cifar10-valid":
        xresult.update_OLD_eval(
            "x-valid", results["valid_acc1es"], results["valid_losses"]
        )
        loss, top1, _, latencies = _evaluate_on("cifar10", "test")
        xresult.update_OLD_eval(
            "ori-test", _at_last_epoch(top1), _at_last_epoch(loss)
        )
        xresult.update_latency(latencies)
    elif dataset == "cifar10":
        xresult.update_OLD_eval(
            "ori-test", results["valid_acc1es"], results["valid_losses"]
        )
        # Only the latency of this run is recorded.
        _, _, _, latencies = _evaluate_on(dataset, "test")
        xresult.update_latency(latencies)
    elif dataset in ("cifar100", "ImageNet16-120"):
        xresult.update_OLD_eval(
            "ori-test", results["valid_acc1es"], results["valid_losses"]
        )
        loss, top1, _, latencies = _evaluate_on(dataset, "valid")
        xresult.update_OLD_eval(
            "x-valid", _at_last_epoch(top1), _at_last_epoch(loss)
        )
        loss, top1, _, latencies = _evaluate_on(dataset, "test")
        xresult.update_OLD_eval(
            "x-test", _at_last_epoch(top1), _at_last_epoch(loss)
        )
        # Latency comes from the test-split run.
        xresult.update_latency(latencies)
    else:
        raise ValueError("invalid dataset name : {:}".format(dataset))
    return xresult
Esempio n. 21
0
def train_single_model(
    save_dir, workers, datasets, xpaths, splits, use_less, seeds, model_str, arch_config
):
    """Train and evaluate one architecture for every seed, caching results on disk.

    ``model_str`` is first looked up in ``CellArchitectures``; if absent it is
    parsed with ``CellStructure.str2structure``.  For each seed the checkpoint
    ``<save_dir>/specifics/<LESS|FULL>-<model_str>-<channel>-<num_cells>/seed-XXXX.pth``
    is loaded if it exists, otherwise it is produced by
    ``evaluate_all_datasets`` and saved.  A summary of every dataset stored in
    the checkpoint is then logged.

    Raises:
        ValueError: ``model_str`` is neither a pre-defined architecture nor
            parseable as a cell structure.
        AssertionError: CUDA is unavailable, or the architecture uses an op
            outside the "full" cell search space.
    """
    assert torch.cuda.is_available(), "CUDA is not available."
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = True
    torch.set_num_threads(workers)

    save_dir = (
        Path(save_dir)
        / "specifics"
        / "{:}-{:}-{:}-{:}".format(
            "LESS" if use_less else "FULL",
            model_str,
            arch_config["channel"],
            arch_config["num_cells"],
        )
    )
    logger = Logger(str(save_dir), 0, False)
    if model_str in CellArchitectures:
        arch = CellArchitectures[model_str]
        logger.log(
            "The model string is found in pre-defined architecture dict : {:}".format(
                model_str
            )
        )
    else:
        try:
            arch = CellStructure.str2structure(model_str)
        except Exception as err:
            # Catch Exception only (not a bare except) so KeyboardInterrupt /
            # SystemExit are not converted, and chain the parsing error so
            # the real cause stays visible in the traceback.
            raise ValueError(
                "Invalid model string : {:}. It can not be found or parsed.".format(
                    model_str
                )
            ) from err
    assert arch.check_valid_op(
        get_search_spaces("cell", "full")
    ), "{:} has the invalid op.".format(arch)
    logger.log("Start train-evaluate {:}".format(arch.tostr()))
    logger.log("arch_config : {:}".format(arch_config))

    start_time, seed_time = time.time(), AverageMeter()
    for _is, seed in enumerate(seeds):
        logger.log(
            "\nThe {:02d}/{:02d}-th seed is {:} ----------------------<.>----------------------".format(
                _is, len(seeds), seed
            )
        )
        to_save_name = save_dir / "seed-{:04d}.pth".format(seed)
        if to_save_name.exists():
            logger.log(
                "Find the existing file {:}, directly load!".format(to_save_name)
            )
            checkpoint = torch.load(to_save_name)
        else:
            logger.log(
                "Does not find the existing file {:}, train and evaluate!".format(
                    to_save_name
                )
            )
            checkpoint = evaluate_all_datasets(
                arch,
                datasets,
                xpaths,
                splits,
                use_less,
                seed,
                arch_config,
                workers,
                logger,
            )
            torch.save(checkpoint, to_save_name)
        # log information
        logger.log("{:}".format(checkpoint["info"]))
        all_dataset_keys = checkpoint["all_dataset_keys"]
        for dataset_key in all_dataset_keys:
            logger.log(
                "\n{:} dataset : {:} {:}".format("-" * 15, dataset_key, "-" * 15)
            )
            dataset_info = checkpoint[dataset_key]
            logger.log(
                "Flops = {:} MB, Params = {:} MB".format(
                    dataset_info["flop"], dataset_info["param"]
                )
            )
            logger.log("config : {:}".format(dataset_info["config"]))
            logger.log(
                "Training State (finish) = {:}".format(dataset_info["finish-train"])
            )
            # The per-epoch accuracy records are indexed by epoch number.
            last_epoch = dataset_info["total_epoch"] - 1
            train_acc1es, train_acc5es = (
                dataset_info["train_acc1es"],
                dataset_info["train_acc5es"],
            )
            valid_acc1es, valid_acc5es = (
                dataset_info["valid_acc1es"],
                dataset_info["valid_acc5es"],
            )
            logger.log(
                "Last Info : Train = Acc@1 {:.2f}% Acc@5 {:.2f}% Error@1 {:.2f}%, Test = Acc@1 {:.2f}% Acc@5 {:.2f}% Error@1 {:.2f}%".format(
                    train_acc1es[last_epoch],
                    train_acc5es[last_epoch],
                    100 - train_acc1es[last_epoch],
                    valid_acc1es[last_epoch],
                    valid_acc5es[last_epoch],
                    100 - valid_acc1es[last_epoch],
                )
            )
        # measure elapsed time and estimate what the remaining seeds need
        seed_time.update(time.time() - start_time)
        start_time = time.time()
        need_time = "Time Left: {:}".format(
            convert_secs2time(seed_time.avg * (len(seeds) - _is - 1), True)
        )
        logger.log(
            "\n<<<***>>> The {:02d}/{:02d}-th seed is {:} <finish> other procedures need {:}".format(
                _is, len(seeds), seed, need_time
            )
        )
    logger.close()