def eval(self, cand):
    """Evaluate one candidate architecture against the cached supernet.

    Rebuilds the supernet on GPU, loads the trained weights from
    ``config.net_cache``, and scores the candidate.

    Args:
        cand: candidate architecture encoding (format defined by the caller).

    Returns:
        The result of ``self._test_candidate(cand)``.

    Raises:
        FileNotFoundError: if the supernet weight cache is missing.
    """
    print('cand={}'.format(cand))
    self.model = SuperNetwork().cuda()
    # BUG FIX: the original used `assert os.path.exists(...)`, which is
    # silently stripped under `python -O`; validate explicitly so a missing
    # weight cache always fails loudly with a clear message.
    if not os.path.exists(config.net_cache):
        raise FileNotFoundError(
            'supernet weight cache not found: {}'.format(config.net_cache))
    load(self.model, config.net_cache)
    return self._test_candidate(cand)
# --- Ejemplo n.º 2 (scraped example separator; vote count: 0) ---
    def search(self, operations):
        """Run the evolutionary architecture search over *operations*.

        For up to ``self.max_epochs`` generations: evaluate the current
        population, keep the top-k candidates by accuracy, then build the next
        population from mutation + crossover topped up with random samples.

        Args:
            operations: per-layer lists of candidate operation ids (the
                search space, typically shrunk beforehand by ABS).

        Returns:
            The final top-k candidates from ``self.get_topk()``.
        """
        self.operations = operations
        self.model = SuperNetwork()
        self.layer = len(self.model.features)
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print('{} layer = {} population_num = {} select_num = {} mutation_num = {} crossover_num = {} random_num = {} max_epochs = {}'\
            .format(now, self.layer, self.population_num, self.select_num, self.mutation_num, self.crossover_num, self.population_num - \
                self.mutation_num - self.crossover_num, self.max_epochs))
        # Seed the initial population with random candidates.
        self.get_random(self.population_num)

        while self.epoch < self.max_epochs and len(self.candidates) > 0:
            print('epoch = {}'.format(self.epoch))
            self.sync_candidates()
            print('sync finish')

            # Maintain two leaderboards: the parent pool (select_num) used for
            # mutation/crossover, and a larger top-50 list used for logging.
            self.update_top_k(self.candidates,
                              k=self.select_num,
                              key=lambda x: self.vis_dict[x]['acc'],
                              reverse=True)
            self.update_top_k(self.candidates,
                              k=50,
                              key=lambda x: self.vis_dict[x]['acc'],
                              reverse=True)

            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
            print('{} epoch = {} : top {} result'.format(
                now, self.epoch, len(self.keep_top_k[50])))
            for i, cand in enumerate(self.keep_top_k[50]):
                # BUG FIX: the original read `flops` outside the
                # `if 'flops' in self.vis_dict[cand]` guard, which raises
                # NameError for the first candidate lacking a 'flops' entry
                # and prints a stale value from a previous iteration for
                # later ones. Default to 0 when the entry is absent.
                flops = self.vis_dict[cand].get('flops', 0)
                print('No.{} cand={} Top-1 acc = {} flops = {:.2f}M'.format(
                    i + 1, cand, self.vis_dict[cand]['acc'], flops / 1e6))

            crossover = self.get_crossover(self.select_num, self.crossover_num)
            mutation = self.get_mutation(self.select_num,
                                         self.population_num - len(crossover),
                                         self.m_prob)
            self.candidates = mutation + crossover
            # Top the population back up with fresh random candidates.
            self.get_random(self.population_num)
            self.epoch += 1
        topks = self.get_topk()
        self.reset()
        print('finish!')
        return topks
# --- Ejemplo n.º 3 (scraped example separator; vote count: 0) ---
def main():
    """Entry point: distributed supernet training, then evolutionary search.

    Reads hyper-parameters from the module-level ``args``. All ranks train the
    supernet; rank 0 additionally runs the evolutionary search and writes the
    final top-k checkpoint. Exits with status 1 if no GPU is available.
    """
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    # Pin this process to one GPU and seed everything so runs are repeatable.
    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    group_name = 'search'
    print('gpu device = %d' % args.gpu)
    # BUG FIX: the original `print("args = %s", args)` passed two positional
    # arguments to print() (emitting a literal "%s") instead of %-formatting.
    print('args = %s' % args)

    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         group_name=group_name)
    args.world_size = torch.distributed.get_world_size()
    # Per-process batch size: split the global batch across the world.
    args.batch_size = args.batch_size // args.world_size

    criterion_smooth = CrossEntropyLabelSmooth(args.classes, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()
    model = SuperNetwork()
    args.layers = len(model.features)
    model = model.cuda(args.gpu)
    model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)

    # Split parameters: conv weights (4-D) and the classifier get weight
    # decay; everything else (e.g. BN parameters) does not.
    all_parameters = model.parameters()
    weight_parameters = []
    for pname, p in model.named_parameters():
        if p.ndimension(
        ) == 4 or 'classifier.0.weight' in pname or 'classifier.0.bias' in pname:
            weight_parameters.append(p)
    weight_parameters_id = list(map(id, weight_parameters))
    other_parameters = list(
        filter(lambda p: id(p) not in weight_parameters_id, all_parameters))
    optimizer = torch.optim.SGD(
        [{
            'params': other_parameters
        }, {
            'params': weight_parameters,
            'weight_decay': args.weight_decay
        }],
        args.learning_rate,
        momentum=args.momentum,
    )

    # Linear LR decay to zero over the full training schedule.
    args.total_iters = args.epochs * per_epoch_iters
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda step: (1.0 - step / args.total_iters), last_epoch=-1)

    # Prepare data
    train_loader = get_train_dataloader(args.train_dir, args.batch_size,
                                        args.local_rank, args.total_iters)
    train_dataprovider = DataIterator(train_loader)
    val_loader = get_val_dataloader(args.test_dir)
    val_dataprovider = DataIterator(val_loader)

    # Load the shrunk search space found by ABS.
    # SECURITY NOTE(review): pickle.load executes arbitrary code from the
    # file; only load operations_path from a trusted location.
    operations = pickle.load(open(args.operations_path, 'rb'))
    print('operations={}'.format(operations))
    train(train_dataprovider, val_dataprovider, optimizer, scheduler, model,
          criterion_smooth, args, val_iters, args.seed, operations)

    # Only rank 0 runs the evolutionary search and writes checkpoints.
    if args.local_rank == 0:
        save(model.module, config.net_cache)
        evolution_trainer = EvolutionTrainer()
        topk = evolution_trainer.search(operations)
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        save_checkpoint({
            'topk': topk,
            'state_dict': model.state_dict(),
        }, config.checkpoint_cache)
        topk_str = get_topk_str(topk)
        print('{} |=> topk = {}, topk_str={}'.format(now, topk, topk_str))
# --- Ejemplo n.º 4 (scraped example separator; vote count: 0) ---
def main():
    """Entry point for Angle-Based Shrinking (ABS) of the supernet search space.

    All ranks train the supernet in stages; after each stage rank 0 shrinks
    the per-layer operation lists (ABS), checkpoints progress, and broadcasts
    the shrunk search space to the other ranks. The loop stops once the
    search-space size drops below ``config.shrinking_finish_threshold``.
    NOTE(review): this function appears to continue beyond the visible chunk.
    """
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    # Pin this process to one GPU and seed everything so runs are repeatable.
    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    group_name = 'search_space_shrinking'
    print('gpu device = %d' % args.gpu)
    # NOTE(review): print() here receives two positional arguments, so it
    # emits a literal "%s" rather than formatting — likely a leftover from a
    # logging call; consider `print('args = %s' % args)`.
    print("args = %s", args)

    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         group_name=group_name)
    args.world_size = torch.distributed.get_world_size()
    # Per-process batch size: split the global batch across the world.
    args.batch_size = args.batch_size // args.world_size

    criterion_smooth = CrossEntropyLabelSmooth(args.classes, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    # Prepare model: base_model keeps the initial weights used as the anchor
    # for the angle metric; model is the one actually trained (DDP-wrapped).
    base_model = SuperNetwork().cuda(args.gpu)
    model = SuperNetwork().cuda(args.gpu)
    model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)

    # Max shrinking iterations
    iters = int(
        (config.layers * (config.op_num + 1) - len(config.stage_last_id) - 1) /
        config.per_stage_drop_num)
    args.total_iters = args.epochs * per_epoch_iters
    optimizer, scheduler = get_optimizer_schedule(model, args)

    # Prepare data
    train_loader = get_train_dataloader(args.train_dir, args.batch_size,
                                        args.local_rank, args.total_iters)
    train_dataprovider = DataIterator(train_loader)

    # Build the initial search space: every op in every layer; layers other
    # than layer 0 and stage-final layers also get -1 (the "skip" choice).
    operations = [list(range(config.op_num)) for i in range(config.layers)]
    for i in range(len(operations)):
        if i not in config.stage_last_id and not i == 0:
            operations[i].append(-1)
    print('operations={}'.format(operations))

    seed = args.seed
    start_iter, ops_dim, modify_initial_model = 0, 0, False
    checkpoint_tar = config.checkpoint_cache

    # Resume from a previous shrinking run, if a checkpoint exists.
    if os.path.exists(checkpoint_tar):
        checkpoint = torch.load(
            checkpoint_tar,
            map_location={'cuda:0': 'cuda:{}'.format(args.local_rank)})
        start_iter = checkpoint['iter'] + 1
        seed = checkpoint['seed']
        operations = checkpoint['operations']
        modify_initial_model = checkpoint['modify_initial_model']
        model.load_state_dict(checkpoint['state_dict'])

        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print('{} load checkpoint..., iter = {}, operations={}'.format(
            now, start_iter, operations))

        # reset the scheduler: replay the steps already consumed by earlier
        # stages (first stage has its own epoch count), stopping at min_lr.
        cur_iters = (config.first_stage_epochs +
                     (start_iter - 1) * config.other_stage_epochs
                     ) * per_epoch_iters if start_iter > 0 else 0
        for _ in range(cur_iters):
            if scheduler.get_lr()[0] > args.min_lr:
                scheduler.step()
        print('resume from iters={}'.format(cur_iters))

    # Save the base weights for computing angle
    if start_iter == 0 and args.local_rank == 0:
        torch.save(model.module.state_dict(), config.initial_net_cache)
        print('save base weights ...')

    for i in range(start_iter, iters):
        print('search space size: {}'.format(
            get_search_space_size(operations)))  # 6 ^ 21
        # ABS finishes when the size of search space is less than the threshold
        if get_search_space_size(
                operations
        ) <= config.shrinking_finish_threshold:  # stop once the search space is small enough
            # save the shrunk search space
            pickle.dump(operations, open(args.operations_path, 'wb'))
            break

        # First stage trains longer than the subsequent shrinking stages.
        per_stage_iters = config.other_stage_epochs * \
            per_epoch_iters if i > 0 else config.first_stage_epochs * per_epoch_iters

        seed = train(train_dataprovider, optimizer, scheduler, model,
                     criterion_smooth, operations, i, per_stage_iters, seed,
                     args)

        if args.local_rank == 0:
            # Start shrinking the search space
            load(
                base_model,
                config.initial_net_cache)  # initial_net_cache='base_weight.pt'
            operations = ABS(base_model, model.module, operations, i)
            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))

            # Modify the base weights for only one time
            if not modify_initial_model and (
                    i + 1
            ) * config.per_stage_drop_num > config.modify_initial_model_threshold:
                torch.save(model.module.state_dict(), config.initial_net_cache)
                modify_initial_model = True
                print('Modify base weights ...')

            save_checkpoint(
                {
                    'modify_initial_model': modify_initial_model,
                    'operations': operations,
                    'iter': i,
                    'state_dict': model.state_dict(),
                    'seed': seed
                }, config.checkpoint_cache)

            # Flatten the per-layer op lists for broadcasting.
            operations = merge_ops(operations)
            ops_dim = len(operations)

        # Synchronize variable cross multiple processes
        ops_dim = broadcast(obj=ops_dim, src=0)
        if not args.local_rank == 0:
            # NOTE(review): np.int was removed in NumPy 1.24; use plain `int`
            # (or np.int64) when updating this code.
            operations = np.zeros(ops_dim, dtype=np.int)
        operations = broadcast(obj=operations, src=0)
        operations = split_ops(operations)