def eval(self, cand):
    """Evaluate a single candidate architecture on the trained supernet.

    Loads the supernet weights from ``config.net_cache`` into a fresh
    ``SuperNetwork`` and scores *cand* via ``self._test_candidate``.

    NOTE(review): this method shadows the builtin ``eval``; renaming it
    would change the public interface, so it is left as-is.

    Args:
        cand: encoded candidate architecture (opaque here; forwarded
            unchanged to ``_test_candidate``).

    Returns:
        Whatever ``self._test_candidate(cand)`` returns.

    Raises:
        FileNotFoundError: if the supernet checkpoint is missing.
    """
    print('cand={}'.format(cand))
    self.model = SuperNetwork().cuda()
    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would turn a missing checkpoint into a crash
    # deep inside `load` instead of a clear error here.
    if not os.path.exists(config.net_cache):
        raise FileNotFoundError(
            'supernet checkpoint not found: {}'.format(config.net_cache))
    load(self.model, config.net_cache)
    return self._test_candidate(cand)
def search(self, operations):
    """Run the evolutionary architecture search over *operations*.

    Seeds a random population, then alternates selection, crossover and
    mutation until ``max_epochs`` is reached or the candidate pool
    empties, and finally returns the top-k architectures found.
    """
    self.operations = operations
    self.model = SuperNetwork()
    self.layer = len(self.model.features)

    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    random_num = self.population_num - self.mutation_num - self.crossover_num
    print('{} layer = {} population_num = {} select_num = {} mutation_num = {} crossover_num = {} random_num = {} max_epochs = {}'
          .format(stamp, self.layer, self.population_num, self.select_num,
                  self.mutation_num, self.crossover_num, random_num,
                  self.max_epochs))

    # Initial random population.
    self.get_random(self.population_num)

    while self.epoch < self.max_epochs and len(self.candidates) > 0:
        print('epoch = {}'.format(self.epoch))
        self.sync_candidates()
        print('sync finish')

        # Both top-k tables are ranked by accuracy, best first.
        def by_acc(x):
            return self.vis_dict[x]['acc']

        self.update_top_k(self.candidates, k=self.select_num,
                          key=by_acc, reverse=True)
        self.update_top_k(self.candidates, k=50, key=by_acc, reverse=True)

        stamp = time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time()))
        print('{} epoch = {} : top {} result'.format(
            stamp, self.epoch, len(self.keep_top_k[50])))
        for rank, cand in enumerate(self.keep_top_k[50], start=1):
            info = self.vis_dict[cand]
            if 'flops' in info:
                print('No.{} cand={} Top-1 acc = {} flops = {:.2f}M'.format(
                    rank, cand, info['acc'], info['flops'] / 1e6))

        # Breed the next generation, then top the population back up
        # with fresh random samples.
        crossover = self.get_crossover(self.select_num, self.crossover_num)
        mutation = self.get_mutation(self.select_num,
                                     self.population_num - len(crossover),
                                     self.m_prob)
        self.candidates = mutation + crossover
        self.get_random(self.population_num)
        self.epoch += 1

    result = self.get_topk()
    self.reset()
    print('finish!')
    return result
def main():
    """Entry point for the supernet-training + evolutionary-search stage.

    Sets up distributed training (one process per GPU via NCCL/apex DDP),
    trains the supernet, then on rank 0 runs the evolutionary search over
    the shrunk operation space and checkpoints the top-k result.

    Relies on module-level globals: ``args``, ``per_epoch_iters``,
    ``val_iters`` and the project helpers imported at file scope.
    """
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    # Seeding + cuDNN flags for reproducible distributed runs.
    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    group_name = 'search'
    print('gpu device = %d' % args.gpu)
    # BUG FIX: the original `print("args = %s", args)` used a logging-style
    # call with print(), which printed the literal "%s"; format explicitly.
    print('args = %s' % args)

    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         group_name=group_name)
    args.world_size = torch.distributed.get_world_size()
    # Per-process batch size: global batch divided across ranks.
    args.batch_size = args.batch_size // args.world_size

    criterion_smooth = CrossEntropyLabelSmooth(args.classes,
                                               args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    model = SuperNetwork()
    args.layers = len(model.features)
    model = model.cuda(args.gpu)
    model = apex.parallel.DistributedDataParallel(model,
                                                  delay_allreduce=True)

    # Parameter split: 4-D conv weights and the classifier get weight
    # decay; everything else (BN params, biases) does not.
    weight_parameters = [
        p for pname, p in model.named_parameters()
        if p.ndimension() == 4 or 'classifier.0.weight' in pname
        or 'classifier.0.bias' in pname
    ]
    weight_parameters_id = set(map(id, weight_parameters))
    other_parameters = [
        p for p in model.parameters() if id(p) not in weight_parameters_id
    ]
    optimizer = torch.optim.SGD(
        [{
            'params': other_parameters
        }, {
            'params': weight_parameters,
            'weight_decay': args.weight_decay
        }],
        args.learning_rate,
        momentum=args.momentum,
    )

    args.total_iters = args.epochs * per_epoch_iters
    # Linear LR decay from the base rate down to 0 over total_iters steps.
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lambda step: (1.0 - step / args.total_iters),
        last_epoch=-1)

    # Prepare data
    train_loader = get_train_dataloader(args.train_dir, args.batch_size,
                                        args.local_rank, args.total_iters)
    train_dataprovider = DataIterator(train_loader)
    val_loader = get_val_dataloader(args.test_dir)
    val_dataprovider = DataIterator(val_loader)

    # Load the shrunk search space found by ABS.
    # BUG FIX: use a context manager so the file handle is closed
    # (the original `pickle.load(open(...))` leaked it).
    with open(args.operations_path, 'rb') as f:
        operations = pickle.load(f)
    print('operations={}'.format(operations))

    train(train_dataprovider, val_dataprovider, optimizer, scheduler,
          model, criterion_smooth, args, val_iters, args.seed, operations)

    # Evolutionary search and checkpointing happen on rank 0 only.
    if args.local_rank == 0:
        save(model.module, config.net_cache)
        evolution_trainer = EvolutionTrainer()
        topk = evolution_trainer.search(operations)
        now = time.strftime('%Y-%m-%d %H:%M:%S',
                            time.localtime(time.time()))
        save_checkpoint({
            'topk': topk,
            'state_dict': model.state_dict(),
        }, config.checkpoint_cache)
        topk_str = get_topk_str(topk)
        print('{} |=> topk = {}, topk_str={}'.format(now, topk, topk_str))
def main():
    """Entry point for the ABS search-space-shrinking stage.

    Trains the supernet in stages; after each stage, rank 0 drops the
    worst operators via angle-based shrinking (ABS) and the shrunk
    operation list is broadcast to all ranks. Stops (and persists the
    space) once it is smaller than ``config.shrinking_finish_threshold``.

    Relies on module-level globals: ``args``, ``per_epoch_iters``,
    ``config`` and the project helpers imported at file scope.
    """
    if not torch.cuda.is_available():
        print('no gpu device available')
        sys.exit(1)

    # Seeding + cuDNN flags for reproducible distributed runs.
    num_gpus = torch.cuda.device_count()
    np.random.seed(args.seed)
    args.gpu = args.local_rank % num_gpus
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.deterministic = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    group_name = 'search_space_shrinking'
    print('gpu device = %d' % args.gpu)
    # BUG FIX: the original `print("args = %s", args)` used a logging-style
    # call with print(), which printed the literal "%s"; format explicitly.
    print('args = %s' % args)

    torch.distributed.init_process_group(backend='nccl',
                                         init_method='env://',
                                         group_name=group_name)
    args.world_size = torch.distributed.get_world_size()
    args.batch_size = args.batch_size // args.world_size

    criterion_smooth = CrossEntropyLabelSmooth(args.classes,
                                               args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()

    # Prepare model: `base_model` holds the initial weights used for the
    # angle computation; `model` is the supernet being trained.
    base_model = SuperNetwork().cuda(args.gpu)
    model = SuperNetwork().cuda(args.gpu)
    model = apex.parallel.DistributedDataParallel(model,
                                                  delay_allreduce=True)

    # Max shrinking iterations
    iters = int(
        (config.layers * (config.op_num + 1) - len(config.stage_last_id) -
         1) / config.per_stage_drop_num)
    args.total_iters = args.epochs * per_epoch_iters
    optimizer, scheduler = get_optimizer_schedule(model, args)

    # Prepare data
    train_loader = get_train_dataloader(args.train_dir, args.batch_size,
                                        args.local_rank, args.total_iters)
    train_dataprovider = DataIterator(train_loader)

    # Candidate operator ids per layer; layers other than the first and
    # the stage-final ones may additionally be skipped entirely (-1).
    operations = [list(range(config.op_num)) for i in range(config.layers)]
    for i in range(len(operations)):
        if i not in config.stage_last_id and not i == 0:
            operations[i].append(-1)
    print('operations={}'.format(operations))

    seed = args.seed
    start_iter, ops_dim, modify_initial_model = 0, 0, False
    checkpoint_tar = config.checkpoint_cache
    if os.path.exists(checkpoint_tar):
        checkpoint = torch.load(
            checkpoint_tar,
            map_location={'cuda:0': 'cuda:{}'.format(args.local_rank)})
        start_iter = checkpoint['iter'] + 1
        seed = checkpoint['seed']
        operations = checkpoint['operations']
        modify_initial_model = checkpoint['modify_initial_model']
        model.load_state_dict(checkpoint['state_dict'])
        now = time.strftime('%Y-%m-%d %H:%M:%S',
                            time.localtime(time.time()))
        print('{} load checkpoint..., iter = {}, operations={}'.format(
            now, start_iter, operations))

    # reset the scheduler
    # NOTE(review): placed at function level (not inside the resume
    # branch) because the `else 0` arm is only reachable on a fresh run
    # — confirm against the original, un-collapsed source.
    cur_iters = (config.first_stage_epochs +
                 (start_iter - 1) * config.other_stage_epochs
                 ) * per_epoch_iters if start_iter > 0 else 0
    for _ in range(cur_iters):
        # scheduler.get_lr() is deprecated in newer PyTorch in favor of
        # get_last_lr(); kept as-is to preserve behavior on old versions.
        if scheduler.get_lr()[0] > args.min_lr:
            scheduler.step()
    print('resume from iters={}'.format(cur_iters))

    # Save the base weights for computing angle
    if start_iter == 0 and args.local_rank == 0:
        torch.save(model.module.state_dict(), config.initial_net_cache)
        print('save base weights ...')

    for i in range(start_iter, iters):
        print('search space size: {}'.format(
            get_search_space_size(operations)))  # e.g. 6 ^ 21

        # ABS finishes when the search space is small enough; persist the
        # shrunk space and stop.
        if get_search_space_size(
                operations) <= config.shrinking_finish_threshold:
            # BUG FIX: use a context manager so the file handle is closed
            # (the original `pickle.dump(..., open(...))` leaked it).
            with open(args.operations_path, 'wb') as f:
                pickle.dump(operations, f)
            break

        # The first stage trains longer than the later ones.
        per_stage_iters = config.other_stage_epochs * \
            per_epoch_iters if i > 0 else config.first_stage_epochs * per_epoch_iters
        seed = train(train_dataprovider, optimizer, scheduler, model,
                     criterion_smooth, operations, i, per_stage_iters,
                     seed, args)

        if args.local_rank == 0:
            # Start shrinking the search space
            load(base_model,
                 config.initial_net_cache)  # initial_net_cache='base_weight.pt'
            operations = ABS(base_model, model.module, operations, i)
            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))

            # Modify the base weights for only one time
            if not modify_initial_model and (
                    i + 1
            ) * config.per_stage_drop_num > config.modify_initial_model_threshold:
                torch.save(model.module.state_dict(),
                           config.initial_net_cache)
                modify_initial_model = True
                print('Modify base weights ...')

            save_checkpoint(
                {
                    'modify_initial_model': modify_initial_model,
                    'operations': operations,
                    'iter': i,
                    'state_dict': model.state_dict(),
                    'seed': seed
                }, config.checkpoint_cache)

            operations = merge_ops(operations)
            ops_dim = len(operations)

        # Synchronize variable cross multiple processes
        ops_dim = broadcast(obj=ops_dim, src=0)
        if not args.local_rank == 0:
            # BUG FIX: np.int was removed in NumPy 1.24 (deprecated since
            # 1.20); the builtin `int` is the documented replacement.
            operations = np.zeros(ops_dim, dtype=int)
        operations = broadcast(obj=operations, src=0)
        operations = split_ops(operations)