def main():
    start = time.time()

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    torch.cuda.set_device(config.local_rank % len(config.gpus))
    torch.distributed.init_process_group(backend='nccl', init_method='env://')
    config.world_size = torch.distributed.get_world_size()
    config.total_batch = config.world_size * config.batch_size

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    CLASSES = 1000
    channels = [32, 16, 24, 40, 80, 96, 192, 320, 1280]
    steps = [1, 1, 2, 3, 4, 3, 3, 1, 1]
    strides = [2, 1, 2, 2, 1, 2, 1, 1, 1]

    criterion = nn.CrossEntropyLoss()
    criterion_latency = LatencyLoss(channels[2:9], steps[2:8], strides[2:8])
    criterion = criterion.cuda()
    criterion_latency = criterion_latency.cuda()

    model = Network(channels, steps, strides, CLASSES, criterion)
    model = model.to(device)
    # model = DDP(model, delay_allreduce=True)
    # Use native DDP instead of Apex DDP: a custom loss cannot access
    # model.parameters() on an Apex-wrapped model,
    # see https://github.com/NVIDIA/apex/issues/457
    model = torch.nn.parallel.DistributedDataParallel(
        model,
        device_ids=[config.local_rank],
        output_device=config.local_rank)

    logger.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                config.w_lr,
                                momentum=config.w_momentum,
                                weight_decay=config.w_weight_decay)

    # image_dir="/googol/atlas/public/cv/ILSVRC/Data/"
    # use soft link:
    # `mkdir ./data/imagenet && ln -s /googol/atlas/public/cv/ILSVRC/Data/CLS-LOC/* ./data/imagenet/`
    train_data = get_imagenet_iter_torch(
        type='train',
        image_dir=config.data_path + "/" + config.dataset.lower(),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        local_rank=config.local_rank,
        crop=224,
        device_id=config.local_rank,
        num_gpus=config.gpus,
        portion=config.train_portion)
    valid_data = get_imagenet_iter_torch(
        type='train',
        image_dir=config.data_path + "/" + config.dataset.lower(),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        local_rank=config.local_rank,
        crop=224,
        device_id=config.local_rank,
        num_gpus=config.gpus,
        portion=config.val_portion)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(config.epochs), eta_min=config.w_lr_min)

    if len(config.gpus) > 1:
        architect = Architect(model.module, config)
    else:
        architect = Architect(model, config)

    best_top1 = 0.

    for epoch in range(config.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logger.info('epoch %d lr %e', epoch, lr)
        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_top1, train_loss = train(train_data, valid_data, model, architect,
                                       criterion, criterion_latency, optimizer,
                                       lr, epoch, writer)
        logger.info('Train top1 %f', train_top1)

        # validation (only on the last epoch)
        top1 = 0
        if config.epochs - epoch <= 1:
            top1, loss = infer(valid_data, model, epoch, criterion, writer)
            logger.info('valid top1 %f', top1)

        if len(config.gpus) > 1:
            genotype = model.module.genotype()
        else:
            genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # plot the genotype as an image
        plot_path = os.path.join(config.plot_path, "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        plot(genotype.normal, plot_path + "-normal")
        plot(genotype.reduce, plot_path + "-reduce")

        # save checkpoint
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        print("")

    utils.time(time.time() - start)
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
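# --------------------------------------------------------------------------
# Usage sketch (not part of the original script): because the search above
# initializes torch.distributed with init_method='env://' and reads
# config.local_rank, it is meant to be launched once per GPU by a launcher
# that sets the rendezvous environment variables, e.g.
#
#   python -m torch.distributed.launch --nproc_per_node=8 <this_search_script>.py
#   # or, on recent PyTorch releases:
#   torchrun --nproc_per_node=8 <this_search_script>.py
#
# The script name is a placeholder; any additional flags (data path, batch
# size, seed, ...) come from the project's own config parser.
# --------------------------------------------------------------------------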
def main():
    args.exp_path /= f'{args.gpu}_{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(Path(args.exp_path), scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.exp_path / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.seed is None:
        raise Exception('Please specify a random seed via --seed.')

    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # ================================================
    # Optional GPU-memory pre-allocation (left disabled): query free memory
    # with nvidia-smi, then allocate and free ~85% of it so the process
    # reserves the memory up front.
    # total, used = os.popen(
    #     'nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader'
    # ).read().split('\n')[args.gpu].split(',')
    # total = int(total)
    # used = int(used)
    # print('Total GPU mem:', total, 'used:', used)
    # try:
    #     block_mem = 0.85 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # except RuntimeError as err:
    #     print(err)
    #     block_mem = 0.8 * (total - used)
    #     print(block_mem)
    #     x = torch.empty((256, 1024, int(block_mem))).cuda()
    #     del x
    # print('reuse mem now ...')
    # ================================================

    logging.info(f'GPU device = {args.gpu}')
    logging.info(f'args = {args}')

    criterion = nn.CrossEntropyLoss().to(device)
    setting = args.location
    model = Network(args.init_ch, 10, args.layers, criterion, setting)

    checkpoint = None
    previous_epochs = 0
    if args.checkpoint_path:
        checkpoint = torch.load(args.checkpoint_path)
        utils.load(model, checkpoint['state_dict'], False)
        previous_epochs = checkpoint['epoch']
        args.epochs -= previous_epochs
        if args.epochs <= 0:
            raise Exception('args.epochs is too small.')

    if use_DataParallel:
        print('use Data Parallel')
        model = nn.parallel.DataParallel(model)
        model = model.cuda()
        module = model.module
        torch.cuda.manual_seed_all(args.seed)
    else:
        model = model.to(device)
        module = model

    param_size = utils.count_parameters_in_MB(model)
    logging.info(f'param size = {param_size}MB')

    # separate architecture/attention parameters from weight parameters
    # (module is the unwrapped model in both branches above)
    arch_and_attn_params = list(map(id, module.arch_and_attn_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_and_attn_params,
                           module.parameters())

    optimizer = optim.SGD(weight_params,
                          args.lr,
                          momentum=args.momentum,
                          weight_decay=args.wd)
    if checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=8)  # from 2
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True,
        num_workers=8)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     args.epochs,
                                                     eta_min=args.lr_min)
    if checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler'])

    arch = Arch(model, criterion, args)
    if checkpoint:
        arch.optimizer.load_state_dict(checkpoint['arch_optimizer'])

    for epoch in tqdm(range(args.epochs), desc='Total Progress'):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info(f'\nEpoch: {epoch} lr: {lr}')

        gen = module.genotype()
        logging.info(f'Genotype: {gen}')
        print(F.softmax(module.alphas_normal, dim=-1))
        print(F.softmax(module.alphas_reduce, dim=-1))
        if module.betas_normal is not None:
            print(F.softmax(module.betas_normal, dim=-1))
            print(F.softmax(module.betas_reduce, dim=-1))
        if module.gammas_normal is not None:
            print(F.softmax(module.gammas_normal, dim=-1))
            print(F.softmax(module.gammas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch,
                                     criterion, optimizer, lr, epoch + 1)
        logging.info(f'train acc: {train_acc}')

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch + 1)
        logging.info(f'valid acc: {valid_acc}')

        utils.save(model, args.exp_path / 'search.pt')
        utils.save_checkpoint(
            {
                'epoch': epoch + 1 + previous_epochs,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'arch_optimizer': arch.optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }, False, args.exp_path)

    gen = module.genotype()
    gen_path = args.exp_path / 'genotype.json'
    utils.save_genotype(gen, gen_path)
    logging.info(f'Result genotype: {gen}')
class neural_architecture_search():
    def __init__(self, args):
        self.args = args

        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        if self.args.distributed:
            # init distributed environment
            self.rank, self.world_size, self.device = init_dist(port=self.args.port)
            self.seed = self.rank * self.args.seed
        else:
            torch.cuda.set_device(self.args.gpu)
            self.device = torch.device("cuda")
            self.rank = 0
            self.seed = self.args.seed
            self.world_size = 1

        if self.args.fix_seedcudnn:
            random.seed(self.seed)
            torch.backends.cudnn.deterministic = True
            np.random.seed(self.seed)
            cudnn.benchmark = False
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
        else:
            np.random.seed(self.seed)
            cudnn.benchmark = True
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)

        self.path = os.path.join(generate_date, self.args.save)
        if self.rank == 0:
            utils.create_exp_dir(generate_date, self.path,
                                 scripts_to_save=glob.glob('*.py'))
            logging.basicConfig(stream=sys.stdout,
                                level=logging.INFO,
                                format=log_format,
                                datefmt='%m/%d %I:%M:%S %p')
            fh = logging.FileHandler(os.path.join(self.path, 'log.txt'))
            fh.setFormatter(logging.Formatter(log_format))
            logging.getLogger().addHandler(fh)
            logging.info("self.args = %s", self.args)
            self.logger = tensorboardX.SummaryWriter(
                './runs/' + generate_date + '/nas_{}'.format(self.args.remark))
        else:
            self.logger = None

        # set the default resource_lambda for each method
        if self.args.resource_efficient:
            if self.args.method == 'policy_gradient':
                default_resource_lambda = 1e-4 if self.args.log_penalty else 1e-5
            if self.args.method == 'reparametrization':
                default_resource_lambda = 1e-2 if self.args.log_penalty else 1e-5
            if self.args.method == 'discrete':
                default_resource_lambda = 1e-2 if self.args.log_penalty else 1e-4
            # default_lambda is the module-level argparse default of --resource_lambda;
            # only override it when the user did not set a value explicitly
            if self.args.resource_lambda == default_lambda:
                self.args.resource_lambda = default_resource_lambda

        # initialize loss function
        self.criterion = nn.CrossEntropyLoss().to(self.device)

        # initialize model
        self.init_model()

        # calculate model param size
        if self.rank == 0:
            logging.info("param size = %fMB",
                         utils.count_parameters_in_MB(self.model))
        self.model._logger = self.logger
        self.model._logging = logging

        # initialize optimizer
        self.init_optimizer()

        # initialize dataset loaders
        self.init_loaddata()

        self.update_theta = True
        self.update_alpha = True

    def init_model(self):
        self.model = Network(self.args.init_channels, CIFAR_CLASSES,
                             self.args.layers, self.criterion, self.args,
                             self.rank, self.world_size)
        self.model.to(self.device)
        if self.args.distributed:
            broadcast_params(self.model)
        for v in self.model.parameters():
            if v.requires_grad:
                if v.grad is None:
                    v.grad = torch.zeros_like(v)
        self.model.normal_log_alpha.grad = torch.zeros_like(self.model.normal_log_alpha)
        self.model.reduce_log_alpha.grad = torch.zeros_like(self.model.reduce_log_alpha)

    def init_optimizer(self):
        if self.args.distributed:
            self.optimizer = torch.optim.SGD(
                [
                    param for name, param in self.model.named_parameters()
                    if name != 'normal_log_alpha' and name != 'reduce_log_alpha'
                ],
                self.args.learning_rate,
                momentum=self.args.momentum,
                weight_decay=self.args.weight_decay)
            self.arch_optimizer = torch.optim.Adam(
                [
                    param for name, param in self.model.named_parameters()
                    if name == 'normal_log_alpha' or name == 'reduce_log_alpha'
                ],
                lr=self.args.arch_learning_rate,
                betas=(0.5, 0.999),
                weight_decay=self.args.arch_weight_decay)
        else:
            self.optimizer = torch.optim.SGD(self.model.parameters(),
                                             self.args.learning_rate,
                                             momentum=self.args.momentum,
                                             weight_decay=self.args.weight_decay)
            self.arch_optimizer = torch.optim.SGD(self.model.arch_parameters(),
                                                  lr=self.args.arch_learning_rate)

    def init_loaddata(self):
        train_transform, valid_transform = utils._data_transforms_cifar10(self.args)
        train_data = dset.CIFAR10(root=self.args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=self.args.data,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

        if self.args.seed:
            def worker_init_fn():
                seed = self.seed
                np.random.seed(seed)
                random.seed(seed)
                torch.manual_seed(seed)
                return
        else:
            worker_init_fn = None

        if self.args.distributed:
            train_sampler = DistributedSampler(train_data)
            valid_sampler = DistributedSampler(valid_data)
            self.train_queue = torch.utils.data.DataLoader(
                train_data,
                batch_size=self.args.batch_size // self.world_size,
                shuffle=False,
                num_workers=0,
                pin_memory=False,
                sampler=train_sampler)
            self.valid_queue = torch.utils.data.DataLoader(
                valid_data,
                batch_size=self.args.batch_size // self.world_size,
                shuffle=False,
                num_workers=0,
                pin_memory=False,
                sampler=valid_sampler)
        else:
            self.train_queue = torch.utils.data.DataLoader(
                train_data,
                batch_size=self.args.batch_size,
                shuffle=True,
                pin_memory=False,
                num_workers=2)
            self.valid_queue = torch.utils.data.DataLoader(
                valid_data,
                batch_size=self.args.batch_size,
                shuffle=False,
                pin_memory=False,
                num_workers=2)

    def main(self):
        # lr scheduler: cosine annealing
        # temp scheduler: linear annealing (self-defined in utils)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, float(self.args.epochs),
            eta_min=self.args.learning_rate_min)
        self.temp_scheduler = utils.Temp_Scheduler(self.args.epochs,
                                                   self.model._temp,
                                                   self.args.temp,
                                                   temp_min=self.args.temp_min)

        for epoch in range(self.args.epochs):
            if self.args.random_sample_pretrain:
                if epoch < self.args.random_sample_pretrain_epoch:
                    self.args.random_sample = True
                else:
                    self.args.random_sample = False

            self.scheduler.step()
            if self.args.temp_annealing:
                self.model._temp = self.temp_scheduler.step()
            self.lr = self.scheduler.get_lr()[0]

            if self.rank == 0:
                logging.info('epoch %d lr %e temp %e', epoch, self.lr, self.model._temp)
                self.logger.add_scalar('epoch_temp', self.model._temp, epoch)
                logging.info(self.model.normal_log_alpha)
                logging.info(self.model.reduce_log_alpha)
                logging.info(self.model._get_weights(self.model.normal_log_alpha[0]))
                logging.info(self.model._get_weights(self.model.reduce_log_alpha[0]))

            genotype_edge_all = self.model.genotype_edge_all()
            if self.rank == 0:
                logging.info('genotype_edge_all = %s', genotype_edge_all)
                # create genotypes.txt file
                txt_name = self.args.remark + '_genotype_edge_all_epoch' + str(epoch)
                utils.txt('genotype', self.args.save, txt_name,
                          str(genotype_edge_all), generate_date)

            self.model.train()
            train_acc, loss, error_loss, loss_alpha = self.train(epoch, logging)
            if self.rank == 0:
                logging.info('train_acc %f', train_acc)
                self.logger.add_scalar("epoch_train_acc", train_acc, epoch)
                self.logger.add_scalar("epoch_train_error_loss", error_loss, epoch)
                if self.args.dsnas:
                    self.logger.add_scalar("epoch_train_alpha_loss", loss_alpha, epoch)

            # validation
            self.model.eval()
            valid_acc, valid_obj = self.infer(epoch)
            if self.args.gen_max_child:
                self.args.gen_max_child_flag = True
                valid_acc_max_child, valid_obj_max_child = self.infer(epoch)
                self.args.gen_max_child_flag = False

            if self.rank == 0:
                logging.info('valid_acc %f', valid_acc)
                self.logger.add_scalar("epoch_valid_acc", valid_acc, epoch)
                if self.args.gen_max_child:
                    logging.info('valid_acc_argmax_alpha %f', valid_acc_max_child)
                    self.logger.add_scalar("epoch_valid_acc_argmax_alpha",
                                           valid_acc_max_child, epoch)

                utils.save(self.model, os.path.join(self.path, 'weights.pt'))

        if self.rank == 0:
            logging.info(self.model.normal_log_alpha)
            logging.info(self.model.reduce_log_alpha)
            genotype_edge_all = self.model.genotype_edge_all()
            logging.info('genotype_edge_all = %s', genotype_edge_all)

    def train(self, epoch, logging):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        grad = utils.AvgrageMeter()

        normal_resource_gradient = 0
        reduce_resource_gradient = 0
        normal_loss_gradient = 0
        reduce_loss_gradient = 0
        normal_total_gradient = 0
        reduce_total_gradient = 0

        loss_alpha = None
        count = 0
        for step, (input, target) in enumerate(self.train_queue):
            if self.args.alternate_update:
                if step % 2 == 0:
                    self.update_theta = True
                    self.update_alpha = False
                else:
                    self.update_theta = False
                    self.update_alpha = True

            n = input.size(0)
            input = input.to(self.device)
            target = target.to(self.device, non_blocking=True)

            if self.args.snas:
                logits, logits_aux, penalty, op_normal, op_reduce = self.model(input)
                error_loss = self.criterion(logits, target)
                if self.args.auxiliary:
                    loss_aux = self.criterion(logits_aux, target)
                    error_loss += self.args.auxiliary_weight * loss_aux
            if self.args.dsnas:
                logits, error_loss, loss_alpha, penalty = self.model(
                    input, target, self.criterion)

            num_normal = self.model.num_normal
            num_reduce = self.model.num_reduce
            normal_arch_entropy = self.model._arch_entropy(self.model.normal_log_alpha)
            reduce_arch_entropy = self.model._arch_entropy(self.model.reduce_log_alpha)

            if self.args.resource_efficient:
                if self.args.method == 'policy_gradient':
                    resource_penalty = (penalty[2]) / 6 + self.args.ratio * (penalty[7]) / 2
                    log_resource_penalty = (penalty[35]) / 6 + self.args.ratio * (penalty[36]) / 2
                elif self.args.method == 'reparametrization':
                    resource_penalty = (penalty[26]) / 6 + self.args.ratio * (penalty[25]) / 2
                    log_resource_penalty = (penalty[37]) / 6 + self.args.ratio * (penalty[38]) / 2
                elif self.args.method == 'discrete':
                    resource_penalty = (penalty[28]) / 6 + self.args.ratio * (penalty[27]) / 2
                    log_resource_penalty = (penalty[39]) / 6 + self.args.ratio * (penalty[40]) / 2
                elif self.args.method == 'none':
                    # TODO
                    resource_penalty = torch.zeros(1).cuda()
                    log_resource_penalty = torch.zeros(1).cuda()
                else:
                    logging.info(
                        "invalid --method, please choose one of "
                        "'policy_gradient', 'discrete', 'reparametrization', 'none'")
                    sys.exit(1)
            else:
                resource_penalty = torch.zeros(1).cuda()
                log_resource_penalty = torch.zeros(1).cuda()

            if self.args.log_penalty:
                resource_loss = self.model._resource_lambda * log_resource_penalty
            else:
                resource_loss = self.model._resource_lambda * resource_penalty

            if self.args.loss:
                if self.args.snas:
                    loss = resource_loss.clone() + error_loss.clone()
                elif self.args.dsnas:
                    loss = resource_loss.clone()
                else:
                    # child_coef / normal_one_hot_prob / reduce_one_hot_prob are
                    # defined elsewhere in the original code
                    loss = resource_loss.clone() + -child_coef * (
                        torch.log(normal_one_hot_prob) +
                        torch.log(reduce_one_hot_prob)).sum()
            else:
                if self.args.snas or self.args.dsnas:
                    loss = error_loss.clone()

            if self.args.distributed:
                loss.div_(self.world_size)
                error_loss.div_(self.world_size)
                resource_loss.div_(self.world_size)
                if self.args.dsnas:
                    loss_alpha.div_(self.world_size)

            # logging gradient
            count += 1
            if self.args.resource_efficient:
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()
                resource_loss.backward(retain_graph=True)
                if not self.args.random_sample:
                    normal_resource_gradient += self.model.normal_log_alpha.grad
                    reduce_resource_gradient += self.model.reduce_log_alpha.grad
            if self.args.snas:
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()
                error_loss.backward(retain_graph=True)
                if not self.args.random_sample:
                    normal_loss_gradient += self.model.normal_log_alpha.grad
                    reduce_loss_gradient += self.model.reduce_log_alpha.grad
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()

            if self.args.snas or (not self.args.random_sample and not self.args.dsnas):
                loss.backward()
            if not self.args.random_sample:
                normal_total_gradient += self.model.normal_log_alpha.grad
                reduce_total_gradient += self.model.reduce_log_alpha.grad

            if self.args.distributed:
                reduce_tensorgradients(self.model.parameters(), sync=True)
                nn.utils.clip_grad_norm_([
                    param for name, param in self.model.named_parameters()
                    if name != 'normal_log_alpha' and name != 'reduce_log_alpha'
                ], self.args.grad_clip)
                arch_grad_norm = nn.utils.clip_grad_norm_([
                    param for name, param in self.model.named_parameters()
                    if name == 'normal_log_alpha' or name == 'reduce_log_alpha'
                ], 10.)
            else:
                nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
                arch_grad_norm = nn.utils.clip_grad_norm_(
                    self.model.arch_parameters(), 10.)
            grad.update(arch_grad_norm)

            if not self.args.fix_weight and self.update_theta:
                self.optimizer.step()
                self.optimizer.zero_grad()
            if not self.args.random_sample and self.update_alpha:
                self.arch_optimizer.step()
                self.arch_optimizer.zero_grad()

            if self.rank == 0:
                global_step = step + len(self.train_queue.dataset) * epoch
                self.logger.add_scalar("iter_train_loss", error_loss, global_step)
                self.logger.add_scalar("normal_arch_entropy", normal_arch_entropy, global_step)
                self.logger.add_scalar("reduce_arch_entropy", reduce_arch_entropy, global_step)
                self.logger.add_scalar("total_arch_entropy",
                                       normal_arch_entropy + reduce_arch_entropy,
                                       global_step)

                if self.args.dsnas:
                    # per-edge rewards for the normal and reduce cells (14 edges each)
                    for i in range(14):
                        self.logger.add_scalar("reward_normal_edge_{}".format(i),
                                               self.model.normal_edge_reward[i],
                                               global_step)
                    for i in range(14):
                        self.logger.add_scalar("reward_reduce_edge_{}".format(i),
                                               self.model.reduce_edge_reward[i],
                                               global_step)

                # resource statistics: each tag maps to (penalty index, divisor)
                resource_scalars = {
                    # policy size
                    "iter_normal_size_policy": (2, num_normal),
                    "iter_reduce_size_policy": (7, num_reduce),
                    # baseline: discrete_probability
                    "iter_normal_size_baseline": (3, num_normal),
                    "iter_normal_flops_baseline": (5, num_normal),
                    "iter_normal_mac_baseline": (6, num_normal),
                    "iter_reduce_size_baseline": (8, num_reduce),
                    "iter_reduce_flops_baseline": (9, num_reduce),
                    "iter_reduce_mac_baseline": (10, num_reduce),
                    # R - median(R)
                    "iter_normal_size-avg": (60, num_normal),
                    "iter_normal_flops-avg": (61, num_normal),
                    "iter_normal_mac-avg": (62, num_normal),
                    "iter_reduce_size-avg": (63, num_reduce),
                    "iter_reduce_flops-avg": (64, num_reduce),
                    "iter_reduce_mac-avg": (65, num_reduce),
                    # lnR - ln(median)
                    "iter_normal_ln_size-ln_avg": (66, num_normal),
                    "iter_normal_ln_flops-ln_avg": (67, num_normal),
                    "iter_normal_ln_mac-ln_avg": (68, num_normal),
                    "iter_reduce_ln_size-ln_avg": (69, num_reduce),
                    "iter_reduce_ln_flops-ln_avg": (70, num_reduce),
                    "iter_reduce_ln_mac-ln_avg": (71, num_reduce),
                    # Monte_Carlo(R_i)
                    "iter_normal_size_mc": (29, num_normal),
                    "iter_normal_flops_mc": (30, num_normal),
                    "iter_normal_mac_mc": (31, num_normal),
                    "iter_reduce_size_mc": (32, num_reduce),
                    "iter_reduce_flops_mc": (33, num_reduce),
                    "iter_reduce_mac_mc": (34, num_reduce),
                    # log(|R_i|)
                    "iter_normal_log_size": (41, num_normal),
                    "iter_normal_log_flops": (42, num_normal),
                    "iter_normal_log_mac": (43, num_normal),
                    "iter_reduce_log_size": (44, num_reduce),
                    "iter_reduce_log_flops": (45, num_reduce),
                    "iter_reduce_log_mac": (46, num_reduce),
                    # log(P) * R_i
                    "iter_normal_logP_size": (47, num_normal),
                    "iter_normal_logP_flops": (48, num_normal),
                    "iter_normal_logP_mac": (49, num_normal),
                    "iter_reduce_logP_size": (50, num_reduce),
                    "iter_reduce_logP_flops": (51, num_reduce),
                    "iter_reduce_logP_mac": (52, num_reduce),
                    # log(P) * log(R_i)
                    "iter_normal_logP_log_size": (53, num_normal),
                    "iter_normal_logP_log_flops": (54, num_normal),
                    "iter_normal_logP_log_mac": (55, num_normal),
                    "iter_reduce_logP_log_size": (56, num_reduce),
                    "iter_reduce_logP_log_flops": (57, num_reduce),
                    "iter_reduce_logP_log_mac": (58, num_reduce),
                }
                for tag, (idx, divisor) in resource_scalars.items():
                    self.logger.add_scalar(tag, penalty[idx] / divisor, global_step)
                # (disabled) normalized penalty statistics, penalty[17]..[24]:
                # iter_{normal,reduce}_{size,flops,mac,penalty}_normalized

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            if self.args.distributed:
                loss = loss.detach()
                dist.all_reduce(error_loss)
                dist.all_reduce(prec1)
                dist.all_reduce(prec5)
                prec1.div_(self.world_size)
                prec5.div_(self.world_size)
                # dist_util.all_reduce([loss, prec1, prec5], 'mean')
            objs.update(error_loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % self.args.report_freq == 0 and self.rank == 0:
                logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
                self.logger.add_scalar("iter_train_top1_acc", top1.avg,
                                       step + len(self.train_queue.dataset) * epoch)

        if self.rank == 0:
            logging.info('-------resource gradient--------')
            logging.info(normal_resource_gradient / count)
            logging.info(reduce_resource_gradient / count)
            logging.info('-------loss gradient--------')
            logging.info(normal_loss_gradient / count)
            logging.info(reduce_loss_gradient / count)
            logging.info('-------total gradient--------')
            logging.info(normal_total_gradient / count)
            logging.info(reduce_total_gradient / count)

        return top1.avg, loss, error_loss, loss_alpha

    def infer(self, epoch):
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        self.model.eval()

        with torch.no_grad():
            for step, (input, target) in enumerate(self.valid_queue):
                input = input.to(self.device)
                target = target.to(self.device)

                if self.args.snas:
                    logits, logits_aux, resource_loss, op_normal, op_reduce = self.model(input)
                    loss = self.criterion(logits, target)
                elif self.args.dsnas:
                    logits, error_loss, loss_alpha, resource_loss = self.model(
                        input, target, self.criterion)
                    loss = error_loss

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
                if self.args.distributed:
                    loss.div_(self.world_size)
                    loss = loss.detach()
                    dist.all_reduce(loss)
                    dist.all_reduce(prec1)
                    dist.all_reduce(prec5)
                    prec1.div_(self.world_size)
                    prec5.div_(self.world_size)
                objs.update(loss.item(), input.size(0))
                top1.update(prec1.item(), input.size(0))
                top5.update(prec5.item(), input.size(0))

                if step % self.args.report_freq == 0 and self.rank == 0:
                    logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
                    self.logger.add_scalar("iter_valid_loss", loss,
                                           step + len(self.valid_queue.dataset) * epoch)
                    self.logger.add_scalar("iter_valid_top1_acc", top1.avg,
                                           step + len(self.valid_queue.dataset) * epoch)

        return top1.avg, objs.avg
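# --------------------------------------------------------------------------
# Entry-point sketch (an assumption; the original driver code is not shown).
# The class above is self-contained once the module-level globals it uses
# (args, generate_date, log_format, CIFAR_CLASSES, init_dist, ...) exist,
# so a typical driver would simply be:
#
#   if __name__ == '__main__':
#       searcher = neural_architecture_search(args)
#       searcher.main()
# --------------------------------------------------------------------------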
def main():
    # if not torch.cuda.is_available():
    #     logging.info('no gpu device available')
    #     sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    # cudnn.benchmark = True
    # torch.manual_seed(args.seed)
    # cudnn.enabled = True
    # torch.cuda.manual_seed(args.seed)
    # logging.info('gpu device = %d' % args.gpu)
    # logging.info("args = %s", args)
    torch.manual_seed(args.seed)
    logging.info('use cpu')
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    # criterion = criterion.cuda()
    criterion.to(device)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion,
                    learnable_bn=args.learnable_bn)
    # model = model.cuda()
    model.to(device)
    a = list(model.parameters())
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)

    ################################################################################
    # AdaS: optimizer and scheduler
    optimizer = SGDVec(params=model.parameters(),
                       lr=args.learning_rate,
                       momentum=args.momentum,
                       weight_decay=args.weight_decay)
    scheduler = AdaS(parameters=list(model.parameters()),
                     init_lr=args.learning_rate,
                     # min_lr=kwargs['min_lr'],
                     # zeta=kwargs['zeta'],
                     p=args.scheduler_p,
                     beta=args.scheduler_beta)
    ################################################################################

    # train_transform, valid_transform = utils._data_transforms_cifar100(args)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    """From https://github.com/chenxin061/pdarts/"""
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.data, train=True, download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                                  transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    """Hessian"""
    analyser = Analyzer(model, args)
    """adaptive stopping"""
    stop_checker = StopChecker()

    METRICS = Metrics(list(model.parameters()), p=1)

    PERFORMANCE_STATISTICS = {}
    ARCH_STATISTICS = {}
    GENOTYPE_STATISTICS = {}
    metrics_path = './metrics_stat_test_adas.xlsx'
    weights_path = './weights_stat_test_adas.xlsx'
    genotypes_path = './genotypes_stat_test_adas.xlsx'

    for epoch in range(args.epochs):
        # scheduler.step()
        # lr = scheduler.get_lr()[0]
        # logging.info

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        if epoch % 5 == 0 or epoch == args.epochs - 1:
            GENOTYPE_STATISTICS[f'epoch_{epoch}'] = [genotype]
            genotypes_df = pd.DataFrame(data=GENOTYPE_STATISTICS)
            genotypes_df.to_excel(genotypes_path)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(epoch, train_queue, valid_queue, model,
                                     architect, criterion, optimizer, METRICS,
                                     scheduler, analyser)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # metrics
        io_metrics = METRICS.evaluate(epoch)
        PERFORMANCE_STATISTICS[f'in_S_epoch_{epoch}'] = io_metrics.input_channel_S
        PERFORMANCE_STATISTICS[f'out_S_epoch_{epoch}'] = io_metrics.output_channel_S
        PERFORMANCE_STATISTICS[f'fc_S_epoch_{epoch}'] = io_metrics.fc_S
        PERFORMANCE_STATISTICS[f'in_rank_epoch_{epoch}'] = io_metrics.input_channel_rank
        PERFORMANCE_STATISTICS[f'out_rank_epoch_{epoch}'] = io_metrics.output_channel_rank
        PERFORMANCE_STATISTICS[f'fc_rank_epoch_{epoch}'] = io_metrics.fc_rank
        PERFORMANCE_STATISTICS[f'in_condition_epoch_{epoch}'] = io_metrics.input_channel_condition
        PERFORMANCE_STATISTICS[f'out_condition_epoch_{epoch}'] = io_metrics.output_channel_condition

        ################################################################################
        # AdaS: update learning rates
        lr_metrics = scheduler.step(epoch, METRICS)
        PERFORMANCE_STATISTICS[f'rank_velocity_epoch_{epoch}'] = lr_metrics.rank_velocity
        PERFORMANCE_STATISTICS[f'learning_rate_epoch_{epoch}'] = lr_metrics.r_conv
        ################################################################################

        # write metrics data to xls file
        metrics_df = pd.DataFrame(data=PERFORMANCE_STATISTICS)
        metrics_df.to_excel(metrics_path)

        # weights
        weights_normal = F.softmax(model.alphas_normal, dim=-1).detach().cpu().numpy()
        weights_reduce = F.softmax(model.alphas_reduce, dim=-1).detach().cpu().numpy()
        # normal
        ARCH_STATISTICS[f'normal_none_epoch{epoch}'] = weights_normal[:, 0]
        ARCH_STATISTICS[f'normal_max_epoch{epoch}'] = weights_normal[:, 1]
        ARCH_STATISTICS[f'normal_avg_epoch{epoch}'] = weights_normal[:, 2]
        ARCH_STATISTICS[f'normal_skip_epoch{epoch}'] = weights_normal[:, 3]
        ARCH_STATISTICS[f'normal_sep_3_epoch{epoch}'] = weights_normal[:, 4]
        ARCH_STATISTICS[f'normal_sep_5_epoch{epoch}'] = weights_normal[:, 5]
        ARCH_STATISTICS[f'normal_dil_3_epoch{epoch}'] = weights_normal[:, 6]
        ARCH_STATISTICS[f'normal_dil_5_epoch{epoch}'] = weights_normal[:, 7]
        # reduce
        ARCH_STATISTICS[f'reduce_none_epoch{epoch}'] = weights_reduce[:, 0]
        ARCH_STATISTICS[f'reduce_max_epoch{epoch}'] = weights_reduce[:, 1]
        ARCH_STATISTICS[f'reduce_avg_epoch{epoch}'] = weights_reduce[:, 2]
        ARCH_STATISTICS[f'reduce_skip_epoch{epoch}'] = weights_reduce[:, 3]
        ARCH_STATISTICS[f'reduce_sep_3_epoch{epoch}'] = weights_reduce[:, 4]
        ARCH_STATISTICS[f'reduce_sep_5_epoch{epoch}'] = weights_reduce[:, 5]
        ARCH_STATISTICS[f'reduce_dil_3_epoch{epoch}'] = weights_reduce[:, 6]
        ARCH_STATISTICS[f'reduce_dil_5_epoch{epoch}'] = weights_reduce[:, 7]
        # write weights data to xls file
        weights_df = pd.DataFrame(data=ARCH_STATISTICS)
        weights_df.to_excel(weights_path)

        # adaptive stopping criterion
        if args.adaptive_stop and epoch >= 10:
            # apply local stopping criterion
            stop_checker.local_stop(METRICS, epoch)
            # freeze some edges based on their knowledge gains
            iteration_p = 0
            for p in model.parameters():
                if ~METRICS.layers_index_todo[iteration_p]:
                    p.requires_grad = False
                    p.grad = None
                iteration_p += 1

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    # if not torch.cuda.is_available():
    #     logging.info('no gpu device available')
    #     sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(DEVICE)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.to(DEVICE)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
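# --------------------------------------------------------------------------
# Sketch of the train()/architect interplay used by the loop above. This is
# an assumption based on the standard DARTS recipe (the original train() body
# is not shown here): one architecture step on a held-out validation batch,
# then one weight step on the training batch. `architect.step` is assumed to
# have the usual DARTS signature; `train_sketch` is a hypothetical name.
def train_sketch(train_queue, valid_queue, model, architect, criterion,
                 optimizer, lr, grad_clip=5.0, unrolled=False):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    valid_iter = iter(valid_queue)

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.to(DEVICE)
        target = target.to(DEVICE)

        # fetch a validation batch (restart the iterator when exhausted)
        try:
            input_search, target_search = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            input_search, target_search = next(valid_iter)
        input_search = input_search.to(DEVICE)
        target_search = target_search.to(DEVICE)

        # 1) update architecture parameters (alphas) on the validation batch
        architect.step(input, target, input_search, target_search, lr,
                       optimizer, unrolled=unrolled)

        # 2) update network weights on the training batch
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
        optimizer.step()

        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

    return top1.avg, objs.avg
# --------------------------------------------------------------------------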
def main():
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
    print(args)

    seed = random.randint(1, 100000000)
    print(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.enabled = True

    n_channels = 3
    n_bins = 2.**args.n_bits

    # Define model and loss criteria
    model = SearchNetwork(n_channels, args.n_flow, args.n_block, n_bins,
                          affine=args.affine, conv_lu=not args.no_lu)
    model = nn.DataParallel(model, [args.gpu])
    model.load_state_dict(
        torch.load("architecture.pt", map_location="cuda:{}".format(args.gpu)))
    model = model.module

    genotype = model.sample_architecture()
    with open(args.save + '/genotype.pkl', 'wb') as fp:
        pickle.dump(genotype, fp)

    model_single = EnsembleNetwork(n_channels, args.n_flow, args.n_block, n_bins,
                                   genotype, affine=args.affine,
                                   conv_lu=not args.no_lu)
    model = model_single
    model = model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate)
    dataset = iter(sample_cifar10(args.batch, args.img_size))

    # Sample latent vectors for generated images
    z_sample = []
    z_shapes = calc_z_shapes(n_channels, args.img_size, args.n_flow, args.n_block)
    for z in z_shapes:
        z_new = torch.randn(args.n_sample, *z) * args.temp
        z_sample.append(z_new.to(device))

    with tqdm(range(args.iter)) as pbar:
        for i in pbar:
            # Training procedure
            model.train()

            # Get a random minibatch from the search queue with replacement
            input, _ = next(dataset)
            input = Variable(input, requires_grad=False).cuda(non_blocking=True)

            log_p, logdet, _ = model(input + torch.rand_like(input) / n_bins)
            logdet = logdet.mean()
            loss, _, _ = likelihood_loss(log_p, logdet, args.img_size, n_bins)

            # Optimize model
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.set_description("Loss: {}".format(loss.item()))

            # Save generated samples
            if i % 100 == 0:
                with torch.no_grad():
                    tvutils.save_image(
                        model_single.reverse(z_sample).cpu().data,
                        "{}/samples/{}.png".format(args.save, str(i + 1).zfill(6)),
                        normalize=False,
                        nrow=10,
                    )

            # Save checkpoint
            if i % 1000 == 0:
                utils.save(model, os.path.join(args.save, 'latest_weights.pt'))
def main():
    # LOAD CONFIGS ###############################################################
    args = get_args()

    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_no

    log_format = '[%(asctime)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%d %I:%M:%S')
    t = time.time()
    local_time = time.localtime(t)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    fh = logging.FileHandler(
        os.path.join('log/train-{}{:02}{}'.format(local_time.tm_year % 2000,
                                                  local_time.tm_mon, t)))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    use_gpu = False
    if torch.cuda.is_available():
        use_gpu = True
        cudnn.benchmark = True
        torch.manual_seed(args.rand_seed)
        cudnn.enabled = True
        torch.cuda.manual_seed(args.rand_seed)
        # cudnn.enabled = True
        # torch.cuda.manual_seed(str(args.rand_seed))
    random.seed(args.rand_seed)

    # LOAD DATA ##################################################################
    def convert_param(original_lists):
        # assert isinstance(original_lists, list), 'The type is not right : {:}'.format(original_lists)
        ctype, value = original_lists[0], original_lists[1]
        # assert ctype in support_types, 'Ctype={:}, support={:}'.format(ctype, support_types)
        is_list = isinstance(value, list)
        if not is_list:
            value = [value]
        outs = []
        for x in value:
            if ctype == 'int':
                x = int(x)
            elif ctype == 'str':
                x = str(x)
            elif ctype == 'bool':
                x = bool(int(x))
            elif ctype == 'float':
                x = float(x)
            elif ctype == 'none':
                if x.lower() != 'none':
                    raise ValueError(
                        'For the none type, the value must be none instead of {:}'.format(x))
                x = None
            else:
                raise TypeError('Does not know this type : {:}'.format(ctype))
            outs.append(x)
        if not is_list:
            outs = outs[0]
        return outs

    from collections import namedtuple
    with open('../data/cifar-split.txt', 'r') as f:
        data = json.load(f)
    content = {k: convert_param(v) for k, v in data.items()}
    Arguments = namedtuple('Configure', ' '.join(content.keys()))
    content = Arguments(**content)
    cifar_split = content
    train_split, valid_split = cifar_split.train, cifar_split.valid
    print(len(train_split), len(valid_split))

    if args.dataset == "cifar10":
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
        lists = [transforms.RandomHorizontalFlip(),
                 transforms.RandomCrop(32, padding=4),
                 transforms.ToTensor(),
                 transforms.Normalize(mean, std)]
        transform_train = transforms.Compose(lists)
        transform_test = transforms.Compose([transforms.ToTensor(),
                                             transforms.Normalize(mean, std)])
        train_dataset = datasets.CIFAR10(root='../data', train=True,
                                         download=True, transform=transform_train)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=4,
            pin_memory=use_gpu)
        train_dataprovider = DataIterator(train_loader)
        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(root='../data', train=True, download=True,
                             transform=transform_test),
            batch_size=250,
            shuffle=False,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=4,
            pin_memory=use_gpu)
        val_dataprovider = DataIterator(val_loader)
        CLASS = 10
    elif args.dataset == "cifar100":
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
        lists = [transforms.RandomHorizontalFlip(),
                 transforms.RandomCrop(32, padding=4),
                 transforms.ToTensor(),
                 transforms.Normalize(mean, std)]
        transform_train = transforms.Compose(lists)
        transform_test = transforms.Compose([transforms.ToTensor(),
                                             transforms.Normalize(mean, std)])
        train_dataset = datasets.CIFAR100(root='../data', train=True,
                                          download=True, transform=transform_train)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(train_split),
            num_workers=4,
            pin_memory=use_gpu)
        train_dataprovider = DataIterator(train_loader)
        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100(root='../data', train=True, download=True,
                              transform=transform_test),
            batch_size=250,
            shuffle=False,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_split),
            num_workers=4,
            pin_memory=use_gpu)
        val_dataprovider = DataIterator(val_loader)
        CLASS = 100
    elif args.dataset == "svhn":
        mean = [0.4377, 0.4438, 0.4728]
        std = [0.1980, 0.2010, 0.1970]
        lists = [transforms.RandomHorizontalFlip(),
                 transforms.RandomCrop(32, padding=4),
                 transforms.ToTensor(),
                 transforms.Normalize(mean, std)]
        transform_train = transforms.Compose(lists)
        transform_test = transforms.Compose([transforms.ToTensor(),
                                             transforms.Normalize(mean, std)])
        train_dataset = datasets.SVHN(root='../data', split='train',
                                      download=True, transform=transform_train)
        num_train = len(train_dataset)
        indices = list(range(num_train))
        split = int(np.floor(0.5 * num_train))
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
            num_workers=4,
            pin_memory=use_gpu)
        train_dataprovider = DataIterator(train_loader)
        val_loader = torch.utils.data.DataLoader(
            datasets.SVHN(root='../data', split='train', download=True,
                          transform=transform_test),
            batch_size=250,
            shuffle=False,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
            num_workers=4,
            pin_memory=use_gpu)
        val_dataprovider = DataIterator(val_loader)
        CLASS = 10

    print('load data successfully')

    model = Network(args.init_channels, CLASS, args.stacks,
                    eval(args.search_space)).cuda()
    optimizer = torch.optim.SGD(get_parameters(model),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    criterion_smooth = CrossEntropyLabelSmooth(CLASS, 0.1)

    if use_gpu:
        loss_function = criterion_smooth.cuda()
        device = torch.device("cuda")
    else:
        loss_function = criterion_smooth
        device = torch.device("cpu")

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.total_iters)
    model = model.to(device)

    all_iters = 0
    if args.auto_continue:
        lastest_model, iters = get_lastest_model()
        if lastest_model is not None:
            all_iters = iters
            checkpoint = torch.load(lastest_model,
                                    map_location=None if use_gpu else 'cpu')
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            print('load from checkpoint')
            for i in range(iters):
                scheduler.step()

    args.optimizer = optimizer
    args.loss_function = loss_function
    args.scheduler = scheduler
    args.train_dataprovider = train_dataprovider
    args.val_dataprovider = val_dataprovider
    args.evo_controller = evolutionary(args.max_population, args.select_number,
                                       args.mutation_len, args.mutation_number,
                                       args.p_edgewise, args.p_opwise)

    path = './record_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}'.format(
        args.stacks, args.init_channels, args.total_iters, args.warmup_iters,
        args.max_population, args.select_number, args.mutation_len,
        args.mutation_number, args.val_interval, args.val_times, args.p_edgewise,
        args.p_opwise, args.evo_momentum, args.rand_seed, args.search_space,
        args.dataset)
    logging.info(path)

    # args.evo_controller.trained_group = args.evo_controller.group
    while all_iters < args.total_iters:
        if all_iters > 1 and all_iters % args.val_interval == 0:
            results = []
            for structure_father in args.evo_controller.group:
                results.append([structure_father.structure,
                                structure_father.loss,
                                structure_father.count])
            if not os.path.exists(path):
                os.mkdir(path)
            with open(path + '/%06d-ep.txt' % all_iters, 'w') as tt:
                json.dump(results, tt)
            if all_iters >= args.warmup_iters:  # warmup
                args.evo_controller.select()
            else:
                print("warmup")

        all_iters = train(model, device, args, val_interval=args.val_interval,
                          bn_process=False, all_iters=all_iters)
        validate(model, device, args, all_iters=all_iters)

    # end of search
    # validate(model, device, args, all_iters=all_iters)
    results = []
    for structure_father in args.evo_controller.group:
        results.append([structure_father.structure, structure_father.loss,
                        structure_father.count])
    with open(path + '/%06d-ep.txt' % all_iters, 'w') as tt:
        json.dump(results, tt)
def main():
    start = time.time()

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.backends.cudnn.benchmark = True

    CLASSES = 1000
    channels = SEARCH_SPACE['channel_size']
    strides = SEARCH_SPACE['strides']

    # Model
    model = Network(channels, strides, CLASSES)
    model = model.to(device)
    model = nn.DataParallel(model, device_ids=config.gpus)
    logger.info("param size = %fMB", utils.count_parameters_in_MB(model))

    config.world_size = 0
    if config.target_hardware is None:
        config.ref_value = None
    else:
        config.ref_value = ref_values[config.target_hardware]['%.2f' % config.width_mult]

    # Loss
    criterion = LatencyLoss(config, channels, strides).cuda()
    normal_critersion = nn.CrossEntropyLoss()

    alpha_weight = model.module.arch_parameters()
    # weight = [param for param in model.parameters()
    #           if not utils.check_tensor_in_list(param, alpha_weight)]
    weight = model.module.weight_parameters()

    # Optimizer
    w_optimizer = torch.optim.SGD(weight,
                                  config.w_lr,
                                  momentum=config.w_momentum,
                                  weight_decay=config.w_weight_decay)
    alpha_optimizer = torch.optim.Adam(alpha_weight,
                                       lr=config.alpha_lr,
                                       betas=(config.arch_adam_beta1,
                                              config.arch_adam_beta2),
                                       eps=config.arch_adam_eps,
                                       weight_decay=config.alpha_weight_decay)

    # image_dir="/googol/atlas/public/cv/ILSVRC/Data/"
    # use soft link:
    # `mkdir ./data/imagenet && ln -s /googol/atlas/public/cv/ILSVRC/Data/CLS-LOC/* ./data/imagenet/`
    train_data = get_imagenet_torch(
        type='train',
        image_dir=config.data_path + "/" + config.dataset.lower(),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        crop=224,
        device_id=0,
        num_gpus=len(config.gpus),
        portion=config.train_portion)
    valid_data = get_imagenet_torch(
        type='val',
        image_dir=config.data_path + "/" + config.dataset.lower(),
        batch_size=config.batch_size,
        num_threads=config.workers,
        world_size=config.world_size,
        crop=224,
        device_id=0,
        num_gpus=len(config.gpus),
        portion=config.val_portion)

    best_top1 = 0.
    best_genotype = list()
    lr = 0
    config.start_epoch = -1
    config.warmup_epoch = 0
    config.warmup = True

    ### Resume from warmup model or trained model ###
    if config.resume:
        try:
            model_path = config.path + '/checkpoint.pth.tar'
            model, w_optimizer, alpha_optimizer = load_model(
                model,
                model_fname=model_path,
                optimizer=w_optimizer,
                arch_optimizer=alpha_optimizer)
        except Exception:
            warmup_path = config.path + '/warmup.pth.tar'
            if os.path.exists(warmup_path):
                print('load warmup weights')
                model, w_optimizer, alpha_optimizer = load_model(
                    model,
                    model_fname=warmup_path,
                    optimizer=w_optimizer,
                    arch_optimizer=alpha_optimizer)
            else:
                print('fail to load models')

    w_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optimizer, float(config.epochs), eta_min=config.w_lr_min)

    if config.start_epoch < 0 and config.warmup:
        for epoch in range(config.warmup_epoch, config.warmup_epochs):
            # warmup
            train_top1, train_loss = warm_up(train_data, valid_data, model,
                                             normal_critersion, criterion,
                                             w_optimizer, epoch, writer)
            config.start_epoch = epoch

    update_schedule = utils.get_update_schedule_grad(len(train_data), config)
    for epoch in range(config.start_epoch + 1, config.epochs):
        if epoch > config.warmup_epochs:
            w_scheduler.step()
        lr = w_scheduler.get_lr()[0]
        logger.info('epoch %d lr %e', epoch, lr)

        # training
        train_top1, train_loss = train(train_data, valid_data, model,
                                       normal_critersion, criterion, w_optimizer,
                                       alpha_optimizer, lr, epoch, writer,
                                       update_schedule)
        logger.info('Train top1 %f', train_top1)

        # validation (every 10 epochs)
        top1 = train_top1
        if epoch % 10 == 0:
            top1, loss = infer(valid_data, model, epoch, criterion,
                               normal_critersion, writer)
            logger.info('valid top1 %f', top1)

        genotype = model.module.genotype()
        logger.info("genotype = {}".format(genotype))

        # save
        if best_top1 < top1:
            best_top1 = top1
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        save_model(model, {
            'warmup': False,
            'epoch': epoch,
            'w_optimizer': w_optimizer.state_dict(),
            'alpha_optimizer': alpha_optimizer.state_dict(),
            'state_dict': model.state_dict()
        }, is_best=is_best)

    utils.time(time.time() - start)
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
    logger.info("Best Genotype = {}".format(best_genotype))
class neural_architecture_search(): def __init__(self, args): self.args = args if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) torch.cuda.set_device(self.args.gpu) self.device = torch.device("cuda") self.rank = 0 self.seed = self.args.seed self.world_size = 1 if self.args.fix_cudnn: random.seed(self.seed) torch.backends.cudnn.deterministic = True np.random.seed(self.seed) cudnn.benchmark = False torch.manual_seed(self.seed) cudnn.enabled = True torch.cuda.manual_seed(self.seed) torch.cuda.manual_seed_all(self.seed) else: np.random.seed(self.seed) cudnn.benchmark = True torch.manual_seed(self.seed) cudnn.enabled = True torch.cuda.manual_seed(self.seed) torch.cuda.manual_seed_all(self.seed) self.path = os.path.join(generate_date, self.args.save) if self.rank == 0: utils.create_exp_dir(generate_date, self.path, scripts_to_save=glob.glob('*.py')) logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(self.path, 'log.txt')) fh.setFormatter(logging.Formatter(log_format)) logging.getLogger().addHandler(fh) logging.info("self.args = %s", self.args) self.logger = tensorboardX.SummaryWriter('./runs/' + generate_date + '/' + self.args.save_log) else: self.logger = None #initialize loss function self.criterion = nn.CrossEntropyLoss().to(self.device) #initialize model self.init_model() if self.args.resume: self.reload_model() #calculate model param size if self.rank == 0: logging.info("param size = %fMB", utils.count_parameters_in_MB(self.model)) self.model._logger = self.logger self.model._logging = logging #initialize optimizer self.init_optimizer() #iniatilize dataset loader self.init_loaddata() self.update_theta = True self.update_alpha = True def init_model(self): self.model = Network(self.args.init_channels, CIFAR_CLASSES, self.args.layers, self.criterion, self.args, self.rank, self.world_size, self.args.steps, self.args.multiplier) self.model.to(self.device) for v in self.model.parameters(): if v.requires_grad: if v.grad is None: v.grad = torch.zeros_like(v) self.model.normal_log_alpha.grad = torch.zeros_like( self.model.normal_log_alpha) self.model.reduce_log_alpha.grad = torch.zeros_like( self.model.reduce_log_alpha) def reload_model(self): self.model.load_state_dict(torch.load(self.args.resume_path + '/weights.pt'), strict=True) def init_optimizer(self): self.optimizer = torch.optim.SGD(self.model.parameters(), self.args.learning_rate, momentum=self.args.momentum, weight_decay=args.weight_decay) self.arch_optimizer = torch.optim.Adam( self.model.arch_parameters(), lr=self.args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=self.args.arch_weight_decay) def init_loaddata(self): train_transform, valid_transform = utils._data_transforms_cifar10( self.args) train_data = dset.CIFAR10(root=self.args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=self.args.data, train=False, download=True, transform=valid_transform) if self.args.seed: def worker_init_fn(): seed = self.seed np.random.seed(seed) random.seed(seed) torch.manual_seed(seed) return else: worker_init_fn = None num_train = len(train_data) indices = list(range(num_train)) self.train_queue = torch.utils.data.DataLoader( train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=False, num_workers=2) self.valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=False, num_workers=2) def main(self): # lr 
scheduler: cosine annealing # temp scheduler: linear annealing (self-defined in utils) self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( self.optimizer, float(self.args.epochs), eta_min=self.args.learning_rate_min) self.temp_scheduler = utils.Temp_Scheduler(self.args.epochs, self.model._temp, self.args.temp, temp_min=self.args.temp_min) for epoch in range(self.args.epochs): if self.args.child_reward_stat: self.update_theta = False self.update_alpha = False if self.args.current_reward: self.model.normal_reward_mean = torch.zeros_like( self.model.normal_reward_mean) self.model.reduce_reward_mean = torch.zeros_like( self.model.reduce_reward_mean) self.model.count = 0 if epoch < self.args.resume_epoch: continue self.scheduler.step() if self.args.temp_annealing: self.model._temp = self.temp_scheduler.step() self.lr = self.scheduler.get_lr()[0] if self.rank == 0: logging.info('epoch %d lr %e temp %e', epoch, self.lr, self.model._temp) self.logger.add_scalar('epoch_temp', self.model._temp, epoch) logging.info(self.model.normal_log_alpha) logging.info(self.model.reduce_log_alpha) logging.info(F.softmax(self.model.normal_log_alpha, dim=-1)) logging.info(F.softmax(self.model.reduce_log_alpha, dim=-1)) genotype_edge_all = self.model.genotype_edge_all() if self.rank == 0: logging.info('genotype_edge_all = %s', genotype_edge_all) # create genotypes.txt file txt_name = remark + '_genotype_edge_all_epoch' + str(epoch) utils.txt('genotype', self.args.save, txt_name, str(genotype_edge_all), generate_date) self.model.train() train_acc, loss, error_loss, loss_alpha = self.train( epoch, logging) if self.rank == 0: logging.info('train_acc %f', train_acc) self.logger.add_scalar("epoch_train_acc", train_acc, epoch) self.logger.add_scalar("epoch_train_error_loss", error_loss, epoch) if self.args.dsnas: self.logger.add_scalar("epoch_train_alpha_loss", loss_alpha, epoch) if self.args.dsnas and not self.args.child_reward_stat: if self.args.current_reward: logging.info('reward mean stat') logging.info(self.model.normal_reward_mean) logging.info(self.model.reduce_reward_mean) logging.info('count') logging.info(self.model.count) else: logging.info('reward mean stat') logging.info(self.model.normal_reward_mean) logging.info(self.model.reduce_reward_mean) if self.model.normal_reward_mean.size(0) > 1: logging.info('reward mean total stat') logging.info(self.model.normal_reward_mean.sum(0)) logging.info(self.model.reduce_reward_mean.sum(0)) if self.args.child_reward_stat: logging.info('reward mean stat') logging.info(self.model.normal_reward_mean.sum(0)) logging.info(self.model.reduce_reward_mean.sum(0)) logging.info('reward var stat') logging.info( self.model.normal_reward_mean_square.sum(0) - self.model.normal_reward_mean.sum(0)**2) logging.info( self.model.reduce_reward_mean_square.sum(0) - self.model.reduce_reward_mean.sum(0)**2) # validation self.model.eval() valid_acc, valid_obj = self.infer(epoch) if self.args.gen_max_child: self.args.gen_max_child_flag = True valid_acc_max_child, valid_obj_max_child = self.infer(epoch) self.args.gen_max_child_flag = False if self.rank == 0: logging.info('valid_acc %f', valid_acc) self.logger.add_scalar("epoch_valid_acc", valid_acc, epoch) if self.args.gen_max_child: logging.info('valid_acc_argmax_alpha %f', valid_acc_max_child) self.logger.add_scalar("epoch_valid_acc_argmax_alpha", valid_acc_max_child, epoch) utils.save(self.model, os.path.join(self.path, 'weights.pt')) if self.rank == 0: logging.info(self.model.normal_log_alpha) logging.info(self.model.reduce_log_alpha) 
genotype_edge_all = self.model.genotype_edge_all() logging.info('genotype_edge_all = %s', genotype_edge_all) def train(self, epoch, logging): objs = utils.AvgrageMeter() top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() grad = utils.AvgrageMeter() normal_loss_gradient = 0 reduce_loss_gradient = 0 normal_total_gradient = 0 reduce_total_gradient = 0 loss_alpha = None train_correct_count = 0 train_correct_cost = 0 train_correct_entropy = 0 train_correct_loss = 0 train_wrong_count = 0 train_wrong_cost = 0 train_wrong_entropy = 0 train_wrong_loss = 0 count = 0 for step, (input, target) in enumerate(self.train_queue): n = input.size(0) input = input.to(self.device) target = target.to(self.device, non_blocking=True) if self.args.snas: logits, logits_aux = self.model(input) error_loss = self.criterion(logits, target) if self.args.auxiliary: loss_aux = self.criterion(logits_aux, target) error_loss += self.args.auxiliary_weight * loss_aux if self.args.dsnas: logits, error_loss, loss_alpha = self.model( input, target, self.criterion, update_theta=self.update_theta, update_alpha=self.update_alpha) for i in range(logits.size(0)): index = logits[i].topk(5, 0, True, True)[1] if index[0].item() == target[i].item(): train_correct_cost += ( -logits[i, target[i].item()] + (F.softmax(logits[i]) * logits[i]).sum()) train_correct_count += 1 discrete_prob = F.softmax(logits[i], dim=-1) train_correct_entropy += -( discrete_prob * torch.log(discrete_prob)).sum(-1) train_correct_loss += -torch.log(discrete_prob)[ target[i].item()] else: train_wrong_cost += ( -logits[i, target[i].item()] + (F.softmax(logits[i]) * logits[i]).sum()) train_wrong_count += 1 discrete_prob = F.softmax(logits[i], dim=-1) train_wrong_entropy += -(discrete_prob * torch.log(discrete_prob)).sum(-1) train_wrong_loss += -torch.log(discrete_prob)[ target[i].item()] num_normal = self.model.num_normal num_reduce = self.model.num_reduce if self.args.snas or self.args.dsnas: loss = error_loss.clone() #self.update_lr() # logging gradient count += 1 if self.args.snas: self.optimizer.zero_grad() self.arch_optimizer.zero_grad() error_loss.backward(retain_graph=True) if not self.args.random_sample: normal_loss_gradient += self.model.normal_log_alpha.grad reduce_loss_gradient += self.model.reduce_log_alpha.grad self.optimizer.zero_grad() self.arch_optimizer.zero_grad() if self.args.snas and (not self.args.random_sample and not self.args.dsnas): loss.backward() if not self.args.random_sample: normal_total_gradient += self.model.normal_log_alpha.grad reduce_total_gradient += self.model.reduce_log_alpha.grad nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip) arch_grad_norm = nn.utils.clip_grad_norm_( self.model.arch_parameters(), 10.) 
grad.update(arch_grad_norm) if not self.args.fix_weight and self.update_theta: self.optimizer.step() self.optimizer.zero_grad() if not self.args.random_sample and self.update_alpha: self.arch_optimizer.step() self.arch_optimizer.zero_grad() prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5)) objs.update(error_loss.item(), n) top1.update(prec1.item(), n) top5.update(prec5.item(), n) if step % self.args.report_freq == 0 and self.rank == 0: logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) self.logger.add_scalar( "iter_train_top1_acc", top1.avg, step + len(self.train_queue.dataset) * epoch) if self.rank == 0: logging.info('-------loss gradient--------') logging.info(normal_loss_gradient / count) logging.info(reduce_loss_gradient / count) logging.info('-------total gradient--------') logging.info(normal_total_gradient / count) logging.info(reduce_total_gradient / count) logging.info('correct loss ') logging.info((train_correct_loss / train_correct_count).item()) logging.info('correct entropy ') logging.info((train_correct_entropy / train_correct_count).item()) logging.info('correct cost ') logging.info((train_correct_cost / train_correct_count).item()) logging.info('correct count ') logging.info(train_correct_count) logging.info('wrong loss ') logging.info((train_wrong_loss / train_wrong_count).item()) logging.info('wrong entropy ') logging.info((train_wrong_entropy / train_wrong_count).item()) logging.info('wrong cost ') logging.info((train_wrong_cost / train_wrong_count).item()) logging.info('wrong count ') logging.info(train_wrong_count) logging.info('total loss ') logging.info(((train_correct_loss + train_wrong_loss) / (train_correct_count + train_wrong_count)).item()) logging.info('total entropy ') logging.info(((train_correct_entropy + train_wrong_entropy) / (train_correct_count + train_wrong_count)).item()) logging.info('total cost ') logging.info(((train_correct_cost + train_wrong_cost) / (train_correct_count + train_wrong_count)).item()) logging.info('total count ') logging.info(train_correct_count + train_wrong_count) return top1.avg, loss, error_loss, loss_alpha def infer(self, epoch): objs = utils.AvgrageMeter() top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() self.model.eval() with torch.no_grad(): for step, (input, target) in enumerate(self.valid_queue): input = input.to(self.device) target = target.to(self.device) if self.args.snas: logits, logits_aux = self.model(input) loss = self.criterion(logits, target) elif self.args.dsnas: logits, error_loss, loss_alpha = self.model( input, target, self.criterion) loss = error_loss prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5)) objs.update(loss.item(), input.size(0)) top1.update(prec1.item(), input.size(0)) top5.update(prec5.item(), input.size(0)) if step % self.args.report_freq == 0 and self.rank == 0: logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) self.logger.add_scalar( "iter_valid_loss", loss, step + len(self.valid_queue.dataset) * epoch) self.logger.add_scalar( "iter_valid_top1_acc", top1.avg, step + len(self.valid_queue.dataset) * epoch) return top1.avg, objs.avg
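# --- Sketch: temperature-annealed relaxed sampling ----------------------------------
# The class above anneals a softmax temperature (utils.Temp_Scheduler) while sampling
# operations from the log-alphas. The snippet below only approximates that behavior
# with a linear decay and PyTorch's built-in Gumbel-softmax; it is an illustrative
# assumption, not the repo's exact scheduler.
import torch
import torch.nn.functional as F

log_alpha = torch.zeros(4)          # one edge with four candidate operations
temp_init, temp_min, epochs = 1.0, 0.03, 50

for epoch in range(epochs):
    temp = temp_min + (temp_init - temp_min) * (1 - epoch / (epochs - 1))
    # differentiable, near-one-hot sample of an operation at the current temperature
    z = F.gumbel_softmax(log_alpha, tau=temp, hard=False)
    if epoch % 10 == 0:
        print(epoch, round(temp, 3), z.numpy().round(3))
# ------------------------------------------------------------------------------------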
def main(): args = get_args() # get log args.save = '{}/search-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S")) tools.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py')) log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(args.save, 'log.txt')) fh.setFormatter(logging.Formatter(log_format)) logger = logging.getLogger('Train Search') logger.addHandler(fh) # monitor pymonitor = ProgressMonitor(logger) tbmonitor = TensorBoardMonitor(logger, args.save) monitors = [pymonitor, tbmonitor] if not torch.cuda.is_available(): logger.info('no gpu device available') sys.exit(1) # set random seed np.random.seed(args.seed) torch.manual_seed(args.seed) args.use_cuda = args.gpus > 0 and torch.cuda.is_available() args.device = torch.device('cuda:0' if args.use_cuda else 'cpu') if args.use_cuda: torch.cuda.manual_seed(args.seed) cudnn.enabled = True cudnn.benchmark = True setting = {k: v for k, v in args._get_kwargs()} logger.info(setting) with open(os.path.join(args.save, "args.yaml"), "w") as yaml_file: # dump experiment config yaml.dump(args, yaml_file) if args.cifar100: CIFAR_CLASSES = 100 data_folder = 'cifar-100-python' else: CIFAR_CLASSES = 10 data_folder = 'cifar-10-batches-py' # prepare dataset if args.cifar100: train_transform, valid_transform = tools._data_transforms_cifar100( args) else: train_transform, valid_transform = tools._data_transforms_cifar10(args) if args.cifar100: train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) vaild_ata = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=False, transform=valid_transform) else: train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) vaild_ata = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=False, transform=valid_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=args.workers) valLoader = torch.utils.data.DataLoader(vaild_ata, batch_size=args.batch_size, pin_memory=True, num_workers=args.workers) # build Network criterion = nn.CrossEntropyLoss() criterion = criterion.to(args.device) switches = [] for i in range(14): switches.append([True for j in range(len(PRIMITIVES))]) switches_normal = copy.deepcopy(switches) switches_reduce = copy.deepcopy(switches) # To be moved to args num_to_keep = [5, 3, 1] num_to_drop = [3, 2, 2] if len(args.add_width) == 3: add_width = args.add_width else: add_width = [0, 0, 0] if len(args.add_layers) == 3: add_layers = args.add_layers else: add_layers = [0, 6, 12] if len(args.dropout_rate) == 3: drop_rate = args.dropout_rate else: drop_rate = [0.1, 0.4, 0.7] eps_no_archs = [10, 10, 10] state_epochs = 0 for sp in range(len(num_to_keep)): model = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES, args.layers + int(add_layers[sp]), criterion, steps=args.nodes, multiplier=args.multiplier, stem_multiplier=args.stem_multiplier, switches_normal=switches_normal, switches_reduce=switches_reduce, 
p=float(drop_rate[sp])) model = model.to(args.device) logger.info("stage:{} param size:{}MB".format( sp, tools.count_parameters_in_MB(model))) optimizer = torch.optim.SGD(model.weight_parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) optimizer_a = torch.optim.Adam(model.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) sm_dim = -1 epochs = args.epochs eps_no_arch = eps_no_archs[sp] scale_factor = 0.2 for epoch in range(epochs): lr = scheduler.get_lr()[0] logger.info('Epoch: %d lr: %e', epoch, lr) epoch_start = time.time() # training if epoch < eps_no_arch: model.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs model.update_p() train_acc, train_obj = train(state_epochs + epoch, train_queue, valid_queue, model, criterion, optimizer, optimizer_a, args, monitors, logger, train_arch=False) else: model.p = float(drop_rate[sp]) * np.exp( -(epoch - eps_no_arch) * scale_factor) model.update_p() train_acc, train_obj = train(state_epochs + epoch, train_queue, valid_queue, model, criterion, optimizer, optimizer_a, args, monitors, logger, train_arch=True) # validation valid_acc, valid_obj = infer(state_epochs + epoch, valLoader, model, criterion, args, monitors, logger) if epoch >= eps_no_arch: # log the genotype parsed at this epoch arch_param = model.arch_parameters() normal_prob = F.softmax(arch_param[0], dim=-1).data.cpu().numpy() reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy() logger.info('Genotype: {}'.format( parse_genotype(switches_normal.copy(), switches_reduce.copy(), normal_prob.copy(), reduce_prob.copy()))) scheduler.step() tools.save(model, os.path.join(args.save, 'state{}_weights.pt'.format(sp))) state_epochs += args.epochs # Save switches info for s-c refinement.
if sp == len(num_to_keep) - 1: switches_normal_2 = copy.deepcopy(switches_normal) switches_reduce_2 = copy.deepcopy(switches_reduce) arch_param = model.arch_parameters() normal_prob = F.softmax(arch_param[0], dim=-1).data.cpu().numpy() reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy() logger.info('------Stage %d end!------' % sp) logger.info("normal: \n{}".format(normal_prob)) logger.info("reduce: \n{}".format(reduce_prob)) logger.info('Genotype: {}'.format( parse_genotype(switches_normal.copy(), switches_reduce.copy(), normal_prob.copy(), reduce_prob.copy()))) # prune the search space using the latest arch weights, the previous switch table, the number of ops to drop, and the current stage switches_normal = update_switches(normal_prob.copy(), switches_normal, num_to_drop[sp], sp, len(num_to_keep)) switches_reduce = update_switches(reduce_prob.copy(), switches_reduce, num_to_drop[sp], sp, len(num_to_keep)) logger.info('------Dropping %d paths------' % num_to_drop[sp]) logger.info('switches_normal = %s', switches_normal) logging_switches(switches_normal, logger) logger.info('switches_reduce = %s', switches_reduce) logging_switches(switches_reduce, logger) if sp == len(num_to_keep) - 1: # arch_param = model.arch_parameters() # normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy() # reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy() normal_final = [0 for idx in range(14)] reduce_final = [0 for idx in range(14)] # remove all Zero operations for i in range(14): if switches_normal_2[i][0]: normal_prob[i][0] = 0 normal_final[i] = max(normal_prob[i]) if switches_reduce_2[i][0]: reduce_prob[i][0] = 0 reduce_final[i] = max(reduce_prob[i]) # Generate Architecture, similar to DARTS keep_normal = [0, 1] keep_reduce = [0, 1] n = 3 start = 2 for i in range(3): end = start + n tbsn = normal_final[start:end] tbsr = reduce_final[start:end] edge_n = sorted(range(n), key=lambda x: tbsn[x]) keep_normal.append(edge_n[-1] + start) keep_normal.append(edge_n[-2] + start) edge_r = sorted(range(n), key=lambda x: tbsr[x]) keep_reduce.append(edge_r[-1] + start) keep_reduce.append(edge_r[-2] + start) start = end n = n + 1 # set switches according to the ranking of arch parameters for i in range(14): if i not in keep_normal: for j in range(len(PRIMITIVES)): switches_normal[i][j] = False if i not in keep_reduce: for j in range(len(PRIMITIVES)): switches_reduce[i][j] = False # translate switches into genotype genotype = parse_network(switches_normal, switches_reduce) logger.info(genotype) ## restrict skipconnect (normal cell only) logger.info('Restricting skipconnect...') # generate genotypes with different numbers of skip-connect operations for sks in range(0, 9): max_sk = 8 - sks num_sk = check_sk_number(switches_normal) if num_sk <= max_sk: continue while num_sk > max_sk: normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob) switches_normal = keep_1_on(switches_normal_2, normal_prob) switches_normal = keep_2_branches(switches_normal, normal_prob) num_sk = check_sk_number(switches_normal) logger.info('Number of skip-connect: %d', max_sk) genotype = parse_network(switches_normal, switches_reduce) logger.info(genotype)
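# --- Sketch: pruning the operation switches between stages --------------------------
# Toy illustration of what update_switches does conceptually in the progressive
# search above: per edge, disable the num_to_drop still-active operations with the
# lowest softmax probability. PRIMITIVES_TOY and the probability matrix below are
# placeholders, not the repo's real search space.
import numpy as np

PRIMITIVES_TOY = ['none', 'skip_connect', 'sep_conv_3x3', 'dil_conv_3x3']
toy_switches = [[True] * len(PRIMITIVES_TOY) for _ in range(3)]   # 3 edges
toy_probs = np.random.rand(3, len(PRIMITIVES_TOY))

def drop_lowest(switches, probs, num_to_drop):
    for i, row in enumerate(switches):
        active = [j for j, on in enumerate(row) if on]
        ranked = sorted(active, key=lambda j: probs[i][j])   # lowest prob first
        for j in ranked[:num_to_drop]:
            row[j] = False
    return switches

print(drop_lowest(toy_switches, toy_probs, num_to_drop=2))
# ------------------------------------------------------------------------------------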
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled=True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.to(device) model = Network(args.init_channels, CLASSES, args.layers, criterion) model = model.to(device) #help(model) #print(model.__repr__()) #print(model.modules()) logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = TARGET_DATASET_TRAIN valid_data = TARGET_DATASET_VALID num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2)#changed from 2 to 6 search_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=2)#changed from 2 to 6 valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)#changed from 2 to 6 scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, args) for epoch in range(args.epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('epoch %d lr %e', epoch, lr) genotype = model.genotype() logging.info('genotype = %s', genotype) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) # training train_acc, train_obj, arch_grad_norm = train(train_queue, search_queue, model, architect, criterion, optimizer, lr) logging.info('train_acc %f', train_acc) # validation valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) #print(dir(model)) #'modules', 'named_children', 'named_modules', 'load_state_dict', 'genotype', 'eval', 'cells', 'children', 'arch_parameters', '_layers', #print("cells:", model.cells) #print("modules:", model.modules) #print("named_children:", model.named_children) #print("named_modules:", model.named_modules) #print("load_state_dict:", model.load_state_dict) #print("genotype:", model.genotype) #print("eval:", model.eval) #print("children:", model.children) #print("arch_parameters:", model.arch_parameters) #print("_layers:", model._layers) utils.save(model, os.path.join(args.save, 'weights.pt'))
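# --- Sketch: reading a discrete genotype off the alphas ------------------------------
# The loop above logs model.genotype() together with softmax(alphas); the snippet
# below shows the basic idea of that read-out on dummy data: per edge, pick the
# strongest non-'none' operation. Op names and tensor shapes are illustrative only.
import torch
import torch.nn.functional as F

toy_ops = ['none', 'max_pool_3x3', 'skip_connect', 'sep_conv_3x3']
toy_alphas = torch.randn(5, len(toy_ops))          # 5 edges x 4 candidate ops
toy_probs = F.softmax(toy_alphas, dim=-1)

toy_genotype = []
for edge, p in enumerate(toy_probs):
    p = p.clone()
    p[toy_ops.index('none')] = 0.0                 # 'none' never appears in a genotype
    toy_genotype.append((toy_ops[int(p.argmax())], edge))
print(toy_genotype)
# ------------------------------------------------------------------------------------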
device = torch.device("cuda:{}".format(args.gpu)) cpu_device = torch.device("cpu") torch.cuda.set_device(args.gpu) cudnn.deterministic = True cudnn.enabled = True cudnn.benchmark = False CIFAR_CLASSES = 10 genotype = eval("genotypes.%s" % "DARTS") criterion = nn.CrossEntropyLoss() criterion.to(device) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, device) model.to(device) optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) #split = 32000
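# --- Sketch: reproducibility settings used in the fragment above ---------------------
# Deterministic cuDNN kernels trade speed for run-to-run stability, while benchmark
# mode does the opposite. The helper name below is ours, not the repo's.
import random
import numpy as np
import torch
import torch.backends.cudnn as cudnn

def set_reproducible(seed, deterministic=True):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    cudnn.enabled = True
    cudnn.deterministic = deterministic
    cudnn.benchmark = not deterministic   # benchmark picks the fastest (non-deterministic) kernels

set_reproducible(seed=2, deterministic=True)
# ------------------------------------------------------------------------------------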
def train(): use_gpu = cfg.MODEL.DEVICE == "cuda" # 1、make dataloader train_loader, val_loader, test_loader, num_query, num_class = darts_make_data_loader( cfg) # print(num_query, num_class) # 2、make model model = Network(num_class, cfg) # tensor = torch.randn(2, 3, 256, 128) # res = model(tensor) # print(res[0].size()) [2, 751] # 3、make optimizer optimizer = make_optimizer(cfg, model) arch_optimizer = torch.optim.Adam( model._arch_parameters(), lr=cfg.SOLVER.ARCH_LR, betas=(0.5, 0.999), weight_decay=cfg.SOLVER.ARCH_WEIGHT_DECAY) # 4、make lr scheduler lr_scheduler = make_lr_scheduler(cfg, optimizer) # make lr scheduler arch_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( arch_optimizer, [80, 160], 0.1) # 5、make loss loss_fn = darts_make_loss(cfg) # model._set_loss(loss_fn, compute_loss_acc) # 6、make architect # architect = Architect(model, cfg) # get parameters device = cfg.MODEL.DEVICE use_gpu = device == "cuda" pretrained = cfg.MODEL.PRETRAINED != "" log_period = cfg.OUTPUT.LOG_PERIOD ckpt_period = cfg.OUTPUT.CKPT_PERIOD eval_period = cfg.OUTPUT.EVAL_PERIOD output_dir = cfg.OUTPUT.DIRS ckpt_save_path = output_dir + cfg.OUTPUT.CKPT_DIRS epochs = cfg.SOLVER.MAX_EPOCHS batch_size = cfg.SOLVER.BATCH_SIZE grad_clip = cfg.SOLVER.GRAD_CLIP batch_num = len(train_loader) log_iters = batch_num // log_period if not os.path.exists(ckpt_save_path): os.makedirs(ckpt_save_path) # create *_result.xlsx # save the result for analyze name = (cfg.OUTPUT.LOG_NAME).split(".")[0] + ".xlsx" result_path = cfg.OUTPUT.DIRS + name wb = xl.Workbook() sheet = wb.worksheets[0] titles = [ 'size/M', 'speed/ms', 'final_planes', 'acc', 'mAP', 'r1', 'r5', 'r10', 'loss', 'acc', 'mAP', 'r1', 'r5', 'r10', 'loss', 'acc', 'mAP', 'r1', 'r5', 'r10', 'loss' ] sheet.append(titles) check_epochs = [40, 80, 120, 160, 200, 240, 280, 320, 360, epochs] values = [] logger = logging.getLogger("CSNet_Search.train") size = count_parameters(model) values.append(format(size, '.2f')) values.append(model.final_planes) logger.info("the param number of the model is {:.2f} M".format(size)) logger.info("Starting Search CDNetwork") best_mAP, best_r1 = 0., 0. 
is_best = False avg_loss, avg_acc = RunningAverageMeter(), RunningAverageMeter() avg_time, global_avg_time = AverageMeter(), AverageMeter() if use_gpu: model = model.to(device) if pretrained: logger.info("load self-pretrained checkpoint to init") model.load_pretrained_model(cfg.MODEL.PRETRAINED) else: logger.info("use Kaiming init to initialize the model") model.kaiming_init_() # exit(1) for epoch in range(epochs): model.set_tau(cfg.MODEL.TAU_MAX - (cfg.MODEL.TAU_MAX - cfg.MODEL.TAU_MIN) * epoch / (epochs - 1)) lr_scheduler.step() lr = lr_scheduler.get_lr()[0] # step the architecture optimizer's lr scheduler arch_lr_scheduler.step() # if epoch k was saved, resume training from epoch k+1 if pretrained and epoch < model.start_epoch: continue # print(epoch) # exit(1) model.train() avg_loss.reset() avg_acc.reset() avg_time.reset() for i, batch in enumerate(train_loader): t0 = time.time() imgs, labels = batch val_imgs, val_labels = next(iter(val_loader)) if use_gpu: imgs = imgs.to(device) labels = labels.to(device) val_imgs = val_imgs.to(device) val_labels = val_labels.to(device) # 1. update the weights optimizer.zero_grad() res = model(imgs) # loss = loss_fn(scores, feats, labels) loss, acc = compute_loss_acc(res, labels, loss_fn) loss.backward() if grad_clip != 0: nn.utils.clip_grad_norm_(model.parameters(), grad_clip) optimizer.step() # 2. update the alphas arch_optimizer.zero_grad() res = model(val_imgs) val_loss, val_acc = compute_loss_acc(res, val_labels, loss_fn) val_loss.backward() arch_optimizer.step() # compute the acc # acc = (scores.max(1)[1] == labels).float().mean() t1 = time.time() avg_time.update((t1 - t0) / batch_size) avg_loss.update(loss) avg_acc.update(acc) # log info if (i + 1) % log_iters == 0: logger.info( "epoch {}: {}/{} with loss {:.5f} and acc {:.3f}". format(epoch + 1, i + 1, batch_num, avg_loss.avg, avg_acc.avg)) logger.info( "end of epoch {}/{} with lr: {:.5f} and avg_time: {:.3f} ms".
format(epoch + 1, epochs, lr, avg_time.avg * 1000)) global_avg_time.update(avg_time.avg) # test the model if (epoch + 1) % eval_period == 0 or (epoch + 1) in check_epochs: model.eval() metrics = R1_mAP(num_query, use_gpu=use_gpu) with torch.no_grad(): for vi, batch in enumerate(test_loader): # break # print(len(batch)) imgs, labels, camids = batch if use_gpu: imgs = imgs.to(device) feats = model(imgs) metrics.update((feats, labels, camids)) #compute cmc and mAP cmc, mAP = metrics.compute() logger.info("validation results at epoch {}".format(epoch + 1)) logger.info("mAP:{:2%}".format(mAP)) for r in [1, 5, 10]: logger.info("CMC curve, Rank-{:<3}:{:.2%}".format( r, cmc[r - 1])) # determine whether current model is the best if mAP > best_mAP: is_best = True best_mAP = mAP logger.info("Get a new best mAP") if cmc[0] > best_r1: is_best = True best_r1 = cmc[0] logger.info("Get a new best r1") # add the result to sheet if (epoch + 1) in check_epochs: val = [avg_acc.avg, mAP, cmc[0], cmc[4], cmc[9]] change = [format(v * 100, '.2f') for v in val] change.append(format(avg_loss.avg, '.3f')) values.extend(change) # whether to save the model if (epoch + 1) % ckpt_period == 0 or is_best: torch.save(model.state_dict(), ckpt_save_path + "checkpoint_{}.pth".format(epoch + 1)) model._parse_genotype(file=ckpt_save_path + "genotype_{}.json".format(epoch + 1)) logger.info("checkpoint {} was saved".format(epoch + 1)) if is_best: torch.save(model.state_dict(), ckpt_save_path + "best_ckpt.pth") model._parse_genotype(file=ckpt_save_path + "best_genotype.json") logger.info("best_checkpoint was saved") is_best = False # exit(1) values.insert(1, format(global_avg_time.avg * 1000, '.2f')) sheet.append(values) wb.save(result_path) logger.info("Ending Search GDAS_Search")
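# --- Sketch: Rank-1 / mAP evaluation in miniature ------------------------------------
# A compact, simplified version of what the R1_mAP metric above computes, run on a
# tiny synthetic query/gallery split. It skips the camera-id filtering the real re-ID
# metric applies, so treat it only as an illustration of the evaluation step.
import numpy as np

def rank1_and_map(q_feats, g_feats, q_ids, g_ids):
    q = q_feats / np.linalg.norm(q_feats, axis=1, keepdims=True)
    g = g_feats / np.linalg.norm(g_feats, axis=1, keepdims=True)
    dist = 1.0 - q @ g.T                       # cosine distance
    rank1_hits, aps = [], []
    for i in range(len(q_ids)):
        order = np.argsort(dist[i])
        matches = (g_ids[order] == q_ids[i]).astype(float)
        rank1_hits.append(matches[0])
        if matches.sum() > 0:
            precision = np.cumsum(matches) / (np.arange(len(matches)) + 1)
            aps.append((precision * matches).sum() / matches.sum())
    return float(np.mean(rank1_hits)), float(np.mean(aps))

q_feats, g_feats = np.random.rand(4, 16), np.random.rand(10, 16)
q_ids, g_ids = np.array([0, 1, 2, 3]), np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4])
print(rank1_and_map(q_feats, g_feats, q_ids, g_ids))
# ------------------------------------------------------------------------------------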