def _construct_model_from_theta(self, theta):
    """Build a fresh ``Network`` whose weights are taken from ``theta``.

    ``theta`` is consumed as a flat tensor: for every entry yielded by
    ``self.model.named_parameters()``, in iteration order, the next
    ``numel()`` elements are sliced off and viewed with that parameter's
    shape.  The architecture parameters are copied from ``self.model``.

    Args:
        theta: flat tensor holding all named parameters back to back; its
            length must equal the total number of parameter elements.

    Returns:
        The new ``Network`` instance, moved to CUDA.

    Raises:
        AssertionError: if ``theta`` is not fully consumed by the parameters.
    """
    model_clone = Network(self.model._C,
                          self.model._num_classes,
                          self.model._layers,
                          self.model._criterion).cuda()

    # Mirror the current architecture parameters into the clone.
    for x, y in zip(model_clone.arch_parameters(),
                    self.model.arch_parameters()):
        x.data.copy_(y.data)

    # Start from the full state dict so entries that are not named
    # parameters are carried over unchanged.
    model_dict = self.model.state_dict()

    params, offset = {}, 0
    for k, v in self.model.named_parameters():
        # numel() is a plain Python int; np.prod(v.size()) yields a NumPy
        # scalar (float 1.0 for a 0-dim parameter), which is not a valid
        # slice bound.
        v_length = v.numel()
        params[k] = theta[offset:offset + v_length].view(v.size())
        offset += v_length

    assert offset == len(theta)
    model_dict.update(params)
    model_clone.load_state_dict(model_dict)
    return model_clone.cuda()
def main():
    """Run the architecture search loop on ImageNet.

    Exits with status 1 when CUDA is unavailable.  Otherwise seeds the RNGs,
    builds the ``Network``, its SGD optimizer, the train/val loaders, a
    cosine-annealing schedule and the ``Architect``, then for every epoch
    logs the genotype and softmaxed architecture parameters, runs ``train``
    and ``infer``, and saves the weights to ``<args.save>/weights.pt``.
    """
    # check gpu is available
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # init
    seed = args.seed
    np.random.seed(seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # criterion, model, optimizer, for model training
    criterion = nn.CrossEntropyLoss().cuda()  # TODO add latency loss
    model = Network(channels, steps, strides, CLASSES, criterion).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    # prepare datasets (the CIFAR-10 variant and the train_portion split
    # with SubsetRandomSampler are disabled; full splits are used)
    train_transform, valid_transform = utils._data_transforms_imagenet(args)
    train_data = dset.ImageNet(root=args.data, split='train',
                               download=True, transform=train_transform)
    valid_data = dset.ImageNet(root=args.data, split='val',
                               download=True, transform=valid_transform)

    num_train = len(train_data)

    # create dataloader
    def _make_queue(dataset):
        # Both queues share the same loader settings.
        return torch.utils.data.DataLoader(
            dataset,
            batch_size=args.batch_size,
            pin_memory=True,
            num_workers=2,
        )

    train_queue = _make_queue(train_data)
    valid_queue = _make_queue(valid_data)

    # learning rate scheduler with cosineAnnealingtopk
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # architect
    architect = Architect(model, args)

    # training
    weights_path_parts = (args.save, 'weights.pt')
    for epoch in range(args.epochs):
        # lr update
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # get genotype for logging
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        for alpha in model.arch_parameters():
            print(F.softmax(alpha, dim=-1).data)

        # training
        train_acc, _train_obj = train(train_queue, valid_queue, model,
                                      architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, _valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(*weights_path_parts))
def search(self, train_x, train_y, valid_x, valid_y, metadata):
    """Run the weight/architecture search and return the trained model.

    Each training step first updates the architecture parameters via
    ``architect.step_milenas_2ndorder`` using one training batch and one
    validation batch, then updates the network weights with SGD on the
    training batch.  After every epoch the model is evaluated on the
    validation loader, the LR scheduler is stepped and the genotype's
    cell counts are printed.

    Args:
        train_x, train_y: training samples and labels; zipped pairwise into
            the training dataset.  ``np.array(train_x).shape[1]`` is passed
            to ``Network`` as the data channel count.
        valid_x, valid_y: validation samples and labels, zipped likewise.
        metadata: mapping that must contain ``'n_classes'``.

    Returns:
        The searched model (wrapped in ``nn.DataParallel`` only when the
        local ``is_multi_gpu`` flag is enabled).

    Exits the process with status 1 when CUDA is not available.
    """
    np.random.seed(self.seed)
    cudnn.benchmark = True
    torch.manual_seed(self.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(self.seed)
    is_multi_gpu = False

    helper_function()
    n_classes = metadata['n_classes']

    # check torch available
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    cudnn.benchmark = True
    cudnn.enabled = True

    # loading criterion
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    train_pack = list(zip(train_x, train_y))
    valid_pack = list(zip(valid_x, valid_y))
    data_channel = np.array(train_x).shape[1]

    train_loader = torch.utils.data.DataLoader(
        train_pack, int(self.batch_size), pin_memory=True, num_workers=4)
    valid_loader = torch.utils.data.DataLoader(
        valid_pack, int(self.batch_size), pin_memory=True, num_workers=4)

    model = Network(self.init_channels, data_channel, n_classes,
                    self.layers, criterion)
    model = model.cuda()

    # since submission server does not deal with multi-gpu
    if is_multi_gpu:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    # The SGD optimizer only receives the non-architecture parameters.
    arch_parameters = (model.module.arch_parameters()
                       if is_multi_gpu else model.arch_parameters())
    arch_param_ids = {id(p) for p in arch_parameters}

    parameters = (model.module.parameters()
                  if is_multi_gpu else model.parameters())
    weight_params = [p for p in parameters if id(p) not in arch_param_ids]

    optimizer = torch.optim.SGD(weight_params,
                                self.learning_rate,
                                momentum=self.momentum,
                                weight_decay=self.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(self.epochs), eta_min=self.learning_rate_min)

    architect = Architect(is_multi_gpu, model, criterion, self.momentum,
                          self.weight_decay, self.arch_learning_rate,
                          self.arch_weight_decay)

    # One long-lived iterator over the validation loader for the
    # architecture step.  Calling next(iter(valid_loader)) every step would
    # rebuild the iterator each time and, because the loader is not
    # shuffled, always hand back the very first validation batch.
    search_iter = iter(valid_loader)

    for epoch in range(self.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()

        train_batch = time.time()

        for step, (inputs, targets) in enumerate(train_loader):
            model.train()
            n = inputs.size(0)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # get the next minibatch from the search queue, wrapping
            # around once the validation loader is exhausted
            try:
                input_search, target_search = next(search_iter)
            except StopIteration:
                search_iter = iter(valid_loader)
                input_search, target_search = next(search_iter)
            input_search = input_search.cuda()
            target_search = target_search.cuda()

            # Update architecture alpha by Adam-SGD
            architect.step_milenas_2ndorder(inputs, targets, input_search,
                                            target_search, lr, optimizer,
                                            1, 1)

            # Update weights w by SGD, ignore the weights that gained
            # during architecture training
            optimizer.zero_grad()
            logits = model(inputs)
            loss = criterion(logits, targets)

            loss.backward()
            # NOTE(review): the clip is applied to the architecture
            # parameters, while optimizer.step() below only updates the
            # weight parameters.  Kept as in the original; confirm whether
            # clipping the weight gradients was intended.
            clip_params = (model.module.arch_parameters()
                           if is_multi_gpu else model.arch_parameters())
            nn.utils.clip_grad_norm_(clip_params, self.grad_clip)
            optimizer.step()

            prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % self.report_freq == 0:
                average_batch_t = (time.time() - train_batch) / (step + 1)
                print("Epoch: {}, Step: {}, Top1: {}, Top5: {}, T: {}".
                      format(
                          epoch, step, top1.avg, top5.avg,
                          show_time(average_batch_t *
                                    (len(train_loader) - step))))

        model.eval()

        # validation
        with torch.no_grad():
            objs = utils.AvgrageMeter()
            top1 = utils.AvgrageMeter()
            top5 = utils.AvgrageMeter()

            for step, (inputs, targets) in enumerate(valid_loader):
                inputs = inputs.cuda()
                targets = targets.cuda()

                logits = model(inputs)
                loss = criterion(logits, targets)

                prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
                n = inputs.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                if step % self.report_freq == 0:
                    print("Epoch: {}, Step: {}, Top1: {}, Top5: {}".format(
                        epoch, step, top1.avg, top5.avg))

        scheduler.step()

        # save the structure
        genotype, normal_cnn_count, reduce_cnn_count = (
            model.module.genotype() if is_multi_gpu else model.genotype())
        print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count))

    return model
def main():
    """Run the CIFAR-10 architecture search configured by ``args``.

    Exits with status 1 when CUDA is unavailable.  Fills
    ``SearchControllerConf`` from the command-line flags and builds the
    ``Network``.  With ``args.random_search`` set it only logs the result of
    ``model.random_generate()`` and exits with status 0.  Otherwise it
    splits the CIFAR-10 training set by ``args.train_portion`` into train
    and validation queues and, per epoch, logs the genotype and softmaxed
    alphas, records the alpha history, trains, validates and saves the
    weights and alpha histories under ``args.save``.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    seed = args.seed
    np.random.seed(seed)
    torch.cuda.set_device(0)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()

    # Noise Darts
    if not args.noise_darts:
        SearchControllerConf['noise_darts'] = None
    else:
        SearchControllerConf['noise_darts']['noise_type'] = args.noise_type
        SearchControllerConf['noise_darts']['T_max'] = args.max_step

    # Random Darts
    if not args.random_search:
        SearchControllerConf['random_search'] = None
    else:
        SearchControllerConf['random_search']['num_identity'] = args.num_identity
        SearchControllerConf['random_search']['num_arch'] = args.num_arch
        SearchControllerConf['random_search']['flops_threshold'] = args.flops_threshold

    # Reweight Darts
    SearchControllerConf['reweight'] = args.reweight

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    criterion).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # Random search only generates candidate genotypes and stops here.
    if args.random_search:
        genotype_list = model.random_generate()
        logging.info('genotype list = %s', genotype_list)
        logging.info('generate done!')
        sys.exit(0)

    model_optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay,
    )

    ## single level
    arch_optimizer = torch.optim.Adam(
        model.arch_parameters(),
        lr=args.arch_learning_rate,
        betas=(0.9, 0.999),
        weight_decay=args.arch_weight_decay,
    )

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    def _make_queue(subset):
        # Both queues draw from the training set, restricted to ``subset``.
        return torch.utils.data.DataLoader(
            train_data,
            batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(subset),
            pin_memory=True,
            num_workers=2,
        )

    train_queue = _make_queue(indices[:split])
    valid_queue = _make_queue(indices[split:num_train])

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        for alphas in (model.alphas_normal, model.alphas_reduce):
            logging.info(F.softmax(alphas, dim=-1))
        model.update_history()

        # training and search the model
        train_acc, _train_obj = train(train_queue, valid_queue, model,
                                      architect, criterion, model_optimizer,
                                      lr, epoch)
        logging.info('train_acc %f', train_acc)

        # validation the model
        valid_acc, _valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reduce'))
class neural_architecture_search(): def __init__(self, args): self.args = args if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) if self.args.distributed: # Init distributed environment self.rank, self.world_size, self.device = init_dist( port=self.args.port) self.seed = self.rank * self.args.seed else: torch.cuda.set_device(self.args.gpu) self.device = torch.device("cuda") self.rank = 0 self.seed = self.args.seed self.world_size = 1 if self.args.fix_seedcudnn: random.seed(self.seed) torch.backends.cudnn.deterministic = True np.random.seed(self.seed) cudnn.benchmark = False torch.manual_seed(self.seed) cudnn.enabled = True torch.cuda.manual_seed(self.seed) torch.cuda.manual_seed_all(self.seed) else: np.random.seed(self.seed) cudnn.benchmark = True torch.manual_seed(self.seed) cudnn.enabled = True torch.cuda.manual_seed(self.seed) torch.cuda.manual_seed_all(self.seed) self.path = os.path.join(generate_date, self.args.save) if self.rank == 0: utils.create_exp_dir(generate_date, self.path, scripts_to_save=glob.glob('*.py')) logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(self.path, 'log.txt')) fh.setFormatter(logging.Formatter(log_format)) logging.getLogger().addHandler(fh) logging.info("self.args = %s", self.args) self.logger = tensorboardX.SummaryWriter( './runs/' + generate_date + '/nas_{}'.format(self.args.remark)) else: self.logger = None # set default resource_lambda for different methods if self.args.resource_efficient: if self.args.method == 'policy_gradient': if self.args.log_penalty: default_resource_lambda = 1e-4 else: default_resource_lambda = 1e-5 if self.args.method == 'reparametrization': if self.args.log_penalty: default_resource_lambda = 1e-2 else: default_resource_lambda = 1e-5 if self.args.method == 'discrete': if self.args.log_penalty: default_resource_lambda = 1e-2 else: default_resource_lambda = 1e-4 if 
self.args.resource_lambda == default_lambda: self.args.resource_lambda = default_resource_lambda #initialize loss function self.criterion = nn.CrossEntropyLoss().to(self.device) #initialize model self.init_model() #calculate model param size if self.rank == 0: logging.info("param size = %fMB", utils.count_parameters_in_MB(self.model)) self.model._logger = self.logger self.model._logging = logging #initialize optimizer self.init_optimizer() #iniatilize dataset loader self.init_loaddata() self.update_theta = True self.update_alpha = True def init_model(self): self.model = Network(self.args.init_channels, CIFAR_CLASSES, self.args.layers, self.criterion, self.args, self.rank, self.world_size) self.model.to(self.device) if self.args.distributed: broadcast_params(self.model) for v in self.model.parameters(): if v.requires_grad: if v.grad is None: v.grad = torch.zeros_like(v) self.model.normal_log_alpha.grad = torch.zeros_like( self.model.normal_log_alpha) self.model.reduce_log_alpha.grad = torch.zeros_like( self.model.reduce_log_alpha) def init_optimizer(self): if args.distributed: self.optimizer = torch.optim.SGD( [ param for name, param in self.model.named_parameters() if name != 'normal_log_alpha' and name != 'reduce_log_alpha' ], self.args.learning_rate, momentum=self.args.momentum, weight_decay=self.args.weight_decay) self.arch_optimizer = torch.optim.Adam( [ param for name, param in self.model.named_parameters() if name == 'normal_log_alpha' or name == 'reduce_log_alpha' ], lr=self.args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=self.args.arch_weight_decay) else: self.optimizer = torch.optim.SGD(self.model.parameters(), self.args.learning_rate, momentum=self.args.momentum, weight_decay=args.weight_decay) self.arch_optimizer = torch.optim.SGD( self.model.arch_parameters(), lr=self.args.arch_learning_rate) def init_loaddata(self): train_transform, valid_transform = utils._data_transforms_cifar10( self.args) train_data = dset.CIFAR10(root=self.args.data, 
train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=self.args.data, train=False, download=True, transform=valid_transform) if self.args.seed: def worker_init_fn(): seed = self.seed np.random.seed(seed) random.seed(seed) torch.manual_seed(seed) return else: worker_init_fn = None if self.args.distributed: train_sampler = DistributedSampler(train_data) valid_sampler = DistributedSampler(valid_data) self.train_queue = torch.utils.data.DataLoader( train_data, batch_size=self.args.batch_size // self.world_size, shuffle=False, num_workers=0, pin_memory=False, sampler=train_sampler) self.valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=self.args.batch_size // self.world_size, shuffle=False, num_workers=0, pin_memory=False, sampler=valid_sampler) else: self.train_queue = torch.utils.data.DataLoader( train_data, batch_size=self.args.batch_size, shuffle=True, pin_memory=False, num_workers=2) self.valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=self.args.batch_size, shuffle=False, pin_memory=False, num_workers=2) def main(self): # lr scheduler: cosine annealing # temp scheduler: linear annealing (self-defined in utils) self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( self.optimizer, float(self.args.epochs), eta_min=self.args.learning_rate_min) self.temp_scheduler = utils.Temp_Scheduler(self.args.epochs, self.model._temp, self.args.temp, temp_min=self.args.temp_min) for epoch in range(self.args.epochs): if self.args.random_sample_pretrain: if epoch < self.args.random_sample_pretrain_epoch: self.args.random_sample = True else: self.args.random_sample = False self.scheduler.step() if self.args.temp_annealing: self.model._temp = self.temp_scheduler.step() self.lr = self.scheduler.get_lr()[0] if self.rank == 0: logging.info('epoch %d lr %e temp %e', epoch, self.lr, self.model._temp) self.logger.add_scalar('epoch_temp', self.model._temp, epoch) logging.info(self.model.normal_log_alpha) 
logging.info(self.model.reduce_log_alpha) logging.info( self.model._get_weights(self.model.normal_log_alpha[0])) logging.info( self.model._get_weights(self.model.reduce_log_alpha[0])) genotype_edge_all = self.model.genotype_edge_all() if self.rank == 0: logging.info('genotype_edge_all = %s', genotype_edge_all) # create genotypes.txt file txt_name = self.args.remark + '_genotype_edge_all_epoch' + str( epoch) utils.txt('genotype', self.args.save, txt_name, str(genotype_edge_all), generate_date) self.model.train() train_acc, loss, error_loss, loss_alpha = self.train( epoch, logging) if self.rank == 0: logging.info('train_acc %f', train_acc) self.logger.add_scalar("epoch_train_acc", train_acc, epoch) self.logger.add_scalar("epoch_train_error_loss", error_loss, epoch) if self.args.dsnas: self.logger.add_scalar("epoch_train_alpha_loss", loss_alpha, epoch) # validation self.model.eval() valid_acc, valid_obj = self.infer(epoch) if self.args.gen_max_child: self.args.gen_max_child_flag = True valid_acc_max_child, valid_obj_max_child = self.infer(epoch) self.args.gen_max_child_flag = False if self.rank == 0: logging.info('valid_acc %f', valid_acc) self.logger.add_scalar("epoch_valid_acc", valid_acc, epoch) if self.args.gen_max_child: logging.info('valid_acc_argmax_alpha %f', valid_acc_max_child) self.logger.add_scalar("epoch_valid_acc_argmax_alpha", valid_acc_max_child, epoch) utils.save(self.model, os.path.join(self.path, 'weights.pt')) if self.rank == 0: logging.info(self.model.normal_log_alpha) logging.info(self.model.reduce_log_alpha) genotype_edge_all = self.model.genotype_edge_all() logging.info('genotype_edge_all = %s', genotype_edge_all) def train(self, epoch, logging): objs = utils.AvgrageMeter() top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() grad = utils.AvgrageMeter() normal_resource_gradient = 0 reduce_resource_gradient = 0 normal_loss_gradient = 0 reduce_loss_gradient = 0 normal_total_gradient = 0 reduce_total_gradient = 0 loss_alpha = None count = 0 for 
step, (input, target) in enumerate(self.train_queue): if self.args.alternate_update: if step % 2 == 0: self.update_theta = True self.update_alpha = False else: self.update_theta = False self.update_alpha = True n = input.size(0) input = input.to(self.device) target = target.to(self.device, non_blocking=True) if self.args.snas: logits, logits_aux, penalty, op_normal, op_reduce = self.model( input) error_loss = self.criterion(logits, target) if self.args.auxiliary: loss_aux = self.criterion(logits_aux, target) error_loss += self.args.auxiliary_weight * loss_aux if self.args.dsnas: logits, error_loss, loss_alpha, penalty = self.model( input, target, self.criterion) num_normal = self.model.num_normal num_reduce = self.model.num_reduce normal_arch_entropy = self.model._arch_entropy( self.model.normal_log_alpha) reduce_arch_entropy = self.model._arch_entropy( self.model.reduce_log_alpha) if self.args.resource_efficient: if self.args.method == 'policy_gradient': resource_penalty = (penalty[2]) / 6 + self.args.ratio * ( penalty[7]) / 2 log_resource_penalty = ( penalty[35]) / 6 + self.args.ratio * (penalty[36]) / 2 elif self.args.method == 'reparametrization': resource_penalty = (penalty[26]) / 6 + self.args.ratio * ( penalty[25]) / 2 log_resource_penalty = ( penalty[37]) / 6 + self.args.ratio * (penalty[38]) / 2 elif self.args.method == 'discrete': resource_penalty = (penalty[28]) / 6 + self.args.ratio * ( penalty[27]) / 2 log_resource_penalty = ( penalty[39]) / 6 + self.args.ratio * (penalty[40]) / 2 elif self.args.method == 'none': # TODo resource_penalty = torch.zeros(1).cuda() log_resource_penalty = torch.zeros(1).cuda() else: logging.info( "wrongly input of method, please re-enter --method from 'policy_gradient', 'discrete', " "'reparametrization', 'none'") sys.exit(1) else: resource_penalty = torch.zeros(1).cuda() log_resource_penalty = torch.zeros(1).cuda() if self.args.log_penalty: resource_loss = self.model._resource_lambda * log_resource_penalty else: 
resource_loss = self.model._resource_lambda * resource_penalty if self.args.loss: if self.args.snas: loss = resource_loss.clone() + error_loss.clone() elif self.args.dsnas: loss = resource_loss.clone() else: loss = resource_loss.clone() + -child_coef * ( torch.log(normal_one_hot_prob) + torch.log(reduce_one_hot_prob)).sum() else: if self.args.snas or self.args.dsnas: loss = error_loss.clone() if self.args.distributed: loss.div_(self.world_size) error_loss.div_(self.world_size) resource_loss.div_(self.world_size) if self.args.dsnas: loss_alpha.div_(self.world_size) # logging gradient count += 1 if self.args.resource_efficient: self.optimizer.zero_grad() self.arch_optimizer.zero_grad() resource_loss.backward(retain_graph=True) if not self.args.random_sample: normal_resource_gradient += self.model.normal_log_alpha.grad reduce_resource_gradient += self.model.reduce_log_alpha.grad if self.args.snas: self.optimizer.zero_grad() self.arch_optimizer.zero_grad() error_loss.backward(retain_graph=True) if not self.args.random_sample: normal_loss_gradient += self.model.normal_log_alpha.grad reduce_loss_gradient += self.model.reduce_log_alpha.grad self.optimizer.zero_grad() self.arch_optimizer.zero_grad() if self.args.snas or not self.args.random_sample and not self.args.dsnas: loss.backward() if not self.args.random_sample: normal_total_gradient += self.model.normal_log_alpha.grad reduce_total_gradient += self.model.reduce_log_alpha.grad if self.args.distributed: reduce_tensorgradients(self.model.parameters(), sync=True) nn.utils.clip_grad_norm_([ param for name, param in self.model.named_parameters() if name != 'normal_log_alpha' and name != 'reduce_log_alpha' ], self.args.grad_clip) arch_grad_norm = nn.utils.clip_grad_norm_([ param for name, param in self.model.named_parameters() if name == 'normal_log_alpha' or name == 'reduce_log_alpha' ], 10.) 
else: nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip) arch_grad_norm = nn.utils.clip_grad_norm_( self.model.arch_parameters(), 10.) grad.update(arch_grad_norm) if not self.args.fix_weight and self.update_theta: self.optimizer.step() self.optimizer.zero_grad() if not self.args.random_sample and self.update_alpha: self.arch_optimizer.step() self.arch_optimizer.zero_grad() if self.rank == 0: self.logger.add_scalar( "iter_train_loss", error_loss, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "normal_arch_entropy", normal_arch_entropy, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reduce_arch_entropy", reduce_arch_entropy, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "total_arch_entropy", normal_arch_entropy + reduce_arch_entropy, step + len(self.train_queue.dataset) * epoch) if self.args.dsnas: #reward_normal_edge self.logger.add_scalar( "reward_normal_edge_0", self.model.normal_edge_reward[0], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_1", self.model.normal_edge_reward[1], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_2", self.model.normal_edge_reward[2], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_3", self.model.normal_edge_reward[3], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_4", self.model.normal_edge_reward[4], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_5", self.model.normal_edge_reward[5], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_6", self.model.normal_edge_reward[6], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_7", self.model.normal_edge_reward[7], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_8", 
self.model.normal_edge_reward[8], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_9", self.model.normal_edge_reward[9], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_10", self.model.normal_edge_reward[10], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_11", self.model.normal_edge_reward[11], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_12", self.model.normal_edge_reward[12], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_normal_edge_13", self.model.normal_edge_reward[13], step + len(self.train_queue.dataset) * epoch) #reward_reduce_edge self.logger.add_scalar( "reward_reduce_edge_0", self.model.reduce_edge_reward[0], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_1", self.model.reduce_edge_reward[1], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_2", self.model.reduce_edge_reward[2], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_3", self.model.reduce_edge_reward[3], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_4", self.model.reduce_edge_reward[4], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_5", self.model.reduce_edge_reward[5], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_6", self.model.reduce_edge_reward[6], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_7", self.model.reduce_edge_reward[7], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_8", self.model.reduce_edge_reward[8], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_9", self.model.reduce_edge_reward[9], step + 
len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_10", self.model.reduce_edge_reward[10], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_11", self.model.reduce_edge_reward[11], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_12", self.model.reduce_edge_reward[12], step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "reward_reduce_edge_13", self.model.reduce_edge_reward[13], step + len(self.train_queue.dataset) * epoch) #policy size self.logger.add_scalar( "iter_normal_size_policy", penalty[2] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_size_policy", penalty[7] / num_reduce, step + len(self.train_queue.dataset) * epoch) # baseline: discrete_probability self.logger.add_scalar( "iter_normal_size_baseline", penalty[3] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_flops_baseline", penalty[5] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_mac_baseline", penalty[6] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_size_baseline", penalty[8] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_flops_baseline", penalty[9] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_mac_baseline", penalty[10] / num_reduce, step + len(self.train_queue.dataset) * epoch) # R - median(R) self.logger.add_scalar( "iter_normal_size-avg", penalty[60] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_flops-avg", penalty[61] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_mac-avg", penalty[62] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( 
"iter_reduce_size-avg", penalty[63] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_flops-avg", penalty[64] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_mac-avg", penalty[65] / num_reduce, step + len(self.train_queue.dataset) * epoch) # lnR - ln(median) self.logger.add_scalar( "iter_normal_ln_size-ln_avg", penalty[66] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_ln_flops-ln_avg", penalty[67] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_ln_mac-ln_avg", penalty[68] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_ln_size-ln_avg", penalty[69] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_ln_flops-ln_avg", penalty[70] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_ln_mac-ln_avg", penalty[71] / num_reduce, step + len(self.train_queue.dataset) * epoch) ''' self.logger.add_scalar("iter_normal_size_normalized", penalty[17] / 6, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar("iter_normal_flops_normalized", penalty[18] / 6, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar("iter_normal_mac_normalized", penalty[19] / 6, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar("iter_reduce_size_normalized", penalty[20] / 2, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar("iter_reduce_flops_normalized", penalty[21] / 2, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar("iter_reduce_mac_normalized", penalty[22] / 2, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar("iter_normal_penalty_normalized", penalty[23] / 6, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar("iter_reduce_penalty_normalized", 
penalty[24] / 2, step + len(self.train_queue.dataset) * epoch) ''' # Monte_Carlo(R_i) self.logger.add_scalar( "iter_normal_size_mc", penalty[29] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_flops_mc", penalty[30] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_mac_mc", penalty[31] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_size_mc", penalty[32] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_flops_mc", penalty[33] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_mac_mc", penalty[34] / num_reduce, step + len(self.train_queue.dataset) * epoch) # log(|R_i|) self.logger.add_scalar( "iter_normal_log_size", penalty[41] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_log_flops", penalty[42] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_log_mac", penalty[43] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_log_size", penalty[44] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_log_flops", penalty[45] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_log_mac", penalty[46] / num_reduce, step + len(self.train_queue.dataset) * epoch) # log(P)R_i self.logger.add_scalar( "iter_normal_logP_size", penalty[47] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_logP_flops", penalty[48] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_logP_mac", penalty[49] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_logP_size", penalty[50] / num_reduce, step + 
len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_logP_flops", penalty[51] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_logP_mac", penalty[52] / num_reduce, step + len(self.train_queue.dataset) * epoch) # log(P)log(R_i) self.logger.add_scalar( "iter_normal_logP_log_size", penalty[53] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_logP_log_flops", penalty[54] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_normal_logP_log_mac", penalty[55] / num_normal, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_logP_log_size", penalty[56] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_logP_log_flops", penalty[57] / num_reduce, step + len(self.train_queue.dataset) * epoch) self.logger.add_scalar( "iter_reduce_logP_log_mac", penalty[58] / num_reduce, step + len(self.train_queue.dataset) * epoch) prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5)) if self.args.distributed: loss = loss.detach() dist.all_reduce(error_loss) dist.all_reduce(prec1) dist.all_reduce(prec5) prec1.div_(self.world_size) prec5.div_(self.world_size) #dist_util.all_reduce([loss, prec1, prec5], 'mean') objs.update(error_loss.item(), n) top1.update(prec1.item(), n) top5.update(prec5.item(), n) if step % self.args.report_freq == 0 and self.rank == 0: logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) self.logger.add_scalar( "iter_train_top1_acc", top1.avg, step + len(self.train_queue.dataset) * epoch) if self.rank == 0: logging.info('-------resource gradient--------') logging.info(normal_resource_gradient / count) logging.info(reduce_resource_gradient / count) logging.info('-------loss gradient--------') logging.info(normal_loss_gradient / count) logging.info(reduce_loss_gradient / count) logging.info('-------total gradient--------') 
logging.info(normal_total_gradient / count) logging.info(reduce_total_gradient / count) return top1.avg, loss, error_loss, loss_alpha def infer(self, epoch): objs = utils.AvgrageMeter() top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() self.model.eval() with torch.no_grad(): for step, (input, target) in enumerate(self.valid_queue): input = input.to(self.device) target = target.to(self.device) if self.args.snas: logits, logits_aux, resource_loss, op_normal, op_reduce = self.model( input) loss = self.criterion(logits, target) elif self.args.dsnas: logits, error_loss, loss_alpha, resource_loss = self.model( input, target, self.criterion) loss = error_loss prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5)) if self.args.distributed: loss.div_(self.world_size) loss = loss.detach() dist.all_reduce(loss) dist.all_reduce(prec1) dist.all_reduce(prec5) prec1.div_(self.world_size) prec5.div_(self.world_size) objs.update(loss.item(), input.size(0)) top1.update(prec1.item(), input.size(0)) top5.update(prec5.item(), input.size(0)) if step % self.args.report_freq == 0 and self.rank == 0: logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg) self.logger.add_scalar( "iter_valid_loss", loss, step + len(self.valid_queue.dataset) * epoch) self.logger.add_scalar( "iter_valid_top1_acc", top1.avg, step + len(self.valid_queue.dataset) * epoch) return top1.avg, objs.avg
def main():
    """Run the architecture search loop on CIFAR-10.

    Reads the module-level ``args``. The CIFAR-10 training set is split by
    ``args.train_portion`` into a weight-training queue and an
    architecture-validation queue. Each epoch logs the current genotype and
    softmaxed architecture weights, runs ``train`` then ``infer``, and
    overwrites ``weights.pt`` under ``args.save``.

    Exits the process with status 1 when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(gpus[0])
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s', args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print("True")
        # NOTE(review): the DataParallel wrapper is discarded on the next
        # line, so training below still runs on the bare module; confirm
        # whether multi-GPU execution was actually intended here.
        model = nn.parallel.DataParallel(model,
                                         device_ids=gpus,
                                         output_device=gpus[0])
        model = model.module

    # SGD only updates network weights; architecture parameters are excluded
    # by identity (a set gives O(1) membership checks).
    arch_param_ids = {id(p) for p in model.arch_parameters()}
    weight_params = [p for p in model.parameters()
                     if id(p) not in arch_param_ids]
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    optimizer = torch.optim.SGD(weight_params,
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # Both queues draw from the training set: the first `split` indices train
    # the weights, the remainder serve as the search-time validation set.
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        # NOTE(review): stepping the scheduler before any optimizer step is
        # the pre-1.1 PyTorch convention; on newer releases it skips the
        # initial learning rate. Left unchanged to keep the schedule as is.
        scheduler.step()
        # Read the learning rate the optimizer will actually use. Calling
        # scheduler.get_lr() outside step() is deprecated and yields a wrong
        # value for CosineAnnealingLR on recent PyTorch.
        lr = optimizer.param_groups[0]['lr']
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    """Run the architecture search on CIFAR-10 with optional wandb reporting.

    Reads the module-level ``args`` and ``is_wandb_used``; sets the
    module-level ``is_multi_gpu`` when more than one GPU id is given. Each
    epoch trains, validates, derives the current genotype together with its
    normal/reduce CNN counts, and optionally stops early. Every Weights &
    Biases call (logging, run summary, checkpoint in the run directory) is
    executed only when ``is_wandb_used`` is true.

    Exits the process with status 1 when CUDA is unavailable.
    """
    global is_multi_gpu

    if is_wandb_used:
        run_name = "r%s-e%s-lr%s-l(%s,%s)" % (
            args.run_id, args.epochs, args.learning_rate,
            args.lambda_train_regularizer, args.lambda_valid_regularizer)
        wandb.init(project="automl-gradient-based-nas",
                   name=run_name,
                   config=args,
                   entity="automl")

    gpus = [int(i) for i in args.gpu.split(',')]
    logging.info('gpus = %s', gpus)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s', args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # default: args.init_channels = 16, CIFAR_CLASSES = 10, args.layers = 8
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    if len(gpus) > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # DataParallel splits each batch along dim 0 across the GPUs.
        model = nn.DataParallel(model)
        is_multi_gpu = True

    model.cuda()
    if args.model_path != "saved_models":
        utils.load(model, args.model_path)

    # The search network itself, regardless of the DataParallel wrapper.
    search_net = model.module if is_multi_gpu else model

    # SGD only updates network weights; architecture parameters are excluded
    # by identity.
    arch_param_ids = {id(p) for p in search_net.arch_parameters()}
    weight_params = [p for p in search_net.parameters()
                     if id(p) not in arch_param_ids]
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    optimizer = torch.optim.SGD(weight_params,
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # Downloading the data can take a while on first use.
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # split index

    # The per-GPU batch size is scaled by the number of GPUs.
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size * len(gpus),
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size * len(gpus),
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, criterion, args)

    best_accuracy = 0
    best_accuracy_different_cnn_counts = dict()
    if is_wandb_used:
        table = wandb.Table(columns=["Epoch", "Searched Architecture"])

    for epoch in range(args.epochs):
        # NOTE(review): stepping the scheduler before any optimizer step is
        # the pre-1.1 PyTorch convention; on newer releases it skips the
        # initial learning rate. Left unchanged to keep the schedule as is.
        scheduler.step()
        # Read the learning rate the optimizer will actually use. Calling
        # scheduler.get_lr() outside step() is deprecated and yields a wrong
        # value for CosineAnnealingLR on recent PyTorch.
        lr = optimizer.param_groups[0]['lr']
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        train_acc, train_obj, train_loss = train(epoch, train_queue,
                                                 valid_queue, model, architect,
                                                 criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)
        if is_wandb_used:
            wandb.log({"searching_train_acc": train_acc, "epoch": epoch})
            wandb.log({"searching_train_loss": train_loss, "epoch": epoch})

        # validation
        with torch.no_grad():
            valid_acc, valid_obj, valid_loss = infer(valid_queue, model,
                                                     criterion)
        logging.info('valid_acc %f', valid_acc)
        if is_wandb_used:
            wandb.log({"searching_valid_acc": valid_acc, "epoch": epoch})
            wandb.log({"searching_valid_loss": valid_loss, "epoch": epoch})
            wandb.log({
                "search_train_valid_acc_gap": train_acc - valid_acc,
                "epoch": epoch
            })
            wandb.log({
                "search_train_valid_loss_gap": train_loss - valid_loss,
                "epoch": epoch
            })

        # Derive the current structure.
        genotype, normal_cnn_count, reduce_cnn_count = search_net.genotype()
        cnn_count = normal_cnn_count + reduce_cnn_count
        model_size = search_net.get_current_model_size()
        if is_wandb_used:
            wandb.log({"cnn_count": cnn_count, "epoch": epoch})
            wandb.log({"model_size": model_size, "epoch": epoch})

        # early stopping
        if args.early_stopping == 1:
            if normal_cnn_count == 6 and reduce_cnn_count == 0:
                break

        print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count))
        print(F.softmax(search_net.alphas_normal, dim=-1))
        print(F.softmax(search_net.alphas_reduce, dim=-1))
        logging.info('genotype = %s', genotype)

        if is_wandb_used:
            wandb.log({"genotype": str(genotype)}, step=epoch - 1)
            table.add_data(str(epoch), str(genotype))
            wandb.log({"Searched Architecture": table})

            # Track the best accuracy per CNN structure, keyed by
            # normal_cnn_count * 10 + reduce_cnn_count.
            structure_key = normal_cnn_count * 10 + reduce_cnn_count
            wandb.log({
                "searching_cnn_count(%s)" % structure_key: valid_acc,
                "epoch": epoch
            })
            previous_best = best_accuracy_different_cnn_counts.get(
                structure_key)
            if previous_best is None or valid_acc > previous_best:
                best_accuracy_different_cnn_counts[structure_key] = valid_acc
                summary_key_cnn_structure = (
                    "best_acc_for_cnn_structure(n:%d,r:%d)" %
                    (normal_cnn_count, reduce_cnn_count))
                wandb.run.summary[summary_key_cnn_structure] = valid_acc
                summary_key_best_cnn_structure = (
                    "epoch_of_best_acc_for_cnn_structure(n:%d,r:%d)" %
                    (normal_cnn_count, reduce_cnn_count))
                wandb.run.summary[summary_key_best_cnn_structure] = epoch

        if valid_acc > best_accuracy:
            best_accuracy = valid_acc
            if is_wandb_used:
                wandb.run.summary["best_valid_accuracy"] = valid_acc
                wandb.run.summary["epoch_of_best_accuracy"] = epoch
                # NOTE(review): the checkpoint lives in the wandb run
                # directory, so it is only written when wandb is active and
                # only on a new best accuracy -- confirm this is the intended
                # checkpoint policy.
                utils.save(model, os.path.join(wandb.run.dir, 'weights.pt'))
def main():
    """Run the phased operation/topology search on CIFAR-10 or CIFAR-100.

    Reads the module-level ``args`` and the phase boundaries
    ``Op_Pretrain_Start``, ``Op_Search_Start``, ``Tp_Pretrain_Start`` and
    ``Tp_Search_Start``. When an epoch equals one of those boundaries the
    model's ``phase`` is switched; entering topology pretraining also
    re-initialises the model, prunes it and rebuilds both optimizers and the
    LR scheduler. Operation phases train with ``train_op``, topology phases
    with ``train_tp``. After each epoch the weights and architecture are saved
    under ``args.save`` with the phase name in the filename.

    Exits the process with status 1 when CUDA is unavailable.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d', args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.op_search_layers,
                    criterion)
    start_epoch = 0
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                      lr=args.arch_learning_rate,
                                      betas=(0.9, 0.999),
                                      weight_decay=args.arch_weight_decay)
    architect = Architect(model, args)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    dataset_cls = dset.CIFAR100 if args.set == 'cifar100' else dset.CIFAR10
    train_data = dataset_cls(root=args.data,
                             train=True,
                             download=True,
                             transform=train_transform)
    val_data = dataset_cls(root=args.data,
                           train=False,
                           download=True,
                           transform=valid_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # train_queue_A / train_queue_B: the two halves used for bilevel
    #   optimisation of the operations.
    # train_queue_Full: the whole training set, for one-level optimisation of
    #   the topology.
    # valid_queue: the held-out test split.
    train_queue_A = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)
    train_queue_B = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)
    train_queue_Full = torch.utils.data.DataLoader(train_data,
                                                   batch_size=args.batch_size,
                                                   pin_memory=True,
                                                   num_workers=2)
    valid_queue = torch.utils.data.DataLoader(val_data,
                                              batch_size=args.batch_size,
                                              pin_memory=True,
                                              num_workers=2)

    for epoch in range(start_epoch, args.epochs):
        if epoch == Op_Pretrain_Start:
            model.phase = 'op_pretrain'
            logging.info("Begin operation pretrain!")
        elif epoch == Op_Search_Start:
            model.phase = 'op_search'
            logging.info("Begin operation search!")
        elif epoch == Tp_Pretrain_Start:
            # Rebuild the network in place for the topology stage; the
            # statement order below is kept exactly as in the original.
            model.__init__(args.init_channels,
                           CIFAR_CLASSES,
                           args.op_search_layers,
                           criterion,
                           init_arch=False)
            model.phase = 'tp_pretrain'
            optimizer = torch.optim.SGD(
                model.parameters(),
                args.learning_rate,  # use twice data to update parameters
                momentum=args.momentum,
                weight_decay=args.weight_decay)
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, float(args.epochs), eta_min=args.learning_rate_min)
            model.prune_model()
            arch_optimizer = torch.optim.Adam(
                model.arch_parameters(),
                lr=args.arch_learning_rate,
                betas=(0.9, 0.999),
                weight_decay=args.arch_weight_decay)
            model = model.cuda()
            architect = None  # use one-step to optimize topology
            logging.info("Prune model finish!")
            logging.info("Load Prune Architecture finish!")
            logging.info("Begin topology pretrain!")
        elif epoch == Tp_Search_Start:
            model.phase = 'tp_search'
            logging.info("Begin topology search!")

        # Temperature stays at 1.0 except during topology search, where it is
        # annealed from 10 by Tp_Anneal_Rate per epoch.
        if 'pretrain' in model.phase or 'op' in model.phase:
            model.T = 1.0
        else:
            model.T = 10 * pow(Tp_Anneal_Rate, epoch - Tp_Search_Start)

        scheduler.step(epoch)
        # Read the learning rate the optimizer will actually use. Calling
        # scheduler.get_lr() outside step() is deprecated and yields a wrong
        # value for CosineAnnealingLR on recent PyTorch.
        lr = optimizer.param_groups[0]['lr']
        logging.info('epoch:%d phase:%s lr:%e', epoch, model.phase, lr)

        print_genotype(model)

        # training
        if 'op' in model.phase:
            train_acc, train_obj = train_op(train_queue_A, train_queue_B,
                                            model, architect, criterion,
                                            optimizer, lr)
        else:
            train_acc, train_obj = train_tp(train_queue_A, train_queue_Full,
                                            model, criterion, optimizer,
                                            arch_optimizer)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model,
                   os.path.join(args.save, 'weights_%s.pth' % model.phase))
        model.save_arch(os.path.join(args.save, 'arch_%s.pth' % model.phase))

    print_genotype(model)
def main():
    """Run the architecture search on CIFAR-10 under a DataParallel wrapper.

    Reads the module-level ``args``. ``args.gpu`` is exported as
    ``CUDA_VISIBLE_DEVICES`` so only the requested GPUs are visible to the
    process. The CIFAR-10 training set is split by ``args.train_portion``
    into a weight-training queue and an architecture-validation queue. Each
    epoch logs the genotype and softmaxed architecture weights, trains,
    validates on the unwrapped module, steps the LR scheduler and overwrites
    ``weights.pt`` under ``args.save``.

    Exits the process with status 1 when CUDA is unavailable.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # Turn the comma-separated GPU string from argparse into a list of ints.
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        # With CUDA_VISIBLE_DEVICES set above, the single requested GPU is
        # exposed to this process as ordinal 0; its physical index would be
        # an invalid device ordinal for any GPU other than 0.
        torch.cuda.set_device(0)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s', args.gpu)
    logging.info("args = %s", args)

    # loss function
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # Build the search network (supernet) and move it to the GPU.
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # NOTE(review): SGD receives every entry of model.parameters(); confirm
    # whether architecture parameters are meant to be excluded here.
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # Split the training set into search-time train and validation queues.
    # num_workers=0 loads data in the main process (the original author noted
    # that multi-worker loading raised an error in their environment).
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=0)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=0)

    # The Architect is built on the bare module, before DataParallel wraps it.
    architect = Architect(model, criterion, args)
    model = nn.parallel.DataParallel(model)

    for epoch in range(args.epochs):
        # Read the learning rate the optimizer will actually use. Calling
        # scheduler.get_lr() outside step() is deprecated and yields a wrong
        # value for CosineAnnealingLR on recent PyTorch.
        lr = optimizer.param_groups[0]['lr']
        logging.info('epoch %d lr %e', epoch, lr)

        # Log the cell structure currently implied by the architecture
        # parameters of the wrapped search network.
        genotype = model.module.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.module.alphas_normal, dim=-1))
        print(F.softmax(model.module.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model.module, criterion)
        logging.info('valid_acc %f', valid_acc)

        scheduler.step()
        utils.save(model.module, os.path.join(args.save, 'weights.pt'))
def main():
    """Run the staged (progressive) architecture search on CIFAR-10/100.

    Parses arguments with ``get_args``, creates a timestamped experiment
    directory with file + stdout logging and monitors, and builds the data
    loaders. The search runs ``len(num_to_keep)`` stages; each stage builds a
    fresh ``Network`` restricted by the current switches, trains it for
    ``args.epochs`` epochs (architecture parameters are only trained after
    the first ``eps_no_arch`` epochs), saves the stage weights and then drops
    ``num_to_drop[sp]`` operations per edge via ``update_switches``. After the
    last stage the final genotype is derived and further genotypes with a
    restricted number of skip-connections are logged.

    Exits the process with status 1 when CUDA is unavailable.
    """
    args = get_args()

    # Experiment directory and logging (stdout plus <save>/log.txt).
    args.save = '{}/search-{}'.format(args.save,
                                      time.strftime("%Y%m%d-%H%M%S"))
    tools.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logger = logging.getLogger('Train Search')
    logger.addHandler(fh)

    # monitors
    pymonitor = ProgressMonitor(logger)
    tbmonitor = TensorBoardMonitor(logger, args.save)
    monitors = [pymonitor, tbmonitor]

    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)

    # random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.device = torch.device('cuda:0' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.seed)
        cudnn.enabled = True
        cudnn.benchmark = True

    setting = dict(args._get_kwargs())
    logger.info(setting)
    with open(os.path.join(args.save, "args.yaml"), "w") as yaml_file:
        # dump experiment config
        yaml.dump(args, yaml_file)

    # Dataset selection: class count, transforms and torchvision dataset.
    if args.cifar100:
        num_classes = 100
        train_transform, valid_transform = tools._data_transforms_cifar100(
            args)
        dataset_cls = dset.CIFAR100
    else:
        num_classes = 10
        train_transform, valid_transform = tools._data_transforms_cifar10(
            args)
        dataset_cls = dset.CIFAR10

    train_data = dataset_cls(root=args.tmp_data_dir,
                             train=True,
                             download=True,
                             transform=train_transform)
    valid_data = dataset_cls(root=args.tmp_data_dir,
                             train=False,
                             download=False,
                             transform=valid_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # train_queue / valid_queue split the training set for the search;
    # valLoader iterates the held-out test split and is what infer() uses.
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=args.workers)
    valLoader = torch.utils.data.DataLoader(valid_data,
                                            batch_size=args.batch_size,
                                            pin_memory=True,
                                            num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(args.device)

    # One switch row per edge (14 edges are hard-coded throughout this
    # function), one boolean per primitive; everything starts enabled.
    num_edges = 14
    switches = [[True for _ in range(len(PRIMITIVES))]
                for _ in range(num_edges)]
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    num_stages = len(num_to_keep)
    add_width = args.add_width if len(args.add_width) == 3 else [0, 0, 0]
    add_layers = args.add_layers if len(args.add_layers) == 3 else [0, 6, 12]
    drop_rate = (args.dropout_rate
                 if len(args.dropout_rate) == 3 else [0.1, 0.4, 0.7])
    eps_no_archs = [10, 10, 10]
    state_epochs = 0

    for sp in range(num_stages):
        is_last_stage = sp == num_stages - 1

        model = Network(args.init_channels + int(add_width[sp]),
                        num_classes,
                        args.layers + int(add_layers[sp]),
                        criterion,
                        steps=args.nodes,
                        multiplier=args.multiplier,
                        stem_multiplier=args.stem_multiplier,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = model.to(args.device)
        logger.info("stage:{} param size:{}MB".format(
            sp, tools.count_parameters_in_MB(model)))

        optimizer = torch.optim.SGD(model.weight_parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(model.arch_parameters(),
                                       lr=args.arch_learning_rate,
                                       betas=(0.5, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2

        for epoch in range(epochs):
            # Read the learning rate the optimizer will actually use. Calling
            # scheduler.get_lr() outside step() is deprecated and yields a
            # wrong value for CosineAnnealingLR on recent PyTorch.
            lr = optimizer.param_groups[0]['lr']
            logger.info('Epoch: %d lr: %e', epoch, lr)

            # training: architecture parameters are frozen for the first
            # eps_no_arch epochs; the drop rate p decays linearly during
            # that warm-up and exponentially afterwards.
            train_arch = epoch >= eps_no_arch
            if train_arch:
                model.p = float(drop_rate[sp]) * np.exp(
                    -(epoch - eps_no_arch) * scale_factor)
            else:
                model.p = float(
                    drop_rate[sp]) * (epochs - epoch - 1) / epochs
            model.update_p()
            train_acc, train_obj = train(state_epochs + epoch,
                                         train_queue,
                                         valid_queue,
                                         model,
                                         criterion,
                                         optimizer,
                                         optimizer_a,
                                         args,
                                         monitors,
                                         logger,
                                         train_arch=train_arch)

            # validation
            valid_acc, valid_obj = infer(state_epochs + epoch, valLoader,
                                         model, criterion, args, monitors,
                                         logger)

            if train_arch:
                # Log the genotype parsed from this epoch's architecture
                # weights.
                arch_param = model.arch_parameters()
                normal_prob = F.softmax(arch_param[0],
                                        dim=-1).data.cpu().numpy()
                reduce_prob = F.softmax(arch_param[1],
                                        dim=-1).data.cpu().numpy()
                logger.info('Genotypev: {}'.format(
                    parse_genotype(switches_normal.copy(),
                                   switches_reduce.copy(), normal_prob.copy(),
                                   reduce_prob.copy())))

            scheduler.step()

        tools.save(model,
                   os.path.join(args.save, 'state{}_weights.pt'.format(sp)))
        state_epochs += args.epochs

        # Save switches info for s-c refinement.
        if is_last_stage:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        arch_param = model.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=-1).data.cpu().numpy()
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()

        logger.info('------Stage %d end!------' % sp)
        logger.info("normal: \n{}".format(normal_prob))
        logger.info("reduce: \n{}".format(reduce_prob))
        logger.info('Genotypev: {}'.format(
            parse_genotype(switches_normal.copy(), switches_reduce.copy(),
                           normal_prob.copy(), reduce_prob.copy())))

        # Shrink the search space from the latest architecture weights, the
        # previous switches, the number of operations to drop and the stage.
        switches_normal = update_switches(normal_prob.copy(), switches_normal,
                                          num_to_drop[sp], sp, num_stages)
        switches_reduce = update_switches(reduce_prob.copy(), switches_reduce,
                                          num_to_drop[sp], sp, num_stages)

        logger.info('------Dropping %d paths------' % num_to_drop[sp])
        logger.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal, logger)
        logger.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce, logger)

        if is_last_stage:
            normal_final = [0 for _ in range(num_edges)]
            reduce_final = [0 for _ in range(num_edges)]

            # Remove all Zero operations: where primitive 0 was still enabled
            # before the last drop, its probability is zeroed so it cannot be
            # the per-edge maximum.
            for i in range(num_edges):
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])

            # Generate Architecture, similar to DARTS: edges 0 and 1 are
            # always kept; for each following group of n edges (n = 3, 4, 5)
            # keep the two with the largest scores.
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for _ in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1

            # Set switches according to the ranking of arch parameters:
            # every primitive on a discarded edge is turned off in place.
            for i in range(num_edges):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False

            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logger.info(genotype)

            # Restrict skip-connect (normal cell only): generate genotypes
            # with at most 8, 7, ..., 0 skip-connect operations.
            logger.info('Restricting skipconnect...')
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal,
                                                     switches_normal_2,
                                                     normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal,
                                                      normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logger.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logger.info(genotype)
class neural_architecture_search():
    """Single-GPU driver for SNAS / DSNAS architecture search on CIFAR-10.

    The constructor seeds the RNGs, configures logging and a TensorBoard
    writer, builds the search network, its two optimizers and the CIFAR-10
    loaders. ``main`` then runs ``train`` followed by ``infer`` for each of
    ``args.epochs`` epochs.

    Relies on module-level names defined elsewhere in the file: ``Network``,
    ``utils``, ``dset``, ``tensorboardX``, ``generate_date``, ``log_format``,
    ``remark`` and ``CIFAR_CLASSES``.
    """

    def __init__(self, args):
        """Set up seeds, logging, model, optimizers and data loaders.

        Exits the process with status 1 when CUDA is unavailable.
        """
        self.args = args

        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        torch.cuda.set_device(self.args.gpu)
        self.device = torch.device("cuda")

        # Single-process run: rank and world size are fixed.
        self.rank = 0
        self.seed = self.args.seed
        self.world_size = 1

        if self.args.fix_cudnn:
            # Deterministic mode: additionally seed `random`, disable the
            # cuDNN autotuner and request deterministic kernels.
            random.seed(self.seed)
            torch.backends.cudnn.deterministic = True
            np.random.seed(self.seed)
            cudnn.benchmark = False
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
        else:
            np.random.seed(self.seed)
            cudnn.benchmark = True
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)

        self.path = os.path.join(generate_date, self.args.save)
        if self.rank == 0:
            utils.create_exp_dir(generate_date,
                                 self.path,
                                 scripts_to_save=glob.glob('*.py'))
            logging.basicConfig(stream=sys.stdout,
                                level=logging.INFO,
                                format=log_format,
                                datefmt='%m/%d %I:%M:%S %p')
            fh = logging.FileHandler(os.path.join(self.path, 'log.txt'))
            fh.setFormatter(logging.Formatter(log_format))
            logging.getLogger().addHandler(fh)
            logging.info("self.args = %s", self.args)
            self.logger = tensorboardX.SummaryWriter(
                './runs/' + generate_date + '/' + self.args.save_log)
        else:
            self.logger = None

        # initialize loss function
        self.criterion = nn.CrossEntropyLoss().to(self.device)

        # initialize model
        self.init_model()
        if self.args.resume:
            self.reload_model()

        # calculate model param size
        if self.rank == 0:
            logging.info("param size = %fMB",
                         utils.count_parameters_in_MB(self.model))
        self.model._logger = self.logger
        self.model._logging = logging

        # initialize optimizer
        self.init_optimizer()

        # initialize dataset loader
        self.init_loaddata()

        self.update_theta = True
        self.update_alpha = True

    @staticmethod
    def _mean_stat(total, count):
        """Return ``total / count`` as a Python number, or NaN if ``count`` is 0.

        ``total`` may be a plain number or a 0-dim tensor.
        """
        if count == 0:
            return float('nan')
        mean = total / count
        return mean.item() if hasattr(mean, 'item') else mean

    def init_model(self):
        """Build the search network on the device and pre-allocate gradients."""
        self.model = Network(self.args.init_channels, CIFAR_CLASSES,
                             self.args.layers, self.criterion, self.args,
                             self.rank, self.world_size, self.args.steps,
                             self.args.multiplier)
        self.model.to(self.device)
        # Give every trainable parameter a zero gradient up front so that
        # `.grad` can be read before the first backward pass.
        for v in self.model.parameters():
            if v.requires_grad and v.grad is None:
                v.grad = torch.zeros_like(v)
        self.model.normal_log_alpha.grad = torch.zeros_like(
            self.model.normal_log_alpha)
        self.model.reduce_log_alpha.grad = torch.zeros_like(
            self.model.reduce_log_alpha)

    def reload_model(self):
        """Load weights from ``<args.resume_path>/weights.pt`` (strict)."""
        self.model.load_state_dict(torch.load(self.args.resume_path +
                                              '/weights.pt'),
                                   strict=True)

    def init_optimizer(self):
        """Create the SGD weight optimizer and the Adam architecture optimizer."""
        self.optimizer = torch.optim.SGD(
            self.model.parameters(),
            self.args.learning_rate,
            momentum=self.args.momentum,
            # Use the instance's args; a module-level `args` may not exist.
            weight_decay=self.args.weight_decay)
        self.arch_optimizer = torch.optim.Adam(
            self.model.arch_parameters(),
            lr=self.args.arch_learning_rate,
            betas=(0.5, 0.999),
            weight_decay=self.args.arch_weight_decay)

    def init_loaddata(self):
        """Create the CIFAR-10 train (shuffled) and test (ordered) loaders."""
        train_transform, valid_transform = utils._data_transforms_cifar10(
            self.args)
        train_data = dset.CIFAR10(root=self.args.data,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=self.args.data,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

        # NOTE: no worker_init_fn is installed on the loaders; worker RNG
        # state follows the DataLoader defaults.
        self.train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=self.args.batch_size,
            shuffle=True,
            pin_memory=False,
            num_workers=2)
        self.valid_queue = torch.utils.data.DataLoader(
            valid_data,
            batch_size=self.args.batch_size,
            shuffle=False,
            pin_memory=False,
            num_workers=2)

    def main(self):
        """Run the search loop: schedule, train, log statistics, validate, save."""
        # lr scheduler: cosine annealing
        # temp scheduler: linear annealing (self-defined in utils)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer,
            float(self.args.epochs),
            eta_min=self.args.learning_rate_min)
        self.temp_scheduler = utils.Temp_Scheduler(self.args.epochs,
                                                   self.model._temp,
                                                   self.args.temp,
                                                   temp_min=self.args.temp_min)

        for epoch in range(self.args.epochs):
            if self.args.child_reward_stat:
                # Statistics-only run: freeze both weights and architecture.
                self.update_theta = False
                self.update_alpha = False

            if self.args.current_reward:
                # Restart the running reward statistics each epoch.
                self.model.normal_reward_mean = torch.zeros_like(
                    self.model.normal_reward_mean)
                self.model.reduce_reward_mean = torch.zeros_like(
                    self.model.reduce_reward_mean)
                self.model.count = 0

            if epoch < self.args.resume_epoch:
                continue

            self.scheduler.step()
            if self.args.temp_annealing:
                self.model._temp = self.temp_scheduler.step()
            self.lr = self.scheduler.get_lr()[0]

            if self.rank == 0:
                logging.info('epoch %d lr %e temp %e', epoch, self.lr,
                             self.model._temp)
                self.logger.add_scalar('epoch_temp', self.model._temp, epoch)
            logging.info(self.model.normal_log_alpha)
            logging.info(self.model.reduce_log_alpha)
            logging.info(F.softmax(self.model.normal_log_alpha, dim=-1))
            logging.info(F.softmax(self.model.reduce_log_alpha, dim=-1))

            genotype_edge_all = self.model.genotype_edge_all()

            if self.rank == 0:
                logging.info('genotype_edge_all = %s', genotype_edge_all)
                # create genotypes.txt file
                txt_name = remark + '_genotype_edge_all_epoch' + str(epoch)
                utils.txt('genotype', self.args.save, txt_name,
                          str(genotype_edge_all), generate_date)

            self.model.train()
            train_acc, loss, error_loss, loss_alpha = self.train(
                epoch, logging)
            if self.rank == 0:
                logging.info('train_acc %f', train_acc)
                self.logger.add_scalar("epoch_train_acc", train_acc, epoch)
                self.logger.add_scalar("epoch_train_error_loss", error_loss,
                                       epoch)
                if self.args.dsnas:
                    self.logger.add_scalar("epoch_train_alpha_loss",
                                           loss_alpha, epoch)

            if self.args.dsnas and not self.args.child_reward_stat:
                if self.args.current_reward:
                    logging.info('reward mean stat')
                    logging.info(self.model.normal_reward_mean)
                    logging.info(self.model.reduce_reward_mean)
                    logging.info('count')
                    logging.info(self.model.count)
                else:
                    logging.info('reward mean stat')
                    logging.info(self.model.normal_reward_mean)
                    logging.info(self.model.reduce_reward_mean)
                    if self.model.normal_reward_mean.size(0) > 1:
                        logging.info('reward mean total stat')
                        logging.info(self.model.normal_reward_mean.sum(0))
                        logging.info(self.model.reduce_reward_mean.sum(0))

            if self.args.child_reward_stat:
                logging.info('reward mean stat')
                logging.info(self.model.normal_reward_mean.sum(0))
                logging.info(self.model.reduce_reward_mean.sum(0))
                logging.info('reward var stat')
                # Var[x] = E[x^2] - E[x]^2
                logging.info(
                    self.model.normal_reward_mean_square.sum(0) -
                    self.model.normal_reward_mean.sum(0)**2)
                logging.info(
                    self.model.reduce_reward_mean_square.sum(0) -
                    self.model.reduce_reward_mean.sum(0)**2)

            # validation
            self.model.eval()
            valid_acc, valid_obj = self.infer(epoch)
            if self.args.gen_max_child:
                # Second pass with the flag raised; how the model reacts to
                # the flag is defined outside this class.
                self.args.gen_max_child_flag = True
                valid_acc_max_child, valid_obj_max_child = self.infer(epoch)
                self.args.gen_max_child_flag = False

            if self.rank == 0:
                logging.info('valid_acc %f', valid_acc)
                self.logger.add_scalar("epoch_valid_acc", valid_acc, epoch)
                if self.args.gen_max_child:
                    logging.info('valid_acc_argmax_alpha %f',
                                 valid_acc_max_child)
                    self.logger.add_scalar("epoch_valid_acc_argmax_alpha",
                                           valid_acc_max_child, epoch)

                utils.save(self.model, os.path.join(self.path, 'weights.pt'))

        if self.rank == 0:
            logging.info(self.model.normal_log_alpha)
            logging.info(self.model.reduce_log_alpha)
            genotype_edge_all = self.model.genotype_edge_all()
            logging.info('genotype_edge_all = %s', genotype_edge_all)

    def train(self, epoch, logging):
        """Train for one epoch and log gradient / per-sample statistics.

        Args:
            epoch: zero-based epoch index, used for TensorBoard step offsets.
            logging: logger-like object providing ``info``.

        Returns:
            ``(top1.avg, loss, error_loss, loss_alpha)`` where the last three
            are the values from the final step (``loss_alpha`` stays ``None``
            unless ``args.dsnas`` is set).
        """
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        grad = utils.AvgrageMeter()

        normal_loss_gradient = 0
        reduce_loss_gradient = 0
        normal_total_gradient = 0
        reduce_total_gradient = 0

        loss_alpha = None

        # Accumulators for samples whose top-1 prediction is right / wrong.
        train_correct_count = 0
        train_correct_cost = 0
        train_correct_entropy = 0
        train_correct_loss = 0
        train_wrong_count = 0
        train_wrong_cost = 0
        train_wrong_entropy = 0
        train_wrong_loss = 0

        count = 0
        for step, (input, target) in enumerate(self.train_queue):
            n = input.size(0)
            input = input.to(self.device)
            target = target.to(self.device, non_blocking=True)

            if self.args.snas:
                logits, logits_aux = self.model(input)
                error_loss = self.criterion(logits, target)
                if self.args.auxiliary:
                    loss_aux = self.criterion(logits_aux, target)
                    error_loss += self.args.auxiliary_weight * loss_aux

            if self.args.dsnas:
                logits, error_loss, loss_alpha = self.model(
                    input,
                    target,
                    self.criterion,
                    update_theta=self.update_theta,
                    update_alpha=self.update_alpha)

            # The per-sample statistics are only logged, so compute them on
            # detached logits; otherwise the accumulators would keep every
            # step's autograd graph alive until the end of the epoch.
            with torch.no_grad():
                stat_logits = logits.detach()
                for i in range(stat_logits.size(0)):
                    sample = stat_logits[i]
                    label = target[i].item()
                    discrete_prob = F.softmax(sample, dim=-1)
                    log_prob = torch.log(discrete_prob)
                    cost = -sample[label] + (discrete_prob * sample).sum()
                    entropy = -(discrete_prob * log_prob).sum(-1)
                    nll = -log_prob[label]

                    index = sample.topk(5, 0, True, True)[1]
                    if index[0].item() == label:
                        train_correct_cost += cost
                        train_correct_count += 1
                        train_correct_entropy += entropy
                        train_correct_loss += nll
                    else:
                        train_wrong_cost += cost
                        train_wrong_count += 1
                        train_wrong_entropy += entropy
                        train_wrong_loss += nll

            if self.args.snas or self.args.dsnas:
                loss = error_loss.clone()

            # self.update_lr()

            # logging gradient
            count += 1
            if self.args.snas:
                # First backward on the error loss alone, only to record the
                # architecture gradient it produces; then clear it again.
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()
                error_loss.backward(retain_graph=True)
                if not self.args.random_sample:
                    normal_loss_gradient += self.model.normal_log_alpha.grad
                    reduce_loss_gradient += self.model.reduce_log_alpha.grad
                self.optimizer.zero_grad()
                self.arch_optimizer.zero_grad()

            if self.args.snas and (not self.args.random_sample
                                   and not self.args.dsnas):
                loss.backward()
            if not self.args.random_sample:
                normal_total_gradient += self.model.normal_log_alpha.grad
                reduce_total_gradient += self.model.reduce_log_alpha.grad

            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.args.grad_clip)
            arch_grad_norm = nn.utils.clip_grad_norm_(
                self.model.arch_parameters(), 10.)

            grad.update(arch_grad_norm)
            if not self.args.fix_weight and self.update_theta:
                self.optimizer.step()
            self.optimizer.zero_grad()
            if not self.args.random_sample and self.update_alpha:
                self.arch_optimizer.step()
            self.arch_optimizer.zero_grad()

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            objs.update(error_loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % self.args.report_freq == 0 and self.rank == 0:
                logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                             top5.avg)
                self.logger.add_scalar(
                    "iter_train_top1_acc", top1.avg,
                    step + len(self.train_queue.dataset) * epoch)

        if self.rank == 0:
            total_count = train_correct_count + train_wrong_count
            mean = self._mean_stat  # NaN instead of ZeroDivisionError

            logging.info('-------loss gradient--------')
            logging.info(normal_loss_gradient / count)
            logging.info(reduce_loss_gradient / count)
            logging.info('-------total gradient--------')
            logging.info(normal_total_gradient / count)
            logging.info(reduce_total_gradient / count)

            logging.info('correct loss ')
            logging.info(mean(train_correct_loss, train_correct_count))
            logging.info('correct entropy ')
            logging.info(mean(train_correct_entropy, train_correct_count))
            logging.info('correct cost ')
            logging.info(mean(train_correct_cost, train_correct_count))
            logging.info('correct count ')
            logging.info(train_correct_count)

            logging.info('wrong loss ')
            logging.info(mean(train_wrong_loss, train_wrong_count))
            logging.info('wrong entropy ')
            logging.info(mean(train_wrong_entropy, train_wrong_count))
            logging.info('wrong cost ')
            logging.info(mean(train_wrong_cost, train_wrong_count))
            logging.info('wrong count ')
            logging.info(train_wrong_count)

            logging.info('total loss ')
            logging.info(
                mean(train_correct_loss + train_wrong_loss, total_count))
            logging.info('total entropy ')
            logging.info(
                mean(train_correct_entropy + train_wrong_entropy,
                     total_count))
            logging.info('total cost ')
            logging.info(
                mean(train_correct_cost + train_wrong_cost, total_count))
            logging.info('total count ')
            logging.info(total_count)

        return top1.avg, loss, error_loss, loss_alpha

    def infer(self, epoch):
        """Evaluate on ``self.valid_queue`` without gradients.

        Returns:
            ``(top1.avg, objs.avg)``: mean top-1 accuracy and mean loss.
        """
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()

        self.model.eval()
        with torch.no_grad():
            for step, (input, target) in enumerate(self.valid_queue):
                input = input.to(self.device)
                target = target.to(self.device)
                if self.args.snas:
                    logits, logits_aux = self.model(input)
                    loss = self.criterion(logits, target)
                elif self.args.dsnas:
                    logits, error_loss, loss_alpha = self.model(
                        input, target, self.criterion)
                    loss = error_loss

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
                objs.update(loss.item(), input.size(0))
                top1.update(prec1.item(), input.size(0))
                top5.update(prec5.item(), input.size(0))

                if step % self.args.report_freq == 0 and self.rank == 0:
                    logging.info('valid %03d %e %f %f', step, objs.avg,
                                 top1.avg, top5.avg)
                    self.logger.add_scalar(
                        "iter_valid_loss", loss,
                        step + len(self.valid_queue.dataset) * epoch)
                    self.logger.add_scalar(
                        "iter_valid_top1_acc", top1.avg,
                        step + len(self.valid_queue.dataset) * epoch)

        return top1.avg, objs.avg
def main():
    """Search an architecture on CIFAR-10 with a separate-loss criterion.

    Builds the network, a weight optimizer (SGD) and an architecture
    optimizer (Adam), splits the CIFAR-10 training set into train/valid
    portions by ``args.train_portion``, optionally resumes from
    ``args.resume``, then for every epoch trains, validates, writes a
    checkpoint and dumps the architecture-weight history. After the last
    epoch the sigmoid of the architecture weights is saved as ``.npy``.

    Reads the module-level ``args``; exits with status 1 without CUDA.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(0)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    run_start = time.time()
    start_epoch = 0
    dur_time = 0

    # 'l2' selects ConvSeparateLoss; any other value selects TriSeparateLoss.
    criterion_train = ConvSeparateLoss(
        weight=args.aux_loss_weight
    ) if args.sep_loss == 'l2' else TriSeparateLoss(
        weight=args.aux_loss_weight)
    criterion_val = nn.CrossEntropyLoss()

    model = Network(args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion_train,
                    steps=4,
                    multiplier=4,
                    stem_multiplier=3,
                    parse_method=args.parse_method,
                    op_threshold=args.op_threshold)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    model_optimizer = torch.optim.SGD(model.parameters(),
                                      args.learning_rate,
                                      momentum=args.momentum,
                                      weight_decay=args.weight_decay)
    arch_optimizer = torch.optim.Adam(model.arch_parameters(),
                                      lr=args.arch_learning_rate,
                                      betas=(0.9, 0.999),
                                      weight_decay=args.arch_weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    # Both loaders draw from the training set: the first `split` indices are
    # used for weights, the remainder for validation / architecture updates.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    architect = Architect(model, args)

    # resume from checkpoint
    checkpoint = None
    if args.resume:
        if os.path.isfile(args.resume):
            logging.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            start_epoch = checkpoint['epoch']
            dur_time = checkpoint['dur_time']
            model_optimizer.load_state_dict(checkpoint['model_optimizer'])
            # NOTE(review): state is restored into `architect.arch_optimizer`
            # here but saved from `architect.optimizer` below, while the
            # local `arch_optimizer` is what gets passed to train(). Confirm
            # against the Architect class which attribute actually exists.
            architect.arch_optimizer.load_state_dict(
                checkpoint['arch_optimizer'])
            model.restore(checkpoint['network_states'])
            logging.info('=> loaded checkpoint \'{}\'(epoch {})'.format(
                args.resume, start_epoch))
        else:
            logging.info('=> no checkpoint found at \'{}\''.format(
                args.resume))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        model_optimizer,
        float(args.epochs),
        eta_min=args.learning_rate_min,
        last_epoch=-1 if start_epoch == 0 else start_epoch)
    # Reuse the checkpoint loaded above instead of re-checking the path.
    if checkpoint is not None:
        scheduler.load_state_dict(checkpoint['scheduler'])

    for epoch in range(start_epoch, args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        # torch.sigmoid replaces the deprecated F.sigmoid (same values).
        logging.info(torch.sigmoid(model.alphas_normal))
        logging.info(torch.sigmoid(model.alphas_reduce))
        model.update_history()

        # training and search the model
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion_train,
                                     model_optimizer, arch_optimizer)
        logging.info('train_acc %f', train_acc)

        # validation the model
        valid_acc, valid_obj = infer(valid_queue, model, criterion_val)
        logging.info('valid_acc %f', valid_acc)

        # save checkpoint
        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'dur_time': dur_time + time.time() - run_start,
                'scheduler': scheduler.state_dict(),
                'model_optimizer': model_optimizer.state_dict(),
                'arch_optimizer': architect.optimizer.state_dict(),
                'network_states': model.states(),
            },
            is_best=False,
            save=args.save)
        logging.info('save checkpoint (epoch %d) in %s  dur_time: %s', epoch,
                     args.save,
                     utils.calc_time(dur_time + time.time() - run_start))

        # save operation weights as fig
        utils.save_file(recoder=model.alphas_normal_history,
                        path=os.path.join(args.save, 'normal'))
        utils.save_file(recoder=model.alphas_reduce_history,
                        path=os.path.join(args.save, 'reduce'))

    # save last operations
    np.save(os.path.join(args.save, 'normal_weight.npy'),
            torch.sigmoid(model.alphas_normal).data.cpu().numpy())
    np.save(os.path.join(args.save, 'reduce_weight.npy'),
            torch.sigmoid(model.alphas_reduce).data.cpu().numpy())
    logging.info('save last weights done')
def main():
    """Search an architecture on CIFAR-10 with binarised operations.

    Trains and validates for ``args.epochs`` epochs, remembers the genotype
    pair of the epoch with the best validation accuracy, checkpoints after
    every epoch and finally appends the best genotypes to ``./genotypes.py``.

    Reads the module-level ``args``; exits with status 1 without CUDA.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    # Train/valid loaders share the training set, split by train_portion.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    # NOTE(review): test_queue is built but not used in this function.
    test_data = dset.CIFAR10(root=args.data,
                             train=False,
                             download=True,
                             transform=valid_transform)
    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    bin_op = bin_utils_search.BinOp(model, args)

    best_acc = 0.
    best_genotypes = []  # [genotype, genotype_img] of the best epoch so far

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        genotype_img = model.genotype(args.gamma)
        logging.info('genotype = %s', genotype)
        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     bin_op, epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, bin_op)
        logging.info('valid_acc %f', valid_acc)

        if best_acc < valid_acc:
            best_acc = valid_acc
            best_genotypes = [genotype, genotype_img]

        utils.save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'arch_param': model.arch_parameters(),
                'val_acc': valid_acc,
                'optimizer': optimizer.state_dict(),
            }, False, args.save)

    # best_genotypes stays empty when no epoch ran or none beat best_acc;
    # indexing it unconditionally would raise IndexError.
    if best_genotypes:
        with open('./genotypes.py', 'a') as f:
            f.write(args.geno_name + ' = ' + str(best_genotypes[0]) + '\n')
            f.write(args.geno_name + '_img' + ' = ' +
                    str(best_genotypes[1]) + '\n')
    else:
        logging.info('no best genotype recorded; genotypes.py left untouched')
def train_search(gpu, args):
    """Per-process entry point for distributed architecture search on CIFAR-10.

    Args:
        gpu: local GPU index of this process; also used as ``device_ids``
            for ``DistributedDataParallel``.
        args: run configuration. ``args.gpu`` is a comma-separated device
            list; ``args.nr`` and ``args.world_size`` are combined with
            ``gpu`` to compute the global rank.

    Joins the NCCL process group, builds the network, shards the training
    set with a ``DistributedSampler`` and runs train/validate for
    ``args.epochs`` epochs. The process with ``gpu == 0`` saves the weights.
    """
    print('START TRAIN')
    # Setting random seed
    print("Setting random seed", args.seed)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    torch.cuda.set_device(gpu)
    num_gpu = len([int(i) for i in args.gpu.split(',')])
    rank = args.nr * num_gpu + gpu
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=args.world_size,
                            rank=rank)

    # loss function
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda(gpu)

    # Initialise the model: build the supernet and place it on the GPU.
    model = Network(args.init_channels, args.CIFAR_CLASSES, args.layers,
                    criterion)
    model = model.cuda(gpu)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),  # parameters updated by the optimizer
        args.learning_rate,  # learning rate
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # dset: short for torchvision.datasets
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    # FIXME: under DistributedDataParallel it does not seem possible to split
    # the dataset by passing explicit index subsets (the single-GPU version
    # used SubsetRandomSampler over indices[:split] / indices[split:]), so
    # the whole training set is sharded and the CIFAR-10 test split is used
    # for validation instead.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_data, num_replicas=args.world_size, rank=rank)

    train_queue = torch.utils.data.DataLoader(dataset=train_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              sampler=train_sampler,
                                              pin_memory=True,
                                              num_workers=0)

    valid_data = dset.CIFAR10(root=args.data,
                              train=False,
                              download=True,
                              transform=valid_transform)
    # Validation is not sharded: every process evaluates the full set.
    valid_queue = torch.utils.data.DataLoader(dataset=valid_data,
                                              batch_size=args.batch_size,
                                              pin_memory=True,
                                              num_workers=0)

    # The Architect holds the architecture parameters' update step; it is
    # built on the unwrapped model, before the DDP wrapper is applied.
    architect = Architect(model, criterion, args)

    model = nn.parallel.DistributedDataParallel(model, device_ids=[gpu])

    for epoch in range(args.epochs):
        # Without set_epoch the sampler reuses the same permutation every
        # epoch, so each process would see identical batches each time.
        train_sampler.set_epoch(epoch)

        # NOTE(review): on recent PyTorch, get_last_lr() is the supported
        # way to read the current rate outside scheduler.step().
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # genotype() comes from the search Network wrapped by DDP.
        genotype = model.module.genotype()
        # Log the cell structure at the start of this epoch.
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.module.alphas_normal, dim=-1))
        print(F.softmax(model.module.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr,
                                     args, gpu)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model.module, criterion,
                                         args, gpu)
        logging.info('valid_acc %f', valid_acc)

        scheduler.step()

        if gpu == 0:
            utils.save(model.module, os.path.join(args.save, 'weights.pt'))
def main():
    """Train-and-prune loop with an adaptive FLOPs penalty weight.

    Repeats: train one epoch, prune operations, measure the resulting FLOPs
    and adapt the penalty weight, until the FLOPs fall below
    ``args.min_flops``. Whenever pruning resumes after more than
    ``args.stable_round`` consecutive epochs without any pruning, the current
    genotype is logged.

    Reads the module-level ``args``; exits with status 1 without CUDA.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seeding and device selection.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss().cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, args.eta_min, args.reg_flops,
                    args.mu).cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # One SGD optimizer for the architecture parameters, one for all
    # parameters of the model.
    optimizer_alpha = torch.optim.SGD(model.arch_parameters(),
                                      args.learning_rate_alpha,
                                      momentum=args.momentum,
                                      weight_decay=args.weight_decay_alpha)
    optimizer_omega = torch.optim.SGD(model.parameters(),
                                      args.learning_rate_omega,
                                      momentum=args.momentum,
                                      weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_queue = torch.utils.data.DataLoader(
        dset.CIFAR10(root=args.data,
                     train=True,
                     download=True,
                     transform=train_transform),
        batch_size=args.batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=2)

    epoch = 0
    penalty = 0                 # current FLOPs penalty weight
    penalty_step = args.lambda0  # increment applied when pruning is slow
    quiet_epochs = 0            # consecutive epochs in which nothing was pruned
    finished = False

    while not finished:
        tic = time.time()
        lr = args.learning_rate_omega
        model.drop_path_prob = 0
        logging.info('epoch %d lr %e flops_weight %e', epoch, lr, penalty)

        train_acc, train_obj = train(train_queue, model, criterion,
                                     optimizer_alpha, optimizer_omega,
                                     penalty)
        logging.info('train_acc %f', train_acc)
        logging.info('epoch time: %ds.', time.time() - tic)

        pruning_epoch = prune_op(model, args)
        current_flops = model.current_flops() + args.base_flops
        logging.info('current model flops %e', current_flops)

        if pruning_epoch < args.pruning_n0:
            # Too little was pruned: grow the step, then raise the weight.
            penalty_step *= args.c0
            penalty += penalty_step
        else:
            # Enough was pruned: reset the step and shrink the weight.
            penalty_step = args.lambda0
            penalty /= args.c0

        finished = finished or current_flops < args.min_flops

        if pruning_epoch != 0:
            if quiet_epochs > args.stable_round:
                genotype = model.genotype()
                logging.info('genotype = %s', genotype)
            quiet_epochs = 0
        else:
            quiet_epochs += 1

        epoch += 1
# Horovod: scale learning rate by the number of GPUs. optimizer = optim.SGD(model.parameters(), lr=args.base_lr * hvd.size(), momentum=args.momentum, weight_decay=args.wd) #, nesterov=True) # Horovod: (optional) compression algorithm. compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none # Horovod: wrap optimizer with DistributedOptimizer. optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters(), compression=compression) arch_optimizer = torch.optim.Adam(model.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay) # Restore from a previous checkpoint, if initial_epoch is specified. # Horovod: restore on the first worker which will broadcast weights to other workers. if resume_from_epoch > 0 and hvd.rank() == 0: filepath = args.checkpoint_format.format(exp=args.save, epoch=resume_from_epoch) checkpoint = torch.load(filepath) model.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) # Horovod: broadcast parameters & optimizer state. hvd.broadcast_parameters(model.state_dict(), root_rank=0)
def nas(args: Namespace, task: Task, preprocess_func: Compose) -> Module:
    """Run a DARTS architecture search and return a network built from it.

    Given task and preprocess function, this method returns a model output
    by NAS. The implementation of DARTS is available at
    https://github.com/alphadl/darts.pytorch1.1

    Args:
        args: run configuration. ``args.save`` is rewritten in place to a
            time-stamped ``search-...`` directory; ``args.gpu`` is a
            comma-separated device list.
        task: provides ``name`` (``'cifar10'`` or ``'cifar100'``) and
            ``n_classes``.
        preprocess_func: transform applied to the training images.

    Returns:
        A ``NetworkClassification`` instantiated from the final genotype.

    Raises:
        ValueError: if ``task.name`` is not a supported dataset.

    Exits the process with status 1 when CUDA is unavailable.
    """
    # TODO: Replace model with the output by NAS

    args.save = 'search-{}-{}'.format(args.save,
                                      time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CLASSES = task.n_classes

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    # gpus = [int(args.gpu)]
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))

    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled=True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model,
                                         device_ids=gpus,
                                         output_device=gpus[0])
        # NOTE(review): the wrapper is discarded immediately, so the search
        # below runs on the plain module rather than the DataParallel one.
        model = model.module

    # Optimise only the network weights: drop the architecture parameters
    # (identified by object id) from the SGD parameter list.
    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params,
                           model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        # model.parameters(),
        weight_params,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    # NOTE(review): nn.DataParallel is meant for modules, not optimizers.
    # It is kept because the wrapper is what train() receives and the
    # scheduler reaches the real optimizer through `.module`.
    optimizer = nn.DataParallel(optimizer, device_ids=gpus)

    if task.name == 'cifar100':
        train_data = dset.CIFAR100(root=args.data,
                                   train=True,
                                   download=True,
                                   transform=preprocess_func)
        # train_transform, valid_transform = utils._data_transforms_cifar10(args)
        # train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    elif task.name == 'cifar10':
        train_data = dset.CIFAR10(root=args.data,
                                  train=True,
                                  download=True,
                                  transform=preprocess_func)
    else:
        # Previously fell through and failed later with an unbound
        # `train_data`; fail here with an explicit message instead.
        raise ValueError(
            "unsupported task {!r}: expected 'cifar10' or 'cifar100'".format(
                task.name))

    # Train/valid loaders share the training set, split by train_portion.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.module, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(args, train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(args, valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    # return a neural network model (torch.nn.Module)
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model = NetworkClassification(36, task.n_classes, 20, False, genotype)
    return model
def main():
    """Run a DARTS architecture search on CIFAR-10.

    Reads its configuration from the module-level ``args`` object. The
    process exits with status 1 when CUDA is not available. The CIFAR-10
    training set is split by ``args.train_portion``: the first part trains
    the weights, the rest is used as the validation queue. After every epoch
    the model is saved to ``<args.save>/weights.pt``.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s', args.gpus)
    logging.info("args = %s", args)

    # CUDA availability was already enforced above, so the device is always
    # "cuda" at this point.
    device = torch.device("cuda")
    criterion = nn.CrossEntropyLoss().to(device)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.to(device)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # Separate optimizers: SGD for the network weights, Adam for the
    # architecture parameters.
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    arch_optimizer = torch.optim.Adam(
        model.arch_parameters(),
        lr=args.arch_learning_rate,
        betas=(0.5, 0.999),
        weight_decay=args.arch_weight_decay)

    # valid_transform is returned by the helper but not used here: both
    # queues are built from the training dataset.
    train_transform, _ = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=4)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True, num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, arch_optimizer, args)

    for epoch in range(args.epochs):
        # Read the LR straight from the optimizer; scheduler.get_lr() is not
        # meant to be called outside of step() on recent PyTorch releases.
        lr = optimizer.param_groups[0]['lr']
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, _ = train(train_queue, valid_queue, model, architect,
                             criterion, optimizer, lr, device)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, _ = infer(valid_queue, model, criterion, device)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        # Step after the epoch's optimizer updates (PyTorch >= 1.1 ordering);
        # stepping first skips the initial value of the schedule.
        scheduler.step()
def main():
    """Run a staged architecture search and write the final genotype to disk.

    Reads its configuration from the module-level ``args`` object. The search
    runs ``len(num_to_keep)`` stages. Each stage builds a fresh ``Network``
    from the current operation switches, trains it for ``args.epochs``
    epochs (architecture parameters are only trained once ``eps_no_arch``
    epochs have passed), saves the weights, and then switches off the
    lowest-probability operations on every edge. After the last stage the
    surviving edges are selected, the switches are translated into a
    genotype, the number of skip-connections in the normal cell is
    restricted, and the resulting genotype is written to
    ``<args.save>/best_genotype.txt``.

    The process exits with status 1 when CUDA is not available.

    Raises:
        ValueError: If ``args.dataset`` is not one of the supported names.
    """
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('GPU device = %d', args.gpu)
    logging.info("args = %s", args)

    # prepare dataset
    train_transform, valid_transform = utils.data_transforms(
        args.dataset, args.cutout, args.cutout_length)
    if args.dataset == "CIFAR100":
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True,
                                   download=True, transform=train_transform)
    elif args.dataset == "CIFAR10":
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True,
                                  download=True, transform=train_transform)
    elif args.dataset == 'mit67':
        dset_cls = dset.ImageFolder
        data_path = '%s/MIT67/train' % args.tmp_data_dir  # 'data/MIT67/train'
        val_path = '%s/MIT67/test' % args.tmp_data_dir  # 'data/MIT67/val'
        train_data = dset_cls(root=data_path, transform=train_transform)
        # Built as in the original, but not used below: both queues are
        # drawn from train_data.
        valid_data = dset_cls(root=val_path, transform=valid_transform)
    elif args.dataset == 'sport8':
        dset_cls = dset.ImageFolder
        data_path = '%s/Sport8/train' % args.tmp_data_dir  # 'data/Sport8/train'
        val_path = '%s/Sport8/test' % args.tmp_data_dir  # 'data/Sport8/val'
        train_data = dset_cls(root=data_path, transform=train_transform)
        # Built as in the original, but not used below.
        valid_data = dset_cls(root=val_path, transform=valid_transform)
    else:
        # Previously this fell through and failed later with a NameError on
        # ``train_data``.
        raise ValueError(
            "unsupported dataset %r; expected 'CIFAR10', 'CIFAR100', "
            "'mit67' or 'sport8'" % (args.dataset,))

    # Shuffled split of the training set: the first ``train_portion`` trains
    # the weights, the remainder serves as the validation queue.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    random.shuffle(indices)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # One row of on/off flags per edge, one flag per candidate primitive.
    # The edge selection further below treats the 14 edges as groups of
    # 2 + 3 + 4 + 5.
    num_edges = 14
    switches = [[True] * len(PRIMITIVES) for _ in range(num_edges)]
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    add_width = args.add_width if len(args.add_width) == 3 else [0, 0, 0]
    add_layers = args.add_layers if len(args.add_layers) == 3 else [0, 3, 6]
    drop_rate = args.dropout_rate if len(args.dropout_rate) == 3 else [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]
    last_stage_index = len(num_to_keep) - 1

    for sp in range(len(num_to_keep)):
        is_last_stage = sp == last_stage_index
        model = Network(args.init_channels + int(add_width[sp]), CLASSES,
                        args.layers + int(add_layers[sp]), criterion,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]),
                        largemode=args.dataset in utils.LARGE_DATASETS)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        # Everything except the architecture parameters is trained by SGD.
        network_params = [
            v for k, v in model.named_parameters()
            if not k.endswith(('alphas_normal', 'alphas_reduce'))
        ]
        optimizer = torch.optim.SGD(
            network_params,
            args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(
            model.arch_parameters(),
            lr=args.arch_learning_rate,
            betas=(0.5, 0.999),
            weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        for epoch in range(epochs):
            # Read the LR straight from the optimizer; scheduler.get_lr() is
            # not meant to be called outside of step() on recent PyTorch
            # releases.
            lr = optimizer.param_groups[0]['lr']
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()

            # training
            train_arch = epoch >= eps_no_arch
            if train_arch:
                # Exponential decay of the drop probability once the
                # architecture parameters are being trained.
                model.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
            else:
                # Linear decay during the weights-only warm-up epochs.
                model.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
            model.update_p()
            train_acc, _ = train(train_queue, valid_queue, model, network_params,
                                 criterion, optimizer, optimizer_a, lr,
                                 train_arch=train_arch)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)

            # validation, only during the last few epochs of the stage
            if epochs - epoch < 5:
                valid_acc, _ = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)

            # Step after the epoch's optimizer updates (PyTorch >= 1.1
            # ordering); stepping first skips the initial schedule value.
            scheduler.step()

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])

        # Save switches info for s-c refinement.
        if is_last_stage:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        # drop operations with low architecture weights
        arch_param = model.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        _drop_weakest_ops(switches_normal, normal_prob, num_to_drop[sp], is_last_stage)
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        _drop_weakest_ops(switches_reduce, reduce_prob, num_to_drop[sp], is_last_stage)

        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if is_last_stage:
            arch_param = model.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for _ in range(num_edges)]
            reduce_final = [0 for _ in range(num_edges)]

            # remove all Zero operations: where primitive 0 was still
            # switched on before the final drop, its probability is zeroed
            # so that it cannot be the per-edge maximum.
            for i in range(num_edges):
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])

            # Generate Architecture: edges 0 and 1 are always kept; from each
            # following group of 3, 4 and 5 edges the two with the highest
            # score are kept.
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for _ in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1

            # Switch off every primitive on edges that were not selected.
            for i in range(num_edges):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False

            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)

            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
            for sks in range(0, len(PRIMITIVES) + 1):
                max_sk = len(PRIMITIVES) - sks
                num_sk = check_sk_number(switches_normal)
                if num_sk < max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)

            # The file ends up holding the last genotype produced above.
            with open(args.save + "/best_genotype.txt", "w") as f:
                f.write(str(genotype))


def _drop_weakest_ops(switches, probs, num_drop, last_stage):
    """Switch off, in place, ``num_drop`` low-probability operations per edge.

    Args:
        switches: Per-edge lists of on/off flags, one flag per primitive.
        probs: Array indexed as ``probs[edge, :]`` holding the softmaxed
            architecture weights for each edge.
        num_drop: Number of operations to drop on each edge.
        last_stage: When true, ``get_min_k_no_zero`` picks the operations to
            drop (it also receives the list of active primitive indices);
            otherwise ``get_min_k`` is used.
    """
    for edge, edge_switches in enumerate(switches):
        # Indices of the primitives that are still switched on for this edge;
        # the positions returned by the selection helpers index into this list.
        active = [j for j, is_on in enumerate(edge_switches) if is_on]
        if last_stage:
            drop = get_min_k_no_zero(probs[edge, :], active, num_drop)
        else:
            drop = get_min_k(probs[edge, :], num_drop)
        for idx in drop:
            edge_switches[active[idx]] = False
from option.default_option import TrainOptions import os import tqdm import warnings warnings.filterwarnings("ignore") import matplotlib.pyplot as plt os.environ["CUDA_VISIBLE_DEVICES"] = '0' device = torch.device('cuda') opt = TrainOptions() CIFAR_CLASSES = 10 criterion = nn.CrossEntropyLoss().cuda() model = Network(opt.init_channels, CIFAR_CLASSES, opt.layers, criterion) model.cuda() optimizer_model = torch.optim.SGD(model.parameters(),lr= 0.025,momentum = 0.9, weight_decay=3e-4) optimizer_arch = torch.optim.Adam(model.arch_parameters(),lr = 3e-4, betas=(0.5, 0.999), weight_decay = 1e-3) train_transform, valid_transform = utils._data_transforms_cifar10(opt) train_data = dset.CIFAR10(root='../', train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) ####DATALOADER 수정 필요한부분 train_queue = torch.utils.data.DataLoader( train_data, batch_size=opt.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:5000]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=opt.batch_size,