def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # loss function
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    number_of_classes = class_dict[args.dataset]
    in_channels = inp_channel_dict[args.dataset]
    model = Network(args.init_channels, number_of_classes, args.layers, criterion, in_channels)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    # Get transforms to apply on data
    train_transform, valid_transform = utils.get_data_transforms(args)

    # Get the training queue
    train_queue, valid_queue = get_training_queues(args, train_transform)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
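# The variant above delegates data loading to `get_training_queues`, which is not
# included in this excerpt. A plausible sketch based on the inline loaders used by
# the other scripts in this file; the CIFAR-10 default and the `dset` import are
# assumptions, not the repo's actual helper.
def get_training_queues(args, train_transform):
    import torchvision.datasets as dset  # assumed import, matching the sibling scripts

    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # first `train_portion` of the data trains the weights, the rest drives the
    # architecture (validation) updates
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)
    return train_queue, valid_queue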
def _construct_model_from_theta(self, theta):
    model_clone = Network(self.model._C, self.model._num_classes,
                          self.model._layers, self.model._criterion).cuda()
    for x, y in zip(model_clone.arch_parameters(), self.model.arch_parameters()):
        x.data.copy_(y.data)

    model_dict = self.model.state_dict()
    params, offset = {}, 0
    for k, v in self.model.named_parameters():
        v_length = np.prod(v.size())
        params[k] = theta[offset:offset + v_length].view(v.size())
        offset += v_length

    assert offset == len(theta)
    model_dict.update(params)
    model_clone.load_state_dict(model_dict)
    return model_clone.cuda()
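# In the reference DARTS architect, `_construct_model_from_theta` is invoked when
# building the "unrolled" model for the second-order gradient: take one virtual SGD
# step on the weights, then re-materialize a model at the stepped weights. A minimal
# sketch, assuming the reference helpers `_concat`, `self.network_momentum`, and
# `self.network_weight_decay` exist as in the original repository:
def _compute_unrolled_model(self, input, target, eta, network_optimizer):
    loss = self.model._loss(input, target)
    theta = _concat(self.model.parameters()).data
    try:
        # reuse the weight optimizer's momentum buffers for the virtual step
        moment = _concat(network_optimizer.state[v]['momentum_buffer']
                         for v in self.model.parameters()).mul_(self.network_momentum)
    except Exception:
        moment = torch.zeros_like(theta)
    dtheta = _concat(torch.autograd.grad(loss, self.model.parameters())).data \
             + self.network_weight_decay * theta
    # w' = w - eta * (momentum + dL_train/dw + weight_decay * w)
    return self._construct_model_from_theta(theta - eta * (moment + dtheta))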
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    # criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer,
                    args.interleaved, dataset.get_shape(), args.emb_dim,
                    args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # model = utils.load(model, 'search-EXP-20190823-173036%f/weights.pt')
    weights = torch.load('search-EXP-20190823-173036%f/weights.pt')
    # print(weights)
    embeddings = [weights['embeddings.0.weight'], weights['embeddings.1.weight']]
    torch.save(embeddings, 'search-EXP-20190823-173036%f/embeddings.pt')
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer for the network weights w
    optimizer = torch.optim.SGD(
        model.parameters(),        # the parameters this optimizer updates, i.e. w
        args.learning_rate,        # starts at 0.025; cosine annealing gives a different LR each epoch
        momentum=args.momentum,    # 0.9
        weight_decay=args.weight_decay)  # regularization coefficient, 3e-4

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        # custom sampling strategy: with train_portion=0.5, the first half of the data trains w
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        # the second half of the dataset is used for validation
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    # learning-rate schedule: cosine annealing sets a new LR for each parameter group every epoch
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # create the Architect that updates the architecture parameters alpha
    architect = Architect(model, args)

    # the search completes after args.epochs (50) epochs
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]  # learning rate for this epoch
        logging.info('epoch %d lr %e', epoch, lr)

        # cf. Sec. 2.4 of the paper: pick the two predecessors with the largest
        # weights per node and record the (operation, predecessor) pairs
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, args.eta_min, args.reg_flops, args.mu)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer_alpha = torch.optim.SGD(
        model.arch_parameters(),
        args.learning_rate_alpha,
        momentum=args.momentum,
        weight_decay=args.weight_decay_alpha)
    optimizer_omega = torch.optim.SGD(
        model.parameters(),
        args.learning_rate_omega,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=2)

    epoch = 0
    flops_lambda = 0
    flops_lambda_delta = args.lambda0
    finished = False
    t = 0
    while not finished:
        epoch_start = time.time()
        lr = args.learning_rate_omega
        model.drop_path_prob = 0
        logging.info('epoch %d lr %e flops_weight %e', epoch, lr, flops_lambda)

        train_acc, train_obj = train(train_queue, model, criterion,
                                     optimizer_alpha, optimizer_omega, flops_lambda)
        logging.info('train_acc %f', train_acc)
        epoch_duration = time.time() - epoch_start
        logging.info('epoch time: %ds.', epoch_duration)

        pruning_epoch = prune_op(model, args)
        current_flops = model.current_flops() + args.base_flops
        logging.info('current model flops %e', current_flops)

        if pruning_epoch >= args.pruning_n0:
            flops_lambda_delta = args.lambda0
            flops_lambda = flops_lambda / args.c0
        else:
            flops_lambda_delta = flops_lambda_delta * args.c0
            flops_lambda = flops_lambda + flops_lambda_delta

        if current_flops < args.min_flops:
            finished = True

        if pruning_epoch == 0:
            t = t + 1
        else:
            if t > args.stable_round:
                genotype = model.genotype()
                logging.info('genotype = %s', genotype)
            t = 0

        epoch += 1
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)  # exit if CUDA is unavailable

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)  # gpu device = 0
    # args = Namespace(arch_learning_rate=0.0003, arch_weight_decay=0.001, batch_size=64,
    # cutout=False, cutout_length=16, data='../data', drop_path_prob=0.3, epochs=50, gpu=0,
    # grad_clip=5, init_channels=16, layers=8, learning_rate=0.025, learning_rate_min=0.001,
    # model_path='saved_models', momentum=0.9, report_freq=50, save='search-EXP-20190624-154343',
    # seed=2, train_portion=0.5, unrolled=False, weight_decay=0.0003)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    # build an 8-layer model
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()

    # parameter count, e.g. 06/25 03:00:02 PM param size = 1.930618MB
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    # for name, parameters in model.named_parameters():
    #     print(name, ':', parameters.size())
    # exit()

    # define the optimizer over model.parameters(), i.e. the network weights w
    # (the architecture parameters alpha are held separately and updated by the Architect)
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    # transforms to apply to the dataset (data augmentation)
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # load the data: half of the CIFAR-10 training set feeds the train queue,
    # the other half the validation queue
    # train queue
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=1)
    # SubsetRandomSampler: random sampling without replacement

    # validation queue
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=1)

    # learning-rate schedule for the optimizer: CosineAnnealingLR (cosine annealing)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs,
                                                           eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    # run the training loop
    for epoch in range(args.epochs):
        # updates each param_group['lr'] with the scheduled learning rate
        scheduler.step()
        # read off the current learning rate
        lr = scheduler.get_lr()[0]
        logging.info('epoch = %d, lr = %e', epoch, lr)

        # current cell structure (genotype)
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        '''An example genotype:
        genotype = Genotype(normal=[('max_pool_3x3', 0), ('dil_conv_3x3', 1), ('max_pool_3x3', 1),
        ('dil_conv_3x3', 2), ('avg_pool_3x3', 0), ('sep_conv_3x3', 3), ('dil_conv_3x3', 4),
        ('avg_pool_3x3', 2)], normal_concat=range(2, 6), reduce=[('avg_pool_3x3', 1),
        ('avg_pool_3x3', 0), ('sep_conv_5x5', 2), ('dil_conv_3x3', 1), ('skip_connect', 3),
        ('dil_conv_5x5', 0), ('sep_conv_3x3', 1), ('avg_pool_3x3', 4)], reduce_concat=range(2, 6))
        '''

        # print the (softmaxed) architecture weights
        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training phase
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)  # average accuracy for this epoch

        # validation phase
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)  # average accuracy for this epoch

        utils.save(model, os.path.join(args.save, 'weights.pt'))  # save the model parameters
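# Most of the main() functions in this file delegate the bi-level update to a
# train() with the signature used above, which is not shown in this excerpt. A
# sketch modeled on the reference DARTS implementation; `utils.AvgrageMeter`,
# `utils.accuracy`, and the module-level `args` are assumed as in that repo.
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # draw a minibatch from the search (validation) queue for the alpha update
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # bi-level step: first update the architecture parameters alpha on validation data...
        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)

        # ...then update the network weights w on training data
        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg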
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10_simple(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # adversarial testing
        adv_acc, adv_obj = infer_minibatch(valid_queue, model, criterion)
        logging.info('adv_acc %f', adv_acc)
        # infer_minibatch(valid_queue, model, criterion)

        utils.save(model, os.path.join(args.save, 'weights_' + str(epoch) + '.pt'))
def search(self, train_x, train_y, valid_x, valid_y, metadata):
    np.random.seed(self.seed)
    cudnn.benchmark = True
    torch.manual_seed(self.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(self.seed)

    is_multi_gpu = False
    helper_function()

    n_classes = metadata['n_classes']

    # check torch available
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    cudnn.benchmark = True
    cudnn.enabled = True

    # loading criterion
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    train_pack = list(zip(train_x, train_y))
    valid_pack = list(zip(valid_x, valid_y))
    data_channel = np.array(train_x).shape[1]
    train_loader = torch.utils.data.DataLoader(train_pack, int(self.batch_size),
                                               pin_memory=True, num_workers=4)
    valid_loader = torch.utils.data.DataLoader(valid_pack, int(self.batch_size),
                                               pin_memory=True, num_workers=4)

    model = Network(self.init_channels, data_channel, n_classes, self.layers, criterion)
    model = model.cuda()

    # since submission server does not deal with multi-gpu
    if is_multi_gpu:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)

    arch_parameters = model.module.arch_parameters() if is_multi_gpu else model.arch_parameters()
    arch_params = list(map(id, arch_parameters))
    parameters = model.module.parameters() if is_multi_gpu else model.parameters()
    weight_params = filter(lambda p: id(p) not in arch_params, parameters)

    optimizer = torch.optim.SGD(weight_params, self.learning_rate,
                                momentum=self.momentum, weight_decay=self.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(self.epochs), eta_min=self.learning_rate_min)

    architect = Architect(is_multi_gpu, model, criterion, self.momentum,
                          self.weight_decay, self.arch_learning_rate,
                          self.arch_weight_decay)

    best_accuracy = 0
    best_accuracy_different_cnn_counts = dict()

    for epoch in range(self.epochs):
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # training
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        train_batch = time.time()
        for step, (input, target) in enumerate(train_loader):
            # logging.info("epoch %d, step %d START" % (epoch, step))
            model.train()
            n = input.size(0)

            input = input.cuda()
            target = target.cuda()

            # get a random minibatch from the search queue with replacement
            input_search, target_search = next(iter(valid_loader))
            input_search = input_search.cuda()
            target_search = target_search.cuda()

            # Update architecture alpha by Adam-SGD
            # logging.info("step %d. update architecture by Adam. START" % step)
            # if args.optimization == "DARTS":
            #     architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
            # else:
            architect.step_milenas_2ndorder(input, target, input_search, target_search,
                                            lr, optimizer, 1, 1)
            # logging.info("step %d. update architecture by Adam. FINISH" % step)

            # Update weights w by SGD, ignore the weights that gained during architecture training
            # logging.info("step %d. update weight by SGD. START" % step)
            optimizer.zero_grad()
            logits = model(input)
            loss = criterion(logits, target)
            loss.backward()
            parameters = model.module.arch_parameters() if is_multi_gpu else model.arch_parameters()
            nn.utils.clip_grad_norm_(parameters, self.grad_clip)
            optimizer.step()
            # logging.info("step %d. update weight by SGD. FINISH\n" % step)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
            # torch.cuda.empty_cache()

            if step % self.report_freq == 0:
                average_batch_t = (time.time() - train_batch) / (step + 1)
                print("Epoch: {}, Step: {}, Top1: {}, Top5: {}, T: {}".format(
                    epoch, step, top1.avg, top5.avg,
                    show_time(average_batch_t * (len(train_loader) - step))))

        model.eval()
        # validation
        with torch.no_grad():
            objs = utils.AvgrageMeter()
            top1 = utils.AvgrageMeter()
            top5 = utils.AvgrageMeter()
            for step, (input, target) in enumerate(valid_loader):
                input = input.cuda()
                target = target.cuda()

                logits = model(input)
                loss = criterion(logits, target)

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
                n = input.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                if step % self.report_freq == 0:
                    print("Epoch: {}, Step: {}, Top1: {}, Top5: {}".format(
                        epoch, step, top1.avg, top5.avg))

        scheduler.step()

    # save the structure
    genotype, normal_cnn_count, reduce_cnn_count = \
        model.module.genotype() if is_multi_gpu else model.genotype()
    print("(n:%d,r:%d)" % (normal_cnn_count, reduce_cnn_count))
    # print(F.softmax(model.module.alphas_normal if is_multi_gpu else model.alphas_normal, dim=-1))
    # print(F.softmax(model.module.alphas_reduce if is_multi_gpu else model.alphas_reduce, dim=-1))
    # logging.info('genotype = %s', genotype)
    return model
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    KD_loss = kd_loss.KDLoss(args.temp)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, KD_loss)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if not args.cls:
        print('not cls')
        trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset(
            args.dataset, args.dataroot, batch_size=args.batch_size)
    else:
        trainloader, infer_pair_loader, infer_random_loader, valloader = dataset.load_dataset(
            args.dataset, args.dataroot, 'pair', batch_size=args.batch_size)
    print(len(trainloader))
    print(len(infer_pair_loader))
    print(len(valloader))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(trainloader, infer_pair_loader, model, architect,
                                     criterion, KD_loss, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(infer_random_loader, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = False
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    cudnn.deterministic = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.loss_func == 'cce':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss_func == 'rll':
        criterion = utils.RobustLogLoss(alpha=args.alpha).cuda()
    else:
        # braces in the message are doubled so str.format does not treat them as a field
        assert False, "Invalid loss function '{}' given. Must be in {{'cce', 'rll'}}".format(args.loss_func)

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    model.train()
    model.apply(weights_init)
    nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # Load dataset
    if args.dataset == 'cifar10':
        train_data = CIFAR10(root=args.data, train=True, gold=False, gold_fraction=0.0,
                             corruption_prob=args.corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform, download=True, seed=args.seed)
        gold_train_data = CIFAR10(root=args.data, train=True, gold=True, gold_fraction=1.0,
                                  corruption_prob=args.corruption_prob,
                                  corruption_type=args.corruption_type,
                                  transform=train_transform, download=True, seed=args.seed)
    elif args.dataset == 'cifar100':
        train_data = CIFAR100(root=args.data, train=True, gold=False, gold_fraction=0.0,
                              corruption_prob=args.corruption_prob,
                              corruption_type=args.corruption_type,
                              transform=train_transform, download=True, seed=args.seed)
        gold_train_data = CIFAR100(root=args.data, train=True, gold=True, gold_fraction=1.0,
                                   corruption_prob=args.corruption_prob,
                                   corruption_type=args.corruption_type,
                                   transform=train_transform, download=True, seed=args.seed)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    clean_train_queue = torch.utils.data.DataLoader(
        gold_train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0)
    noisy_train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0)
    clean_valid_queue = torch.utils.data.DataLoader(
        gold_train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=0)
    noisy_valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=0)

    clean_train_list, clean_valid_list, noisy_train_list, noisy_valid_list = [], [], [], []
    for dst_list, queue in [
        (clean_train_list, clean_train_queue),
        (clean_valid_list, clean_valid_queue),
        (noisy_train_list, noisy_train_queue),
        (noisy_valid_list, noisy_valid_queue),
    ]:
        for input, target in queue:
            # legacy Variable(..., volatile=True) / .cuda(async=True) replaced:
            # `async` is a reserved word in Python 3.7+ and volatile is gone from PyTorch
            with torch.no_grad():
                input = input.cuda()
                target = target.cuda(non_blocking=True)
            dst_list.append((input, target))

    for epoch in range(args.epochs):
        logging.info('Epoch %d, random architecture with fixed weights', epoch)
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # training
        clean_train_acc, clean_train_obj = infer(clean_train_list, model, criterion, kind='clean_train')
        logging.info('clean_train_acc %f, clean_train_loss %f', clean_train_acc, clean_train_obj)
        noisy_train_acc, noisy_train_obj = infer(noisy_train_list, model, criterion, kind='noisy_train')
        logging.info('noisy_train_acc %f, noisy_train_loss %f', noisy_train_acc, noisy_train_obj)

        # validation
        clean_valid_acc, clean_valid_obj = infer(clean_valid_list, model, criterion, kind='clean_valid')
        logging.info('clean_valid_acc %f, clean_valid_loss %f', clean_valid_acc, clean_valid_obj)
        noisy_valid_acc, noisy_valid_obj = infer(noisy_valid_list, model, criterion, kind='noisy_valid')
        logging.info('noisy_valid_acc %f, noisy_valid_loss %f', noisy_valid_acc, noisy_valid_obj)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        # Randomly change the alphas
        k = sum(1 for i in range(model._steps) for n in range(2 + i))
        num_ops = len(PRIMITIVES)
        model.alphas_normal.data.copy_(torch.randn(k, num_ops))
        model.alphas_reduce.data.copy_(torch.randn(k, num_ops))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, args.n_class, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    _, _, n_classes, train_data, val_dat, test_dat = utils2.get_data(
        "custom", args.train_data_path, args.val_data_path, args.test_data_path,
        cutout_length=0, validation=True, validation2=True,
        n_class=args.n_class, image_size=args.image_size)

    # balanced split to train/validation
    print(train_data)

    # split data to train/validation
    num_train = len(train_data)
    n_val = len(val_dat)
    n_test = len(test_dat)
    indices1 = list(range(num_train))
    indices2 = list(range(n_val))
    indices3 = list(range(n_test))
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices1)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices2)
    test_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices3)

    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                              sampler=train_sampler, num_workers=2, pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(val_dat, batch_size=args.batch_size,
                                              sampler=valid_sampler, num_workers=2, pin_memory=True)
    test_queue = torch.utils.data.DataLoader(test_dat, batch_size=args.batch_size,
                                             sampler=test_sampler, num_workers=2, pin_memory=True)

    """
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.set == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)
    """

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    bestMetric = -999
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        # print(F.softmax(model.alphas_normal, dim=-1))
        # print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr, epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        # if args.epochs - epoch <= 1:
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        test_acc, test_obj = infer(test_queue, model, criterion)
        logging.info('test_acc %f', test_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        if valid_acc > bestMetric:
            bestMetric = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
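# Every variant in this file reports its parameter count via utils.count_parameters_in_MB
# and checkpoints via utils.save, neither of which appears in this excerpt. A minimal
# sketch, assuming the same behavior as the reference DARTS utils:
def count_parameters_in_MB(model):
    # skip auxiliary-head parameters, as the reference implementation does
    return sum(np.prod(v.size()) for name, v in model.named_parameters()
               if "auxiliary" not in name) / 1e6

def save(model, model_path):
    torch.save(model.state_dict(), model_path)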
def main():
    # check gpu is available
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # init
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # criterion, model, optimizer, for model training
    criterion = nn.CrossEntropyLoss()  # TODO add latency loss
    criterion = criterion.cuda()
    model = Network(channels, steps, strides, CLASSES, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    # prepare datasets
    # train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    train_transform, valid_transform = utils._data_transforms_imagenet(args)
    train_data = dset.ImageNet(root=args.data, split='train', download=True, transform=train_transform)
    valid_data = dset.ImageNet(root=args.data, split='val', download=True, transform=valid_transform)

    num_train = len(train_data)
    # indices = list(range(num_train))
    # split = int(np.floor(args.train_portion * num_train))

    # create dataloader
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    # learning rate scheduler with cosine annealing
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # architect
    architect = Architect(model, args)

    # training
    for epoch in range(args.epochs):
        # lr update
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # get genotype for logging
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        for alpha in model.arch_parameters():
            print(F.softmax(alpha, dim=-1).data)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    model = Network()
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum)
    optimizer_enhance = torch.optim.SGD(model.enhance_net_parameters(), args.learning_rate,
                                        momentum=args.momentum)
    optimizer_denoise = torch.optim.SGD(model.denoise_net_parameters(), args.learning_rate,
                                        momentum=args.momentum)

    # prepare DataLoader
    train_low_data_names = r'D:\ZJA\data\LOL\trainA/*.png'
    # train_low_data_names = r'H:\image-enhance\UPE500\trainA/*.png'
    TrainDataset = MemoryFriendlyLoader(img_dir=train_low_data_names, task='train')

    valid_low_data_names = r'D:\ZJA\data\LOL\validA/*.png'
    # valid_low_data_names = r'H:\image-enhance\UPE500\validA/*.png'
    ValidDataset = MemoryFriendlyLoader(img_dir=valid_low_data_names, task='valid')

    train_queue = torch.utils.data.DataLoader(TrainDataset, batch_size=args.batch_size,
                                              pin_memory=True, num_workers=0)
    valid_queue = torch.utils.data.DataLoader(ValidDataset, batch_size=args.batch_size,
                                              pin_memory=True, num_workers=0)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    enhance_architect = Enhence_Architect(model, args)
    denoise_architect = Denoise_Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        logging.info('Architect of IEM:')
        logging.info('iem = %s', str(0))
        genotype = model.genotype(0, task='enhance')
        logging.info('genotype = %s', genotype)
        logging.info('iem %s', str(0))
        logging.info('%s', F.softmax(model.alphas_enhances[0], dim=-1))

        logging.info('Architect of NRM:')
        logging.info('nrm = %s', str(0))
        genotype = model.genotype(0, task='denoise')
        logging.info('genotype = %s', genotype)
        logging.info('nrm %s', str(0))
        logging.info('%s', F.softmax(model.alphas_denoises[0], dim=-1))

        # training
        train(train_queue, valid_queue, model, enhance_architect, denoise_architect,
              optimizer_enhance, optimizer_denoise, lr, epoch)
def darts(exp_name, args):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    args['save'] = './{}/{}-{}-{}'.format(exp_name, args['save'],
                                          time.strftime("%Y%m%d-%H%M%S"), args['seed'])
    utils.create_exp_dir(args['save'], scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args['save'], 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    np.random.seed(args['seed'])
    torch.cuda.set_device(args['gpu'])
    cudnn.benchmark = True
    torch.manual_seed(args['seed'])
    cudnn.enabled = True
    torch.cuda.manual_seed(args['seed'])
    logging.info('gpu device = %s' % args['gpu'])
    logging.info("args = %s", args)

    data_augmentations = transforms.ToTensor()
    train_data = KMNIST(args['data'], True, data_augmentations)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args['init_channels'], train_data.n_classes, args['layers'], criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args['learning_rate'],
                                momentum=args['momentum'], weight_decay=args['weight_decay'])

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args['train_portion'] * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args['batch_size'],
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]))

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args['batch_size'],
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]))

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args['epochs']), eta_min=args['learning_rate_min'])

    architect = Architect(model, args)

    for epoch in range(args['epochs']):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))
        print(F.softmax(model.betas_normal[2:5], dim=-1))
        # model.drop_path_prob = args['drop_path_prob'] * epoch / args['epochs']

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr, epoch)
        logging.info('train_acc %f', train_acc)

        # validation
        if args['epochs'] - epoch <= 1:
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args['save'], 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    if args.regularize_type not in ["", "dirichlet", "gumball"]:
        logging.info('regularization type set incorrectly')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    # regularization coefficient and alpha
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, 0.05, 1)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        alphas_normal = F.softmax(model.alphas_normal, dim=-1)
        alphas_reduce = F.softmax(model.alphas_reduce, dim=-1)
        logging.info(alphas_normal)
        logging.info(alphas_reduce)

        # exp4 alternation: even epochs use temperature 1.0, odd epochs a near-zero temperature
        cur_temp = None
        if epoch % 2 == 0:
            cur_temp = 1.0
        else:
            cur_temp = 10e-10
        model.set_temperature(cur_temp)
        logging.info('starting with temperature %f', cur_temp)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        # logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        # logging.info('valid_acc %f', valid_acc)

        model_epoch_dir = (exp_dir + "/epoch{epoch_num}/").format(epoch_num=epoch)
        if not os.path.exists(model_epoch_dir):
            os.makedirs(model_epoch_dir)
        utils.save(model, os.path.join(model_epoch_dir, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, k=args.k)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.dataset == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    architect = Architect(model, args)

    # configure progressive parameter
    epoch = 0
    ks = [6, 4]
    num_keeps = [7, 4]
    train_epochs = [2, 2] if 'debug' in args.save else [25, 25]
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)

    for i, current_epochs in enumerate(train_epochs):
        for e in range(current_epochs):
            lr = scheduler.get_lr()[0]
            logging.info('epoch %d lr %e', epoch, lr)

            genotype = model.genotype()
            logging.info('genotype = %s', genotype)
            model.show_arch_parameters()

            # training
            train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                         criterion, optimizer, lr, e)
            logging.info('train_acc %f', train_acc)

            # validation
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

            epoch += 1
            scheduler.step()
            utils.save(model, os.path.join(args.save, 'weights.pt'))

        if not i == len(train_epochs) - 1:
            model.pruning(num_keeps[i + 1])
            # architect.pruning([model.mask_normal, model.mask_reduce])
            model.wider(ks[i + 1])
            optimizer = configure_optimizer(
                optimizer,
                torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay))
            scheduler = configure_scheduler(
                scheduler,
                torch.optim.lr_scheduler.CosineAnnealingLR(
                    optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min))
            logging.info('pruning finish, %d ops left per edge', num_keeps[i + 1])
            logging.info('network wider finish, current pc parameter %d', ks[i + 1])

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model.show_arch_parameters()
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Set random seeds and log GPU info.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # Set up the network and loss function.
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    # DARTS uses the Network class to store the alphas for optimizing the architecture
    # as well as the weights of the architecture determined through the first level of
    # optimization.
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    # Load and transform CIFAR10.
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    # Split CIFAR10 training data into training and validation for search.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # Set up torch data loader on 2 CPUs for training data.
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    # Set up torch data loader on 2 CPUs for validation data.
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    # Cosine annealing learning rate.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    # Start bi-level optimization.
    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # Sample a genotype from the metamodel.
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # Train the deep network that corresponds to the genotype that was sampled.
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
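# The infer() used for validation throughout this file is likewise not shown. A
# sketch modeled on the reference DARTS evaluation loop, wrapped in torch.no_grad()
# (which newer PyTorch requires in place of volatile Variables); utils.AvgrageMeter,
# utils.accuracy, and the module-level args are assumed as in that repo.
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg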
def main():
    path_to_best_loss_eval = "./generator/best_loss_model_{}.csv".format(args.seed)
    path_to_best_model = "./generator/best_model_{}.pth".format(args.seed)

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    # ================= DONAS ==========================
    low_flops = args.low_flops
    high_flops = args.high_flops

    nodes, edges = model.get_arch_param_nums()
    lookup_table = LookUpTable(edges, nodes)
    arch_param_nums = nodes * edges

    generator = get_generator(20)
    generator = generator.cuda()

    backbone_pool = BackbonePool(nodes, edges, lookup_table, arch_param_nums)
    backbone = backbone_pool.get_backbone((low_flops + high_flops) / 2)

    g_optimizer = torch.optim.Adam(generator.parameters(), weight_decay=0,
                                   lr=0.001, betas=(0.5, 0.999))
    tau = 5
    best_hc_loss = 100000
    best_top1 = 0
    # ================= DONAS ==========================

    architect = Architect(model, generator, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr, low_flops, high_flops,
                                     backbone, tau)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, generator, backbone,
                                     (low_flops + high_flops) // 2, lookup_table)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

        evalulate_metric, total_loss, kendall_tau = evalulate_generator(
            generator, backbone, lookup_table, low_flops, high_flops)
        if total_loss < best_hc_loss:
            logging.info("Best hc loss : {}. Save model!".format(total_loss))
            save_generator_evaluate_metric(evalulate_metric, path_to_best_loss_eval)
            best_hc_loss = total_loss

        if valid_acc > best_top1:
            logging.info("Best top1-avg : {}. Save model!".format(valid_acc))
            save_model(generator, path_to_best_model)
            best_top1 = valid_acc

        tau *= 0.95
def nas(args: Namespace, task: Task, preprocess_func: Compose) -> Module:
    '''
    Network Architecture Search method

    Given task and preprocess function, this method returns a model output by NAS.
    The implementation of DARTS is available at https://github.com/alphadl/darts.pytorch1.1
    '''
    # TODO: Replace model with the output by NAS
    args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CLASSES = task.n_classes

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    # gpus = [int(args.gpu)]
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))
    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        print("True")
        model = nn.parallel.DataParallel(model, device_ids=gpus, output_device=gpus[0])
        model = model.module

    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params, model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        # model.parameters(),
        weight_params,
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    optimizer = nn.DataParallel(optimizer, device_ids=gpus)

    if task.name == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=preprocess_func)
        # train_transform, valid_transform = utils._data_transforms_cifar10(args)
        # train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    elif task.name == 'cifar10':
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=preprocess_func)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.module, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(args, train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(args, valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))

    # return a neural network model (torch.nn.Module)
    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model = NetworkClassification(36, task.n_classes, 20, False, genotype)
    return model
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.loss_func == 'cce':
        criterion = nn.CrossEntropyLoss().cuda()
    elif args.loss_func == 'rll':
        criterion = utils.RobustLogLoss().cuda()
    else:
        # braces in the message are doubled so str.format does not treat them as a field
        assert False, "Invalid loss function '{}' given. Must be in {{'cce', 'rll'}}".format(args.loss_func)

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)

    # Load dataset
    if args.gold_fraction == 0:
        train_data = CIFAR10(root=args.data, train=True, gold=False,
                             gold_fraction=args.gold_fraction,
                             corruption_prob=args.corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform, download=True, seed=args.seed)
        if args.clean_valid:
            gold_train_data = CIFAR10(root=args.data, train=True, gold=True,
                                      gold_fraction=1.0,
                                      corruption_prob=args.corruption_prob,
                                      corruption_type=args.corruption_type,
                                      transform=train_transform, download=True, seed=args.seed)
    else:
        train_data = CIFAR10(root=args.data, train=True, gold=True,
                             gold_fraction=args.gold_fraction,
                             corruption_prob=args.corruption_prob,
                             corruption_type=args.corruption_type,
                             transform=train_transform, download=True, seed=args.seed)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)

    if args.clean_valid:
        valid_queue = torch.utils.data.DataLoader(
            gold_train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
            pin_memory=True, num_workers=2)
    else:
        valid_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
            pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('GPU device = %d' % args.gpu)
    logging.info("args = %s", args)

    # prepare dataset
    train_transform, valid_transform = utils.data_transforms(args.dataset, args.cutout, args.cutout_length)
    if args.dataset == "CIFAR100":
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    elif args.dataset == "CIFAR10":
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    elif args.dataset == 'mit67':
        dset_cls = dset.ImageFolder
        data_path = '%s/MIT67/train' % args.tmp_data_dir
        val_path = '%s/MIT67/test' % args.tmp_data_dir
        train_data = dset_cls(root=data_path, transform=train_transform)
        valid_data = dset_cls(root=val_path, transform=valid_transform)
    elif args.dataset == 'sport8':
        dset_cls = dset.ImageFolder
        data_path = '%s/Sport8/train' % args.tmp_data_dir
        val_path = '%s/Sport8/test' % args.tmp_data_dir
        train_data = dset_cls(root=data_path, transform=train_transform)
        valid_data = dset_cls(root=val_path, transform=valid_transform)
    # valid_data (the held-out test split) is loaded but unused below;
    # search validates on the second half of train_data.

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    random.shuffle(indices)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    if len(args.add_width) == 3:
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:
        add_layers = args.add_layers
    else:
        add_layers = [0, 3, 6]
    if len(args.dropout_rate) == 3:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]

    for sp in range(len(num_to_keep)):
        model = Network(args.init_channels + int(add_width[sp]),
                        CLASSES,
                        args.layers + int(add_layers[sp]),
                        criterion,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]),
                        largemode=args.dataset in utils.LARGE_DATASETS)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        # architecture parameters (alphas) are optimized separately from weights
        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)
        optimizer = torch.optim.SGD(network_params,
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(model.arch_parameters(),
                                       lr=args.arch_learning_rate,
                                       betas=(0.5, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2

        for epoch in range(epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()

            # training: architecture updates are frozen for the first eps_no_arch epochs
            if epoch < eps_no_arch:
                model.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr, train_arch=False)
            else:
                model.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
                model.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr, train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)

            # validation: only during the last five epochs of each stage
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])

        # Save switches info for skip-connect refinement in the final stage.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        # drop operations with low architecture weights
        arch_param = model.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:
            arch_param = model.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])

            # Generate Architecture: keep the two strongest incoming edges per node
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            for i in range(14):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False

            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)

            # restrict skip-connect (normal cell only)
            logging.info('Restricting skipconnect...')
            for sks in range(0, len(PRIMITIVES) + 1):
                max_sk = len(PRIMITIVES) - sks
                num_sk = check_sk_number(switches_normal)
                if num_sk < max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)

            with open(args.save + "/best_genotype.txt", "w") as f:
                f.write(str(genotype))
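get_min_k and get_min_k_no_zero are referenced above but not shown. The following is a hypothetical sketch consistent only with how they are called here (indices into the row of currently active ops; the no_zero variant matches the later comment "for the last stage, drop all Zero operations", so it force-drops the 'none' op first). The actual helpers in the repo may differ:

import numpy as np

def get_min_k(probs, k):
    # indices (into the active-op axis) of the k smallest arch weights, ascending
    return list(np.argsort(probs)[:k])

def get_min_k_no_zero(probs, idxs, k):
    # last-stage variant: force-drop the 'none' op (PRIMITIVES index 0) first,
    # then the smallest remaining weights
    drop = []
    if 0 in idxs:
        drop.append(idxs.index(0))
        k -= 1
    rest = [int(i) for i in np.argsort(probs) if i not in drop]
    return drop + rest[:k]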
def __init__(self, save_path, seed, batch_size, grad_clip, epochs, resume_iter=None, init_channels=16):
    args = {}
    args['data'] = '/data/mzhang3/randomNAS_own/data'
    args['epochs'] = epochs
    args['learning_rate'] = 0.025
    args['batch_size'] = batch_size
    args['learning_rate_min'] = 0.001
    args['momentum'] = 0.9
    args['weight_decay'] = 3e-4
    args['init_channels'] = init_channels
    args['layers'] = 8
    args['drop_path_prob'] = 0.3
    args['grad_clip'] = grad_clip
    args['train_portion'] = 0.5
    args['seed'] = seed
    args['log_interval'] = 50
    args['save'] = save_path
    args['gpu'] = 0
    args['cuda'] = True
    args['cutout'] = False
    args['cutout_length'] = 16
    args['report_freq'] = 50
    args = AttrDict(args)
    self.args = args
    self.seed = seed

    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.enabled = True
    cudnn.deterministic = True
    torch.cuda.manual_seed_all(args.seed)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=False, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # worker_init_fn must be a callable; passing np.random.seed(args.seed)
    # directly would invoke it eagerly and hand the loader None.
    self.train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0,
        worker_init_fn=lambda worker_id: np.random.seed(args.seed))
    self.valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=32,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=0,
        worker_init_fn=lambda worker_id: np.random.seed(args.seed))
    self.train_iter = iter(self.train_queue)
    self.valid_iter = iter(self.valid_queue)

    self.steps = 0
    self.epochs = 0
    self.total_loss = 0
    self.start_time = time.time()

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    self.criterion = criterion

    model = Network(args.init_channels, 10, args.layers, self.criterion)
    model = model.cuda()
    self.model = model

    # try:
    #     self.load()
    #     logging.info('loaded previously saved weights')
    # except Exception as e:
    #     print(e)

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(self.model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    self.optimizer = optimizer
    self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    if resume_iter is not None:
        self.steps = resume_iter
        self.epochs = int(resume_iter / len(self.train_queue))
        logging.info("Resuming from epoch %d" % self.epochs)
        self.objs = utils.AvgrageMeter()
        self.top1 = utils.AvgrageMeter()
        self.top5 = utils.AvgrageMeter()
        # fast-forward the LR schedule to the resume epoch
        for i in range(self.epochs):
            self.scheduler.step()

    size = 0
    for p in model.parameters():
        size += p.nelement()
    logging.info('param size: {}'.format(size))
    total_params = sum(x.data.nelement() for x in model.parameters())
    logging.info('Args: {}'.format(args))
    logging.info('Model total parameters: {}'.format(total_params))
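AttrDict is used above to give the plain args dict attribute access (args.seed, args.gpu, ...) but is not defined in this excerpt. A minimal sketch that satisfies this usage; the repo's actual helper may differ:

class AttrDict(dict):
    # dict whose keys are also readable/writable as attributes
    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as e:
            raise AttributeError(name) from e

    def __setattr__(self, name, value):
        self[name] = value

# usage: args = AttrDict({'seed': 2}); args.seed  -> 2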
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # elif-chain is required here: with two independent ifs, the cifar100
    # setting would always be overwritten by the final else.
    if args.set == 'cifar100':
        N_CLASSES = 100
    elif args.set == 'tiny_imagenet':
        N_CLASSES = 200
    else:
        N_CLASSES = 10

    config = QuantumFold_config(None, 0)
    if config.op_struc == "":
        args.batch_size = args.batch_size // 4
    config.exp_dir = args.save
    config.primitive = args.primitive
    config.attention = args.attention
    config.device = OnInitInstance(args.seed, args.gpu)

    if config.primitive == "p0":
        config.PRIMITIVES_pool = ['none', 'max_pool_3x3', 'avg_pool_3x3', 'Identity',
                                  'BatchNorm2d', 'ReLU', 'Conv_3', 'Conv_5']
    elif config.primitive in ("p1", "p2", "p21"):  # p1/p2/p21 share one pool
        config.PRIMITIVES_pool = ['none', 'max_pool_3x3', 'skip_connect', 'BatchNorm2d',
                                  'ReLU', 'Conv_3', 'DepthConv_3', 'Conv_11']
    elif config.primitive == "p3":
        config.PRIMITIVES_pool = ['none', 'max_pool_3x3', 'max_pool_5x5', 'skip_connect',
                                  'Identity', 'BatchNorm2d', 'ReLU', 'Conv_3', 'DepthConv_3',
                                  'Conv_5', 'DepthConv_5', 'Conv_11', 'sep_conv_3x3']
    elif config.primitive == "c0":
        config.PRIMITIVES_pool = ['none', 'max_pool_3x3', 'avg_pool_3x3', 'skip_connect',
                                  'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5']

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(config, args.init_channels, N_CLASSES, args.layers, criterion)
    print(model)
    model = model.cuda()
    model.visual = Visdom_Visualizer(env_title=f"{args.set}_{model.title}_{args.legend}")
    model.visual.img_dir = "./results/images/"
    logging.info("param size = %.3fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.set == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
        # a test split is also needed here so infer_queue below is always defined
        infer_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
    elif args.set == 'tiny_imagenet':
        train_data = TinyImageNet200(root=args.data, train=True, download=True)
        infer_data = TinyImageNet200(root=args.data, train=False, download=True)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
        # evaluating on the real test split is more sensible than reusing valid_queue
        infer_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.load_workers)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=0)
    infer_queue = torch.utils.data.DataLoader(
        infer_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=0)

    config.experiment = Experiment(config, "cifar_10", model, loss_fn=None,
                                   optimizer=optimizer, objective_metric=None)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    architect.init_on_data(valid_queue, criterion)  # data-aware init
    print(architect)
    print(f"======\tconfig={config.__dict__}\n")
    print(f"======\targs={args.__dict__}\n")

    valid_acc, t0 = 0, time.time()
    for epoch in range(args.epochs):
        scheduler.step()
        plot_path = f"{model.config.exp_dir}/{model.title}E{epoch}_a{valid_acc:.1f}_"
        dump_genotype(model, logging, plot_path)
        lr = scheduler.get_lr()[0]
        logging.info('epoch=%d lr=%e', epoch, lr)
        print(f"======\tnTrain={len(train_queue.dataset)} nSearch={len(valid_queue.dataset)} nTest={len(infer_queue.dataset)}")

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr, epoch)
        logging.info(f'train_acc {train_acc} T={time.time()-t0:.2f}')

        # validation on the held-out test queue
        valid_acc, valid_obj = infer(infer_queue, model, criterion, epoch)
        logging.info(f'valid_acc {valid_acc} T={time.time()-t0:.2f}')
        config.experiment.best_score = max(valid_acc, config.experiment.best_score)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        model.visual.UpdateLoss(title=f"Accuracy on \"{args.set}\"",
                                legend=f"{model.title}",
                                loss=valid_acc, yLabel="Accuracy")
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    gpus = [int(i) for i in args.gpu.split(',')]
    if len(gpus) == 1:
        torch.cuda.set_device(int(args.gpu))
    # cudnn.benchmark = True
    torch.manual_seed(args.seed)
    # cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %s' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    if len(gpus) > 1:
        logging.info('using DataParallel on gpus %s', gpus)
        model = nn.parallel.DataParallel(model, device_ids=gpus, output_device=gpus[0])
        model = model.module

    # exclude the architecture parameters (alphas) from the weight optimizer
    arch_params = list(map(id, model.arch_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_params, model.parameters())

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(weight_params,
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, criterion, args)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        with torch.no_grad():
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
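The id()-based split above works because Python parameters are hashed by object identity, so membership in arch_params cleanly partitions model.parameters(). A toy demonstration; the module and names are illustrative, not from the original code:

import torch
import torch.nn as nn

net = nn.Linear(4, 2)
alphas = nn.Parameter(torch.zeros(3))  # stand-in for an architecture parameter
all_params = list(net.parameters()) + [alphas]

arch_ids = {id(alphas)}
weight_params = [p for p in all_params if id(p) not in arch_ids]

assert all(id(p) != id(alphas) for p in weight_params)
assert len(weight_params) == 2  # Linear's weight and bias survive the filter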
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    in_channels, num_classes, dataset_in_torch = utils.dataset_fields(args)  # new

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, in_channels, num_classes, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(  # SGD for weights
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    train_data = utils.dataset_split_and_transform(dataset_in_torch, args)  # new

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)
    prune = Prune(args.epochs_pre_prune)

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        logging.info(F.softmax(model.alphas_normal, dim=-1))
        logging.info(F.softmax(model.alphas_reduce, dim=-1))

        # Pruning
        if epoch > args.epochs_pre_prune:
            if epoch == args.epochs - 1:
                # 90 alphas need to be pruned by the end of search
                prune.num_to_zero = 90 - len(prune.zeros_indices_alphas_normal)
            if args.sparse == 'sparse':
                prune.num_to_zero_sparse(epoch, args)
            prune.prune_all_alphas(model)

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                     criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    dataset = Dataset(args.dataset)
    train_examples = torch.from_numpy(dataset.get_train().astype('int64'))
    valid_examples = torch.from_numpy(dataset.get_valid().astype('int64'))
    CLASSES = dataset.get_shape()[0]

    criterion = nn.CrossEntropyLoss(reduction='mean')
    # criterion = CrossEntropyLabelSmooth(CLASSES, args.label_smooth)
    criterion = criterion.cuda()

    regularizer = {
        'N2': N2(args.reg),
        'N3': N3(args.reg),
    }[args.regularizer]

    model = Network(args.channels, CLASSES, args.layers, criterion, regularizer,
                    args.interleaved, dataset.get_shape(), args.emb_dim, args.init, args.steps)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # TODO: can momentum / weight decay be reintroduced for SGD?
    optimizer = {
        'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
        'Adam': lambda: optim.Adam(model.parameters(), lr=args.learning_rate,
                                   betas=(args.decay1, args.decay2)),
        'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate)
    }[args.optimizer]()

    train_queue = torch.utils.data.DataLoader(
        train_examples, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        valid_examples, batch_size=args.batch_size, shuffle=True,
        pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc = 0
    patience = 0
    curve = {'valid': [], 'test': []}

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        model.epoch = epoch
        print('model temperature param', 1.05 ** model.epoch)
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        print(F.softmax((1.05 ** epoch) * model.alphas_normal, dim=-1))

        train_epoch(train_examples, train_queue, valid_queue, model, architect,
                    criterion, optimizer, regularizer, args.batch_size, args.learning_rate)

        if (epoch + 1) % args.report_freq == 0:
            valid, test = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test']
            ]
            curve['valid'].append(valid)
            curve['test'].append(test)
            print("\t VALID: ", valid)
            print("\t TEST: ", test)

            is_best = False
            if valid['MRR'] > best_acc:
                best_acc = valid['MRR']
                is_best = True
                patience = 0
            else:
                patience += 1
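The 1.05 ** epoch factor above acts as an inverse temperature on the architecture softmax: as search progresses, the same logits yield an increasingly peaked distribution, nudging the relaxed architecture toward a discrete choice. A small numeric illustration with made-up logits:

import torch
import torch.nn.functional as F

alphas = torch.tensor([1.0, 0.5, 0.1])  # toy architecture logits
for epoch in (0, 25, 50):
    scale = 1.05 ** epoch  # ~1.0 at epoch 0, ~3.4 at 25, ~11.5 at 50
    print(epoch, F.softmax(scale * alphas, dim=-1))
# mass concentrates on the largest logit as the scale grows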
def main(args):
    global log
    log = logging.getLogger("train_search")

    CIFAR_CLASSES = 10
    if args.set == 'cifar100':
        CIFAR_CLASSES = 100

    if not torch.cuda.is_available():
        log.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    log.info('gpu device = %d' % args.gpu)
    log.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    log.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, _ = utils._data_transforms_cifar10(args)
    if args.set == 'cifar100':
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    # optionally subsample the training set with a stratified split
    # (train_test_split from sklearn.model_selection)
    targets = train_data.targets
    train_idx = np.arange(len(targets))
    if args.subsample > 0:
        train_idx, _ = train_test_split(train_idx,
                                        test_size=1 - args.subsample,
                                        shuffle=True,
                                        stratify=targets)

    num_train = len(train_idx)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_idx[indices[:split]]),
        pin_memory=True, num_workers=4)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_idx[indices[split:num_train]]),
        pin_memory=True, num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs, eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    train_acc = None
    valid_acc = None
    l1_loss = torch.zeros(1)
    l2_loss = torch.zeros(1)
    criterion_loss = torch.zeros(1)

    genotype = model.genotype()
    log.info('initial genotype = %s', genotype)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        log.info('epoch %d lr %e', epoch, lr)
        # model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        # training
        train_acc, train_obj, l1_loss, l2_loss, criterion_loss = train(
            train_queue, valid_queue, model, architect, criterion, optimizer, lr,
            epoch, args.grad_clip, args.report_lines, args.unrolled,
            args.criterion_weight, args.l1_weight, args.l2_weight)
        scheduler.step()
        log.info('train_acc %f', train_acc)
        log.info('%s %f', L1_LOSS, l1_loss)
        log.info('%s %f', L2_LOSS, l2_loss)
        log.info('criterion_loss %f', criterion_loss)

        # validation (final epoch only)
        if args.epochs - epoch <= 1:
            valid_acc, valid_obj = infer(valid_queue, model, criterion, args.report_lines)
            log.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        genotype = model.genotype()
        log.info('genotype = %s', genotype)

    log.info('last genotype = %s', genotype)

    # size of the corresponding evaluation network
    model = TrainNetwork(36, CIFAR_CLASSES, 20, False, genotype)
    model_size_mb = utils.count_parameters_in_MB(model)
    log.info("Train model param size = %.2fMB", model_size_mb)

    return {
        L1_LOSS: {
            (args.l1_weight, args.criterion_weight): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l1_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        },
        L2_LOSS: {
            (args.l2_weight, args.criterion_weight): {
                TRAIN_ACC: train_acc,
                VALID_ACC: valid_acc,
                REG_LOSS: l2_loss.cpu().data.item(),
                CRITERION_LOSS: criterion_loss.cpu().data.item(),
                SIZE: model_size_mb,
                GENOTYPE: genotype
            }
        }
    }
def main():
    args.exp_path /= f'{args.gpu}_{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(Path(args.exp_path), scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(args.exp_path / 'log.txt')
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.seed is None:
        raise ValueError('args.seed must be set.')
    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)

    # (disabled) block that queried nvidia-smi for total/used memory and
    # pre-allocated ~85% of the free GPU memory to reserve it for this process

    logging.info(f'GPU device = {args.gpu}')
    logging.info(f'args = {args}')

    criterion = nn.CrossEntropyLoss().to(device)
    setting = args.location
    model = Network(args.init_ch, 10, args.layers, criterion, setting)

    checkpoint = None
    previous_epochs = 0
    if args.checkpoint_path:
        checkpoint = torch.load(args.checkpoint_path)
        utils.load(model, checkpoint['state_dict'], False)
        previous_epochs = checkpoint['epoch']
        args.epochs -= previous_epochs
        if args.epochs <= 0:
            raise ValueError('args.epochs is too small.')

    if use_DataParallel:
        print('use Data Parallel')
        model = nn.parallel.DataParallel(model)
        model = model.cuda()
        module = model.module
        torch.cuda.manual_seed_all(args.seed)
    else:
        model = model.to(device)
        module = model

    param_size = utils.count_parameters_in_MB(model)
    logging.info(f'param size = {param_size}MB')

    # exclude architecture and attention parameters from the weight optimizer
    arch_and_attn_params = list(map(id, module.arch_and_attn_parameters()
                                    if use_DataParallel else model.arch_and_attn_parameters()))
    weight_params = filter(lambda p: id(p) not in arch_and_attn_params,
                           module.parameters() if use_DataParallel else model.parameters())

    optimizer = optim.SGD(weight_params, args.lr, momentum=args.momentum, weight_decay=args.wd)
    if checkpoint:
        optimizer.load_state_dict(checkpoint['optimizer'])

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)  # 50000
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # 25000

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=8)  # increased from 2 workers
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batchsz,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=8)

    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs, eta_min=args.lr_min)
    if checkpoint:
        scheduler.load_state_dict(checkpoint['scheduler'])

    arch = Arch(model, criterion, args)
    if checkpoint:
        arch.optimizer.load_state_dict(checkpoint['arch_optimizer'])

    for epoch in tqdm(range(args.epochs), desc='Total Progress'):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info(f'\nEpoch: {epoch} lr: {lr}')

        gen = module.genotype()
        logging.info(f'Genotype: {gen}')
        print(F.softmax(module.alphas_normal, dim=-1))
        print(F.softmax(module.alphas_reduce, dim=-1))
        if module.betas_normal is not None:
            print(F.softmax(module.betas_normal, dim=-1))
            print(F.softmax(module.betas_reduce, dim=-1))
        if module.gammas_normal is not None:
            print(F.softmax(module.gammas_normal, dim=-1))
            print(F.softmax(module.gammas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, arch,
                                     criterion, optimizer, lr, epoch + 1)
        logging.info(f'train acc: {train_acc}')

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch + 1)
        logging.info(f'valid acc: {valid_acc}')

        utils.save(model, args.exp_path / 'search.pt')
        utils.save_checkpoint(
            {
                'epoch': epoch + 1 + previous_epochs,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'arch_optimizer': arch.optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }, False, args.exp_path)

    gen = module.genotype()
    gen_path = args.exp_path / 'genotype.json'
    utils.save_genotype(gen, gen_path)
    logging.info(f'Result genotype: {gen}')
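utils.save_checkpoint is not shown in this excerpt, but the dict layout above implies a thin torch.save wrapper. A hedged sketch with the same keys; the filenames and the is_best handling are assumptions, not the repo's actual behavior:

import torch
from pathlib import Path

def save_checkpoint(state, is_best, exp_path):
    # persist the full training state dict written in the loop above
    path = Path(exp_path) / 'checkpoint.pt'
    torch.save(state, path)
    if is_best:
        torch.save(state, Path(exp_path) / 'checkpoint_best.pt')

# resuming mirrors the keys written in the training loop:
#   checkpoint = torch.load(checkpoint_path)
#   model.load_state_dict(checkpoint['state_dict'])
#   optimizer.load_state_dict(checkpoint['optimizer'])
#   scheduler.load_state_dict(checkpoint['scheduler'])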
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    # prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # eps_no_archs = [10, 10, 10]
    eps_no_archs = [2, 2, 2]

    # num_to_keep, num_to_drop, add_width, add_layers and drop_rate are assumed
    # to be defined at module level in this variant.
    for sp in range(len(num_to_keep)):
        model = Network(args.init_channels + int(add_width[sp]),
                        CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]),
                        criterion,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = nn.DataParallel(model)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)
        optimizer = torch.optim.SGD(network_params,
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        # betas=(0.5, 0.999) in the reference implementation
        optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                       lr=args.arch_learning_rate,
                                       betas=(0, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2

        for epoch in range(epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()

            # training
            if epoch < eps_no_arch:
                model.module.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr, train_arch=False)
            else:
                model.module.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr, train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)

            # validation
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])

        # Save switches info for skip-connect refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:
            arch_param = model.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])

            # Generate Architecture, similar to DARTS
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):  # pick the two strongest incoming edges per node
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1

            # set switches according to the ranking of arch parameters
            for i in range(14):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False

            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)

            # restrict skip-connect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generate genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)
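check_sk_number is not shown either; given the switches layout (14 edges, each a list of len(PRIMITIVES) booleans), it plausibly counts edges on which skip_connect is still switched on. A hypothetical sketch, assuming the standard DARTS ordering where 'skip_connect' sits at index 3 of PRIMITIVES; the real helper may differ:

SKIP_IDX = 3  # assumed: PRIMITIVES.index('skip_connect') in the DARTS op pool

def check_sk_number(switches):
    # count edges whose skip_connect switch is still on
    return sum(1 for edge in switches if edge[SKIP_IDX])

The restriction loop above then keeps pruning the weakest skip-connect edges until this count drops to at most max_sk, logging one candidate genotype per skip-connect budget.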