def train(train_queue, model, criterion, optimizer):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    return top1.avg, objs.avg
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            if 'debug' in args.save:
                break

    return top1.avg, objs.avg
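# --- Hedged sketch (assumption, not part of the original file): the loops
# above call utils.AvgrageMeter and utils.accuracy, which are defined
# elsewhere in the repo. Minimal implementations consistent with how they
# are called here (update(val, n), .avg, accuracy(..., topk=(1, 5)) returning
# percentages) would look roughly like this; the repo's spelling
# "AvgrageMeter" is kept as-is.
class AvgrageMeter(object):

    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt


def accuracy(output, target, topk=(1,)):
    # top-k accuracy in percent for a batch of logits
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res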
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # print('before softmax', model.arch_parameters())
        model.softmax_arch_parameters()

        # perturb on alpha
        # print('after softmax', model.arch_parameters())
        if perturb_alpha:
            perturb_alpha(model, input, target, epsilon_alpha)
            optimizer.zero_grad()
            architect.optimizer.zero_grad()
            # print('after perturb', model.arch_parameters())

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()
        # print('after restore', model.arch_parameters())

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    return top1.avg, objs.avg
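# --- Hedged sketch (assumption): perturb_alpha is passed into train() above
# but never defined in this section. In SmoothDARTS-style code it perturbs
# the softmaxed architecture parameters before the weight update; a
# random-direction variant matching the call signature
# perturb_alpha(model, input, target, epsilon_alpha) could look like the
# sketch below (input and target would only be needed by an adversarial
# variant; the modified version used further down, which also returns an
# input-shaped perturbation, is not reconstructed here).
def perturb_alpha_random(model, input, target, epsilon_alpha):
    with torch.no_grad():
        for alpha in model.arch_parameters():
            # uniform noise in [-epsilon_alpha, epsilon_alpha]
            noise = torch.empty_like(alpha).uniform_(-epsilon_alpha, epsilon_alpha)
            alpha.add_(noise)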
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    max_step = 0
    best_acc = 0

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            test_loss += loss.item()
            _, predicted = logits.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            max_step = step
            progress_bar(step, len(valid_queue),
                         'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss / (step + 1), 100. * correct / total,
                            correct, total))

    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc

    return 100. * correct / total, test_loss / (max_step + 1)
def train4(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
           perturb_alpha, epsilon_alpha, model2, epoch, delta):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0
    # delta = torch.empty(5, 3, 32, 32)
    m = 64

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # logits2 = resnet18(input*diff, updateType='weight')
        # pert_inp = input * epsilon_alpha
        # pert_inp = input * diff
        if delta.size() != input.size():
            print(list(delta.size()))
            print(list(input.size()))
            break
        else:
            pert_inp = torch.mul(input, delta)

        model2.zero_grad()  # clear model2's gradients before the backward pass
        logits2 = model2(pert_inp)
        # logits2 = model2(x)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer.step()
        # model.restore_arch_parameters()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step
        progress_bar(step, len(train_queue),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (step + 1), 100. * correct / total,
                        correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)
def train_controller(self):
    total_loss = utils.AvgrageMeter()
    total_reward = utils.AvgrageMeter()
    total_entropy = utils.AvgrageMeter()

    for step in range(300):
        input, target = self.reward_queue.next_batch()
        self.model.eval()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda()

        self.controller_optimizer.zero_grad()
        self.controller.train()

        # Sample an architecture from the controller and plug it into the
        # one-shot model.
        arch, log_prob, entropy = self.controller()
        arch_parameters = self.get_weights_from_arch(arch)
        self.set_arch_model_weights(arch_parameters)

        with torch.no_grad():
            # Make sure that no gradients are propagated through the one-shot
            # model for the controller updates
            logits = self.model(input, discrete=True).detach()

        reward = utils.accuracy(logits, target)[0]
        if self.args.entropy_weight is not None:
            reward += self.args.entropy_weight * entropy

        log_prob = torch.sum(log_prob)
        if self.baseline is None:
            self.baseline = reward
        self.baseline = self.args.bl_dec * self.baseline + (1 - self.args.bl_dec) * reward

        loss = log_prob * (reward - self.baseline)
        loss = loss.mean()
        loss.backward()
        self.controller_optimizer.step()

        total_loss.update(loss.item(), n)
        total_reward.update(reward.item(), n)
        total_entropy.update(entropy.item(), n)

        if step % self.args.report_freq == 0:
            logging.info('controller %03d %e %f %f', step, total_loss.avg,
                         total_reward.avg, self.baseline.item())
def train_batch(self, arch):
    args = self.args
    if self.steps % len(self.train_queue) == 0:
        self.scheduler.step()
        self.objs = utils.AvgrageMeter()
        self.top1 = utils.AvgrageMeter()
        self.top5 = utils.AvgrageMeter()
    lr = self.scheduler.get_lr()[0]

    weights = self.get_weights_from_arch(arch)
    self.set_arch_model_weights(weights)

    step = self.steps % len(self.train_queue)
    input, target = next(self.train_iter)

    self.model.train()
    n = input.size(0)
    input = input.cuda()
    target = target.cuda(non_blocking=True)

    # get a random_ws minibatch from the search queue with replacement
    self.optimizer.zero_grad()
    logits = self.model(input, discrete=True)
    loss = self.criterion(logits, target)

    loss.backward()
    nn.utils.clip_grad_norm_(self.model.parameters(), args.grad_clip)
    self.optimizer.step()

    prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
    self.objs.update(loss.data.item(), n)
    self.top1.update(prec1.data.item(), n)
    self.top5.update(prec5.data.item(), n)

    if step % args.report_freq == 0:
        logging.info('train %03d %e %f %f', step, self.objs.avg,
                     self.top1.avg, self.top5.avg)

    self.steps += 1
    if self.steps % len(self.train_queue) == 0:
        # Save the model weights
        self.epochs += 1
        self.train_iter = iter(self.train_queue)
        valid_err = self.evaluate(arch)
        logging.info('epoch %d | train_acc %f | valid_acc %f'
                     % (self.epochs, self.top1.avg, 1 - valid_err))
        if self.epochs % 20 == 0:
            self.save(epoch=self.epochs)
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    max_step = 0
    best_acc = 0

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            test_loss += loss.item()
            _, predicted = logits.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            max_step = step
            progress_bar(step, len(valid_queue),
                         'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss / (step + 1), 100. * correct / total,
                            correct, total))

    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc

    return 100. * correct / total, test_loss / (max_step + 1)
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input, updateType='weight')
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

    return top1.avg, objs.avg
def evaluate_test(self, arch, split=None, discrete=False, normalize=True):
    # Return error since we want to minimize obj val
    logging.info(arch)
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    weights = self.get_weights_from_arch(arch)
    self.set_arch_model_weights(weights)

    self.model.eval()

    if split is None:
        n_batches = 10
    else:
        n_batches = len(self.test_queue)

    for step in range(n_batches):
        try:
            input, target = next(self.test_iter)
        except StopIteration:
            logging.info('looping back over valid set')
            self.test_iter = iter(self.test_queue)
            input, target = next(self.test_iter)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        logits = self.model(input, discrete=discrete, normalize=normalize)
        loss = self.criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)

        if step % self.args.report_freq == 0:
            logging.info('test %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return 1 - 0.01 * top1.avg
def train_model(self, epoch):
    self.objs = utils.AvgrageMeter()
    self.top1 = utils.AvgrageMeter()
    self.top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(self.train_queue):
        self.model.train()
        input = input.cuda()
        target = target.cuda()

        self.optimizer.zero_grad()
        self.controller.eval()

        # Sample an architecture from the controller
        arch, _, _ = self.controller()
        arch_parameters = self.get_weights_from_arch(arch)
        self.set_arch_model_weights(arch_parameters)

        # Evaluate the architecture
        logits = self.model(input, discrete=True)
        loss = self.criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
        self.optimizer.step()

        n = input.size(0)
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        self.objs.update(loss.data.item(), n)
        self.top1.update(prec1.data.item(), n)
        self.top5.update(prec5.data.item(), n)

        if step % self.args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, self.objs.avg,
                         self.top1.avg, self.top5.avg)

    self.scheduler.step()
    valid_err = self.evaluate(arch)
    logging.info('epoch %d | train_acc %f | valid_acc %f'
                 % (epoch, self.top1.avg, 1 - valid_err))
    return self.top1.avg
def infer(test_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    # volatile=True Variables are deprecated; torch.no_grad() is the current
    # way to disable autograd during evaluation.
    with torch.no_grad():
        for step, (input, target) in enumerate(test_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

            if step % args.report_freq == 0:
                logging.info('test %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg
def evaluate(self, arch, split=None):
    # Return error since we want to minimize obj val
    # logging.info(arch)
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    weights = self.get_weights_from_arch(arch)
    self.set_arch_model_weights(weights)

    self.model.eval()
    self.controller.eval()

    if split is None:
        n_batches = 1
    else:
        n_batches = len(self.valid_queue)

    for step in range(n_batches):
        input, target = self.valid_queue.next_batch()
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        logits = self.model(input, discrete=True)
        loss = self.criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)
        # if step % self.args.report_freq == 0:
        #     logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return 1 - 0.01 * top1.avg
def __init__(self, save_path, seed, batch_size, grad_clip, epochs,
             num_intermediate_nodes, search_space, cutout, resume_iter=None,
             init_channels=16):
    args = {}
    args['data'] = '../data'
    args['epochs'] = epochs
    args['learning_rate'] = 0.025
    args['batch_size'] = batch_size
    args['learning_rate_min'] = 0.001
    args['momentum'] = 0.9
    args['weight_decay'] = 3e-4
    args['init_channels'] = init_channels
    # Adapted to nasbench
    args['layers'] = 9
    args['drop_path_prob'] = 0.3
    args['grad_clip'] = grad_clip
    args['train_portion'] = 0.5
    args['seed'] = seed
    args['log_interval'] = 50
    args['save'] = save_path
    args['gpu'] = 0
    args['cuda'] = True
    args['cutout'] = cutout
    args['cutout_length'] = 16
    args['report_freq'] = 50
    args['output_weights'] = True
    args['steps'] = num_intermediate_nodes
    args['search_space'] = search_space.search_space_number
    self.search_space = search_space
    args = AttrDict(args)
    self.args = args

    # Dump the config of the run, but only if it doesn't yet exist
    config_path = os.path.join(args.save, 'config.json')
    if not os.path.exists(config_path):
        with open(config_path, 'w') as fp:
            json.dump(args.__dict__, fp)

    self.seed = seed
    np.random.seed(args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.enabled = True
    cudnn.deterministic = True
    torch.cuda.manual_seed_all(args.seed)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                              transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    # worker_init_fn must be a callable; passing np.random.seed(args.seed)
    # directly would seed once at construction and hand the loader None.
    worker_init_fn = lambda worker_id: np.random.seed(args.seed + worker_id)

    self.train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=0, worker_init_fn=worker_init_fn)

    self.valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=0, worker_init_fn=worker_init_fn)

    _, test_transform = utils._data_transforms_cifar10(args)
    test_data = dset.CIFAR10(root=args.data, train=False, download=True,
                             transform=test_transform)
    self.test_queue = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False,
        pin_memory=True, num_workers=2)

    self.train_iter = iter(self.train_queue)
    self.valid_iter = iter(self.valid_queue)

    self.steps = 0
    self.epochs = 0
    self.total_loss = 0
    self.start_time = time.time()

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    self.criterion = criterion

    model = Network(args.init_channels, 10, args.layers, self.criterion,
                    output_weights=args.output_weights,
                    search_space=search_space, steps=args.steps)
    model = model.cuda()
    self.model = model
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(
        self.model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    self.optimizer = optimizer

    self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    if resume_iter is not None:
        self.steps = resume_iter
        self.epochs = int(resume_iter / len(self.train_queue))
        logging.info("Resuming from epoch %d" % self.epochs)
        self.objs = utils.AvgrageMeter()
        self.top1 = utils.AvgrageMeter()
        self.top5 = utils.AvgrageMeter()
        # Fast-forward the LR schedule to the resume point.
        for i in range(self.epochs):
            self.scheduler.step()

    size = 0
    for p in model.parameters():
        size += p.nelement()
    logging.info('param size: {}'.format(size))

    total_params = sum(x.data.nelement() for x in model.parameters())
    logging.info('Args: {}'.format(args))
    logging.info('Model total parameters: {}'.format(total_params))
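# --- Hedged sketch (assumption): AttrDict is used in __init__ above to give
# the args dict attribute-style access (args.batch_size, args.save, ...).
# The usual minimal implementation:
class AttrDict(dict):

    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self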
def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          optimizer2, lr, lr2, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0

    for step, (input, target) in enumerate(train_queue):
        model.train()
        # model2.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()
        optimizer.zero_grad()

        # logits, diff, x = model(input, target)
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Disabled experiment: train model2 on inputs perturbed by a PGD attack
        # on the search model (on clean inputs during a warm-up phase, on
        # perturbed inputs afterwards).
        # if epoch < 0:
        #     optimizer2.zero_grad()
        #     logits2 = model2(input)
        #     loss2 = criterion(logits2, target)
        #     loss2.backward()
        #     nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        #     optimizer2.step()
        # else:
        #     model_adv = AttackPGD(model)
        #     logits1, diff, x = model_adv(input, target)
        #     deltas = torch.round(torch.abs(diff) * 255 / 8 + 0.499 - (epoch / 300))
        #     pert_inp = torch.mul(input, deltas)
        #     # pert_inp = torch.mul(input, torch.abs(diff))
        #     optimizer2.zero_grad()
        #     logits2 = model2(pert_inp)
        #     loss2 = criterion(logits2, target)
        #     loss2.backward()
        #     nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        #     optimizer2.step()
        # train_loss += loss2.item()
        # _, predicted = logits2.max(1)
        # total += target.size(0)
        # correct += predicted.eq(target).sum().item()
        # max_step = step

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    # progress_bar(step, len(train_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
    #              % (train_loss / (step + 1), 100. * correct / total, correct, total))
    # return 100. * correct / total, train_loss / (max_step + 1)
    return top1.avg, objs.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0

    # with torch.no_grad():
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        if torch.cuda.is_available():
            input = input.cuda()
            target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        if torch.cuda.is_available():
            input_search = input_search.cuda()
            target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # print('before softmax', model.arch_parameters())
        model.softmax_arch_parameters()

        # perturb on alpha
        # print('after softmax', model.arch_parameters())
        # Disabled experiment: PGD attack on the inputs instead of the alphas.
        # model_adv = AttackPGD(model)
        # logits1, diff, x = model_adv(input, target)
        # loss1 = criterion(logits1, target)
        # optimizer.zero_grad()
        # loss1.backward()
        # optimizer.step()
        if perturb_alpha:
            # this modified perturb_alpha also returns the perturbation used below
            diff = perturb_alpha(model, input, target, epsilon_alpha)
            optimizer.zero_grad()
            architect.optimizer.zero_grad()
            # print('after perturb', model.arch_parameters())

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()
        # print('after restore', model.arch_parameters())

        # Train the auxiliary resnet18 on the perturbed inputs.
        # pert_inp = input * epsilon_alpha
        pert_inp = input * diff
        logits2 = resnet18(pert_inp)
        # logits2 = resnet18(x)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(resnet18.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits2, target, topk=(1, 5))
        objs.update(loss2.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    return top1.avg, objs.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          optimizer2, lr, lr2, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # print('before softmax', model.arch_parameters())
        model.softmax_arch_parameters()

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()
        # print('after restore', model.arch_parameters())

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

        # model_adv.train()
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        deltas = torch.round(torch.abs(diff) * 255 / 8 + 0.499 - (epoch / 300))
        pert_inp = torch.mul(input, deltas)
        # pert_inp = torch.mul(input, torch.abs(diff))

        model2.train()
        optimizer2.zero_grad()
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer2.step()

    return top1.avg, objs.avg
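# --- Hedged sketch (assumption): AttackPGD wraps the search model and runs a
# projected-gradient-descent attack; the call sites above unpack
# (logits1, diff, x), suggesting it returns the logits on the adversarial
# input, the perturbation, and the adversarial input itself. The epsilon,
# step size, and iteration count below are illustrative, not the original
# configuration.
import torch
import torch.nn as nn
import torch.nn.functional as F


class AttackPGD(nn.Module):

    def __init__(self, model, epsilon=8.0 / 255, step_size=2.0 / 255, num_steps=7):
        super(AttackPGD, self).__init__()
        self.model = model
        self.epsilon = epsilon
        self.step_size = step_size
        self.num_steps = num_steps

    def forward(self, input, target):
        # random start inside the epsilon ball
        x = input.detach() + torch.empty_like(input).uniform_(-self.epsilon, self.epsilon)
        for _ in range(self.num_steps):
            x.requires_grad_()
            with torch.enable_grad():
                loss = F.cross_entropy(self.model(x), target)
            grad = torch.autograd.grad(loss, [x])[0]
            # gradient-sign step, then project back into the epsilon ball
            x = x.detach() + self.step_size * torch.sign(grad.detach())
            x = torch.min(torch.max(x, input - self.epsilon), input + self.epsilon)
        diff = x - input
        return self.model(x), diff, x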
def train(train_queue, model, criterion, optimizer):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()

    # res = resnet18()
    # res = res.cuda()
    # res.load_state_dict(torch.load("resnet18.pt"))
    # gcam = GradCAM(model=res)
    # target_layer = "cells.9"
    # target_layer = "layer1"
    # target_csv = open("target_train.csv", "w")
    # target_csv.write("index,label\n")
    cnt = 0

    for step, (input, target) in enumerate(train_queue):
        """
        Disabled experiments: dumping Grad-CAM reweighted inputs to disk,
        then mixup training.

        print(cnt)
        target_csv.write(str(cnt) + "," + str(target.item()) + "\n")
        if cnt == 12800:
            break
        input = input.cuda()  # b x c x h x w
        ori_img = input[0, :, :, :].detach().cpu().numpy()
        target = target.cuda(non_blocking=True)
        new_img = np.zeros((33, 32, 32))
        new_img[:3, :, :] = ori_img
        for idx in range(10):
            _p = gcam.forward(input)
            gcam.backward(idx=idx)
            region = gcam.generate(target_layer=target_layer)
            cmap = cv2.resize(region, (32, 32))
            new_img[3*idx+3:3*idx+6, :, :] = cmap
        np.save("newdata/reweight_{}.npy".format(cnt), new_img)
        cnt += 1

        # mixup training
        alpha = 1
        use_cuda = True
        inputs, targets_a, targets_b, lam = mixup_data(input, target, alpha, use_cuda)
        inputs, targets_a, targets_b = map(Variable, (inputs, targets_a, targets_b))
        outputs = model(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        prec1, prec5 = utils.accuracy(outputs, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)
        """

        input = input.cuda()  # b x c x h x w
        target = target.cuda(non_blocking=True)

        r = np.random.rand(1)
        if args.beta > 0 and r < args.cutmix_prob:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(input.size()[0]).cuda()
            target_a = target
            target_b = target[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
            input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (input.size()[-1] * input.size()[-2]))
            # compute output
            logits = model(input)
            loss = criterion(logits, target_a) * lam + criterion(logits, target_b) * (1. - lam)
        else:
            # compute output
            logits = model(input)
            loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        optimizer.zero_grad()
        # logits = model(input)
        # loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        if step % args.report_freq == 0:
            logging.info('train %03d avg: %e top1: %f top5: %f', step,
                         objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    return top1.avg, top5.avg, objs.avg
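# --- Hedged sketch (assumption): rand_bbox is not defined in this section.
# This is the standard CutMix helper (from the CutMix reference
# implementation) that samples a box whose area fraction is (1 - lam),
# matching the call rand_bbox(input.size(), lam) above.
import numpy as np


def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)
    cut_h = int(H * cut_rat)

    # uniformly sample the box center, then clip the box to the image
    cx = np.random.randint(W)
    cy = np.random.randint(H)
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2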
def train3(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
           perturb_alpha, epsilon_alpha, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0
    delta = torch.empty(64, 3, 32, 32)
    m = 64

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        print(n)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # if epoch >= 15:
        #     architect.step(input, target, input_search, target_search, lr,
        #                    optimizer, unrolled=args.unrolled)
        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        # optimizer.zero_grad()
        architect.optimizer.zero_grad()
        # print('before softmax', model.arch_parameters())
        # model.softmax_arch_parameters()

        # Disabled experiment: perturb the architecture parameters instead of
        # attacking the inputs.
        # if perturb_alpha:
        #     diff = perturb_alpha(model, input, target, epsilon_alpha)
        #     optimizer.zero_grad()
        #     architect.optimizer.zero_grad()
        #     # print('after perturb', model.arch_parameters())

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        # model.restore_arch_parameters()

        # PGD attack on the search model; diff is the resulting perturbation.
        model_adv = AttackPGD(model)
        # logits1, diff = model_adv(input, target)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        if diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break
        # if step > 5:
        #     break

    return top1.avg, objs.avg, delta
def train2(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
           perturb_alpha, epsilon_alpha, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0
    # placeholder with the batch shape; overwritten with the PGD perturbation
    # each step and consumed by the second pass below
    delta = torch.empty(64, 3, 32, 32).cuda()
    m = 64

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        print(n)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # if epoch >= 15:
        #     architect.step(input, target, input_search, target_search, lr,
        #                    optimizer, unrolled=args.unrolled)
        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        # optimizer.zero_grad()
        architect.optimizer.zero_grad()
        # print('before softmax', model.arch_parameters())
        # model.softmax_arch_parameters()

        # Disabled experiment: perturb the architecture parameters instead of
        # attacking the inputs.
        # if perturb_alpha:
        #     diff = perturb_alpha(model, input, target, epsilon_alpha)
        #     optimizer.zero_grad()
        #     architect.optimizer.zero_grad()
        #     # print('after perturb', model.arch_parameters())

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        # model.restore_arch_parameters()

        # PGD attack on the search model; diff is the resulting perturbation.
        model_adv = AttackPGD(model)
        # logits1, diff = model_adv(input, target)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        if diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break
        # if step > 5:
        #     break

    # Second pass: train model2 on inputs scaled by the last perturbation.
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # pert_inp = input * epsilon_alpha
        if delta.size() != input.size():
            print(list(delta.size()))
            print(list(input.size()))
            break
        else:
            pert_inp = torch.mul(input, delta)

        model2.zero_grad()  # clear model2's gradients before the backward pass
        logits2 = model2(pert_inp)
        # logits2 = model2(x)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step
        progress_bar(step, len(train_queue),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (step + 1), 100. * correct / total,
                        correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # if epoch >= 15:
        #     architect.step(input, target, input_search, target_search, lr,
        #                    optimizer, unrolled=args.unrolled)
        architect.step(input, target, input_search, target_search, lr, optimizer,
                       unrolled=args.unrolled)
        # optimizer.zero_grad()
        architect.optimizer.zero_grad()
        # print('before softmax', model.arch_parameters())
        # model.softmax_arch_parameters()

        # Disabled experiment: perturb the architecture parameters instead of
        # attacking the inputs.
        # if perturb_alpha:
        #     diff = perturb_alpha(model, input, target, epsilon_alpha)
        #     optimizer.zero_grad()
        #     architect.optimizer.zero_grad()
        #     # print('after perturb', model.arch_parameters())

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        # model.restore_arch_parameters()

        # PGD attack on the search model; diff is the resulting perturbation.
        model_adv = AttackPGD(model)
        # logits1, diff = model_adv(input, target)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Train model2 on the inputs scaled by the perturbation.
        # logits2 = resnet18(input*diff, updateType='weight')
        # pert_inp = input * epsilon_alpha
        pert_inp = torch.mul(input, diff)
        model2.zero_grad()  # clear model2's gradients before the backward pass
        logits2 = model2(pert_inp)
        # logits2 = model2(x)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step
        progress_bar(step, len(train_queue),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (step + 1), 100. * correct / total,
                        correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)