def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, perturb_alpha, epsilon_alpha):
    """One epoch of SDARTS-style architecture search with optional alpha perturbation.

    Alternates an architecture (alpha) step on a validation batch with a
    weight step on the training batch, perturbing the softmaxed architecture
    parameters before the weight update when `perturb_alpha` is provided.

    Returns (average top-1 accuracy, average training loss).
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    # Fix: the original called next(iter(valid_queue)) every step, which
    # constructs a brand-new DataLoader iterator (re-spawning workers and
    # reshuffling) per batch.  A persistent iterator with wrap-around gives
    # the same "random minibatch from the search queue" behavior cheaply.
    valid_iter = iter(valid_queue)
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        try:
            input_search, target_search = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            input_search, target_search = next(valid_iter)
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # Architecture (alpha) update on the search batch.
        architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # Replace alphas by their softmax; restored after the weight step.
        model.softmax_arch_parameters()
        if perturb_alpha:
            # Perturb alpha in-place, then clear any gradients the
            # perturbation pass produced.
            perturb_alpha(model, input, target, epsilon_alpha)
            optimizer.zero_grad()
            architect.optimizer.zero_grad()

        # Weight update on the (perturbed) architecture.
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break
    return top1.avg, objs.avg
def infer(valid_queue, model, criterion):
    """Evaluate `model` on the validation queue.

    Returns (average top-1 accuracy, average loss).
    """
    loss_meter = utils.AvgrageMeter()
    acc1_meter = utils.AvgrageMeter()
    acc5_meter = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(valid_queue):
            images = images.cuda()
            labels = labels.cuda(non_blocking=True)

            logits = model(images)
            loss = criterion(logits, labels)

            acc1, acc5 = utils.accuracy(logits, labels, topk=(1, 5))
            batch_size = images.size(0)
            loss_meter.update(loss.data, batch_size)
            acc1_meter.update(acc1.data, batch_size)
            acc5_meter.update(acc5.data, batch_size)

            if batch_idx % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', batch_idx, loss_meter.avg, acc1_meter.avg, acc5_meter.avg)
            if 'debug' in args.save:
                break
    return acc1_meter.avg, loss_meter.avg
def train(train_queue, model, criterion, optimizer):
    """One training epoch with an optional auxiliary-head loss term.

    Returns (average top-1 accuracy, average loss).
    """
    loss_meter = utils.AvgrageMeter()
    acc1_meter = utils.AvgrageMeter()
    acc5_meter = utils.AvgrageMeter()
    model.train()
    for batch_idx, (images, labels) in enumerate(train_queue):
        images = images.cuda()
        labels = labels.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(images)
        loss = criterion(logits, labels)
        if args.auxiliary:
            # Blend in the auxiliary classifier's loss at its configured weight.
            loss += args.auxiliary_weight * criterion(logits_aux, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        acc1, acc5 = utils.accuracy(logits, labels, topk=(1, 5))
        batch_size = images.size(0)
        loss_meter.update(loss.data, batch_size)
        acc1_meter.update(acc1.data, batch_size)
        acc5_meter.update(acc5.data, batch_size)

        if batch_idx % args.report_freq == 0:
            logging.info('train %03d %e %f %f', batch_idx, loss_meter.avg, acc1_meter.avg, acc5_meter.avg)
        if 'debug' in args.save:
            break
    return acc1_meter.avg, loss_meter.avg
def train_controller(self):
    # Run 300 REINFORCE steps on the architecture controller, holding the
    # one-shot model fixed.  Tracks running loss/reward/entropy meters and an
    # exponential-moving-average baseline (self.baseline) for variance reduction.
    total_loss = utils.AvgrageMeter()
    total_reward = utils.AvgrageMeter()
    total_entropy = utils.AvgrageMeter()
    for step in range(300):
        input, target = self.reward_queue.next_batch()
        self.model.eval()  # one-shot model is only scored, never trained here
        n = input.size(0)
        input = input.cuda()
        target = target.cuda()

        self.controller_optimizer.zero_grad()
        self.controller.train()
        # Sample an architecture from the controller and plug it into the one-shot model.
        arch, log_prob, entropy = self.controller()
        arch_parameters = self.get_weights_from_arch(arch)
        self.set_arch_model_weights(arch_parameters)

        with torch.no_grad():
            # Make sure that no gradients are propagated through the one-shot model
            # for the controller updates
            logits = self.model(input, discrete=True).detach()
        # Reward = top-1 accuracy on the reward batch (a constant w.r.t. the
        # controller; only log_prob carries gradient into the loss below).
        reward = utils.accuracy(logits, target)[0]

        if self.args.entropy_weight is not None:
            # Entropy bonus encourages exploration.
            reward += self.args.entropy_weight * entropy

        log_prob = torch.sum(log_prob)
        # Initialize the baseline on the first step, then always apply the
        # EMA update (so the very first step also decays toward the reward).
        if self.baseline is None:
            self.baseline = reward
        self.baseline = self.args.bl_dec * self.baseline + (
            1 - self.args.bl_dec) * reward

        # REINFORCE objective with baseline: -(reward - b) * log pi(arch)
        # (sign folded into the optimizer direction as written).
        loss = log_prob * (reward - self.baseline)
        loss = loss.mean()
        loss.backward()
        self.controller_optimizer.step()

        total_loss.update(loss.item(), n)
        total_reward.update(reward.item(), n)
        total_entropy.update(entropy.item(), n)

        if step % self.args.report_freq == 0:
            logging.info('controller %03d %e %f %f', step, total_loss.avg, total_reward.avg, self.baseline.item())
def train_batch(self, arch):
    # Train the shared model for a single batch on architecture `arch`,
    # handling per-epoch bookkeeping (scheduler step, meter reset, periodic
    # evaluation and checkpointing) based on self.steps.
    args = self.args
    if self.steps % len(self.train_queue) == 0:
        # Epoch boundary: advance the LR schedule and reset the meters.
        self.scheduler.step()
        self.objs = utils.AvgrageMeter()
        self.top1 = utils.AvgrageMeter()
        self.top5 = utils.AvgrageMeter()
    lr = self.scheduler.get_lr()[0]  # NOTE: fetched but unused below

    weights = self.get_weights_from_arch(arch)
    self.set_arch_model_weights(weights)

    step = self.steps % len(self.train_queue)
    input, target = next(self.train_iter)

    self.model.train()
    n = input.size(0)

    input = input.cuda()
    target = target.cuda(non_blocking=True)

    # get a random_ws minibatch from the search queue with replacement
    self.optimizer.zero_grad()
    logits = self.model(input, discrete=True)
    loss = self.criterion(logits, target)

    loss.backward()
    nn.utils.clip_grad_norm_(self.model.parameters(), args.grad_clip)
    self.optimizer.step()

    prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
    self.objs.update(loss.data.item(), n)
    self.top1.update(prec1.data.item(), n)
    self.top5.update(prec5.data.item(), n)

    if step % args.report_freq == 0:
        logging.info('train %03d %e %f %f', step, self.objs.avg, self.top1.avg, self.top5.avg)

    self.steps += 1
    if self.steps % len(self.train_queue) == 0:
        # Save the model weights
        self.epochs += 1
        self.train_iter = iter(self.train_queue)
        valid_err = self.evaluate(arch)
        logging.info('epoch %d | train_acc %f | valid_acc %f' % (self.epochs, self.top1.avg, 1 - valid_err))
        if self.epochs % 20 == 0:
            self.save(epoch=self.epochs)
def infer(valid_queue, model, criterion):
    """Silent validation pass using the 'weight' forward mode.

    Returns (average top-1 accuracy, average loss); emits no per-step logs.
    """
    loss_meter = utils.AvgrageMeter()
    acc1_meter = utils.AvgrageMeter()
    acc5_meter = utils.AvgrageMeter()
    model.eval()
    with torch.no_grad():
        for images, labels in valid_queue:
            images = images.cuda()
            labels = labels.cuda(non_blocking=True)

            logits = model(images, updateType='weight')
            loss = criterion(logits, labels)

            acc1, acc5 = utils.accuracy(logits, labels, topk=(1, 5))
            batch_size = images.size(0)
            loss_meter.update(loss.data, batch_size)
            acc1_meter.update(acc1.data, batch_size)
            acc5_meter.update(acc5.data, batch_size)
    return acc1_meter.avg, loss_meter.avg
def train_model(self, epoch):
    """Train the shared one-shot model for one epoch on controller-sampled architectures.

    After the epoch, steps the LR scheduler and evaluates the architecture
    sampled in the final batch.  Returns the epoch's average top-1 accuracy.
    """
    self.objs = utils.AvgrageMeter()
    self.top1 = utils.AvgrageMeter()
    self.top5 = utils.AvgrageMeter()
    for batch_idx, (images, labels) in enumerate(self.train_queue):
        self.model.train()
        images = images.cuda()
        labels = labels.cuda()

        self.optimizer.zero_grad()
        self.controller.eval()
        # Draw an architecture from the (frozen) controller for this batch.
        sampled_arch, _, _ = self.controller()
        self.set_arch_model_weights(self.get_weights_from_arch(sampled_arch))

        # Train the shared weights on the sampled architecture.
        logits = self.model(images, discrete=True)
        loss = self.criterion(logits, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
        self.optimizer.step()

        batch_size = images.size(0)
        acc1, acc5 = utils.accuracy(logits, labels, topk=(1, 5))
        self.objs.update(loss.data.item(), batch_size)
        self.top1.update(acc1.data.item(), batch_size)
        self.top5.update(acc5.data.item(), batch_size)

        if batch_idx % self.args.report_freq == 0:
            logging.info('train %03d %e %f %f', batch_idx, self.objs.avg, self.top1.avg, self.top5.avg)

    self.scheduler.step()
    # Validate on the architecture sampled in the final batch.
    valid_err = self.evaluate(sampled_arch)
    logging.info('epoch %d | train_acc %f | valid_acc %f' % (epoch, self.top1.avg, 1 - valid_err))
    return self.top1.avg
def evaluate_test(self, arch, split=None, discrete=False, normalize=True):
    """Evaluate architecture `arch` on the test queue.

    Loads `arch`'s weights into the shared model, runs either a quick
    10-batch estimate (``split is None``) or the full test set, and
    returns the error rate (1 - accuracy/100), since callers minimize
    the objective value.
    """
    logging.info(arch)
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    weights = self.get_weights_from_arch(arch)
    self.set_arch_model_weights(weights)

    self.model.eval()
    if split is None:
        n_batches = 10
    else:
        n_batches = len(self.test_queue)

    for step in range(n_batches):
        try:
            input, target = next(self.test_iter)
        except StopIteration:
            # Fix: only catch iterator exhaustion here.  The previous broad
            # `except Exception` also swallowed unrelated failures (e.g.
            # CUDA/dataloader errors) and silently restarted the iterator.
            logging.info('looping back over valid set')
            self.test_iter = iter(self.test_queue)
            input, target = next(self.test_iter)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        logits = self.model(input, discrete=discrete, normalize=normalize)
        loss = self.criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)

        if step % self.args.report_freq == 0:
            logging.info('test %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return 1 - 0.01 * top1.avg
def infer(test_queue, model, criterion):
    """Evaluate `model` on the test queue.

    Returns (average top-1 accuracy, average loss).
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()
    # Fix: `Variable(..., volatile=True)` has been a no-op since PyTorch 0.4,
    # so the original silently tracked gradients during inference (wasting
    # memory).  torch.no_grad() restores the intended behavior.
    with torch.no_grad():
        for step, (input, target) in enumerate(test_queue):
            input = input.cuda()
            target = target.cuda()

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

            if step % args.report_freq == 0:
                logging.info('test %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return top1.avg, objs.avg
def evaluate(self, arch, split=None):
    # Return error since we want to minimize obj val
    # Loads `arch` into the shared model and scores it on the validation
    # queue: a single batch when `split is None`, otherwise the full queue.
    # NOTE(review): no torch.no_grad() here, so the forward passes build
    # autograd graphs -- presumably harmless but wasteful; confirm.
    # logging.info(arch)
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    weights = self.get_weights_from_arch(arch)
    self.set_arch_model_weights(weights)

    self.model.eval()
    self.controller.eval()
    if split is None:
        n_batches = 1
    else:
        n_batches = len(self.valid_queue)

    for step in range(n_batches):
        input, target = self.valid_queue.next_batch()
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        logits = self.model(input, discrete=True)
        loss = self.criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data.item(), n)
        top1.update(prec1.data.item(), n)
        top5.update(prec5.data.item(), n)

        # if step % self.args.report_freq == 0:
        #     logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
    return 1 - 0.01 * top1.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer, optimizer2, lr, lr2, model2, epoch):
    # One epoch that (a) runs a standard DARTS search step + weight step on
    # `model`, then (b) attacks `model` with PGD and trains `model2` on
    # inputs masked by the rounded perturbation magnitudes.
    # Returns metrics for `model` only (top-1 avg, loss avg).
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # Architecture (alpha) update.
        architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # Weight update on the softmaxed architecture; alphas restored after.
        model.softmax_arch_parameters()
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        # NOTE: in debug mode this breaks BEFORE the model2 phase below runs.
        if 'debug' in args.save:
            break

        # PGD attack on `model`; `diff` is the adversarial perturbation.
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        # Binary-ish mask from the perturbation magnitude; the threshold is
        # annealed with the epoch (presumably 300 total epochs -- confirm).
        deltas = torch.round(torch.abs(diff) * 255 / 8 + 0.499 - (epoch / 300))
        pert_inp = torch.mul(input, deltas)

        # Train the second network on the masked inputs.
        model2.train()
        optimizer2.zero_grad()
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer2.step()
    return top1.avg, objs.avg
def train3(train_queue, valid_queue, model, architect, criterion, optimizer, lr, perturb_alpha, epsilon_alpha, model2, epoch):
    # Experimental search epoch: DARTS alpha step + weight step on `model`,
    # then a PGD attack whose adversarial loss is ALSO stepped through
    # `optimizer`, collecting the last perturbation tensor `delta`.
    # Returns (top-1 avg, loss avg, delta).  `model2` and `epoch` are unused
    # here; several alternative strategies remain disabled below.
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0   # unused accumulators kept from a sibling variant
    correct = 0
    total = 0
    max_step = 0
    # Pre-sized to the expected batch shape (64, 3, 32, 32); batches of a
    # different size trigger the early break below.
    delta = torch.empty(64, 3, 32, 32)
    m = 64  # unused
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        print(n)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # Architecture (alpha) update.  (A warm-up-gated variant and the
        # softmax_arch_parameters path are disabled in this experiment.)
        architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Weight update on the clean batch.
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Adversarial pass: PGD attack returns (logits, perturbation, adv input);
        # its loss is backpropagated through the same optimizer (adversarial
        # training of `model`'s weights).
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Stop on a ragged batch whose perturbation shape no longer matches.
        if diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break
    return top1.avg, objs.avg, delta
def train2(train_queue, valid_queue, model, architect, criterion, optimizer, lr, perturb_alpha, epsilon_alpha, model2, epoch):
    """Two-phase experimental epoch.

    Phase 1: DARTS alpha step + weight step on `model`, then a PGD attack
    whose loss is also stepped through `optimizer`, collecting the last
    perturbation tensor `delta`.
    Phase 2: train `model2` on inputs element-wise multiplied by `delta`.

    Returns (phase-2 accuracy in percent, phase-2 average loss).
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0
    correct = 0
    total = 0
    max_step = 0
    # Fix: `delta` was never initialized (its initializer was commented out),
    # so the size check in the first loop raised NameError on the very first
    # batch.  Start from None and adopt the first perturbation seen.
    delta = None
    m = 64  # unused; kept for parity with the sibling variant
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        print(n)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # Architecture (alpha) update.
        architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        # Weight update on the clean batch.
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Adversarial pass: PGD attack returns (logits, perturbation, adv input).
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)
        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # Stop on a ragged batch whose perturbation shape no longer matches.
        if delta is not None and diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break

    # Phase 2: train model2 on delta-masked inputs.
    for step, (input, target) in enumerate(train_queue):
        model.train()  # NOTE(review): probably meant model2.train() -- confirm
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        if delta is None or delta.size() != input.size():
            # Perturbation unavailable (phase 1 never ran) or ragged batch.
            print(list(delta.size()) if delta is not None else None)
            print(list(input.size()))
            break
        pert_inp = torch.mul(input, delta)

        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        # NOTE(review): `optimizer` holds `model`'s parameters, so this step
        # never updates model2; likely model2's own optimizer was intended.
        # Left unchanged because that optimizer is not passed in here.
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step
        progress_bar(step, len(train_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (train_loss / (step + 1), 100. * correct / total, correct, total))
    return 100. * correct / total, train_loss / (max_step + 1)
def train(train_queue, model, criterion, optimizer):
    """One training epoch with CutMix augmentation applied at random.

    With probability `args.cutmix_prob` (and `args.beta > 0`), a random
    rectangular patch from a shuffled copy of the batch is pasted over each
    image and the loss is interpolated between the two label sets by the
    surviving pixel ratio.  Otherwise a plain forward/backward pass is run.

    Returns (average top-1 accuracy, average top-5 accuracy, average loss).

    Note: a large block of dead mixup/Grad-CAM experimentation (left behind
    as a runtime string literal) and its unused `cnt` counter were removed.
    """
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = input.cuda()  # b x c x h x w
        target = target.cuda(non_blocking=True)

        r = np.random.rand(1)
        if args.beta > 0 and r < args.cutmix_prob:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(input.size()[0]).cuda()
            target_a = target
            target_b = target[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
            input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (input.size()[-1] * input.size()[-2]))
            # compute output
            logits = model(input)
            loss = criterion(logits, target_a) * lam + criterion(logits, target_b) * (1. - lam)
        else:
            # compute output
            logits = model(input)
            loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        if step % args.report_freq == 0:
            logging.info('train %03d avg: %e top1: %f top5: %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break
    return top1.avg, top5.avg, objs.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, perturb_alpha, epsilon_alpha):
    # SDARTS-style search epoch that additionally trains a module-level
    # `resnet18` network on inputs scaled by the alpha-perturbation `diff`.
    # Metrics returned are computed on the resnet18 outputs, not on `model`.
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    train_loss = 0  # unused accumulators kept from an earlier experiment
    correct = 0
    total = 0
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        if torch.cuda.is_available():
            input = input.cuda()
            target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        if torch.cuda.is_available():
            input_search = input_search.cuda()
            target_search = target_search.cuda(non_blocking=True)

        # Architecture (alpha) update.
        architect.step(input, target, input_search, target_search, lr, optimizer, unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # Softmax the alphas; an alternative PGD-attack path is disabled here.
        model.softmax_arch_parameters()

        if perturb_alpha:
            # Perturb alpha; unlike the plain SDARTS variant, the perturbation
            # tensor is kept and reused below.
            diff = perturb_alpha(model, input, target, epsilon_alpha)
            optimizer.zero_grad()
            architect.optimizer.zero_grad()

        # Weight update on the perturbed architecture.
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()

        # NOTE(review): `diff` is only bound when `perturb_alpha` is truthy;
        # with perturbation disabled the next line raises NameError -- confirm
        # the intended guard.
        pert_inp = input * diff
        # NOTE(review): `resnet18` is not a parameter; presumably a
        # module-level auxiliary network.  Its gradients are clipped here but
        # the step goes through `optimizer`, which holds `model`'s parameters
        # -- verify that resnet18 is actually being updated as intended.
        logits2 = resnet18(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(resnet18.parameters(), args.grad_clip)
        optimizer.step()

        # Metrics on the auxiliary network's outputs.
        prec1, prec5 = utils.accuracy(logits2, target, topk=(1, 5))
        objs.update(loss2.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if 'debug' in args.save:
            break
    return top1.avg, objs.avg
def train(train_queue, valid_queue, model, architect, criterion, optimizer, optimizer2, lr, lr2, model2, epoch):
    """One DARTS search epoch on `model`.

    The `optimizer2`/`lr2`/`model2`/`epoch` parameters belong to a disabled
    second-model experiment and are currently unused.

    Returns (average top-1 accuracy, average loss).
    """
    loss_meter = utils.AvgrageMeter()
    acc1_meter = utils.AvgrageMeter()
    acc5_meter = utils.AvgrageMeter()
    # Leftover accumulators from the disabled model2 experiment.
    train_loss = correct = total = max_step = 0

    for batch_idx, (images, labels) in enumerate(train_queue):
        model.train()
        batch_size = images.size(0)
        images = images.cuda()
        labels = labels.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        search_images, search_labels = next(iter(valid_queue))
        search_images = search_images.cuda()
        search_labels = search_labels.cuda(non_blocking=True)

        # Architecture (alpha) update on the search batch.
        architect.step(images, labels, search_images, search_labels, lr, optimizer, unrolled=args.unrolled)
        architect.optimizer.zero_grad()
        optimizer.zero_grad()

        # Weight update on the training batch.
        logits = model(images, updateType='weight')
        loss = criterion(logits, labels)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        acc1, acc5 = utils.accuracy(logits, labels, topk=(1, 5))
        loss_meter.update(loss.data, batch_size)
        acc1_meter.update(acc1.data, batch_size)
        acc5_meter.update(acc5.data, batch_size)

        if batch_idx % args.report_freq == 0:
            logging.info('train %03d %e %f %f', batch_idx, loss_meter.avg, acc1_meter.avg, acc5_meter.avg)
        if 'debug' in args.save:
            break
    return acc1_meter.avg, loss_meter.avg