Example no. 1
def train(train_queue, model, criterion, optimizer):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break

    return top1.avg, objs.avg
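
All of these examples lean on `utils.AvgrageMeter` for running averages, but the helper itself never appears on this page. A minimal sketch, assuming the DARTS-style `utils` module these snippets import (the misspelling is the original class name):

class AvgrageMeter(object):

    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.cnt = 0

    def update(self, val, n=1):
        # val is a batch statistic; n is the batch size it was computed over
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt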
Example no. 2
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg,
                             top5.avg)
                if 'debug' in args.save:
                    break

    return top1.avg, objs.avg
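
Likewise, `utils.accuracy` is assumed throughout. A sketch matching the DARTS-style signature, returning top-k precision in percent:

def accuracy(output, target, topk=(1,)):
    # returns one accuracy value (in percent) per requested k
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res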
Example no. 3
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # print('before softmax', model.arch_parameters())
        model.softmax_arch_parameters()

        # perturb on alpha
        # print('after softmax', model.arch_parameters())
        if perturb_alpha:
            perturb_alpha(model, input, target, epsilon_alpha)
            optimizer.zero_grad()
            architect.optimizer.zero_grad()
        # print('after perturb', model.arch_parameters())

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()
        # print('after restore', model.arch_parameters())

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break

    return top1.avg, objs.avg
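
A hypothetical driver for this search phase; the names `scheduler`, `args.epochs`, and the epsilon schedule are assumptions, not shown on this page:

for epoch in range(args.epochs):
    lr = scheduler.get_lr()[0]
    # assumed annealing of the perturbation magnitude over training
    epsilon_alpha = 0.03 + (0.3 - 0.03) * epoch / args.epochs
    train_acc, train_obj = train(train_queue, valid_queue, model, architect,
                                 criterion, optimizer, lr,
                                 perturb_alpha, epsilon_alpha)
    scheduler.step()
    valid_acc, valid_obj = infer(valid_queue, model, criterion)
    logging.info('epoch %d train_acc %f valid_acc %f', epoch, train_acc,
                 valid_acc)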
Example no. 4
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    test_loss = 0
    correct = 0
    total = 0
    max_step = 0
    best_acc = 0

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            test_loss += loss.item()
            _, predicted = logits.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            max_step = step

            progress_bar(
                step, len(valid_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (test_loss /
                 (step + 1), 100. * correct / total, correct, total))

    # Save checkpoint. `best_acc` is a local reset to 0 above, so this branch
    # fires on every call; persist it across calls to keep only the best model.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc



    return 100. * correct / total, test_loss / (max_step + 1)
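
Since `best_acc` is reset to 0 on each call, the checkpoint branch above fires every time. One way to persist the best accuracy across evaluations, sketched with an assumed module-level variable:

best_acc = 0  # module level, shared across calls

def infer_and_checkpoint(valid_queue, model, criterion):
    global best_acc
    acc, loss = infer(valid_queue, model, criterion)
    if acc > best_acc:
        state = {'net': model.state_dict(), 'acc': acc}
        os.makedirs('checkpoint', exist_ok=True)
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc
    return acc, loss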
Example no. 5
def train4(train_queue, valid_queue, model, architect, criterion, optimizer,
           lr, perturb_alpha, epsilon_alpha, model2, epoch, delta):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    train_loss = 0
    correct = 0
    total = 0
    max_step = 0

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        if delta.size() != input.size():
            print(list(delta.size()))
            print(list(input.size()))
            break
        else:
            pert_inp = torch.mul(input, delta)
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)

        optimizer.zero_grad()
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        # NOTE: `optimizer` must wrap model2's parameters for this step to
        # apply the gradients computed above
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step

        progress_bar(
            step, len(train_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (step + 1), 100. * correct / total, correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)
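
Note that `train4` backpropagates through `model2` but steps the `optimizer` argument, which in the sibling examples wraps `model`. If the intent is to train `model2` on the perturbed inputs, a dedicated optimizer is needed; a sketch with assumed hyperparameters:

optimizer2 = torch.optim.SGD(model2.parameters(), lr=0.025,
                             momentum=0.9, weight_decay=3e-4)
# inside the loop, in place of optimizer.step():
#     optimizer2.zero_grad()
#     loss2.backward()
#     nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
#     optimizer2.step()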
Example no. 6
    def train_controller(self):
        total_loss = utils.AvgrageMeter()
        total_reward = utils.AvgrageMeter()
        total_entropy = utils.AvgrageMeter()

        for step in range(300):  # fixed number of controller updates per call
            input, target = self.reward_queue.next_batch()
            self.model.eval()
            n = input.size(0)

            input = input.cuda()
            target = target.cuda()

            self.controller_optimizer.zero_grad()

            self.controller.train()
            # Sample an architecture from the controller and plug it into the one-shot model.
            arch, log_prob, entropy = self.controller()
            arch_parameters = self.get_weights_from_arch(arch)
            self.set_arch_model_weights(arch_parameters)

            with torch.no_grad():
                # Make sure that no gradients are propagated through the one-shot model
                # for the controller updates
                logits = self.model(input, discrete=True).detach()
                reward = utils.accuracy(logits, target)[0]

            if self.args.entropy_weight is not None:
                reward += self.args.entropy_weight * entropy

            log_prob = torch.sum(log_prob)
            if self.baseline is None:
                self.baseline = reward
            self.baseline = self.args.bl_dec * self.baseline + (
                1 - self.args.bl_dec) * reward

            loss = log_prob * (reward - self.baseline)
            loss = loss.mean()

            loss.backward()

            self.controller_optimizer.step()

            total_loss.update(loss.item(), n)
            total_reward.update(reward.item(), n)
            total_entropy.update(entropy.item(), n)

            if step % self.args.report_freq == 0:
                logging.info('controller %03d %e %f %f', step, total_loss.avg,
                             total_reward.avg, self.baseline.item())
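`train_controller` is a plain REINFORCE update: the sampled architecture's log-probability is weighted by the advantage `reward - baseline`, with the baseline kept as an exponential moving average of past rewards. In isolation, with `bl_dec` as the decay factor used above:

# a decay close to 1 makes the baseline track rewards slowly,
# which reduces the variance of the policy gradient
baseline = bl_dec * baseline + (1 - bl_dec) * reward
loss = (log_prob * (reward - baseline)).mean()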
Example no. 7
    def train_batch(self, arch):
        args = self.args
        if self.steps % len(self.train_queue) == 0:
            self.scheduler.step()
            self.objs = utils.AvgrageMeter()
            self.top1 = utils.AvgrageMeter()
            self.top5 = utils.AvgrageMeter()
        lr = self.scheduler.get_lr()[0]

        weights = self.get_weights_from_arch(arch)
        self.set_arch_model_weights(weights)

        step = self.steps % len(self.train_queue)
        input, target = next(self.train_iter)

        self.model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random_ws minibatch from the search queue with replacement
        self.optimizer.zero_grad()
        logits = self.model(input, discrete=True)
        loss = self.criterion(logits, target)

        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), args.grad_clip)
        
        self.optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        self.objs.update(loss.data.item(), n)
        self.top1.update(prec1.data.item(), n)
        self.top5.update(prec5.data.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, self.objs.avg, self.top1.avg, self.top5.avg)

        self.steps += 1
        if self.steps % len(self.train_queue) == 0:
            # Save the model weights
            self.epochs += 1
            self.train_iter = iter(self.train_queue)
            valid_err = self.evaluate(arch)
            logging.info('epoch %d  |  train_acc %f  |  valid_acc %f' % (self.epochs, self.top1.avg, 1 - valid_err))
            
            if self.epochs % 20 == 0:
                self.save(epoch=self.epochs)
Example no. 8
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    test_loss = 0
    correct = 0
    total = 0
    max_step = 0
    best_acc = 0

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            test_loss += loss.item()
            _, predicted = logits.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            max_step = step

            progress_bar(
                step, len(valid_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (test_loss /
                 (step + 1), 100. * correct / total, correct, total))

    # Save checkpoint. As in Example no. 4, `best_acc` is local, so this
    # branch fires on every call.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': model.state_dict(),
            'acc': acc,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc

    return 100. * correct / total, test_loss / (max_step + 1)
Example no. 9
def infer(valid_queue, model, criterion):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.eval()

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = model(input, updateType='weight')
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)
    return top1.avg, objs.avg
Example no. 10
    def evaluate_test(self, arch, split=None, discrete=False, normalize=True):
        # Return error since we want to minimize obj val
        logging.info(arch)
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()

        weights = self.get_weights_from_arch(arch)
        self.set_arch_model_weights(weights)

        self.model.eval()

        if split is None:
            n_batches = 10
        else:
            n_batches = len(self.test_queue)

        for step in range(n_batches):
            try:
                input, target = next(self.test_iter)
            except Exception as e:
                logging.info('looping back over valid set')
                self.test_iter = iter(self.test_queue)
                input, target = next(self.test_iter)
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = self.model(input, discrete=discrete, normalize=normalize)
            loss = self.criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data.item(), n)
            top1.update(prec1.data.item(), n)
            top5.update(prec5.data.item(), n)

            if step % self.args.report_freq == 0:
                logging.info('test %03d %e %f %f', step, objs.avg, top1.avg,
                             top5.avg)

        return 1 - 0.01 * top1.avg
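
Since `utils.accuracy` reports percentages, `1 - 0.01 * top1.avg` converts, for example, a 92.4% top-1 accuracy into a 0.076 error for a caller that minimizes this objective.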
Example no. 11
    def train_model(self, epoch):
        self.objs = utils.AvgrageMeter()
        self.top1 = utils.AvgrageMeter()
        self.top5 = utils.AvgrageMeter()
        for step, (input, target) in enumerate(self.train_queue):
            self.model.train()

            input = input.cuda()
            target = target.cuda()

            self.optimizer.zero_grad()
            self.controller.eval()

            # Sample an architecture from the controller
            arch, _, _ = self.controller()
            arch_parameters = self.get_weights_from_arch(arch)
            self.set_arch_model_weights(arch_parameters)

            # Evaluate the architecture
            logits = self.model(input, discrete=True)
            loss = self.criterion(logits, target)
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.args.grad_clip)
            self.optimizer.step()

            n = input.size(0)
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            self.objs.update(loss.data.item(), n)
            self.top1.update(prec1.data.item(), n)
            self.top5.update(prec5.data.item(), n)

            if step % self.args.report_freq == 0:
                logging.info('train %03d %e %f %f', step, self.objs.avg,
                             self.top1.avg, self.top5.avg)
            # note: the LR scheduler is stepped once per batch here
            self.scheduler.step()

        valid_err = self.evaluate(arch)
        logging.info('epoch %d  |  train_acc %f  |  valid_acc %f' %
                     (epoch, self.top1.avg, 1 - valid_err))
        return self.top1.avg
Example no. 12
def infer(test_queue, model, criterion):
  objs = utils.AvgrageMeter()
  top1 = utils.AvgrageMeter()
  top5 = utils.AvgrageMeter()
  model.eval()

  # Variable(..., volatile=True) is long deprecated; torch.no_grad() is the
  # modern way to disable autograd during evaluation
  with torch.no_grad():
    for step, (input, target) in enumerate(test_queue):
      input = input.cuda()
      target = target.cuda(non_blocking=True)

      logits, _ = model(input)
      loss = criterion(logits, target)

      prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
      n = input.size(0)
      objs.update(loss.data, n)
      top1.update(prec1.data, n)
      top5.update(prec5.data, n)

      if step % args.report_freq == 0:
        logging.info('test %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

  return top1.avg, objs.avg
Example no. 13
    def evaluate(self, arch, split=None):
        # Return error since we want to minimize obj val
        # logging.info(arch)
        objs = utils.AvgrageMeter()
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()

        weights = self.get_weights_from_arch(arch)
        self.set_arch_model_weights(weights)

        self.model.eval()
        self.controller.eval()

        if split is None:
            n_batches = 1
        else:
            n_batches = len(self.valid_queue)

        for step in range(n_batches):
            input, target = self.valid_queue.next_batch()
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits = self.model(input, discrete=True)
            loss = self.criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data.item(), n)
            top1.update(prec1.data.item(), n)
            top5.update(prec5.data.item(), n)

        return 1 - 0.01 * top1.avg
Example no. 14
    def __init__(self, save_path, seed, batch_size, grad_clip, epochs, num_intermediate_nodes, search_space, cutout,
                 resume_iter=None, init_channels=16):
        args = {}
        args['data'] = '../data'
        args['epochs'] = epochs
        args['learning_rate'] = 0.025
        args['batch_size'] = batch_size
        args['learning_rate_min'] = 0.001
        args['momentum'] = 0.9
        args['weight_decay'] = 3e-4
        args['init_channels'] = init_channels
        # Adapted to nasbench
        args['layers'] = 9
        args['drop_path_prob'] = 0.3
        args['grad_clip'] = grad_clip
        args['train_portion'] = 0.5
        args['seed'] = seed
        args['log_interval'] = 50
        args['save'] = save_path
        args['gpu'] = 0
        args['cuda'] = True
        args['cutout'] = cutout
        args['cutout_length'] = 16
        args['report_freq'] = 50
        args['output_weights'] = True
        args['steps'] = num_intermediate_nodes
        args['search_space'] = search_space.search_space_number
        self.search_space = search_space
        args = AttrDict(args)
        self.args = args

        # Dump the config of the run, but only if it doesn't yet exist
        config_path = os.path.join(args.save, 'config.json')
        if not os.path.exists(config_path):
            with open(config_path, 'w') as fp:
                json.dump(args.__dict__, fp)
        self.seed = seed

        np.random.seed(args.seed)
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.set_device(args.gpu)
        cudnn.benchmark = False
        cudnn.enabled = True
        cudnn.deterministic = True
        torch.cuda.manual_seed_all(args.seed)

        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(args.train_portion * num_train))

        self.train_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
            pin_memory=True, num_workers=0,
            # worker_init_fn must be a callable; np.random.seed(...) would be
            # evaluated immediately and pass None
            worker_init_fn=lambda _: np.random.seed(args.seed))

        self.valid_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
            pin_memory=True, num_workers=0,
            worker_init_fn=lambda _: np.random.seed(args.seed))

        _, test_transform = utils._data_transforms_cifar10(args)
        test_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=test_transform)
        self.test_queue = torch.utils.data.DataLoader(
            test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

        self.train_iter = iter(self.train_queue)
        self.valid_iter = iter(self.valid_queue)

        self.steps = 0
        self.epochs = 0
        self.total_loss = 0
        self.start_time = time.time()
        criterion = nn.CrossEntropyLoss()
        criterion = criterion.cuda()
        self.criterion = criterion

        model = Network(args.init_channels, 10, args.layers, self.criterion, output_weights=args.output_weights,
                        search_space=search_space, steps=args.steps)

        model = model.cuda()
        self.model = model

        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        optimizer = torch.optim.SGD(
            self.model.parameters(),
            args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
        self.optimizer = optimizer

        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        if resume_iter is not None:
            self.steps = resume_iter
            self.epochs = int(resume_iter / len(self.train_queue))
            logging.info("Resuming from epoch %d" % self.epochs)
            self.objs = utils.AvgrageMeter()
            self.top1 = utils.AvgrageMeter()
            self.top5 = utils.AvgrageMeter()
            for i in range(self.epochs):
                self.scheduler.step()

        size = 0
        for p in model.parameters():
            size += p.nelement()
        logging.info('param size: {}'.format(size))

        total_params = sum(x.data.nelement() for x in model.parameters())
        logging.info('Args: {}'.format(args))
        logging.info('Model total parameters: {}'.format(total_params))
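
A hypothetical instantiation of this wrapper; the class name `DartsWrapper` and the `sample_arch` helper are assumptions, only the constructor signature above is given:

wrapper = DartsWrapper(save_path='experiments/run0', seed=0, batch_size=64,
                       grad_clip=5, epochs=50, num_intermediate_nodes=4,
                       search_space=search_space, cutout=False)
arch = wrapper.sample_arch()  # assumed helper returning a random architecture
wrapper.train_batch(arch)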
Example no. 15
def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          optimizer2, lr, lr2, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        optimizer.zero_grad()
        logits = model(input, updateType='weight')
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break


    return top1.avg, objs.avg
Example no. 16
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        if torch.cuda.is_available():
            input = input.cuda()
            target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        if torch.cuda.is_available():
            input_search = input_search.cuda()
            target_search = target_search.cuda(non_blocking=True)

        architect.step(input,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # print('before softmax', model.arch_parameters())
        model.softmax_arch_parameters()

        # perturb on alpha
        # print('after softmax', model.arch_parameters())

        if perturb_alpha:
            # keep the returned perturbation; it is reused to build the
            # auxiliary network's input below
            diff = perturb_alpha(model, input, target, epsilon_alpha)
            optimizer.zero_grad()
            architect.optimizer.zero_grad()

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()

        # Train the auxiliary network (a global `resnet18` here) on inputs
        # scaled by the perturbation found above; this assumes perturb_alpha
        # was given, so `diff` is defined.
        pert_inp = input * diff
        logits2 = resnet18(pert_inp)
        loss2 = criterion(logits2, target)

        optimizer.zero_grad()
        loss2.backward()
        nn.utils.clip_grad_norm_(resnet18.parameters(), args.grad_clip)
        # NOTE: `optimizer` must also cover resnet18's parameters for this
        # step to apply the gradients computed above
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits2, target, topk=(1, 5))
        objs.update(loss2.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break

    return top1.avg, objs.avg
Example no. 17
def train(train_queue, valid_queue, model, architect, criterion, optimizer,
          optimizer2, lr, lr2, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        optimizer.zero_grad()
        architect.optimizer.zero_grad()

        # print('before softmax', model.arch_parameters())
        model.softmax_arch_parameters()

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        model.restore_arch_parameters()
        # print('after restore', model.arch_parameters())

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break


        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        # quantize |diff| into integer multipliers; the epoch-dependent offset
        # gradually shrinks the mask as training progresses
        deltas = torch.round(torch.abs(diff) * 255 / 8 + 0.499 - (epoch / 300))
        pert_inp = torch.mul(input, deltas)

        model2.train()

        optimizer2.zero_grad()
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        optimizer2.step()

    return top1.avg, objs.avg
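
Examples 17 and 19-21 construct `AttackPGD(model)` and unpack `(logits, diff, x)`, but the class itself is not on this page. A minimal PGD adversary with that return convention; the epsilon, step size, and step count are assumptions:

import torch
import torch.nn as nn
import torch.nn.functional as F

class AttackPGD(nn.Module):
    """Projected-gradient attack: forward returns (logits on the adversarial
    input, the perturbation diff = x - input, the adversarial input x)."""

    def __init__(self, model, epsilon=8 / 255, step_size=2 / 255, num_steps=7):
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        self.step_size = step_size
        self.num_steps = num_steps

    def forward(self, input, target):
        # random start inside the epsilon-ball
        x = input.detach() + torch.empty_like(input).uniform_(
            -self.epsilon, self.epsilon)
        for _ in range(self.num_steps):
            x.requires_grad_()
            with torch.enable_grad():
                loss = F.cross_entropy(self.model(x), target)
            grad = torch.autograd.grad(loss, [x])[0]
            # ascend the loss, then project back into the epsilon-ball
            x = x.detach() + self.step_size * torch.sign(grad.detach())
            x = torch.min(torch.max(x, input - self.epsilon),
                          input + self.epsilon)
            x = torch.clamp(x, 0.0, 1.0)
        diff = x - input
        return self.model(x), diff, x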
Example no. 18
def train(train_queue, model, criterion, optimizer):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()
    model.train()
    for step, (input, target) in enumerate(train_queue):
        """
        print (cnt)
        target_csv.write(str(cnt) + "," + str(target.item()) + "\n")
        if cnt == 12800:
            break
        input = input.cuda() #bxcxhxw
        ori_img = input[0, :, :, :].detach().cpu().numpy()
        target = target.cuda(non_blocking=True)
        new_img = np.zeros((33, 32, 32))
        new_img[:3, :, :] = ori_img
        for idx in range(10):
            _p = gcam.forward(input)
            gcam.backward(idx=idx)
            region = gcam.generate(target_layer=target_layer)
            cmap = cv2.resize(region, (32, 32))
            new_img[3*idx+3:3*idx+6, :, :] = cmap
        np.save("newdata/reweight_{}.npy".format(cnt), new_img)
        cnt += 1
        # mixup training
        alpha = 1
        use_cuda = True
        inputs, targets_a, targets_b, lam = mixup_data(inputs, target,
                                                       alpha, use_cuda)
        inputs, targets_a, targets_b = map(Variable, (inputs,
                                                      targets_a, targets_b))
        outputs = model(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(outputs, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)
        """
        input = input.cuda()  # shape: (batch, channels, height, width)
        target = target.cuda(non_blocking=True)
        r = np.random.rand(1)
        if args.beta > 0 and r < args.cutmix_prob:
            # generate mixed sample
            lam = np.random.beta(args.beta, args.beta)
            rand_index = torch.randperm(input.size()[0]).cuda()
            target_a = target
            target_b = target[rand_index]
            bbx1, bby1, bbx2, bby2 = rand_bbox(input.size(), lam)
            input[:, :, bbx1:bbx2, bby1:bby2] = input[rand_index, :, bbx1:bbx2, bby1:bby2]
            # adjust lambda to exactly match pixel ratio
            lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (input.size()[-1] * input.size()[-2]))
            # compute output
            logits = model(input)
            loss = criterion(logits, target_a) * lam + criterion(logits, target_b) * (1. - lam)
        else:
            # compute output (meters are updated only on non-CutMix batches,
            # since mixed targets make top-k accuracy ill-defined)
            logits = model(input)
            loss = criterion(logits, target)
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        if step % args.report_freq == 0:
            logging.info('train %03d avg: %e top1: %f top5: %f', step, objs.avg, top1.avg, top5.avg)
            if 'debug' in args.save:
                break

    return top1.avg, top5.avg, objs.avg
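
Example no. 18 calls `rand_bbox` without defining it. The standard helper from the CutMix reference implementation, which samples a box covering roughly a `1 - lam` fraction of the image:

import numpy as np

def rand_bbox(size, lam):
    # size is (batch, channels, height, width); lam is the mixing coefficient
    W, H = size[2], size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w, cut_h = int(W * cut_rat), int(H * cut_rat)

    # sample the box center uniformly, then clip the box to the image
    cx, cy = np.random.randint(W), np.random.randint(H)
    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)
    return bbx1, bby1, bbx2, bby2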
Example no. 19
def train3(train_queue, valid_queue, model, architect, criterion, optimizer,
           lr, perturb_alpha, epsilon_alpha, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    # placeholder for the perturbation returned to the caller
    # (CIFAR-10 full-batch shape, batch size 64)
    delta = torch.empty(64, 3, 32, 32)
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # PGD step: attack the current model and train on the adversarial loss
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # remember the last full-batch perturbation; a size mismatch means the
        # final (smaller) batch was reached
        if diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break


    return top1.avg, objs.avg, delta
Example no. 20
def train2(train_queue, valid_queue, model, architect, criterion, optimizer,
           lr, perturb_alpha, epsilon_alpha, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    train_loss = 0
    correct = 0
    total = 0
    max_step = 0
    # `delta` must exist before the size check below; initialize it to the
    # expected full-batch shape (CIFAR-10, batch size 64), as in train3
    delta = torch.empty(64, 3, 32, 32)
    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # PGD step: attack the current model and train on the adversarial loss
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        # remember the last full-batch perturbation; a size mismatch means the
        # final (smaller) batch was reached
        if diff.size() != delta.size():
            print(list(diff.size()))
            print(list(input.size()))
            break
        delta = diff

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg,
                         top5.avg)
            if 'debug' in args.save:
                break


    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        if delta.size() != input.size():
            print(list(delta.size()))
            print(list(input.size()))
            break
        else:
            pert_inp = torch.mul(input, delta)
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)

        optimizer.zero_grad()
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        # NOTE: `optimizer` must wrap model2's parameters for this step to
        # apply the gradients computed above
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step

        progress_bar(
            step, len(train_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (step + 1), 100. * correct / total, correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)
Example no. 21
def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr,
          perturb_alpha, epsilon_alpha, model2, epoch):
    objs = utils.AvgrageMeter()
    top1 = utils.AvgrageMeter()
    top5 = utils.AvgrageMeter()

    train_loss = 0
    correct = 0
    total = 0
    max_step = 0

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)

        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input,
                       target,
                       input_search,
                       target_search,
                       lr,
                       optimizer,
                       unrolled=args.unrolled)
        architect.optimizer.zero_grad()

        logits = model(input, updateType='weight')
        loss = criterion(logits, target)

        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # PGD step: attack the current model and train on the adversarial loss
        model_adv = AttackPGD(model)
        logits1, diff, x = model_adv(input, target)
        loss1 = criterion(logits1, target)

        optimizer.zero_grad()
        loss1.backward()
        optimizer.step()

        pert_inp = torch.mul(input, diff)
        logits2 = model2(pert_inp)
        loss2 = criterion(logits2, target)

        optimizer.zero_grad()
        loss2.backward()
        nn.utils.clip_grad_norm_(model2.parameters(), args.grad_clip)
        # NOTE: `optimizer` must wrap model2's parameters for this step to
        # apply the gradients computed above
        optimizer.step()

        train_loss += loss2.item()
        _, predicted = logits2.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        max_step = step

        progress_bar(
            step, len(train_queue), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (step + 1), 100. * correct / total, correct, total))

    return 100. * correct / total, train_loss / (max_step + 1)