    def generate(self, images, labels):
        # Split the image into a differentiable part (inside the mask) and a
        # frozen part, so gradients only flow through the masked region
        adv_grad = Variable(images.clone(), requires_grad=True)
        adv_nograd = Variable(images.clone())
        adv_images = adv_grad * self.mask + adv_nograd * (1 - self.mask)

        if self.target is not None:
            if self.target == -1:
                # set target as the Least-Likely Class
                _, labels = torch.min(self.model(Variable(images)).data, dim=1)
            else:
                # set target as a given integer
                labels = self.target * torch.ones_like(labels)

        for i in range(self.max_iter):
            adv_out = self.model(adv_images)
            loss = self.criterion(adv_out, Variable(labels))
            if self.target is not None:
                loss = -loss

            self.model.zero_grad()
            if adv_grad.grad is not None:
                adv_grad.grad.data.zero_()
            loss.backward()

            if self.norm == 'Linf':
                # Steepest-ascent direction under the L-inf norm
                adv_grad.grad.sign_()
            elif self.norm == 'L1':
                L1_norm = torch.norm(adv_grad.grad.view(labels.size(0), -1),
                                     p=1,
                                     dim=1)
                # Small epsilon guards against division by an all-zero gradient
                adv_grad.grad = adv_grad.grad / (L1_norm.view(-1, 1, 1, 1) + 1e-12)
            elif self.norm == 'L2':
                L2_norm = torch.norm(adv_grad.grad.view(labels.size(0), -1),
                                     p=2,
                                     dim=1)
                adv_grad.grad = adv_grad.grad / (L2_norm.view(-1, 1, 1, 1) + 1e-12)

            # Ascend the loss, then project the perturbation back into the
            # max_clip ball and the valid [0, 1] pixel range in image space
            adv_images = adv_images + self.alpha * adv_grad.grad
            diff = torch.clamp(
                denormalize(adv_images, self.args.dataset) -
                denormalize(Variable(images), self.args.dataset),
                -self.max_clip, self.max_clip)
            adv_images = torch.clamp(
                denormalize(images, self.args.dataset) + diff.data, 0, 1)
            adv_images = normalize(adv_images, self.args.dataset)

            # Re-attach the gradient/no-gradient split for the next iteration
            adv_grad = Variable(adv_images.clone(), requires_grad=True)
            adv_nograd = Variable(adv_images.clone())
            adv_images = adv_grad * self.mask + adv_nograd * (1 - self.mask)

        return adv_images.data, labels
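    # Usage sketch (illustrative only: the class name, constructor signature,
    # and `loader` below are assumptions, not part of this snippet):
    #
    #   attack = IterativePGD(model, args, norm='Linf')
    #   for images, labels in loader:
    #       adv_images, adv_labels = attack.generate(images.cuda(), labels.cuda())
    #       # adv_images stay within max_clip of the originals in [0, 1] pixel space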
    def generate_sample(self, image, label):
        # Pixel deflection guided by a robust class activation map (RCAM),
        # followed by denoising in HWC pixel space
        rcam = self.get_rcam(image)
        def_image = self.pixel_deflection(image, rcam, self.ndeflection, self.window)
        def_image = denormalize(def_image.unsqueeze(0), self.args.dataset).squeeze(0)
        def_image = np.transpose(def_image.cpu().numpy(), [1, 2, 0])  # CHW -> HWC
        def_image = self.denoise(self.denoiser, def_image, self.sigma)
        def_image = np.transpose(def_image, [2, 0, 1])  # HWC -> CHW
        def_image = torch.FloatTensor(def_image).cuda()
        def_image = normalize(def_image.unsqueeze(0), self.args.dataset).squeeze(0)

        return def_image
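    # The denoiser above operates on HWC numpy arrays, hence the CHW -> HWC ->
    # CHW round-trip. A minimal stand-in with the same (denoiser, image, sigma)
    # interface, assuming a wavelet denoiser as in the pixel-deflection paper
    # (hypothetical sketch, not the repo's implementation):
    #
    #   from skimage.restoration import denoise_wavelet
    #
    #   def denoise(denoiser, img_hwc, sigma):
    #       return denoise_wavelet(img_hwc, sigma=sigma, multichannel=True)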
    def generate_sample(self, image, label):
        # PGD Obfuscated Gradients setting
        alpha = 0.1
        max_clip = 0.031
        adv_img = image.clone()
        criterion = nn.CrossEntropyLoss()
        label = torch.LongTensor([label])

        if isinstance(adv_img, Variable):
            adv_img = adv_img.data
        adv_img = Variable(adv_img, requires_grad=True)  # Start of graph

        for i in range(self.args.eot_iter):

            ensemble_images = torch.cat([
                self.defense.generate(adv_img.unsqueeze(0), label)[0]
                for _ in range(self.nsamples)
            ], dim=0)
            ensemble_labels = to_var(label.repeat(self.nsamples), self.cuda)

            ensemble_outputs = self.model(ensemble_images)
            ensemble_loss = criterion(ensemble_outputs, ensemble_labels)
            if adv_img.grad is not None:
                adv_img.grad.data.zero_()
            ensemble_loss.backward()

            if self.args.eot_norm == 'linf':
                adv_img.grad.sign_()  # in-place; plain .sign() would discard the result
            elif self.args.eot_norm == 'l2':
                L2_norm = torch.norm(adv_img.grad.view(label.size(0), -1), p=2, dim=1)
                adv_img.grad = adv_img.grad / L2_norm.view(-1, 1, 1)
            else:
                raise ValueError("unsupported norm: {}".format(self.args.eot_norm))

            # Ascend the averaged loss, then project back into the max_clip
            # ball and the valid pixel range in image space
            adv_img = adv_img + alpha * adv_img.grad
            diff = torch.clamp(
                denormalize(adv_img, self.args.dataset) -
                denormalize(Variable(image), self.args.dataset),
                -max_clip, max_clip)
            adv_img = torch.clamp(denormalize(image, self.args.dataset) + diff.data, 0, 1)
            adv_img = Variable(normalize(adv_img, self.args.dataset)[0], requires_grad=True)

        return adv_img
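    # Note: the loop above follows the EOT (Expectation over Transformation)
    # recipe from Athalye et al.'s obfuscated-gradients attacks: each PGD step
    # averages the loss over `nsamples` stochastic outputs of the defense, so
    # the attack optimizes the expected loss rather than a single draw.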
    def defend(self):
        self.model.eval()
        defense_scheme = getattr(defenses,
                                 self.args.defense)(self.model, self.args,
                                                    **self.kwargs)
        source = self.model
        if self.args.source is not None and (self.args.ckpt_name !=
                                             self.args.ckpt_src):
            target = self.args.ckpt_name
            self.args.model = self.args.source
            self.args.ckpt_name = self.args.ckpt_src
            source = get_model(self.args)
            self.logger.log("Transfer attack from {} -> {}".format(
                self.args.ckpt_src, target))
        attack_scheme = getattr(attacks, self.args.attack)(source, self.args,
                                                           **self.kwargs)

        eval_metrics = EvaluationMetrics(
            ['Test/Acc', 'Test/Top5', 'Test/Time'])
        eval_def_metrics = EvaluationMetrics(
            ['Def-Test/Acc', 'Def-Test/Top5', 'Def-Test/Time'])
        attack_metrics = EvaluationMetrics(
            ['Attack/Acc', 'Attack/Top5', 'Attack/Time'])
        defense_metrics = EvaluationMetrics(
            ['Defense/Acc', 'Defense/Top5', 'Defense/Time'])
        dist_metrics = EvaluationMetrics(['L0', 'L1', 'L2', 'Li'])

        for i, (images, labels) in enumerate(self.val_loader):
            self.step += 1
            if self.cuda:
                images = images.cuda()
                labels = labels.cuda()
            if self.args.half: images = images.half()

            # Inference
            st = time.time()
            outputs = self.model(self.to_var(images, self.cuda, True))
            outputs = outputs.float()
            _, preds = torch.topk(outputs, 5)

            acc = (labels == preds.data[:, 0]).float().mean()
            top5 = torch.sum(
                (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
                dim=1).mean()
            eval_metrics.update('Test/Acc', float(acc), labels.size(0))
            eval_metrics.update('Test/Top5', float(top5), labels.size(0))
            eval_metrics.update('Test/Time', time.time() - st, labels.size(0))

            # Attacker
            st = time.time()
            adv_images, adv_labels = attack_scheme.generate(images, labels)
            if isinstance(adv_images, Variable):
                adv_images = adv_images.data
            attack_metrics.update('Attack/Time',
                                  time.time() - st, labels.size(0))

            # Lp distances between adversarial and original images in pixel space
            diff = torch.abs(
                denormalize(adv_images, self.args.dataset) -
                denormalize(images, self.args.dataset))
            # L0: number of pixel locations changed by more than a small threshold
            L0 = torch.sum((torch.sum(diff, dim=1) > 1e-3).float().view(
                labels.size(0), -1),
                           dim=1).mean()
            diff = diff.view(labels.size(0), -1)
            L1 = torch.norm(diff, p=1, dim=1).mean()
            L2 = torch.norm(diff, p=2, dim=1).mean()
            Li = torch.max(diff, dim=1)[0].mean()
            dist_metrics.update('L0', float(L0), labels.size(0))
            dist_metrics.update('L1', float(L1), labels.size(0))
            dist_metrics.update('L2', float(L2), labels.size(0))
            dist_metrics.update('Li', float(Li), labels.size(0))

            # Defender
            st = time.time()
            def_images, def_labels = defense_scheme.generate(
                adv_images, adv_labels)
            # FIXME - Variable in, Variable out for all methods
            if isinstance(def_images, Variable):
                def_images = def_images.data
            defense_metrics.update('Defense/Time',
                                   time.time() - st, labels.size(0))
            self.calc_stats('Attack', adv_images, images, adv_labels, labels,
                            attack_metrics)
            self.calc_stats('Defense', def_images, images, def_labels, labels,
                            defense_metrics)

            # Run the defense on clean images to measure the accuracy shift
            # the defense itself introduces
            st = time.time()
            def_images_org, _ = defense_scheme.generate(images, labels)
            # FIXME - Variable in, Variable out for all methods
            if isinstance(def_images_org, Variable):
                def_images_org = def_images_org.data
            outputs = self.model(self.to_var(def_images_org, self.cuda, True))
            outputs = outputs.float()
            _, preds = torch.topk(outputs, 5)

            acc = (labels == preds.data[:, 0]).float().mean()
            top5 = torch.sum(
                (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
                dim=1).mean()
            eval_def_metrics.update('Def-Test/Acc', float(acc), labels.size(0))
            eval_def_metrics.update('Def-Test/Top5', float(top5),
                                    labels.size(0))
            eval_def_metrics.update('Def-Test/Time',
                                    time.time() - st, labels.size(0))

            if self.step % self.args.log_step == 0 or self.step == len(
                    self.val_loader):
                self.logger.scalar_summary(eval_metrics.avg, self.step, 'TEST')
                self.logger.scalar_summary(eval_def_metrics.avg, self.step,
                                           'TEST')
                self.logger.scalar_summary(attack_metrics.avg, self.step,
                                           'ATTACK')
                self.logger.scalar_summary(defense_metrics.avg, self.step,
                                           'DEFENSE')
                self.logger.scalar_summary(dist_metrics.avg, self.step, 'DIST')

                # Defense rate: the fraction of the accuracy lost to the attack
                # that the defense recovers, i.e.
                # 1 - (Test - Defense) / (Test - Attack), with a guard against
                # a zero denominator when the attack did not hurt accuracy
                defense_rate = eval_metrics.avg[
                    'Test/Acc'] - defense_metrics.avg['Defense/Acc']
                if eval_metrics.avg['Test/Acc'] - attack_metrics.avg[
                        'Attack/Acc']:
                    defense_rate /= eval_metrics.avg[
                        'Test/Acc'] - attack_metrics.avg['Attack/Acc']
                else:
                    defense_rate = 0
                defense_rate = 1 - defense_rate

                defense_top5 = eval_metrics.avg[
                    'Test/Top5'] - defense_metrics.avg['Defense/Top5']
                if eval_metrics.avg['Test/Top5'] - attack_metrics.avg[
                        'Attack/Top5']:
                    defense_top5 /= eval_metrics.avg[
                        'Test/Top5'] - attack_metrics.avg['Attack/Top5']
                else:
                    defense_top5 = 0
                defense_top5 = 1 - defense_top5

                self.logger.log(
                    "Defense Rate Top1: {:5.3f} | Defense Rate Top5: {:5.3f}".
                    format(defense_rate, defense_top5), 'DEFENSE')

            if self.step % self.args.img_log_step == 0:
                image_dict = {
                    'Original':
                    to_np(denormalize(images, self.args.dataset))[0],
                    'Attacked':
                    to_np(denormalize(adv_images, self.args.dataset))[0],
                    'Defended':
                    to_np(denormalize(def_images, self.args.dataset))[0],
                    'Perturbation':
                    to_np(denormalize(images - adv_images,
                                      self.args.dataset))[0]
                }
                self.logger.image_summary(image_dict, self.step)
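    # Minimal sketch of the EvaluationMetrics helper assumed above
    # (hypothetical reimplementation, consistent with its use here:
    # update(key, value, n) accumulates a weighted running average that is
    # exposed through .avg):
    #
    #   class EvaluationMetrics:
    #       def __init__(self, keys):
    #           self.sum = {k: 0.0 for k in keys}
    #           self.cnt = {k: 0 for k in keys}
    #
    #       def update(self, key, value, n=1):
    #           self.sum[key] += value * n
    #           self.cnt[key] += n
    #
    #       @property
    #       def avg(self):
    #           return {k: self.sum[k] / max(self.cnt[k], 1) for k in self.sum}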
    def generate(self, images, labels):
        if self.target is not None:
            if self.target == -1:  # Least likely method
                _, labels = torch.min(self.model(to_var(images)).data, dim=1)
            else:
                labels = self.target * torch.ones_like(labels)
        labels = to_var(labels)
        # images = to_var(images*self.std.data + self.mean.data)
        images = denormalize(to_var(images), self.args.dataset)
        # Best adversarial images and lowest Lp found across all
        # binary-search rounds
        outer_adv_images = images.data.clone()
        outer_Lp = torch.ones(images.size(0)) * 1e10
        if self.args.cuda: outer_Lp = outer_Lp.cuda()

        self.lower = torch.zeros(self.args.batch_size)
        self.upper = torch.ones(self.args.batch_size) * 1e10
        if self.args.cuda:
            self.lower = self.lower.cuda()
            self.upper = self.upper.cuda()
        c = to_var(torch.ones(self.args.batch_size) * self.c0)
        tau = to_var(torch.ones(self.args.batch_size) * self.tau0)

        # Binary search for the best c, i.e. the weight on the confidence loss
        for binary_step in range(self.outer_iter):

            update = torch.zeros(images.size(0))
            if self.args.cuda: update = update.cuda()
            valid = to_var(
                torch.ones(images.size(0), 1, images.size(2), images.size(3)))

            # variables used only inside the binary search loop
            inner_adv_grad = to_var(self.unclip(images.data))
            inner_adv_grad.requires_grad = True
            inner_adv_nograd = to_var(self.unclip(images.data))
            inner_adv_latent = inner_adv_grad * self.mask + inner_adv_nograd * (
                1 - self.mask)

            inner_adv_images = self.clip(inner_adv_latent)
            inner_adv_out = self.model(
                normalize(inner_adv_images, self.args.dataset))
            inner_Lp = torch.ones(images.size(0)) * 1e10
            inner_grad = torch.zeros_like(images.data)
            if self.args.cuda: inner_Lp = inner_Lp.cuda()

            optimizer = get_optimizer(self.args.optimizer, [inner_adv_grad],
                                      self.args)

            for step in range(self.inner_iter):
                diff = (inner_adv_images - images).view(images.size(0), -1)
                if self.norm == 'Li':
                    # L-inf surrogate: per-pixel changes are clamped from below
                    # at tau before summing, so only changes above tau matter
                    Lp = torch.max(torch.abs(diff), tau.view(-1, 1))
                    Lp = torch.sum(Lp, dim=1)
                else:
                    Lp = torch.norm(diff, p=2, dim=1)**2
                Lp_loss = torch.sum(Lp)

                # CW confidence loss: Z_t is the logit of the (target) label,
                # Z_nt the largest other logit; push their margin past kappa
                Z_t = inner_adv_out.gather(1, labels.view(-1, 1)).squeeze(1)
                Z_nt, _ = torch.max(inner_adv_out.scatter(
                    1, labels.view(-1, 1), -1e10),
                                    dim=1)
                Z_diff = Z_nt - Z_t
                if self.target is None:
                    Z_diff = -Z_diff
                conf_loss = torch.max(Z_diff,
                                      torch.ones_like(Z_diff) * (-self.kappa))

                loss = Lp_loss + torch.dot(c, conf_loss)
                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()

                grad = inner_adv_grad.grad
                inner_adv_latent = inner_adv_grad * self.mask + inner_adv_nograd * (
                    1 - self.mask)
                inner_adv_images = self.clip(
                    inner_adv_latent) * valid + images * (1 - valid)
                # inner_adv_out = self.model((inner_adv_images - self.mean)/self.std)
                inner_adv_out = self.model(
                    normalize(inner_adv_images, self.args.dataset))
                success = (torch.max(inner_adv_out, dim=1)[1] == labels).data
                if self.target is None:
                    success = ~success  # untargeted: success means the label changed
                # Keep the best (lowest-Lp) successful attack per example
                inner_update = ((inner_Lp > Lp.data) * success).float()
                outer_update = ((outer_Lp > Lp.data) * success).float()
                update = update + inner_update

                inner_Lp += inner_update * (Lp.data - inner_Lp)
                outer_Lp += outer_update * (Lp.data - outer_Lp)

                inner_update = inner_update.view(-1, 1, 1, 1)
                inner_grad += inner_update * (grad.data - inner_grad)

                outer_update = outer_update.view(-1, 1, 1, 1)
                outer_adv_images = outer_update * inner_adv_images.data + \
                                   (1 - outer_update) * outer_adv_images

            c = self.binary_search(c, update)
            abs_diff = torch.abs(inner_adv_images - images)
            if self.norm == 'L0':
                # Keep attacking pixels whose change x gradient magnitude is
                # large; the rest are frozen back to the original image
                totalchange = torch.sum(abs_diff.data * torch.abs(inner_grad),
                                        dim=1)
                valid = to_var((totalchange > self.max_eps).float())
                valid = valid.view(
                    (images.size(0), 1, images.size(2), images.size(3)))
            elif self.norm == 'Li':
                actual_tau, _ = torch.max(abs_diff.view(images.size(0), -1),
                                          dim=1)
                tau = self.reduce_tau(tau, actual_tau, update)

        # adv_images = (outer_adv_images - self.mean.data) / self.std.data
        adv_images = normalize(to_var(outer_adv_images), self.args.dataset)
        return adv_images.data, labels
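    # Sketch of the per-example binary search on c assumed above (hypothetical;
    # it mirrors the Carlini-Wagner recipe: where the attack succeeded this
    # round, tighten the upper bound and shrink c; where it never succeeded,
    # raise the lower bound and grow c):
    #
    #   def binary_search(self, c, update):
    #       ok = (update > 0).float()                    # succeeded at least once
    #       self.upper = ok * torch.min(self.upper, c.data) + (1 - ok) * self.upper
    #       self.lower = (1 - ok) * torch.max(self.lower, c.data) + ok * self.lower
    #       mid = (self.lower + self.upper) / 2
    #       grow = (1 - ok) * (self.upper > 1e9).float()  # no upper bound found yet
    #       return to_var(grow * c.data * 10 + (1 - grow) * mid)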
    def generate(self, images, labels):
        """Generate adversarial images with a differential-evolution search
        over a few pixel locations and their RGB values.
        """
        preds = np.argmax(to_np(self.model(to_var(images))), axis=1)
        images = denormalize(images, self.args.dataset) * 255
        #self.n_pix = int(images.size(2)*images.size(3)*self.args.gamma)

        # Search bounds per perturbed pixel: (x, y, r, g, b)
        bounds = [(0, images[0].size(1) - 1), (0, images[0].size(2) - 1),
                  (0, 255), (0, 255), (0, 255)] * self.n_pix

        adv_images = []
        adv_labels = []

        for i in range(len(images)):
            self.image = images[i]
            self.label = int(preds[i])

            if self.target is not None:
                self.label = self.target
            self.convergence = False

            if self.init == 'normal':
                # Random initial population: uniform pixel locations and
                # Gaussian RGB values, packed as popsize rows of
                # (x, y, r, g, b) * n_pix
                x_loc = np.random.uniform(0, images[0].size(1),
                                          self.n_pix * self.popsize)
                y_loc = np.random.uniform(0, images[0].size(2),
                                          self.n_pix * self.popsize)
                val = np.array(
                    np.split(
                        np.random.normal(128, 127,
                                         self.n_pix * self.popsize * 3), 3))
                init = np.array(
                    np.split(np.vstack((x_loc, y_loc, val)),
                             self.n_pix,
                             axis=1))
                init = np.transpose(init.reshape(-1, self.popsize))

            else:
                init = self.init

            self.step = 0
            if self.args.domain_restrict:
                self.mapping = self.create_map(self.args.gamma,
                                               self.kwargs.get('artifact'))
            else:
                self.mapping = lambda x, y: (x, y)

            result = differential_evolution(self.optimize,
                                            bounds,
                                            init=init,
                                            strategy=self.strategy,
                                            maxiter=self.max_iter,
                                            popsize=self.popsize,
                                            seed=self.args.seed,
                                            callback=self.callback,
                                            mutation=0.5,
                                            recombination=1,
                                            polish=False,
                                            tol=0,
                                            atol=-1)

            adv_image = self.perturb(result.x).squeeze(0)
            adv_images.append(adv_image)
            adv_labels.append(self.label)

            self.step_meter.update(self.step - 1)

        #print("Average step per iter: {}".format(self.step_meter.avg))

        return torch.stack(adv_images), torch.LongTensor(adv_labels)
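    # Sketch of the perturb helper assumed above (hypothetical; the DE vector
    # packs n_pix tuples of (x, y, r, g, b) matching `bounds`, with pixel
    # values in [0, 255] since `images` was denormalized and scaled by 255):
    #
    #   def perturb(self, x):
    #       adv = self.image.clone()                      # CHW, values in [0, 255]
    #       for px in np.split(np.asarray(x), self.n_pix):
    #           xi, yi = self.mapping(int(px[0]), int(px[1]))
    #           adv[:, xi, yi] = torch.FloatTensor(px[2:])  # overwrite one pixel
    #       return adv.unsqueeze(0)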