def generate(self, images, labels):
    """Generate adversarial images with a masked, iterative gradient attack.

    Runs `self.max_iter` steps of a PGD/FGSM-style attack in which only the
    pixels selected by `self.mask` receive gradient (the `adv_grad` copy);
    the remaining pixels come from a detached copy (`adv_nograd`), so the
    perturbation is confined to the masked region.

    Args:
        images: batch of normalized input images (tensor, NCHW assumed from
            the `view(-1, 1, 1, 1)` broadcasting below — confirm with caller).
        labels: ground-truth labels; replaced by attack targets when
            `self.target` is set.

    Returns:
        Tuple of (adversarial image tensor, labels actually attacked).
    """
    adv_grad = Variable(images.clone(), requires_grad=True)
    adv_nograd = Variable(images.clone())
    # Only the masked region is connected to the autograd graph.
    adv_images = adv_grad * self.mask + adv_nograd * (1 - self.mask)

    if self.target is not None:
        if self.target == -1:
            # set target as the Least-Likely Class (lowest-scoring logit)
            _, labels = torch.min(self.model(Variable(images)).data, dim=1)
        else:
            # set target as a given integer class for the whole batch
            labels = self.target * torch.ones_like(labels)

    for i in range(self.max_iter):
        adv_out = self.model(adv_images)
        loss = self.criterion(adv_out, Variable(labels))
        # Targeted attack minimizes the loss toward the target class.
        if self.target is not None:
            loss = -loss

        self.model.zero_grad()
        if adv_grad.grad is not None:
            adv_grad.grad.data.zero_()
        loss.backward()

        # Normalize the gradient according to the chosen attack norm.
        if self.norm == 'Linf':
            adv_grad.grad.sign_()
        elif self.norm == 'L1':
            L1_norm = torch.norm(adv_grad.grad.view(labels.size(0), -1),
                                 p=1, dim=1)
            adv_grad.grad = adv_grad.grad / L1_norm.view(-1, 1, 1, 1)
        elif self.norm == 'L2':
            L2_norm = torch.norm(adv_grad.grad.view(labels.size(0), -1),
                                 p=2, dim=1)
            adv_grad.grad = adv_grad.grad / L2_norm.view(-1, 1, 1, 1)

        # Ascent step, then project back into the max_clip ball around the
        # clean image in [0, 1] pixel space before re-normalizing.
        adv_images = adv_images + self.alpha * adv_grad.grad
        diff = torch.clamp(
            denormalize(adv_images, self.args.dataset) -
            denormalize(Variable(images), self.args.dataset),
            -self.max_clip, self.max_clip)
        adv_images = torch.clamp(
            denormalize(images, self.args.dataset) + diff.data, 0, 1)
        adv_images = normalize(adv_images, self.args.dataset)

        # Rebuild the graph leaves for the next iteration.
        adv_grad = Variable(adv_images.clone(), requires_grad=True)
        adv_nograd = Variable(adv_images.clone())
        adv_images = adv_grad * self.mask + adv_nograd * (1 - self.mask)

    return adv_images.data, labels
def generate_sample(self, image, label):
    """Defend a single image: pixel deflection, then denoising.

    The image is deflected under guidance of the robust class-activation
    map, converted to HWC numpy layout in [0, 1] pixel space for the
    denoiser, then restored to a normalized CHW CUDA tensor.
    """
    # Deflect pixels, guided by the robust class-activation map.
    cam = self.get_rcam(image)
    deflected = self.pixel_deflection(image, cam, self.ndeflection,
                                      self.window)
    # De-normalize (batch dim added/removed around the helper) and move to
    # HWC numpy layout, which the denoiser expects.
    deflected = denormalize(deflected.unsqueeze(0),
                            self.args.dataset).squeeze(0)
    hwc = np.transpose(deflected.cpu().numpy(), [1, 2, 0])
    denoised = self.denoise(self.denoiser, hwc, self.sigma)
    # Back to a CHW tensor on the GPU, then re-normalize.
    chw = torch.FloatTensor(np.transpose(denoised, [2, 0, 1])).cuda()
    return normalize(chw.unsqueeze(0), self.args.dataset).squeeze(0)
def generate_sample(self, image, label):
    """EOT (Expectation over Transformation) PGD attack on one image.

    Implements the Obfuscated Gradients attack: at each step the current
    adversarial image is passed through the (randomized) defense
    `self.nsamples` times, and the gradient of the ensemble loss is used
    for the PGD update, averaging out the defense's randomness.

    Args:
        image: single normalized input image tensor.
        label: integer ground-truth label for the image.

    Returns:
        The adversarial image as a `Variable`.
    """
    # PGD Obfuscated Gradients setting
    alpha = 0.1
    max_clip = 0.031
    adv_img = image.clone()
    criterion = nn.CrossEntropyLoss()
    label = torch.LongTensor([label])
    if isinstance(adv_img, Variable):
        adv_img = adv_img.data
    adv_img = Variable(adv_img, requires_grad=True)
    # Start of graph
    for i in range(self.args.eot_iter):
        # Attack an ensemble of independently defended copies of the
        # current adversarial image.
        ensemble_images = torch.cat([
            self.defense.generate(adv_img.unsqueeze(0), label)[0]
            for _ in range(self.nsamples)
        ], dim=0)
        ensemble_labels = to_var(label.repeat(self.nsamples), self.cuda)
        ensemble_outputs = self.model(ensemble_images)
        ensemble_loss = criterion(ensemble_outputs, ensemble_labels)

        if adv_img.grad is not None:
            adv_img.grad.data.zero_()
        ensemble_loss.backward()

        if self.args.eot_norm == 'linf':
            # BUG FIX: was `adv_img.grad.sign()`, which is out-of-place and
            # discards its result, leaving the raw gradient in use. The
            # in-place `sign_()` matches the Linf branch of the white-box
            # attack elsewhere in this file.
            adv_img.grad.sign_()
        elif self.args.eot_norm == 'l2':
            L2_norm = torch.norm(adv_img.grad.view(label.size(0), -1),
                                 p=2, dim=1)
            adv_img.grad = adv_img.grad / L2_norm.view(-1, 1, 1)
        else:
            raise ValueError(
                "unsupported eot_norm: {}".format(self.args.eot_norm))

        # Ascent step, then project back into the max_clip ball around the
        # clean image in [0, 1] pixel space before re-normalizing.
        adv_img = adv_img + alpha * adv_img.grad
        diff = torch.clamp(
            denormalize(adv_img, self.args.dataset) -
            denormalize(Variable(image), self.args.dataset),
            -max_clip, max_clip)
        adv_img = torch.clamp(
            denormalize(image, self.args.dataset) + diff.data, 0, 1)
        adv_img = Variable(normalize(adv_img, self.args.dataset)[0],
                           requires_grad=True)
    return adv_img
def defend(self):
    """Evaluate attack and defense schemes over the validation set.

    For each batch: measures clean accuracy, generates adversarial
    examples with the configured attack, measures their Lp distance to the
    clean images, applies the configured defense, and records accuracy /
    timing metrics for each stage. Periodically logs scalar summaries,
    defense-rate statistics, and example images.
    """
    self.model.eval()
    defense_scheme = getattr(defenses, self.args.defense)(self.model,
                                                          self.args,
                                                          **self.kwargs)
    source = self.model
    # Transfer-attack setup: craft adversarial examples on a different
    # (source) model than the one being defended.
    if self.args.source is not None and (self.args.ckpt_name !=
                                         self.args.ckpt_src):
        target = self.args.ckpt_name
        self.args.model = self.args.source
        self.args.ckpt_name = self.args.ckpt_src
        source = get_model(self.args)
        self.logger.log("Transfer attack from {} -> {}".format(
            self.args.ckpt_src, target))
    attack_scheme = getattr(attacks, self.args.attack)(source, self.args,
                                                       **self.kwargs)

    eval_metrics = EvaluationMetrics(
        ['Test/Acc', 'Test/Top5', 'Test/Time'])
    eval_def_metrics = EvaluationMetrics(
        ['Def-Test/Acc', 'Def-Test/Top5', 'Def-Test/Time'])
    attack_metrics = EvaluationMetrics(
        ['Attack/Acc', 'Attack/Top5', 'Attack/Time'])
    defense_metrics = EvaluationMetrics(
        ['Defense/Acc', 'Defense/Top5', 'Defense/Time'])
    dist_metrics = EvaluationMetrics(['L0', 'L1', 'L2', 'Li'])

    for i, (images, labels) in enumerate(self.val_loader):
        self.step += 1
        if self.cuda:
            images = images.cuda()
            labels = labels.cuda()
        if self.args.half:
            images = images.half()

        # Inference: clean top-1 / top-5 accuracy and latency.
        st = time.time()
        outputs = self.model(self.to_var(images, self.cuda, True))
        outputs = outputs.float()
        _, preds = torch.topk(outputs, 5)
        acc = (labels == preds.data[:, 0]).float().mean()
        top5 = torch.sum(
            (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
            dim=1).mean()
        eval_metrics.update('Test/Acc', float(acc), labels.size(0))
        eval_metrics.update('Test/Top5', float(top5), labels.size(0))
        eval_metrics.update('Test/Time', time.time() - st, labels.size(0))

        # Attacker
        st = time.time()
        adv_images, adv_labels = attack_scheme.generate(images, labels)
        if isinstance(adv_images, Variable):
            adv_images = adv_images.data
        attack_metrics.update('Attack/Time', time.time() - st,
                              labels.size(0))

        # Lp distance between clean and adversarial images, measured in
        # de-normalized [0, 1] pixel space. L0 counts pixels whose summed
        # channel change exceeds a small threshold.
        diff = torch.abs(
            denormalize(adv_images, self.args.dataset) -
            denormalize(images, self.args.dataset))
        L0 = torch.sum((torch.sum(diff, dim=1) > 1e-3).float().view(
            labels.size(0), -1), dim=1).mean()
        diff = diff.view(labels.size(0), -1)
        L1 = torch.norm(diff, p=1, dim=1).mean()
        L2 = torch.norm(diff, p=2, dim=1).mean()
        Li = torch.max(diff, dim=1)[0].mean()
        dist_metrics.update('L0', float(L0), labels.size(0))
        dist_metrics.update('L1', float(L1), labels.size(0))
        dist_metrics.update('L2', float(L2), labels.size(0))
        dist_metrics.update('Li', float(Li), labels.size(0))

        # Defender
        st = time.time()
        def_images, def_labels = defense_scheme.generate(
            adv_images, adv_labels)
        if isinstance(
                def_images, Variable
        ):  # FIXME - Variable in Variable out for all methods
            def_images = def_images.data
        defense_metrics.update('Defense/Time', time.time() - st,
                               labels.size(0))

        self.calc_stats('Attack', adv_images, images, adv_labels, labels,
                        attack_metrics)
        self.calc_stats('Defense', def_images, images, def_labels, labels,
                        defense_metrics)

        # Defense-Inference for shift of original image: accuracy of the
        # model on defended *clean* images, to measure how much the
        # defense alone degrades performance.
        st = time.time()
        def_images_org, _ = defense_scheme.generate(images, labels)
        if isinstance(
                def_images_org, Variable
        ):  # FIXME - Variable in Variable out for all methods
            def_images_org = def_images_org.data
        outputs = self.model(self.to_var(def_images_org, self.cuda, True))
        outputs = outputs.float()
        _, preds = torch.topk(outputs, 5)
        acc = (labels == preds.data[:, 0]).float().mean()
        top5 = torch.sum(
            (labels.unsqueeze(1).repeat(1, 5) == preds.data).float(),
            dim=1).mean()
        eval_def_metrics.update('Def-Test/Acc', float(acc),
                                labels.size(0))
        eval_def_metrics.update('Def-Test/Top5', float(top5),
                                labels.size(0))
        eval_def_metrics.update('Def-Test/Time', time.time() - st,
                                labels.size(0))

        if self.step % self.args.log_step == 0 or self.step == len(
                self.val_loader):
            self.logger.scalar_summary(eval_metrics.avg, self.step,
                                       'TEST')
            self.logger.scalar_summary(eval_def_metrics.avg, self.step,
                                       'TEST')
            self.logger.scalar_summary(attack_metrics.avg, self.step,
                                       'ATTACK')
            self.logger.scalar_summary(defense_metrics.avg, self.step,
                                       'DEFENSE')
            self.logger.scalar_summary(dist_metrics.avg, self.step,
                                       'DIST')

            # Defense rate: fraction of attack-induced accuracy drop that
            # the defense recovers (guarded against a zero denominator
            # when the attack did not reduce accuracy).
            defense_rate = eval_metrics.avg[
                'Test/Acc'] - defense_metrics.avg['Defense/Acc']
            if eval_metrics.avg['Test/Acc'] - attack_metrics.avg[
                    'Attack/Acc']:
                defense_rate /= eval_metrics.avg[
                    'Test/Acc'] - attack_metrics.avg['Attack/Acc']
            else:
                defense_rate = 0
            defense_rate = 1 - defense_rate

            defense_top5 = eval_metrics.avg[
                'Test/Top5'] - defense_metrics.avg['Defense/Top5']
            if eval_metrics.avg['Test/Top5'] - attack_metrics.avg[
                    'Attack/Top5']:
                defense_top5 /= eval_metrics.avg[
                    'Test/Top5'] - attack_metrics.avg['Attack/Top5']
            else:
                defense_top5 = 0
            defense_top5 = 1 - defense_top5

            self.logger.log(
                "Defense Rate Top1: {:5.3f} | Defense Rate Top5: {:5.3f}".
                format(defense_rate, defense_top5), 'DEFENSE')

        if self.step % self.args.img_log_step == 0:
            # Log the first image of the batch at each pipeline stage.
            image_dict = {
                'Original':
                to_np(denormalize(images, self.args.dataset))[0],
                'Attacked':
                to_np(denormalize(adv_images, self.args.dataset))[0],
                'Defensed':
                to_np(denormalize(def_images, self.args.dataset))[0],
                'Perturbation':
                to_np(denormalize(images - adv_images,
                                  self.args.dataset))[0]
            }
            self.logger.image_summary(image_dict, self.step)
def generate(self, images, labels):
    """Generate adversarial images with a Carlini–Wagner-style attack.

    Performs an outer binary search over the confidence-loss constant `c`
    and an inner gradient-descent loop (via `get_optimizer`) that
    minimizes `Lp(perturbation) + c * confidence_loss`, restricted to the
    region selected by `self.mask`. The best (smallest-Lp, successful)
    adversarial images found across all outer steps are returned.

    Args:
        images: batch of normalized input images.
        labels: ground-truth labels; replaced by attack targets when
            `self.target` is set.

    Returns:
        Tuple of (normalized adversarial image tensor, attacked labels).
    """
    if self.target is not None:
        if self.target == -1:
            # Least likely method: target the lowest-scoring class.
            _, labels = torch.min(self.model(to_var(images)).data, dim=1)
        else:
            labels = self.target * torch.ones_like(labels)
    labels = to_var(labels)

    # Work in de-normalized [0, 1] pixel space throughout the attack.
    # images = to_var(images*self.std.data + self.mean.data)
    images = denormalize(to_var(images), self.args.dataset)
    outer_adv_images = images.data.clone()
    # Best Lp found so far per sample (1e10 = "no success yet").
    outer_Lp = torch.ones(images.size(0)) * 1e10
    if self.args.cuda:
        outer_Lp = outer_Lp.cuda()
    # Binary-search brackets for c, kept on self (presumably read by
    # self.binary_search — confirm against that method).
    self.lower = torch.zeros(self.args.batch_size)
    self.upper = torch.ones(self.args.batch_size) * 1e10
    if self.args.cuda:
        self.lower = self.lower.cuda()
        self.upper = self.upper.cuda()
    c = to_var(torch.ones(self.args.batch_size) * self.c0)
    tau = to_var(torch.ones(self.args.batch_size) * self.tau0)

    # perform binary search for the best c, i.e. constant for confidence loss
    for binary_step in range(self.outer_iter):
        # Per-sample count of improvements found in this outer step.
        update = torch.zeros(images.size(0))
        if self.args.cuda:
            update = update.cuda()
        # Pixel mask of coordinates still allowed to change (used by the
        # L0 variant below).
        valid = to_var(
            torch.ones(images.size(0), 1, images.size(2), images.size(3)))

        # variables used only inside the binary search loop
        inner_adv_grad = to_var(self.unclip(images.data))
        inner_adv_grad.requires_grad = True
        inner_adv_nograd = to_var(self.unclip(images.data))
        # Only the masked region receives gradient.
        inner_adv_latent = inner_adv_grad * self.mask + inner_adv_nograd * (
            1 - self.mask)
        inner_adv_images = self.clip(inner_adv_latent)
        inner_adv_out = self.model(
            normalize(inner_adv_images, self.args.dataset))
        inner_Lp = torch.ones(images.size(0)) * 1e10
        inner_grad = torch.zeros_like(images.data)
        if self.args.cuda:
            inner_Lp = inner_Lp.cuda()
        optimizer = get_optimizer(self.args.optimizer, [inner_adv_grad],
                                  self.args)

        for step in range(self.inner_iter):
            diff = (inner_adv_images - images).view(images.size(0), -1)
            # Distortion term of the CW objective.
            if self.norm == 'Li':
                # Li variant: penalize only coordinates exceeding tau.
                Lp = torch.max(torch.abs(diff), tau.view(-1, 1))
                Lp = torch.sum(Lp, dim=1)
            else:
                Lp = torch.norm(diff, p=2, dim=1)**2
            Lp_loss = torch.sum(Lp)
            # Z_t: logit of the (target) label; Z_nt: best other logit,
            # obtained by masking the label column with -1e10.
            Z_t = inner_adv_out.gather(1, labels.view(-1, 1)).squeeze(1)
            Z_nt, _ = torch.max(inner_adv_out.scatter(
                1, labels.view(-1, 1), -1e10), dim=1)
            Z_diff = Z_nt - Z_t
            if self.target is None:
                Z_diff = -Z_diff
            # Hinge confidence loss with margin kappa.
            conf_loss = torch.max(Z_diff,
                                  torch.ones_like(Z_diff) * (-self.kappa))
            loss = Lp_loss + torch.dot(c, conf_loss)
            optimizer.zero_grad()
            loss.backward(retain_graph=True)
            optimizer.step()
            grad = inner_adv_grad.grad

            # Re-assemble the adversarial image after the optimizer step;
            # `valid` freezes pixels disabled by the L0 variant.
            inner_adv_latent = inner_adv_grad * self.mask + inner_adv_nograd * (
                1 - self.mask)
            inner_adv_images = self.clip(
                inner_adv_latent) * valid + images * (1 - valid)
            # inner_adv_out = self.model((inner_adv_images - self.mean)/self.std)
            inner_adv_out = self.model(
                normalize(inner_adv_images, self.args.dataset))

            # Success: misclassification (untargeted) or hitting the
            # target class (targeted).
            success = (torch.max(inner_adv_out,
                                 dim=1)[1] == labels).data
            if self.target is None:
                success = ~success
            # Track the best (smallest-Lp) successful images, both within
            # this outer step and globally.
            inner_update = ((inner_Lp > Lp.data) * success).float()
            outer_update = ((outer_Lp > Lp.data) * success).float()
            update = update + inner_update
            inner_Lp += inner_update * (Lp.data - inner_Lp)
            outer_Lp += outer_update * (Lp.data - outer_Lp)
            inner_update = inner_update.view(-1, 1, 1, 1)
            inner_grad += inner_update * (grad.data - inner_grad)
            outer_update = outer_update.view(-1, 1, 1, 1)
            outer_adv_images = outer_update * inner_adv_images.data + \
                (1 - outer_update) * outer_adv_images

        # Adjust c based on which samples succeeded this outer step.
        c = self.binary_search(c, update)
        abs_diff = torch.abs(inner_adv_images - images)
        if self.norm == 'L0':
            # Freeze pixels whose change x gradient exceeds max_eps.
            totalchange = torch.sum(abs_diff.data * torch.abs(inner_grad),
                                    dim=1)
            valid = (totalchange > self.max_eps)
            valid = valid.view(
                (images.size(0), 1, images.size(2), images.size(3)))
        elif self.norm == 'Li':
            # Shrink tau toward the largest actual per-sample distortion.
            actual_tau, _ = torch.max(abs_diff.view(images.size(0), -1),
                                      dim=1)
            tau = self.reduce_tau(tau, actual_tau, update)

    # adv_images = (outer_adv_images - self.mean.data) / self.std.data
    adv_images = normalize(to_var(outer_adv_images), self.args.dataset)
    return adv_images.data, labels
def generate(self, images, labels):
    """Generate adversarial images with a few-pixel attack.

    For each image, searches for `self.n_pix` pixel changes — each encoded
    as (x, y, r, g, b) — via differential evolution, using the model's own
    prediction (or `self.target`) as the label to attack. State consumed
    by the DE objective/callback (`self.image`, `self.label`, `self.step`,
    `self.mapping`, `self.convergence`) is set per image before the search.

    Returns:
        Tuple of (stacked adversarial images in [0, 255] de-normalized
        space, LongTensor of attacked labels).
    """
    preds = np.argmax(to_np(self.model(to_var(images))), axis=1)
    # Work in de-normalized [0, 255] pixel space.
    images = denormalize(images, self.args.dataset) * 255
    #self.n_pix = int(images.size(2)*images.size(3)*self.args.gamma)
    # One (x, y, r, g, b) bound group per perturbed pixel.
    bounds = [(0, images[0].size(1) - 1), (0, images[0].size(2) - 1),
              (0, 255), (0, 255), (0, 255)] * self.n_pix

    adv_images = []
    adv_labels = []
    for i in range(len(images)):
        # Per-image state read by self.optimize / self.callback.
        self.image = images[i]
        self.label = int(preds[i])
        if self.target is not None:
            self.label = self.target
        self.convergence = False

        if self.init == 'normal':
            # Random initial population: uniform pixel locations, RGB
            # values drawn from N(128, 127), reshaped to
            # (popsize, n_pix * 5) as the DE init matrix expects.
            x_loc = np.random.uniform(0, images[0].size(1),
                                      self.n_pix * self.popsize)
            y_loc = np.random.uniform(0, images[0].size(2),
                                      self.n_pix * self.popsize)
            val = np.array(
                np.split(
                    np.random.normal(128, 127,
                                     self.n_pix * self.popsize * 3), 3))
            init = np.array(
                np.split(np.vstack((x_loc, y_loc, val)), self.n_pix,
                         axis=1))
            init = np.transpose(init.reshape(-1, self.popsize))
        else:
            init = self.init

        self.step = 0
        if self.args.domain_restrict:
            # Restrict perturbable coordinates to an artifact region.
            self.mapping = self.create_map(self.args.gamma,
                                           self.kwargs.get('artifact'))
        else:
            self.mapping = lambda x, y: (x, y)

        # polish=False avoids a gradient-based refinement pass; tol=0 and
        # atol=-1 disable early convergence so self.callback decides when
        # to stop.
        result = differential_evolution(self.optimize,
                                        bounds,
                                        init=init,
                                        strategy=self.strategy,
                                        maxiter=self.max_iter,
                                        popsize=self.popsize,
                                        seed=self.args.seed,
                                        callback=self.callback,
                                        mutation=0.5,
                                        recombination=1,
                                        polish=False,
                                        tol=0,
                                        atol=-1)
        adv_image = self.perturb(result.x).squeeze(0)
        adv_images.append(adv_image)
        adv_labels.append(self.label)
        self.step_meter.update(self.step - 1)
        #print("Average step per iter: {}".format(self.step_meter.avg))
    return torch.stack(adv_images), torch.LongTensor(
        adv_labels)  #, torch.FloatTensor(steps)