def perturb(self, z, y):
    z = z.detach().clone()
    y = y.detach().clone()
    delta = torch.zeros_like(z)
    delta.requires_grad_()

    for ii in range(self.nb_iter):
        outputs = self.predict(self.gan(z + delta))
        adv_loss = self.loss_fn(outputs, y)
        # Hinge penalty on latent components whose square exceeds 0.01.
        reg_loss = F.relu(delta.pow(2) - 0.01).mean(dim=-1).sum()
        loss = adv_loss - 100 * reg_loss
        loss.backward()

        if self.ord == 'inf':
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(
                self.eps_iter, grad_sign)
        elif self.ord == 'l2':
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(self.eps_iter, grad)
        else:
            delta.data = delta.data + batch_multiply(
                self.eps_iter, delta.grad.data)

        delta.grad.data.zero_()
        print('[{}/{}] Loss: {:.4f}'.format(ii, self.nb_iter,
                                            adv_loss.item()))

    return self.gan(z + delta)
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size per iteration.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor contains the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf or 2).
    :param clip_min: (optional float) minimum value per input dimension.
    :param clip_max: (optional float) maximum value per input dimension.
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        outputs = predict(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data

        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)
        else:
            error = "Only ord = inf and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
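# Example usage of the perturb_iterative variant above (a minimal sketch, not
# part of the original source): an untargeted Linf PGD run against a toy
# MNIST-sized classifier. The model, tensors, and the _example_* name are
# illustrative assumptions; the call relies on the helper functions already
# used above (batch_multiply, batch_clamp, clamp, ...).
def _example_linf_pgd():
    import numpy as np
    import torch
    import torch.nn as nn

    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    x = torch.rand(8, 1, 28, 28)    # inputs assumed to lie in [0, 1]
    y = torch.randint(0, 10, (8,))  # ground-truth labels
    x_adv = perturb_iterative(
        x, y, predict=model, nb_iter=40, eps=0.3, eps_iter=0.01,
        loss_fn=nn.CrossEntropyLoss(reduction="sum"), ord=np.inf)
    return x_adv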
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted
                labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)
    xadv = x.requires_grad_()
    if self.predict2 is not None:
        outputs = self.predict2(self.predict1(xadv))
    else:
        outputs = self.predict1(xadv)

    loss = self.loss_fn(outputs, y)
    if self.targeted:
        loss = -loss
    loss.backward()
    grad_sign = xadv.grad.detach().sign()

    xadv = xadv + batch_multiply(self.eps, grad_sign)
    xadv = clamp(xadv, self.clip_min, self.clip_max)

    return xadv.detach()
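# For reference, the single-step update above is FGSM-style; as a standalone
# sketch under assumptions (model, x, y, and eps are illustrative
# placeholders, and the valid input range is taken to be [0, 1]):
def _example_fgsm(model, x, y, eps=0.3):
    import torch.nn.functional as F

    x = x.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(x), y, reduction="sum")
    loss.backward()
    # One signed-gradient step followed by clipping to the data range.
    x_adv = (x + eps * x.grad.sign()).clamp(0.0, 1.0)
    return x_adv.detach()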
def rand_init_delta(delta, x, ord, eps, clip_min, clip_max):
    # TODO: currently only one way of "uniform" sampling is considered.
    # For Linf, there are 3 ways:
    #   1) true uniform sampling by first calculating the rectangle, then
    #      sampling
    #   2) uniform in eps box, then truncate using the data domain
    #      (implemented)
    #   3) uniform sample in the data domain, then truncate with the eps box
    # For L2, true uniform sampling is hard, since it requires uniform
    # sampling inside an intersection of a cube and a ball, so there are 2
    # ways:
    #   1) uniform sample in the data domain, then truncate using the L2 ball
    #      (implemented)
    #   2) uniform sample in the L2 ball, then truncate using the data domain
    if isinstance(eps, torch.Tensor):
        assert len(eps) == len(delta)

    if ord == np.inf:
        delta.data.uniform_(-1, 1)
        delta.data = batch_multiply(eps, delta.data)
    elif ord == 2:
        delta.data.uniform_(0, 1)
        delta.data = delta.data - x
        delta.data = clamp_by_pnorm(delta.data, ord, eps)
    else:
        error = "Only ord = inf and ord = 2 have been implemented"
        raise NotImplementedError(error)

    delta.data = clamp(x + delta.data, min=clip_min, max=clip_max) - x
    return delta.data
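# Typical use of rand_init_delta (a sketch under assumptions, not original
# code): draw a random starting point inside the eps-ball before running an
# iterative attack, as in randomly-initialized PGD. `x` is an assumed input
# batch; `clamp` is the helper already used above.
def _example_random_start(x, eps=0.3, clip_min=0.0, clip_max=1.0):
    import numpy as np
    import torch

    delta = torch.zeros_like(x)
    delta.requires_grad_()
    rand_init_delta(delta, x, np.inf, eps, clip_min, clip_max)
    # Re-project so that x + delta stays inside the valid data range.
    delta.data = clamp(x + delta.data, min=clip_min, max=clip_max) - x
    return delta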
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted
                labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)

    delta = torch.zeros_like(x)
    g = torch.zeros_like(x)

    delta = nn.Parameter(delta)

    for i in range(self.nb_iter):

        if delta.grad is not None:
            delta.grad.detach_()
            delta.grad.zero_()

        imgadv = x + delta
        outputs = self.predict(imgadv)
        loss = self.loss_fn(outputs, y)
        if self.targeted:
            loss = -loss
        loss.backward()

        g = self.decay_factor * g + normalize_by_pnorm(
            delta.grad.data, p=1)
        # according to the paper it should be .sum(), but in their
        # implementations (both cleverhans and the link from the paper)
        # it is .mean(); in practice it shouldn't matter
        if self.ord == np.inf:
            delta.data += batch_multiply(self.eps_iter, torch.sign(g))
            delta.data = batch_clamp(self.eps, delta.data)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x
        elif self.ord == 2:
            delta.data += self.eps_iter * normalize_by_pnorm(g, p=2)
            delta.data *= clamp(
                (self.eps * normalize_by_pnorm(delta.data, p=2) /
                 delta.data),
                max=1.)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x
        else:
            error = "Only ord = inf and ord = 2 have been implemented"
            raise NotImplementedError(error)

    rval = x + delta.data
    return rval
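# The momentum accumulation in the loop above follows MI-FGSM
# (Dong et al., 2018, "Boosting Adversarial Attacks with Momentum"); as a
# worked sketch of one Linf step, with mu = self.decay_factor:
#
#     g_{t+1}     = mu * g_t + grad_t / ||grad_t||_1
#     delta_{t+1} = clip_eps( delta_t + eps_iter * sign(g_{t+1}) )
#
# normalize_by_pnorm(delta.grad.data, p=1) computes the per-example
# grad / ||grad||_1 term, and batch_clamp(self.eps, .) is the Linf projection.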
def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
    unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
    logit_margin = self.search_loss_fn(self.predict(xadv), y)
    maxeps = self.maxeps * torch.ones_like(y).float()
    curr_eps = bisection_search(
        prev_eps, unitptb, self.predict, x, y, self.search_loss_fn,
        logit_margin, maxeps, self.num_search_steps)
    return unitptb, curr_eps
def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
    unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
    logit_margin = elementwise_margin(self.predict(xadv), y)
    ones = torch.ones_like(y).float()
    maxeps = self.maxeps * ones
    curr_eps = bisection_search(
        maxeps * 0.5, unitptb, self.predict, x, y, elementwise_margin,
        logit_margin, maxeps, self.num_search_steps)
    return unitptb, curr_eps
def perturb(self, x, y, prev_eps):
    self.pgdadv.eps = prev_eps
    self.pgdadv.eps_iter = self.scale_eps_iter(
        self.pgdadv.eps, self.pgdadv.nb_iter)
    with ctx_noparamgrad_and_eval(self.predict):
        xadv = self.pgdadv.perturb(x, y)

    unitptb, curr_eps = self._get_unitptb_and_eps(xadv, x, y, prev_eps)
    xadv = x + batch_multiply(curr_eps, unitptb)
    return xadv, curr_eps
def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
    unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
    adv_logits = self.predict(xadv)
    logit_margin = elementwise_margin(adv_logits, y)
    ones = torch.ones_like(y).float()
    # maxeps = self.maxeps * ones
    maxeps = torch.norm((xadv - x).view(x.shape[0], -1),
                        p=self.pgdadv.ord, dim=1)

    adv_pred = adv_logits.argmax(1)
    pred = adv_pred.clone()
    i = 0
    # Grow maxeps (by a factor of 1.5, at most 10 times) for examples whose
    # rescaled perturbation is not yet adversarial.
    while i < 10:
        if self.pgdadv.targeted:
            unsuccessful_adv_idx = (adv_pred != self.target_y) & \
                (pred != self.target_y)
            if not unsuccessful_adv_idx.any():
                break
        else:
            unsuccessful_adv_idx = (adv_pred == y) & (pred == y)

        maxeps[unsuccessful_adv_idx] *= 1.5
        maxeps_ = maxeps[unsuccessful_adv_idx]
        unitptb_ = unitptb[unsuccessful_adv_idx]
        x_ = x[unsuccessful_adv_idx]
        x_ = clamp(x_ + batch_multiply(maxeps_, unitptb_), min=0., max=1.)
        pred[unsuccessful_adv_idx] = self.predict(x_).argmax(1)
        i += 1

    curr_eps = bisection_search(
        maxeps * 0.5, unitptb, self.predict, x, y, elementwise_margin,
        logit_margin, maxeps, self.num_search_steps)
    if self.pgdadv.targeted:
        curr_eps[pred != self.target_y] = np.inf
    return unitptb, curr_eps
def perturb(self, x, y, target_y=None):
    with ctx_noparamgrad_and_eval(self.predict):
        if self.pgdadv.targeted:
            self.target_y = target_y
            xadv = self.pgdadv.perturb(x, target_y)
            adv_pred = self.pgdadv.predict(xadv).argmax(1)
        else:
            xadv = self.pgdadv.perturb(x, y)

    unitptb, curr_eps = self._get_unitptb_and_eps(
        xadv, x, y, self.pgdadv.eps)
    xadv = clamp(x + batch_multiply(curr_eps, unitptb),
                 min=self.pgdadv.clip_min, max=self.pgdadv.clip_max)
    return xadv
def bisection_search(cur_eps, ptb, model, data, label, fn_margin, margin_init,
                     maxeps, num_steps, cur_min=None, clip_min=0.,
                     clip_max=1.):
    assert torch.all(cur_eps <= maxeps)

    margin = margin_init

    if cur_min is None:
        cur_min = torch.zeros_like(margin)
    cur_max = maxeps.clone().detach()

    for ii in range(num_steps):
        cur_min = torch.max((margin < 0).float() * cur_eps, cur_min)
        cur_max = torch.min(
            ((margin < 0).float() * maxeps + (margin >= 0).float() * cur_eps),
            cur_max)
        cur_eps = (cur_min + cur_max) / 2
        margin = fn_margin(
            model(clamp(data + batch_multiply(cur_eps, ptb),
                        min=clip_min, max=clip_max)),
            label)

    assert torch.all(cur_eps <= maxeps)
    return cur_eps
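# Example usage of bisection_search (a hedged sketch under assumptions): given
# an adversarial example `xadv` found at radius `eps`, search along the same
# perturbation direction for the smallest per-example radius that stays
# adversarial. `elementwise_margin` is assumed to be the per-example margin
# used by the callers above (non-negative once the example is misclassified).
def _example_minimal_eps(model, x, xadv, y, eps, num_steps=10):
    import torch

    unitptb = batch_multiply(1. / (eps + 1e-12), xadv - x)
    margin_init = elementwise_margin(model(xadv), y)
    maxeps = eps * torch.ones_like(y).float()
    return bisection_search(maxeps * 0.5, unitptb, model, x, y,
                            elementwise_margin, margin_init, maxeps,
                            num_steps)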
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0, sparsity=0.01,
                      eot_samples=1):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor contains the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param sparsity: fraction of gradient entries kept for the L1 step.
    :param eot_samples: number of forward/backward samples averaged per step
        (Expectation over Transformation).
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    n = normal.Normal(0, 100)  # note: unused

    delta.requires_grad_()
    for ii in range(nb_iter):
        avg_grad = torch.FloatTensor(xvar.shape)
        avg_grad.zero_()
        if xvar.is_cuda:
            avg_grad = avg_grad.cuda()
        for _ in range(eot_samples):
            outputs = predict(xvar + delta)
            loss = loss_fn(outputs, yvar)
            if minimize:
                loss = -loss
            loss.backward()
            avg_grad += delta.grad.detach()
            # reset the gradient so each EOT sample contributes exactly once
            delta.grad.data.zero_()
        avg_grad = avg_grad / eot_samples

        if ord == np.inf:
            grad_sign = avg_grad.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data

        elif ord == 2:
            grad = avg_grad
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)

        elif ord == 1:
            with torch.no_grad():
                grad = avg_grad
                abs_grad = torch.abs(avg_grad)

                batch_size = grad.size(0)
                view = abs_grad.view(batch_size, -1)
                view_size = view.size(1)

                vals, idx = view.topk(int(sparsity * view_size))

                out = torch.zeros_like(view).scatter_(1, idx, vals)
                out = out.view_as(grad)
                grad = grad.sign() * (out > 0).float()
                grad = normalize_by_pnorm(grad, p=1)
                delta.data += batch_multiply(eps_iter, grad)

                delta.data = batch_l1_proj(delta.data.cpu(), eps)
                if xvar.is_cuda:
                    delta.data = delta.data.cuda()
                delta.data = clamp(xvar.data + delta.data,
                                   clip_min, clip_max) - xvar.data
        else:
            error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
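# Example (a sketch under assumptions, mirroring the eot_samples parameter
# above): when the forward pass is stochastic, e.g. a randomized noise
# defense, averaging the gradient over several draws implements Expectation
# over Transformation (EOT). `model`, `x`, and `y` are placeholders.
def _example_eot_pgd(model, x, y):
    import numpy as np
    import torch
    import torch.nn as nn

    def noisy_predict(inp):
        # Stochastic preprocessing: a single draw gives a noisy gradient,
        # so perturb_iterative averages it over eot_samples draws.
        return model(inp + 0.05 * torch.randn_like(inp))

    return perturb_iterative(
        x, y, predict=noisy_predict, nb_iter=20, eps=0.3, eps_iter=0.02,
        loss_fn=nn.CrossEntropyLoss(reduction="sum"), ord=np.inf,
        eot_samples=8)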
def masked_perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter,
                             loss_fn, delta_init=None, minimize=False,
                             ord=np.inf, clip_min=0.0, clip_max=1.0,
                             l1_sparsity=None, mask_steps=100,
                             device="cuda:0"):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor contains the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param l1_sparsity: sparsity value for L1 projection.
                  - if None, then perform regular L1 projection.
                  - if float value, then perform sparse L1 descent from
                    Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
    :param mask_steps: number of times a mask should be drawn and a delta
        computed.
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in tqdm(range(nb_iter)):
        new_delta = 0  # added
        for jj in range(mask_steps):  # added
            outputs = predict(xvar + delta)

            # MASKED part
            mask = torch.Tensor(
                np.random.randint(0, 2, size=outputs.shape[1]))  # added
            mask = torch.stack([mask for i in range(outputs.shape[0])])

            # force the true label to never be masked
            for i in range(len(yvar)):
                mask[i][yvar[i]] = 1

            # allow for the multiplication in log space
            mask[mask == 0] = -100000

            mask = mask.to(device)
            outputs = outputs * mask

            loss = loss_fn(outputs, yvar)
            if minimize:
                loss = -loss

            loss.backward()
            if ord == np.inf:
                grad_sign = delta.grad.data.sign()
                delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
                delta.data = batch_clamp(eps, delta.data)
                delta.data = clamp(xvar.data + delta.data,
                                   clip_min, clip_max) - xvar.data

            elif ord == 2:
                grad = delta.grad.data
                grad = normalize_by_pnorm(grad)
                delta.data = delta.data + batch_multiply(eps_iter, grad)
                delta.data = clamp(xvar.data + delta.data,
                                   clip_min, clip_max) - xvar.data
                if eps is not None:
                    delta.data = clamp_by_pnorm(delta.data, ord, eps)

            elif ord == 1:
                grad = delta.grad.data
                abs_grad = torch.abs(grad)

                batch_size = grad.size(0)
                view = abs_grad.view(batch_size, -1)
                view_size = view.size(1)
                if l1_sparsity is None:
                    vals, idx = view.topk(1)
                else:
                    vals, idx = view.topk(
                        int(np.round((1 - l1_sparsity) * view_size)))

                out = torch.zeros_like(view).scatter_(1, idx, vals)
                out = out.view_as(grad)
                grad = grad.sign() * (out > 0).float()
                grad = normalize_by_pnorm(grad, p=1)
                delta.data = delta.data + batch_multiply(eps_iter, grad)

                delta.data = batch_l1_proj(delta.data.cpu(), eps)
                if xvar.is_cuda:
                    delta.data = delta.data.to(device)
                delta.data = clamp(xvar.data + delta.data,
                                   clip_min, clip_max) - xvar.data
            else:
                error = ("Only ord = inf, ord = 1 and ord = 2 have been "
                         "implemented")
                raise NotImplementedError(error)

            new_delta += delta.data  # added
            delta.grad.data.zero_()
        delta.data = new_delta / mask_steps  # added

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv, delta
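# The per-step class mask above can be written equivalently with batched
# torch ops (an illustrative sketch only; shapes follow the loop body above):
def _example_class_mask(outputs, yvar, device="cuda:0"):
    import torch

    # Randomly keep about half of the classes, always keeping the true label;
    # masked positions get a large negative value, matching the loop above,
    # which multiplies the logits by this mask.
    mask = torch.randint(0, 2, (outputs.shape[1],), dtype=torch.float)
    mask = mask.unsqueeze(0).repeat(outputs.shape[0], 1)
    mask[torch.arange(len(yvar)), yvar] = 1
    mask[mask == 0] = -100000
    return mask.to(device)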
def perturb_iterative_fool_many(xvar, embvar, indlistvar, yvar, predict,
                                nb_iter, eps, epscand, eps_iter, loss_fn,
                                rayon, delta_init=None, minimize=False,
                                ord=np.inf, clip_min=0.0, clip_max=1.0,
                                l1_sparsity=None):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data (token ids).
    :param embvar: input embeddings corresponding to xvar.
    :param indlistvar: indices of the tokens that may be perturbed.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param epscand: cosine-similarity threshold used to pre-select candidate
        replacement tokens.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param rayon: radius used by the nearest-neighbor candidate search.
    :param delta_init: (optional) tensor contains the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 0, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param l1_sparsity: sparsity value for L1 projection.
                  - if None, then perform regular L1 projection.
                  - if float value, then perform sparse L1 descent from
                    Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
    :return: tuple (perturbed embeddings, word loss history, embedding loss
        history, list of encountered words, fool flag).
    """
    # will contain all words encountered during PGD
    nb = len(indlistvar)
    tablist = []
    for t in range(nb):
        tablist += [[]]
    fool = False

    # contain each loss on the embedding and each difference of loss on the
    # word nearest neighbor
    loss_memory = np.zeros((nb_iter, ))
    word_balance_memory = np.zeros((nb_iter, ))

    candid = [torch.empty(0)] * nb
    convers = [[]] * nb
    for u in range(nb):
        # prepare all potential candidates, once and for all
        candidates = torch.empty([0, 768]).to(device)
        conversion = []
        emb_matrix = model.roberta.embeddings.word_embeddings.weight
        normed_emb_matrix = F.normalize(emb_matrix, p=2, dim=1)
        normed_emb_word = F.normalize(embvar[0][indlistvar[u]], p=2, dim=0)
        cosine_similarity = torch.matmul(
            normed_emb_word, torch.transpose(normed_emb_matrix, 0, 1))
        for t in range(len(cosine_similarity)):  # TODO: avoid doing TWO loops
            if cosine_similarity[t] > epscand:
                if levenshtein(
                        tokenizer.decode(
                            torch.tensor([xvar[0][indlistvar[u]]])),
                        tokenizer.decode(torch.tensor([t]))) != 1:
                    candidates = torch.cat(
                        (candidates, normed_emb_matrix[t].unsqueeze(0)), 0)
                    conversion += [t]
        candid[u] = candidates
        convers[u] = conversion
        print("nb of candidates:")
        print(len(conversion))

    # U, S, V = torch.svd(model.roberta.embeddings.word_embeddings.weight)

    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(embvar)

    # PGD
    delta.requires_grad_()
    ii = 0
    while ii < nb_iter and not fool:
        outputs = predict(xvar, embvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            grad_sign = tozerolist(grad_sign, indlistvar)
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(embvar.data + delta.data, clip_min,
                               clip_max) - embvar.data  # remove this?
            with torch.no_grad():
                delta.data = tozero(delta.data, indlistvar)

            if (ii % 300) == 0:
                adverslist = []
                for t in range(nb):
                    advers, nb_vois = neighboors_np_dens_cand(
                        (embvar + delta)[0][indlistvar[t]], rayon, candid[t])
                    advers = int(advers[0])
                    advers = torch.tensor(convers[t][advers])
                    if len(tablist[t]) == 0:
                        tablist[t] += [
                            (tokenizer.decode(advers.unsqueeze(0)), ii,
                             nb_vois)
                        ]
                    elif not (first(tablist[t][-1]) == tokenizer.decode(
                            advers.unsqueeze(0))):
                        tablist[t] += [
                            (tokenizer.decode(advers.unsqueeze(0)), ii,
                             nb_vois)
                        ]
                    adverslist += [advers]
                word_balance_memory[ii] = float(
                    model(replacelist(xvar, indlistvar, adverslist),
                          labels=1 - yvar)[0]) - float(
                    model(replacelist(xvar, indlistvar, adverslist),
                          labels=yvar)[0])
                if word_balance_memory[ii] < 0:
                    fool = True

        elif ord == 0:
            grad = delta.grad.data
            grad = tozero(grad, indlistvar)
            grad = torch.matmul(
                torch.cat((torch.matmul(grad, v)[:, :, :50],
                           torch.zeros([768 - 50]).to(device)), 2),
                v.t())
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data[0] = my_proj_all(embvar.data[0] + delta.data[0],
                                        embvar[0], indlistvar,
                                        eps) - embvar.data[0]
            delta.data = clamp(embvar.data + delta.data, clip_min,
                               clip_max) - embvar.data  # probably removable
            with torch.no_grad():
                delta.data = tozero(delta.data, indlistvar)

            if (ii % 300) == 0:
                adverslist = []
                for t in range(nb):
                    advers, nb_vois = neighboors_np_dens_cand(
                        (embvar + delta)[0][indlistvar[t]], rayon, candid[t])
                    advers = int(advers[0])
                    advers = torch.tensor(convers[t][advers])
                    if len(tablist[t]) == 0:
                        tablist[t] += [
                            (tokenizer.decode(advers.unsqueeze(0)), ii,
                             nb_vois)
                        ]
                    elif not (first(tablist[t][-1]) == tokenizer.decode(
                            advers.unsqueeze(0))):
                        tablist[t] += [
                            (tokenizer.decode(advers.unsqueeze(0)), ii,
                             nb_vois)
                        ]
                    adverslist += [advers]
                word_balance_memory[ii] = float(
                    model(replacelist(xvar, indlistvar, adverslist),
                          labels=1 - yvar)[0]) - float(
                    model(replacelist(xvar, indlistvar, adverslist),
                          labels=yvar)[0])
                if word_balance_memory[ii] < 0:
                    fool = True

        elif ord == 2:
            grad = delta.grad.data
            grad = tozero(grad, indlistvar)
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(embvar.data + delta.data, clip_min,
                               clip_max) - embvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)
            with torch.no_grad():
                delta.data = tozero(delta.data, indlistvar)

            if (ii % 300) == 0:
                adverslist = []
                for t in range(nb):
                    advers, nb_vois = neighboors_np_dens_cand(
                        (embvar + delta)[0][indlistvar[t]], rayon, candid[t])
                    advers = int(advers[0])
                    advers = torch.tensor(convers[t][advers])
                    if len(tablist[t]) == 0:
                        tablist[t] += [
                            (tokenizer.decode(advers.unsqueeze(0)), ii,
                             nb_vois)
                        ]
                    elif not (first(tablist[t][-1]) == tokenizer.decode(
                            advers.unsqueeze(0))):
                        tablist[t] += [
                            (tokenizer.decode(advers.unsqueeze(0)), ii,
                             nb_vois)
                        ]
                    adverslist += [advers]
                word_balance_memory[ii] = float(
                    model(replacelist(xvar, indlistvar, adverslist),
                          labels=1 - yvar)[0]) - float(
                    model(replacelist(xvar, indlistvar, adverslist),
                          labels=yvar)[0])
                if word_balance_memory[ii] < 0:
                    fool = True

        elif ord == 1:
            grad = delta.grad.data
            grad = tozero(grad, indlistvar)
            abs_grad = torch.abs(grad)

            batch_size = grad.size(0)
            view = abs_grad.view(batch_size, -1)
            view_size = view.size(1)
            if l1_sparsity is None:
                vals, idx = view.topk(1)
            else:
                vals, idx = view.topk(
                    int(np.round((1 - l1_sparsity) * view_size)))

            out = torch.zeros_like(view).scatter_(1, idx, vals)
            out = out.view_as(grad)
            grad = grad.sign() * (out > 0).float()
            grad = normalize_by_pnorm(grad, p=1)
            delta.data = delta.data + batch_multiply(eps_iter, grad)

            delta.data = batch_l1_proj(delta.data.cpu(), eps)
            if embvar.is_cuda:
                delta.data = delta.data.cuda()
            delta.data = clamp(embvar.data + delta.data, clip_min,
                               clip_max) - embvar.data
        else:
            error = ("Only ord = inf, ord = 0, ord = 1 and ord = 2 have "
                     "been implemented")
            raise NotImplementedError(error)

        delta.grad.data.zero_()
        with torch.no_grad():
            loss_memory[ii] = loss
        ii += 1

    # plt.plot(loss_memory)
    # plt.title("evolution of embed loss")
    # plt.show()
    # plt.plot(word_balance_memory)
    # plt.title("evolution of word loss difference")
    # plt.show()

    emb_adv = clamp(embvar + delta, clip_min, clip_max)
    return emb_adv, word_balance_memory, loss_memory, tablist, fool
def perturb_iterative(xvar, yvar, predict1, predict2, nb_iter, eps, eps_iter,
                      loss_fn, delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0, l1_sparsity=None):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict1: forward pass function (or feature extractor when
        predict2 is given).
    :param predict2: (optional) second-stage forward pass applied on top of
        predict1; if None, only predict1 is used.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor contains the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param l1_sparsity: sparsity value for L1 projection.
                  - if None, then perform regular L1 projection.
                  - if float value, then perform sparse L1 descent from
                    Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        if predict2 is not None:
            outputs = predict2(predict1(xvar + delta))
        else:
            outputs = predict1(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data

        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)

        elif ord == 1:
            grad = delta.grad.data
            abs_grad = torch.abs(grad)

            batch_size = grad.size(0)
            view = abs_grad.view(batch_size, -1)
            view_size = view.size(1)
            if l1_sparsity is None:
                vals, idx = view.topk(1)
            else:
                vals, idx = view.topk(
                    int(np.round((1 - l1_sparsity) * view_size)))

            out = torch.zeros_like(view).scatter_(1, idx, vals)
            out = out.view_as(grad)
            grad = grad.sign() * (out > 0).float()
            grad = normalize_by_pnorm(grad, p=1)
            delta.data = delta.data + batch_multiply(eps_iter, grad)

            delta.data = batch_l1_proj(delta.data.cpu(), eps)
            if xvar.is_cuda:
                delta.data = delta.data.cuda()
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data
        else:
            error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
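# Example usage of the two-stage variant above (an illustrative sketch; the
# feature-extractor/head split and all shapes are assumptions): `predict1`
# can be a feature extractor and `predict2` a classification head, while
# passing predict2=None attacks predict1 end to end.
def _example_two_stage_pgd(x, y):
    import numpy as np
    import torch.nn as nn

    feature_extractor = nn.Sequential(
        nn.Flatten(), nn.Linear(28 * 28, 64), nn.ReLU())
    classifier_head = nn.Linear(64, 10)
    return perturb_iterative(
        x, y, feature_extractor, classifier_head, nb_iter=40, eps=0.3,
        eps_iter=0.01, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
        ord=np.inf)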
def white_box_untargeted(args, image, target, model, enc=None, dec=None,
                         vae=None, ae=None, normalize=None):
    epsilon = 0.3
    # Create noise vector
    delta = torch.zeros_like(image, requires_grad=True).to(args.device)
    # Optimize noise vector (only) to fool model
    x = image

    use_vae = vae is not None
    use_ae = ae is not None

    print("Target is %d" % (target))
    for t in range(args.PGD_steps):
        if normalize is not None:
            if use_vae:
                x = x.view(x.size(0), -1).unsqueeze(0)
                z, mu, logvar = vae(x)
                z = z.clamp(0, 1)
                x = z.view(z.size(0), 1, 28, 28)
            elif use_ae:
                x = ae(x)
            pred = model(normalize(x + delta))
        else:
            if use_vae:
                x = x.view(x.size(0), -1).unsqueeze(0)
                z, mu, logvar = vae(x)
                z = z.clamp(0, 1)
                x = z.view(z.size(0), 1, 28, 28)
            elif use_ae:
                x = ae(x)
            pred = model(x.detach() + delta)
            recon_pred = model(x.detach())

        out = pred.max(1, keepdim=True)[1]  # index of the max log-probability
        recon_out = recon_pred.max(1, keepdim=True)[1]  # index of the max log-probability

        loss = nn.CrossEntropyLoss(reduction="sum")(pred, target)
        recon_image = (x)[0].detach()

        if args.comet:
            args.experiment.log_metric("Whitebox CE loss", loss, step=t)
            plot_image_to_comet(args, recon_image, "recon.png")
        if t % 5 == 0:
            print(t, out[0][0], recon_out[0][0], loss.item())

        loss.backward()
        grad_sign = delta.grad.data.sign()
        delta.data = delta.data + batch_multiply(0.01, grad_sign)
        # Clipping is equivalent to projecting back onto the l_\infty ball.
        # This technique is known as projected gradient descent (PGD).
        delta.data.clamp_(-epsilon, epsilon)
        delta.data = clamp(x.data + delta.data, 0., 1.) - x.data
        delta.grad.data.zero_()
        # if out != target:
        #     print(t, out[0][0], loss.item())
        #     break

    if args.comet:
        if not args.mnist:
            clean_image = (image)[0].detach().cpu().numpy().transpose(1, 2, 0)
            adv_image = (x + delta)[0].detach().cpu().numpy().transpose(
                1, 2, 0)
            delta_image = (delta)[0].detach().cpu().numpy().transpose(1, 2, 0)
        else:
            clean_image = (image)[0].detach()
            adv_image = (x + delta)[0].detach()
            recon_image = (x)[0].detach()
            delta_image = (delta)[0].detach().cpu()
        plot_image_to_comet(args, clean_image, "clean.png")
        plot_image_to_comet(args, adv_image, "Adv.png")
        plot_image_to_comet(args, delta_image, "delta.png")
        plot_image_to_comet(args, recon_image, "recon.png")
    return out, delta
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0, beta=0.5, early_stop=True):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size per iteration.
    :param loss_fn: loss function; here it must return a pair
        (adversarial loss, auxiliary loss w_loss).
    :param delta_init: (optional) tensor contains the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: (optional float) minimum value per input dimension.
    :param clip_max: (optional float) maximum value per input dimension.
    :param beta: (optional float) weight on the auxiliary w_loss term,
        decayed every 5 iterations.
    :param early_stop: (optional bool) stop as soon as the whole batch is
        misclassified.
    :return: tuple of the perturbed input and the number of iterations run.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    count = 0
    delta.requires_grad_()
    for ii in range(nb_iter):
        count += 1
        loss, w_loss = loss_fn(predict, yvar, xvar, xvar + delta)
        outputs = predict(xvar + delta)
        p = torch.argmax(outputs, dim=1)
        if torch.max(p == yvar) != 1 and early_stop:
            break  # attack already succeeded; end the iteration early
        predict.zero_grad()

        loss.backward(retain_graph=True)
        g1 = torch.mean(delta.grad.data.abs().reshape(-1, 28 * 28)).float()
        delta.grad.data.zero_()
        w_loss.backward(retain_graph=True)
        g2 = torch.mean(delta.grad.data.abs().reshape(-1, 28 * 28)).float()

        g = g1 / g2
        g = torch.min(g, torch.tensor(1e6))
        if count % 5 == 0:  # decay beta, otherwise the attack may not converge
            beta = beta / 10
        delta.grad.data.zero_()

        final_loss = loss + beta * g * w_loss
        final_loss.backward(retain_graph=True)

        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data
        elif ord == 1:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad, 1)
            grad = grad * 28 * 28
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data
        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data,
                               clip_min, clip_max) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)
        else:
            error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    iter_count = count
    return x_adv, iter_count