def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted
                labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)

    delta = torch.zeros_like(x)
    delta = nn.Parameter(delta)
    if self.rand_init:
        rand_init_delta(
            delta, x, self.ord, self.eps, self.clip_min, self.clip_max)
        delta.data = clamp(
            x + delta.data, min=self.clip_min, max=self.clip_max) - x

    rval = perturb_iterative(
        x, y, self.predict, nb_iter=self.nb_iter,
        eps=self.eps, eps_iter=self.eps_iter,
        loss_fn=self.loss_fn, minimize=self.targeted,
        ord=self.ord, clip_min=self.clip_min,
        clip_max=self.clip_max, delta_init=delta,
        l1_sparsity=self.l1_sparsity,
        accumulate_param_grad_prob=self.accumulate_param_grad_prob)

    return rval.data
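# Hedged sketch of what the rand_init branch above amounts to in the l-inf
# case (rand_init_delta is a shared helper and is not reproduced here): draw
# delta uniformly in [-eps, eps], then shrink it so x + delta stays inside
# [clip_min, clip_max]. The function name and defaults below are local to
# this example, not part of the attack class.
def _example_linf_rand_init(x, eps=0.3, clip_min=0.0, clip_max=1.0):
    delta = torch.empty_like(x).uniform_(-eps, eps)
    # Re-centre so the perturbed point remains a valid input.
    delta = torch.clamp(x + delta, clip_min, clip_max) - x
    return delta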
def perturb_fool_many(self, x, emb, indlist, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps, perturbing only the word positions listed
    in indlist.

    :param x: input tensor.
    :param emb: embedding tensor (batch x sequence length x embedding dim).
    :param indlist: per-example list of word positions allowed to be
                    perturbed.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted
                labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs, plus the
             word_balance_memory, loss_memory, tablistbatch and fool values
             returned by perturb_iterative_fool_many.
    """
    emb, y = self._verify_and_process_inputs(emb, y)

    delta = torch.zeros_like(emb)
    delta = nn.Parameter(delta)
    if self.rand_init:
        rand_init_delta(delta, emb, np.inf, self.eps,
                        self.clip_min, self.clip_max)
        delta.data = clamp(emb + delta.data,
                           min=self.clip_min, max=self.clip_max) - emb

    # Zero the perturbation at every word position that is not listed in
    # indlist for the corresponding example.
    with torch.no_grad():
        for ba in range(delta.size()[0]):
            for t in range(delta.size()[1]):
                if t not in indlist[ba]:
                    for k in range(delta.size()[2]):
                        delta[ba][t][k] = 0

    if self.ord == 0:
        for ba in range(delta.size()[0]):
            delta[ba] = my_proj_all(emb[ba] + delta[ba], emb[ba],
                                    indlist[ba], self.eps) - emb[ba]

    rval, word_balance_memory, loss_memory, tablistbatch, fool = \
        perturb_iterative_fool_many(
            x, emb, indlist, y, self.predict,
            nb_iter=self.nb_iter, eps=self.eps, epscand=self.epscand,
            eps_iter=self.eps_iter, loss_fn=self.loss_fn,
            minimize=self.targeted, ord=self.ord,
            clip_min=self.clip_min, clip_max=self.clip_max,
            delta_init=delta, l1_sparsity=self.l1_sparsity,
            rayon=self.rayon)

    return rval.data, word_balance_memory, loss_memory, tablistbatch, fool
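# Illustrative sketch (assumption: emb is laid out as (batch, seq_len,
# emb_dim), as the three nested loops above imply). It builds a 0/1 mask from
# indlist and zeroes delta outside the allowed word positions, which is what
# the torch.no_grad() block above does element by element. The helper name is
# local to this example.
def _example_mask_delta(delta, indlist):
    mask = torch.zeros_like(delta)
    for ba, positions in enumerate(indlist):
        # Allow gradient-based edits only at the listed token positions.
        mask[ba, list(positions), :] = 1.0
    return delta * mask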
def perturb(self, x, y=None):
    """
    Given examples (x, y), returns their adversarial counterparts with
    an attack length of eps.

    :param x: input tensor.
    :param y: label tensor.
              - if None and self.targeted=False, compute y as predicted
                labels.
              - if self.targeted=True, then y must be the targeted labels.
    :return: tensor containing perturbed inputs.
    """
    x, y = self._verify_and_process_inputs(x, y)
    shape, flat_x = _flatten(x)
    data_shape = tuple(shape[1:])

    def f(x):
        new_shape = (x.shape[0],) + data_shape
        input = x.reshape(new_shape)
        return self.predict(input)

    f_nes = NESWrapper(
        f, nb_samples=self.nb_samples, fd_eta=self.fd_eta)

    delta = torch.zeros_like(flat_x)
    delta = nn.Parameter(delta)
    if self.rand_init:
        rand_init_delta(delta, flat_x, self.ord, self.eps,
                        self.clip_min, self.clip_max)
        delta.data = clamp(flat_x + delta.data,
                           min=self.clip_min, max=self.clip_max) - flat_x

    rval = perturb_iterative(
        flat_x, y, f_nes, nb_iter=self.nb_iter,
        eps=self.eps, eps_iter=self.eps_iter,
        loss_fn=self.loss_fn, minimize=self.targeted,
        ord=self.ord, clip_min=self.clip_min,
        clip_max=self.clip_max, delta_init=delta,
        l1_sparsity=None)

    return rval.data.reshape(shape)
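# Minimal sketch (an assumption about _flatten, which is defined elsewhere) of
# the flatten / reshape round trip the closure f() relies on: the attack works
# on a 2-D view of x, and f() restores the original data shape before calling
# the model. The helper name is local to this example.
def _example_flatten_roundtrip(x):
    shape = x.shape
    flat_x = x.reshape(shape[0], -1)     # what _flatten is expected to return
    data_shape = tuple(shape[1:])
    restored = flat_x.reshape((flat_x.shape[0],) + data_shape)
    assert torch.equal(restored, x)
    return flat_x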
def perturb(self, x, y=None, num_rand_init=3):
    # Run the iterative attack from num_rand_init random starts and keep the
    # perturbed input that achieves the highest loss.
    max_loss = -10000
    max_adv_x = torch.zeros_like(x)
    for rand_int in range(num_rand_init):
        delta = torch.zeros_like(x)
        delta = nn.Parameter(delta)
        if self.rand_init:
            rand_init_delta(
                delta, x, self.ord, self.eps, self.clip_min, self.clip_max)
            delta.data = clamp(
                x + delta.data, min=self.clip_min, max=self.clip_max) - x

        adv_x, adv_loss = perturb_iterative(
            x, y, predictor_list=self.estimator_list,
            dis_list=self.distribution_list,
            nb_iter=self.nb_iter, eps=self.eps, eps_iter=self.eps_iter,
            loss_fn=self.loss_fn, minimize=self.targeted,
            ord=self.ord, clip_min=self.clip_min, clip_max=self.clip_max,
            delta_init=delta, sparsity=self.sparsity)

        if max_loss < adv_loss:
            max_loss = adv_loss
            max_adv_x = adv_x

    return max_adv_x.data
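# Hedged sketch of the restart bookkeeping above: run the attack from several
# random starts and keep the run that achieves the highest loss. run_attack is
# a hypothetical stand-in for the perturb_iterative call made in the loop.
def _example_best_of_restarts(x, run_attack, num_rand_init=3):
    max_loss = float("-inf")
    max_adv_x = torch.zeros_like(x)
    for _ in range(num_rand_init):
        adv_x, adv_loss = run_attack(x)
        if adv_loss > max_loss:
            max_loss, max_adv_x = adv_loss, adv_x
    return max_adv_x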
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0, sparsity=0.01):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function (here, a weighted ensemble exposing
                    `classifiers` and `weights`).
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor contains the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param sparsity: fraction of coordinates updated per step when ord=1.
    :return: tensor containing the perturbed input.
    """
    max_loss_value = -100000
    # Three random restarts; keep the restart that reaches the highest loss.
    for jj in range(3):
        max_loss_value_iter = -100000
        if delta_init is not None:
            delta = torch.zeros_like(xvar)
            delta = nn.Parameter(delta)
            rand_init_delta(delta, xvar, ord, eps, clip_min, clip_max)
            delta.data = clamp(xvar + delta.data,
                               min=clip_min, max=clip_max) - xvar
        else:
            delta = torch.zeros_like(xvar)
        delta.requires_grad_()

        for ii in range(nb_iter):
            avg_grad = torch.FloatTensor(xvar.shape)
            avg_grad.zero_()
            if xvar.is_cuda:
                avg_grad = avg_grad.cuda()

            # Weighted average of the ensemble members' outputs.
            p = predict.weights / np.sum(predict.weights)
            for id_classifier, classifier in enumerate(predict.classifiers):
                if id_classifier == 0:
                    outputs = classifier(xvar + delta) * p[id_classifier]
                else:
                    outputs = outputs + \
                        classifier(xvar + delta) * p[id_classifier]

            loss = loss_fn(outputs, yvar)
            if minimize:
                loss = -loss

            loss.backward()
            avg_grad = delta.grad.detach()

            if ord == np.inf:
                grad_sign = avg_grad.sign()
                delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
                delta.data = batch_clamp(eps, delta.data)
                delta.data = clamp(xvar.data + delta.data,
                                   clip_min, clip_max) - xvar.data
            elif ord == 2:
                grad = avg_grad
                grad = normalize_by_pnorm(grad)
                delta.data = delta.data + batch_multiply(eps_iter, grad)
                delta.data = clamp(xvar.data + delta.data,
                                   clip_min, clip_max) - xvar.data
                if eps is not None:
                    delta.data = clamp_by_pnorm(delta.data, ord, eps)
            elif ord == 1:
                with torch.no_grad():
                    grad = avg_grad
                    abs_grad = torch.abs(avg_grad)
                    batch_size = grad.size(0)
                    view = abs_grad.view(batch_size, -1)
                    view_size = view.size(1)
                    # Keep only the top sparsity-fraction of coordinates.
                    vals, idx = view.topk(int(sparsity * view_size))
                    out = torch.zeros_like(view).scatter_(1, idx, vals)
                    out = out.view_as(grad)
                    grad = grad.sign() * (out > 0).float()
                    grad = normalize_by_pnorm(grad, p=1)
                    delta.data += batch_multiply(eps_iter, grad)

                    delta.data = batch_l1_proj(delta.data.cpu(), eps)
                    if xvar.is_cuda:
                        delta.data = delta.data.cuda()
                    delta.data = clamp(xvar.data + delta.data,
                                       clip_min, clip_max) - xvar.data
            else:
                error = ("Only ord = inf, ord = 1 and ord = 2 have been "
                         "implemented")
                raise NotImplementedError(error)

            delta.grad.data.zero_()

            # Re-evaluate the ensemble loss at the clamped adversarial point
            # and remember the best iterate of this restart.
            x_adv = clamp(xvar + delta, clip_min, clip_max)
            for id_classifier, classifier in enumerate(predict.classifiers):
                if id_classifier == 0:
                    outputs_2 = classifier(xvar + delta) * p[id_classifier]
                else:
                    outputs_2 = outputs_2 + \
                        classifier(xvar + delta) * p[id_classifier]
            loss_2 = loss_fn(outputs_2, yvar)

            if max_loss_value_iter < loss_2:
                max_loss_value_iter = loss_2
                max_adv_iter = x_adv

        if max_loss_value < max_loss_value_iter:
            max_loss_value = max_loss_value_iter
            max_adv = max_adv_iter

    return max_adv
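# Minimal sketch (an assumption, not part of this module) of the ensemble
# object that perturb_iterative expects: the loops above only rely on
# predict.classifiers (a list of callables) and predict.weights (array-like),
# which are combined into a weighted average of the individual outputs.
class _ExampleWeightedEnsemble:
    def __init__(self, classifiers, weights=None):
        self.classifiers = list(classifiers)
        self.weights = np.array(
            weights if weights is not None
            else [1.0] * len(self.classifiers))

# e.g. predict = _ExampleWeightedEnsemble([model_a, model_b], weights=[2.0, 1.0])
#      adv = perturb_iterative(x, y, predict, nb_iter=40, eps=0.3,
#                              eps_iter=0.01, loss_fn=loss_fn)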