def _propose(x: torch.Tensor,
             x_adv: torch.Tensor,
             y: torch.Tensor,
             model: Module,
             clamp: Tuple[float, float] = (0, 1)) -> torch.Tensor:
    """Generate a proposal perturbed sample

    Args:
        x: Original sample
        x_adv: Adversarial sample
        y: Label of the original sample
        model: Model under attack, used to check whether each proposal is still adversarial
        clamp: Domain (i.e. max/min values) of the samples

    Note: orthogonal_step, perpendicular_step, orth_step_stats and perp_step_stats are free
    variables and are expected to be provided by the enclosing scope (e.g. the attack loop
    driving this proposal function).
    """
    # Sample from a unit Normal distribution with the same shape as the input
    perturbation = torch.normal(torch.zeros_like(x_adv), torch.ones_like(x_adv))

    # Rescale the perturbation so its l2 norm is at most orthogonal_step
    perturbation = project(torch.zeros_like(perturbation), perturbation, norm=2, eps=orthogonal_step)

    # Apply the perturbation and project back onto the sphere around the original sample so that
    # the distance between the perturbed adversarial sample and the original sample equals the
    # distance between the unperturbed adversarial sample and the original sample,
    # i.e. d(x_adv, x) = d(x_adv + perturbation, x)
    perturbed = x_adv + perturbation
    perturbed = project(x, perturbed, 2, torch.norm(x_adv - x, 2)).clamp(*clamp)

    # Record success/failure of the orthogonal step
    orth_step_stats.append(model(perturbed).argmax(dim=1) != y)

    # Make a step towards the original sample
    step_towards_original = project(torch.zeros_like(perturbation), x - perturbed,
                                    norm=2, eps=perpendicular_step)
    perturbed = (perturbed + step_towards_original).clamp(*clamp)

    # Record success/failure of the perpendicular step
    perp_step_stats.append(model(perturbed).argmax(dim=1) != y)

    # Clamp to the domain of the sample
    perturbed = perturbed.clamp(*clamp)

    return perturbed
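# The functions in this section rely on a `project` helper that is not defined here. The sketch
# below is only an assumption of its behaviour, inferred from how it is called (project x_adv back
# onto the L_norm ball of radius eps centred at x); the repository's actual implementation may
# differ. torch and the typing imports are assumed to be available at module level, as for the
# functions above.
def project(x: torch.Tensor, x_adv: torch.Tensor, norm: Union[str, float],
            eps: Union[float, torch.Tensor]) -> torch.Tensor:
    """Project x_adv onto the L_norm ball of radius eps around x (assumed behaviour)."""
    if norm == 'inf':
        # Clip every element into the box [x - eps, x + eps]
        return torch.max(torch.min(x_adv, x + eps), x - eps)
    delta = x_adv - x
    # Per-sample norms of the perturbation; samples already inside the ball are left unchanged
    norms = delta.view(delta.shape[0], -1).norm(norm, dim=-1).clamp(min=1e-12)
    factor = (eps / norms).clamp(max=1.0).view(-1, *([1] * (delta.dim() - 1)))
    return x + delta * factor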
def _iterative_gradient(model: Module,
                        x: torch.Tensor,
                        y: torch.Tensor,
                        loss_fn: Callable,
                        k: int,
                        step: float,
                        eps: float,
                        norm: Union[str, float],
                        step_norm: Union[str, float],
                        y_target: torch.Tensor = None,
                        random: bool = False,
                        clamp: Tuple[float, float] = (0, 1),
                        beta: float = 0.0,
                        flip_grad_ratio: float = 0.0
                        ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Base function for PGD and iterated FGSM

    Args:
        model: Model
        x: Batch of samples
        y: Corresponding labels
        loss_fn: Loss function to maximise (note: currently overridden internally by NLLLoss)
        k: Number of iterations to make
        step: Size of the step to make at each iteration
        eps: Maximum size of the adversarial perturbation; larger perturbations are projected
            back into the L_norm ball
        norm: Type of norm used for the projection, 'inf' for the infinity-norm or a number p
            for the corresponding p-norm
        step_norm: Type of norm used to normalise each step, 'inf' for the infinity-norm or a
            number p for the corresponding p-norm
        y_target: If None run an untargeted attack, otherwise a batch of labels for a targeted
            attack
        random: Whether to start iterated FGSM from a random point within the L_norm ball
        clamp: Maximum and minimum values of elements in the samples, i.e. (0, 1) for MNIST
        beta: Weight of the model regularisation term subtracted from the loss
        flip_grad_ratio: Fraction of gradient elements whose sign is randomly flipped

    Returns:
        x_adv: Adversarially perturbed version of x
        loss: Loss of the adversarial sample
        prob: Predictions for the adversarial sample
        genie_prob: Probability of the genie (if it exists, else None)
    """
    loss_fn = torch.nn.NLLLoss(reduction='none')  # overrides the loss_fn argument
    targeted = y_target is not None
    x_adv = x.detach().clone().to(x.device)
    if random:
        # Start from a uniformly random point inside an eps-sized box around x
        rand_gen = torch.distributions.uniform.Uniform(x_adv - eps, x_adv + eps)
        x_adv = rand_gen.sample().clamp(*clamp)

    for i in range(k):
        # x_adv is re-assigned at every iteration (x_adv = x_adv +/- gradients), so detach it to
        # drop the graph of the previous iteration (otherwise backward() would also differentiate
        # through the old clones and exhaust memory) and re-enable gradients afterwards, since
        # detach() disables them. The alternative (see the original variant below) is to work with
        # a temporary _x_adv clone, but that appears to prolong the computation, possibly because
        # of the re-cloning.
        x_adv = x_adv.detach().requires_grad_(True)
        prediction = model.calc_log_prob(x_adv)
        loss = loss_fn(prediction, y_target if targeted else y).mean() - beta * model.regularization.mean()
        x_adv_grad = torch.autograd.grad(loss, x_adv, create_graph=False)[0]
        if flip_grad_ratio > 0.0:
            # Flip the sign of a random subset of the gradient elements
            bit_mask = torch.rand_like(x_adv_grad) < flip_grad_ratio
            x_adv_grad[bit_mask] = x_adv_grad[bit_mask] * -1

        with torch.no_grad():
            if step_norm == 'inf':
                gradients = (x_adv_grad.sign() * step).detach()
            else:
                # .view() assumes batched image data as a 4D tensor
                gradients = x_adv_grad * step / x_adv_grad.view(x_adv.shape[0], -1)\
                    .norm(step_norm, dim=-1).view(-1, 1, 1, 1)

            if targeted:
                # Targeted: gradient descent on the loss of the (incorrect) target label
                # w.r.t. the input, increasing the probability of predicting that label
                x_adv = x_adv - gradients
            else:
                # Untargeted: gradient ascent on the loss of the correct label w.r.t. the input
                x_adv = x_adv + gradients

            # Project back into the L_norm ball and the valid range
            x_adv = project(x, x_adv, norm, eps).clamp(*clamp).detach()

    x_adv = x_adv.detach()
    loss, prob, genie_prob = model.eval_batch(x_adv, y_target if targeted else y,
                                              enable_grad=model.pnml_model)
    return x_adv, loss.detach(), prob.detach(), genie_prob
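# The routine above assumes a model exposing calc_log_prob, regularization, eval_batch and
# pnml_model. These are not standard PyTorch attributes, so the wrapper and demo below are only
# an illustrative sketch of how such an interface could look and how _iterative_gradient might be
# invoked; they are not the repository's actual model class. `nn` is imported locally to keep the
# sketch self-contained.
import torch.nn as nn


class _ToyProbModel(nn.Module):
    """Minimal wrapper mimicking the interface used by _iterative_gradient (assumed)."""

    def __init__(self, net: nn.Module):
        super().__init__()
        self.net = net
        self.pnml_model = False                  # no genie/refinement in this toy wrapper
        self.regularization = torch.zeros(1)     # dummy regularisation term

    def calc_log_prob(self, x: torch.Tensor) -> torch.Tensor:
        return torch.log_softmax(self.net(x), dim=1)

    def eval_batch(self, x: torch.Tensor, y: torch.Tensor, enable_grad: bool = False):
        with torch.set_grad_enabled(enable_grad):
            log_prob = self.calc_log_prob(x)
            loss = nn.functional.nll_loss(log_prob, y, reduction='none')
            return loss, log_prob.exp(), None    # genie_prob is None for a plain model


def _demo_iterative_gradient():
    """Run an untargeted L_inf attack on random data (illustrative placeholder shapes)."""
    model = _ToyProbModel(nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)))
    x = torch.rand(4, 1, 28, 28)
    y = torch.randint(0, 10, (4,))
    x_adv, adv_loss, adv_prob, genie_prob = _iterative_gradient(
        model, x, y, loss_fn=None,               # loss_fn is overridden internally
        k=10, step=0.01, eps=0.3, norm='inf', step_norm='inf', random=True)
    return x_adv, adv_loss, adv_prob, genie_prob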
def _iterative_gradient(model: Module,
                        x: torch.Tensor,
                        y: torch.Tensor,
                        loss_fn: Callable,
                        k: int,
                        step: float,
                        eps: float,
                        norm: Union[str, float],
                        step_norm: Union[str, float],
                        y_target: torch.Tensor = None,
                        random: bool = False,
                        clamp: Tuple[float, float] = (0, 1)) -> torch.Tensor:
    """Base function for PGD and iterated FGSM

    Args:
        model: Model
        x: Batch of samples
        y: Corresponding labels
        loss_fn: Loss function to maximise
        k: Number of iterations to make
        step: Size of the step to make at each iteration
        eps: Maximum size of the adversarial perturbation; larger perturbations are projected
            back into the L_norm ball
        norm: Type of norm used for the projection back into the L_norm ball
        step_norm: 2 for PGD, 'inf' for iterated FGSM
        y_target: If None run an untargeted attack, otherwise a batch of labels for a targeted
            attack
        random: Whether to start iterated FGSM from a random point within the L_norm ball
        clamp: Maximum and minimum values of elements in the samples, i.e. (0, 1) for MNIST

    Returns:
        x_adv: Adversarially perturbed version of x
    """
    x_adv = x.clone().detach().requires_grad_(True).to(x.device)
    targeted = y_target is not None
    if random:
        x_adv = random_perturbation(x_adv, norm, eps)

    for i in range(k):
        _x_adv = x_adv.clone().detach().requires_grad_(True)

        prediction = model(_x_adv)
        loss = loss_fn(prediction, y_target if targeted else y)
        loss.backward()

        with torch.no_grad():
            if step_norm == 'inf':
                gradients = _x_adv.grad.sign() * step
            else:
                # .view() assumes batched image data as a 4D tensor
                gradients = _x_adv.grad * step / _x_adv.grad.view(_x_adv.shape[0], -1)\
                    .norm(step_norm, dim=-1).view(-1, 1, 1, 1)

            if targeted:
                # Targeted: gradient descent on the loss of the (incorrect) target label
                # w.r.t. the input
                x_adv -= gradients
            else:
                # Untargeted: gradient ascent on the loss of the correct label w.r.t. the input
                x_adv += gradients

        # Project back into the L_norm ball and the correct range
        x_adv = project(x, x_adv, norm, eps).clamp(*clamp)

    return x_adv.detach()
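# `random_perturbation` is referenced above (random=True) but not defined in this section. The
# sketch below is an assumption of its behaviour: draw a random point inside or on the L_norm
# ball of radius eps around x. The repository's actual helper may differ.
def random_perturbation(x: torch.Tensor, norm: Union[str, float], eps: float) -> torch.Tensor:
    """Return x perturbed by a random vector within the L_norm ball of radius eps (assumed)."""
    if norm == 'inf':
        # Independent uniform offset per element for the infinity-norm ball
        perturbation = torch.empty_like(x).uniform_(-eps, eps)
    else:
        # Gaussian direction rescaled so the per-sample L_norm equals eps
        perturbation = torch.normal(torch.zeros_like(x), torch.ones_like(x))
        norms = perturbation.view(perturbation.shape[0], -1).norm(norm, dim=-1).clamp(min=1e-12)
        perturbation = perturbation * eps / norms.view(-1, *([1] * (x.dim() - 1)))
    return x + perturbation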