Example #1
    def _propose(
        x: torch.Tensor,
        x_adv: torch.Tensor,
        y: torch.Tensor,
        model: Module,
        clamp: Tuple[float, float] = (0, 1)) -> torch.Tensor:
        """Generate proposal perturbed sample

        Args:
            x: Original sample
            x_adv: Adversarial sample
            y: Label of original sample
            clamp: Domain (i.e. max/min) of samples
        """
        # Sample from unit Normal distribution with same shape as input
        perturbation = torch.normal(torch.zeros_like(x_adv),
                                    torch.ones_like(x_adv))

        # Rescale perturbation so its l2 norm equals the orthogonal step size (delta)
        perturbation = project(torch.zeros_like(perturbation),
                               perturbation,
                               norm=2,
                               eps=orthogonal_step)

        # Apply perturbation and project onto sphere around original sample such that the distance
        # between the perturbed adversarial sample and the original sample is the same as the
        # distance between the unperturbed adversarial sample and the original sample
        # i.e. d(x_adv, x) = d(x_adv + perturbation, x)
        perturbed = x_adv + perturbation
        perturbed = project(x, perturbed, 2, torch.norm(x_adv - x,
                                                        2)).clamp(*clamp)

        # Record success/failure of orthogonal step
        orth_step_stats.append(model(perturbed).argmax(dim=1) != y)

        # Make step towards original sample
        step_towards_original = project(torch.zeros_like(perturbation),
                                        x - perturbed,
                                        norm=2,
                                        eps=perpendicular_step)
        perturbed = (perturbed + step_towards_original).clamp(*clamp)

        # Record success/failure of perpendicular step
        perp_step_stats.append(model(perturbed).argmax(dim=1) != y)

        # Clamp to domain of sample
        perturbed = perturbed.clamp(*clamp)

        return perturbed
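
All three examples rely on a project helper that is not shown here, and Example #1 additionally uses names from its enclosing scope (orthogonal_step, perpendicular_step, orth_step_stats, perp_step_stats), so it is evidently a nested function. The examples also assume the usual imports (torch, from torch.nn import Module, from typing import Callable, Tuple, Union). Below is a minimal sketch of what such a projection helper could look like, assuming the common convention of clipping the perturbation back into the eps-ball around x; the helper in the original code base may differ in detail.

import torch


def project(x: torch.Tensor, x_adv: torch.Tensor, norm, eps) -> torch.Tensor:
    """Project x_adv back into the eps-ball (L_norm) around x.

    Hypothetical reconstruction of the helper used by the examples above.
    """
    if norm == 'inf':
        # Clip each element to within eps of the original sample
        x_adv = torch.max(torch.min(x_adv, x + eps), x - eps)
    else:
        delta = x_adv - x
        # Per-sample norms, assuming the first dimension is the batch dimension
        norms = delta.view(delta.shape[0], -1).norm(norm, dim=1)
        # Scale down only those samples whose perturbation exceeds eps
        factor = torch.clamp(eps / (norms + 1e-12), max=1.0)
        delta = delta * factor.view(-1, *([1] * (delta.dim() - 1)))
        x_adv = x + delta
    return x_adv

Example #1 also uses project to give a freshly drawn Gaussian perturbation an l2 norm of orthogonal_step; with this clipping variant that works in practice because the norm of raw Gaussian noise is almost always much larger than the step size, so it gets scaled down to exactly eps.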
Example #2
def _iterative_gradient(
    model: Module,
    x: torch.Tensor,
    y: torch.Tensor,
    loss_fn: Callable,
    k: int,
    step: float,
    eps: float,
    norm: Union[str, float],
    step_norm: Union[str, float],
    y_target: torch.Tensor = None,
    random: bool = False,
    clamp: Tuple[float, float] = (0, 1),
    beta: float = 0.0,
    flip_grad_ratio: float = 0.0
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Base function for PGD and iterated FGSM

    Args:
        model: Model
        x: Batch of samples
        y: Corresponding labels
        loss_fn: Loss function to maximise
        k: Number of iterations to make
        step: Size of step to make at each iteration
        eps: Maximum size of the adversarial perturbation; larger perturbations are projected back into the
            L_norm ball
        norm: Type of norm used for projection: 'inf' for the infinity norm, or a number p for the L_p norm
        step_norm: Norm used to normalise each step: 'inf' for the infinity norm, or a number p for the L_p norm
        y_target: If None, run an untargeted attack; otherwise a batch of target labels for a targeted attack
        random: Whether to start from a random point inside the l_norm ball
        clamp: Maximum and minimum values of elements in the samples, e.g. (0, 1) for MNIST
        beta: Weight of the model regularization term subtracted from the loss
        flip_grad_ratio: Fraction of gradient elements whose sign is randomly flipped before each step

    Returns:
        x_adv: Adversarially perturbed version of x
        adv_loss: The loss of the adversarial sample
        prediction: The predictions for the adversarial sample
        genie_prob: The probability of the genie (if it exists, else None)
    """

    # Note: the loss_fn argument is overridden here; NLL loss is applied to the model's log-probabilities
    loss_fn = torch.nn.NLLLoss(reduction='none')
    targeted = y_target is not None
    x_adv = x.detach().clone().to(x.device)
    if random:
        # Random start: sample a uniform point within eps of x_adv
        # (alternatively, a random_perturbation(x_adv, norm, eps) helper could be used, as in Example #3)
        rand_gen = torch.distributions.uniform.Uniform(x_adv - eps, x_adv + eps)
        x_adv = rand_gen.sample().clamp(*clamp)

    for i in range(k):
        # On each iteration x_adv becomes a new variable (x_adv += gradients), so we must detach it
        # (otherwise backward() would also compute gradients for the old clones and overflow memory)
        # and then call requires_grad_(True), since detach() disables grad.
        # The alternative (original) approach is to work with a temporary variable _x_adv, as in
        # Example #3, but that seems to prolong the computation, possibly because of the re-cloning.
        x_adv = x_adv.detach()
        x_adv = x_adv.requires_grad_(True)
        prediction = model.calc_log_prob(x_adv)
        loss = loss_fn(prediction, y_target if targeted else
                       y).mean() - beta * model.regularization.mean()
        x_adv_grad = torch.autograd.grad(loss, x_adv, create_graph=False)[0]
        if flip_grad_ratio > 0.0:
            # Randomly flip the sign of a flip_grad_ratio fraction of the gradient elements
            bit_mask = torch.rand_like(x_adv_grad) < flip_grad_ratio
            x_adv_grad[bit_mask] = x_adv_grad[bit_mask] * -1
        with torch.no_grad():
            if step_norm == 'inf':
                gradients = (x_adv_grad.sign() * step).detach()
            else:
                # .view() assumes batched image data as 4D tensor
                gradients = x_adv_grad * step / x_adv_grad.view(x_adv.shape[0], -1).norm(step_norm, dim=-1)\
                    .view(-1, 1, 1, 1)

            if targeted:
                # Targeted: gradient descent on the loss of the (incorrect) target label
                # w.r.t. the input (increasing the probability of predicting the target label)
                x_adv = x_adv - gradients
            else:
                # Untargeted: gradient ascent on the loss of the correct label w.r.t. the input
                x_adv = x_adv + gradients

        # Project back into l_norm ball and correct range
        x_adv = project(x, x_adv, norm, eps).clamp(*clamp).detach()
    x_adv = x_adv.detach()
    # x_adv.requires_grad_(True) #  This is done so model with refinement could do backprop

    loss, prob, genie_prob = model.eval_batch(x_adv,
                                              y_target if targeted else y,
                                              enable_grad=model.pnml_model)

    return x_adv, loss.detach(), prob.detach(), genie_prob
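
Both variants support a random start inside the eps-ball via the random flag: Example #2 samples a uniform start point inline, while Example #3 calls a random_perturbation helper that is not shown. A minimal sketch of such a helper, assuming a uniform element-wise start for norm='inf' and a rescaled Gaussian direction for other norms (the original helper may sample differently):

import torch


def random_perturbation(x: torch.Tensor, norm, eps: float) -> torch.Tensor:
    """Return a random point in the eps-ball around x (hypothetical reconstruction)."""
    if norm == 'inf':
        # Uniform noise in [-eps, eps] for every element
        perturbation = torch.empty_like(x).uniform_(-eps, eps)
    else:
        # Random Gaussian direction, rescaled so each sample's L_norm equals eps
        perturbation = torch.randn_like(x)
        norms = perturbation.view(x.shape[0], -1).norm(norm, dim=1)
        perturbation = perturbation * (eps / (norms + 1e-12)).view(-1, *([1] * (x.dim() - 1)))
    return x + perturbation

Note that, unlike the inline version in Example #2, this sketch does not clamp the result to the valid input range; the caller is expected to do that.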
Example #3
def _iterative_gradient(
    model: Module,
    x: torch.Tensor,
    y: torch.Tensor,
    loss_fn: Callable,
    k: int,
    step: float,
    eps: float,
    norm: Union[str, float],
    step_norm: Union[str, float],
    y_target: torch.Tensor = None,
    random: bool = False,
    clamp: Tuple[float, float] = (0, 1)) -> torch.Tensor:
    """Base function for PGD and iterated FGSM

    Args:
        model: Model
        x: Batch of samples
        y: Corresponding labels
        loss_fn: Loss function to maximise
        k: Number of iterations to make
        step: Size of step to make at each iteration
        eps: Maximum size of the adversarial perturbation; larger perturbations are projected back into the
            L_norm ball
        norm: Type of norm used for projection: 'inf' for the infinity norm, or a number p for the L_p norm
        step_norm: 2 for PGD, 'inf' for iterated FGSM
        y_target: If None, run an untargeted attack; otherwise a batch of target labels for a targeted attack
        random: Whether to start Iterated FGSM from a random point inside the l_norm ball
        clamp: Maximum and minimum values of elements in the samples, e.g. (0, 1) for MNIST

    Returns:
        x_adv: Adversarially perturbed version of x
    """
    x_adv = x.clone().detach().requires_grad_(True).to(x.device)
    targeted = y_target is not None

    if random:
        x_adv = random_perturbation(x_adv, norm, eps)

    for i in range(k):
        _x_adv = x_adv.clone().detach().requires_grad_(True)

        prediction = model(_x_adv)
        loss = loss_fn(prediction, y_target if targeted else y)
        loss.backward()

        with torch.no_grad():
            if step_norm == 'inf':
                gradients = _x_adv.grad.sign() * step
            else:
                # .view() assumes batched image data as 4D tensor
                gradients = _x_adv.grad * step / _x_adv.grad.view(_x_adv.shape[0], -1).norm(step_norm, dim=-1)\
                    .view(-1, 1, 1, 1)

            if targeted:
                # Targeted: gradient descent on the loss of the (incorrect) target label
                # w.r.t. the input
                x_adv -= gradients
            else:
                # Untargeted: gradient ascent on the loss of the correct label
                # w.r.t. the input
                x_adv += gradients

        # Project back into l_norm ball and correct range
        x_adv = project(x, x_adv, norm, eps).clamp(*clamp)

    return x_adv.detach()
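
For reference, a hedged usage sketch of Example #3: an untargeted L_inf attack (iterated FGSM with a random start) against a small classifier. The model, data, and hyperparameters here are illustrative only and are not taken from the original code base.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Hypothetical model and data, purely for illustration
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
x = torch.rand(8, 1, 28, 28)        # batch of 8 images with values in [0, 1]
y = torch.randint(0, 10, (8,))

# Untargeted iterated FGSM: L_inf ball of radius 0.3, 40 steps of size 0.01
x_adv = _iterative_gradient(model=model, x=x, y=y,
                            loss_fn=F.cross_entropy,
                            k=40, step=0.01, eps=0.3,
                            norm='inf', step_norm='inf',
                            random=True, clamp=(0, 1))

# For an L2-constrained PGD-style attack, pass norm=2 and step_norm=2 instead.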