def train_step(
        X,
        y,
        model,
        optimizer,
        scheduler,  # generic
        reg_weight,
        epsilon,
        criterion,
        fim_reg  # experiment-specific
) -> tuple:
    """Training step that penalizes the Frobenius norm of the Fisher
    Information Matrix (FIM) in addition to the classification loss.

    Returns ``(loss, output)`` for the clean batch.
    """
    model.train()
    model.apply(zero_grad)

    with toggle_requires_grad(X, True):
        # Forward pass and standard classification loss; retain the graph so
        # the FIM penalty below can backpropagate through the same forward.
        output = model(X)
        loss = criterion(output, y)
        loss.backward(retain_graph=True)

        # Weighted FIM (Jacobian-style) regularizer.
        weights = softmax_reciprocal(logits=output)
        penalty = fim_reg(X, output, epsilon=epsilon, weights=weights) * reg_weight

    # Accumulate the penalty gradients on top of the classification
    # gradients, then take a single optimizer/scheduler step.
    penalty.backward()
    optimizer.step()
    scheduler.step()

    return loss, output
# Example #2
# 0
def train_step(
        X,
        y,
        model,
        optimizer,
        scheduler,  # generic
        reg_weight,
        geometry,
        epsilon,
        alpha,
        lower_limit,
        upper_limit,
        attack_iters,
        criterion  # experiment-specific
):
    """TRADES algorithm [https://arxiv.org/abs/1901.08573].

    Minimizes the clean classification loss plus a KL-divergence term between
    the adversarial and clean predictive distributions, scaled by
    ``reg_weight / batch_size``.  The perturbation ``delta`` is found by an
    inner PGD-style maximization of the KL term.  Returns ``(loss, output)``
    where both refer to the *clean* batch.
    """
    ### adversarial perturbation
    # init perturbation: uniform in the per-channel epsilon box, then clamped
    # so that X + delta stays inside [lower_limit, upper_limit]
    delta = torch.zeros_like(X)
    for i in range(len(epsilon)):
        delta[:, i, :, :].uniform_(-epsilon[i][0][0].item(),
                                   epsilon[i][0][0].item())
    delta.data = clamp(delta, lower_limit - X, upper_limit - X)

    # find approx optimal perturbation by ascending the KL divergence
    # between the adversarial and clean output distributions
    criterion_kl = nn.KLDivLoss(reduction='sum')
    with toggle_eval(model):  # turn off batchnorm stat tracking
        for _ in range(attack_iters):
            with toggle_requires_grad(delta, True):
                grad = torch.autograd.grad(outputs=criterion_kl(
                    F.log_softmax(model(X + delta), dim=1),
                    F.softmax(model(X), dim=1)),
                                           inputs=delta,
                                           only_inputs=True)[0]
            # signed-gradient ascent step, projected onto the epsilon ball ...
            delta = project(delta + alpha * grad.sign(),
                            epsilon,
                            geometry=geometry)
            # ... and back onto the valid pixel range
            delta = clamp(delta, lower_limit - X, upper_limit - X)

    ### adversarial loss
    model.apply(zero_grad)
    # disable batchnorm stat tracking to avoid distribution shift from adversarial examples
    with disable_batchnorm_tracking(model):
        loss = criterion_kl(F.log_softmax(model(X + delta), dim=1),
                            F.softmax(model(X), dim=1))
        loss *= (reg_weight / X.shape[0])  # sum-reduced KL -> per-sample mean, weighted
    loss.backward()  # gradients accumulate; optimizer.step() happens below

    ### clean loss
    # batchnorm stat tracking is fine here
    output = model(X)
    loss = criterion(output, y)

    ### update model
    loss.backward()  # adds clean-loss gradients to the adversarial ones
    optimizer.step()
    scheduler.step()

    # NOTE(review): unlike sibling train steps, model.train() is never called
    # here -- presumably the caller keeps the model in training mode; confirm
    # against the training loop.
    return loss, output
# Example #3
# 0
def train_step(
        X,
        y,
        model,
        optimizer,
        scheduler,  # generic
        epsilon,
        alpha,
        lower_limit,
        upper_limit,
        criterion,  # experiment-specific
) -> tuple:
    """FGSM training with the configuration from Fast is Better than Free
    [https://arxiv.org/abs/2001.03994].

    Returns ``(loss, logits)`` computed on the *clean* batch.
    """
    ### adversarial perturbation
    # random init inside the per-channel epsilon box, clamped to valid pixels
    delta = torch.zeros_like(X)
    for c in range(len(epsilon)):
        bound = epsilon[c][0][0].item()
        delta[:, c, :, :].uniform_(-bound, bound)
    delta.data = clamp(delta, lower_limit - X, upper_limit - X)

    # single FGSM step; eval mode keeps batchnorm statistics frozen
    with toggle_eval(model), toggle_requires_grad(delta, True):
        grad = torch.autograd.grad(
            outputs=criterion(model(X + delta), y),
            inputs=delta,
            only_inputs=True)[0]
    delta = project(delta + alpha * torch.sign(grad), epsilon, geometry='linf')
    delta = clamp(delta, lower_limit - X, upper_limit - X)

    ### adversarial loss
    output = model(X + delta)
    loss = criterion(output, y)

    ### update model
    model.apply(zero_grad)
    loss.backward()
    optimizer.step()
    scheduler.step()

    # report clean predictions and loss
    with toggle_eval(model), torch.no_grad():
        logits = model(X)
        loss = criterion(logits, y)
    return loss, logits
# Example #4
# 0
def train_step(
    X, y, model, optimizer, scheduler,                                           # generic
    geometry, epsilon, alpha, lower_limit, upper_limit, attack_iters, criterion  # experiment-specific
) -> tuple:
    """Madry PGD adversarial training [https://arxiv.org/abs/1706.06083].

    Returns ``(loss, logits)`` computed on the *clean* batch.
    """
    ### adversarial perturbation
    # random start inside the per-channel epsilon box, clamped to valid pixels
    delta = torch.zeros_like(X)
    for c in range(len(epsilon)):
        bound = epsilon[c][0][0].item()
        delta[:, c, :, :].uniform_(-bound, bound)
    delta.data = clamp(delta, lower_limit - X, upper_limit - X)

    # iterative signed-gradient ascent; eval mode freezes batchnorm statistics
    with toggle_eval(model):
        for _ in range(attack_iters):
            with toggle_requires_grad(delta, True):
                grad = torch.autograd.grad(
                    outputs=criterion(model(X + delta), y),
                    inputs=delta,
                    only_inputs=True)[0]
            delta = project(delta + alpha * grad.sign(), epsilon, geometry=geometry)
            delta = clamp(delta, lower_limit - X, upper_limit - X)

    ### compute loss on the adversarial examples
    output = model(X + delta)
    loss = criterion(output, y)

    ### update model
    model.apply(zero_grad)
    loss.backward()
    optimizer.step()
    scheduler.step()

    # report clean predictions and loss
    with toggle_eval(model), torch.no_grad():
        logits = model(X)
        loss = criterion(logits, y)
    return loss, logits
# Example #5
# 0
def train_step(
        X,
        y,
        model,
        optimizer,
        scheduler,  # generic
        reg_weight,
        epsilon,
        criterion  # implementation-specific
) -> tuple:
    """Input-gradient regularization measured in the dual norm
    (l1, the dual of l-infinity).

    Returns ``(loss, output)`` for the clean batch.
    """
    model.train()
    model.apply(zero_grad)

    # a single forward pass feeds both the classification loss and the penalty
    with toggle_requires_grad(X, True):
        output = model(X)
        loss = criterion(output, y)

        # create_graph=True keeps a second-order graph so the penalty itself
        # can be backpropagated
        grad = torch.autograd.grad(loss,
                                   X,
                                   create_graph=True,
                                   only_inputs=True)[0]

    # epsilon is tensor-valued: a per-element adversarial weight
    weighted = grad * epsilon
    loss_grad = reg_weight * weighted.flatten(1).norm(p=1, dim=1).mean(dim=0)

    # update model: accumulate both gradients, then step once
    loss.backward(retain_graph=True)
    loss_grad.backward()
    optimizer.step()
    scheduler.step()

    return loss, output
def inversion(
    model         :Module,
    x0            :Tensor,
    *,
    stepsize      :float,
    category      :int,
    untargeted    :bool                    =False,
    max_iters     :int                     =10000,
    clamp         :Optional[torch.Tensor]  =None,
    keep_path     :bool                    =False,
    geometry      :str                     ='linf',
) -> Union[List[Tensor], Tensor]:
    """Runs adversarial perturbation -- both targeted and untargeted versions are available.
    No constraints are assumed by default but can be added via the clamp argument.
    Note that the projection/normalization is only defined for the 'l2' and 'linf' geometries.

    :param model         : PyTorch module
    :param x0            : Initialization point
    :param stepsize      : The stepsize of each gradient step
    :param category      : The target label for cross-entropy loss
    :param untargeted    : (False) - Targeted: descend the loss toward `category`.
                           (True)  - Untargeted: ascend the loss away from `category`.
    :param max_iters     : Number of iterations. Currently there is no convergence criterion.
    :param clamp         : A tensor specifying the range of values to restrict the pixel values to.
                           A range needs to be specified for each color channel.
                           NOTE: this parameter shadows any module-level `clamp` helper
                           inside this function.
    :param keep_path     : Flag to retain the perturbation iterates. Useful for visualization.
    :param geometry      : ('l2' or 'linf') Under which geometry to normalize the gradient directions.
    """
    device = next(model.parameters()).device
    model.eval()

    # broadcast a scalar label to the whole batch
    batch_size = x0.shape[0]
    if isinstance(category, int):
        category = torch.LongTensor([category]).repeat(batch_size).to(device)
    else:
        category = category.to(device)

    x_inv = x0.clone().to(device)

    if clamp is not None:
        assert len(clamp) == x0.shape[1]
        assert len(clamp[0]) == 2

    if keep_path:
        path = []

    # BUG FIX: `untargeted` was previously accepted but never used -- the loop
    # always descended the loss (targeted behavior).  Flip the step direction
    # for untargeted (loss-ascent) perturbations; default behavior unchanged.
    direction = 1.0 if untargeted else -1.0

    for n_iter in range(max_iters):
        # freeze parameter grads (only the input gradient is needed)
        with toggle_requires_grad(model, False), toggle_requires_grad(x_inv, True):
            grad = torch.autograd.grad(
                outputs    =F.cross_entropy(model(x_inv), category),
                inputs     =x_inv,
                only_inputs=True)[0]

        if geometry == 'l2':
            # per-sample l2 normalization; small eps avoids division by zero
            norm = grad.flatten(1).norm(dim=1)[:, None, None, None]
            x_inv += direction * stepsize * grad / (norm + 1e-10)
        elif geometry == 'linf':
            x_inv += direction * stepsize * grad.sign()
        else:
            raise NotImplementedError

        if clamp is not None:
            # clamp each channel to its permitted range
            for i in range(len(clamp)):
                x_inv[:, i] = torch.clamp(x_inv[:, i], *clamp[i])

        if keep_path:
            path.append(x_inv.clone().cpu())

    # when keep_path is set, return the list of iterates instead of the final point
    if keep_path:
        x_inv = path
    return x_inv