def bca_k(x, y, model, loss_fct, k=25, is_report_loss_diff=False, use_sample=False):
    """Multi-step bit coordinate ascent attack.

    Each step flips the single bit with the largest sign-adjusted gradient,
    projects back onto the feasible set, and keeps, per sample, the iterate
    that achieved the highest loss seen so far.

    :param x: (tensor) feature vector
    :param y: (tensor) labels
    :param model: nn model
    :param loss_fct: loss function (assumed to return a per-sample loss —
        verify against caller)
    :param k: num of steps
    :param is_report_loss_diff: print natural vs adversarial loss if True
    :param use_sample: forwarded to get_x0 when choosing the starting point
    :return: the adversarial version of x according to bca_k (tensor, on CPU)
    """
    # Move inputs onto the GPU when the model lives there.
    if next(model.parameters()).is_cuda:
        x = x.cuda()
        y = y.cuda()
    y = Variable(y)

    # Loss on the unperturbed input; baseline for the report below.
    loss_natural = loss_fct(model(Variable(x)), y).data

    # Running per-sample maximum loss and the iterates that achieved it.
    loss_worst = loss_natural.clone()
    x_worst = x.clone()

    step_loss = None
    adv_var = None
    adv_x = None
    for step in range(k):
        if step == 0:
            # Initialize the starting point.
            adv_x = get_x0(x, use_sample)
        else:
            # Gradient of the mean loss w.r.t. the previous iterate.
            grad = torch.autograd.grad(step_loss.mean(), adv_var)[0].data
            # Sign-adjust so a flip in the indicated coordinate always
            # moves in the loss-increasing direction regardless of the
            # bit's current value.
            aug_grad = (1. - 2. * adv_x) * grad
            top_val, _ = torch.topk(aug_grad, 1)
            flip = (aug_grad >= top_val.expand_as(aug_grad)).float()
            # Apply the single-bit flip, then project onto the feasible set.
            adv_x = xor_float_tensors(flip, adv_x)
            adv_x = or_float_tensors(adv_x, x)

        # Forward pass on the current iterate.
        adv_var = Variable(adv_x, requires_grad=True)
        step_loss = loss_fct(model(adv_var), y)

        # Keep, per sample, the iterate with the highest loss so far.
        improved = (step_loss.data > loss_worst)
        loss_worst[improved] = step_loss.data[improved]
        row_mask = improved.unsqueeze(1).expand_as(x_worst)
        x_worst[row_mask] = adv_x[row_mask]

    if is_report_loss_diff:
        print(
            "Natural loss (%.4f) vs Adversarial loss (%.4f), Difference: (%.4f)" %
            (loss_natural.mean(), loss_worst.mean(),
             loss_worst.mean() - loss_natural.mean()))

    if x_worst.is_cuda:
        x_worst = x_worst.cpu()
    return x_worst
def grosse_k(x, y, model, loss_fct, k=25, is_report_loss_diff=False, use_sample=False):
    """Multi-step bit coordinate ascent driven by the model output.

    Unlike bca_k, the ascent direction is the gradient of the model's first
    output column (not the loss), and only 0-valued bits are candidates for
    flipping. Per-sample worst-loss iterates are tracked and returned.

    :param x: (tensor) feature vector
    :param y: (tensor) labels
    :param model: nn model
    :param loss_fct: loss function (assumed to return a per-sample loss —
        verify against caller)
    :param k: num of steps
    :param is_report_loss_diff: print natural vs adversarial loss if True
    :param use_sample: forwarded to get_x0 when choosing the starting point
    :return: adversarial version of x (tensor, on CPU)
    """
    # Move inputs onto the GPU when the model lives there.
    if next(model.parameters()).is_cuda:
        x = x.cuda()
        y = y.cuda()
    y = Variable(y)

    # Loss on the unperturbed input; baseline for the report below.
    loss_natural = loss_fct(model(Variable(x)), y).data

    # Running per-sample maximum loss and the iterates that achieved it.
    loss_worst = loss_natural.clone()
    x_worst = x.clone()

    out = None
    adv_var = None
    adv_x = None
    for step in range(k):
        if step == 0:
            # Initialize the starting point.
            adv_x = get_x0(x, use_sample)
        else:
            # Ascend on the model's first output column rather than the loss.
            grad = torch.autograd.grad(out[:, 0].mean(), adv_var)[0].data
            # Only consider gradients for bits currently at 0.
            aug_grad = (1. - adv_x) * grad
            top_val, _ = torch.topk(aug_grad, 1)
            flip = (aug_grad >= top_val.expand_as(aug_grad)).float()
            # Apply the flip, then project onto the feasible set.
            adv_x = xor_float_tensors(flip, adv_x)
            adv_x = or_float_tensors(adv_x, x)

        # Forward pass on the current iterate.
        adv_var = Variable(adv_x, requires_grad=True)
        out = model(adv_var)
        step_loss = loss_fct(out, y)

        # Keep, per sample, the iterate with the highest loss so far.
        improved = (step_loss.data > loss_worst)
        loss_worst[improved] = step_loss.data[improved]
        row_mask = improved.unsqueeze(1).expand_as(x_worst)
        x_worst[row_mask] = adv_x[row_mask]

    if is_report_loss_diff:
        print(
            "Natural loss (%.4f) vs Adversarial loss (%.4f), Difference: (%.4f)" %
            (loss_natural.mean(), loss_worst.mean(),
             loss_worst.mean() - loss_natural.mean()))

    if x_worst.is_cuda:
        x_worst = x_worst.cpu()
    return x_worst
def bga_k(x, y, model, loss_fct, k=25, is_report_loss_diff=True, use_sample=False, return_loss=False):
    """Multi-step bit gradient ascent.

    At each step, flips every bit whose sign-adjusted gradient component is
    at least ||grad||_2 / sqrt(m) (m = feature dimension), projects back
    onto the feasible set, and tracks the per-sample worst loss.

    :param x: (tensor) feature vector
    :param y: (tensor) labels
    :param model: nn model
    :param loss_fct: loss function (assumed to return a per-sample loss —
        verify against caller)
    :param k: num of steps
    :param is_report_loss_diff: print natural vs adversarial loss if True
    :param use_sample: forwarded to get_x0 when choosing the starting point
    :param return_loss: also return the recorded loss history if True
    :return: the adversarial version of x according to bga_k (tensor);
             when return_loss is True, a (x_worst, losses) pair where
             losses holds one scalar per step followed by the final
             per-sample worst-loss tensor
    """
    # sqrt(m) as a 1-element tensor, used in the flip threshold below.
    sqrt_m = torch.from_numpy(np.sqrt([x.size()[1]])).float()
    if next(model.parameters()).is_cuda:
        x = x.cuda()
        y = y.cuda()
        sqrt_m = sqrt_m.cuda()
    y = Variable(y)

    # Loss on the unperturbed input; baseline for the report below.
    loss_natural = loss_fct(model(Variable(x)), y).data

    # Running per-sample maximum loss and the iterates that achieved it.
    loss_worst = loss_natural.clone()
    x_worst = x.clone()
    losses = []

    # Initialize loop-carried state up front (consistent with bca_k); the
    # gradient branch reads `loss`/`x_var` produced by the previous step.
    loss = None
    x_var = None
    x_next = None

    # multi-step with gradients
    for t in range(k):
        if t == 0:
            # initialize starting point
            x_next = get_x0(x, use_sample)
        else:
            # compute gradient of the mean loss w.r.t. the previous iterate
            grad_vars = torch.autograd.grad(loss.mean(), x_var)
            grad_data = grad_vars[0].data

            # Flip every bit whose sign-adjusted gradient component is
            # >= ||grad||_2 / sqrt(m) for its sample.
            x_update = (sqrt_m * (1. - 2. * x_next) * grad_data >= torch.norm(
                grad_data, 2, 1).unsqueeze(1).expand_as(x_next)).float()

            # find the next sample with projection to the feasible set
            x_next = xor_float_tensors(x_update, x_next)
            x_next = or_float_tensors(x_next, x)

        # forward pass
        x_var = Variable(x_next, requires_grad=True)
        y_model = model(x_var)
        loss = loss_fct(y_model, y)
        # NOTE(review): records only the first sample's loss per step —
        # presumably a debugging trace; confirm whether a batch mean was
        # intended before relying on this history.
        losses.append(loss.cpu().data.numpy()[0])

        # update worst loss and adversarial samples
        replace_flag = (loss.data > loss_worst)
        loss_worst[replace_flag] = loss.data[replace_flag]
        x_worst[replace_flag.unsqueeze(1).expand_as(x_worst)] = x_next[
            replace_flag.unsqueeze(1).expand_as(x_worst)]

    # Final history entry is the whole per-sample worst-loss tensor,
    # unlike the per-step scalar entries above.
    losses.append(loss_worst)

    if is_report_loss_diff:
        print(
            "Natural loss (%.4f) vs Adversarial loss (%.4f), Difference: (%.4f)" %
            (loss_natural.mean(), loss_worst.mean(),
             loss_worst.mean() - loss_natural.mean()))

    if x_worst.is_cuda:
        x_worst = x_worst.cpu()
    if return_loss:
        return x_worst, losses
    else:
        return x_worst