def _outputs_and_loss(self, x, modifiers, starting_atanh, y, const, taus):
    """
    Evaluate the model on the current adversarial candidates and build the
    per-example loss.

    :param x: reference inputs the penalty term is measured against.
    :param modifiers: perturbation added to ``starting_atanh`` before
        ``tanh_rescale`` is applied.
    :param starting_atanh: starting point that ``modifiers`` is added to.
    :param y: labels; one-hot encoded against ``self.num_classes``.
    :param const: multiplier applied to the classification term.
    :param taus: thresholds, viewed as ``(-1, 1, ..., 1)`` so that they
        broadcast against ``x``.
    :return: ``(outputs.detach(), loss)`` where ``loss`` is the sum of the
        weighted classification term and the penalty term.
    """
    adversarials = tanh_rescale(
        starting_atanh + modifiers, self.clip_min, self.clip_max)
    outputs = self.predict(adversarials)

    y_onehot = to_one_hot(y, self.num_classes).float()
    real = (y_onehot * outputs).sum(dim=1)
    # Subtracting y_onehot * TARGET_MULT is for the true label not to be
    # selected by the max.
    masked_outputs = (1.0 - y_onehot) * outputs - (y_onehot * TARGET_MULT)
    other = masked_outputs.max(dim=1)[0]

    margin = (other - real) if self.targeted else (real - other)
    loss1 = const * torch.clamp(margin, min=0.)

    n_dims = len(x.shape)
    broadcast_shape = (-1, ) + (1, ) * (n_dims - 1)
    excess = torch.abs(x - adversarials) - taus.view(broadcast_shape)
    penalties = torch.clamp(excess, min=0)
    # Reduce over every dimension except the leading one.
    loss2 = torch.sum(penalties, dim=tuple(range(1, n_dims)))
    assert loss1.shape == loss2.shape

    return outputs.detach(), loss1 + loss2
def perturb(self, x, y=None):
    """
    Run the attack on ``x`` and return the adversarial examples found.

    For each of ``self.binary_search_steps`` outer steps a fresh ``delta``
    is optimised with Adam for up to ``self.max_iterations`` iterations,
    after which the per-example loss coefficients are updated.

    :param x: input tensor.
    :param y: labels; when ``None`` after input verification, the model's
        predicted labels are used.
    :return: ``final_advs``, a tensor shaped like ``x`` that is updated in
        place by ``self._update_if_smaller_dist_succeed``.
    """
    x, y = self._verify_and_process_inputs(x, y)

    # Initialization
    if y is None:
        y = self._get_predicted_label(x)
    x = replicate_input(x)
    batch_size = len(x)
    device = x.device

    coeff_lower_bound = x.new_zeros(batch_size)
    coeff_upper_bound = x.new_ones(batch_size) * CARLINI_COEFF_UPPER
    loss_coeffs = torch.ones_like(y).float() * self.initial_const
    final_l2distsqs = torch.full(
        (batch_size, ), CARLINI_L2DIST_UPPER,
        dtype=torch.float, device=device)
    final_labels = torch.full(
        (batch_size, ), INVALID_LABEL, dtype=torch.long, device=device)
    # Separate buffer, so the in-place result updates do not write into x.
    final_advs = x.clone()
    x_atanh = self._get_arctanh_x(x)
    y_onehot = to_one_hot(y, self.num_classes).float()

    # Start binary search
    for outer_step in range(self.binary_search_steps):
        delta = nn.Parameter(torch.zeros_like(x))
        optimizer = optim.Adam([delta], lr=self.learning_rate)
        cur_l2distsqs = torch.full(
            (batch_size, ), CARLINI_L2DIST_UPPER,
            dtype=torch.float, device=device)
        cur_labels = torch.full(
            (batch_size, ), INVALID_LABEL, dtype=torch.long, device=device)
        prevloss = PREV_LOSS_INIT

        # record current output
        # Allocated on x's device: a hard-coded .cuda() fails on CPU-only
        # machines and can land on a different GPU than x.
        cur_output = torch.zeros(
            batch_size, self.num_classes, dtype=torch.float, device=device)

        if (self.repeat and outer_step == (self.binary_search_steps - 1)):
            loss_coeffs = coeff_upper_bound

        for ii in range(self.max_iterations):
            loss, l2distsq, output, adv_img = \
                self._forward_and_update_delta(
                    optimizer, x_atanh, delta, y_onehot, loss_coeffs)
            if self.abort_early:
                # "or 1" keeps the modulus non-zero when max_iterations
                # is smaller than NUM_CHECKS.
                if ii % (self.max_iterations // NUM_CHECKS or 1) == 0:
                    if loss > prevloss * ONE_MINUS_EPS:
                        break
                    prevloss = loss

            self._update_if_smaller_dist_succeed(
                adv_img, y, output, l2distsq, batch_size,
                cur_l2distsqs, cur_labels,
                final_l2distsqs, final_labels, final_advs, cur_output)

        self._update_loss_coeffs(
            y, cur_labels, batch_size,
            loss_coeffs, coeff_upper_bound, coeff_lower_bound, cur_output)

    return final_advs
def _outputs_and_loss(self, x, modifiers, starting_atanh, y, const, taus,
                      active_mask=None, filter_=None):
    """
    Evaluate the model on the current adversarial candidates and build the
    per-example losses.

    :param x: reference inputs the penalty term is measured against.
    :param modifiers: perturbation added to ``starting_atanh`` before
        ``tanh_rescale`` is applied.
    :param starting_atanh: starting point that ``modifiers`` is added to.
    :param y: labels; one-hot encoded against ``self.num_classes``.
    :param const: multiplier applied to the classification term.
    :param taus: thresholds, viewed as ``(len(x), 1, ..., 1)`` so that
        they broadcast against ``x``.
    :param active_mask: forwarded unchanged to ``self._outputs``.
    :param filter_: forwarded unchanged to ``self._outputs``.
    :return: ``(outputs.detach(), losses)`` with ``losses`` of shape
        ``(len(x), )``.
    """
    # If you're comparing with Carlini's original implementation, x
    # is the name that has been given to tf.tanh(timg)/2, while
    # adversarials is the name that has been given to
    # tf.tanh(modifier + simg)/2, aka newimg
    adversarials = tanh_rescale(
        starting_atanh + modifiers, self.clip_min, self.clip_max)
    assert x.shape == adversarials.shape

    outputs = self._outputs(
        adversarials, active_mask=active_mask, filter_=filter_)
    assert outputs.shape == (adversarials.shape[0], self.num_classes)

    y_onehot = to_one_hot(y, self.num_classes).float()
    assert y_onehot.shape == outputs.shape

    real = (y_onehot * outputs).sum(dim=1)
    # Subtracting y_onehot * TARGET_MULT is for the true label not to be
    # selected by the max.
    masked_outputs = (1.0 - y_onehot) * outputs - (y_onehot * TARGET_MULT)
    other = masked_outputs.max(dim=1)[0]

    margin = (other - real) if self.targeted else (real - other)
    loss1 = torch.clamp(margin, min=0.)

    n_dims = len(x.shape)
    # Reshape taus to [batch_size, 1, 1, 1] for broadcasting
    broadcast_shape = (len(x), ) + (1, ) * (n_dims - 1)
    excess = torch.abs(adversarials - x) - taus.view(broadcast_shape)
    penalties = torch.clamp(excess, min=0)
    assert penalties.shape == x.shape

    loss2 = torch.sum(penalties, dim=tuple(range(1, n_dims)))
    assert loss2.shape == loss1.shape

    losses = const * loss1 + loss2
    assert losses.shape == (len(x), )

    # losses is returned as a (batch_size,) vector to support abort_early
    # Only later it is converted to a scalar
    return outputs.detach(), losses
def forward(self, input, target):
    """
    Compute the summed CW margin loss.

    :param input: pre-softmax/logits, indexed as ``(example, class)``.
    :param target: true labels (integer class indices, one per example).
    :return: CW loss value: the negated sum over examples of
        ``relu(correct_logit - best_wrong_logit + self.conf)``.
    """
    target_idx = target.view(-1, 1)
    correct_logit = input.gather(1, target_idx).squeeze(1)
    # Exclude the true class from the max by setting it to -inf. Masking it
    # with 0 is wrong when every other logit is negative: the masked 0 then
    # wins the max and stands in for a "wrong" logit that does not exist.
    wrong_logit = input.scatter(1, target_idx, float("-inf")).max(dim=1)[0]
    loss = -F.relu(correct_logit - wrong_logit + self.conf).sum()
    return loss
def perturb(self, x, y=None):
    """
    Search for an adversarial sampling grid with L-BFGS-B and return the
    resampled inputs.

    An identity affine grid is built for ``x``; its flattened coordinates
    are optimised by ``scipy.optimize.fmin_l_bfgs_b`` (bounded to
    ``[-1, 1]``) against ``self._loss_fn_spatial`` for
    ``self.search_steps`` rounds. After each round ``x`` is resampled with
    the current grid and ``self._update_if_better`` is called with the
    result.

    :param x: input tensor.
    :param y: labels, processed by ``self._verify_and_process_inputs``.
    :return: ``final_advs``, initialised as a clone of ``x`` and passed to
        ``self._update_if_better`` each round.
    """
    x, y = self._verify_and_process_inputs(x, y)
    batch_size = len(x)
    loss_coeffs = x.new_ones(batch_size) * self.initial_const

    final_l2dists = [L2DIST_UPPER] * batch_size
    final_labels = [INVALID_LABEL] * batch_size
    final_step = [INVALID_LABEL] * batch_size
    final_advs = x.clone()  # fixing bug from advertorch

    # TODO: refactor the theta generation
    identity = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]]).to(x.device)
    theta = identity.repeat((x.shape[0], 1, 1))
    grid = F.affine_grid(theta, x.size())
    grid_ori = grid.clone()

    y_onehot = to_one_hot(y, self.num_classes).float()

    # One (lower, upper) pair per grid coordinate.
    lower = np.ones(grid_ori.shape[:]) * -1
    upper = np.ones(grid_ori.shape[:]) * 1
    clip_bound = list(zip(lower.flatten(), upper.flatten()))

    grid_np = grid.clone().data.cpu().numpy()
    grid_ret = grid_np.flatten().astype(float)

    from scipy.optimize import fmin_l_bfgs_b

    for outer_step in range(self.search_steps):
        loss_args = (
            x.clone().detach(),
            y_onehot,
            loss_coeffs,
            grid_ori.clone().detach(),
        )
        # Only the optimised coordinates are used; the reported minimum
        # and the info dict are discarded.
        grid_ret, _fval, _info = fmin_l_bfgs_b(
            self._loss_fn_spatial,
            grid_ret,
            args=loss_args,
            maxiter=self.max_iterations,
            bounds=clip_bound,
            iprint=0,
            maxls=100,
        )

        reshaped = grid_ret.reshape(grid_ori.shape)
        grid = torch.from_numpy(reshaped).float().to(x.device)
        adv_x = F.grid_sample(x.clone(), grid)
        l2s = calc_l2distsq(grid.data, grid_ori.data)
        output = self.predict(adv_x)
        self._update_if_better(
            adv_x.data, y, output.data, l2s, batch_size,
            final_l2dists, final_labels, final_advs,
            outer_step, final_step)

    return final_advs
def cw_log_loss(output, target, targeted=False, buff=1e-5, margin=50.):
    """
    CW-style margin loss computed on the logarithm of the model outputs.

    :param output: model outputs, indexed as ``(example, class)``.
        NOTE(review): this was previously documented as pre-softmax/logits,
        but ``log(value + buff)`` is taken of the selected entries, which
        presumes they are greater than ``-buff`` (e.g. probabilities) --
        confirm against callers.
    :param target: true labels.
    :param targeted: if ``True`` the margin is ``wrong - correct``,
        otherwise ``correct - wrong``.
    :param buff: small constant added before taking the logarithm.
    :param margin: constant added to the margin before the ReLU; the
        default ``50.`` is the value that used to be hard-coded.
    :return: per-example CW loss values, ``-0.5 * relu(margin_term)``.
    """
    num_classes = output.size(1)
    label_mask = to_one_hot(target, num_classes=num_classes).float()
    correct_logit = torch.log(torch.sum(label_mask * output, dim=1) + buff)
    wrong_logit = torch.log(
        torch.max((1. - label_mask) * output, dim=1)[0] + buff)
    if targeted:
        loss = -0.5 * F.relu(wrong_logit - correct_logit + margin)
    else:
        loss = -0.5 * F.relu(correct_logit - wrong_logit + margin)
    return loss
def perturb(self, x, y=None):
    """
    Run a single Adam optimisation of ``delta`` with the fixed loss
    coefficient ``self.c`` and return the last adversarial candidate.

    :param x: input tensor.
    :param y: labels; when ``None`` after input verification, the model's
        predicted labels are used.
    :return: the candidate evaluated in the last iteration (computed from
        ``delta`` before that iteration's optimiser step), or the
        replicated ``x`` when ``self.max_iterations`` is not positive.
    """
    x, y = self._verify_and_process_inputs(x, y)

    # Initialization
    if y is None:
        y = self._get_predicted_label(x)
    x = replicate_input(x)
    final_advs = x
    x_atanh = self._get_arctanh_x(x)
    y_onehot = to_one_hot(y, self.num_classes).float()

    delta = nn.Parameter(torch.zeros_like(x))
    optimizer = optim.Adam([delta], lr=self.learning_rate)

    # Does not depend on delta, so it is computed once instead of on every
    # iteration.
    transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)

    # No early-abort check is performed: all max_iterations steps run.
    for ii in range(self.max_iterations):
        optimizer.zero_grad()
        adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
        output = self.predict(adv)
        l2distsq = calc_l2distsq(adv, transimgs_rescale)
        loss, l2dist, adv_loss = self._loss_fn(
            output, y_onehot, l2distsq, self.c)
        loss.backward()
        optimizer.step()

        if ii % 1000 == 1:
            print('step: {}, dis: {:.2f}, loss1: {:.2f}.'.format(
                ii, l2dist.item(), adv_loss.item()))

        final_advs = adv.data

    return final_advs
def perturb_(self, x, zh=None, y=None):
    """
    Optimise latent codes so that ``self.gan`` generates adversarial
    images close to ``x``.

    Each latent code in ``zh`` is repeated ``R`` times; the codes are
    updated with Adam while a multiplier ``LAM`` on the squared-distance
    term is updated with SGD after being passed through ``grad_reverse``.

    :param x: input images (reduced over dims ``(1, 2, 3)``).
    :param zh: initial latent codes, one per input.
    :param y: labels; one-hot encoded against ``self.num_classes``.
    :return: ``(images, latents)`` for the best candidate of each input,
        detached. Returned when the mean of the not-done mask drops below
        0.1 or on the last iteration. Falls through and returns ``None``
        if ``self.max_iterations`` is not positive.
    """
    # Built on x's device: a hard-coded .cuda() fails on CPU-only machines
    # and can land on a different GPU than the tensors it is added to.
    offsets = torch.arange(0, x.size(0), device=x.device) * R
    zhat = torch.repeat_interleave(zh, R, dim=0).detach()
    zhat.requires_grad_()
    x_tiled = torch.repeat_interleave(x, R, dim=0)
    y_onehot = to_one_hot(y, self.num_classes).float()
    y_onehot = torch.repeat_interleave(y_onehot, R, dim=0)

    # create a mask which checks whether attacks are done/not done
    not_dones_mask = torch.ones(zhat.shape[0])

    # initialize the dual variable/lagrange multiplier for the
    # perturbation constraint
    LAM = 1000 * torch.ones_like(not_dones_mask, device=x.device)
    LAM.requires_grad_()

    opt = optim.Adam([zhat], lr=ADAM_LR)
    lam_opt = optim.SGD([LAM], lr=SGD_LR)
    lr_maker = StepLR(opt, step_size=I)
    LAM = grad_reverse(LAM)

    for i in range(self.max_iterations):
        gen = self.gan(zhat)
        adv_loss_flatten = self._look_ahead(gen, y_onehot)
        adv_loss = adv_loss_flatten.view(-1, R)
        l2_loss_flatten = (gen - x_tiled).pow(2).mean(dim=(1, 2, 3))
        # if the perturbation is below THR/2, don't include it in the
        # loss, set it to some constant
        l2_loss_flatten = l2_loss_flatten * (
            l2_loss_flatten > self.l2_square_threshold / 2).float() - (
            l2_loss_flatten <= self.l2_square_threshold / 2).float()
        l2_loss = l2_loss_flatten.view(-1, R)

        not_dones_mask = 1 - (
            l2_loss <= self.l2_square_threshold
        ).float() * (adv_loss <= -1).float()
        # weird here. For each image, not_dones will be all 1 or all 0
        # NOTE(review): .repeat(1, R) tiles the per-image mask, whereas the
        # candidates were laid out with repeat_interleave -- confirm that
        # this ordering is intended for batches with more than one image.
        not_dones_mask = not_dones_mask.min(dim=1)[0].repeat(1, R)
        not_dones_mask = not_dones_mask.view(-1, 1)

        # Pick, per image, the candidate with the lowest adversarial loss
        # among those within the distance threshold.
        ind = (adv_loss + LARGE_NUM * (
            l2_loss > self.l2_square_threshold).float()).argmin(dim=1)
        ind = ind + offsets

        best_adv_loss = adv_loss_flatten[ind]
        best_l2_loss = l2_loss_flatten[ind]

        # evaluate and terminate early to prevent dividing by zero
        if not_dones_mask.mean() < 0.1 or i == self.max_iterations - 1:
            return gen[ind].clone().detach(), zhat[ind].clone().detach()

        # The batch mean is reported: calling .item() directly on these
        # per-image tensors raises for more than one image. For a single
        # image the printed value is unchanged.
        print("----")
        print("Norms", best_l2_loss.mean().item())
        print("Losses", best_adv_loss.mean().item())
        print("Success rate: ", 1. - not_dones_mask.mean())
        print("Lambda: ", LAM)

        not_dones_mask = not_dones_mask / not_dones_mask.mean()

        opt.zero_grad()
        lam_opt.zero_grad()

        # Accumulate the gradient of the classification margin over every
        # noise sample in the buffer.
        for noise in self.noise_buffer:
            gen = self.gan(zhat)
            output = self.predict(gen + noise)
            real = (y_onehot * output).sum(dim=1)
            other = ((1.0 - y_onehot) * output
                     - y_onehot * LARGE_NUM).max(dim=1)[0]
            loss = (real - other) / SAMPLES_PER_ITER
            ((loss * not_dones_mask).mean()).backward()

        gen = self.gan(zhat)
        l2_loss_flatten = (gen - x_tiled).pow(2).mean(dim=(1, 2, 3))
        # if the perturbation is below THR/2, don't include it in the
        # loss, set it to some constant
        l2_loss_flatten = l2_loss_flatten * (
            l2_loss_flatten > self.l2_square_threshold / 2).float() - (
            l2_loss_flatten <= self.l2_square_threshold / 2).float()
        ((LAM * l2_loss_flatten * not_dones_mask).mean()).backward()

        opt.step()
        lam_opt.step()
        lr_maker.step()
def perturb(self, inputs: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """
    Performs the adversary of the model given the inputs and labels.

    Examples that are not yet adversarial take a normalised gradient step
    on the classification loss; examples that already are adversarial take
    a normalised gradient step that reduces the colour difference to the
    original. Both step sizes follow a cosine annealing schedule.

    :param inputs: images; clamped to ``[-1, 1]`` and rescaled to
        ``[0, 1]`` internally.
    :param labels: labels passed to the cross-entropy loss and compared
        against the model's predictions.
    :return: for each example, the adversarial image with the smallest
        colour distance found, rescaled back to ``[-1, 1]``; examples
        never marked adversarial keep their (clamped) input. Returns
        ``None`` after printing a message when a non-zero
        ``self.confidence`` is combined with ``self.targeted``.
    """
    # scale to [0, 1]
    inputs = (inputs.clamp(-1, 1) * 0.5 + 0.5).clamp(0, 1).detach().clone()

    X_adv_round_best = inputs.clone()
    alpha_l_min = self.alpha_l_init / 100
    alpha_c_min = self.alpha_c_init / 10
    multiplier = -1 if self.targeted else 1
    inputs_LAB = rgb2lab_diff(inputs, self.device)
    batch_size = inputs.shape[0]
    delta = torch.zeros_like(inputs, requires_grad=True)
    mask_isadv = torch.zeros(batch_size, dtype=torch.bool).to(self.device)
    color_l2_delta_bound_best = (torch.ones(batch_size) * 100000).to(
        self.device)

    # Built lazily from the first logits so that the number of classes
    # follows the model instead of being fixed to 10.
    labels_onehot = None

    if self.targeted and self.confidence != 0:
        print('Only support setting confidence in untargeted case!')
        return

    # Stateless loss module; building it once is enough.
    criterion = nn.CrossEntropyLoss(reduction='sum')

    for i in range(self.max_iterations):
        # cosine annealing for alpha_l_init and alpha_c_init
        anneal = 1 + cos(i / self.max_iterations * pi)
        alpha_c = alpha_c_min + 0.5 * (
            self.alpha_c_init - alpha_c_min) * anneal
        alpha_l = alpha_l_min + 0.5 * (
            self.alpha_l_init - alpha_l_min) * anneal

        loss = multiplier * criterion(
            self.predict((inputs + delta) * 2. - 1.), labels)
        loss.backward()
        grad_a = delta.grad.clone()
        delta.grad.zero_()
        # The permutes move the batch axis last so that the division by
        # the per-example gradient norm broadcasts.
        delta.data[~mask_isadv] = delta.data[~mask_isadv] + alpha_l * (
            grad_a.permute(1, 2, 3, 0)
            / torch.norm(grad_a.view(batch_size, -1), dim=1)
        ).permute(3, 0, 1, 2)[~mask_isadv]

        d_map = ciede2000_diff(
            inputs_LAB,
            rgb2lab_diff(inputs + delta, self.device),
            self.device).unsqueeze(1)
        color_dis = torch.norm(d_map.view(batch_size, -1), dim=1)
        color_loss = color_dis.sum()
        color_loss.backward()
        grad_color = delta.grad.clone()
        delta.grad.zero_()
        delta.data[mask_isadv] = delta.data[mask_isadv] - alpha_c * (
            grad_color.permute(1, 2, 3, 0)
            / torch.norm(grad_color.view(batch_size, -1), dim=1)
        ).permute(3, 0, 1, 2)[mask_isadv]

        # Keep inputs + delta inside [0, 1].
        delta.data = (inputs + delta.data).clamp(0, 1) - inputs
        X_adv_round = inputs + delta.data

        if not self.targeted and self.confidence != 0:
            logits = self.predict(X_adv_round * 2. - 1.)
            if labels_onehot is None:
                labels_onehot = to_one_hot(
                    labels, logits.size(1)).to(logits.dtype)
            real = (labels_onehot * logits).sum(dim=1)
            # TODO: make loss modular, write a loss class
            other = ((1.0 - labels_onehot) * logits
                     - (labels_onehot * TARGET_MULT)).max(1)[0]
            # Use the configured confidence as the required margin; it was
            # hard-coded to 40, which ignored any other setting.
            mask_isadv = (real - other) <= -self.confidence
        elif self.confidence == 0:
            predicted = torch.argmax(
                self.predict(X_adv_round * 2. - 1.), dim=1)
            if self.targeted:
                mask_isadv = predicted == labels
            else:
                mask_isadv = predicted != labels

        # Record adversarial examples that improve on the best colour
        # distance seen so far.
        mask_best = (color_dis.data < color_l2_delta_bound_best)
        mask = mask_best * mask_isadv
        color_l2_delta_bound_best[mask] = color_dis.data[mask]
        X_adv_round_best[mask] = X_adv_round[mask]

    return X_adv_round_best * 2. - 1.
def perturb(self, x, y=None):
    """
    Run the attack on ``x`` and return the adversarial examples found.

    For each of ``self.binary_search_steps`` outer steps, the slack
    variable ``yy_k`` takes a gradient step on the loss computed with
    ``opt=True`` (with a polynomially decaying learning rate), followed by
    ``self._fast_iterative_shrinkage_thresholding``. The resulting iterate
    ``xx_k`` is scored with the distance selected by
    ``self.decision_rule``.

    :param x: input tensor.
    :param y: labels; when ``None`` after input verification, the model's
        predicted labels are used.
    :return: ``final_advs``, initialised as a clone of ``x`` and updated
        by ``self._update_if_smaller_dist_succeed``.
    :raises ValueError: if ``self.decision_rule`` is neither ``'EN'`` nor
        ``'L1'`` when a distance has to be computed.
    """
    x, y = self._verify_and_process_inputs(x, y)

    # Initialization
    if y is None:
        y = self._get_predicted_label(x)

    x = replicate_input(x)
    batch_size = len(x)
    device = x.device
    coeff_lower_bound = x.new_zeros(batch_size)
    coeff_upper_bound = x.new_ones(batch_size) * COEFF_UPPER
    loss_coeffs = torch.ones_like(y).float() * self.initial_const

    final_dist = torch.full(
        (batch_size, ), DIST_UPPER, dtype=torch.float, device=device)
    final_labels = torch.full(
        (batch_size, ), INVALID_LABEL, dtype=torch.long, device=device)
    final_advs = x.clone()
    y_onehot = to_one_hot(y, self.num_classes).float()

    # Start binary search
    for outer_step in range(self.binary_search_steps):
        self.global_step = 0

        # slack vector from the paper
        yy_k = nn.Parameter(x.clone())
        xx_k = x.clone()

        cur_dist = torch.full(
            (batch_size, ), DIST_UPPER, dtype=torch.float, device=device)
        cur_labels = torch.full(
            (batch_size, ), INVALID_LABEL, dtype=torch.long, device=device)

        prevloss = PREV_LOSS_INIT

        if (self.repeat and outer_step == (self.binary_search_steps - 1)):
            loss_coeffs = coeff_upper_bound

        lr = self.learning_rate

        for ii in range(self.max_iterations):
            # reset gradient
            if yy_k.grad is not None:
                yy_k.grad.detach_()
                yy_k.grad.zero_()

            # loss over yy_k with only L2 same as C&W
            # we don't update L1 loss with SGD because we use ISTA
            output = self.predict(yy_k)
            l2distsq = calc_l2distsq(yy_k, x)
            loss_opt = self._loss_fn(
                output, y_onehot, None, l2distsq, loss_coeffs, opt=True)
            loss_opt.backward()

            # gradient step
            # Keyword alpha replaces the deprecated add_(scalar, tensor)
            # positional overload.
            yy_k.data.add_(yy_k.grad.data, alpha=-lr)
            self.global_step += 1

            # polynomial decay of learning rate
            lr = self.init_learning_rate * \
                (1 - self.global_step / self.max_iterations)**0.5

            yy_k, xx_k = self._fast_iterative_shrinkage_thresholding(
                x, yy_k, xx_k)

            # loss ElasticNet or L1 over xx_k
            output = self.predict(xx_k)
            l2distsq = calc_l2distsq(xx_k, x)
            l1dist = calc_l1dist(xx_k, x)

            if self.decision_rule == 'EN':
                dist = l2distsq + (l1dist * self.beta)
            elif self.decision_rule == 'L1':
                dist = l1dist
            else:
                # Without this branch an unknown rule surfaces as an
                # unbound-variable error on dist below.
                raise ValueError(
                    "Unknown decision_rule: {!r}".format(
                        self.decision_rule))
            loss = self._loss_fn(
                output, y_onehot, l1dist, l2distsq, loss_coeffs)

            if self.abort_early:
                # "or 1" keeps the modulus non-zero when max_iterations
                # is smaller than NUM_CHECKS.
                if ii % (self.max_iterations // NUM_CHECKS or 1) == 0:
                    if loss > prevloss * ONE_MINUS_EPS:
                        break
                    prevloss = loss

            self._update_if_smaller_dist_succeed(
                xx_k.data, y, output, dist, batch_size,
                cur_dist, cur_labels,
                final_dist, final_labels, final_advs)

        self._update_loss_coeffs(
            y, cur_labels, batch_size,
            loss_coeffs, coeff_upper_bound, coeff_lower_bound)

    return final_advs