Example #1
    def _outputs_and_loss(self, x, modifiers, starting_atanh, y, const, taus):
        adversarials = tanh_rescale(starting_atanh + modifiers, self.clip_min,
                                    self.clip_max)

        outputs = self.predict(adversarials)
        y_onehot = to_one_hot(y, self.num_classes).float()

        real = (y_onehot * outputs).sum(dim=1)

        other = ((1.0 - y_onehot) * outputs -
                 (y_onehot * TARGET_MULT)).max(dim=1)[0]
        # Subtracting y_onehot * TARGET_MULT keeps the true class from being picked by the max

        if self.targeted:
            loss1 = torch.clamp(other - real, min=0.)
        else:
            loss1 = torch.clamp(real - other, min=0.)

        loss1 = const * loss1

        image_dimensions = tuple(range(1, len(x.shape)))
        taus_shape = (-1, ) + (1, ) * (len(x.shape) - 1)

        penalties = torch.clamp(torch.abs(x - adversarials) -
                                taus.view(taus_shape),
                                min=0)
        loss2 = torch.sum(penalties, dim=image_dimensions)

        assert loss1.shape == loss2.shape

        loss = loss1 + loss2
        return outputs.detach(), loss
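
The real and other terms above implement the standard Carlini-Wagner hinge. A minimal, self-contained sketch of that computation (not taken from the source above; torch.nn.functional.one_hot stands in for to_one_hot and 10000.0 for TARGET_MULT):

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5, -1.0],
                       [0.1, 0.3, 0.2]])      # dummy model outputs
y = torch.tensor([0, 2])                      # true labels
y_onehot = F.one_hot(y, num_classes=3).float()

real = (y_onehot * logits).sum(dim=1)         # logit of the true class
# Subtracting a large constant at the true-class position keeps it out of the max.
other = ((1.0 - y_onehot) * logits - y_onehot * 10000.0).max(dim=1)[0]

untargeted_loss = torch.clamp(real - other, min=0.)
print(untargeted_loss)                        # tensor([1.5000, 0.0000])

The second sample is already misclassified, so its hinge term is zero.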
Example #2
    def perturb(self, x, y=None):
        x, y = self._verify_and_process_inputs(x, y)

        # Initialization
        if y is None:
            y = self._get_predicted_label(x)
        x = replicate_input(x)
        batch_size = len(x)
        coeff_lower_bound = x.new_zeros(batch_size)
        coeff_upper_bound = x.new_ones(batch_size) * CARLINI_COEFF_UPPER
        loss_coeffs = torch.ones_like(y).float() * self.initial_const
        final_l2distsqs = [CARLINI_L2DIST_UPPER] * batch_size
        final_labels = [INVALID_LABEL] * batch_size
        final_advs = x
        x_atanh = self._get_arctanh_x(x)
        y_onehot = to_one_hot(y, self.num_classes).float()

        final_l2distsqs = torch.FloatTensor(final_l2distsqs).to(x.device)
        final_labels = torch.LongTensor(final_labels).to(x.device)

        # Start binary search
        for outer_step in range(self.binary_search_steps):
            delta = nn.Parameter(torch.zeros_like(x))
            optimizer = optim.Adam([delta], lr=self.learning_rate)
            cur_l2distsqs = [CARLINI_L2DIST_UPPER] * batch_size
            cur_labels = [INVALID_LABEL] * batch_size
            cur_l2distsqs = torch.FloatTensor(cur_l2distsqs).to(x.device)
            cur_labels = torch.LongTensor(cur_labels).to(x.device)
            prevloss = PREV_LOSS_INIT

            # record the current output (kept on the same device as the input)
            cur_output = torch.zeros(x.size()[0],
                                     self.num_classes).float().to(x.device)

            if (self.repeat and outer_step == (self.binary_search_steps - 1)):
                loss_coeffs = coeff_upper_bound
            for ii in range(self.max_iterations):
                loss, l2distsq, output, adv_img = \
                    self._forward_and_update_delta(
                        optimizer, x_atanh, delta, y_onehot, loss_coeffs)
                if self.abort_early:
                    if ii % (self.max_iterations // NUM_CHECKS or 1) == 0:
                        if loss > prevloss * ONE_MINUS_EPS:
                            break
                        prevloss = loss

                self._update_if_smaller_dist_succeed(adv_img, y, output,
                                                     l2distsq, batch_size,
                                                     cur_l2distsqs, cur_labels,
                                                     final_l2distsqs,
                                                     final_labels, final_advs,
                                                     cur_output)

            self._update_loss_coeffs(y, cur_labels, batch_size, loss_coeffs,
                                     coeff_upper_bound, coeff_lower_bound,
                                     cur_output)

        return final_advs
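
The helpers _update_if_smaller_dist_succeed and _update_loss_coeffs are not shown above. For orientation, a generic sketch of the per-sample binary search over the loss coefficient that CW-style attacks perform (an assumption about the typical logic, not the actual body of _update_loss_coeffs):

import torch

def binary_search_coeffs(succeeded, loss_coeffs, lower, upper, coeff_upper=1e10):
    # succeeded: bool tensor, True where an adversarial example was found
    # with the current coefficient.
    upper[succeeded] = torch.min(upper[succeeded], loss_coeffs[succeeded])
    lower[~succeeded] = torch.max(lower[~succeeded], loss_coeffs[~succeeded])
    bisect = upper < coeff_upper
    # Bisect once an upper bound has been established; otherwise grow by 10x.
    loss_coeffs[bisect] = (lower[bisect] + upper[bisect]) / 2
    loss_coeffs[~bisect] *= 10
    return loss_coeffs, lower, upper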
Example #3
    def _outputs_and_loss(self,
                          x,
                          modifiers,
                          starting_atanh,
                          y,
                          const,
                          taus,
                          active_mask=None,
                          filter_=None):
        # If you're comparing with Carlini's original implementation: x is the
        # name given to tf.tanh(timg)/2, while adversarials corresponds to
        # tf.tanh(modifier + simg)/2, aka newimg.
        adversarials = tanh_rescale(starting_atanh + modifiers, self.clip_min,
                                    self.clip_max)

        assert x.shape == adversarials.shape

        outputs = self._outputs(adversarials,
                                active_mask=active_mask,
                                filter_=filter_)
        assert outputs.shape == (adversarials.shape[0], self.num_classes)

        y_onehot = to_one_hot(y, self.num_classes).float()
        assert y_onehot.shape == outputs.shape

        real = (y_onehot * outputs).sum(dim=1)

        other = ((1.0 - y_onehot) * outputs -
                 (y_onehot * TARGET_MULT)).max(dim=1)[0]
        # Subtracting y_onehot * TARGET_MULT keeps the true class from being picked by the max

        if self.targeted:
            loss1 = torch.clamp(other - real, min=0.)
        else:
            loss1 = torch.clamp(real - other, min=0.)

        image_dimensions = tuple(range(1, len(x.shape)))

        # Reshape taus to [batch_size, 1, 1, 1] for broadcasting
        taus_shape = (len(x), ) + (1, ) * (len(x.shape) - 1)

        penalties = torch.clamp(torch.abs(adversarials - x) -
                                taus.view(taus_shape),
                                min=0)
        assert penalties.shape == x.shape

        loss2 = torch.sum(penalties, dim=image_dimensions)
        assert loss2.shape == loss1.shape

        losses = const * loss1 + loss2
        assert losses.shape == (len(x), )

        # losses is returned as a (batch_size,) vector to support abort_early
        # Only later it is converted to a scalar
        return outputs.detach(), losses
Example #4
    def forward(self, input, target):
        """
        :param input: pre-softmax/logits.
        :param target: true labels.
        :return: CW loss value.
        """
        num_classes = input.size(1)
        label_mask = to_one_hot(target, num_classes=num_classes).float()
        correct_logit = torch.sum(label_mask * input, dim=1)
        wrong_logit = torch.max((1. - label_mask) * input, dim=1)[0]
        loss = -F.relu(correct_logit - wrong_logit + self.conf).sum()
        return loss
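
A standalone version of the same hinge, as a sketch of the call pattern (assuming conf is a scalar margin and using torch.nn.functional.one_hot in place of to_one_hot):

import torch
import torch.nn.functional as F

def cw_hinge_loss(logits, target, conf=0.0):
    num_classes = logits.size(1)
    label_mask = F.one_hot(target, num_classes=num_classes).float()
    correct_logit = torch.sum(label_mask * logits, dim=1)
    wrong_logit = torch.max((1. - label_mask) * logits, dim=1)[0]
    # Negative hinge: minimizing this loss pushes some wrong-class logit
    # above the correct one by at least conf.
    return -F.relu(correct_logit - wrong_logit + conf).sum()

loss = cw_hinge_loss(torch.randn(4, 10), torch.randint(0, 10, (4,)), conf=0.1)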
Example #5
    def perturb(self, x, y=None):
        x, y = self._verify_and_process_inputs(x, y)
        batch_size = len(x)
        loss_coeffs = x.new_ones(batch_size) * self.initial_const
        final_l2dists = [L2DIST_UPPER] * batch_size
        final_labels = [INVALID_LABEL] * batch_size
        final_step = [INVALID_LABEL] * batch_size
        final_advs = x.clone()  # fix a bug from advertorch: return a copy rather than the input tensor

        # TODO: refactor the theta generation
        theta = torch.tensor([[[1., 0., 0.],
                               [0., 1., 0.]]]).to(x.device)
        theta = theta.repeat((x.shape[0], 1, 1))


        grid = F.affine_grid(theta, x.size())

        grid_ori = grid.clone()
        y_onehot = to_one_hot(y, self.num_classes).float()

        clip_min = np.ones(grid_ori.shape[:]) * -1
        clip_max = np.ones(grid_ori.shape[:]) * 1
        clip_bound = list(zip(clip_min.flatten(), clip_max.flatten()))
        grid_ret = grid.clone().data.cpu().numpy().flatten().astype(float)
        from scipy.optimize import fmin_l_bfgs_b
        for outer_step in range(self.search_steps):
            grid_ret, f, d = fmin_l_bfgs_b(
                self._loss_fn_spatial,
                grid_ret,
                args=(
                    x.clone().detach(),
                    y_onehot, loss_coeffs,
                    grid_ori.clone().detach()),
                maxiter=self.max_iterations,
                bounds=clip_bound,
                iprint=0,
                maxls=100,
            )
            grid = torch.from_numpy(
                grid_ret.reshape(grid_ori.shape)).float().to(x.device)
            adv_x = F.grid_sample(x.clone(), grid)
            l2s = calc_l2distsq(grid.data, grid_ori.data)
            output = self.predict(adv_x)
            self._update_if_better(
                adv_x.data, y, output.data, l2s, batch_size,
                final_l2dists, final_labels, final_advs,
                outer_step, final_step)

        return final_advs
Example #6
def cw_log_loss(output, target, targeted=False, buff=1e-5):
    """
    :param outputs: pre-softmax/logits.
    :param target: true labels.
    :return: CW loss value.
    """
    num_classes = output.size(1)
    label_mask = to_one_hot(target, num_classes=num_classes).float()
    correct_logit = torch.log(torch.sum(label_mask * output, dim=1) + buff)
    wrong_logit = torch.log(
        torch.max((1. - label_mask) * output, dim=1)[0] + buff)

    if targeted:
        loss = -0.5 * F.relu(wrong_logit - correct_logit + 50.)
    else:
        loss = -0.5 * F.relu(correct_logit - wrong_logit + 50.)
    return loss
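
A minimal call sketch for cw_log_loss. Because the function takes a logarithm, it is assumed here that output holds non-negative scores (e.g. softmax probabilities), with buff guarding against log(0):

import torch
import torch.nn.functional as F

# cw_log_loss (and the to_one_hot helper it uses) is assumed to be
# importable from the surrounding codebase.
scores = F.softmax(torch.randn(8, 10), dim=1)
target = torch.randint(0, 10, (8,))

per_sample = cw_log_loss(scores, target, targeted=False)  # shape (8,)
total = per_sample.sum()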
Example #7
    def perturb(self, x, y=None):
        x, y = self._verify_and_process_inputs(x, y)

        # Initialization
        if y is None:
            y = self._get_predicted_label(x)
        x = replicate_input(x)
        # batch_size = len(x)

        final_advs = x
        x_atanh = self._get_arctanh_x(x)
        y_onehot = to_one_hot(y, self.num_classes).float()

        delta = nn.Parameter(torch.zeros_like(x))
        optimizer = optim.Adam([delta], lr=self.learning_rate)
        prevloss = PREV_LOSS_INIT

        for ii in range(self.max_iterations):
            # loss, l2distsq, output, adv_img = \
            #     self._forward_and_update_delta(
            #         optimizer, x_atanh, delta, y_onehot, self.c)

            optimizer.zero_grad()
            adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
            transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)
            output = self.predict(adv)
            l2distsq = calc_l2distsq(adv, transimgs_rescale)
            loss, l2dist, adv_loss = self._loss_fn(output, y_onehot, l2distsq, self.c)
            loss.backward()
            optimizer.step()

            if ii % 1000 == 1:
                print('step: {}, dis: {:.2f}, loss1: {:.2f}.'.format(ii, l2dist.item(), adv_loss.item()))

            # if self.abort_early:
            #     if ii % (self.max_iterations // NUM_CHECKS or 1) == 0:
            #         if loss > prevloss * ONE_MINUS_EPS:
            #             break
            #         prevloss = loss

            final_advs = adv.data
        return final_advs
Example #8
    def perturb_(self, x, zh=None, y=None):
        offsets = torch.arange(0, x.size(0)).cuda() * R

        zhat = torch.repeat_interleave(zh, R, dim=0).detach()
        zhat.requires_grad_()
        x_tiled = torch.repeat_interleave(x, R, dim=0)

        y_onehot = to_one_hot(y, self.num_classes).float()
        y_onehot = torch.repeat_interleave(y_onehot, R, dim=0)

        # create a mask which checks whether attacks are done/not done
        not_dones_mask = torch.ones(zhat.shape[0])
        # initialize the dual variable/lagrange multiplier for the perturbation constraint
        LAM = 1000 * torch.ones_like(not_dones_mask, device=x.device)
        LAM.requires_grad_()

        opt = optim.Adam([zhat], lr=ADAM_LR)
        lam_opt = optim.SGD([LAM], lr=SGD_LR)
        lr_maker = StepLR(opt, step_size=I)

        LAM = grad_reverse(LAM)

        for i in range(self.max_iterations):

            gen = self.gan(zhat)
            adv_loss_flatten = self._look_ahead(gen, y_onehot)
            adv_loss = adv_loss_flatten.view(-1, R)

            l2_loss_flatten = (gen - x_tiled).pow(2).mean(dim=(1, 2, 3))
            # if the perturbation is below THR/2, don't include it in the loss, set it to some constant
            l2_loss_flatten = l2_loss_flatten * (
                l2_loss_flatten > self.l2_square_threshold / 2).float() - (
                    l2_loss_flatten <= self.l2_square_threshold / 2).float()
            l2_loss = l2_loss_flatten.view(-1, R)

            not_dones_mask = 1 - (l2_loss <= self.l2_square_threshold
                                  ).float() * (adv_loss <= -1).float()

            # For each image, not_dones_mask is all 1s or all 0s across its R
            # restarts, so take the per-image minimum and broadcast it back.
            not_dones_mask = not_dones_mask.min(dim=1)[0].repeat(1, R)
            not_dones_mask = not_dones_mask.view(-1, 1)

            ind = (adv_loss + LARGE_NUM *
                   (l2_loss > self.l2_square_threshold).float()).argmin(dim=1)
            ind = ind + offsets

            best_adv_loss = adv_loss_flatten[ind]
            best_l2_loss = l2_loss_flatten[ind]
            # evaluate and terminate early to prevent dividing by zero
            if not_dones_mask.mean() < 0.1 or i == self.max_iterations - 1:
                return gen[ind].clone().detach(), zhat[ind].clone().detach()

            print("----")
            print("Norms", best_l2_loss.item())
            print("Losses", best_adv_loss.item())
            print("Success rate: ", 1. - not_dones_mask.mean())
            print("Lambda: ", LAM)

            not_dones_mask = not_dones_mask / not_dones_mask.mean()

            opt.zero_grad()
            lam_opt.zero_grad()

            for noise in self.noise_buffer:
                gen = self.gan(zhat)
                output = self.predict(gen + noise)
                real = (y_onehot * output).sum(dim=1)
                other = ((1.0 - y_onehot) * output -
                         y_onehot * LARGE_NUM).max(dim=1)[0]

                loss = (real - other) / SAMPLES_PER_ITER
                ((loss * not_dones_mask).mean()).backward()

            gen = self.gan(zhat)
            l2_loss_flatten = (gen - x_tiled).pow(2).mean(dim=(1, 2, 3))
            # if the perturbation is below THR/2, don't include it in the loss, set it to some constant
            l2_loss_flatten = l2_loss_flatten * (
                l2_loss_flatten > self.l2_square_threshold / 2).float() - (
                    l2_loss_flatten <= self.l2_square_threshold / 2).float()
            ((LAM * l2_loss_flatten * not_dones_mask).mean()).backward()
            opt.step()
            lam_opt.step()
            lr_maker.step()
Example #9
    def perturb(self, inputs: torch.Tensor,
                labels: torch.Tensor) -> torch.Tensor:
        """
        Performs the adversary of the model given the inputs and labels.
        Parameters
        """

        # scale to [0, 1]
        inputs = (inputs.clamp(-1, 1) * 0.5 + 0.5).clamp(0, 1).detach().clone()
        X_adv_round_best = inputs.clone()

        alpha_l_min = self.alpha_l_init / 100
        alpha_c_min = self.alpha_c_init / 10
        multiplier = -1 if self.targeted else 1

        inputs_LAB = rgb2lab_diff(inputs, self.device)
        batch_size = inputs.shape[0]
        delta = torch.zeros_like(inputs, requires_grad=True)
        mask_isadv = torch.zeros(batch_size, dtype=torch.bool).to(self.device)
        color_l2_delta_bound_best = (torch.ones(batch_size) * 100000).to(
            self.device)

        if not self.targeted and self.confidence != 0:
            # TODO: have num_classes as class member. However, it is not compatible with other attacks in Advertorch
            labels_onehot = to_one_hot(labels, 10)
        if self.targeted and self.confidence != 0:
            print('Only supports setting confidence in the untargeted case!')
            return
        for i in range(self.max_iterations):
            # cosine annealing for alpha_l_init and alpha_c_init
            alpha_c = alpha_c_min + 0.5 * (self.alpha_c_init - alpha_c_min) * (
                1 + cos(i / self.max_iterations * pi))
            alpha_l = alpha_l_min + 0.5 * (self.alpha_l_init - alpha_l_min) * (
                1 + cos(i / self.max_iterations * pi))
            loss = multiplier * nn.CrossEntropyLoss(reduction='sum')(
                self.predict((inputs + delta) * 2. - 1.), labels)
            loss.backward()
            grad_a = delta.grad.clone()
            delta.grad.zero_()
            delta.data[~mask_isadv] = delta.data[~mask_isadv] + alpha_l * (
                grad_a.permute(1, 2, 3, 0) /
                torch.norm(grad_a.view(batch_size, -1), dim=1)).permute(
                    3, 0, 1, 2)[~mask_isadv]
            d_map = ciede2000_diff(inputs_LAB,
                                   rgb2lab_diff(inputs + delta, self.device),
                                   self.device).unsqueeze(1)
            color_dis = torch.norm(d_map.view(batch_size, -1), dim=1)
            color_loss = color_dis.sum()
            color_loss.backward()
            grad_color = delta.grad.clone()
            delta.grad.zero_()
            delta.data[mask_isadv] = delta.data[mask_isadv] - alpha_c * (
                grad_color.permute(1, 2, 3, 0) /
                torch.norm(grad_color.view(batch_size, -1), dim=1)).permute(
                    3, 0, 1, 2)[mask_isadv]

            delta.data = (inputs + delta.data).clamp(0, 1) - inputs
            X_adv_round = inputs + delta.data

            if not self.targeted and self.confidence != 0:
                logits = self.predict(X_adv_round * 2. - 1.)
                real = (labels_onehot * logits).sum(dim=1)

                # TODO: make loss modular, write a loss class
                other = ((1.0 - labels_onehot) * logits -
                         (labels_onehot * TARGET_MULT)).max(1)[0]
                mask_isadv = (real - other) <= -40
            elif self.confidence == 0:
                if self.targeted:
                    mask_isadv = torch.argmax(self.predict(X_adv_round * 2. -
                                                           1.),
                                              dim=1) == labels
                else:
                    mask_isadv = torch.argmax(
                        self.predict(X_adv_round * 2. - 1.), dim=1) != labels
            mask_best = (color_dis.data < color_l2_delta_bound_best)
            mask = mask_best * mask_isadv
            color_l2_delta_bound_best[mask] = color_dis.data[mask]
            X_adv_round_best[mask] = X_adv_round[mask]

        return X_adv_round_best * 2. - 1.
Example #10
    def perturb(self, x, y=None):

        x, y = self._verify_and_process_inputs(x, y)

        # Initialization
        if y is None:
            y = self._get_predicted_label(x)

        x = replicate_input(x)
        batch_size = len(x)
        coeff_lower_bound = x.new_zeros(batch_size)
        coeff_upper_bound = x.new_ones(batch_size) * COEFF_UPPER
        loss_coeffs = torch.ones_like(y).float() * self.initial_const

        final_dist = [DIST_UPPER] * batch_size
        final_labels = [INVALID_LABEL] * batch_size

        final_advs = x.clone()
        y_onehot = to_one_hot(y, self.num_classes).float()

        final_dist = torch.FloatTensor(final_dist).to(x.device)
        final_labels = torch.LongTensor(final_labels).to(x.device)

        # Start binary search
        for outer_step in range(self.binary_search_steps):

            self.global_step = 0

            # slack vector from the paper
            yy_k = nn.Parameter(x.clone())
            xx_k = x.clone()

            cur_dist = [DIST_UPPER] * batch_size
            cur_labels = [INVALID_LABEL] * batch_size

            cur_dist = torch.FloatTensor(cur_dist).to(x.device)
            cur_labels = torch.LongTensor(cur_labels).to(x.device)

            prevloss = PREV_LOSS_INIT

            if (self.repeat and outer_step == (self.binary_search_steps - 1)):
                loss_coeffs = coeff_upper_bound

            lr = self.learning_rate

            for ii in range(self.max_iterations):

                # reset gradient
                if yy_k.grad is not None:
                    yy_k.grad.detach_()
                    yy_k.grad.zero_()

                # The loss over yy_k uses only the L2 term, as in C&W; the L1
                # term is not updated with SGD because ISTA handles it below.
                output = self.predict(yy_k)
                l2distsq = calc_l2distsq(yy_k, x)
                loss_opt = self._loss_fn(output,
                                         y_onehot,
                                         None,
                                         l2distsq,
                                         loss_coeffs,
                                         opt=True)
                loss_opt.backward()

                # gradient step
                yy_k.data.add_(yy_k.grad.data, alpha=-lr)
                self.global_step += 1

                # polynomial decay of the learning rate
                lr = self.init_learning_rate * \
                    (1 - self.global_step / self.max_iterations)**0.5

                yy_k, xx_k = self._fast_iterative_shrinkage_thresholding(
                    x, yy_k, xx_k)

                # loss ElasticNet or L1 over xx_k
                output = self.predict(xx_k)
                l2distsq = calc_l2distsq(xx_k, x)
                l1dist = calc_l1dist(xx_k, x)

                if self.decision_rule == 'EN':
                    dist = l2distsq + (l1dist * self.beta)
                elif self.decision_rule == 'L1':
                    dist = l1dist
                loss = self._loss_fn(output, y_onehot, l1dist, l2distsq,
                                     loss_coeffs)

                if self.abort_early:
                    if ii % (self.max_iterations // NUM_CHECKS or 1) == 0:
                        if loss > prevloss * ONE_MINUS_EPS:
                            break
                        prevloss = loss

                self._update_if_smaller_dist_succeed(xx_k.data, y, output,
                                                     dist, batch_size,
                                                     cur_dist, cur_labels,
                                                     final_dist, final_labels,
                                                     final_advs)

            self._update_loss_coeffs(y, cur_labels, batch_size, loss_coeffs,
                                     coeff_upper_bound, coeff_lower_bound)

        return final_advs
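
The helper _fast_iterative_shrinkage_thresholding is not shown above. As background, a sketch of the projected soft-thresholding (ISTA) step with FISTA-style momentum described in the EAD paper; beta, clip_min, clip_max and the momentum schedule are assumptions, not the actual method body:

import torch

def ista_step(x, yy_k, xx_k, beta, clip_min, clip_max, global_step):
    diff = yy_k - x
    # Soft-threshold the perturbation by beta, projecting back into the valid box.
    shrunk = torch.where(
        diff > beta, torch.clamp(yy_k - beta, max=clip_max),
        torch.where(diff < -beta, torch.clamp(yy_k + beta, min=clip_min), x))
    # FISTA-style momentum on the slack variable yy_k.
    zt = global_step / (global_step + 3.0)
    yy_k_new = shrunk + zt * (shrunk - xx_k)
    return yy_k_new, shrunk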