    def perturb(self, z, y):
        z = z.detach().clone()
        y = y.detach().clone()

        delta = torch.zeros_like(z)
        delta.requires_grad_()

        for ii in range(self.nb_iter):
            outputs = self.predict(self.gan(z + delta))
            adv_loss = self.loss_fn(outputs, y)
            reg_loss = F.relu(delta.pow(2) - 0.01).mean(dim=-1).sum()
            loss = adv_loss - 100 * reg_loss
            loss.backward()

            if self.ord == 'inf':
                grad_sign = delta.grad.data.sign()
                delta.data = delta.data + batch_multiply(
                    self.eps_iter, grad_sign)
            elif self.ord == 'l2':
                grad = delta.grad.data
                grad = normalize_by_pnorm(grad)
                delta.data = delta.data + batch_multiply(self.eps_iter, grad)
            else:
                delta.data = delta.data + batch_multiply(
                    self.eps_iter, delta.grad.data)

            delta.grad.data.zero_()
            print('[{}/{}] Loss: {:.4f}'.format(ii, self.nb_iter,
                                               adv_loss.item()))

        return self.gan(z + delta)
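The method above runs PGD in the latent space of a generator, keeping the latent perturbation small with a ReLU-hinge penalty on the squared entries of delta. A minimal self-contained sketch of the same update with toy stand-ins (the `gan` and `clf` modules below are placeholders, not the attributes used above):

import torch
import torch.nn as nn
import torch.nn.functional as F

gan = nn.Sequential(nn.Linear(16, 28 * 28), nn.Sigmoid())   # toy "generator"
clf = nn.Linear(28 * 28, 10)                                 # toy classifier
z = torch.randn(4, 16)
y = torch.randint(0, 10, (4,))

delta = torch.zeros_like(z, requires_grad=True)
for _ in range(5):
    logits = clf(gan(z + delta))
    adv_loss = F.cross_entropy(logits, y)
    # penalize latent coordinates whose squared magnitude exceeds 0.01
    reg_loss = F.relu(delta.pow(2) - 0.01).mean(dim=-1).sum()
    loss = adv_loss - 100 * reg_loss
    loss.backward()
    delta.data += 0.01 * delta.grad.sign()   # Linf-style ascent step
    delta.grad.zero_()
x_adv = gan(z + delta)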
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size per iteration.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf or 2).
    :param clip_min: (optional float) minimum value per input dimension.
    :param clip_max: (optional float) maximum value per input dimension.
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        outputs = predict(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data

        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)
        else:
            error = "Only ord = inf and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
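A minimal usage sketch for the function above, assuming the advertorch-style helpers it calls (batch_multiply, batch_clamp, clamp, normalize_by_pnorm, clamp_by_pnorm) are importable in the surrounding module; the model and data below are toy placeholders:

import torch
import torch.nn as nn
import numpy as np

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))  # toy classifier
x = torch.rand(4, 1, 28, 28)
y = torch.randint(0, 10, (4,))

x_adv = perturb_iterative(
    x, y, predict=model, nb_iter=10, eps=0.3, eps_iter=0.03,
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), ord=np.inf)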
Example 3
    def perturb(self, x, y=None):
        """
        Given examples (x, y), returns their adversarial counterparts with
        an attack length of eps.

        :param x: input tensor.
        :param y: label tensor.
                  - if None and self.targeted=False, compute y as predicted
                    labels.
                  - if self.targeted=True, then y must be the targeted labels.
        :return: tensor containing perturbed inputs.
        """

        x, y = self._verify_and_process_inputs(x, y)
        xadv = x.requires_grad_()
        if self.predict2 is not None:
            outputs = self.predict2(self.predict1(xadv))
        else:
            outputs = self.predict1(xadv)

        loss = self.loss_fn(outputs, y)
        if self.targeted:
            loss = -loss
        loss.backward()
        grad_sign = xadv.grad.detach().sign()

        xadv = xadv + batch_multiply(self.eps, grad_sign)

        xadv = clamp(xadv, self.clip_min, self.clip_max)

        return xadv.detach()
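The single-step update above is essentially FGSM; a self-contained sketch of the core step in plain PyTorch (toy model and data, arbitrary eps):

import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))  # toy classifier
x = torch.rand(2, 1, 28, 28)
y = torch.randint(0, 10, (2,))
eps = 0.3

x_adv = x.clone().requires_grad_()
loss = F.cross_entropy(model(x_adv), y)
loss.backward()
# one signed gradient step, then clip back into the valid data range
x_adv = (x_adv + eps * x_adv.grad.sign()).clamp(0.0, 1.0).detach()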
Example 4
def rand_init_delta(delta, x, ord, eps, clip_min, clip_max):
    # TODO: currently only one way of "uniform" sampling is considered.
    # For Linf, there are 3 ways:
    #   1) true uniform sampling: first compute the feasible rectangle, then sample
    #   2) sample uniformly in the eps box, then truncate to the data domain (implemented)
    #   3) sample uniformly in the data domain, then truncate to the eps box
    # For L2, true uniform sampling is hard, since it requires sampling uniformly
    #   inside the intersection of a cube and a ball, so there are 2 ways:
    #   1) sample uniformly in the data domain, then truncate to the L2 ball
    #       (implemented)
    #   2) sample uniformly in the L2 ball, then truncate to the data domain

    if isinstance(eps, torch.Tensor):
        assert len(eps) == len(delta)

    if ord == np.inf:
        delta.data.uniform_(-1, 1)
        delta.data = batch_multiply(eps, delta.data)
    elif ord == 2:
        delta.data.uniform_(0, 1)
        delta.data = delta.data - x
        delta.data = clamp_by_pnorm(delta.data, ord, eps)
    else:
        error = "Only ord = inf and ord = 2 have been implemented"
        raise NotImplementedError(error)

    delta.data = clamp(x + delta.data, min=clip_min, max=clip_max) - x
    return delta.data
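For the Linf case, the initialization above amounts to sampling delta uniformly in the eps box and truncating to the data domain. A plain-PyTorch sketch of that branch (toy shapes, arbitrary eps):

import torch

x = torch.rand(4, 3, 32, 32)
eps, clip_min, clip_max = 8.0 / 255, 0.0, 1.0

delta = torch.empty_like(x).uniform_(-1, 1) * eps       # uniform in [-eps, eps]
delta = (x + delta).clamp(clip_min, clip_max) - x       # keep x + delta in the data domain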
    def perturb(self, x, y=None):
        """
        Given examples (x, y), returns their adversarial counterparts with
        an attack length of eps.

        :param x: input tensor.
        :param y: label tensor.
                  - if None and self.targeted=False, compute y as predicted
                    labels.
                  - if self.targeted=True, then y must be the targeted labels.
        :return: tensor containing perturbed inputs.
        """
        x, y = self._verify_and_process_inputs(x, y)

        delta = torch.zeros_like(x)
        g = torch.zeros_like(x)

        delta = nn.Parameter(delta)

        for i in range(self.nb_iter):

            if delta.grad is not None:
                delta.grad.detach_()
                delta.grad.zero_()

            imgadv = x + delta
            outputs = self.predict(imgadv)
            loss = self.loss_fn(outputs, y)
            if self.targeted:
                loss = -loss
            loss.backward()

            g = self.decay_factor * g + normalize_by_pnorm(delta.grad.data,
                                                           p=1)
            # according to the paper it should be .sum(), but in the reference
            #   implementations (both cleverhans and the one linked from the
            #   paper) it is .mean(); in practice it should not matter
            if self.ord == np.inf:
                delta.data += batch_multiply(self.eps_iter, torch.sign(g))
                delta.data = batch_clamp(self.eps, delta.data)
                delta.data = clamp(
                    x + delta.data, min=self.clip_min, max=self.clip_max) - x
            elif self.ord == 2:
                delta.data += self.eps_iter * normalize_by_pnorm(g, p=2)
                delta.data *= clamp(
                    (self.eps * normalize_by_pnorm(delta.data, p=2) /
                     delta.data),
                    max=1.)
                delta.data = clamp(
                    x + delta.data, min=self.clip_min, max=self.clip_max) - x
            else:
                error = "Only ord = inf and ord = 2 have been implemented"
                raise NotImplementedError(error)

        rval = x + delta.data
        return rval
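The momentum update above follows MI-FGSM: a running direction accumulates L1-normalized gradients before the sign step. A self-contained sketch of that accumulation in plain PyTorch (toy model and data, arbitrary hyperparameters):

import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))  # toy classifier
x = torch.rand(2, 1, 28, 28)
y = torch.randint(0, 10, (2,))
eps, eps_iter, decay, nb_iter = 0.3, 0.03, 1.0, 5

delta = torch.zeros_like(x, requires_grad=True)
g = torch.zeros_like(x)
for _ in range(nb_iter):
    loss = F.cross_entropy(model(x + delta), y)
    loss.backward()
    grad = delta.grad.detach()
    # normalize the new gradient by its per-example L1 norm, then accumulate
    l1 = grad.abs().flatten(1).sum(1).clamp_min(1e-12).view(-1, 1, 1, 1)
    g = decay * g + grad / l1
    delta.data = (delta.data + eps_iter * g.sign()).clamp(-eps, eps)
    delta.data = (x + delta.data).clamp(0, 1) - x
    delta.grad.zero_()
x_adv = x + delta.detach()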
Example 6
    def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
        unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
        logit_margin = self.search_loss_fn(self.predict(xadv), y)

        maxeps = self.maxeps * torch.ones_like(y).float()

        curr_eps = bisection_search(prev_eps, unitptb, self.predict, x, y,
                                    self.search_loss_fn, logit_margin, maxeps,
                                    self.num_search_steps)
        return unitptb, curr_eps
Example 7
    def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
        unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
        logit_margin = elementwise_margin(self.predict(xadv), y)

        ones = torch.ones_like(y).float()
        maxeps = self.maxeps * ones

        curr_eps = bisection_search(maxeps * 0.5, unitptb, self.predict, x, y,
                                    elementwise_margin, logit_margin, maxeps,
                                    self.num_search_steps)
        return unitptb, curr_eps
Example 8
    def perturb(self, x, y, prev_eps):

        self.pgdadv.eps = prev_eps
        self.pgdadv.eps_iter = self.scale_eps_iter(self.pgdadv.eps,
                                                   self.pgdadv.nb_iter)
        with ctx_noparamgrad_and_eval(self.predict):
            xadv = self.pgdadv.perturb(x, y)

        unitptb, curr_eps = self._get_unitptb_and_eps(xadv, x, y, prev_eps)

        xadv = x + batch_multiply(curr_eps, unitptb)
        return xadv, curr_eps
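Examples 6-8 share one idea: normalize the perturbation found at a previous eps into a unit direction, then re-apply it with a refined per-example eps. A toy sketch of that rescaling (the refined eps would normally come from a bisection search such as the one in Example 11):

import torch

x = torch.rand(4, 3, 32, 32)
prev_eps = 0.5
xadv = (x + prev_eps * torch.randn_like(x).sign()).clamp(0, 1)  # stand-in attack result

unitptb = (xadv - x) / (prev_eps + 1e-12)          # direction at unit step size
curr_eps = torch.full((4,), 0.25)                  # refined per-example eps (illustrative)
xadv_refined = x + curr_eps.view(-1, 1, 1, 1) * unitptb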
    def _get_unitptb_and_eps(self, xadv, x, y, prev_eps):
        unitptb = batch_multiply(1. / (prev_eps + 1e-12), (xadv - x))
        adv_logits = self.predict(xadv)
        logit_margin = elementwise_margin(adv_logits, y)
        ones = torch.ones_like(y).float()
        # maxeps = self.maxeps * ones
        maxeps = torch.norm((xadv - x).view(x.shape[0], -1),
                            p=self.pgdadv.ord, dim=1)

        adv_pred = adv_logits.argmax(1)
        # print(1 - (adv_pred == y).float().mean())
        # print(maxeps.min(), maxeps.max())
        pred = adv_pred.clone()
        i = 0
        # print(i, self.pgdadv.eps, float((adv_pred == pred).float().mean()), float((pred == self.target_y).float().mean()), float(maxeps.min()), float(maxeps.max()))
        while i < 10:
            if self.pgdadv.targeted:
                unsuccessful_adv_idx = ((adv_pred != self.target_y) &
                                        (pred != self.target_y))
                if not unsuccessful_adv_idx.any():
                    break
            else:
                unsuccessful_adv_idx = (adv_pred == y) & (pred == y)
            maxeps[unsuccessful_adv_idx] *= 1.5
            maxeps_ = maxeps[unsuccessful_adv_idx]
            unitptb_ = unitptb[unsuccessful_adv_idx]
            x_ = x[unsuccessful_adv_idx]

            x_ = clamp(x_ + batch_multiply(maxeps_, unitptb_),
                       min=0., max=1.)
            pred[unsuccessful_adv_idx] = self.predict(x_).argmax(1)
            i += 1
            # print(i, self.pgdadv.eps, float((adv_pred == pred).float().mean()), float((pred == self.target_y).float().mean()), float(maxeps.min()), float(maxeps.max()))
        # print(logit_margin)
        curr_eps = bisection_search(
            maxeps * 0.5, unitptb, self.predict, x, y, elementwise_margin,
            logit_margin, maxeps, self.num_search_steps)
        if self.pgdadv.targeted:
            curr_eps[pred != self.target_y] = np.inf
        return unitptb, curr_eps
    def perturb(self, x, y, target_y=None):
        with ctx_noparamgrad_and_eval(self.predict):
            if self.pgdadv.targeted:
                self.target_y = target_y
                xadv = self.pgdadv.perturb(x, target_y)
                adv_pred = self.pgdadv.predict(xadv).argmax(1)
                # print((adv_pred == target_y).float().mean())
            else:
                xadv = self.pgdadv.perturb(x, y)
        # print(self.pgdadv.eps, x.shape, xadv.shape, torch.norm((x-xadv).view(x.shape[0],-1), p=float('inf'), dim=1).mean())
        unitptb, curr_eps = self._get_unitptb_and_eps(
            xadv, x, y, self.pgdadv.eps)
        xadv = clamp(x + batch_multiply(curr_eps, unitptb),
                     min=self.pgdadv.clip_min, max=self.pgdadv.clip_max)
        # print('')
        return xadv
Example 11
def bisection_search(cur_eps,
                     ptb,
                     model,
                     data,
                     label,
                     fn_margin,
                     margin_init,
                     maxeps,
                     num_steps,
                     cur_min=None,
                     clip_min=0.,
                     clip_max=1.):

    assert torch.all(cur_eps <= maxeps)

    margin = margin_init

    if cur_min is None:
        cur_min = torch.zeros_like(margin)
    cur_max = maxeps.clone().detach()

    for ii in range(num_steps):
        cur_min = torch.max((margin < 0).float() * cur_eps, cur_min)
        cur_max = torch.min(
            ((margin < 0).float() * maxeps + (margin >= 0).float() * cur_eps),
            cur_max)

        cur_eps = (cur_min + cur_max) / 2
        margin = fn_margin(
            model(
                clamp(data + batch_multiply(cur_eps, ptb),
                      min=clip_min,
                      max=clip_max)), label)

    assert torch.all(cur_eps <= maxeps)

    return cur_eps
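Each step above halves the per-example interval [cur_min, cur_max] based on the sign of the margin at the midpoint. The same principle on a single scalar (toy monotone function with its zero at 0.37):

def scalar_bisection(fn, lo=0.0, hi=1.0, steps=20):
    # shrink [lo, hi] around the point where fn changes sign
    for _ in range(steps):
        mid = (lo + hi) / 2
        if fn(mid) < 0:
            hi = mid
        else:
            lo = mid
    return (lo + hi) / 2

print(scalar_bisection(lambda eps: 0.37 - eps))  # ~0.37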
def perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0, sparsity=0.01, eot_samples=1):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param sparsity: fraction of coordinates kept in the sparse L1 step.
    :param eot_samples: number of forward passes averaged per iteration (EOT).

    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)
    n = normal.Normal(0, 100)  # note: defined but not used in the loop below
    delta.requires_grad_()
    for ii in range(nb_iter):
        avg_grad = torch.FloatTensor(xvar.shape)
        avg_grad.zero_()
        if xvar.is_cuda:
            avg_grad = avg_grad.cuda()

        for _ in range(eot_samples):
            outputs = predict(xvar + delta)
            loss = loss_fn(outputs, yvar)
            if minimize:
                loss = -loss
            loss.backward()
            avg_grad += delta.grad.detach()
            # reset the gradient so each EOT sample contributes exactly one term
            delta.grad.data.zero_()
        avg_grad = avg_grad / eot_samples

        if ord == np.inf:
            grad_sign = avg_grad.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data

        elif ord == 2:
            grad = avg_grad
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                               ) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)

        elif ord == 1:
            with torch.no_grad():
                grad = avg_grad
                abs_grad = torch.abs(avg_grad)

                batch_size = grad.size(0)
                view = abs_grad.view(batch_size, -1)
                view_size = view.size(1)
                vals, idx = view.topk(int(sparsity*view_size))

                out = torch.zeros_like(view).scatter_(1, idx, vals)

                out = out.view_as(grad)
                grad = grad.sign()*(out > 0).float()
                grad = normalize_by_pnorm(grad, p=1)
                delta.data += batch_multiply(eps_iter, grad)
                delta.data = batch_l1_proj(delta.data.cpu(), eps)
                if xvar.is_cuda:
                    delta.data = delta.data.cuda()
                delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                                   ) - xvar.data
        else:
            error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
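The inner loop above averages the input gradient over several stochastic forward passes (expectation over transformation, EOT) before taking a step. A self-contained sketch of that averaging, with dropout standing in for the model's randomness (toy model and data):

import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Sequential(nn.Flatten(), nn.Dropout(0.5), nn.Linear(28 * 28, 10))
model.train()  # keep dropout stochastic so each sample differs
x = torch.rand(2, 1, 28, 28)
y = torch.randint(0, 10, (2,))
eot_samples = 8

delta = torch.zeros_like(x, requires_grad=True)
avg_grad = torch.zeros_like(x)
for _ in range(eot_samples):
    loss = F.cross_entropy(model(x + delta), y)
    loss.backward()
    avg_grad += delta.grad.detach()
    delta.grad.zero_()
avg_grad /= eot_samples
delta.data = (delta.data + 0.03 * avg_grad.sign()).clamp(-0.3, 0.3)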
Example 13
def masked_perturb_iterative(xvar, yvar, predict, nb_iter, eps, eps_iter, loss_fn,
                      delta_init=None, minimize=False, ord=np.inf,
                      clip_min=0.0, clip_max=1.0,
                      l1_sparsity=None, mask_steps=100, device="cuda:0"):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.
    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param l1_sparsity: sparsity value for L1 projection.
                  - if None, then perform regular L1 projection.
                  - if float value, then perform sparse L1 descent from
                    Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
    :param mask_steps: number of times a mask should be drawn and a delta computed.
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in tqdm(range(nb_iter)):
        new_delta = 0 # added
        for jj in range(mask_steps): # added

            outputs = predict(xvar + delta)

            # MASKED part
            mask = torch.Tensor(np.random.randint(0,2,size=outputs.shape[1])) # added
            mask = torch.stack([mask for i in range(outputs.shape[0])])

            # force true label to not be masked
            for i in range(len(yvar)):
                mask[i][yvar[i]] = 1

            # allow for the multiplication in log space
            mask[mask == 0] = -100000

            mask = mask.to(device)

            outputs = outputs * mask

            loss = loss_fn(outputs, yvar)
            if minimize:
                loss = -loss

            loss.backward()
            if ord == np.inf:
                grad_sign = delta.grad.data.sign()
                delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
                delta.data = batch_clamp(eps, delta.data)
                delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                                   ) - xvar.data

            elif ord == 2:
                grad = delta.grad.data
                grad = normalize_by_pnorm(grad)
                delta.data = delta.data + batch_multiply(eps_iter, grad)
                delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                                   ) - xvar.data
                if eps is not None:
                    delta.data = clamp_by_pnorm(delta.data, ord, eps)

            elif ord == 1:
                grad = delta.grad.data
                abs_grad = torch.abs(grad)

                batch_size = grad.size(0)
                view = abs_grad.view(batch_size, -1)
                view_size = view.size(1)
                if l1_sparsity is None:
                    vals, idx = view.topk(1)
                else:
                    vals, idx = view.topk(
                        int(np.round((1 - l1_sparsity) * view_size)))

                out = torch.zeros_like(view).scatter_(1, idx, vals)
                out = out.view_as(grad)
                grad = grad.sign() * (out > 0).float()
                grad = normalize_by_pnorm(grad, p=1)
                delta.data = delta.data + batch_multiply(eps_iter, grad)

                delta.data = batch_l1_proj(delta.data.cpu(), eps)
                if xvar.is_cuda:
                    delta.data = delta.data.to(device)
                delta.data = clamp(xvar.data + delta.data, clip_min, clip_max
                                   ) - xvar.data
            else:
                error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
                raise NotImplementedError(error)

            new_delta += delta.data # added
            delta.grad.data.zero_()

        delta.data = new_delta / mask_steps # added

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv, delta
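The masking step above multiplies the logits by a mask whose zero entries have been replaced with a large negative constant, while the true label's column is forced to stay unmasked. A toy sketch of just that step (random logits and labels):

import torch
import torch.nn.functional as F

logits = torch.randn(3, 10)
y = torch.tensor([2, 5, 7])

mask = torch.randint(0, 2, (logits.shape[1],)).float()
mask = mask.expand(logits.shape[0], -1).clone()
mask[torch.arange(len(y)), y] = 1                  # never mask the true label
mask = mask.masked_fill(mask == 0, -100000.0)      # "mask out" in log space
masked_logits = logits * mask
loss = F.cross_entropy(masked_logits, y)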
Example 14
    def perturb_iterative_fool_many(xvar,
                                    embvar,
                                    indlistvar,
                                    yvar,
                                    predict,
                                    nb_iter,
                                    eps,
                                    epscand,
                                    eps_iter,
                                    loss_fn,
                                    rayon,
                                    delta_init=None,
                                    minimize=False,
                                    ord=np.inf,
                                    clip_min=0.0,
                                    clip_max=1.0,
                                    l1_sparsity=None):
        """
      Iteratively maximize the loss over the input. It is a shared method for
      iterative attacks including IterativeGradientSign, LinfPGD, etc.
      :param xvar: input data.
      :param yvar: input labels.
      :param predict: forward pass function.
      :param nb_iter: number of iterations.
      :param eps: maximum distortion.
      :param eps_iter: attack step size.
      :param loss_fn: loss function.
      :param delta_init: (optional) tensor contains the random initialization.
      :param minimize: (optional bool) whether to minimize or maximize the loss.
      :param ord: (optional) the order of maximum distortion (inf or 2).
      :param clip_min: mininum value per input dimension.
      :param clip_max: maximum value per input dimension.
      :param l1_sparsity: sparsity value for L1 projection.
                    - if None, then perform regular L1 projection.
                    - if float value, then perform sparse L1 descent from
                      Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
      :return: tensor containing the perturbed input.
      """

        #will contain all words encountered during PGD
        nb = len(indlistvar)
        tablist = []
        for t in range(nb):
            tablist += [[]]
        fool = False

        # contains each loss on the embedding and each difference of loss on the word's nearest neighbor
        loss_memory = np.zeros((nb_iter, ))
        word_balance_memory = np.zeros((nb_iter, ))

        candid = [torch.empty(0)] * nb
        convers = [[]] * nb
        for u in range(nb):
            #prepare all potential candidates, once and for all
            candidates = torch.empty([0, 768]).to(device)
            conversion = []
            emb_matrix = model.roberta.embeddings.word_embeddings.weight
            normed_emb_matrix = F.normalize(emb_matrix, p=2, dim=1)
            normed_emb_word = F.normalize(embvar[0][indlistvar[u]], p=2, dim=0)
            cosine_similarity = torch.matmul(
                normed_emb_word, torch.transpose(normed_emb_matrix, 0, 1))
            for t in range(
                    len(cosine_similarity)):  # avoid doing TWO loops here
                if cosine_similarity[t] > epscand:
                    if levenshtein(
                            tokenizer.decode(
                                torch.tensor([xvar[0][indlistvar[u]]])),
                            tokenizer.decode(torch.tensor([t]))) != 1:
                        candidates = torch.cat(
                            (candidates, normed_emb_matrix[t].unsqueeze(0)), 0)
                        conversion += [t]
            candid[u] = candidates
            convers[u] = conversion
            print("nb of candidates :")
            print(len(conversion))

        #U, S, V = torch.svd(model.roberta.embeddings.word_embeddings.weight)

        if delta_init is not None:
            delta = delta_init
        else:
            delta = torch.zeros_like(embvar)

        #PGD
        delta.requires_grad_()
        ii = 0
        while ii < nb_iter and not (fool):
            outputs = predict(xvar, embvar + delta)
            loss = loss_fn(outputs, yvar)
            if minimize:
                loss = -loss

            loss.backward()
            if ord == np.inf:
                grad_sign = delta.grad.data.sign()
                grad_sign = tozerolist(grad_sign, indlistvar)
                delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
                delta.data = batch_clamp(eps, delta.data)
                delta.data = clamp(
                    embvar.data + delta.data,
                    clip_min,
                    clip_max  # to be removed?
                ) - embvar.data
                with torch.no_grad():
                    delta.data = tozero(delta.data, indlistvar)
                    if (ii % 300) == 0:
                        adverslist = []
                        for t in range(nb):
                            advers, nb_vois = neighboors_np_dens_cand(
                                (embvar + delta)[0][indlistvar[t]], rayon,
                                candid[t])
                            advers = int(advers[0])
                            advers = torch.tensor(convers[t][advers])
                            if len(tablist[t]) == 0:
                                tablist[t] += [
                                    (tokenizer.decode(advers.unsqueeze(0)), ii,
                                     nb_vois)
                                ]
                            elif not (first(
                                    tablist[t][-1]) == tokenizer.decode(
                                        advers.unsqueeze(0))):
                                tablist[t] += [
                                    (tokenizer.decode(advers.unsqueeze(0)), ii,
                                     nb_vois)
                                ]
                            adverslist += [advers]
                        word_balance_memory[ii] = float(
                            model(replacelist(xvar, indlistvar, adverslist),
                                  labels=1 - yvar)[0]) - float(
                                      model(replacelist(
                                          xvar, indlistvar, adverslist),
                                            labels=yvar)[0])
                        if word_balance_memory[ii] < 0:
                            fool = True

            elif ord == 0:
                grad = delta.grad.data
                grad = tozero(grad, indlistvar)
                grad = torch.matmul(
                    torch.cat((torch.matmul(grad, v)[:, :, :50],
                               torch.zeros([768 - 50]).to(device)), 2), v.t())
                delta.data = delta.data + batch_multiply(eps_iter, grad)
                delta.data[0] = my_proj_all(embvar.data[0] + delta.data[0],
                                            embvar[0], indlistvar,
                                            eps) - embvar.data[0]
                delta.data = clamp(embvar.data + delta.data, clip_min,
                                   clip_max) - embvar.data  # probably to be removed
                with torch.no_grad():
                    delta.data = tozero(delta.data, indlistvar)
                    if (ii % 300) == 0:
                        adverslist = []
                        for t in range(nb):
                            advers, nb_vois = neighboors_np_dens_cand(
                                (embvar + delta)[0][indlistvar[t]], rayon,
                                candid[t])
                            advers = int(advers[0])
                            advers = torch.tensor(convers[t][advers])
                            if len(tablist[t]) == 0:
                                tablist[t] += [
                                    (tokenizer.decode(advers.unsqueeze(0)), ii,
                                     nb_vois)
                                ]
                            elif not (first(
                                    tablist[t][-1]) == tokenizer.decode(
                                        advers.unsqueeze(0))):
                                tablist[t] += [
                                    (tokenizer.decode(advers.unsqueeze(0)), ii,
                                     nb_vois)
                                ]
                            adverslist += [advers]
                        word_balance_memory[ii] = float(
                            model(replacelist(xvar, indlistvar, adverslist),
                                  labels=1 - yvar)[0]) - float(
                                      model(replacelist(
                                          xvar, indlistvar, adverslist),
                                            labels=yvar)[0])
                        if word_balance_memory[ii] < 0:
                            fool = True

            elif ord == 2:
                grad = delta.grad.data
                grad = tozero(grad, indlistvar)
                grad = normalize_by_pnorm(grad)
                delta.data = delta.data + batch_multiply(eps_iter, grad)
                delta.data = clamp(embvar.data + delta.data, clip_min,
                                   clip_max) - embvar.data
                if eps is not None:
                    delta.data = clamp_by_pnorm(delta.data, ord, eps)
                with torch.no_grad():
                    delta.data = tozero(delta.data, indlistvar)
                    if (ii % 300) == 0:
                        adverslist = []
                        for t in range(nb):
                            advers, nb_vois = neighboors_np_dens_cand(
                                (embvar + delta)[0][indlistvar[t]], rayon,
                                candid[t])
                            advers = int(advers[0])
                            advers = torch.tensor(convers[t][advers])
                            if len(tablist[t]) == 0:
                                tablist[t] += [
                                    (tokenizer.decode(advers.unsqueeze(0)), ii,
                                     nb_vois)
                                ]
                            elif not (first(
                                    tablist[t][-1]) == tokenizer.decode(
                                        advers.unsqueeze(0))):
                                tablist[t] += [
                                    (tokenizer.decode(advers.unsqueeze(0)), ii,
                                     nb_vois)
                                ]
                            adverslist += [advers]
                        word_balance_memory[ii] = float(
                            model(replacelist(xvar, indlistvar, adverslist),
                                  labels=1 - yvar)[0]) - float(
                                      model(replacelist(
                                          xvar, indlistvar, adverslist),
                                            labels=yvar)[0])
                        if word_balance_memory[ii] < 0:
                            fool = True

            elif ord == 1:
                grad = delta.grad.data
                grad = tozero(grad, indlistvar)  # zero the gradient outside the attacked token positions
                abs_grad = torch.abs(grad)

                batch_size = grad.size(0)
                view = abs_grad.view(batch_size, -1)
                view_size = view.size(1)
                if l1_sparsity is None:
                    vals, idx = view.topk(1)
                else:
                    vals, idx = view.topk(
                        int(np.round((1 - l1_sparsity) * view_size)))

                out = torch.zeros_like(view).scatter_(1, idx, vals)
                out = out.view_as(grad)
                grad = grad.sign() * (out > 0).float()
                grad = normalize_by_pnorm(grad, p=1)
                delta.data = delta.data + batch_multiply(eps_iter, grad)

                delta.data = batch_l1_proj(delta.data.cpu(), eps)
                if embvar.is_cuda:
                    delta.data = delta.data.cuda()
                delta.data = clamp(embvar.data + delta.data, clip_min,
                                   clip_max) - embvar.data
            else:
                error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
                raise NotImplementedError(error)
            delta.grad.data.zero_()
            with torch.no_grad():
                loss_memory[ii] = loss

            ii += 1

        #plt.plot(loss_memory)
        #plt.title("evolution of embed loss")
        #plt.show()
        #plt.plot(word_balance_memory)
        #plt.title("evolution of word loss difference")
        #plt.show()
        emb_adv = clamp(embvar + delta, clip_min, clip_max)
        return emb_adv, word_balance_memory, loss_memory, tablist, fool
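The candidate-selection step above keeps vocabulary embeddings whose cosine similarity to the attacked word exceeds epscand. A self-contained sketch with toy dimensions (the RoBERTa embedding matrix used above is replaced by a random stand-in):

import torch
import torch.nn.functional as F

emb_matrix = torch.randn(1000, 768)     # stand-in vocabulary embeddings
word_emb = torch.randn(768)             # embedding of the word to replace
epscand = 0.4

normed_matrix = F.normalize(emb_matrix, p=2, dim=1)
normed_word = F.normalize(word_emb, p=2, dim=0)
cosine = normed_matrix @ normed_word                     # (vocab,) cosine similarities
candidate_ids = (cosine > epscand).nonzero(as_tuple=True)[0]
candidates = normed_matrix[candidate_ids]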
def perturb_iterative(xvar,
                      yvar,
                      predict1,
                      predict2,
                      nb_iter,
                      eps,
                      eps_iter,
                      loss_fn,
                      delta_init=None,
                      minimize=False,
                      ord=np.inf,
                      clip_min=0.0,
                      clip_max=1.0,
                      l1_sparsity=None):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict1: first-stage forward pass function.
    :param predict2: (optional) second-stage function applied to the output of
        predict1 (if None, only predict1 is used).
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: minimum value per input dimension.
    :param clip_max: maximum value per input dimension.
    :param l1_sparsity: sparsity value for L1 projection.
                  - if None, then perform regular L1 projection.
                  - if float value, then perform sparse L1 descent from
                    Algorithm 1 in https://arxiv.org/pdf/1904.13000v1.pdf
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)

    delta.requires_grad_()
    for ii in range(nb_iter):
        if predict2 is not None:
            outputs = predict2(predict1(xvar + delta))
        else:
            outputs = predict1(xvar + delta)
        loss = loss_fn(outputs, yvar)
        if minimize:
            loss = -loss

        loss.backward()
        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data

        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)

        elif ord == 1:
            grad = delta.grad.data
            abs_grad = torch.abs(grad)

            batch_size = grad.size(0)
            view = abs_grad.view(batch_size, -1)
            view_size = view.size(1)
            if l1_sparsity is None:
                vals, idx = view.topk(1)
            else:
                vals, idx = view.topk(
                    int(np.round((1 - l1_sparsity) * view_size)))

            out = torch.zeros_like(view).scatter_(1, idx, vals)
            out = out.view_as(grad)
            grad = grad.sign() * (out > 0).float()
            grad = normalize_by_pnorm(grad, p=1)
            delta.data = delta.data + batch_multiply(eps_iter, grad)

            delta.data = batch_l1_proj(delta.data.cpu(), eps)
            if xvar.is_cuda:
                delta.data = delta.data.cuda()
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data
        else:
            error = "Only ord = inf, ord = 1 and ord = 2 have been implemented"
            raise NotImplementedError(error)
        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    return x_adv
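A minimal usage sketch for the two-stage variant above, assuming predict1 is a feature extractor and predict2 a classifier head (both toy modules here), and that the advertorch-style helpers called inside the function are importable in the surrounding module:

import torch
import torch.nn as nn
import numpy as np

feat = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 64), nn.ReLU())  # toy feature extractor
head = nn.Linear(64, 10)                                               # toy classifier head
x = torch.rand(4, 1, 28, 28)
y = torch.randint(0, 10, (4,))

x_adv = perturb_iterative(
    x, y, predict1=feat, predict2=head, nb_iter=10, eps=0.3, eps_iter=0.03,
    loss_fn=nn.CrossEntropyLoss(reduction="sum"), ord=np.inf)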
Example 16
def white_box_untargeted(args, image, target, model, enc=None, dec=None, \
        vae=None, ae= None, normalize=None):
    epsilon = 0.3
    # Create noise vector
    delta = torch.zeros_like(image, requires_grad=True).to(args.device)
    # Optimize noise vector (only) to fool model
    x = image

    use_vae = vae is not None
    use_ae = ae is not None

    print("Target is %d" % (target))
    for t in range(args.PGD_steps):
        if normalize is not None:
            if use_vae:
                x = x.view(x.size(0), -1).unsqueeze(0)
                z, mu, logvar = vae(x)
                z = z.clamp(0, 1)
                x = z.view(z.size(0), 1, 28, 28)
            elif use_ae:
                x = ae(x)
            pred = model(normalize(x + delta))
            recon_pred = model(normalize(x.detach()))  # keep recon_pred defined for the logging below
        else:
            if use_vae:
                x = x.view(x.size(0), -1).unsqueeze(0)
                z, mu, logvar = vae(x)
                z = z.clamp(0, 1)
                x = z.view(z.size(0), 1, 28, 28)
            elif use_ae:
                x = ae(x)
            pred = model(x.detach() + delta)
            recon_pred = model(x.detach())
        out = pred.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        recon_out = recon_pred.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        loss = nn.CrossEntropyLoss(reduction="sum")(pred, target)
        recon_image = (x)[0].detach()
        if args.comet:
            args.experiment.log_metric("Whitebox CE loss", loss, step=t)
            plot_image_to_comet(args, recon_image, "recon.png")
        if t % 5 == 0:
            print(t, out[0][0], recon_out[0][0], loss.item())

        loss.backward()
        grad_sign = delta.grad.data.sign()
        delta.data = delta.data + batch_multiply(0.01, grad_sign)
        # Clipping is equivalent to projecting back onto the l_\infty ball
        # This technique is known as projected gradient descent (PGD)
        delta.data.clamp_(-epsilon, epsilon)
        delta.data = clamp(x.data + delta.data, 0., 1.) - x.data
        delta.grad.data.zero_()
        # if out != target:
        # print(t, out[0][0], loss.item())
        # break
    if args.comet:
        if not args.mnist:
            clean_image = (image)[0].detach().cpu().numpy().transpose(1, 2, 0)
            adv_image = (x + delta)[0].detach().cpu().numpy().transpose(
                1, 2, 0)
            delta_image = (delta)[0].detach().cpu().numpy().transpose(1, 2, 0)
        else:
            clean_image = (image)[0].detach()
            adv_image = (x + delta)[0].detach()
            recon_image = (x)[0].detach()
            delta_image = (delta)[0].detach().cpu()
        plot_image_to_comet(args, clean_image, "clean.png")
        plot_image_to_comet(args, adv_image, "Adv.png")
        plot_image_to_comet(args, delta_image, "delta.png")
        plot_image_to_comet(args, recon_image, "recon.png")
    return out, delta
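The projection used in the loop above, in isolation: clamp delta onto the Linf ball, then clamp x + delta back into the valid data range. A toy sketch (illustrative values):

import torch

x = torch.rand(2, 1, 28, 28)
delta = torch.randn_like(x)
epsilon = 0.3

delta = delta.clamp(-epsilon, epsilon)       # project onto the Linf ball of radius epsilon
delta = (x + delta).clamp(0.0, 1.0) - x      # keep x + delta inside [0, 1]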
Example 17
def perturb_iterative(xvar,
                      yvar,
                      predict,
                      nb_iter,
                      eps,
                      eps_iter,
                      loss_fn,
                      delta_init=None,
                      minimize=False,
                      ord=np.inf,
                      clip_min=0.0,
                      clip_max=1.0,
                      beta=0.5,
                      early_stop=True):
    """
    Iteratively maximize the loss over the input. It is a shared method for
    iterative attacks including IterativeGradientSign, LinfPGD, etc.

    :param xvar: input data.
    :param yvar: input labels.
    :param predict: forward pass function.
    :param nb_iter: number of iterations.
    :param eps: maximum distortion.
    :param eps_iter: attack step size per iteration.
    :param loss_fn: loss function.
    :param delta_init: (optional) tensor containing the random initialization.
    :param minimize: (optional bool) whether to minimize or maximize the loss.
    :param ord: (optional) the order of maximum distortion (inf, 1 or 2).
    :param clip_min: (optional float) minimum value per input dimension.
    :param clip_max: (optional float) maximum value per input dimension.
    :param beta: weight on the auxiliary loss term w_loss.
    :param early_stop: (optional bool) stop early once the whole batch is misclassified.
    :return: tensor containing the perturbed input.
    """
    if delta_init is not None:
        delta = delta_init
    else:
        delta = torch.zeros_like(xvar)
    count = 0
    delta.requires_grad_()
    for ii in range(nb_iter):
        count += 1
        loss, w_loss = loss_fn(predict, yvar, xvar, xvar + delta)
        outputs = predict(xvar + delta)
        p = torch.argmax(outputs, dim=1)
        if torch.max(p == yvar) != 1 and early_stop:
            break  # the attack already succeeded; end the iteration early
        predict.zero_grad()
        loss.backward(retain_graph=True)
        g1 = torch.mean(delta.grad.data.abs().reshape(-1, 28 * 28)).float()
        delta.grad.data.zero_()
        w_loss.backward(retain_graph=True)
        g2 = torch.mean(delta.grad.data.abs().reshape(-1, 28 * 28)).float()
        g = g1 / g2
        g = torch.min(g, torch.tensor(1e6))
        if count % 5 == 0:  # may not converge
            beta = beta / 10
        delta.grad.data.zero_()
        # print('loss',loss)
        # print('w_loss', w_loss)
        # print(count)
        # print((p == yvar).sum())
        final_loss = loss + beta * g * w_loss
        final_loss.backward(retain_graph=True)

        if ord == np.inf:
            grad_sign = delta.grad.data.sign()
            delta.data = delta.data + batch_multiply(eps_iter, grad_sign)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data
        elif ord == 1:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad, 1)
            grad = grad * 28 * 28
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = batch_clamp(eps, delta.data)
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data
        elif ord == 2:
            grad = delta.grad.data
            grad = normalize_by_pnorm(grad)
            delta.data = delta.data + batch_multiply(eps_iter, grad)
            delta.data = clamp(xvar.data + delta.data, clip_min,
                               clip_max) - xvar.data
            if eps is not None:
                delta.data = clamp_by_pnorm(delta.data, ord, eps)
        else:
            error = "Only ord = inf and ord = 2 have been implemented"
            raise NotImplementedError(error)

        delta.grad.data.zero_()

    x_adv = clamp(xvar + delta, clip_min, clip_max)
    iter_count = count
    return x_adv, iter_count
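The last example scales the auxiliary loss so that its gradient magnitude matches the adversarial loss before combining them. A toy sketch of that balancing step (stand-in losses and an arbitrary beta; only the scaling logic is illustrated):

import torch

delta = torch.zeros(4, 1, 28, 28, requires_grad=True)
loss = (delta + 0.1).pow(2).sum()        # stand-in for the adversarial loss
w_loss = (delta - 0.2).abs().sum()       # stand-in for the auxiliary loss
beta = 0.5

g1 = torch.autograd.grad(loss, delta, retain_graph=True)[0].abs().mean()
g2 = torch.autograd.grad(w_loss, delta, retain_graph=True)[0].abs().mean()
g = torch.min(g1 / g2, torch.tensor(1e6))   # cap the ratio, as in the loop above
final_loss = loss + beta * g * w_loss
final_loss.backward()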