Ejemplo n.º 1
0
    def _forward_and_update_delta(self, optimizer, x_atanh, delta, y_onehot,
                                  loss_coeffs):
        """Take a single optimizer step on ``delta``.

        The candidate adversarial is ``tanh_rescale(delta + x_atanh)`` and the
        distance term is ``calc_l2distsq`` between that candidate and the
        rescaled, unperturbed ``x_atanh``. Both are fed to ``self._loss_fn``
        together with the model output, ``y_onehot`` and ``loss_coeffs``.

        Returns:
            A 4-tuple ``(loss, distance, output, adversarial)``: the loss as a
            Python number and the other three as ``.data`` tensors. All four
            values were computed before the optimizer step was applied.
        """
        lower, upper = self.clip_min, self.clip_max

        optimizer.zero_grad()

        # Unperturbed reference and perturbed candidate, both passed through
        # the same tanh_rescale mapping.
        reference = tanh_rescale(x_atanh, lower, upper)
        candidate = tanh_rescale(delta + x_atanh, lower, upper)

        prediction = self.predict(candidate)
        distance = calc_l2distsq(candidate, reference)

        objective = self._loss_fn(prediction, y_onehot, distance, loss_coeffs)
        objective.backward()
        optimizer.step()

        return (objective.item(), distance.data, prediction.data,
                candidate.data)
Ejemplo n.º 2
0
    def _outputs_and_loss(self, x, modifiers, starting_atanh, y, const, taus):
        """Compute model outputs and the per-sample attack loss.

        The loss is the sum of two terms:

        * ``const`` times a hinge on the gap between the score of class ``y``
          and the best score among the other classes (direction depends on
          ``self.targeted``);
        * the sum, over all non-batch dimensions, of how much
          ``|x - adversarial|`` exceeds the per-sample threshold ``taus``.

        Returns:
            ``(outputs.detach(), loss)`` where ``loss`` has one entry per
            sample.
        """
        adv = tanh_rescale(starting_atanh + modifiers, self.clip_min,
                           self.clip_max)
        predictions = self.predict(adv)
        label_mask = to_one_hot(y, self.num_classes).float()

        # Score assigned to the class given by y.
        label_score = torch.sum(label_mask * predictions, dim=1)

        # Highest score among the remaining classes; subtracting
        # label_mask * TARGET_MULT keeps the class y itself from being picked.
        masked_scores = ((1.0 - label_mask) * predictions -
                         (label_mask * TARGET_MULT))
        best_other, _ = masked_scores.max(dim=1)

        margin = (best_other - label_score
                  if self.targeted else label_score - best_other)
        loss1 = const * torch.clamp(margin, min=0.)

        # View taus as (batch, 1, ..., 1) so it broadcasts against x.
        rank = len(x.shape)
        taus_per_sample = taus.view((-1, ) + (1, ) * (rank - 1))
        excess = torch.clamp(torch.abs(x - adv) - taus_per_sample, min=0)
        loss2 = torch.sum(excess, dim=tuple(range(1, rank)))

        assert loss1.shape == loss2.shape

        return predictions.detach(), loss1 + loss2
Ejemplo n.º 3
0
    def _forward_and_update_delta(self, optimizer, x_atanh, delta, y_onehot,
                                  loss_coeffs):
        """Take a single optimizer step on ``delta`` using a max-abs distance.

        The candidate adversarial is ``tanh_rescale(delta + x_atanh)``. When
        ``self.normalize_fn`` is set, it is applied to the candidate before it
        is passed to ``self.predict``. The distance handed to ``self._loss_fn``
        is, per sample, the largest absolute element-wise difference between
        the candidate and the rescaled unperturbed input (it is not squared).

        Returns:
            A 4-tuple ``(loss, distance, output, adversarial)``: the loss as a
            Python number and the other three as ``.data`` tensors. All four
            values were computed before the optimizer step was applied.
        """
        optimizer.zero_grad()
        adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
        transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)

        # Identity check: `== None` would dispatch to a user-defined __eq__.
        if self.normalize_fn is None:
            output = self.predict(adv)
        else:
            output = self.predict(self.normalize_fn(adv))

        # Flatten every non-batch dimension, then take the per-sample maximum.
        abs_diff = torch.abs(adv - transimgs_rescale).view(adv.size()[0], -1)
        linf_dist, _ = torch.max(abs_diff, dim=1)

        loss = self._loss_fn(output, y_onehot, linf_dist, loss_coeffs)
        loss.backward()
        optimizer.step()

        return loss.item(), linf_dist.data, output.data, adv.data
Ejemplo n.º 4
0
    def _outputs_and_loss(self,
                          x,
                          modifiers,
                          starting_atanh,
                          y,
                          const,
                          taus,
                          active_mask=None,
                          filter_=None):
        """Compute model outputs and the per-sample attack loss.

        ``active_mask`` and ``filter_`` are forwarded unchanged to
        ``self._outputs``.

        Returns:
            ``(outputs.detach(), losses)``. ``losses`` is kept as a
            ``(batch_size,)`` vector so that abort_early can inspect each
            sample; it is only reduced to a scalar by the caller.
        """
        # When comparing with Carlini's original implementation: x corresponds
        # to tf.tanh(timg)/2, and adv corresponds to
        # tf.tanh(modifier + simg)/2 (called newimg there).
        adv = tanh_rescale(starting_atanh + modifiers, self.clip_min,
                           self.clip_max)
        assert x.shape == adv.shape

        predictions = self._outputs(adv,
                                    active_mask=active_mask,
                                    filter_=filter_)
        assert predictions.shape == (adv.shape[0], self.num_classes)

        label_mask = to_one_hot(y, self.num_classes).float()
        assert label_mask.shape == predictions.shape

        # Score assigned to the class given by y.
        label_score = torch.sum(label_mask * predictions, dim=1)

        # Highest score among the remaining classes; subtracting
        # label_mask * TARGET_MULT keeps the class y itself from being picked.
        masked_scores = ((1.0 - label_mask) * predictions -
                         (label_mask * TARGET_MULT))
        best_other, _ = masked_scores.max(dim=1)

        margin = (best_other - label_score
                  if self.targeted else label_score - best_other)
        loss1 = torch.clamp(margin, min=0.)

        # View taus as [batch_size, 1, ..., 1] so it broadcasts against x.
        rank = len(x.shape)
        taus_per_sample = taus.view((len(x), ) + (1, ) * (rank - 1))

        excess = torch.clamp(torch.abs(adv - x) - taus_per_sample, min=0)
        assert excess.shape == x.shape

        loss2 = torch.sum(excess, dim=tuple(range(1, rank)))
        assert loss2.shape == loss1.shape

        losses = const * loss1 + loss2
        assert losses.shape == (len(x), )

        return predictions.detach(), losses
Ejemplo n.º 5
0
    def perturb(self, x, y=None):
        """Optimize a perturbation of ``x`` for ``self.max_iterations`` steps.

        Args:
            x: Input batch; passed through ``_verify_and_process_inputs`` and
                ``replicate_input`` before use.
            y: Labels. When ``None`` after input processing, the labels are
                taken from ``self._get_predicted_label(x)``.

        Returns:
            The adversarial batch evaluated in the last iteration (computed
            before that iteration's optimizer step), or the replicated ``x``
            when ``self.max_iterations`` is 0.

        Note:
            The loop always runs the full ``self.max_iterations``; there is no
            early-abort check in this method. Progress is printed whenever
            ``ii % 1000 == 1``.
        """
        x, y = self._verify_and_process_inputs(x, y)

        # Initialization
        if y is None:
            y = self._get_predicted_label(x)
        x = replicate_input(x)

        final_advs = x
        x_atanh = self._get_arctanh_x(x)
        y_onehot = to_one_hot(y, self.num_classes).float()

        delta = nn.Parameter(torch.zeros_like(x))
        optimizer = optim.Adam([delta], lr=self.learning_rate)

        # The rescaled clean input does not depend on delta, so it is computed
        # once instead of on every iteration. It is detached so that repeated
        # backward() calls never try to traverse a graph built outside the
        # loop; the gradient with respect to delta is unaffected.
        transimgs_rescale = tanh_rescale(x_atanh, self.clip_min,
                                         self.clip_max).detach()

        for ii in range(self.max_iterations):
            optimizer.zero_grad()
            adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
            output = self.predict(adv)
            l2distsq = calc_l2distsq(adv, transimgs_rescale)
            # In this variant _loss_fn returns the total loss together with
            # two terms that are only used for logging.
            loss, l2dist, adv_loss = self._loss_fn(output, y_onehot, l2distsq,
                                                   self.c)
            loss.backward()
            optimizer.step()

            if ii % 1000 == 1:
                print('step: {}, dis: {:.2f}, loss1: {:.2f}.'.format(ii, l2dist.item(), adv_loss.item()))

            final_advs = adv.data
        return final_advs
    def _forward_and_update_delta(
            self, optimizer, x_atanh, delta, y_onehot, loss_coeffs):
        """Take a single optimizer step on ``delta`` against a weighted ensemble.

        The model output is the sum over ``i`` of
        ``self.predict.predict(adv, i) * self.predict.weights[i]`` for every
        index in ``self.predict.classifiers``.

        Returns:
            A 4-tuple ``(loss, l2distsq, output, adversarial)``: the loss as a
            Python number and the other three as ``.data`` tensors. All four
            values were computed before the optimizer step was applied.

        Raises:
            ValueError: If ``self.predict.classifiers`` is empty, since there
                would be no output to compute a loss on.
        """
        num_classifiers = len(self.predict.classifiers)
        if num_classifiers == 0:
            raise ValueError(
                "The ensemble must contain at least one classifier.")

        optimizer.zero_grad()
        adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
        transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)
        l2distsq = calc_l2distsq(adv, transimgs_rescale)

        # predict() is addressed by classifier index, so iterate over indices.
        output = None
        for i in range(num_classifiers):
            weighted = self.predict.predict(adv, i) * self.predict.weights[i]
            output = weighted if output is None else output + weighted

        loss = self._loss_fn(output, y_onehot, l2distsq, loss_coeffs)
        loss.backward()
        optimizer.step()

        return loss.item(), l2distsq.data, output.data, adv.data
Ejemplo n.º 7
0
    def _run_attack(self, x, y, initial_const, taus, prev_adversarials,
                    active):
        """Run the optimization for increasing values of ``const``.

        Starting from ``initial_const``, the inner loop performs
        ``self.max_iterations + 1`` Adam steps on the modifiers, then ``const``
        is multiplied by ``self.const_factor``. This repeats until ``const``
        reaches ``self.max_const`` or an early-abort check finds no active
        sample left.

        Args:
            x: Original inputs.
            y: Labels forwarded to ``_outputs_and_loss`` and ``_successful``.
            initial_const: First value of the loss constant.
            taus: Per-sample thresholds; must have the same length as ``x``.
            prev_adversarials: Starting point when ``self.warm_start`` is set.
            active: Boolean per-sample mask (presumably the samples that have
                not converged yet; confirm against the caller). It is rebound
                locally and never modified in place.

        Returns:
            The most recently saved adversarial for every sample; samples that
            were never replaced keep their value from ``x``.
        """
        assert len(x) == len(taus)
        batch_size = len(x)
        computed_adversarials = x.clone().detach()

        if self.warm_start:
            starting_atanh = self._get_arctanh_x(prev_adversarials.clone())
        else:
            starting_atanh = self._get_arctanh_x(x.clone())

        modifiers = torch.nn.Parameter(torch.zeros_like(starting_atanh))
        optimizer = optim.Adam([modifiers], lr=self.learning_rate)

        const = initial_const

        # Number of completed outer (const) iterations; drives const_check.
        j = 0
        stop_search = False

        while (not stop_search) and const < self.max_const:
            # We add an extra iteration because adversarials are
            # not saved until the next iteration
            for k in range(self.max_iterations + 1):
                # Note: unlike the CPU version, the CUDA version updates and calls the model
                # on all samples, including inactive ones. However, the filter_ parameter is designed
                # to force best_sample to only update active samples. This is counter-productive, but
                # it's necessary in order to have consistent CPU and CUDA implementations
                outputs, losses = self._outputs_and_loss(x,
                                                         modifiers,
                                                         starting_atanh,
                                                         y,
                                                         const,
                                                         taus,
                                                         filter_=active)

                adversarials = tanh_rescale(starting_atanh + modifiers,
                                            self.clip_min,
                                            self.clip_max).detach()

                # By default every sample is overwritten; when update_inactive
                # is off, only the active ones are.
                replace = torch.ones((batch_size, ),
                                     dtype=torch.bool,
                                     device=x.device)

                if not self.update_inactive:
                    replace = replace & active

                computed_adversarials = utils.fast_boolean_choice(
                    computed_adversarials, adversarials, replace)

                # Update the modifiers
                total_loss = torch.sum(losses)
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # If early aborting is enabled, drop successful
                # samples with a small loss (the current adversarials
                # are saved regardless of whether they are dropped)
                if self.abort_early:
                    successful = self._successful(outputs, y).detach()
                    small_loss = losses < SMALL_LOSS_COEFFICIENT * const

                    active = active & ~(successful & small_loss)

                    if self.inner_check != 0 and (k +
                                                  1) % self.inner_check == 0:
                        # Causes an implicit sync point
                        if not active.any():
                            # Break from both loops
                            stop_search = True
                            break

            if stop_search:
                break

            if self.abort_early and self.const_check != 0 and (
                    j + 1) % self.const_check == 0:
                # Causes an implicit sync point
                if not active.any():
                    break

            # Give more weight to the output loss
            const *= self.const_factor

            # Without this increment the const_check test above would only
            # ever fire for const_check == 1.
            j += 1

        return computed_adversarials
Ejemplo n.º 8
0
    def _run_attack(self, x, y, initial_const, taus, prev_adversarials,
                    outer_active_mask):
        """Run the optimization on the still-active samples only.

        For each value of ``const`` (starting at ``initial_const`` and
        multiplied by ``self.const_factor`` after every round, while it stays
        below ``self.max_const``), up to ``self.max_iterations + 1`` Adam steps
        are taken. Every tensor passed to ``_outputs_and_loss`` is indexed by
        the local ``active`` mask, and samples are removed from that mask once
        abort_early judges them finished.

        Returns:
            The most recently saved adversarial for every sample.
        """
        assert len(x) == len(taus)
        num_samples = len(x)
        results = x.clone().detach()

        # Warm start resumes from the previous adversarials; otherwise the
        # search starts from the clean inputs.
        start_point = prev_adversarials if self.warm_start else x
        starting_atanh = self._get_arctanh_x(start_point.clone())

        modifiers = torch.nn.Parameter(torch.zeros_like(starting_atanh))
        optimizer = optim.Adam([modifiers], lr=self.learning_rate)

        # Boolean mask of the samples that have not converged yet.
        active = torch.ones((num_samples, ),
                            dtype=torch.bool,
                            device=x.device)

        # Used for best_sample tracking
        active_mask = outer_active_mask.clone()

        const = initial_const

        while active.any() and const < self.max_const:
            # One extra iteration, because adversarials are not saved until
            # the iteration after the update that produced them.
            for _step in range(self.max_iterations + 1):
                # Only the entries where outer_active_mask is True change.
                active_mask[outer_active_mask] = active

                outputs, losses = self._outputs_and_loss(
                    x[active],
                    modifiers[active],
                    starting_atanh[active],
                    y[active],
                    const,
                    taus[active],
                    active_mask=active_mask)

                # Save the current adversarials before the modifiers move.
                # They are detached, so the step below cannot alter them even
                # for samples that are about to be dropped.
                snapshot = tanh_rescale(
                    starting_atanh[active] + modifiers[active], self.clip_min,
                    self.clip_max).detach()
                results[active] = snapshot

                optimizer.zero_grad()
                torch.sum(losses).backward()
                optimizer.step()

                if not self.abort_early:
                    continue

                # Drop successful samples whose loss is already small; their
                # adversarials were saved above regardless.
                successful = self._successful(outputs, y[active]).detach()
                small_loss = losses < SMALL_LOSS_COEFFICIENT * const
                finished = successful & small_loss

                active[active] = ~finished

                if not active.any():
                    break

            # Give more weight to the output loss
            const *= self.const_factor

        return results
Ejemplo n.º 9
0
    def _run_attack(self, x, y, initial_const, taus, prev_adversarials):
        """Run the optimization for increasing values of ``const``.

        For each value of ``const`` (starting at ``initial_const`` and
        multiplied by ``self.const_factor`` after every round, while it stays
        below ``self.max_const``), up to ``self.max_iterations`` Adam steps are
        taken on the modifiers of the still-active samples.

        Args:
            x: Original inputs.
            y: Labels forwarded to ``_outputs_and_loss`` and ``_successful``.
            initial_const: First value of the loss constant.
            taus: Per-sample thresholds; must have the same length as ``x``.
            prev_adversarials: Starting point when ``self.warm_start`` is set.

        Returns:
            ``best_adversarials``, which starts as a detached copy of ``x``.
            With ``self.return_best`` it is updated through ``replace_active``;
            otherwise the active entries are overwritten on every iteration.
        """
        assert len(x) == len(taus)
        batch_size = len(x)
        best_adversarials = x.clone().detach()
        # Every sample starts with an infinite best distance.
        best_distances = torch.ones(
            (batch_size, ), device=x.device) * float("inf")

        if self.warm_start:
            starting_atanh = self._get_arctanh_x(prev_adversarials.clone())
        else:
            starting_atanh = self._get_arctanh_x(x.clone())

        modifiers = torch.nn.Parameter(torch.zeros_like(starting_atanh))

        # An array of booleans that stores which samples have not converged
        # yet
        active = torch.ones((batch_size, ),
                            dtype=boolean_type,
                            device=x.device)
        optimizer = optim.Adam([modifiers], lr=self.learning_rate)

        const = initial_const

        while torch.any(active) and const < self.max_const:
            for _ in range(self.max_iterations):
                optimizer.zero_grad()
                # Only the active samples are passed to the model.
                outputs, loss = self._outputs_and_loss(x[active],
                                                       modifiers[active],
                                                       starting_atanh[active],
                                                       y[active], const,
                                                       taus[active])

                # Computed for the full batch and detached from the graph;
                # it is indexed by `active` wherever it is used below.
                adversarials = tanh_rescale(starting_atanh + modifiers,
                                            self.clip_min,
                                            self.clip_max).detach()

                successful = self._successful(outputs, y[active])

                if self.return_best:
                    # Per-sample largest absolute difference over all
                    # non-batch dimensions.
                    distances = torch.max(
                        torch.abs(x[active] - adversarials[active]).flatten(1),
                        dim=1)[0]
                    better_distance = distances < best_distances[active]

                    # NOTE(review): replace_active is defined elsewhere;
                    # presumably it writes the selected active entries of its
                    # first argument into its second one - confirm.
                    replace_active(adversarials[active], best_adversarials,
                                   active, successful & better_distance)
                    replace_active(distances, best_distances, active,
                                   successful & better_distance)
                else:
                    best_adversarials[active] = adversarials[active]

                # If early aborting is enabled, drop successful
                # samples with a small loss (the current adversarials
                # are saved regardless of whether they are dropped)
                if self.abort_early:
                    small_loss = loss < 0.0001 * const

                    drop = successful & small_loss

                    # This workaround avoids modifying "active"
                    # in-place, which would mess with
                    # gradient computation in backwards()
                    active_clone = active.clone()
                    active_clone[active] = ~drop
                    active = active_clone

                # Leaves the inner loop before the update below; const is
                # still multiplied once before the outer condition ends the
                # search.
                if not active.any():
                    break

                # Update the modifiers
                total_loss = torch.sum(loss)
                total_loss.backward()
                optimizer.step()

            # Give more weight to the output loss
            const *= self.const_factor

        return best_adversarials