Example #1
    def merge_xform(self, other, self_mask):
        """ Takes in an other instance of this same class with the same
            shape of parameters (NxSHAPE) and a self_mask bytetensor of length
            N and outputs the merge between self's parameters for the indices
            of 1s in the self_mask and other's parameters for the indices of 0's
        ARGS:
            other: instance of same class as self with params of shape NxSHAPE -
                   the thing we merge with this one
            self_mask : ByteTensor (length N) - which indices of parameters we
                        keep from self, and which we keep from other
        RETURNS:
            New instance of this class that's merged between the self and other
            (same shaped params)
        """

        # JUST DO ASSERTS IN THE SKELETON CLASS
        assert self.__class__ == other.__class__

        self_params = self.xform_params.data
        other_params = other.xform_params.data
        assert self_params.shape == other_params.shape
        assert self_params.shape[0] == self_mask.shape[0]
        assert other_params.shape[0] == self_mask.shape[0]

        new_xform = self.__class__(shape=self.img_shape)

        new_params = utils.fold_mask(self.xform_params.data,
                                     other.xform_params.data, self_mask)
        new_xform.xform_params = nn.Parameter(new_params)
        new_xform.use_gpu = self.use_gpu
        return new_xform
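
    # A minimal standalone sketch of what utils.fold_mask presumably computes
    # (the helper itself is not shown in this snippet; its semantics are
    # inferred from the docstring above): take entries of `a` where the mask
    # is 1 and entries of `b` where the mask is 0, broadcasting the length-N
    # mask over the trailing SHAPE dimensions.
    def fold_mask_sketch(a, b, mask):
        # cast the ByteTensor mask to a's dtype so it can be used arithmetically
        mask = mask.type(a.type())
        # reshape (N,) -> (N, 1, ..., 1) so it broadcasts over the SHAPE dims
        mask = mask.view(a.shape[0], *([1] * (a.dim() - 1)))
        # a where mask == 1, b where mask == 0
        return mask * a + (1 - mask) * b
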
    def merge_perturbation(self, other, self_mask):
        """ Merges self's delta with other's: keeps self's delta at indices
            where self_mask is 1 and other's where it is 0, returning a new
            DeltaAddition. The superclass call just runs the shape asserts.
        """
        super(DeltaAddition, self).merge_perturbation(other, self_mask)

        # initialize a new perturbation
        new_perturbation = DeltaAddition(self.threat_model,
                                         self.perturbation_params)

        # make the new parameters
        new_delta = utils.fold_mask(self.delta.data, other.delta.data,
                                    self_mask)

        # do the merge setup and return the object
        new_perturbation._merge_setup(self.num_examples, new_delta)
        return new_perturbation
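
    # Hypothetical usage sketch (the objects below are illustrative, not from
    # this snippet): merge two DeltaAddition perturbations over a batch of two
    # examples, keeping pert_a's delta for example 0 and pert_b's for example 1.
    #
    #   mask = torch.ByteTensor([1, 0])
    #   merged = pert_a.merge_perturbation(pert_b, mask)
    #   # merged.delta.data[0] == pert_a.delta.data[0]
    #   # merged.delta.data[1] == pert_b.delta.data[1]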
    @classmethod
    def tweak_lambdas(cls, var_scale_lo, var_scale_hi, var_scale,
                      successful_mask):
        """ Modifies the constant scaling that we keep to weight f_adv vs D(.)
            in our loss function.

                IF the attack was successful
                THEN hi -> lambda
                     lambda -> (lambda + lo) /2
                ELSE
                     lo -> lambda
                     lambda -> (lambda + hi) / 2


        ARGS:
            var_scale_lo : Variable (N) - variable that holds the running lower
                           bounds in our binary search
            var_scale_hi: Variable (N) - variable that holds the running upper
                          bounds in our binary search
            var_scale : Variable (N) - variable that holds the lambdas we
                        actually use
            successful_mask : Variable (ByteTensor N) - mask that holds the
                              indices of the successful attacks
        RETURNS:
            (var_scale_lo, var_scale_hi, var_scale) but modified according to
            the rule described in the spec of this method
        """
        # candidate new lambdas: shrink toward lo on success, grow toward hi
        # on failure
        downweights = (var_scale_lo.data + var_scale.data) / 2.0
        upweights = (var_scale_hi.data + var_scale.data) / 2.0

        # successful attacks move hi down to lambda; failed attacks move lo
        # up to lambda (fold_mask keeps the first arg where the mask is 1)
        scale_hi = utils.fold_mask(var_scale.data, var_scale_hi.data,
                                   successful_mask.data)
        scale_lo = utils.fold_mask(var_scale_lo.data, var_scale.data,
                                   successful_mask.data)
        scale = utils.fold_mask(downweights, upweights,
                                successful_mask.data)
        return (Variable(scale_lo), Variable(scale_hi), Variable(scale))
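
    # Worked example of the update rule above (illustrative numbers), with
    # lo = 0, lambda = 1, hi = 128 for two examples, where example 0's attack
    # succeeded and example 1's failed:
    #
    #   example 0 (success): hi -> 1,  lambda -> (1 + 0) / 2   = 0.5
    #   example 1 (failure): lo -> 1,  lambda -> (1 + 128) / 2 = 64.5
    #
    # Success shrinks lambda toward lo, failure grows it toward hi, and either
    # way the (lo, hi) interval for that example is halved.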
Example #4
    def merge_xform(self, other, self_mask):
        """ Takes in an other instance of this same class with the same
            shape of parameters (NxSHAPE) and a self_mask bytetensor of length
            N and outputs the merge between self's parameters for the indices
            of 1s in the self_mask and other's parameters for the indices of 0's
        """
        super(FullSpatial, self).merge_xform(other, self_mask)

        new_xform = FullSpatial(shape=self.img_shape, use_gpu=self.use_gpu)

        new_params = utils.fold_mask(self.xform_params.data,
                                     other.xform_params.data, self_mask)
        new_xform.xform_params = nn.Parameter(new_params)

        return new_xform
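
    # Hypothetical usage sketch (identifiers below are illustrative): merge
    # two FullSpatial transforms over a batch of three images, keeping
    # xform_a's parameters for indices 0 and 2 and xform_b's for index 1.
    #
    #   mask = torch.ByteTensor([1, 0, 1])
    #   merged = xform_a.merge_xform(xform_b, mask)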
    def attack(self, examples, labels, targets=None, initial_lambda=1.0,
               num_bin_search_steps=10, num_optim_steps=1000,
               confidence=0.0, verbose=True):
        """ Performs Carlini Wagner attack on provided examples to make them
            not get classified as the labels.
        ARGS:
            examples : Tensor (NxCxHxW) - input images to be made adversarial
            labels : Tensor (N) - correct labels of the examples
            targets : Tensor (N) or None - targeted attack labels; targeted
                      attacks are not yet implemented, so this must be None
            initial_lambda : float - which lambda to use initially
                             in the regularization of the carlini loss
            num_bin_search_steps : int - how many binary search steps we perform
                                   to optimize the lambda
            num_optim_steps : int - how many optimizer steps we perform during
                                    each binary search step (we may stop early)
            confidence : float - how great the difference in the logits must be
                                 for the carlini_loss to be zero. Overwrites the
                                 self.carlini_loss.kappa value
            verbose : boolean - if True, we print progress during the binary
                                search and optimization loops
        RETURNS:
            AdversarialPerturbation object with correct parameters.
            Calling perturbation() gets a Variable of the output,
            calling perturbation().data gets a tensor of the output, and
            calling perturbation(distances=True) returns a dict like
                {}
        """

        ######################################################################
        #   First perform some setups                                        #
        ######################################################################

        if targets is not None:
            raise NotImplementedError("Targeted attacks aren't built yet")

        if self.use_gpu:
            examples = examples.cuda()
            labels = labels.cuda()

        self.classifier_net.eval() # ALWAYS EVAL FOR BUILDING ADV EXAMPLES

        var_examples = Variable(examples, requires_grad=False)
        var_labels = Variable(labels, requires_grad=False)


        loss_fxn = self._construct_loss_fxn(initial_lambda, confidence)
        loss_fxn.setup_attack_batch(var_examples)
        distance_fxn = loss_fxn.losses['distance_fxn']

        num_examples = examples.shape[0]

        best_results = {'best_dist': (torch.ones(num_examples)
                                      .type(examples.type()) * MAXFLOAT),
                        'best_perturbation': self.threat_model(examples)}

        ######################################################################
        #   Now start the binary search                                      #
        ######################################################################
        var_scale_lo = Variable(torch.zeros(num_examples)
                                .type(self._dtype).squeeze())
        var_scale = Variable(torch.ones(num_examples, 1).type(self._dtype) *
                             initial_lambda).squeeze()
        var_scale_hi = Variable(torch.ones(num_examples).type(self._dtype)
                                * 128).squeeze()  # HARDCODED UPPER LIMIT

        for bin_search_step in range(num_bin_search_steps):
            perturbation = self.threat_model(examples)
            ##################################################################
            #   Optimize with a given scale constant                         #
            ##################################################################
            if verbose:
                print("Starting binary_search_step %02d..." % bin_search_step)

            prev_loss = MAXFLOAT
            optimizer = optim.Adam(perturbation.parameters(), lr=0.001)

            for optim_step in range(num_optim_steps):

                if verbose and optim_step > 0 and optim_step % 25 == 0:
                    print("Optim search: %s, Loss: %s" %
                          (optim_step, prev_loss))

                loss_sum = self._optimize_step(optimizer, perturbation,
                                               var_examples, var_labels,
                                               var_scale, loss_fxn)

                if loss_sum + 1e-10 > prev_loss * 0.99999 and optim_step >= 100:
                    if verbose:
                        print(("...stopping early on binary_search_step %02d "
                               " after %03d iterations" ) % (bin_search_step,
                                                             optim_step))
                    break
                prev_loss = loss_sum
            # End inner optimize loop

            ################################################################
            #   Update with results from optimization                      #
            ################################################################

            # We only keep this round's perturbations if two things occur:
            # 1) the perturbation fools the classifier
            # 2) the perturbation is closer to original than the best-so-far


            bin_search_perts = perturbation(var_examples)
            bin_search_out = self.classifier_net.forward(bin_search_perts)
            successful_attack_idxs = self._batch_compare(bin_search_out,
                                                         var_labels)


            batch_dists = distance_fxn.forward(bin_search_perts).data

            successful_dist_idxs = batch_dists < best_results['best_dist']

            # elementwise AND of the two ByteTensor masks
            successful_mask = successful_attack_idxs * successful_dist_idxs

            # And then generate a new 'best distance' and 'best perturbation'

            best_results['best_dist'] = utils.fold_mask(batch_dists,
                                                      best_results['best_dist'],
                                                      successful_mask)

            best_results['best_perturbation'] =\
                 perturbation.merge_perturbation(
                                              best_results['best_perturbation'],
                                              successful_mask)

            # And then adjust the scale variables (lambdas)
            new_scales = self.tweak_lambdas(var_scale_lo, var_scale_hi,
                                            var_scale,
                                            Variable(successful_mask))

            var_scale_lo, var_scale_hi, var_scale = new_scales

        # End binary search loop
        if verbose:
            num_successful = len([_ for _ in best_results['best_dist']
                                  if _ < MAXFLOAT])
            print("\n Ending attack")
            print("Successful attacks for %03d/%03d examples in CONTINUOUS" %\
                  (num_successful, num_examples))

        loss_fxn.cleanup_attack_batch()
        perturbation.attach_originals(examples)
        perturbation.attach_attr('distances', best_results['best_dist'])

        return perturbation
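
    # Hypothetical usage sketch (the construction of the attack object lives
    # outside this snippet, so `cw_attack` is an assumption; `.distances` also
    # assumes attach_attr exposes the attribute under that name):
    #
    #   perturbation = cw_attack.attack(examples, labels,
    #                                   num_bin_search_steps=5,
    #                                   num_optim_steps=500)
    #   adv_examples = perturbation(examples).data  # adversarial images
    #   per_ex_dists = perturbation.distances       # MAXFLOAT where it failed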
    def attack(self,
               examples,
               labels,
               targets=None,
               initial_lambda=1.0,
               num_bin_search_steps=10,
               num_optim_steps=1000,
               confidence=0.0,
               warm_start=False,
               stop_early=True,
               verbose=True,
               log_iterates=False):
        """ Performs Carlini Wagner attack on provided examples to make them
            not get classified as the labels.
        ARGS:
            examples : Tensor (NxCxHxW) - input images to be made adversarial
            labels : Tensor (N) - correct labels of the examples
            targets : Tensor (N) or None - targeted attack labels; targeted
                      attacks are not yet implemented, so this must be None
            initial_lambda : float - which lambda to use initially
                             in the regularization of the carlini loss
            num_bin_search_steps : int - how many binary search steps we perform
                                   to optimize the lambda
            num_optim_steps : int - how many optimizer steps we perform during
                                    each binary search step (we may stop early)
            confidence : float - how great the difference in the logits must be
                                 for the carlini_loss to be zero. Overwrites the
                                 self.carlini_loss.kappa value
            warm_start : boolean - if True, we start each binary search step
                                   using the perturbation from the previous
                                   binsearch step (but with the new loss)
            stop_early : boolean - if True, we stop a binary search step after
                                   100 optimizer iterations once the loss stops
                                   decreasing meaningfully
            verbose : boolean - if True, we print progress during the binary
                                search and optimization loops
            log_iterates : boolean or int - if False, we don't keep track of any
                                            iterates. If True, we keep track of
                                            ALL iterates. If an int, say k,
                                            we keep track of every k^th iterate.
                                    These are stashed in a dict and attached
                                    to the perturbation object in an attribute
                                    'iterate_log'. Also stashes lambdas at each
                                    bin search step so we can recreate loss fxns
        RETURNS:
            AdversarialPerturbation object with correct parameters.
            Calling perturbation() gets a Variable of the output,
            calling perturbation().data gets a tensor of the output, and
            calling perturbation(distances=True) returns a dict like
                {}
        """

        ######################################################################
        #   First perform some setups                                        #
        ######################################################################

        if targets is not None:
            raise NotImplementedError("Targeted attacks aren't built yet")
        examples, labels = utils.cudafy(self.use_gpu, (examples, labels))

        self.classifier_net.eval()  # ALWAYS EVAL FOR BUILDING ADV EXAMPLES

        var_examples = Variable(examples, requires_grad=False)
        var_labels = Variable(labels, requires_grad=False)

        loss_fxn = self._construct_loss_fxn(initial_lambda, confidence)
        loss_fxn.setup_attack_batch(var_examples)
        distance_fxn = loss_fxn.losses['distance_fxn']

        num_examples = examples.shape[0]

        best_results = {'best_dist': (torch.ones(num_examples)
                                      .type(examples.type()) * MAXFLOAT),
                        'best_perturbation': self.threat_model(examples)}
        iterate_log = {}

        ######################################################################
        #   Now start the binary search                                      #
        ######################################################################
        var_scale_lo = Variable(torch.zeros(num_examples)
                                .type(self._dtype).squeeze())

        var_scale = Variable(
            torch.ones(num_examples, 1).type(self._dtype) *
            initial_lambda).squeeze()
        var_scale_hi = Variable(
            torch.ones(num_examples).type(self._dtype) *
            256).squeeze()  # HARDCODED UPPER LIMIT

        for bin_search_step in range(num_bin_search_steps):
            loss_fxn.setup_attack_batch(var_examples)
            if warm_start:
                perturbation = best_results['best_perturbation']\
                                            .clone_perturbation()
            else:
                perturbation = self.threat_model(examples)
            ##################################################################
            #   Optimize with a given scale constant                         #
            ##################################################################
            if verbose:
                print("Starting binary_search_step %02d..." % bin_search_step)
            prev_loss = MAXFLOAT
            optimizer = optim.Adam(perturbation.parameters(), lr=0.0001)

            for optim_step in range(num_optim_steps):
                perturbation.zero_grad()
                loss = loss_fxn.forward(perturbation(var_examples),
                                        var_labels,
                                        perturbation=perturbation)
                loss_sum = loss.sum()
                torch.autograd.backward(loss_sum)
                optimizer.step()

                if verbose and optim_step > 0 and optim_step % 100 == 0:
                    print("Optim search: %s, Loss: %s" % (optim_step, loss))
                    self.validation_loop(perturbation(var_examples),
                                         var_labels,
                                         iter_no=optim_step)

                if (loss_sum + 1e-10 > prev_loss * 0.99999
                        and optim_step >= 100 and stop_early):
                    if verbose:
                        print(("...stopping early on binary_search_step %02d "
                               " after %03d iterations") %
                              (bin_search_step, optim_step))
                    break

                if log_iterates is not False:
                    if (bin_search_step, 'lambdas') not in iterate_log:
                        lambda_clone = var_scale.data.cpu().clone()
                        iterate_log[(bin_search_step,
                                     'lambdas')] = lambda_clone

                    # note: log_iterates=True behaves like k=1 here (True == 1)
                    if optim_step > 0 and (optim_step % log_iterates) == 0:
                        iterate = perturbation(var_examples).data.cpu().clone()
                        iterate_log[(bin_search_step, optim_step)] = iterate

            # End inner optimize loop

            ################################################################
            #   Update with results from optimization                      #
            ################################################################

            # We only keep this round's perturbations if two things occur:
            # 1) the perturbation fools the classifier
            # 2) the perturbation is closer to original than the best-so-far

            bin_search_perts = perturbation(var_examples)
            bin_search_out = self.classifier_net.forward(
                self.normalizer(bin_search_perts))
            successful_attack_idxs = self._batch_compare(
                bin_search_out, var_labels)

            batch_dists = distance_fxn.forward(bin_search_perts).data

            successful_dist_idxs = batch_dists < best_results['best_dist']

            # elementwise AND of the two ByteTensor masks
            successful_mask = successful_attack_idxs * successful_dist_idxs
            # And then generate a new 'best distance' and 'best perturbation'

            best_results['best_dist'] = utils.fold_mask(
                batch_dists, best_results['best_dist'], successful_mask)

            best_results['best_perturbation'] =\
                 perturbation.merge_perturbation(
                                              best_results['best_perturbation'],
                                              successful_mask)

            # And then adjust the scale variables (lambdas) in the loss
            new_scales = self.tweak_lambdas(var_scale_lo,
                                            var_scale_hi, var_scale,
                                            Variable(successful_mask))

            var_scale_lo, var_scale_hi, var_scale = new_scales
            loss_fxn.scalars['carlini_loss'] = var_scale

        # End binary search loop
        perturbation = best_results['best_perturbation']
        if verbose:
            if best_results['best_dist'].numel() == 1:
                num_successful = int(best_results['best_dist'].item() <
                                     MAXFLOAT)
            else:
                num_successful = len(
                    [_ for _ in best_results['best_dist'] if _ < MAXFLOAT])
            print("\n Ending attack")
            print("Successful attacks for %03d/%03d examples in CONTINUOUS" %\
                  (num_successful, num_examples))

        loss_fxn.cleanup_attack_batch()
        perturbation.attach_originals(examples)
        perturbation.attach_attr('var_scale', var_scale)
        perturbation.attach_attr('distances', best_results['best_dist'])
        if log_iterates is not False:
            perturbation.attach_attr('iterate_log', iterate_log)
        return perturbation
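
    # Sketch of the iterate_log layout produced when log_iterates is an int k,
    # based on the logging code above: one 'lambdas' entry per binary search
    # step, plus every k-th perturbed batch keyed by (bin_step, optim_step).
    #
    #   iterate_log = {
    #       (0, 'lambdas'): <FloatTensor (N,)>,       # lambdas for bin step 0
    #       (0, k):         <FloatTensor (N,C,H,W)>,  # perturbed images
    #       (0, 2 * k):     ...,
    #       (1, 'lambdas'): ...,
    #   }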