Code Example #1
File: lbfgs.py  Project: xunge/advertorch
    def perturb(self, x, y=None):

        from scipy.optimize import fmin_l_bfgs_b

        def _loss_fn(adv_x_np, self, x, target, const):
            adv_x = torch.from_numpy(
                adv_x_np.reshape(x.shape)).float().to(
                x.device).requires_grad_()
            output = self.predict(adv_x)
            loss2 = torch.sum((x - adv_x) ** 2)
            loss_fn = F.cross_entropy(output, target, reduction='none')
            loss1 = torch.sum(const * loss_fn)
            loss = loss1 + loss2
            loss.backward()
            grad_ret = adv_x.grad.data.cpu().numpy().flatten().astype(float)
            loss = loss.data.cpu().numpy().flatten().astype(float)
            if not self.targeted:
                loss = -loss
            return loss, grad_ret

        x, y = self._verify_and_process_inputs(x, y)
        batch_size = len(x)
        coeff_lower_bound = x.new_zeros(batch_size)
        coeff_upper_bound = x.new_ones(batch_size) * COEFF_UPPER
        loss_coeffs = x.new_ones(batch_size) * self.initial_const
        final_l2dists = [L2DIST_UPPER] * batch_size
        final_labels = [INVALID_LABEL] * batch_size
        final_advs = x.clone()
        clip_min = self.clip_min * np.ones(x.shape[:]).astype(float)
        clip_max = self.clip_max * np.ones(x.shape[:]).astype(float)
        clip_bound = list(zip(clip_min.flatten(), clip_max.flatten()))

        for outer_step in range(self.binary_search_steps):
            init_guess = x.clone().cpu().numpy().flatten().astype(float)
            adv_x, f, _ = fmin_l_bfgs_b(_loss_fn,
                                        init_guess,
                                        args=(self, x.clone(), y, loss_coeffs),
                                        bounds=clip_bound,
                                        maxiter=self.max_iterations,
                                        iprint=0)

            adv_x = torch.from_numpy(
                adv_x.reshape(x.shape)).float().to(x.device)
            l2s = calc_l2distsq(x, adv_x)
            output = self.predict(adv_x)
            self._update_if_better(
                adv_x, y, output.data, l2s, batch_size,
                final_l2dists, final_labels, final_advs)
            self._update_loss_coeffs(
                y, batch_size,
                loss_coeffs, coeff_upper_bound, coeff_lower_bound,
                output.data)
        return final_advs
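
A minimal usage sketch for a perturb method of this shape, assuming the advertorch-style LBFGSAttack constructor; the toy model, input sizes, and constructor arguments below are placeholders, not part of the example above.

import torch
import torch.nn as nn
from advertorch.attacks import LBFGSAttack  # assumed import path

# toy classifier standing in for self.predict
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10)).eval()
x = torch.rand(4, 1, 28, 28)           # inputs already in [clip_min, clip_max]
y = torch.randint(0, 10, (4,))

# constructor arguments are assumptions based on the usual advertorch signature
adversary = LBFGSAttack(predict=model, num_classes=10,
                        binary_search_steps=5, max_iterations=50)
x_adv = adversary.perturb(x, y)        # runs the loop shown above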
Code Example #2
    def _forward_and_update_delta(self, optimizer, x_atanh, delta, y_onehot,
                                  loss_coeffs):

        optimizer.zero_grad()
        adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
        transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)
        output = self.predict(adv)
        l2distsq = calc_l2distsq(adv, transimgs_rescale)
        loss = self._loss_fn(output, y_onehot, l2distsq, loss_coeffs)
        loss.backward()
        optimizer.step()

        return loss.item(), l2distsq.data, output.data, adv.data
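
For reference, a minimal sketch of the tanh change of variables behind tanh_rescale and the arctanh input used above; the exact advertorch helpers may differ in details such as the clamp epsilon.

import torch

def tanh_rescale_sketch(w, clip_min=0.0, clip_max=1.0):
    # map an unconstrained tensor into [clip_min, clip_max] via tanh
    return (torch.tanh(w) + 1) / 2 * (clip_max - clip_min) + clip_min

def arctanh_sketch(x, clip_min=0.0, clip_max=1.0, eps=1e-6):
    # inverse map: take a valid image into the unconstrained tanh space
    x = (x - clip_min) / (clip_max - clip_min) * 2 - 1
    x = torch.clamp(x, -1 + eps, 1 - eps)
    return 0.5 * torch.log((1 + x) / (1 - x))

x = torch.rand(2, 3, 8, 8)
# round trip recovers the original image up to numerical error
assert torch.allclose(tanh_rescale_sketch(arctanh_sketch(x)), x, atol=1e-4)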
Code Example #3
    def perturb(self, x, y=None):
        x, y = self._verify_and_process_inputs(x, y)
        batch_size = len(x)
        loss_coeffs = x.new_ones(batch_size) * self.initial_const
        final_l2dists = [L2DIST_UPPER] * batch_size
        final_labels = [INVALID_LABEL] * batch_size
        final_step = [INVALID_LABEL] * batch_size
        final_advs = x.clone() #fixing bug from advertorch

        # TODO: refactor the theta generation
        theta = torch.tensor([[[1., 0., 0.],
                               [0., 1., 0.]]]).to(x.device)
        theta = theta.repeat((x.shape[0], 1, 1))


        grid = F.affine_grid(theta, x.size())

        grid_ori = grid.clone()
        y_onehot = to_one_hot(y, self.num_classes).float()

        clip_min = np.ones(grid_ori.shape[:]) * -1
        clip_max = np.ones(grid_ori.shape[:]) * 1
        clip_bound = list(zip(clip_min.flatten(), clip_max.flatten()))
        grid_ret = grid.clone().data.cpu().numpy().flatten().astype(float)
        from scipy.optimize import fmin_l_bfgs_b
        for outer_step in range(self.search_steps):
            grid_ret, f, d = fmin_l_bfgs_b(
                self._loss_fn_spatial,
                grid_ret,
                args=(
                    x.clone().detach(),
                    y_onehot, loss_coeffs,
                    grid_ori.clone().detach()),
                maxiter=self.max_iterations,
                bounds=clip_bound,
                iprint=0,
                maxls=100,
            )
            grid = torch.from_numpy(
                grid_ret.reshape(grid_ori.shape)).float().to(x.device)
            adv_x = F.grid_sample(x.clone(), grid)
            l2s = calc_l2distsq(grid.data, grid_ori.data)
            output = self.predict(adv_x)
            self._update_if_better(
                adv_x.data, y, output.data, l2s, batch_size,
                final_l2dists, final_labels, final_advs,
                outer_step, final_step)

        return final_advs
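
A small sketch of the identity flow field that the theta block above constructs (the theta generation the TODO refers to): with the identity theta, grid_sample reproduces the input, and the attack then optimizes the grid itself. The align_corners flag is made explicit here only to silence newer PyTorch warnings.

import torch
import torch.nn.functional as F

x = torch.rand(2, 3, 16, 16)

# identity 2x3 affine matrix, one copy per batch element
theta = torch.tensor([[1., 0., 0.],
                      [0., 1., 0.]]).repeat(x.shape[0], 1, 1)
grid = F.affine_grid(theta, x.size(), align_corners=False)

# sampling with the identity grid returns the original image (up to float error)
x_same = F.grid_sample(x, grid, align_corners=False)
print(torch.allclose(x, x_same, atol=1e-5))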
Code Example #4
    def perturb(self, x, y=None):
        x, y = self._verify_and_process_inputs(x, y)

        # Initialization
        if y is None:
            y = self._get_predicted_label(x)
        x = replicate_input(x)
        # batch_size = len(x)

        final_advs = x
        x_atanh = self._get_arctanh_x(x)
        y_onehot = to_one_hot(y, self.num_classes).float()

        delta = nn.Parameter(torch.zeros_like(x))
        optimizer = optim.Adam([delta], lr=self.learning_rate)
        prevloss = PREV_LOSS_INIT

        for ii in range(self.max_iterations):
            # loss, l2distsq, output, adv_img = \
            #     self._forward_and_update_delta(
            #         optimizer, x_atanh, delta, y_onehot, self.c)

            optimizer.zero_grad()
            adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
            transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)
            output = self.predict(adv)
            l2distsq = calc_l2distsq(adv, transimgs_rescale)
            loss, l2dist, adv_loss = self._loss_fn(output, y_onehot, l2distsq, self.c)
            loss.backward()
            optimizer.step()

            if ii % 1000 == 1:
                print('step: {}, dis: {:.2f}, loss1: {:.2f}.'.format(ii, l2dist.item(), adv_loss.item()))

            # if self.abort_early:
            #     if ii % (self.max_iterations // NUM_CHECKS or 1) == 0:
            #         if loss > prevloss * ONE_MINUS_EPS:
            #             break
            #         prevloss = loss

            final_advs = adv.data
        return final_advs
Code Example #5
    def _forward_and_update_delta(
            self, optimizer, x_atanh, delta, y_onehot, loss_coeffs):

        optimizer.zero_grad()
        adv = tanh_rescale(delta + x_atanh, self.clip_min, self.clip_max)
        transimgs_rescale = tanh_rescale(x_atanh, self.clip_min, self.clip_max)
        l2distsq = calc_l2distsq(adv, transimgs_rescale)

        for i in range(len(self.predict.classifiers)):
            output_i = self.predict.predict(adv, i)

            if i == 0:
                output = output_i * self.predict.weights[i]
            else:
                output = output + output_i * self.predict.weights[i]
  
        loss = self._loss_fn(output, y_onehot, l2distsq, loss_coeffs)
        loss.backward()
        optimizer.step()

        return loss.item(), l2distsq.data, output.data, adv.data
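
The loop over self.predict.classifiers above assumes an ensemble wrapper exposing .classifiers, per-model .weights, and a predict(x, i) method; none of these are standard advertorch attributes. A hypothetical minimal version of such a wrapper:

import torch.nn as nn

class WeightedEnsemble(nn.Module):
    # hypothetical wrapper matching the attributes used above
    def __init__(self, classifiers, weights):
        super().__init__()
        self.classifiers = nn.ModuleList(classifiers)
        self.weights = weights

    def predict(self, x, i):
        # logits of the i-th member
        return self.classifiers[i](x)

    def forward(self, x):
        # weighted sum of member logits
        return sum(w * clf(x) for w, clf in zip(self.weights, self.classifiers))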
Code Example #6
    def _forward_and_update_delta(self, optimizer, x, delta, y_onehot,
                                  loss_coeffs):

        optimizer.zero_grad()
        adv = self._fast_iterative_shrinkage_thresholding(x, delta)
        output = 0
        for i in range(self.eot_samples):
            output += self.predict(adv)
        output /= self.eot_samples
        l2distsq = calc_l2distsq(adv, x)
        l1dist = calc_l1dist(adv, x)

        if self.decision_rule == 'EN':
            crit = l2distsq + (l1dist * self.beta)
        else:
            crit = l1dist

        loss = self._loss_fn(output, y_onehot, l2distsq, loss_coeffs)
        loss.backward()
        optimizer.step()
        self.global_step += 1

        return loss.item(), crit.data, output.data, adv.data
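
The averaging over self.eot_samples implements Expectation over Transformation: when the model or its input pipeline is stochastic, logits (and hence gradients) are averaged over several forward passes. A standalone sketch with a hypothetical dropout model; the loss here is only a placeholder.

import torch
import torch.nn as nn

model = nn.Sequential(nn.Flatten(), nn.Dropout(p=0.2), nn.Linear(64, 10))
model.train()                          # keep dropout active, so each pass differs

x = torch.rand(4, 1, 8, 8, requires_grad=True)
eot_samples = 8

# average logits over several stochastic passes before computing the loss
output = sum(model(x) for _ in range(eot_samples)) / eot_samples
loss = output.logsumexp(dim=1).sum()   # placeholder loss for the sketch
loss.backward()                        # x.grad is averaged over the samples
print(x.grad.abs().mean())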
Code Example #7
File: ead.py  Project: xunge/advertorch
    def perturb(self, x, y=None):

        x, y = self._verify_and_process_inputs(x, y)

        # Initialization
        if y is None:
            y = self._get_predicted_label(x)

        x = replicate_input(x)
        batch_size = len(x)
        coeff_lower_bound = x.new_zeros(batch_size)
        coeff_upper_bound = x.new_ones(batch_size) * COEFF_UPPER
        loss_coeffs = torch.ones_like(y).float() * self.initial_const

        final_dist = [DIST_UPPER] * batch_size
        final_labels = [INVALID_LABEL] * batch_size

        final_advs = x.clone()
        y_onehot = to_one_hot(y, self.num_classes).float()

        final_dist = torch.FloatTensor(final_dist).to(x.device)
        final_labels = torch.LongTensor(final_labels).to(x.device)

        # Start binary search
        for outer_step in range(self.binary_search_steps):

            self.global_step = 0

            # slack vector from the paper
            yy_k = nn.Parameter(x.clone())
            xx_k = x.clone()

            cur_dist = [DIST_UPPER] * batch_size
            cur_labels = [INVALID_LABEL] * batch_size

            cur_dist = torch.FloatTensor(cur_dist).to(x.device)
            cur_labels = torch.LongTensor(cur_labels).to(x.device)

            prevloss = PREV_LOSS_INIT

            if (self.repeat and outer_step == (self.binary_search_steps - 1)):
                loss_coeffs = coeff_upper_bound

            lr = self.learning_rate

            for ii in range(self.max_iterations):

                # reset gradient
                if yy_k.grad is not None:
                    yy_k.grad.detach_()
                    yy_k.grad.zero_()

                # loss over yy_k with only L2 same as C&W
                # we don't update L1 loss with SGD because we use ISTA
                output = self.predict(yy_k)
                l2distsq = calc_l2distsq(yy_k, x)
                loss_opt = self._loss_fn(output,
                                         y_onehot,
                                         None,
                                         l2distsq,
                                         loss_coeffs,
                                         opt=True)
                loss_opt.backward()

                # gradient step
                yy_k.data.add_(yy_k.grad.data, alpha=-lr)
                self.global_step += 1

                # polynomial decay of learning rate
                lr = self.init_learning_rate * \
                    (1 - self.global_step / self.max_iterations)**0.5

                yy_k, xx_k = self._fast_iterative_shrinkage_thresholding(
                    x, yy_k, xx_k)

                # loss ElasticNet or L1 over xx_k
                output = self.predict(xx_k)
                l2distsq = calc_l2distsq(xx_k, x)
                l1dist = calc_l1dist(xx_k, x)

                if self.decision_rule == 'EN':
                    dist = l2distsq + (l1dist * self.beta)
                elif self.decision_rule == 'L1':
                    dist = l1dist
                loss = self._loss_fn(output, y_onehot, l1dist, l2distsq,
                                     loss_coeffs)

                if self.abort_early:
                    if ii % (self.max_iterations // NUM_CHECKS or 1) == 0:
                        if loss > prevloss * ONE_MINUS_EPS:
                            break
                        prevloss = loss

                self._update_if_smaller_dist_succeed(xx_k.data, y, output,
                                                     dist, batch_size,
                                                     cur_dist, cur_labels,
                                                     final_dist, final_labels,
                                                     final_advs)

            self._update_loss_coeffs(y, cur_labels, batch_size, loss_coeffs,
                                     coeff_upper_bound, coeff_lower_bound)

        return final_advs
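
For reference, the projection performed by _fast_iterative_shrinkage_thresholding is an elementwise soft-thresholding of the perturbation around x by beta, followed by clipping; the sketch below omits details such as the FISTA momentum term and is an assumption about the shape of that helper, not its exact implementation.

import torch

def ista_project_sketch(x, yy_k, beta, clip_min=0.0, clip_max=1.0):
    # soft-threshold the perturbation yy_k - x by beta, elementwise,
    # then clip the result back into the valid pixel range
    diff = yy_k - x
    shrunk = torch.sign(diff) * torch.clamp(diff.abs() - beta, min=0.0)
    return torch.clamp(x + shrunk, clip_min, clip_max)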