Example #1
 def perturbation(self, samples, ys, device):
     # samples: 1000 clean samples (used for the attack)
     # ys: the true labels of the 1000 clean samples
     # copy_samples: a copy of the clean samples
     copy_samples = np.copy(samples)
     # apply a random perturbation to the clean samples
     copy_samples = copy_samples + (
         self.alpha_ratio * self.epsilon *
         np.sign(np.random.randn(*copy_samples.shape)))
     # clip the randomized samples back into the pixel range [0.0, 1.0]
     copy_samples = np.clip(copy_samples, 0.0, 1.0).astype(np.float32)
     # eps: the perturbation budget left for the FGSM step
     eps = (1.0 - self.alpha_ratio) * self.epsilon
     # wrap copy_samples so that gradients can be computed w.r.t. it
     var_samples = tensor2variable(torch.from_numpy(copy_samples),
                                   device=device,
                                   requires_grad=True)
     # wrap ys without gradient tracking
     var_ys = tensor2variable(torch.LongTensor(ys), device=device)
     # predict the labels of the samples
     self.model.eval()
     preds = self.model(var_samples)
     loss_fun = torch.nn.CrossEntropyLoss()
     # ys is one-hot encoded, so take the argmax to recover class indices
     loss = loss_fun(preds, torch.max(var_ys, 1)[1])
     # differentiate w.r.t. copy_samples, take the sign, convert to numpy
     loss.backward()
     gradient_sign = var_samples.grad.data.cpu().sign().numpy()
     # compute the adversarial examples
     adv_samples = copy_samples + eps * gradient_sign
     # clip the adversarial pixels into [0.0, 1.0]
     adv_samples = np.clip(adv_samples, 0.0, 1.0)
     return adv_samples
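Every example in this listing calls a tensor2variable helper that is not shown. A minimal sketch of what it presumably does, assuming a PyTorch version in which Tensor and Variable are merged:

 import torch

 def tensor2variable(x, device=None, requires_grad=False):
     # move the tensor to the target device and toggle gradient tracking;
     # the name and signature follow the calls in the examples above
     x = x.to(device)
     x.requires_grad_(requires_grad)
     return x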
Example #2
 def perturbation(self, samples, ys_target, device):
     # samples: 1000 clean samples (used for the attack)
     # ys_target: target classes for samples (the least-likely labels)
     # copy_samples: a copy of the clean samples
     copy_samples = np.copy(samples)
     # wrap copy_samples so that gradients can be computed w.r.t. it
     var_samples = tensor2variable(torch.from_numpy(copy_samples),
                                   device=device,
                                   requires_grad=True)
     # wrap ys_target without gradient tracking
     var_ys_target = tensor2variable(torch.from_numpy(ys_target), device)
     # predict the labels of var_samples
     self.model.eval()
     preds = self.model(var_samples)
     # compute the loss
     loss_fun = torch.nn.CrossEntropyLoss()
     loss = loss_fun(preds, var_ys_target)
     # differentiate w.r.t. copy_samples, take the sign, convert to numpy
     loss.backward()
     gradient_sign = var_samples.grad.data.cpu().sign().numpy()
     # step towards the target class by subtracting the gradient sign
     adv_samples = copy_samples - self.epsilon * gradient_sign
     # clip the adversarial pixels into [0.0, 1.0]
     adv_samples = np.clip(adv_samples, 0.0, 1.0)
     return adv_samples
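The least-likely target labels passed into this attack are not produced here; a sketch of how they could be derived from the model's predictions on the clean batch (helper name is hypothetical):

 import numpy as np
 import torch

 def least_likely_labels(model, samples, device):
     # predict the clean samples and take the lowest-scoring class per sample
     model.eval()
     with torch.no_grad():
         logits = model(torch.from_numpy(samples).to(device))
     return logits.argmin(dim=1).cpu().numpy()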
Example #3
 def perturbation(self, samples, ys_target, device):
     # samples: 1000 clean samples (used for the attack)
     # ys_target: target class labels for the 1000 clean samples
     # copy_samples: a copy of the clean samples
     # var_ys_target: the wrapped target labels, without gradient tracking
     copy_samples = np.copy(samples)
     var_ys_target = tensor2variable(torch.from_numpy(ys_target), device)
     # add the perturbation iteratively
     for index in range(self.num_steps):
         # wrap copy_samples so that gradients can be computed w.r.t. it
         var_samples = tensor2variable(torch.from_numpy(copy_samples),
                                       device=device,
                                       requires_grad=True)
         # predict the sample labels
         self.model.eval()
         preds = self.model(var_samples)
         # compute the loss
         loss_fun = torch.nn.CrossEntropyLoss()
         loss = loss_fun(preds, var_ys_target)
         # differentiate w.r.t. copy_samples, take the sign, convert to numpy
         loss.backward()
         gradient_sign = var_samples.grad.data.cpu().sign().numpy()
         # take one step of size epsilon_iter towards the target class
         copy_samples = copy_samples - self.epsilon_iter * gradient_sign
         # project back into [samples - epsilon, samples + epsilon], where
         # epsilon is the overall perturbation budget
         copy_samples = np.clip(copy_samples, samples - self.epsilon,
                                samples + self.epsilon)
         # clip the adversarial pixels into [0.0, 1.0]
         copy_samples = np.clip(copy_samples, 0.0, 1.0)
     return copy_samples
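The pair of np.clip calls at the end of each iteration is a projection onto the intersection of the L∞ ball around the clean samples and the valid pixel range; the same step as a standalone helper (a sketch, not part of the original class):

 import numpy as np

 def project(adv, clean, epsilon):
     # first project onto the L_inf ball of radius epsilon around clean
     adv = np.clip(adv, clean - epsilon, clean + epsilon)
     # then clip into the valid pixel range [0.0, 1.0]
     return np.clip(adv, 0.0, 1.0)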
Example #4
 def perturbation(self, samples, ys_target, device):
     # samples: 1000 clean samples (used for the attack)
     # ys_target: target class labels (the least-likely labels)
     # copy_samples: a copy of the clean samples
     copy_samples = np.copy(samples)
     # apply a random perturbation to the clean samples
     copy_samples = np.clip(
         copy_samples + self.alpha_ratio * self.epsilon *
         np.sign(np.random.randn(*copy_samples.shape)), 0.0,
         1.0).astype(np.float32)
     # wrap copy_samples so that gradients can be computed w.r.t. it
     var_samples = tensor2variable(torch.from_numpy(copy_samples),
                                   device=device,
                                   requires_grad=True)
     # wrap ys_target without gradient tracking
     var_ys_target = tensor2variable(torch.from_numpy(ys_target), device)
     # eps: the perturbation budget left for the LLC step
     eps = (1 - self.alpha_ratio) * self.epsilon
     # predict the labels of the samples
     self.model.eval()
     preds = self.model(var_samples)
     # compute the loss
     loss_fun = torch.nn.CrossEntropyLoss()
     loss = loss_fun(preds, var_ys_target)
     # differentiate w.r.t. copy_samples, take the sign, convert to numpy
     loss.backward()
     gradient_sign = var_samples.grad.data.cpu().sign().numpy()
     # step towards the target class
     adv_samples = copy_samples - eps * gradient_sign
     adv_samples = np.clip(adv_samples, 0, 1)
     return adv_samples
Example #5
 def perturbation(self, samples, ys, device):
     # samples: clean samples (used for the attack)
     # ys: the true labels of samples
     # copy_samples: a copy of the clean samples
     copy_samples = np.copy(samples)
     self.model.to(device)
     # add the perturbation iteratively
     for index in range(self.num_steps):
         # wrap copy_samples so that gradients can be computed w.r.t. it
         var_samples = tensor2variable(torch.from_numpy(copy_samples),
                                       device=device,
                                       requires_grad=True)
         # wrap the labels of copy_samples without gradient tracking
         var_ys = tensor2variable(torch.LongTensor(ys), device=device)
         # preds: the label predictions for var_samples
         self.model.eval()
         preds = self.model(var_samples)
         # compute the loss
         loss_fun = torch.nn.CrossEntropyLoss()
         loss = loss_fun(preds, torch.max(var_ys, 1)[1])
         # backpropagate, take the sign of the gradient, convert to numpy
         loss.backward()
         gradient_sign = var_samples.grad.data.cpu().sign().numpy()
         # take one step of size epsilon_iter away from the true class
         copy_samples = copy_samples + self.epsilon_iter * gradient_sign
         # project back into [samples - epsilon, samples + epsilon], where
         # epsilon is the overall perturbation budget
         copy_samples = np.clip(copy_samples, samples - self.epsilon,
                                samples + self.epsilon)
         # clip the adversarial pixels into [0.0, 1.0]
         copy_samples = np.clip(copy_samples, 0.0, 1.0)
     return copy_samples
Example #6
 def perturbation(self, samples, ys, device):
     # samples: clean samples
     # ys: true labels, one-hot encoded, e.g. [0, 0, 0, 1, 0, 0, ...]
     # copy_samples: a copy of samples
     copy_samples = np.copy(samples)
     self.model.to(device)
     # randomly chosen starting points inside the L_\inf ball around the
     # clean samples: add noise drawn uniformly from [-epsilon, epsilon)
     copy_samples = copy_samples + np.random.uniform(-self.epsilon, self.epsilon, copy_samples.shape).astype('float32')
     # iterate num_steps times
     for index in range(self.num_steps):
         # wrap the input copy_samples for gradient computation
         var_samples = tensor2variable(torch.from_numpy(copy_samples), device=device, requires_grad=True)
         # convert the ys labels to a LongTensor and wrap them
         var_ys = tensor2variable(torch.LongTensor(ys), device=device)
         self.model.eval()
         # run the network to obtain the predicted scores
         preds = self.model(var_samples)
         # cross-entropy loss
         loss_fun = torch.nn.CrossEntropyLoss()
         # preds are the predictions; the second argument is the index of
         # the maximum in each one-hot row, i.e. the true class indices
         loss = loss_fun(preds, torch.max(var_ys, 1)[1])
         # backpropagate, take the sign of the gradient, convert to numpy
         loss.backward()
         gradient_sign = var_samples.grad.data.cpu().sign().numpy()
         # adversarial samples = current samples + epsilon_iter * gradient sign
         copy_samples = copy_samples + self.epsilon_iter * gradient_sign
         # project back into [samples - epsilon, samples + epsilon]
         copy_samples = np.clip(copy_samples, samples - self.epsilon, samples + self.epsilon)
         # clip the adversarial pixels into [0.0, 1.0]
         copy_samples = np.clip(copy_samples, 0.0, 1.0)
     return copy_samples
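Examples #1, #5, #6 and #8 expect ys as one-hot rows and recover class indices with torch.max(var_ys, 1)[1]. A sketch of building that encoding from integer labels, assuming 10 classes as in the other examples:

 import numpy as np

 def to_one_hot(labels, num_classes=10):
     # labels: integer class indices of shape (N,)
     one_hot = np.zeros((len(labels), num_classes), dtype=np.int64)
     one_hot[np.arange(len(labels)), labels] = 1
     return one_hot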
Example #7
 def perturbation_single(self, sample, ys_target, device):
     # sample: a single clean sample; ys_target: its target class label
     copy_sample = np.copy(sample)
     # wrap the sample so that gradients can be computed w.r.t. it
     var_sample = tensor2variable(torch.from_numpy(copy_sample),
                                  device=device,
                                  requires_grad=True)
     # wrap the target label without gradient tracking
     var_target = tensor2variable(torch.LongTensor(ys_target),
                                  device=device)
     # theta > 0 means feature values are increased, otherwise decreased
     if self.theta > 0:
         increasing = True
     else:
         increasing = False
     # number of features (pixels) per sample
     num_features = int(np.prod(copy_sample.shape[1:]))
     shape = var_sample.size()
     # gamma bounds the fraction of features that may be modified;
     # two features are changed per iteration
     max_iters = int(np.ceil(num_features * self.gamma / 2.0))
     # the search domain excludes features already saturated at a bound
     if increasing:
         search_domain = torch.lt(var_sample, 0.99).to(device)
     else:
         search_domain = torch.gt(var_sample, 0.01).to(device)
     search_domain = search_domain.view(num_features)
     self.model.eval().to(device)
     output = self.model(var_sample)
     current = torch.max(output.data, 1)[1].cpu().numpy()
     iter = 0
     # stop once the target class is predicted, the iteration budget is
     # exhausted, or no modifiable features remain
     while (iter < max_iters) and (current[0] != ys_target[0]) and (
             search_domain.sum() != 0):
         # forward derivative (Jacobian) of the model w.r.t. the input
         jacobian = self.compute_jacobian(input=var_sample, device=device)
         # pick the most salient pair of features from the saliency map
         p1, p2 = self.saliency_map(jacobian, var_target, increasing,
                                    search_domain, num_features, device)
         # modify the chosen pair of features by theta; operating on .data
         # avoids autograd errors from in-place changes to a leaf tensor
         var_sample_flatten = var_sample.data.view(-1, num_features)
         var_sample_flatten[0, p1] += self.theta
         var_sample_flatten[0, p2] += self.theta
         new_sample = torch.clamp(var_sample_flatten, min=0.0, max=1.0)
         new_sample = new_sample.view(shape)
         # remove the modified features from the search domain
         search_domain[p1] = 0
         search_domain[p2] = 0
         # re-wrap the modified sample for the next Jacobian computation
         var_sample = tensor2variable(new_sample.clone(),
                                      device=device,
                                      requires_grad=True)
         output = self.model(var_sample)
         current = torch.max(output.data, 1)[1].cpu().numpy()
         iter += 1
     adv_samples = var_sample.data.cpu().numpy()
     return adv_samples
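compute_jacobian and saliency_map are methods of the attack class and are not shown in this listing. A minimal sketch of the forward-derivative step, assuming a 10-class classifier and a batch of one sample:

 import torch

 def compute_jacobian(model, input, device, num_classes=10):
     # Jacobian of the class scores w.r.t. a single input, one class at a time
     input = input.detach().clone().to(device).requires_grad_(True)
     output = model(input)
     jacobian = torch.zeros(num_classes, *input.shape[1:], device=device)
     for cls in range(num_classes):
         grad = torch.autograd.grad(output[0, cls], input, retain_graph=True)[0]
         jacobian[cls] = grad[0]
     return jacobian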
Example #8
 def perturbation(self, samples, ys, device):
     # samples: the inputs
     # ys: true one-hot labels, e.g. [0, 0, 0, 1, 0, 0, ...]
     # epsilon: the perturbation magnitude
     copy_samples = np.copy(samples)
     # wrap the input samples and mark them as differentiable
     var_samples = tensor2variable(torch.from_numpy(copy_samples), device=device, requires_grad=True)
     # convert the ys labels to a LongTensor and wrap them
     var_ys = tensor2variable(torch.LongTensor(ys), device=device)
     self.model.eval()
     # run the network to obtain the predicted scores
     preds = self.model(var_samples)
     # cross-entropy loss
     loss_fun = torch.nn.CrossEntropyLoss()
     # preds are the predictions; the second argument is the index of the
     # maximum in each one-hot row, i.e. the true class indices
     loss = loss_fun(preds, torch.max(var_ys, 1)[1])
     # backpropagate, take the sign of the gradient, convert to numpy
     loss.backward()
     gradient_sign = var_samples.grad.data.cpu().sign().numpy()
     # adversarial samples = clean samples + epsilon * gradient sign
     adv_samples = copy_samples + self.epsilon * gradient_sign
     # clip the adversarial pixel values into [0.0, 1.0]
     adv_samples = np.clip(adv_samples, 0.0, 1.0)
     return adv_samples
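Example #8 is plain FGSM; written out, the update it implements is

 x_{adv} = \mathrm{clip}_{[0,1]}\bigl(x + \epsilon \cdot \mathrm{sign}(\nabla_x J(x, y))\bigr)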
Example #9
 def perturbation_single(self, sample, device):
     # sample: a single clean sample
     assert sample.shape[0] == 1, 'only perturbing one sample'
     # copy_sample: a copy of sample
     copy_sample = np.copy(sample)
     # var_sample: wrap copy_sample as a differentiable float tensor
     var_sample = tensor2variable(torch.from_numpy(copy_sample), device=device, requires_grad=True).float()
     self.model.eval()
     # prediction: the model's class scores for the clean sample
     prediction = self.model(var_sample)
     # original: the class predicted for the clean sample
     original = torch.max(prediction, 1)[1]
     # current: the currently predicted label (0-9)
     current = original
     # convert the prediction to numpy and sort the class indices in
     # descending order of score (the entries of I are class indices)
     I = np.argsort(prediction.data.cpu().numpy() * -1)
     # accumulated perturbation, initialized to zeros of the sample's shape
     perturbation_r_tot = np.zeros(copy_sample.shape, dtype=np.float32)
     # iteration: the iteration counter
     iteration = 0
     # loop while the current label still equals the original label and the
     # iteration budget is not exhausted (stop once an adversarial example
     # is found or max_iterations is reached)
     while (original == current) and (iteration < self.max_iterations):
         # reset the gradients (zero_gradients comes from
         # torch.autograd.gradcheck in older PyTorch versions)
         zero_gradients(var_sample)
         self.model.eval()
         # f_kx: the model's class scores for the current sample
         f_kx = self.model(var_sample)
         # current: the index of the highest score for var_sample
         current = torch.max(f_kx, 1)[1]
         # I[0, 0] is the class originally predicted for the clean sample;
         # f_kx[0, I[0, 0]] is the current score of that class; the graph
         # is retained for further backward passes
         # grad_original: gradient of the original class score w.r.t. var_sample
         f_kx[0, I[0, 0]].backward(retain_graph=True)
         grad_original = np.copy(var_sample.grad.data.cpu().numpy())
         # closest_dist: the smallest distance to a decision boundary
         closest_dist = 1e10
         for k in range(1, 10):
             # reset the gradients
             zero_gradients(var_sample)
             # grad_current: gradient of the k-th ranked class score
             # w.r.t. var_sample, again retaining the graph
             f_kx[0, I[0, k]].backward(retain_graph=True)
             grad_current = var_sample.grad.data.cpu().numpy().copy()
             # w_k: gradient of the k-th class minus gradient of the original class
             w_k = grad_current - grad_original
             # f_k: score of the k-th class minus score of the original
             # class, converted to numpy
             f_k = (f_kx[0, I[0, k]] - f_kx[0, I[0, 0]]).detach().data.cpu().numpy()
             # dist_k: distance to the decision boundary of class k
             dist_k = np.abs(f_k) / (np.linalg.norm(w_k.flatten()) + 1e-15)
             # keep the closest boundary and its direction w_k
             if dist_k < closest_dist:
                 closest_dist = dist_k
                 closest_w = w_k
         # the perturbation for this step: distance times unit direction
         r_i = (closest_dist + 1e-4) * closest_w / np.linalg.norm(closest_w)
         # accumulate r_i into the total perturbation
         perturbation_r_tot = perturbation_r_tot + r_i
         # scale the total perturbation by (1 + overshoot), add it to the
         # clean sample, and clip each pixel into [0.0, 1.0]
         tmp_sample = np.clip((1 + self.overshoot) * perturbation_r_tot + sample, 0.0, 1.0)
         # re-wrap tmp_sample for the next iteration
         var_sample = tensor2variable(torch.from_numpy(tmp_sample), device=device, requires_grad=True)
         # increment the iteration counter
         iteration += 1
     # after the loop, apply the final perturbation to obtain the adversarial example
     adv = np.clip(sample + (1 + self.overshoot) * perturbation_r_tot, 0.0, 1.0)
     # return the adversarial example, the perturbation, and the iteration count
     return adv, perturbation_r_tot, iteration
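perturbation_single handles one sample at a time; a sketch of applying it across a numpy batch (the attack object is assumed to be an instance of this DeepFool class):

 import numpy as np

 def perturbation_batch(attack, samples, device):
     # run the single-sample routine on every sample and stack the results
     adv_batch = []
     for i in range(samples.shape[0]):
         adv, _, _ = attack.perturbation_single(samples[i:i + 1], device)
         adv_batch.append(adv)
     return np.concatenate(adv_batch, axis=0)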
Example #10
    def perturbation(self, samples, ys_targets, batch_size, device):
        # samples: the clean input samples
        # ys_targets: the target labels
        # batch_size: the batch size, equal to the number of input samples
        assert len(
            samples
        ) == batch_size, "the length of sample is not equal to the batch_size"
        # transform the samples [lower, upper] to [-1, 1] and then to the arctanh space
        # a pixel x in [lower_bound, upper_bound] is mapped to [-1, 1] via
        # (x - (lower_bound + upper_bound) / 2) / ((upper_bound - lower_bound) / 2)
        mid_point = (self.upper_bound + self.lower_bound) * 0.5
        half_range = (self.upper_bound - self.lower_bound) * 0.5
        # then map the pixels into arctanh space, arctanh(x) = 0.5 * ln((1 + x) / (1 - x)),
        # i.e. w = arctanh((x - mid_point) / half_range); the factor 0.9999 avoids infinities
        arctanh_samples = np.arctanh(
            (samples - mid_point) / half_range * 0.9999)
        # wrap the input samples so that gradients can be computed w.r.t. them
        var_samples = tensor2variable(torch.from_numpy(arctanh_samples),
                                      device=device,
                                      requires_grad=True)
        # set the lower and upper bound for searching 'c' const in the CW2 attack
        # one initial value of the constant c per input sample
        const_origin = np.ones(shape=batch_size, dtype=float) * self.init_const
        # the upper bound for c is 1e10
        c_upper_bound = [1e10] * batch_size
        # the lower bound for c is 0
        c_lower_bound = np.zeros(batch_size)
        # convert targets to one hot encoder
        # a 10x10 identity matrix: ones on the diagonal, zeros elsewhere
        temp_one_hot_matrix = np.eye(10)
        # container for the one-hot encoded target labels
        targets_in_one_hot = []
        # one-hot encode each target label
        for i in range(batch_size):
            # look up the one-hot row for the current target label
            current_target = temp_one_hot_matrix[ys_targets[i]]
            # collect the one-hot label
            targets_in_one_hot.append(current_target)
        # convert the one-hot labels to a tensor and wrap them
        targets_in_one_hot = tensor2variable(torch.FloatTensor(
            np.array(targets_in_one_hot)),
                                             device=device)
        # best_l2: the best L2 distance so far, initialized to a large value
        best_l2 = [1e10] * batch_size
        # best_perturbation: the best perturbed images so far, same shape
        # as samples, initialized to zeros
        best_perturbation = np.zeros(var_samples.size())
        # current_prediction_class: current predicted labels, initialized to -1
        current_prediction_class = [-1] * batch_size

        # returns True if an adversarial example was found, else False
        def attack_achieved(pre_softmax, target_class):
            # pre_softmax: the pre-softmax scores (logits)
            # target_class: the target label (0-9)
            pre_softmax[target_class] -= self.kappa
            # True if the predicted label equals the target label, else False
            return np.argmax(pre_softmax) == target_class

        self.model.eval()
        # Outer loop for linearly searching for c
        # search over values of c for successful adversarial examples with
        # minimal l2 distance
        for search_for_c in range(self.binary_search_steps):
            # modifier: same shape as samples, every entry initialized to 0.0
            modifier = torch.zeros(var_samples.size()).float()
            # wrap modifier as a differentiable tensor (it ends up holding
            # the perturbation that minimizes the objective)
            modifier = tensor2variable(modifier,
                                       device=device,
                                       requires_grad=True)
            # Adam optimizer over modifier with learning rate lr
            optimizer = torch.optim.Adam([modifier], lr=self.learning_rate)
            # convert the constants c to a tensor and wrap them
            var_const = tensor2variable(torch.FloatTensor(const_origin),
                                        device=device)
            print("\tbinary search step {}:".format(search_for_c))
            # inner loop: optimize the perturbation for the current c values
            for iteration_times in range(self.max_iterations):
                # inverse the transform tanh -> [0, 1]
                # perturbed image (equation from the paper):
                # x + delta = tanh(w + modifier) * half_range + mid_point,
                # which keeps x + delta within [lower_bound, upper_bound]
                perturbed_images = torch.tanh(
                    var_samples + modifier) * half_range + mid_point
                # predict the perturbed images
                prediction = self.model(perturbed_images)
                # squared L2 distance between the perturbed and the clean
                # images (both mapped back into pixel space first)
                l2dist = torch.sum(
                    (perturbed_images -
                     (torch.tanh(var_samples) * half_range + mid_point))**2,
                    [1, 2, 3])
                # first term: the largest logit among the non-target classes;
                # second term: the logit of the target class; kappa controls
                # the confidence margin
                # constraint_loss is f(x') from the paper
                constraint_loss = torch.max(
                    (prediction - 1e10 * targets_in_one_hot).max(1)[0] -
                    (prediction * targets_in_one_hot).sum(1),
                    torch.ones(batch_size, device=device) * self.kappa * -1)
                # c * f(x')
                loss_f = var_const * constraint_loss
                # loss: objective = l2 distance + c * f(x'), summed over the batch
                loss = l2dist.sum() + loss_f.sum(
                )  # minimize |r| + c * loss_f(x+r,l)
                # backpropagate; each iteration updates modifier
                optimizer.zero_grad()
                loss.backward(retain_graph=True)
                optimizer.step()
                # update the best l2 distance, current predication class as well as the corresponding adversarial example
                # loop over every image in the batch
                for i, (dist, score, img) in enumerate(
                        zip(l2dist.data.cpu().numpy(),
                            prediction.data.cpu().numpy(),
                            perturbed_images.data.cpu().numpy())):
                    # if the l2 distance beats the best so far and the
                    # attack succeeded
                    if dist < best_l2[i] and attack_achieved(
                            score, ys_targets[i]):
                        # update the best l2 distance
                        best_l2[i] = dist
                        # update the current predicted label (0-9)
                        current_prediction_class[i] = np.argmax(score)
                        # update the best perturbed image
                        best_perturbation[i] = img
            # update the best constant c for each sample in the batch
            # loop over every image and update its c value
            for i in range(batch_size):
                # if the predicted label equals the target label and is not
                # -1 (i.e. this c produced an adversarial example), decrease c
                if current_prediction_class[i] == ys_targets[
                        i] and current_prediction_class[i] != -1:
                    # the new upper bound is the minimum of the old upper
                    # bound and the current c
                    c_upper_bound[i] = min(c_upper_bound[i], const_origin[i])
                    # if the upper bound dropped below 1e10, move c to the midpoint
                    if c_upper_bound[i] < 1e10:
                        const_origin[i] = (c_lower_bound[i] +
                                           c_upper_bound[i]) / 2.0
                # otherwise increase c
                else:
                    c_lower_bound[i] = max(c_lower_bound[i], const_origin[i])
                    # if the upper bound is below 1e10, move c to the midpoint
                    if c_upper_bound[i] < 1e10:
                        const_origin[i] = (c_lower_bound[i] +
                                           c_upper_bound[i]) / 2.0
                    # if the upper bound is still 1e10, multiply c by 10
                    else:
                        const_origin[i] *= 10
        # return the final perturbed images (as a numpy array)
        return np.array(best_perturbation)
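For reference, constraint_loss implements the f-function of the CW2 attack; with Z(x') the logits, t the target class, and kappa the confidence margin, the objective being minimized is

 \min_{x'} \; \lVert x' - x \rVert_2^2 + c \cdot f(x'), \qquad f(x') = \max\Bigl(\max_{i \neq t} Z(x')_i - Z(x')_t,\; -\kappa\Bigr)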