def generate(self):
    # Pass the model and the JSMA hyper-parameters (theta, gamma) to the attack
    attacker = JSMAAttack(model=self.raw_model, theta=self.theta, gamma=self.gamma)
    # Target class indices for each sample
    targets = np.argmax(self.targets_samples, axis=1)
    # Craft the adversarial examples
    adv_samples = attacker.perturbation(xs=self.nature_samples, ys_target=targets, device=self.device)
    # Predict the labels of the adversarial examples with the raw model and convert them to numpy
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    adv_labels = torch.max(adv_labels, 1)[1]
    adv_labels = adv_labels.cpu().numpy()

    # Save the adversarial examples, their predicted labels, and the true labels
    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    # Count the adversarial examples classified as the specified target label (targeted-attack successes)
    mis_target = 0
    for i in range(len(adv_samples)):
        if targets[i] == adv_labels[i]:
            mis_target += 1
    print('\nFor **{}** (targeted attack) on **{}**, {}/{}={:.1f}% samples are misclassified as the specified target label\n'.format(
        self.attack_name, self.dataset, mis_target, len(adv_samples), mis_target / len(adv_samples) * 100.0))
def defense_predication(self, DefenseModelDirs, defense_name, **kwargs):
    # DefenseModelDirs: directory holding the defense-enhanced models
    # defense_name: name of the defense (upper case)
    re_train_defenses = {
        'NAT', 'RLT', 'RLT1', 'RLT2', 'RLT3', 'EAT', 'UAPAT', 'NEAT', 'RAT', 'RAT1', 'RAT2', 'RAT3',
        'RAT4', 'RAT5', 'RAT6', 'RAT7', 'RAT8', 'RAT9', 'RAT10', 'RAT11', 'MART', 'NEW_MART',
        'NEW_MART1', 'NEW_MMA'
    }
    # 'NRC' is a region-based classification defense and is handled separately below,
    # so it is kept out of the re-training set.
    other_defenses = {'NRC'}

    defense_name = defense_name.upper().strip()
    assert defense_name in re_train_defenses or defense_name in other_defenses, \
        '{} is not supported!!!'.format(defense_name)

    if defense_name in re_train_defenses:
        # Complete defenses that re-train the model
        print('\n##{}## defense is a kind of complete defenses that retrain the model'.format(defense_name))
        # Load the defense-enhanced model
        defended_model_location = '{}/{}/{}_{}_enhanced.pt'.format(DefenseModelDirs, defense_name, self.dataset, defense_name)
        defended_model = MNIST_CNN().to(self.device) if self.dataset == 'MNIST' else ResNet18().to(self.device)
        defended_model.load(path=defended_model_location, device=self.device)
        defended_model.eval()
        # Predict the labels of the adversarial examples with the defended model
        predication = predict(model=defended_model, samples=self.adv_samples, device=self.device)
        # Return the predicted labels as a 1-D array
        labels = torch.argmax(predication, 1).cpu().numpy()
        return labels
    else:
        if defense_name == 'NRC':
            print('\n##{}## defense is a kind of region-based classification defenses ... '.format(defense_name))
            from Defenses.DefenseMethods.NRC import NRCDefense
            num_points = 1000
            assert 'nrc_radius' in kwargs
            assert 'nrc_mean' in kwargs
            assert 'nrc_std' in kwargs
            radius = kwargs['nrc_radius']
            mean = kwargs['nrc_mean']
            std = kwargs['nrc_std']
            nrc = NRCDefense(model=self.raw_model, defense_name='NRC', dataset=self.dataset,
                             device=self.device, num_points=num_points)
            labels = nrc.region_based_classification(samples=self.adv_samples, radius=radius, mean=mean, std=std)
            return labels
        else:
            raise ValueError('{} is not supported!!!'.format(defense_name))
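# Usage sketch (hypothetical, not part of the original code): `evaluator` stands for an instance of the
# evaluation class these methods belong to ('AttackEvaluate' below is an assumed class name), and the
# directory name and NRC values are illustrative assumptions only; the NRC keyword names mirror the
# asserts in defense_predication above.
#
#   evaluator = AttackEvaluate(DataSet='MNIST', AttackName='FGSM')
#   nat_labels = evaluator.defense_predication(DefenseModelDirs='../DefenseEnhancedModels',
#                                              defense_name='NAT')
#   nrc_labels = evaluator.defense_predication(DefenseModelDirs='../DefenseEnhancedModels',
#                                              defense_name='NRC',
#                                              nrc_radius=0.02, nrc_mean=0.0, nrc_std=1.0)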
def generate(self):
    attacker = PGDAttack(model=self.raw_model, epsilon=self.epsilon, eps_iter=self.epsilon_iter, num_steps=self.num_steps)
    adv_samples = attacker.batch_perturbation(xs=self.nature_samples, ys=self.labels_samples,
                                              batch_size=self.attack_batch_size, device=self.device)
    # prediction for the adversarial examples
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    adv_labels = torch.max(adv_labels, 1)[1]
    adv_labels = adv_labels.cpu().numpy()

    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    mis = 0
    for i in range(len(adv_samples)):
        if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
            mis = mis + 1
    print('\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'.format(
        self.attack_name, self.dataset, mis, len(adv_samples), mis / len(adv_labels) * 100))
def generate(self):
    # adv_labels: predicted labels of the adversarial examples
    # adv_samples: adversarial examples
    # labels_samples: true labels (stored one-hot as 0/1)
    # nature_samples: clean samples used to craft the attack

    # Pass the parameters to the BIM attack
    attacker = BIMAttack(model=self.raw_model, epsilon=self.epsilon, eps_iter=self.epsilon_iter, num_steps=self.num_steps)
    # Craft the adversarial examples batch by batch
    adv_samples = attacker.batch_perturbation(xs=self.nature_samples, ys=self.labels_samples,
                                              batch_size=self.attack_batch_size, device=self.device)
    # Predict the labels of the adversarial examples
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    # Take the index of the maximum score and convert it to numpy
    adv_labels = torch.max(adv_labels, 1)[1]
    adv_labels = adv_labels.cpu().numpy()

    # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvExamples.npy (or MNIST)
    # Save their predicted labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvLabels.npy (or MNIST)
    # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_TrueLabels.npy (or MNIST)
    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    # If the prediction differs from the true label, increment mis; this gives the attack success (misclassification) rate
    mis = 0
    for i in range(len(adv_samples)):
        if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
            mis = mis + 1
    print('\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'.format(
        self.attack_name, self.dataset, mis, len(adv_samples), mis / len(adv_labels) * 100))
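# Optional helper (a sketch, not part of the original code): the counting loop used in the generate
# methods above and below can be expressed as one vectorized comparison. Assumes one-hot true labels
# and a 1-D numpy array of predicted class indices.
def misclassification_ratio(one_hot_true_labels, predicted_labels):
    true_indices = one_hot_true_labels.argmax(axis=1)      # one-hot -> class indices
    mis = int((true_indices != predicted_labels).sum())    # number of misclassified samples
    return mis, mis / len(predicted_labels) * 100.0        # count and percentage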
def generate(self):
    # Pass the parameters to the RLLC attack
    attacker = RLLCAttack(model=self.raw_model, epsilon=self.epsilon, alpha_ratio=self.alpha_ratio)

    # nature_samples: clean samples
    # targets_samples: one-hot labels of the target class (for ILLC, the least-likely class)
    # llc_labels: target class indices
    llc_labels = np.argmax(self.targets_samples, 1)

    # Craft the adversarial examples batch by batch
    adv_samples = attacker.batch_perturbation(xs=self.nature_samples, ys_target=llc_labels,
                                              batch_size=self.attack_batch_size, device=self.device)
    # Predict the labels of the adversarial examples and convert them to numpy
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    adv_labels = torch.max(adv_labels, 1)[1]
    adv_labels = adv_labels.cpu().numpy()

    # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvExamples.npy (or MNIST)
    # Save their predicted labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvLabels.npy (or MNIST)
    # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_TrueLabels.npy (or MNIST)
    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    # If the prediction equals the target class, increment mis_target; this gives the targeted-attack success rate
    mis_target = 0
    for i in range(len(adv_samples)):
        if llc_labels[i] == adv_labels[i]:
            mis_target += 1
    print('\nFor the **{}** (targeted attack) on **{}**, the rate misclassified as the target label = {}/{} = {:.1f}%\n'.format(
        self.attack_name, self.dataset, mis_target, len(adv_samples), mis_target / len(adv_samples) * 100))
def generate(self):
    # nature_samples: clean samples
    # labels_samples: true labels (one-hot)
    # raw_model: the loaded CNN (MNIST) or ResNet (CIFAR10) model

    # Pass the raw model and the perturbation magnitude epsilon to the FGSM attack
    attacker = FGSMAttack(model=self.raw_model, epsilon=self.epsilon)
    # Craft the adversarial examples batch by batch
    adv_samples = attacker.batch_perturbation(xs=self.nature_samples, ys=self.labels_samples,
                                              batch_size=self.attack_batch_size, device=self.device)
    # Predict the labels of the adversarial examples
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    # Take the index of the maximum score
    adv_labels = torch.max(adv_labels, 1)[1]
    # Convert the labels to a numpy array
    adv_labels = adv_labels.cpu().numpy()

    # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvExamples.npy (or MNIST)
    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    # Save their predicted labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvLabels.npy (or MNIST)
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_TrueLabels.npy (or MNIST)
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    # If the prediction differs from the true label, increment mis; this gives the attack success (misclassification) rate
    mis = 0
    for i in range(len(adv_samples)):
        if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
            mis = mis + 1
    print('\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'.format(
        self.attack_name, self.dataset, mis, len(adv_samples), mis / len(adv_labels) * 100))
def generate(self):
    # nature_samples: clean samples
    # labels_samples: true labels of the clean samples

    # Pass the parameters to the RFGSM attack
    attacker = RFGSMAttack(model=self.raw_model, epsilon=self.epsilon, alpha_ratio=self.alpha_ratio)
    # Craft the adversarial examples batch by batch
    adv_samples = attacker.batch_perturbation(xs=self.nature_samples, ys=self.labels_samples,
                                              batch_size=self.attack_batch_size, device=self.device)
    # Predict the labels of the adversarial examples and convert them to numpy
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    adv_labels = torch.max(adv_labels, 1)[1]
    adv_labels = adv_labels.cpu().numpy()

    # Save the adversarial examples to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvExamples.npy (or MNIST)
    # Save their predicted labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvLabels.npy (or MNIST)
    # Save the true labels to AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_TrueLabels.npy (or MNIST)
    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    # If the prediction differs from the true label, increment mis; this gives the attack success (misclassification) rate
    mis = 0
    for i in range(len(adv_samples)):
        if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
            mis = mis + 1
    print('\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'.format(
        self.attack_name, self.dataset, mis, len(adv_samples), mis / len(adv_labels) * 100))
def generate(self):
    # Pass the parameters to the DeepFool attack
    attacker = DeepFoolAttack(model=self.raw_model, overshoot=self.overshoot, max_iters=self.max_iters)
    # Craft the DeepFool adversarial examples
    adv_samples = attacker.perturbation(xs=self.nature_samples, device=self.device)

    # Prediction for the adversarial examples on the raw model
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    # Take the predicted class indices
    adv_labels = torch.max(adv_labels, 1)[1]
    # Convert the labels to a numpy array
    adv_labels = adv_labels.cpu().numpy()

    # Save the DeepFool adversarial examples
    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    # Save their predicted labels
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    # Save the corresponding true labels
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    # mis: number of the 1000 DeepFool adversarial examples misclassified by the raw model
    mis = 0
    for i in range(len(adv_samples)):
        if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
            mis = mis + 1
    print('\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'.format(
        self.attack_name, self.dataset, mis, len(adv_samples), mis / len(adv_labels) * 100))
def __init__(self, DataSet='MNIST', AttackName='FGSM', RawModelLocation='../data/',
             CleanDataLocation='../clean_datasets/', AdvExamplesDir='../AdversarialExampleDatasets/',
             device=torch.device('cpu')):
    # DataSet / dataset: dataset name, 'MNIST' or 'CIFAR10'
    # AttackName / attack_name: attack name
    # RawModelLocation: location of the raw models (data/)
    # CleanDataLocation: location of the clean datasets (clean_datasets/)
    # AdvExamplesDir: location of the adversarial examples (AdversarialExampleDatasets/)
    # color_mode: 'RGB' for CIFAR10, 'L' for MNIST
    # Targeted: False for untargeted attacks, True for targeted attacks
    self.device = device

    assert DataSet.upper() in ['MNIST', 'CIFAR10'], "The data set must be MNIST or CIFAR10"
    self.dataset = DataSet.upper()
    self.color_mode = 'RGB' if self.dataset == 'CIFAR10' else 'L'
    self.attack_name = AttackName.upper()

    # Untargeted attacks
    supported_un_targeted = ['FGSM', 'RFGSM', 'BIM', 'PGD', 'DEEPFOOL', 'UAP']
    # Targeted attacks
    supported_targeted = ['LLC', "RLLC", 'ILLC', 'JSMA', 'CW2']
    assert self.attack_name in supported_un_targeted or self.attack_name in supported_targeted, \
        "\nCurrently, our implementation supports attacks of FGSM, RFGSM, BIM, PGD, DeepFool, UAP, LLC, RLLC, ILLC, JSMA and CW2.\n"
    if self.attack_name.upper() in supported_un_targeted:
        self.Targeted = False
    else:
        self.Targeted = True

    # Load the raw model
    # raw_model_location: data/CIFAR10/model/CIFAR10_raw.pt (or MNIST)
    # ******** To evaluate white-box attacks against a defense, change this path to
    # ******** RawModelLocation/defense_name/dataset_defense_enhanced.pt ********
    raw_model_location = '{}{}/model/{}_raw.pt'.format(RawModelLocation, self.dataset, self.dataset)
    if self.dataset == 'MNIST':
        self.raw_model = MNIST_CNN().to(device)
        self.raw_model.load(path=raw_model_location, device=device)
    else:
        self.raw_model = ResNet18().to(device)
        self.raw_model.load(path=raw_model_location, device=device)

    # Load the clean samples and their labels
    # nature_samples: CleanDatasets/CIFAR10/CIFAR10_inputs.npy (or MNIST)
    # labels_samples: CleanDatasets/CIFAR10/CIFAR10_labels.npy (or MNIST)
    self.nature_samples = np.load('{}{}/{}_inputs.npy'.format(CleanDataLocation, self.dataset, self.dataset))
    self.labels_samples = np.load('{}{}/{}_labels.npy'.format(CleanDataLocation, self.dataset, self.dataset))

    # Load the target labels
    # For LLC, RLLC and ILLC the targets are the least-likely classes: CleanDatasets/CIFAR10/CIFAR10_llc.npy (or MNIST)
    if self.attack_name.upper() in ['LLC', 'RLLC', 'ILLC']:
        self.targets_samples = np.load('{}{}/{}_llc.npy'.format(CleanDataLocation, self.dataset, self.dataset))
    # Otherwise the targets are CleanDatasets/CIFAR10/CIFAR10_targets.npy (or MNIST)
    else:
        self.targets_samples = np.load('{}{}/{}_targets.npy'.format(CleanDataLocation, self.dataset, self.dataset))

    # Load the adversarial examples
    # AdvExamplesDir: AdversarialExampleDatasets/attack_name/CIFAR10/ (or MNIST)
    # adv_samples: AdversarialExampleDatasets/attack_name/CIFAR10/attack_name_AdvExamples.npy (or MNIST)
    self.AdvExamplesDir = AdvExamplesDir + self.attack_name + '/' + self.dataset + '/'
    # Warn if the directory does not exist
    if os.path.exists(self.AdvExamplesDir) is False:
        print("the directory of {} is not existing, please check carefully".format(self.AdvExamplesDir))
    self.adv_samples = np.load('{}{}_AdvExamples.npy'.format(self.AdvExamplesDir, self.attack_name))
    # self.adv_labels = np.load('{}{}_AdvLabels.npy'.format(self.AdvExamplesDir, self.AttackName))

    # Predict the labels of the adversarial examples with the raw model
    # predictions: raw model outputs for the adversarial examples
    predictions = predict(model=self.raw_model, samples=self.adv_samples, device=self.device).detach().cpu().numpy()

    # Define the softmax function
    def soft_max(x):
        return np.exp(x) / np.sum(np.exp(x), axis=0)

    # Apply softmax to the predictions
    # softmax_prediction: softmax of the raw predictions
    tmp_soft_max = []
    for i in range(len(predictions)):
        tmp_soft_max.append(soft_max(predictions[i]))
    self.softmax_prediction = np.array(tmp_soft_max)
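# Optional: a numerically stable variant of the soft_max helper defined in __init__ above
# (a sketch, not part of the original pipeline). Subtracting the per-row maximum avoids
# overflow in np.exp when the raw logits are large.
def stable_soft_max(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)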
def generate(self):
    # Pass the parameters to the UAP attack
    attacker = UniversalAttack(model=self.raw_model, fooling_rate=self.fooling_rate,
                               max_iter_universal=self.max_iter_uni, epsilon=self.epsilon,
                               overshoot=self.overshoot, max_iter_deepfool=self.max_iter_df)

    assert self.dataset.upper() == 'MNIST' or self.dataset.upper() == 'CIFAR10', \
        "dataset should be MNIST or CIFAR10!"
    # Get the MNIST / CIFAR10 training and validation loaders
    if self.dataset.upper() == 'MNIST':
        samples_loader, valid_loader = get_mnist_train_validate_loader(dir_name='../data/MNIST/', batch_size=1,
                                                                       valid_size=0.9, shuffle=True)
    else:  # 'CIFAR10'
        samples_loader, valid_loader = get_cifar10_train_validate_loader(dir_name='../data/CIFAR10/', batch_size=1,
                                                                         valid_size=0.9, augment=False, shuffle=True)

    # Compute the universal perturbation and convert it to numpy
    universal_perturbation = attacker.universal_perturbation(dataset=samples_loader, validation=valid_loader,
                                                             device=self.device)
    universal_perturbation = universal_perturbation.cpu().numpy()
    # Save the universal perturbation, e.g. as AdversarialExampleDatasets/UAP/MNIST/UAP_MNIST_universal_perturbation.npy
    np.save('{}{}_{}_universal_perturbation'.format(self.adv_examples_dir, self.attack_name, self.dataset),
            universal_perturbation)

    # Craft the UAP adversarial examples by adding the universal perturbation to the clean samples
    adv_samples = attacker.perturbation(xs=self.nature_samples, uni_pert=universal_perturbation, device=self.device)
    # Predict the labels of the UAP adversarial examples
    adv_labels = predict(model=self.raw_model, samples=adv_samples, device=self.device)
    # Convert the predicted labels to numpy
    adv_labels = torch.max(adv_labels, 1)[1]
    adv_labels = adv_labels.cpu().numpy()

    # Save the UAP adversarial examples, their predicted labels, and the true labels
    np.save('{}{}_AdvExamples.npy'.format(self.adv_examples_dir, self.attack_name), adv_samples)
    np.save('{}{}_AdvLabels.npy'.format(self.adv_examples_dir, self.attack_name), adv_labels)
    np.save('{}{}_TrueLabels.npy'.format(self.adv_examples_dir, self.attack_name), self.labels_samples)

    # Compute the misclassification rate of the samples perturbed with the universal perturbation
    mis = 0
    for i in range(len(adv_samples)):
        if self.labels_samples[i].argmax(axis=0) != adv_labels[i]:
            mis = mis + 1
    print('\nFor **{}** on **{}**: misclassification ratio is {}/{}={:.1f}%\n'.format(
        self.attack_name, self.dataset, mis, len(adv_samples), mis / len(adv_labels) * 100))
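# A minimal re-use sketch (not part of the original code): load the universal perturbation saved
# above and apply it to new clean samples, clipping back to the assumed valid pixel range [0, 1].
# The path and variable names are illustrative assumptions.
def apply_saved_uap(perturbation_path, clean_samples):
    uap = np.load(perturbation_path)                 # e.g. '..._universal_perturbation.npy'
    return np.clip(clean_samples + uap, 0.0, 1.0)    # broadcast the single perturbation over the batch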