def main_evaluation_script():
    """Evaluate a trained CIFAR-10 model against several attacks on the fly
    (without saving adversarial examples to disk).

    Steps:
      0) Initialize a classifier/normalizer/evaluation loader
      1) Build some attack objects to try
      2) Run the evaluation and print/return results
    """
    # 0 -- classifier, differentiable normalizer, validation data
    classifier_net = cifar_loader.load_pretrained_cifar_resnet(flavor=32,
                                                               use_gpu=False)
    cifar_normer = utils.DifferentiableNormalize(mean=config.CIFAR10_MEANS,
                                                 std=config.CIFAR10_STDS)
    val_loader = cifar_loader.load_cifar_data('val', normalize=False)

    # 1 -- build the attacks
    L_INF_BOUND = 8.0 / 255.0

    # --- FGSM attack
    fgsm_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                             normalizer=cifar_normer)
    fgsm_attack_obj = aa.FGSM(classifier_net, cifar_normer,
                              fgsm_xentropy_loss)
    fgsm_spec_params = {'attack_kwargs': {'l_inf_bound': L_INF_BOUND}}
    fgsm_attack_params = advtrain.AdversarialAttackParameters(
        fgsm_attack_obj, 0.5, fgsm_spec_params)

    # --- BIM attack
    BIM_L_INF = 8.0 / 255.0
    BIM_STEP_SIZE = 1.0 / 255.0
    BIM_NUM_ITER = 16
    bim_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                            normalizer=cifar_normer)
    bim_attack_obj = aa.BIM(classifier_net, cifar_normer, bim_xentropy_loss)
    bim_spec_params = {
        'attack_kwargs': {
            # BUGFIX: use the BIM-specific bound (previously passed
            # L_INF_BOUND, leaving BIM_L_INF unused; the two values are
            # currently equal, so behavior is unchanged).
            'l_inf_bound': BIM_L_INF,
            'step_size': BIM_STEP_SIZE,
            'num_iterations': BIM_NUM_ITER
        }
    }
    bim_attack_params = advtrain.AdversarialAttackParameters(
        bim_attack_obj, 0.5, bim_spec_params)

    attack_ensemble = {'fgsm': fgsm_attack_params, 'bim': bim_attack_params}

    # 2 -- run the evaluation
    eval_obj = advtrain.AdversarialEvaluation(classifier_net, cifar_normer)
    eval_out = eval_obj.evaluate(val_loader, attack_ensemble,
                                 num_minibatches=5)
    # BUGFIX: previously the result was computed and silently dropped,
    # despite the stated intent to print the results.
    print(eval_out)
    return eval_out
def build_stadv_linf_attack(classifier_net, normalizer):
    """Build attack parameters for an stAdv (spatial flow) PGD attack.

    Uses a CW-F6 adversarial loss regularized by the l_2 norm of the flow
    perturbation, optimized by PGD with a lowered learning rate.
    """
    # Threat model: spatial-transformer flow field bounded in l_inf
    flow_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=FLOW_LINF_BOUND,
                                  xform_class=st.FullSpatial,
                                  use_stadv=True))

    # Loss: CW-F6 adversarial term plus perturbation-norm regularizer
    adversarial_term = lf.CWLossF6(classifier_net, normalizer)
    smoothness_term = lf.PerturbationNormLoss(lp=2)
    loss_fxn = lf.RegularizedLoss({'adv': adversarial_term,
                                   'st': smoothness_term},
                                  {'adv': 1.0,
                                   'st': 0.05},
                                  negate=True)

    # PGD hyperparameters: global defaults with lr dropped to 0.001
    pgd_kwargs = copy.deepcopy(GLOBAL_ATK_KWARGS)
    pgd_kwargs['optimizer_kwargs']['lr'] = 0.001
    pgd_attack = aa.PGD(classifier_net, normalizer, flow_threat, loss_fxn)

    return advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
def main_defense_script():
    """Adversarially train a CIFAR-10 ResNet-32 using FGSM examples on half
    of each minibatch."""
    ########################################################################
    #                            SHARED BLOCK                              #
    ########################################################################

    # Initialize CIFAR classifier
    classifier_net = cifar_loader.load_pretrained_cifar_resnet(flavor=32,
                                                               use_gpu=False)
    classifier_net.eval()

    # Differentiable normalizer needed for classification
    cifar_normer = utils.DifferentiableNormalize(mean=config.CIFAR10_MEANS,
                                                 std=config.CIFAR10_STDS)

    ######################################################################
    #                  SIMPLE FGSM TRAINING EXAMPLE                      #
    ######################################################################
    # Steps:
    #   0) initialize hyperparams for attack/training
    #   1) setup attack loss object
    #   2) build attack and parameters for attack
    #   3) build training object, training loss, data loader
    #   4) train

    # 0
    FGSM_L_INF = 8.0 / 255.0
    FGSM_TRAINING_ATTACK_PROPORTION = 0.5
    FGSM_TRAINING_EPOCHS = 10

    # 1
    fgsm_attack_loss = plf.VanillaXentropy(classifier_net, cifar_normer)

    # 2
    fgsm_attack_obj = aa.FGSM(classifier_net, cifar_normer, fgsm_attack_loss)
    fgsm_attack_params = advtrain.AdversarialAttackParameters(
        fgsm_attack_obj, FGSM_TRAINING_ATTACK_PROPORTION,
        {'attack_kwargs': {'l_inf_bound': FGSM_L_INF}})

    # 3
    half_fgsm_cifar = advtrain.AdversarialTraining(classifier_net,
                                                   cifar_normer,
                                                   'half_fgsm_cifar',
                                                   'cifar_resnet32')
    train_loss = nn.CrossEntropyLoss()
    train_loader = cifar_loader.load_cifar_data('train', normalize=False)

    # 4
    half_fgsm_cifar.train(train_loader, FGSM_TRAINING_EPOCHS, train_loss,
                          attack_parameters=fgsm_attack_params,
                          verbosity='snoop')
def build_full_attack(classifier_net, normalizer, use_gpu):
    """Build params for a combined delta + flow + translation + rotation
    attack, composed sequentially and optimized jointly with PGD.

    The delta component is excluded from the norm penalty (weight 0.0).
    """
    # Additive l_inf-bounded pixel perturbation
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=L_INF_BOUND,
                                  use_gpu=use_gpu))
    # Small l_1-bounded translation
    trans_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style=1,
                                  lp_bound=0.05,
                                  xform_class=st.TranslationTransform,
                                  use_gpu=use_gpu))
    # stAdv-style spatial flow field
    flow_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=FLOW_LINF_BOUND,
                                  xform_class=st.FullSpatial,
                                  use_gpu=use_gpu,
                                  use_stadv=True))
    # Rotation bounded by pi/24 radians
    rotation_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(xform_class=st.RotationTransform,
                                  lp_style='inf',
                                  lp_bound=math.pi / 24.,
                                  use_gpu=use_gpu))
    # Sequential composition; delta's norm contribution is zeroed out
    sequence_threat = ap.ThreatModel(
        ap.SequentialPerturbation,
        [delta_threat, flow_threat, trans_threat, rotation_threat],
        ap.PerturbationParameters(norm_weights=[0.0, 1.0, 1.0, 1.0]))

    # PGD hyperparams: global defaults with a lowered learning rate
    pgd_kwargs = copy.deepcopy(GLOBAL_ATK_KWARGS)
    pgd_kwargs['optimizer_kwargs']['lr'] = 0.001

    # CW-F6 adversarial loss + l_2 perturbation-norm regularizer
    cw_term = lf.CWLossF6(classifier_net, normalizer)
    norm_term = lf.PerturbationNormLoss(lp=2)
    loss_fxn = lf.RegularizedLoss({'adv': cw_term, 'st': norm_term},
                                  {'adv': 1.0, 'st': 0.05},
                                  negate=True)

    pgd_attack = aar.PGD(classifier_net, normalizer, sequence_threat,
                         loss_fxn, use_gpu=use_gpu)
    return advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
def build_fgsm_attack(classifier_net, normalizer):
    """Build attack parameters for a single-step FGSM l_inf attack.

    Args:
        classifier_net: classifier under attack
        normalizer: differentiable normalizer applied before classification

    Returns:
        advtrain.AdversarialAttackParameters wrapping the FGSM attack.
    """
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf', lp_bound=L_INF_BOUND))
    attack_loss = plf.VanillaXentropy(classifier_net, normalizer)
    # BUGFIX: was `cifar_normer`, a name not defined in this scope (raises
    # NameError at call time); every other builder in this file passes the
    # `normalizer` argument here.
    fgsm_attack = aa.FGSM(classifier_net, normalizer, delta_threat,
                          attack_loss)
    attack_kwargs = {'verbose': GLOBAL_ATK_KWARGS['verbose']}
    params = advtrain.AdversarialAttackParameters(
        fgsm_attack, 1.0,
        attack_specific_params={'attack_kwargs': attack_kwargs})
    return params
def build_pgd_linf_attack(classifier_net, normalizer, use_gpu):
    """Build params for a PGD attack under an additive l_inf threat model.

    Uses the prebuilt VanillaXentropy loss and the global PGD kwargs.
    """
    # Threat: additive perturbation bounded in l_inf
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=L_INF_BOUND,
                                  use_gpu=use_gpu))
    # Prebuilt loss function
    xentropy_loss = plf.VanillaXentropy(classifier_net, normalizer=normalizer)
    pgd_attack = aar.PGD(classifier_net, normalizer, delta_threat,
                         xentropy_loss)
    pgd_kwargs = copy.deepcopy(GLOBAL_ATK_KWARGS)
    return advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
def build_delta_fgsm(model, normalizer, linf_bound=L_INF_BOUND,
                     use_gpu=USE_GPU, verbose=False, adv_loss='xentropy',
                     output='attack'):
    """Build an FGSM attack under an additive l_inf threat model.

    Args:
        model: classifier under attack
        normalizer: differentiable input normalizer
        linf_bound: l_inf radius of the additive perturbation
        use_gpu: whether attack/eval objects should run on the GPU
        verbose: verbosity flag forwarded to the attack kwargs
        adv_loss: 'xentropy' or 'cw' adversarial loss
        output: 'attack' | 'params' | 'eval' -- which wrapper to return
    """
    # Build threat
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=linf_bound,
                                  use_gpu=use_gpu))

    # Build loss
    assert adv_loss in ['xentropy', 'cw']
    if adv_loss == 'xentropy':
        attack_loss = plf.VanillaXentropy(model, normalizer)
    else:
        cw_loss = lf.CWLossF6(model, normalizer)
        attack_loss = lf.RegularizedLoss({'adv': cw_loss}, {'adv': 1.0})

    # Build attack
    fgsm_attack = aar.FGSM(model, normalizer, delta_threat, attack_loss,
                           use_gpu=use_gpu)

    # Return based on output arg
    assert output in ['attack', 'params', 'eval']
    if output == 'attack':
        return fgsm_attack

    attack_kwargs = {'verbose': verbose}
    params = advtrain.AdversarialAttackParameters(
        fgsm_attack, 1.0,
        attack_specific_params={'attack_kwargs': attack_kwargs})
    if output == 'params':
        return params

    to_eval = {'top1': 'top1', 'lpips': 'avg_successful_lpips'}
    # BUGFIX: honor the caller's use_gpu argument (was hard-coded to the
    # module-level USE_GPU, silently ignoring the parameter).
    eval_result = adveval.EvaluationResult(params, model, normalizer,
                                           to_eval=to_eval,
                                           use_gpu=use_gpu)
    return eval_result
def build_delta_pgd(model, normalizer, linf_bound=L_INF_BOUND,
                    manual_gpu=None, verbose=False, adv_loss='cw',
                    num_iter=PGD_ITER, loss_convergence=LOSS_CONVERGENCE,
                    output='attack', extra_attack_kwargs=None):
    """Build a PGD attack under an additive l_inf threat model.

    Depending on `output`, returns the raw attack object, the wrapped
    attack parameters, or an EvaluationResult over those parameters.
    """
    # Threat model: additive perturbation bounded in l_inf
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=linf_bound,
                                  manual_gpu=manual_gpu))

    # Adversarial loss: plain cross-entropy or negated CW-F6
    assert adv_loss in ['xentropy', 'cw']
    if adv_loss == 'xentropy':
        attack_loss = plf.VanillaXentropy(model, normalizer)
    else:
        attack_loss = lf.RegularizedLoss(
            {'adv': lf.CWLossF6(model, normalizer)}, {'adv': 1.0},
            negate=True)

    # Build attack
    pgd_attack = aa.PGD(model, normalizer, delta_threat, attack_loss,
                        manual_gpu=manual_gpu)

    assert output in ['attack', 'params', 'eval']
    if output == 'attack':
        return pgd_attack

    # Wrap in attack parameters (Adam, unsigned gradient steps)
    pgd_kwargs = {
        'num_iterations': num_iter,
        'signed': False,
        'optimizer': optim.Adam,
        'optimizer_kwargs': {'lr': 0.01},
        'verbose': verbose,
        'loss_convergence': loss_convergence
    }
    pgd_kwargs.update(extra_attack_kwargs or {})
    params = advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
    if output == 'params':
        return params

    # Full evaluation wrapper
    to_eval = {'top1': 'top1', 'lpips': 'avg_successful_lpips'}
    return adveval.EvaluationResult(params, to_eval=to_eval,
                                    manual_gpu=manual_gpu)
def build_rotation_translation_attack(classifier_net, normalizer, use_gpu):
    """Build params for a sequential delta + translation + rotation attack,
    optimized with PGD under a vanilla cross-entropy loss."""
    # Additive l_inf-bounded pixel perturbation
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=L_INF_BOUND,
                                  use_gpu=use_gpu))
    # Small l_1-bounded translation
    trans_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style=1,
                                  lp_bound=0.05,
                                  xform_class=st.TranslationTransform,
                                  use_gpu=use_gpu))
    # Rotation bounded by pi/24 radians
    rotation_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(xform_class=st.RotationTransform,
                                  lp_style='inf',
                                  lp_bound=math.pi / 24.,
                                  use_gpu=use_gpu))
    # Sequential composition of the three threats
    sequence_threat = ap.ThreatModel(
        ap.SequentialPerturbation,
        [delta_threat, trans_threat, rotation_threat])

    # PGD with a lowered learning rate over the global defaults
    pgd_kwargs = copy.deepcopy(GLOBAL_ATK_KWARGS)
    pgd_kwargs['optimizer_kwargs']['lr'] = 0.001
    xent_loss = plf.VanillaXentropy(classifier_net, normalizer)
    pgd_attack = aar.PGD(classifier_net, normalizer, sequence_threat,
                         xent_loss, use_gpu=use_gpu)
    return advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
def build_stadv_rot_trans_pgd(model, normalizer, flow_bound=FLOW_LINF,
                              trans_bound=TRANS_LINF, rot_bound=ROT_LINF,
                              use_gpu=USE_GPU, verbose=False, adv_loss='cw',
                              num_iter=PGD_ITER,
                              loss_convergence=LOSS_CONVERGENCE,
                              output='attack', extra_attack_kwargs=None):
    """Build a PGD attack composing stAdv flow + translation + rotation.

    Args:
        model: classifier under attack
        normalizer: differentiable input normalizer
        flow_bound / trans_bound / rot_bound: l_inf radii per component
        use_gpu: whether attack/eval objects should run on the GPU
        verbose: verbosity flag forwarded to the attack kwargs
        adv_loss: 'cw' or 'xentropy' adversarial loss
        num_iter: number of PGD iterations
        loss_convergence: early-stopping threshold forwarded to PGD
        output: 'attack' | 'params' | 'eval' -- which wrapper to return
        extra_attack_kwargs: optional dict merged into the PGD kwargs
    """
    # Build threats
    flow_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=flow_bound,
                                  xform_class=st.FullSpatial,
                                  use_gpu=use_gpu,
                                  use_stadv=True))
    trans_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=trans_bound,
                                  xform_class=st.TranslationTransform,
                                  use_gpu=use_gpu))
    rotation_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(xform_class=st.RotationTransform,
                                  lp_style='inf',
                                  lp_bound=rot_bound,
                                  use_gpu=use_gpu))
    sequence_threat = ap.ThreatModel(
        ap.SequentialPerturbation,
        [flow_threat, trans_threat, rotation_threat],
        # BUGFIX: four norm weights were supplied for three component
        # threats (copy-paste from the four-threat builder); use one
        # weight per threat.
        ap.PerturbationParameters(norm_weights=[1.0, 1.0, 1.0]))

    # Build loss: adversarial term + l_2 perturbation-norm regularizer
    assert adv_loss in ['cw', 'xentropy']
    if adv_loss == 'xentropy':
        adv_loss_obj = lf.PartialXentropy(model, normalizer=normalizer)
    else:
        adv_loss_obj = lf.CWLossF6(model, normalizer)
    st_loss = lf.PerturbationNormLoss(lp=2)
    loss_fxn = lf.RegularizedLoss({'adv': adv_loss_obj, 'st': st_loss},
                                  {'adv': 1.0, 'st': 0.05},
                                  negate=True)

    # Build attack (Adam, lr=0.001)
    optimizer = optim.Adam
    optimizer_kwargs = {'lr': 0.001}
    pgd_attack = aar.PGD(model, normalizer, sequence_threat, loss_fxn,
                         use_gpu=use_gpu)

    assert output in ['attack', 'params', 'eval']
    if output == 'attack':
        return pgd_attack

    pgd_kwargs = {
        'num_iterations': num_iter,
        'signed': False,
        'optimizer': optimizer,
        'optimizer_kwargs': optimizer_kwargs,
        'verbose': verbose,
        'loss_convergence': loss_convergence
    }
    if extra_attack_kwargs is not None:
        pgd_kwargs.update(extra_attack_kwargs)
    params = advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
    if output == 'params':
        return params

    to_eval = {'top1': 'top1', 'lpips': 'avg_successful_lpips'}
    eval_result = adveval.EvaluationResult(params, model, normalizer,
                                           to_eval=to_eval,
                                           use_gpu=use_gpu)
    return eval_result
def build_delta_rot_trans_pgd(model, normalizer, delta_bound=L_INF_BOUND,
                              trans_bound=TRANS_LINF, rot_bound=ROT_LINF,
                              use_gpu=USE_GPU, verbose=False, adv_loss='cw',
                              num_iter=PGD_ITER,
                              loss_convergence=LOSS_CONVERGENCE,
                              output='attack', extra_attack_kwargs=None):
    """Build a PGD attack composing delta + translation + rotation threats.

    Args:
        model: classifier under attack
        normalizer: differentiable input normalizer
        delta_bound / trans_bound / rot_bound: l_inf radii per component
        use_gpu: whether attack/eval objects should run on the GPU
        verbose: verbosity flag forwarded to the attack kwargs
        adv_loss: 'xentropy' or 'cw' adversarial loss
        num_iter: number of PGD iterations
        loss_convergence: early-stopping threshold forwarded to PGD
        output: 'attack' | 'params' | 'eval' -- which wrapper to return
        extra_attack_kwargs: optional dict merged into the PGD kwargs
    """
    # Build threat
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=delta_bound,
                                  use_gpu=use_gpu))
    trans_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=trans_bound,
                                  xform_class=st.TranslationTransform,
                                  use_gpu=use_gpu))
    rotation_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(xform_class=st.RotationTransform,
                                  lp_style='inf',
                                  lp_bound=rot_bound,
                                  use_gpu=use_gpu))
    sequence_threat = ap.ThreatModel(
        ap.SequentialPerturbation,
        [delta_threat, trans_threat, rotation_threat])

    # Build loss
    assert adv_loss in ['xentropy', 'cw']
    if adv_loss == 'xentropy':
        attack_loss = plf.VanillaXentropy(model, normalizer)
    else:
        cw_loss = lf.CWLossF6(model, normalizer)
        attack_loss = lf.RegularizedLoss({'adv': cw_loss}, {'adv': 1.0},
                                         negate=True)

    # Build attack
    pgd_attack = aar.PGD(model, normalizer, sequence_threat, attack_loss,
                         use_gpu=use_gpu)

    # Return based on output arg
    assert output in ['attack', 'params', 'eval']
    if output == 'attack':
        return pgd_attack

    extra_attack_kwargs = extra_attack_kwargs or {}
    pgd_kwargs = {
        'num_iterations': num_iter,
        'signed': False,
        'optimizer': optim.Adam,
        'optimizer_kwargs': {'lr': 0.01},
        'verbose': verbose,
        'loss_convergence': loss_convergence
    }
    pgd_kwargs.update(extra_attack_kwargs)
    params = advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
    if output == 'params':
        return params

    to_eval = {'top1': 'top1', 'lpips': 'avg_successful_lpips'}
    # BUGFIX: honor the caller's use_gpu argument (was hard-coded to the
    # module-level USE_GPU, silently ignoring the parameter).
    eval_result = adveval.EvaluationResult(params, model, normalizer,
                                           to_eval=to_eval,
                                           use_gpu=use_gpu)
    return eval_result
def build_stadv_pgd(model, normalizer, linf_bound=FLOW_LINF,
                    use_gpu=USE_GPU, verbose=False, adv_loss='cw',
                    num_iter=PGD_ITER, loss_convergence=LOSS_CONVERGENCE,
                    use_stadv=True, output='attack', norm_hyperparam=0.05,
                    extra_attack_kwargs=None):
    """Build a PGD attack over a spatial-flow (stAdv) threat model.

    Args:
        model: classifier under attack
        normalizer: differentiable input normalizer
        linf_bound: l_inf radius of the flow field
        use_gpu: whether attack/eval objects should run on the GPU
        verbose: verbosity flag forwarded to the attack kwargs
        adv_loss: 'xentropy' or 'cw' adversarial loss
        num_iter: number of PGD iterations
        loss_convergence: early-stopping threshold forwarded to PGD
        use_stadv: whether to use the stAdv flow parameterization
        output: 'attack' | 'params' | 'eval' -- which wrapper to return
        norm_hyperparam: weight of the perturbation-norm regularizer
        extra_attack_kwargs: optional dict merged into the PGD kwargs
    """
    # Build threat
    flow_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=linf_bound,
                                  xform_class=st.FullSpatial,
                                  use_gpu=use_gpu,
                                  use_stadv=use_stadv))

    # Build loss: adversarial term + weighted l_2 norm regularizer
    assert adv_loss in ['xentropy', 'cw']
    if adv_loss == 'xentropy':
        adv_loss_obj = lf.PartialXentropy(model, normalizer=normalizer)
    else:
        adv_loss_obj = lf.CWLossF6(model, normalizer)
    st_loss = lf.PerturbationNormLoss(lp=2)
    attack_loss = lf.RegularizedLoss({'adv': adv_loss_obj, 'st': st_loss},
                                     {'adv': 1.0, 'st': norm_hyperparam},
                                     negate=True)

    # Build attack
    pgd_attack = aar.PGD(model, normalizer, flow_threat, attack_loss,
                         use_gpu=use_gpu)

    # Return based on output arg
    assert output in ['attack', 'params', 'eval']
    if output == 'attack':
        return pgd_attack

    extra_attack_kwargs = extra_attack_kwargs or {}
    pgd_kwargs = {
        'num_iterations': num_iter,
        'signed': False,
        'optimizer': optim.Adam,
        'optimizer_kwargs': {'lr': 0.01},
        'verbose': verbose,
        'loss_convergence': loss_convergence
    }
    pgd_kwargs.update(extra_attack_kwargs)
    params = advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
    if output == 'params':
        return params

    to_eval = {'top1': 'top1', 'lpips': 'avg_successful_lpips'}
    # BUGFIX: honor the caller's use_gpu argument (was hard-coded to the
    # module-level USE_GPU, silently ignoring the parameter).
    eval_result = adveval.EvaluationResult(params, model, normalizer,
                                           to_eval=to_eval,
                                           use_gpu=use_gpu)
    return eval_result
def build_delta_stadv_pgd(model, normalizer, delta_bound=L_INF_BOUND,
                          flow_bound=FLOW_LINF, manual_gpu=None,
                          verbose=False, adv_loss='cw', num_iter=PGD_ITER,
                          loss_convergence=LOSS_CONVERGENCE, output='attack',
                          extra_attack_kwargs=None):
    """Build a PGD attack composing an additive delta with an stAdv flow.

    Only the flow component contributes to the perturbation-norm penalty.
    Depending on `output`, returns the raw attack, its wrapped parameters,
    or an EvaluationResult.
    """
    # Threats: additive delta plus spatial flow, composed sequentially;
    # norm_weights zeroes out the delta's contribution to the norm penalty.
    delta_threat = ap.ThreatModel(
        ap.DeltaAddition,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=delta_bound,
                                  manual_gpu=manual_gpu))
    flow_threat = ap.ThreatModel(
        ap.ParameterizedXformAdv,
        ap.PerturbationParameters(lp_style='inf',
                                  lp_bound=flow_bound,
                                  xform_class=st.FullSpatial,
                                  manual_gpu=manual_gpu,
                                  use_stadv=True))
    sequence_threat = ap.ThreatModel(
        ap.SequentialPerturbation,
        [delta_threat, flow_threat],
        ap.PerturbationParameters(norm_weights=[0.00, 1.00]))

    # Loss: xentropy carries a -1 scale; CW-F6 keeps scale +1
    assert adv_loss in ['cw', 'xentropy']
    if adv_loss == 'xentropy':
        adv_term = lf.PartialXentropy(model, normalizer=normalizer)
        adv_scale = -1.0
    else:
        adv_term = lf.CWLossF6(model, normalizer)
        adv_scale = 1.0
    norm_term = lf.PerturbationNormLoss(lp=2)
    loss_fxn = lf.RegularizedLoss({'adv': adv_term, 'st': norm_term},
                                  {'adv': adv_scale, 'st': 0.05},
                                  negate=True)

    # Build attack
    pgd_attack = aa.PGD(model, normalizer, sequence_threat, loss_fxn,
                        manual_gpu=manual_gpu)

    assert output in ['attack', 'params', 'eval']
    if output == 'attack':
        return pgd_attack

    # Wrap in attack parameters (Adam, lr=0.001, unsigned steps)
    pgd_kwargs = {
        'num_iterations': num_iter,
        'signed': False,
        'optimizer': optim.Adam,
        'optimizer_kwargs': {'lr': 0.001},
        'verbose': verbose,
        'loss_convergence': loss_convergence
    }
    if extra_attack_kwargs is not None:
        pgd_kwargs.update(extra_attack_kwargs)
    params = advtrain.AdversarialAttackParameters(
        pgd_attack, 1.0,
        attack_specific_params={'attack_kwargs': pgd_kwargs})
    if output == 'params':
        return params

    # Full evaluation wrapper
    to_eval = {'top1': 'top1', 'lpips': 'avg_successful_lpips'}
    return adveval.EvaluationResult(params, to_eval=to_eval,
                                    manual_gpu=manual_gpu)
def _fix_classifier_state_dict(raw_state):
    """Normalize checkpoint keys to match our Classifier's layout.

    Handles three checkpoint formats:
      * keys prefixed with '1.'      (model saved inside a wrapper module)
      * keys prefixed with 'module.' (model saved from nn.DataParallel)
      * keys missing the 'feature_extractor.' prefix our model expects
    """
    # Strip a leading '1.' from any key that carries it
    state = {}
    for k, v in raw_state.items():
        if k.startswith('1.'):
            state[k[2:]] = v
        else:
            state[k] = v

    # If any key is 'module.'-prefixed, strip the first 7 chars from every
    # key (matches original behavior: checkpoints are assumed homogeneous)
    if any(k.startswith('module.') for k in state):
        state = {k[7:]: v for k, v in state.items()}

    # Ensure the 'feature_extractor.' prefix is present
    if not any(k.startswith('feature_extractor.') for k in state):
        state = {'feature_extractor.' + k: v
                 for k, v in state.items()}
    return state


def main(config):
    """Evaluate a TinyImageNet classifier checkpoint against FGSM, PGD and
    Carlini-Wagner attacks, writing results to base_eval_result.txt.

    Args:
        config: namespace with a `path` attribute naming the directory that
            contains 'Classifier.pth'.
    """
    model = Classifier(200, classifier_name='resnet18',
                       dataset="tinyimagenet", pretrained=False)

    # Load the checkpoint and massage its keys to fit our model
    data_classifier_state = torch.load(
        os.path.join(config.path, 'Classifier.pth'), map_location=None)
    if 'state_dict' in data_classifier_state:
        data_classifier_state = data_classifier_state['state_dict']
    model.load_state_dict(_fix_classifier_state_dict(data_classifier_state))

    normalizer = utils.IdentityNormalize()

    # Evaluation harness; whitebox (model is its own surrogate)
    adv_eval_object = adveval.AdversarialEvaluation(model, normalizer)
    surrogate = model
    normalizer_surr = normalizer

    # Build the attack parameters; the xentropy loss is shared by all
    attack_loss = plf.VanillaXentropy(surrogate, normalizer_surr)
    linf_8_threat = ap.ThreatModel(ap.DeltaAddition, {
        'lp_style': 'inf',
        'lp_bound': 8.0 / 255.0
    })

    # ------ FGSM Block
    fgsm_attack = aa.FGSM(surrogate, normalizer_surr, linf_8_threat,
                          attack_loss)
    fgsm_attack_kwargs = {'step_size': 8.0 / 255.0, 'verbose': False}
    fgsm_attack_params = advtrain.AdversarialAttackParameters(
        fgsm_attack,
        attack_specific_params={'attack_kwargs': fgsm_attack_kwargs})

    # ------ pgd10 Block
    pgd10_attack = aa.PGD(surrogate, normalizer_surr, linf_8_threat,
                          attack_loss)
    pgd10_attack_kwargs = {
        'step_size': 8.0 / 255.0 / 4.0,
        'num_iterations': 10,
        'keep_best': True,
        'random_init': True,
        'verbose': False
    }
    pgd10_attack_params = advtrain.AdversarialAttackParameters(
        pgd10_attack,
        attack_specific_params={'attack_kwargs': pgd10_attack_kwargs})

    # ------ pgd100 Block
    pgd100_attack = aa.PGD(surrogate, normalizer_surr, linf_8_threat,
                           attack_loss)
    pgd100_attack_kwargs = {
        'step_size': 8.0 / 255.0 / 12.0,
        'num_iterations': 100,
        'keep_best': True,
        'random_init': True,
        'verbose': False
    }
    pgd100_attack_params = advtrain.AdversarialAttackParameters(
        pgd100_attack,
        attack_specific_params={'attack_kwargs': pgd100_attack_kwargs})

    # ------ CarliniWagner100 Block
    cwloss6 = lf.CWLossF6
    distance_fxn = lf.SoftLInfRegularization
    cw100_attack = aa.CarliniWagner(surrogate, normalizer_surr,
                                    linf_8_threat, distance_fxn, cwloss6)
    cw100_attack_kwargs = {'num_optim_steps': 100, 'verbose': False}
    cw100_attack_params = advtrain.AdversarialAttackParameters(
        cw100_attack,
        attack_specific_params={'attack_kwargs': cw100_attack_kwargs})

    # ------ CarliniWagner1000 Block
    cw1000_attack = aa.CarliniWagner(surrogate, normalizer_surr,
                                     linf_8_threat, distance_fxn, cwloss6)
    cw1000_attack_kwargs = {'num_optim_steps': 1000, 'verbose': False}
    cw1000_attack_params = advtrain.AdversarialAttackParameters(
        cw1000_attack,
        attack_specific_params={'attack_kwargs': cw1000_attack_kwargs})

    # Wrap each attack in an EvaluationResult; to_eval maps human-readable
    # keys to prebuilt evaluator names
    to_eval_dict = {
        'top1': 'top1',
        'avg_loss_value': 'avg_loss_value',
        'avg_successful_ssim': 'avg_successful_ssim'
    }
    fgsm_eval = adveval.EvaluationResult(fgsm_attack_params,
                                         to_eval=to_eval_dict)
    pgd10_eval = adveval.EvaluationResult(pgd10_attack_params,
                                          to_eval=to_eval_dict)
    pgd100_eval = adveval.EvaluationResult(pgd100_attack_params,
                                           to_eval=to_eval_dict)
    cw100_eval = adveval.EvaluationResult(cw100_attack_params,
                                          to_eval=to_eval_dict)
    cw1000_eval = adveval.EvaluationResult(cw1000_attack_params,
                                           to_eval=to_eval_dict)

    attack_ensemble = {
        'fgsm': fgsm_eval,
        'pgd10': pgd10_eval,
        'pgd100': pgd100_eval,
        'cw100': cw100_eval,
        'cw1000': cw1000_eval
    }
    # NOTE(review): `test_dataloader` is not defined anywhere in this scope
    # or module view -- this raises NameError as written. TODO: construct or
    # pass in the TinyImageNet evaluation loader.
    ensemble_out = adv_eval_object.evaluate_ensemble(test_dataloader,
                                                     attack_ensemble,
                                                     verbose=True,
                                                     num_minibatches=None)

    # Print order for the results table ('ground' = unperturbed accuracy)
    sort_order = {
        'ground': 1,
        'fgsm': 2,
        'pgd10': 3,
        'pgd100': 4,
        'cw100': 5,
        'cw1000': 6
    }

    def pretty_printer(fd, eval_ensemble, result_type):
        """Print one result type per attack (sorted) and mirror it to fd."""
        print('~' * 10, result_type, '~' * 10)
        fd.write('~' * 10 + result_type + '~' * 10 + "\n")
        for key in sorted(list(eval_ensemble.keys()),
                          key=lambda k: sort_order[k]):
            eval_result = eval_ensemble[key]
            pad = 6 - len(key)
            if result_type not in eval_result.results:
                continue
            avg_result = eval_result.results[result_type].avg
            print(key, pad * ' ', ': ', avg_result)
            fd.write(key + pad * ' ' + ': ' + str(avg_result) + "\n")

    with open(os.path.join(config.path, 'base_eval_result.txt'), "w") as fd:
        fd.write('Result for {}'.format(config.path) + "\n")
        fd.write("\n")
        pretty_printer(fd, ensemble_out, 'top1')
        # Loss is maximized in the adversarial-example domain
        pretty_printer(fd, ensemble_out, 'avg_loss_value')
        # This is actually 1-SSIM, a makeshift 'similarity index' between
        # the perturbed images and the originals
        pretty_printer(fd, ensemble_out, 'avg_successful_ssim')
""" return plf.PerceptualXentropy(classifier, normalizer=normalizer, regularization_constant=lpips_penalty, use_gpu=True) attack_params = {} penalties = [0.01, 0.1, 1.0, 10.0, 100.0] for penalty in penalties: loss_obj = build_attack_loss(adv_trained_net, cifar_normer, penalty) attack_obj = aa.LInfPGD(adv_trained_net, cifar_normer, loss_obj, use_gpu=True) attack_param = advtrain.AdversarialAttackParameters( attack_obj, 1.0, attack_specific_params=ATTACK_SPECIFIC_PARAMS) attack_params[str(penalty)] = attack_param # In[8]: # Eval over just one particular_param = attack_params['1.0'] eval_obj = adveval.AdversarialEvaluation(adv_trained_net, cifar_normer) torch.cuda.empty_cache() out = eval_obj.evaluate_ensemble(val_loader, {'partic': particular_param}, use_gpu=True, num_minibatches=20) print out
def main(config):
    """Evaluate a (possibly defended) CIFAR-10 classifier against a suite of
    attacks and append the results to result.txt / result_blackbox.txt.

    Args:
        config: namespace with `defence`, `architecture`, `blackbox`,
            `flavor_blackbox` and `epoch` attributes.
    """
    defence_method = config.defence
    flavor = config.architecture
    blackbox = config.blackbox
    flavor_blackbox = config.flavor_blackbox
    epoch = config.epoch

    assert flavor in ['20', '56', 'wide'], \
        "INVALID ARCHITECTURE: %s" % flavor

    # Load the trained model and normalizer
    if flavor in ['20', '56']:
        model, normalizer = cifar_loader.load_pretrained_cifar_resnet(
            flavor=int(flavor), return_normalizer=True)
    elif flavor == 'wide':
        model, normalizer = cifar_loader.load_pretrained_cifar_wide_resnet(
            return_normalizer=True)

    # Load defended weights (our checkpoints or an external .pth file)
    if defence_method in ['FGSM', 'PGD', 'CW', 'PGD40', 'PGD100']:
        model = checkpoints.load_state_dict(
            defence_method + 'ResNet' + flavor, 'resnet' + flavor, epoch,
            model)
    elif defence_method != 'PLAIN':
        bad_state_dict = torch.load('./pretrained_models/' + defence_method +
                                    '.pth')
        correct_state_dict = {
            re.sub(r'^.*feature_extractor\.', '', k): v
            for k, v in bad_state_dict.items()
        }
        model.load_state_dict(correct_state_dict)

    # Load the evaluation dataset
    cifar_valset = cifar_loader.load_cifar_data('val', no_transform=True,
                                                shuffle=False,
                                                batch_size=100)

    # Evaluation harness
    adv_eval_object = adveval.AdversarialEvaluation(model, normalizer)

    # Blackbox: generate attacks against a surrogate network instead
    if blackbox:
        surrogate, normalizer_surr = \
            cifar_loader.load_pretrained_cifar_resnet(
                flavor=int(flavor_blackbox), return_normalizer=True)
        surrogate.cuda()
    else:
        surrogate = model
        normalizer_surr = normalizer

    # Build the attack parameters; the xentropy loss is shared by all
    attack_loss = plf.VanillaXentropy(surrogate, normalizer_surr)
    linf_8_threat = ap.ThreatModel(ap.DeltaAddition, {
        'lp_style': 'inf',
        'lp_bound': 8.0 / 255.0
    })

    # ------ FGSM Block
    fgsm_attack = aa.FGSM(surrogate, normalizer_surr, linf_8_threat,
                          attack_loss)
    fgsm_attack_kwargs = {'step_size': 8.0 / 255.0, 'verbose': False}
    fgsm_attack_params = advtrain.AdversarialAttackParameters(
        fgsm_attack,
        attack_specific_params={'attack_kwargs': fgsm_attack_kwargs})

    # ------ pgd10 Block
    pgd10_attack = aa.PGD(surrogate, normalizer_surr, linf_8_threat,
                          attack_loss)
    pgd10_attack_kwargs = {
        'step_size': 8.0 / 255.0 / 4.0,
        'num_iterations': 10,
        'keep_best': True,
        'verbose': False
    }
    pgd10_attack_params = advtrain.AdversarialAttackParameters(
        pgd10_attack,
        attack_specific_params={'attack_kwargs': pgd10_attack_kwargs})

    # ------ pgd100 Block
    pgd100_attack = aa.PGD(surrogate, normalizer_surr, linf_8_threat,
                           attack_loss)
    pgd100_attack_kwargs = {
        'step_size': 8.0 / 255.0 / 12.0,
        'num_iterations': 100,
        'keep_best': True,
        'verbose': False
    }
    pgd100_attack_params = advtrain.AdversarialAttackParameters(
        pgd100_attack,
        attack_specific_params={'attack_kwargs': pgd100_attack_kwargs})

    # ------ CarliniWagner100 Block
    cwloss6 = lf.CWLossF6
    distance_fxn = lf.SoftLInfRegularization
    cw100_attack = aa.CarliniWagner(surrogate, normalizer_surr,
                                    linf_8_threat, distance_fxn, cwloss6)
    cw100_attack_kwargs = {'num_optim_steps': 100, 'verbose': False}
    cw100_attack_params = advtrain.AdversarialAttackParameters(
        cw100_attack,
        attack_specific_params={'attack_kwargs': cw100_attack_kwargs})

    # ------ CarliniWagner1000 Block
    cw1000_attack = aa.CarliniWagner(surrogate, normalizer_surr,
                                     linf_8_threat, distance_fxn, cwloss6)
    cw1000_attack_kwargs = {'num_optim_steps': 1000, 'verbose': False}
    cw1000_attack_params = advtrain.AdversarialAttackParameters(
        cw1000_attack,
        attack_specific_params={'attack_kwargs': cw1000_attack_kwargs})

    # Wrap each attack in an EvaluationResult. The to_eval dict maps a
    # human-readable key to a prebuilt evaluator name (or a callable).
    to_eval_dict = {
        'top1': 'top1',
        'avg_loss_value': 'avg_loss_value',
        'avg_successful_ssim': 'avg_successful_ssim'
    }
    fgsm_eval = adveval.EvaluationResult(fgsm_attack_params,
                                         to_eval=to_eval_dict)
    pgd10_eval = adveval.EvaluationResult(pgd10_attack_params,
                                          to_eval=to_eval_dict)
    pgd100_eval = adveval.EvaluationResult(pgd100_attack_params,
                                           to_eval=to_eval_dict)
    cw100_eval = adveval.EvaluationResult(cw100_attack_params,
                                          to_eval=to_eval_dict)
    cw1000_eval = adveval.EvaluationResult(cw1000_attack_params,
                                           to_eval=to_eval_dict)

    attack_ensemble = {
        'fgsm': fgsm_eval,
        'pgd10': pgd10_eval,
        'pgd100': pgd100_eval,
        'cw100': cw100_eval,
        'cw1000': cw1000_eval
    }
    if blackbox:
        # The CW attacks are skipped in the blackbox transfer setting
        attack_ensemble = {
            'fgsm': fgsm_eval,
            'pgd10': pgd10_eval,
            'pgd100': pgd100_eval
        }

    ensemble_out = adv_eval_object.evaluate_ensemble(cifar_valset,
                                                     attack_ensemble,
                                                     verbose=True,
                                                     num_minibatches=None)

    filename = "result.txt"
    if blackbox:
        filename = "result_blackbox.txt"

    # Print order for the results table ('ground' = unperturbed accuracy)
    sort_order = {
        'ground': 1,
        'fgsm': 2,
        'pgd10': 3,
        'pgd100': 4,
        'cw100': 5,
        'cw1000': 6
    }
    if blackbox:
        sort_order = {'ground': 1, 'fgsm': 2, 'pgd10': 3, 'pgd100': 4}

    def pretty_printer(f, eval_ensemble, result_type):
        """Print one result type per attack (sorted) and mirror it to f."""
        print('~' * 10, result_type, '~' * 10)
        f.write('~' * 10 + result_type + '~' * 10 + "\n")
        for key in sorted(list(eval_ensemble.keys()),
                          key=lambda k: sort_order[k]):
            eval_result = eval_ensemble[key]
            pad = 6 - len(key)
            if result_type not in eval_result.results:
                continue
            avg_result = eval_result.results[result_type].avg
            print(key, pad * ' ', ': ', avg_result)
            f.write(key + pad * ' ' + ': ' + str(avg_result) + "\n")

    # IDIOM FIX: one context-managed append instead of repeatedly opening
    # and closing the file by hand; the bytes written and their order are
    # unchanged.
    with open(filename, "a") as f:
        f.write('Result for ' + defence_method + 'ResNet{}'.format(flavor) +
                "\n")
        if blackbox:
            f.write('Blackbox' + flavor_blackbox + "\n")
        # 'ground' is the unperturbed accuracy; PGD(l_inf=4) should beat
        # FGSM(l_inf=8) against undefended networks
        pretty_printer(f, ensemble_out, 'top1')
        # Loss is maximized in the adversarial-example domain
        pretty_printer(f, ensemble_out, 'avg_loss_value')
        # This is actually 1-SSIM, a makeshift 'similarity index' between
        # the perturbed images and the originals
        pretty_printer(f, ensemble_out, 'avg_successful_ssim')
        f.write("\n")