import torch
from tqdm import tqdm
from advertorch.attacks import FGSM


def PGDAttack(model, test_loader):
    # adversary = L2PGDAttack(model, eps=1.0, eps_iter=1*2/40, nb_iter=40,
    #                         rand_init=False, targeted=False, clip_min=-1000, clip_max=1000)
    # adversary = CarliniWagnerL2Attack(model, num_classes=10)
    adversary = FGSM(model, eps=0.1, clip_min=-1000, clip_max=1000)
    all_raw_imgs, all_adv_imgs = [], []
    model.eval()
    correct, total = 0, 0
    for batch_idx, (inputs, targets) in tqdm(enumerate(test_loader)):
        # if batch_idx > 10:
        #     break
        raw_imgs, targets = inputs.cuda(), targets.cuda()
        adv_imgs = adversary.perturb(raw_imgs)
        # tensor2img(adv_imgs, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        # save_image(adv_imgs, "tmp.png")
        all_raw_imgs.append(raw_imgs.cpu())
        all_adv_imgs.append(adv_imgs.cpu())
        predicts = model(adv_imgs)
        _, predicts = predicts.max(1)
        total += targets.size(0)
        correct += predicts.eq(targets).sum().item()
    all_raw_imgs = torch.cat(all_raw_imgs, dim=0)
    all_adv_imgs = torch.cat(all_adv_imgs, dim=0)
    acc = 100 * correct / total
    return acc, all_raw_imgs, all_adv_imgs
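# For reference, a minimal sketch of the single signed-gradient step that
# `adversary.perturb` performs above (untargeted FGSM, falling back to the
# model's own predictions when no labels are given). This is an illustration
# of the technique, not advertorch's exact implementation:
import torch.nn.functional as F


def fgsm_perturb_sketch(model, x, y=None, eps=0.1, clip_min=-1000, clip_max=1000):
    x = x.clone().detach().requires_grad_(True)
    logits = model(x)
    if y is None:
        # With no labels supplied, attack the model's current predictions.
        y = logits.argmax(dim=1)
    loss = F.cross_entropy(logits, y)
    grad, = torch.autograd.grad(loss, x)
    # One step in the direction of the gradient sign, clipped to the valid range.
    x_adv = x + eps * grad.sign()
    return x_adv.clamp(clip_min, clip_max).detach()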
import torch.nn as nn
import torch.nn.functional as F


def advtrain(model, device, train_loader, optimizer, epoch, log_interval):
    model.train()
    avg_loss = 0
    # Build the adversary used inside the training loop.
    adversary = FGSM(model, loss_fn=nn.NLLLoss(reduction='sum'),
                     eps=0.3, clip_min=0., clip_max=1., targeted=False)
    for batch_idx, (data, target) in enumerate(train_loader):
        # Move the data to the GPU or CPU.
        data, target = data.to(device), target.to(device)
        data = adversary.perturb(data, target)
        # Zero the gradient buffers before the gradient-descent step.
        optimizer.zero_grad()
        output = model(data)
        # Negative log-likelihood (NLL) loss, the training objective for this model.
        loss = F.nll_loss(output, target)
        loss.backward()
        # Update parameters from the gradients currently stored in .grad;
        # this is why the buffers must be zeroed above, or gradients accumulate.
        optimizer.step()
        avg_loss += F.nll_loss(output, target, reduction='sum').item()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    avg_loss /= len(train_loader.dataset)
    return avg_loss
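# A minimal sketch of driving advtrain for several epochs. `model`,
# `train_loader`, and the SGD hyperparameters are illustrative assumptions,
# not fixed by the code above:
#
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
#     for epoch in range(1, 11):
#         avg_loss = advtrain(model, device, train_loader, optimizer, epoch, log_interval=100)
#         print("epoch {}: avg adversarial train loss {:.4f}".format(epoch, avg_loss))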
import os
from advertorch.attacks import LinfPGDAttack, L2PGDAttack, MomentumIterativeAttack


def generate(datasetname, batch_size):
    save_dir_path = "{}/data_adv_defense/guided_denoiser".format(PY_ROOT)
    os.makedirs(save_dir_path, exist_ok=True)
    set_log_file(save_dir_path + "/generate_{}.log".format(datasetname))
    data_loader = DataLoaderMaker.get_img_label_data_loader(datasetname, batch_size, is_train=True)
    attackers = []
    for model_name in MODELS_TRAIN_STANDARD[datasetname] + MODELS_TEST_STANDARD[datasetname]:
        model = StandardModel(datasetname, model_name, no_grad=False)
        model = model.cuda().eval()
        linf_PGD_attack = LinfPGDAttack(model, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                        eps=0.031372, nb_iter=30, eps_iter=0.01,
                                        rand_init=True, clip_min=0.0, clip_max=1.0, targeted=False)
        l2_PGD_attack = L2PGDAttack(model, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                    eps=4.6, nb_iter=30, clip_min=0.0, clip_max=1.0, targeted=False)
        FGSM_attack = FGSM(model, loss_fn=nn.CrossEntropyLoss(reduction="sum"))
        momentum_attack = MomentumIterativeAttack(model, loss_fn=nn.CrossEntropyLoss(reduction="sum"),
                                                  eps=0.031372, nb_iter=30, eps_iter=0.01,
                                                  clip_min=0.0, clip_max=1.0, targeted=False)
        attackers.append(linf_PGD_attack)
        attackers.append(l2_PGD_attack)
        attackers.append(FGSM_attack)
        attackers.append(momentum_attack)
        log.info("Create model {} done!".format(model_name))
    generate_and_save_adv_examples(datasetname, data_loader, attackers, save_dir_path)
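# A hypothetical sketch of what a helper like generate_and_save_adv_examples
# could do with the attacker list built above; the project's real helper may
# differ, and the output layout below is an assumption:
import random


def generate_and_save_adv_examples_sketch(datasetname, data_loader, attackers, save_dir_path):
    adv_images, adv_labels = [], []
    for images, labels in data_loader:
        images, labels = images.cuda(), labels.cuda()
        # Draw one of the prepared attacks (Linf-PGD, L2-PGD, FGSM, momentum) per batch.
        attacker = random.choice(attackers)
        adv = attacker.perturb(images, labels)
        adv_images.append(adv.cpu())
        adv_labels.append(labels.cpu())
    out = {"adv_images": torch.cat(adv_images), "labels": torch.cat(adv_labels)}
    torch.save(out, "{}/{}_adv.pt".format(save_dir_path, datasetname))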
n_batch = len(testloader)
chunk_size = n_batch // args.world_size
start_ind = args.rank * chunk_size
end_ind = (args.rank + 1) * chunk_size

# load classifier
predict = get_classifier(cfg, cfg.classifier)
state_dict = torch.load(cfg.classifier.ckpt)
predict.load_state_dict(state_dict)
for p in predict.parameters():
    p.requires_grad_(False)
predict = torch.nn.Sequential(transform.classifier_preprocess_layer, predict).cuda()
predict.eval()

# create attacker
attacker = FGSM(predict=predict, eps=args.eps / 255.0)

total = 0
correct_clean = 0
correct_adv = 0
correct_def = 0
# `progress_bar` is assumed to wrap `testloader` earlier in the script.
for i, (images, labels) in enumerate(progress_bar):
    # Each rank only processes its own contiguous chunk of batches.
    if i < start_ind or i >= end_ind:
        continue
    images, labels = images.cuda(), labels.cuda()
    result_path = os.path.join(result_dir, 'batch_{:04d}.pt'.format(i))
    if os.path.isfile(result_path):
        # Reuse cached adversarial and defended (reconstructed) images.
        result_dict = torch.load(result_path)
        images_adv = result_dict['input'].cuda()
        images_def = result_dict['rec'].cuda()
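# A hedged sketch of how this loop might continue when no cached batch file
# exists, and how the counters above could then be updated. `defense` is a
# hypothetical purification module, not defined in this fragment:
#
#     else:
#         images_adv = attacker.perturb(images, labels)
#         images_def = defense(images_adv)
#         torch.save({'input': images_adv.cpu(), 'rec': images_def.cpu()}, result_path)
#     with torch.no_grad():
#         total += labels.size(0)
#         correct_clean += (predict(images).argmax(1) == labels).sum().item()
#         correct_adv += (predict(images_adv).argmax(1) == labels).sum().item()
#         correct_def += (predict(images_def).argmax(1) == labels).sum().item()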
import numpy as np
import libmr


def precalc_weibull(args, dataloader_train, knownclass, Encoder, NorClsfier):
    # First generate pre-softmax 'activation vectors' for all training examples
    print("Weibull: computing features for all correctly-classified training data")
    activation_vectors = {}

    if args.adv == 'PGDattack':
        from advertorch.attacks import PGDAttack
        adversary = PGDAttack(predict1=Encoder, predict2=NorClsfier, nb_iter=args.adv_iter)
    elif args.adv == 'FGSMattack':
        from advertorch.attacks import FGSM
        adversary = FGSM(predict1=Encoder, predict2=NorClsfier)

    for _, (images, labels, _, _) in enumerate(dataloader_train):
        labels = lab_conv(knownclass, labels)
        images, labels = images.cuda(), labels.long().cuda()

        print("**********Conduct Attack**********")
        advimg = adversary.perturb(images, labels)
        with torch.no_grad():
            logits = NorClsfier(Encoder(advimg))

        correctly_labeled = (logits.data.max(1)[1] == labels)
        labels_np = labels.cpu().numpy()
        logits_np = logits.data.cpu().numpy()
        for i, label in enumerate(labels_np):
            if not correctly_labeled[i]:
                continue
            # If correctly labeled, add this to the list of activation_vectors for this class
            if label not in activation_vectors:
                activation_vectors[label] = []
            activation_vectors[label].append(logits_np[i])
    print("Computed activation_vectors for {} known classes".format(len(activation_vectors)))
    for class_idx in activation_vectors:
        print("Class {}: {} images".format(class_idx, len(activation_vectors[class_idx])))

    # Compute a mean activation vector (MAV) for each class
    print("Weibull: computing mean activation vectors...")
    mean_activation_vectors = {}
    for class_idx in activation_vectors:
        mean_activation_vectors[class_idx] = np.array(activation_vectors[class_idx]).mean(axis=0)

    # Initialize one libMR Weibull object for each class
    print("Fitting Weibull to distance distribution of each class")
    weibulls = {}
    for class_idx in activation_vectors:
        distances = []
        mav = mean_activation_vectors[class_idx]
        for v in activation_vectors[class_idx]:
            distances.append(np.linalg.norm(v - mav))
        mr = libmr.MR()
        tail_size = min(len(distances), WEIBULL_TAIL_SIZE)
        mr.fit_high(distances, tail_size)
        weibulls[class_idx] = mr
        print("Weibull params for class {}: {}".format(class_idx, mr.get_params()))
    return activation_vectors, mean_activation_vectors, weibulls
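# A self-contained toy example of the libMR fitting used above: fit a Weibull
# to the extreme tail of synthetic class-to-MAV distances, then score a new
# distance. The numbers are illustrative only:
#
#     import numpy as np
#     import libmr
#
#     rng = np.random.RandomState(0)
#     distances = np.abs(rng.normal(loc=5.0, scale=1.0, size=200))
#     mr = libmr.MR()
#     mr.fit_high(distances, min(len(distances), 20))  # fit the 20 largest distances
#     print(mr.w_score(9.0))  # how far 9.0 sits in the extreme tail, in [0, 1]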
def openset_weibull(args, dataloader_test, knownclass, Encoder, NorClsfier,
                    activation_vectors, mean_activation_vectors, weibulls,
                    mode='openset'):
    # Apply the Weibull score to every logit
    weibull_scores = []
    logits = []
    classes = activation_vectors.keys()

    running_corrects = 0.0
    epoch_size = 0.0

    if args.adv == 'PGDattack':
        from advertorch.attacks import PGDAttack
        adversary = PGDAttack(predict1=Encoder, predict2=NorClsfier, nb_iter=args.adv_iter)
    elif args.adv == 'FGSMattack':
        from advertorch.attacks import FGSM
        adversary = FGSM(predict1=Encoder, predict2=NorClsfier)

    # reclosslist = []
    for steps, (images, labels) in enumerate(dataloader_test):
        labels = lab_conv(knownclass, labels)
        images, labels = images.cuda(), labels.long().cuda()

        print("Calculate weibull_scores in step {}/{}".format(steps, len(dataloader_test)))
        print("**********Conduct Attack**********")
        if mode == 'closeset':
            advimg = adversary.perturb(images, labels)
        else:
            advimg = adversary.perturb(images)
        with torch.no_grad():
            batch_logits_torch = NorClsfier(Encoder(advimg))
        batch_logits = batch_logits_torch.data.cpu().numpy()
        batch_weibull = np.zeros(shape=batch_logits.shape)
        for activation_vector in batch_logits:
            weibull_row = np.ones(len(knownclass))
            for class_idx in classes:
                mav = mean_activation_vectors[class_idx]
                dist = np.linalg.norm(activation_vector - mav)
                weibull_row[class_idx] = 1 - weibulls[class_idx].w_score(dist)
            weibull_scores.append(weibull_row)
            logits.append(activation_vector)

        if mode == 'closeset':
            _, preds = torch.max(batch_logits_torch, 1)
            # statistics
            running_corrects += torch.sum(preds == labels.data)
            epoch_size += images.size(0)

    if mode == 'closeset':
        running_corrects = running_corrects.double() / epoch_size
        print('Test Acc: {:.4f}'.format(running_corrects))

    weibull_scores = np.array(weibull_scores)
    logits = np.array(logits)
    openmax_scores = -np.log(np.sum(np.exp(logits * weibull_scores), axis=1))
    if mode == 'closeset':
        return running_corrects, np.array(openmax_scores)
    else:
        return np.array(openmax_scores)
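# A minimal sketch of how the two modes above are typically combined to measure
# open-set detection, assuming scikit-learn is available. `close_loader` and
# `open_loader` are hypothetical test loaders for known and unknown classes:
#
#     from sklearn.metrics import roc_auc_score
#
#     acc, close_scores = openset_weibull(args, close_loader, knownclass, Encoder,
#                                         NorClsfier, activation_vectors,
#                                         mean_activation_vectors, weibulls, mode='closeset')
#     open_scores = openset_weibull(args, open_loader, knownclass, Encoder,
#                                   NorClsfier, activation_vectors,
#                                   mean_activation_vectors, weibulls, mode='openset')
#     # Higher openmax_scores should indicate unknowns; label open-set samples 1.
#     y_true = np.concatenate([np.zeros(len(close_scores)), np.ones(len(open_scores))])
#     y_score = np.concatenate([close_scores, open_scores])
#     print("open-set AUROC: {:.4f}".format(roc_auc_score(y_true, y_score)))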