def attackchar(maxbatch=None):
    """Run a character-level adversarial attack over the test set.

    For each batch, ranks character positions with the configured scoring
    function and substitutes up to ``args.power`` characters per sample via
    the configured transformer, then measures model accuracy on the
    perturbed inputs.

    Args:
        maxbatch: optional cap on the number of batches to attack;
            ``None`` attacks the whole test loader.

    Side effects:
        Appends a summary line to ``attack_log.txt`` and saves the
        original/adversarial tensors (plus modification records) to
        ``args.advsamplepath`` or a derived default path.
    """
    corrects = 0.0
    model.eval()
    tgt = []
    adv = []
    origsample = []
    origsampleidx = []
    modified = []
    for dataid, data in enumerate(test_loader):
        print(dataid)
        if maxbatch is not None and dataid >= maxbatch:
            break
        inputs, target, idx = data
        inputs, target = inputs.to(device), target.to(device)
        output = model(inputs)
        tgt.append(target)
        origsample.append(inputs)
        origsampleidx.append(idx)
        # Attack the model's own prediction, not the gold label.
        pred = torch.max(output, 1)[1].view(target.size())
        # Per-position importance scores; higher = more influential.
        losses = scoring_char.scorefunc(args.scoring)(model, inputs, pred,
                                                      numclass)
        # Avoid shadowing builtin sorted(); only the ranking is needed.
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        # Column sums over the one-hot alphabet axis: 0 marks padding
        # positions, which must not be transformed.
        dt = inputs.sum(dim=1).int()
        for k in range(inputs.size()[0]):
            j = 0
            t = 0
            # BUGFIX: the position axis is dim 2 (sequence length), not
            # dim 1 (alphabet size) — the sibling generate_char_adv uses
            # size()[2]. The old bound could truncate or overrun the scan.
            while j < args.power and t < inputs.size()[2]:
                if dt[k, indices[k][t]].item() > 0:
                    advinputs[k, :, indices[k][t]], nowchar = \
                        transformer_char.transform(args.transformer)(
                            inputs,
                            torch.max(advinputs[k, :, indices[k][t]],
                                      0)[1].item(),
                            alphabet)
                    modified.append((args.batchsize * dataid + k, nowchar))
                    j += 1
                t += 1
        adv.append(advinputs)
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size())
        corrects += (pred2 == target).sum().item()
    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # 'with' guarantees the log handle is closed (the original leaked it).
    with open('attack_log.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' % (
            args.data, args.model, args.scoring, args.transformer,
            args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'a0808/%s_%d_%s_%s_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer,
            args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save({'original': origsamples, 'sampleid': origsampleidx,
                'advinputs': advinputs, 'labels': target,
                'modified': modified}, advsamplepath)
def generate_char_adv(model, args, numclass, data, device, alphabet):
    """Build adversarial character inputs for a single batch.

    Args:
        model: classifier exposing ``model(inputs)`` -> hidden state and
            ``model.h_to_logits(h)`` -> class logits.
        args: namespace providing ``scoring``, ``transformer``, ``power``.
        numclass: number of output classes.
        data: tuple ``(inputs, target, idx, raw)``; ``inputs`` is a one-hot
            (batch, alphabet, seq_len) tensor — TODO confirm against the
            loader — and ``raw`` holds the source strings.
        device: torch device to move the batch to.
        alphabet: character alphabet used by the transformer.

    Returns:
        Tensor shaped like ``inputs`` with up to ``args.power`` character
        positions per sample replaced.
    """
    inputs, target, idx, raw = data
    inputs, target = inputs.to(device), target.to(device)
    h = model(inputs)
    outputs = model.h_to_logits(h)
    # Attack the model's own prediction rather than the gold label.
    pred = torch.max(outputs, 1)[1].view(target.size())
    # Removed a dead pre-zeroed `losses` tensor that was immediately
    # overwritten by the scoring call.
    losses = scoring_char.scorefunc(args.scoring)(model, inputs, pred,
                                                  numclass)
    # Avoid shadowing builtin sorted(); only the ranking is needed.
    _, indices = torch.sort(losses, dim=1, descending=True)
    advinputs = inputs.clone()
    # Column sums of the one-hot alphabet axis: 0 marks padding positions.
    dt = inputs.sum(dim=1).int()
    for k in range(inputs.size()[0]):
        # `md` mirrors the edits on the reversed raw string; its consumers
        # (the `modified` bookkeeping) are commented out upstream, but the
        # computation is kept so behavior — including any raised errors —
        # is unchanged.
        md = raw[k][:]
        md = md[::-1]
        j = 0
        t = 0
        while j < args.power and t < inputs.size()[2]:
            pos = indices[k][t]
            if dt[k, pos].item() > 0:
                advinputs[k, :, pos], nowchar = \
                    transformer_char.transform(args.transformer)(
                        inputs,
                        torch.max(advinputs[k, :, pos], 0)[1].item(),
                        alphabet)
                md = md[:pos.item()] + nowchar + md[pos.item() + 1:]
                j += 1
            t += 1
    return advinputs
def visualize(input_str, dict_word=None, index2word=None, classes_list=None,
              power=5, scoring_alg='replaceone', transformer_alg='homoglyph',
              model=model, mode='word', maxlength=500, device=None,
              filter_char=default_filter, alphabet=default_alphabet):
    """Attack a single string and print/return original vs adversarial
    predictions.

    Args:
        input_str: raw text to attack.
        dict_word: word -> index vocabulary (word mode). FIX: was a mutable
            default ``[]``; now a ``None`` sentinel resolved to a fresh list.
        index2word: index -> word mapping (word mode); same fix.
        classes_list: class-index -> human-readable label; same fix.
        power: maximum number of word/char substitutions.
        scoring_alg: name resolved via the scoring registry.
        transformer_alg: name resolved via the transformer registry.
        model: classifier; default is bound at import time to the
            module-level ``model``.
        mode: 'word' or 'char'.
        maxlength: unused in this body; kept for interface compatibility.
        device: optional torch device; inputs are moved there when set.
        filter_char, alphabet: char-mode alphabet configuration.

    Returns:
        Tuple ``(input_str, orig_probs, orig_label, adv_str, adv_probs,
        adv_label)`` with probabilities as CPU tensors.

    Raises:
        Exception: if ``mode`` is neither 'word' nor 'char'.
    """
    # Hygiene only — none of these are mutated, so behavior is unchanged.
    dict_word = [] if dict_word is None else dict_word
    index2word = [] if index2word is None else index2word
    classes_list = [] if classes_list is None else classes_list
    numclass = len(classes_list)
    if mode == 'word':
        input_seq = simple_tokenize(input_str, dict_word)
        input_seq = torch.Tensor(input_seq).long().view(1, -1)
        if device:
            input_seq = input_seq.to(device)
        res1 = model(input_seq)
        pred1 = torch.max(res1, 1)[1].view(-1)
        losses = scoring.scorefunc(scoring_alg)(model, input_seq, pred1,
                                                numclass)
        print(input_str)
        pred1 = pred1.item()
        print('original:', classes_list[pred1])
        # Avoid shadowing builtin sorted(); only the ranking is needed.
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = input_seq.clone()
        # Token ids <= 3 are rendered as '' — presumably reserved special
        # ids (pad/unk/...); TODO confirm against the vocabulary builder.
        wtmp = []
        for i in range(input_seq.size()[1]):
            if advinputs[0, i].item() > 3:
                wtmp.append(index2word[advinputs[0, i].item()])
            else:
                wtmp.append('')
        j = 0
        t = 0
        while j < power and t < input_seq.size()[1]:
            if advinputs[0, indices[0][t]].item() > 3:
                word, advinputs[0, indices[0][t]] = transformer.transform(
                    transformer_alg)(advinputs[0, indices[0][t]].item(),
                                     dict_word, index2word, top_words=20000)
                wtmp[indices[0][t]] = word
                j += 1
            t += 1
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1).item()
        adv_str = recoveradv(input_str.lower(), index2word, input_seq[0],
                             wtmp)
        print(adv_str)
        print('adversarial:', classes_list[pred2])
        return (input_str, torch.exp(res1).detach().cpu()[0],
                classes_list[pred1], adv_str,
                torch.exp(output2).detach().cpu()[0], classes_list[pred2])
    elif mode == 'char':
        inputs = transchar(input_str, alphabet=alphabet)
        if device:
            inputs = inputs.to(device)
        output = model(inputs)
        pred1 = torch.max(output, 1)[1].view(-1)
        losses = scoring_char.scorefunc(scoring_alg)(model, inputs, pred1,
                                                     numclass)
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        # Column sums of the one-hot alphabet axis: 0 marks padding.
        dt = inputs.sum(dim=1).int()
        j = 0
        t = 0
        # Edits are tracked on the reversed lowercase string and reversed
        # back afterwards — presumably because transchar encodes text
        # back-to-front; TODO confirm.
        md = input_str.lower()[:][::-1]
        while j < power and t < inputs.size()[2]:
            if dt[0, indices[0][t]].item() > 0:
                advinputs[0, :, indices[0][t]], nowchar = \
                    transformer_char.transform(transformer_alg)(
                        inputs,
                        torch.max(advinputs[0, :, indices[0][t]],
                                  0)[1].item(),
                        alphabet)
                md = md[:indices[0][t].item()] + nowchar + \
                    md[indices[0][t].item() + 1:]
                j += 1
            t += 1
        md = md[::-1]
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1)
        print(input_str)
        print('original:', classes_list[pred1.item()])
        print(md)
        print('adversarial:', classes_list[pred2.item()])
        return (input_str, torch.exp(output)[0].detach().cpu(),
                classes_list[pred1.item()], md,
                torch.exp(output2)[0].detach().cpu(),
                classes_list[pred2.item()])
    else:
        raise Exception('Wrong mode %s' % mode)
def attackchar(maxbatch=None):
    """Character-level attack over the test set (legacy variant, ported
    from Python 2 / old PyTorch).

    Ranks positions with the configured scoring function and overwrites up
    to ``args.power`` characters per sample, then reports accuracy on the
    perturbed inputs.

    Args:
        maxbatch: optional cap on the number of batches; ``None`` runs the
            whole test loader.

    Side effects:
        Appends a summary line to ``attack.txt`` and saves the
        original/adversarial tensors to ``args.advsamplepath`` or a
        derived default path.
    """
    corrects = 0.0
    model.eval()
    tgt = []
    adv = []
    origsample = []
    # torch.no_grad() replaces the removed Variable(..., volatile=True)
    # inference API.
    with torch.no_grad():
        for dataid, data in enumerate(test_loader):
            print(dataid)  # BUGFIX: Python 2 print statement
            if maxbatch is not None and dataid >= maxbatch:
                break
            inputs, target = data
            inputs, target = inputs.cuda(), target.cuda()
            output = model(inputs)
            tgt.append(target)
            origsample.append(inputs)
            # Attack the model's own prediction, not the gold label.
            pred = torch.max(output, 1)[1].view(target.size())
            losses = scoring_char.scorefunc(args.scoring)(model, inputs,
                                                          pred, numclass)
            # Avoid shadowing builtin sorted(); only the ranking is needed.
            _, indices = torch.sort(losses, dim=1, descending=True)
            advinputs = inputs.clone()
            # Column sums of the one-hot alphabet axis: 0 marks padding.
            dt = inputs.sum(dim=1).int()
            print(dt)  # BUGFIX: Python 2 print statement
            for k in range(inputs.size()[0]):  # BUGFIX: xrange
                j = 0
                t = 0
                # BUGFIX: the position axis is dim 2 (sequence length),
                # not dim 1 (alphabet size).
                while j < args.power and t < inputs.size()[2]:
                    if dt[k, indices[k][t]].item() > 0:
                        advinputs[k, :, indices[k][t]] = \
                            transformer_char.transform(
                                args.transformer)(inputs)
                        j += 1
                    t += 1
            adv.append(advinputs)
            output2 = model(advinputs)
            pred2 = torch.max(output2, 1)[1].view(target.size())
            corrects += (pred2 == target).sum().item()
    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # 'with' guarantees the log handle is closed (the original leaked it).
    with open('attack.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' % (
            args.data, args.model, args.scoring, args.transformer,
            args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples/%s_%d_%s_%s_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer,
            args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save({'original': origsamples, 'advinputs': advinputs,
                'labels': target}, advsamplepath)
def attackchar(maxbatch=None):
    """Character-level attack over the TRAIN set (legacy variant, ported
    from Python 2 / old PyTorch).

    Unlike the test-set variant this one overwrites the top-ranked
    positions unconditionally (no padding check — the original guard is
    commented out and that behavior is preserved).

    Args:
        maxbatch: optional cap on the number of batches; ``None`` runs the
            whole train loader. NOTE(review): the original compares with
            ``>`` (off by one vs the test-set variant's ``>=``); kept as-is.

    Side effects:
        Appends a summary line to ``attack_train.txt`` and saves the
        original/adversarial tensors to ``args.advsamplepath`` or a
        derived default path.
    """
    corrects = 0.0
    model.eval()
    tgt = []
    adv = []
    origsample = []
    # torch.no_grad() replaces the removed Variable(..., volatile=True)
    # inference API.
    with torch.no_grad():
        for dataid, data in enumerate(train_loader):
            print(dataid)  # BUGFIX: Python 2 print statement
            if maxbatch is not None and dataid > maxbatch:
                break
            inputs, target = data
            inputs, target = inputs.cuda(), target.cuda()
            output = model(inputs)
            tgt.append(target)
            origsample.append(inputs)
            # Attack the model's own prediction, not the gold label.
            pred = torch.max(output, 1)[1].view(target.size())
            losses = scoring_char.scorefunc(args.scoring)(model, inputs,
                                                          pred, numclass)
            # Avoid shadowing builtin sorted(); only the ranking is needed.
            _, indices = torch.sort(losses, dim=1, descending=True)
            advinputs = inputs.clone()
            for k in range(inputs.size()[0]):  # BUGFIX: xrange
                j = 0
                t = 0
                # BUGFIX: the position axis is dim 2 (sequence length),
                # not dim 1 (alphabet size).
                while j < args.power and t < inputs.size()[2]:
                    # Original padding guard was commented out; every
                    # ranked position is transformed.
                    advinputs[k, :, indices[k][t]] = \
                        transformer_char.transform(args.transformer)(inputs)
                    j += 1
                    t += 1
            adv.append(advinputs)
            output2 = model(advinputs)
            pred2 = torch.max(output2, 1)[1].view(target.size())
            corrects += (pred2 == target).sum().item()
    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # 'with' guarantees the log handle is closed (the original leaked it).
    with open('attack_train.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' % (
            args.data, args.model, args.scoring, args.transformer,
            args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples2/%s_%d_%s_%s_%d_train.dat' % (
            args.model, args.data, args.scoring, args.transformer,
            args.power)
    else:
        advsamplepath = args.advsamplepath
    # BUGFIX: the original passed args.advsamplepath here, discarding the
    # computed fallback path (save failed when the arg was None).
    torch.save({'original': origsamples, 'advinputs': advinputs,
                'labels': target}, advsamplepath)