def generate_word_adv(model, args, numclass, data, device, index2word, word_index):
    """Craft an adversarial version of one batch of word-level inputs.

    Scores every token position with the configured scoring function, then
    replaces up to ``args.power`` of the highest-scoring real words per
    sample using the configured word transformer.

    Args:
        model: classifier; here expected to return ``(hidden, logits)``.
        args: namespace providing ``scoring``, ``transformer``, ``power``
            and ``dictionarysize``.
        numclass: number of output classes, forwarded to the scorer.
        data: loader tuple ``(inputs, target, idx, raw)``; only ``inputs``
            and ``target`` are used here.
        device: torch device the tensors are moved to.
        index2word: index -> word lookup table.
        word_index: word -> index lookup table.

    Returns:
        A tensor shaped like ``inputs`` with selected words substituted.
    """
    inputs, target, idx, raw = data
    inputs, target = inputs.to(device), target.to(device)
    h, output = model(inputs)
    pred = torch.max(output, 1)[1].view(target.size())
    # Per-position importance scores; higher score = better attack target.
    losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)
    # Only the ranking is needed; avoid shadowing the builtin `sorted`.
    _, indices = torch.sort(losses, dim=1, descending=True)
    advinputs = inputs.clone()
    for k in range(inputs.size()[0]):
        changed = 0
        t = 0
        # Walk positions from most to least important, replacing only real
        # words (index > 3 skips PAD/UNK/special tokens — assumed vocab
        # layout, TODO confirm) until the budget `args.power` is spent.
        while changed < args.power and t < inputs.size()[1]:
            if advinputs[k, indices[k][t]].item() > 3:
                word, advinputs[k, indices[k][t]] = transformer.transform(args.transformer)(
                    advinputs[k, indices[k][t]].item(), word_index, index2word,
                    top_words=args.dictionarysize)
                changed += 1
            t += 1
    return advinputs
def visualize(input_str, dict_word=None, index2word=None, classes_list=None,
              power=5, scoring_alg='replaceone', transformer_alg='homoglyph',
              model=model, mode='word', maxlength=500, device=None,
              filter_char=default_filter, alphabet=default_alphabet):
    """Attack a single input string and print/return the before/after view.

    In ``'word'`` mode the string is tokenized, scored per word, and up to
    ``power`` words are replaced via the word transformer. In ``'char'``
    mode the string is encoded per character and up to ``power`` character
    positions are perturbed via the char transformer.

    Args:
        input_str: raw text to attack.
        dict_word: word -> index vocabulary (defaults to empty).
        index2word: index -> word table (defaults to empty).
        classes_list: class-id -> label names (defaults to empty).
        power: maximum number of substitutions.
        scoring_alg: name of the position-scoring algorithm.
        transformer_alg: name of the substitution algorithm.
        model: classifier; assumed to return raw log-probabilities
            (`torch.exp` is applied before returning) — TODO confirm.
        mode: ``'word'`` or ``'char'``; anything else raises.
        maxlength: unused here; kept for interface compatibility.
        device: optional torch device to move tensors to.
        filter_char: unused here; kept for interface compatibility.
        alphabet: character alphabet for ``'char'`` mode encoding.

    Returns:
        ``(input_str, orig_probs, orig_label, adv_str, adv_probs, adv_label)``

    Raises:
        Exception: if ``mode`` is neither ``'word'`` nor ``'char'``.
    """
    # Avoid mutable default arguments: normalize the None sentinels.
    dict_word = [] if dict_word is None else dict_word
    index2word = [] if index2word is None else index2word
    classes_list = [] if classes_list is None else classes_list
    numclass = len(classes_list)
    if mode == 'word':
        input_seq = simple_tokenize(input_str, dict_word)
        input_seq = torch.Tensor(input_seq).long().view(1, -1)
        if device:
            input_seq = input_seq.to(device)
        res1 = model(input_seq)
        pred1 = torch.max(res1, 1)[1].view(-1)
        losses = scoring.scorefunc(scoring_alg)(model, input_seq, pred1, numclass)
        print(input_str)
        pred1 = pred1.item()
        print('original:', classes_list[pred1])
        # Rank positions by importance (don't shadow the builtin `sorted`).
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = input_seq.clone()
        # wtmp mirrors the token sequence as words; '' marks special tokens.
        wtmp = []
        for i in range(input_seq.size()[1]):
            if advinputs[0, i].item() > 3:
                wtmp.append(index2word[advinputs[0, i].item()])
            else:
                wtmp.append('')
        j = 0
        t = 0
        # Replace up to `power` real words, most important first.
        while j < power and t < input_seq.size()[1]:
            if advinputs[0, indices[0][t]].item() > 3:
                word, advinputs[0, indices[0][t]] = transformer.transform(
                    transformer_alg)(advinputs[0, indices[0][t]].item(),
                                     dict_word, index2word, top_words=20000)
                wtmp[indices[0][t]] = word
                j += 1
            t += 1
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1).item()
        adv_str = recoveradv(input_str.lower(), index2word, input_seq[0], wtmp)
        print(adv_str)
        print('adversarial:', classes_list[pred2])
        return (input_str, torch.exp(res1).detach().cpu()[0], classes_list[pred1],
                adv_str, torch.exp(output2).detach().cpu()[0], classes_list[pred2])
    elif mode == 'char':
        inputs = transchar(input_str, alphabet=alphabet)
        if device:
            inputs = inputs.to(device)
        output = model(inputs)
        pred1 = torch.max(output, 1)[1].view(-1)
        losses = scoring_char.scorefunc(scoring_alg)(model, inputs, pred1, numclass)
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        # Column sums > 0 mark positions that actually hold a character.
        dt = inputs.sum(dim=1).int()
        j = 0
        t = 0
        # The char encoding appears to be right-to-left, so edit the
        # reversed string and flip it back afterwards — TODO confirm.
        md = input_str.lower()[::-1]
        while j < power and t < inputs.size()[2]:
            if dt[0, indices[0][t]].item() > 0:
                advinputs[0, :, indices[0][t]], nowchar = transformer_char.transform(
                    transformer_alg)(inputs,
                                     torch.max(advinputs[0, :, indices[0][t]], 0)[1].item(),
                                     alphabet)
                md = md[:indices[0][t].item()] + nowchar + md[indices[0][t].item() + 1:]
                j += 1
            t += 1
        md = md[::-1]
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1)
        print(input_str)
        print('original:', classes_list[pred1.item()])
        print(md)
        print('adversarial:', classes_list[pred2.item()])
        return (input_str, torch.exp(output)[0].detach().cpu(), classes_list[pred1.item()],
                md, torch.exp(output2)[0].detach().cpu(), classes_list[pred2.item()])
    else:
        raise Exception('Wrong mode %s' % mode)
def attackword(model, args, numclass, test_loader, device, index2word, word_index,
               maxbatch=None):
    """Run the word-level attack over the test loader and save the results.

    For every batch: score token positions, substitute up to ``args.power``
    real words per sample, re-classify, and accumulate adversarial accuracy.
    Appends a summary line to ``attack_log.txt`` and saves all tensors plus
    the recovered adversarial strings via ``torch.save``.

    Args:
        model: classifier returning logits for a batch of token indices.
        args: namespace with ``scoring``, ``transformer``, ``power``,
            ``dictionarysize``, ``data``, ``wordlength``, ``model``,
            ``advsamplepath``.
        numclass: number of output classes, forwarded to the scorer.
        test_loader: yields ``(inputs, target, idx, raw)`` batches.
        device: torch device for the tensors.
        index2word: index -> word lookup table.
        word_index: word -> index lookup table.
        maxbatch: optional cap on the number of batches to attack.
    """
    corrects = 0.0
    model.eval()
    wordinput = []      # recovered adversarial strings, one per sample
    tgt = []            # gold labels, one tensor per batch
    adv = []            # adversarial input tensors, one per batch
    origsample = []     # original input tensors, one per batch
    origsampleidx = []  # sample ids, one tensor per batch
    for dataid, data in enumerate(test_loader):
        print(dataid)
        if maxbatch is not None and dataid >= maxbatch:
            break
        inputs, target, idx, raw = data
        inputs, target = inputs.to(device), target.to(device)
        origsample.append(inputs)
        origsampleidx.append(idx)
        tgt.append(target)
        output = model(inputs)
        pred = torch.max(output, 1)[1].view(target.size())
        losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)
        # Only the ranking matters; avoid shadowing the builtin `sorted`.
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        # wtmp[k] mirrors sample k's tokens as words; '' marks specials.
        wtmp = []
        for k in range(inputs.size()[0]):
            wtmp.append([])
            for i in range(inputs.size()[1]):
                if advinputs[k, i].item() > 3:
                    wtmp[-1].append(index2word[advinputs[k, i].item()])
                else:
                    wtmp[-1].append('')
        for k in range(inputs.size()[0]):
            j = 0
            t = 0
            # Replace up to args.power real words, most important first.
            while j < args.power and t < inputs.size()[1]:
                if advinputs[k, indices[k][t]].item() > 3:
                    word, advinputs[k, indices[k][t]] = transformer.transform(args.transformer)(
                        advinputs[k, indices[k][t]].item(), word_index, index2word,
                        top_words=args.dictionarysize)
                    wtmp[k][indices[k][t]] = word
                    print(word)
                    j += 1
                t += 1
        adv.append(advinputs)
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size())
        corrects += (pred2 == target).sum().item()
        for i in range(len(wtmp)):
            print(raw[i])
            print(pred[i].item())
            wordinputi = recoveradv(raw[i], index2word, inputs[i], wtmp[i])
            print(wordinputi)
            wordinput.append(wordinputi)
            print(pred2[i].item())
    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    origsampleidx = torch.cat(origsampleidx)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % acc)
    # Context manager so the log handle is always closed (the original
    # leaked the file object).
    with open('attack_log.txt', 'a') as f:
        f.write('%d\t%d\t%s\t%s\t%s\t%d\t%.2f\n' % (
            args.data, args.wordlength, args.model, args.scoring,
            args.transformer, args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples/%s_%d_%s_%s_%d_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer,
            args.power, args.wordlength)
    else:
        advsamplepath = args.advsamplepath
    torch.save({'original': origsamples,
                'sampleid': origsampleidx,
                'wordinput': wordinput,
                'advinputs': advinputs,
                'labels': target}, advsamplepath)
def attackword(maxbatch=None):
    # Word-level adversarial attack over the whole test set.
    # Legacy Python 2 / pre-0.4 PyTorch version: `print` statements,
    # `xrange`, and `Variable(..., volatile=True)` for no-grad inference.
    #
    # Relies on module-level globals (not visible in this chunk): model,
    # test_loader, args, numclass, index2word, word_index, scoring,
    # transformer, torch, Variable.
    #
    # maxbatch: optional cap on the number of batches to attack.
    # Side effects: appends an accuracy line to 'attack.txt' and saves the
    # original/adversarial tensors with torch.save; returns nothing.
    corrects = .0
    total_loss = 0  # NOTE(review): never accumulated — leftover variable
    model.eval()
    wordinput = []   # per-sample lists of replaced words
    tgt = []         # gold labels, one tensor per batch
    adv = []         # adversarial inputs, one tensor per batch
    origsample = []  # original inputs, one tensor per batch
    # True: keep word-level bookkeeping in wtmp; False: fast path below.
    flagstore = True
    for dataid, data in enumerate(test_loader):
        print dataid
        if maxbatch != None and dataid >= maxbatch:
            break
        inputs, target = data
        # volatile=True was the pre-0.4 way of disabling autograd tracking.
        inputs, target = Variable(inputs, volatile=True), Variable(target)
        inputs, target = inputs.cuda(), target.cuda()
        origsample.append(inputs.data)
        tgt.append(target.data)
        wtmp = []
        output = model(inputs)
        pred = Variable(torch.max(output, 1)[1].view(target.size()).data)
        # Per-position importance scores; higher = better attack target.
        losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)
        sorted, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        if flagstore:
            # Mirror every sample's tokens as words; '' marks indices <= 3
            # (presumably PAD/UNK/special tokens — TODO confirm).
            for k in xrange(inputs.size()[0]):
                wtmp.append([])
                for i in xrange(inputs.size()[1]):
                    if advinputs.data[k, i] > 3:
                        wtmp[-1].append(index2word[advinputs.data[k, i]])
                    else:
                        wtmp[-1].append('')
            # Replace up to args.power real words per sample, walking
            # positions from most to least important.
            for k in xrange(inputs.size()[0]):
                j = 0
                t = 0
                while j < args.power and t < inputs.size()[1]:
                    if advinputs.data[k, indices[k][t]] > 3:
                        word, advinputs.data[
                            k, indices[k][t]] = transformer.transform(
                                args.transformer)(
                                    advinputs[k, indices[k][t]].data[0],
                                    word_index, index2word,
                                    top_words=args.dictionarysize)
                        wtmp[k][indices[k][t]] = word
                        j += 1
                    t += 1
        else:
            # Fast path: replace the top args.power positions outright,
            # without skipping special tokens or recording the words.
            for k in xrange(inputs.size()[0]):
                for i in xrange(args.power):
                    word, advinputs.data[
                        k, indices[k][i]] = transformer.transform(
                            args.transformer)(
                                advinputs[k, indices[k][i]].data[0],
                                word_index, index2word,
                                top_words=args.dictionarysize)
        adv.append(advinputs.data)
        for i in xrange(len(wtmp)):
            wordinput.append(wtmp[i])
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size()).data
        # Accuracy under attack: samples the model still gets right.
        corrects += (pred2 == target.data).sum()
        print wordinput[0]
    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # NOTE(review): file handle is never closed.
    f = open('attack.txt', 'a')
    f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' % (args.data, args.model, args.scoring,
                                            args.transformer, args.power,
                                            100 * acc))
    if args.advsamplepath == None:
        advsamplepath = 'advsamples/%s_%d_%s_%s_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save(
        {
            'original': origsamples,
            'wordinput': wordinput,
            'advinputs': advinputs,
            'labels': target
        }, advsamplepath)
def attackword(maxbatch=None):
    # Word-level adversarial attack over the TRAINING set (train_loader),
    # writing to 'attack_train.txt' / 'advsamples2/..._train.dat'.
    # Legacy Python 2 / pre-0.4 PyTorch version: `print` statements,
    # `xrange`, and `Variable(..., volatile=True)` for no-grad inference.
    #
    # Relies on module-level globals (not visible in this chunk): model,
    # train_loader, args, numclass, index2word, word_index, scoring,
    # transformer, torch, Variable.
    #
    # maxbatch: optional cap on batches. NOTE(review): this variant uses
    # `dataid > maxbatch` (processes maxbatch+1 batches) where the
    # test-set variant uses `>=` — likely an off-by-one inconsistency.
    corrects = .0
    total_loss = 0  # NOTE(review): never accumulated — leftover variable
    model.eval()
    wordinput = []   # per-sample lists of replaced words
    tgt = []         # gold labels, one tensor per batch
    adv = []         # adversarial inputs, one tensor per batch
    origsample = []  # original inputs, one tensor per batch
    # True: keep word-level bookkeeping in wtmp; False: fast path below.
    flagstore = True
    for dataid, data in enumerate(train_loader):
        print dataid
        if maxbatch != None and dataid > maxbatch:
            break
        inputs, target = data
        # volatile=True was the pre-0.4 way of disabling autograd tracking.
        inputs, target = Variable(inputs, volatile=True), Variable(target)
        inputs, target = inputs.cuda(), target.cuda()
        origsample.append(inputs.data)
        tgt.append(target.data)
        wtmp = []
        output = model(inputs)
        # loss = F.nll_loss(output, targenst)
        # total_loss += loss.data[0]
        pred = Variable(torch.max(output, 1)[1].view(target.size()).data)
        # Per-position importance scores; higher = better attack target.
        losses = scoring.scorefunc(args.scoring)(model, inputs, pred, numclass)
        sorted, indices = torch.sort(losses, dim=1, descending=True)
        # print losses
        # print indices
        advinputs = inputs.clone()
        if flagstore:
            # Mirror every sample's tokens as words; '' marks indices <= 3
            # (presumably PAD/UNK/special tokens — TODO confirm).
            for k in xrange(inputs.size()[0]):
                wtmp.append([])
                for i in xrange(inputs.size()[1]):
                    if advinputs.data[k, i] > 3:
                        wtmp[-1].append(index2word[advinputs.data[k, i]])
                    else:
                        wtmp[-1].append('')
            # Replace up to args.power real words per sample, walking
            # positions from most to least important. Unlike the test-set
            # variant, no top_words cap is passed to the transformer.
            for k in xrange(inputs.size()[0]):
                j = 0
                t = 0
                while j < args.power and t < inputs.size()[1]:
                    # if advinputs.data[k,indices[k][t]]>3 and losses[k,indices[k][t]]>0:
                    if advinputs.data[k, indices[k][t]] > 3:
                        word, advinputs.data[
                            k, indices[k][t]] = transformer.transform(
                                args.transformer)(
                                    advinputs[k, indices[k][t]].data[0],
                                    word_index, index2word)
                        wtmp[k][indices[k][t]] = word
                        j += 1
                    t += 1
                # for i in xrange(args.power):
                #     word, advinputs.data[k,indices[k][i]] = transformer.transform(args.transformer)(advinputs[k,indices[k][i]].data[0],word_index,index2word)
                #     wordinput[k][indices[k][i]] = word
        else:
            # Fast path: replace the top args.power positions outright,
            # without skipping special tokens or recording the words.
            for k in xrange(inputs.size()[0]):
                for i in xrange(args.power):
                    word, advinputs.data[
                        k, indices[k][i]] = transformer.transform(
                            args.transformer)(
                                advinputs[k, indices[k][i]].data[0],
                                word_index, index2word)
        adv.append(advinputs.data)
        for i in xrange(len(wtmp)):
            wordinput.append(wtmp[i])
        # inputs2 = Variable(advinputs, volatile=True)
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size()).data
        # print inputs.size()
        # inputs = Variable(inputs, volatile=True)
        # logit = model(inputs)
        # pred1 = torch.max(logit, 1)[1].view(target.size()).data
        # accumulated_loss += F.nll_loss(logit, target, size_average=False).data[0]
        # Accuracy under attack: samples the model still gets right.
        corrects += (pred2 == target.data).sum()
        # predicates_all+=predicates.cpu().numpy().tolist()
        # target_all+=target.data.cpu().numpy().tolist()
        # print corrects
        print wordinput[0]
        # correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    # acc = corrects/len(test_loader.dataset)
    # avg_loss = total_loss/len(test_loader.dataset)
    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # NOTE(review): file handle is never closed.
    f = open('attack_train.txt', 'a')
    f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' % (args.data, args.model, args.scoring,
                                            args.transformer, args.power,
                                            100 * acc))
    if args.advsamplepath == None:
        advsamplepath = 'advsamples2/%s_%d_%s_%s_%d_train.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save(
        {
            'original': origsamples,
            'wordinput': wordinput,
            'advinputs': advinputs,
            'labels': target
        }, advsamplepath)