def attackchar(maxbatch=None):
    """Run a character-level adversarial attack over the test set.

    For each batch, score every character position with the configured
    scoring function, greedily replace up to ``args.power`` of the
    highest-scoring non-empty positions with the configured transformer,
    and measure accuracy on the perturbed inputs.  A summary line is
    appended to ``attack_log.txt`` and the adversarial samples are saved
    with ``torch.save``.

    Args:
        maxbatch: optional cap on the number of batches to attack;
            ``None`` attacks the whole test set.

    Relies on module-level globals: ``model``, ``test_loader``,
    ``device``, ``args``, ``numclass``, ``alphabet``, ``scoring_char``,
    ``transformer_char``.
    """
    corrects = 0.0
    model.eval()
    tgt = []
    adv = []
    origsample = []
    origsampleidx = []
    modified = []
    for dataid, data in enumerate(test_loader):
        print(dataid)
        if maxbatch is not None and dataid >= maxbatch:
            break
        inputs, target, idx = data
        inputs, target = inputs.to(device), target.to(device)
        output = model(inputs)
        tgt.append(target)
        origsample.append(inputs)
        origsampleidx.append(idx)
        pred = torch.max(output, 1)[1].view(target.size())

        # Per-position importance scores; shape (batch, seq_len).
        losses = scoring_char.scorefunc(args.scoring)(model, inputs, pred,
                                                      numclass)

        # Renamed from `sorted` to avoid shadowing the builtin.
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        # Inputs are one-hot over dim 1, so a non-zero column sum marks
        # positions that actually hold a character.
        dt = inputs.sum(dim=1).int()
        for k in range(inputs.size()[0]):
            j = 0
            t = 0
            # BUG FIX: `indices` ranges over sequence positions, i.e.
            # inputs.size()[2] (losses is (batch, seq_len)); the original
            # bound inputs.size()[1] was the alphabet dimension and cut
            # the search short.
            while j < args.power and t < inputs.size()[2]:
                if dt[k, indices[k][t]] > 0:
                    advinputs[k, :, indices[k][t]], nowchar = \
                        transformer_char.transform(args.transformer)(
                            inputs,
                            torch.max(advinputs[k, :, indices[k][t]],
                                      0)[1].item(),
                            alphabet)
                    # Record (global sample index, replacement char).
                    modified.append((args.batchsize * dataid + k, nowchar))
                    j += 1
                t += 1
        adv.append(advinputs)
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(target.size())
        corrects += (pred2 == target).sum().item()

    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # `with` guarantees the log handle is closed (the original leaked it).
    with open('attack_log.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' %
                (args.data, args.model, args.scoring, args.transformer,
                 args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'a0808/%s_%d_%s_%s_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save({'original': origsamples, 'sampleid': origsampleidx,
                'advinputs': advinputs, 'labels': target,
                'modified': modified}, advsamplepath)
Esempio n. 2
0
def generate_char_adv(model, args, numclass, data, device, alphabet):
    """Generate character-level adversarial inputs for a single batch.

    Scores each character position with the configured scoring function,
    then greedily replaces up to ``args.power`` of the highest-scoring
    non-empty positions per sample using the configured transformer.

    Args:
        model: classifier exposing ``__call__`` (inputs -> features) and
            ``h_to_logits`` (features -> logits).
        args: namespace providing ``scoring``, ``transformer``, ``power``.
        numclass: number of output classes.
        data: 4-tuple ``(inputs, target, idx, raw)``; ``idx`` and ``raw``
            are accepted for interface compatibility but unused (the
            original built a modified string from ``raw`` and discarded it).
        device: torch device for the batch.
        alphabet: character alphabet used by the transformer.

    Returns:
        Tensor shaped like ``inputs`` with the adversarial edits applied.
    """
    inputs, target, _idx, _raw = data
    inputs, target = inputs.to(device), target.to(device)
    h = model(inputs)
    outputs = model.h_to_logits(h)
    pred = torch.max(outputs, 1)[1].view(target.size())

    # Per-position importance scores; shape (batch, seq_len).
    losses = scoring_char.scorefunc(args.scoring)(model, inputs, pred,
                                                  numclass)

    # Renamed from `sorted` to avoid shadowing the builtin.
    _, indices = torch.sort(losses, dim=1, descending=True)
    advinputs = inputs.clone()
    # Inputs are one-hot over dim 1, so a non-zero column sum marks
    # positions that actually hold a character.
    dt = inputs.sum(dim=1).int()
    for k in range(inputs.size()[0]):
        changed = 0
        t = 0
        while changed < args.power and t < inputs.size()[2]:
            pos = indices[k][t]
            if dt[k, pos].item() > 0:
                advinputs[k, :, pos], _nowchar = \
                    transformer_char.transform(args.transformer)(
                        inputs,
                        torch.max(advinputs[k, :, pos], 0)[1].item(),
                        alphabet)
                changed += 1
            t += 1
    return advinputs
Esempio n. 3
0
def visualize(input_str,
              dict_word=None,
              index2word=None,
              classes_list=None,
              power=5,
              scoring_alg='replaceone',
              transformer_alg='homoglyph',
              model=model,
              mode='word',
              maxlength=500,
              device=None,
              filter_char=default_filter,
              alphabet=default_alphabet):
    """Attack a single input string and print the before/after predictions.

    In ``'word'`` mode the string is tokenized, up to ``power`` important
    words are substituted, and the adversarial string is reconstructed
    with ``recoveradv``.  In ``'char'`` mode the string is one-hot encoded
    over ``alphabet`` and up to ``power`` important characters are
    replaced in place.

    Args:
        input_str: raw text to attack.
        dict_word: word -> index vocabulary (word mode).
        index2word: index -> word list (word mode).
        classes_list: class-index -> label names.
        power: maximum number of substitutions.
        scoring_alg: name passed to the scoring module's ``scorefunc``.
        transformer_alg: name passed to the transformer module's ``transform``.
        model: classifier; defaults to the module-level model bound at
            definition time.
        mode: ``'word'`` or ``'char'``.
        maxlength: accepted for interface compatibility (unused here).
        device: optional torch device to move inputs to.
        filter_char: accepted for interface compatibility (unused here).
        alphabet: character alphabet (char mode).

    Returns:
        Tuple ``(input_str, orig_probs, orig_label, adv_str, adv_probs,
        adv_label)``.

    Raises:
        Exception: if ``mode`` is neither ``'word'`` nor ``'char'``.
    """
    # Avoid mutable default arguments (a shared list would persist
    # across calls); None is the sentinel for "not provided".
    dict_word = [] if dict_word is None else dict_word
    index2word = [] if index2word is None else index2word
    classes_list = [] if classes_list is None else classes_list
    numclass = len(classes_list)

    if mode == 'word':

        input_seq = simple_tokenize(input_str, dict_word)
        input_seq = torch.Tensor(input_seq).long().view(1, -1)
        if device:
            input_seq = input_seq.to(device)
        res1 = model(input_seq)
        pred1 = torch.max(res1, 1)[1].view(-1)
        losses = scoring.scorefunc(scoring_alg)(model, input_seq, pred1,
                                                numclass)

        print(input_str)
        pred1 = pred1.item()
        print('original:', classes_list[pred1])

        # Renamed from `sorted` to avoid shadowing the builtin.
        _, indices = torch.sort(losses, dim=1, descending=True)

        advinputs = input_seq.clone()
        # Running word list used to rebuild the adversarial string;
        # indices <= 3 are special tokens and stay empty.
        wtmp = []
        for i in range(input_seq.size()[1]):
            if advinputs[0, i].item() > 3:
                wtmp.append(index2word[advinputs[0, i].item()])
            else:
                wtmp.append('')
        j = 0
        t = 0
        while j < power and t < input_seq.size()[1]:
            if advinputs[0, indices[0][t]].item() > 3:
                word, advinputs[0, indices[0][t]] = transformer.transform(
                    transformer_alg)(advinputs[0, indices[0][t]].item(),
                                     dict_word,
                                     index2word,
                                     top_words=20000)
                wtmp[indices[0][t]] = word
                j += 1
            t += 1
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1).item()
        adv_str = recoveradv(input_str.lower(), index2word, input_seq[0], wtmp)
        print(adv_str)
        print('adversarial:', classes_list[pred2])
        return (input_str, torch.exp(res1).detach().cpu()[0],
                classes_list[pred1], adv_str,
                torch.exp(output2).detach().cpu()[0], classes_list[pred2])
    elif mode == 'char':
        inputs = transchar(input_str, alphabet=alphabet)
        if device:
            inputs = inputs.to(device)
        output = model(inputs)
        pred1 = torch.max(output, 1)[1].view(-1)

        losses = scoring_char.scorefunc(scoring_alg)(model, inputs, pred1,
                                                     numclass)

        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.clone()
        # Non-zero one-hot column sums mark real character positions.
        dt = inputs.sum(dim=1).int()
        j = 0
        t = 0
        # The encoding indexes characters back-to-front, so edit the
        # reversed string and flip it back afterwards.
        md = input_str.lower()[:][::-1]
        while j < power and t < inputs.size()[2]:
            if dt[0, indices[0][t]].item() > 0:
                advinputs[0, :,
                          indices[0][t]], nowchar = transformer_char.transform(
                              transformer_alg)(inputs,
                                               torch.max(
                                                   advinputs[0, :,
                                                             indices[0][t]],
                                                   0)[1].item(), alphabet)
                md = md[:indices[0][t].item(
                )] + nowchar + md[indices[0][t].item() + 1:]
                j += 1
            t += 1
        md = md[::-1]
        output2 = model(advinputs)
        pred2 = torch.max(output2, 1)[1].view(-1)
        print(input_str)
        print('original:', classes_list[pred1.item()])
        print(md)
        print('adversarial:', classes_list[pred2.item()])
        return (input_str, torch.exp(output)[0].detach().cpu(),
                classes_list[pred1.item()], md,
                torch.exp(output2)[0].detach().cpu(),
                classes_list[pred2.item()])
    else:
        raise Exception('Wrong mode %s' % mode)
Esempio n. 4
0
def attackchar(maxbatch=None):
    """Character-level attack over the test set (legacy Variable/volatile API).

    Scores character positions per sample, replaces up to ``args.power``
    of the highest-scoring non-empty positions with the configured
    transformer, logs accuracy to ``attack.txt`` and saves the
    adversarial samples.

    Args:
        maxbatch: optional cap on the number of batches; ``None`` runs
            the whole test set.

    Relies on module-level globals: ``model``, ``test_loader``, ``args``,
    ``numclass``, ``scoring_char``, ``transformer_char``, ``Variable``.
    """
    corrects = 0.0
    model.eval()
    tgt = []
    adv = []
    origsample = []
    for dataid, data in enumerate(test_loader):
        print(dataid)
        if maxbatch is not None and dataid >= maxbatch:
            break
        inputs, target = data
        # Legacy pre-0.4 PyTorch: volatile Variables instead of no_grad().
        inputs, target = Variable(inputs, volatile=True), Variable(target)
        inputs, target = inputs.cuda(), target.cuda()
        output = model(inputs)
        tgt.append(target.data)
        origsample.append(inputs.data)

        pred = Variable(torch.max(output, 1)[1].view(target.size()).data)

        # Per-position importance scores; shape (batch, seq_len).
        losses = scoring_char.scorefunc(args.scoring)(model, inputs, pred,
                                                      numclass)

        # Renamed from `sorted` to avoid shadowing the builtin.
        _, indices = torch.sort(losses, dim=1, descending=True)

        advinputs = inputs.data.clone()
        # Non-zero one-hot column sums mark real character positions.
        dt = inputs.data.sum(dim=1).int()
        print(dt)
        for k in range(inputs.size()[0]):
            j = 0
            t = 0
            # BUG FIX: `indices` ranges over sequence positions, i.e.
            # inputs.size()[2] (losses is (batch, seq_len)); the original
            # bound inputs.size()[1] was the alphabet dimension and cut
            # the search short.
            while j < args.power and t < inputs.size()[2]:
                if dt[k, indices[k][t]] > 0:
                    advinputs[k, :,
                              indices[k][t]] = transformer_char.transform(
                                  args.transformer)(inputs)
                    j += 1
                t += 1
        adv.append(advinputs)
        inputs2 = Variable(advinputs, volatile=True)
        output2 = model(inputs2)
        pred2 = torch.max(output2, 1)[1].view(target.size()).data
        corrects += (pred2 == target.data).sum()

    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # `with` guarantees the log handle is closed (the original leaked it).
    with open('attack.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' %
                (args.data, args.model, args.scoring, args.transformer,
                 args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples/%s_%d_%s_%s_%d.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power)
    else:
        advsamplepath = args.advsamplepath
    torch.save(
        {
            'original': origsamples,
            'advinputs': advinputs,
            'labels': target
        }, advsamplepath)
Esempio n. 5
0
def attackchar(maxbatch=None):
    """Character-level attack over the TRAINING set (legacy Variable API).

    Unlike the test-set variant, substitutions are unconditional: the top
    ``args.power`` scored positions of every sample are replaced without
    checking that a character is present.  Accuracy is appended to
    ``attack_train.txt`` and the adversarial samples are saved.

    Args:
        maxbatch: optional cap on the number of batches; ``None`` runs
            the whole training set.

    Relies on module-level globals: ``model``, ``train_loader``, ``args``,
    ``numclass``, ``scoring_char``, ``transformer_char``, ``Variable``.
    """
    corrects = 0.0
    model.eval()
    tgt = []
    adv = []
    origsample = []
    for dataid, data in enumerate(train_loader):
        print(dataid)
        # BUG FIX: use >= so `maxbatch` batches are processed, consistent
        # with the other attack loops (the original `>` ran one extra).
        if maxbatch is not None and dataid >= maxbatch:
            break
        inputs, target = data
        # Legacy pre-0.4 PyTorch: volatile Variables instead of no_grad().
        inputs, target = Variable(inputs, volatile=True), Variable(target)
        inputs, target = inputs.cuda(), target.cuda()
        output = model(inputs)
        tgt.append(target.data)
        origsample.append(inputs.data)

        pred = Variable(torch.max(output, 1)[1].view(target.size()).data)

        # Per-position importance scores; shape (batch, seq_len).
        losses = scoring_char.scorefunc(args.scoring)(model, inputs, pred,
                                                      numclass)

        # Renamed from `sorted` to avoid shadowing the builtin.
        _, indices = torch.sort(losses, dim=1, descending=True)
        advinputs = inputs.data.clone()
        for k in range(inputs.size()[0]):
            j = 0
            t = 0
            # BUG FIX: iterate over sequence positions (size()[2]);
            # the original bound size()[1] was the alphabet dimension.
            while j < args.power and t < inputs.size()[2]:
                advinputs[k, :, indices[k][t]] = transformer_char.transform(
                    args.transformer)(inputs)
                j += 1
                t += 1
        adv.append(advinputs)
        inputs2 = Variable(advinputs, volatile=True)
        output2 = model(inputs2)
        pred2 = torch.max(output2, 1)[1].view(target.size()).data
        corrects += (pred2 == target.data).sum()

    target = torch.cat(tgt)
    advinputs = torch.cat(adv)
    origsamples = torch.cat(origsample)
    acc = corrects / advinputs.size(0)
    print('Accuracy %.5f' % (acc))
    # `with` guarantees the log handle is closed (the original leaked it).
    with open('attack_train.txt', 'a') as f:
        f.write('%d\t%s\t%s\t%s\t%d\t%.2f\n' %
                (args.data, args.model, args.scoring, args.transformer,
                 args.power, 100 * acc))
    if args.advsamplepath is None:
        advsamplepath = 'advsamples2/%s_%d_%s_%s_%d_train.dat' % (
            args.model, args.data, args.scoring, args.transformer, args.power)
    else:
        advsamplepath = args.advsamplepath
    # BUG FIX: save to the computed `advsamplepath`; the original passed
    # args.advsamplepath, which is None exactly when the fallback path
    # above was built, so the save would fail.
    torch.save(
        {
            'original': origsamples,
            'advinputs': advinputs,
            'labels': target
        }, advsamplepath)