Example #1
def get_fool_sentence_pwws(self, sentence: str, label: int, index: int):
    # Attack a single sentence with PWWS.
    # flag: -1 = model already misclassifies the sample (attack skipped),
    #        0 = attack ran but failed, 1 = attack flipped the prediction.
    start = time.perf_counter()
    maxlen = config_data[config_dataset].padding_maxlen
    vector = str2seq(sentence, self.vocab, self.tokenizer,
                     maxlen).to(config_device)
    label = torch.tensor(label).to(config_device)
    flag = -1
    end = -1
    predict = self.net.predict_class(vector)[0]
    if predict == label:
        # Only attack samples the model currently classifies correctly.
        sentence, adv_y, sub_rate, NE_rate, change_tuple_list = adversarial_paraphrase(
            sentence,
            vector,
            label,
            self.tokenizer,
            self.vocab,
            self.net,
            self.verbose,
            self.sub_rate_limit,
        )
        flag = 1 if adv_y != label else 0
        self.__write_log(index, flag, sub_rate, NE_rate, change_tuple_list)
        end = time.perf_counter() - start
    return sentence, flag, end
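
The -1/0/1 flag plus elapsed-time convention above makes it easy to aggregate attack statistics over a dataset. Below is a minimal, self-contained sketch of such a driver loop; `attack_one` and the sample data are hypothetical stand-ins for `get_fool_sentence_pwws` and a real corpus.

import random
import time

def attack_one(sentence: str, label: int):
    # Hypothetical stand-in for get_fool_sentence_pwws: returns
    # (possibly perturbed sentence, flag, elapsed seconds).
    start = time.perf_counter()
    flag = random.choice([-1, 0, 1])  # pretend attack outcome
    return sentence, flag, time.perf_counter() - start

samples = [('a fine movie', 1), ('dull and slow', 0), ('great cast', 1)]

succeeded = failed = skipped = 0
for sent, y in samples:
    _, flag, _ = attack_one(sent, y)
    if flag == 1:
        succeeded += 1  # prediction flipped
    elif flag == 0:
        failed += 1     # attacked, but prediction unchanged
    else:
        skipped += 1    # model was already wrong; sample skipped

attacked = succeeded + failed
if attacked:
    print(f'success rate: {succeeded / attacked:.2%}')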
Example #2
def get_fool_sentence_textfool(self, sentence: str, label: int,
                               index: int):
    # Attack a single sentence with the textfool perturbation.
    # flag follows the same -1/0/1 convention as the PWWS variant above.
    start = time.perf_counter()
    maxlen = config_data[config_dataset].padding_maxlen
    flag = -1
    end = -1
    vector = str2seq(sentence, self.vocab, self.tokenizer,
                     maxlen).to(config_device)
    predict = self.net.predict_class(vector)[0]
    if predict == label:
        sentence, adv_y, sub_rate, NE_rate, change_tuple_list = textfool_perturb_text(
            sentence,
            self.net,
            self.vocab,
            self.tokenizer,
            maxlen,
            label,
            self.use_typos,
            verbose=self.verbose,
            sub_rate_limit=self.sub_rate_limit,
        )
        flag = 1 if adv_y != label else 0
        self.__write_log(index, flag, sub_rate, NE_rate, change_tuple_list)
        end = time.perf_counter() - start
    return sentence, flag, end
Example #3
def halt_condition_fn(perturbed_text):
    '''
    Halt if model output is changed.
    '''
    maxlen = config_data[config_dataset].padding_maxlen
    perturbed_vector = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
    predict = net.predict_class(perturbed_vector)[0]
    return predict != true_y
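
The pattern in this closure, capturing the model and the original label and returning True once the prediction flips, is what keeps the search loop attack-agnostic. A minimal self-contained sketch of the same idea, with an invented toy classifier in place of net.predict_class:

def make_halt_condition(classify, true_y):
    # Build a predicate that is True once `classify` disagrees with true_y.
    return lambda text: classify(text) != true_y

def toy_classify(text):
    # Toy classifier: positive iff 'good' outnumbers 'bad'.
    toks = text.split()
    return int(toks.count('good') > toks.count('bad'))

halt = make_halt_condition(toy_classify, true_y=1)
print(halt('good good movie'))  # False: prediction still matches, keep perturbing
print(halt('bad bad movie'))    # True: prediction flipped, stop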
Example #4
def random_attack(
        sentence,
        y_true,
        net,
        vocab,
        tokenizer,
        maxlen,
        verbose=False,
        sub_rate_limit=None):
    '''
    Baseline attack: sample a random subset of substitutable positions and
    replace each with a random WordNet synonym, stopping early once the
    model's prediction flips.
    '''
    doc = nlp(sentence)
    if sub_rate_limit:
        sub_rate_limit = int(sub_rate_limit * len(doc))
    else:
        sub_rate_limit = len(doc)

    def halt_condition_fn(perturbed_text):
        # Halt once the model's prediction no longer matches the true label.
        perturbed_vector = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
        predict = net.predict_class(perturbed_vector)[0]
        return predict != y_true

    # Collect synonym candidates for every position inside the model's window.
    candidates_list = []
    for idx, token in enumerate(doc):
        if idx >= maxlen:
            break
        candidates = _generate_synonym_candidates(token=token, token_position=idx, rank_fn=None)
        if len(candidates) > 0:
            candidates_list.append((idx, candidates))

    # Randomly choose how many positions to perturb, and which ones.
    upper = min(len(candidates_list), sub_rate_limit)
    lower = upper // 3
    sub_num = get_random(lower, upper)
    sub_pos = random.sample(candidates_list, sub_num)

    change_tuple_list = []
    accepted_candidates = []
    for token_pos, candidates in sub_pos:
        substitution = random.sample(candidates, 1)[0]
        accepted_candidates.append(substitution)
        change_tuple_list.append((token_pos, substitution.original_token, substitution.candidate_word, None, 'tagNONE'))
        if verbose:
            print(f'origin token pos {token_pos}, origin token {substitution.original_token}, candidate token {substitution.candidate_word}')
        perturbed_text = ' '.join(
            _compile_perturbed_tokens(doc, accepted_candidates))
        if halt_condition_fn(perturbed_text):
            break

    perturbed_text = ' '.join(
        _compile_perturbed_tokens(doc, accepted_candidates))

    sub_rate = len(change_tuple_list) / len(doc)
    ne_rate = 0.0  # named-entity replacement rate is not tracked by this baseline
    adv_vec = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
    adv_y = net.predict_class(adv_vec)[0]

    return perturbed_text, adv_y, sub_rate, ne_rate, change_tuple_list
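
The core of random_attack is sampling a random subset of substitutable positions and applying one random synonym at each. A self-contained sketch of just that sampling step, with a plain dict standing in for the spaCy doc and the synonym candidates:

import random

tokens = 'the movie was good and the cast was strong'.split()
# Hypothetical synonym table standing in for _generate_synonym_candidates.
synonyms = {3: ['fine', 'decent'], 8: ['solid', 'robust']}

candidates_list = list(synonyms.items())
upper = len(candidates_list)
lower = upper // 3
sub_num = random.randint(lower, upper)      # like get_random(lower, upper)
sub_pos = random.sample(candidates_list, sub_num)

perturbed = list(tokens)
for pos, words in sub_pos:
    perturbed[pos] = random.choice(words)   # one random synonym per position
print(' '.join(perturbed))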
Example #5
    def heuristic_fn(text, candidate):
        '''
        Return the difference between the classification probability of the original
        word and the candidate substitute synonym, which is defined in Eq.(4) and Eq.(5).
        '''
        doc = nlp(text)
        maxlen = config_data[config_dataset].padding_maxlen
        perturbed_tokens = _compile_perturbed_tokens(doc, [candidate])
        perturbed_doc = ' '.join(perturbed_tokens)
        perturbed_vector = str2seq(perturbed_doc, vocab, tokenizer, maxlen).to(config_device)
        adv_y = net.predict_prob(perturbed_vector, true_y)[0]
        ori_y = net.predict_prob(origin_vector, true_y)[0]
        return ori_y - adv_y
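
What heuristic_fn computes is simply P(true class | original) minus P(true class | perturbed): the larger the drop, the more damaging the substitution. A toy, self-contained version of that scoring (the word-weight 'model' is invented purely for illustration):

# Invented word weights standing in for net.predict_prob on the true class.
WEIGHTS = {'good': 0.4, 'fine': 0.2, 'bad': -0.5}

def prob_true_class(text):
    score = sum(WEIGHTS.get(w, 0.0) for w in text.split())
    return min(max(0.5 + score, 0.0), 1.0)   # clamp to [0, 1]

def heuristic(text, position, candidate_word):
    toks = text.split()
    perturbed = toks[:position] + [candidate_word] + toks[position + 1:]
    return prob_true_class(text) - prob_true_class(' '.join(perturbed))

print(heuristic('a good movie', 1, 'fine'))  # small drop: weak candidate
print(heuristic('a good movie', 1, 'bad'))   # large drop: strong candidate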
Example #6
def adversarial_paraphrase(input_text, origin_vector, true_y, tokenizer: Tokenizer,
                           vocab: 'Vocab', net: nn.Module, verbose=False, sub_rate_limit=None):
    '''
    Compute a perturbation, greedily choosing the synonym that causes the most
    significant change in the classification probability after replacement.

    :return perturbed_text: generated adversarial example
    :return perturbed_y: predicted class of perturbed_text
    :return sub_rate: word replacement rate shown in Table 3
    :return NE_rate: named-entity replacement rate
    :return change_tuple_list: list of substitution tuples
    '''

    def halt_condition_fn(perturbed_text):
        '''
        Halt if model output is changed.
        '''
        maxlen = config_data[config_dataset].padding_maxlen
        perturbed_vector = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
        predict = net.predict_class(perturbed_vector)[0]
        return predict != true_y

    def heuristic_fn(text, candidate):
        '''
        Return the difference between the classification probability of the original
        word and the candidate substitute synonym, which is defined in Eq.(4) and Eq.(5).
        '''
        doc = nlp(text)
        maxlen = config_data[config_dataset].padding_maxlen
        perturbed_tokens = _compile_perturbed_tokens(doc, [candidate])
        perturbed_doc = ' '.join(perturbed_tokens)
        perturbed_vector = str2seq(perturbed_doc, vocab, tokenizer, maxlen).to(config_device)
        adv_y = net.predict_prob(perturbed_vector, true_y)[0]
        ori_y = net.predict_prob(origin_vector, true_y)[0]
        return ori_y - adv_y

    doc = nlp(input_text)

    # PWWS
    position_word_list, word_saliency_list = evaluate_word_saliency(doc, origin_vector, true_y, net)
    perturbed_text, sub_rate, NE_rate, change_tuple_list = PWWS(doc,
                                                                true_y,
                                                                word_saliency_list=word_saliency_list,
                                                                heuristic_fn=heuristic_fn,
                                                                halt_condition_fn=halt_condition_fn,
                                                                verbose=verbose,
                                                                sub_rate_limit=sub_rate_limit)

    # print("perturbed_text after perturb_text:", perturbed_text)

    maxlen = config_data[config_dataset].padding_maxlen
    perturbed_vector = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
    perturbed_y = net.predict_class(perturbed_vector)[0]
    if verbose:
        origin_prob = net.predict_prob(origin_vector, true_y)[0]
        perturbed_prob = net.predict_prob(perturbed_vector, true_y)[0]
        raw_score = origin_prob - perturbed_prob
        print('Prob before: ', origin_prob, '. Prob after: ', perturbed_prob,
              '. Prob shift: ', raw_score)
    return perturbed_text, perturbed_y, sub_rate, NE_rate, change_tuple_list
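
adversarial_paraphrase wires the two closures into PWWS: candidate substitutions are scored by the probability drop they cause, applied best-first, and the loop halts as soon as the prediction flips. A compressed, self-contained sketch of that control flow (the toy model and synonym table are invented; this is not the repo's PWWS implementation, which also weights candidates by word saliency):

WEIGHTS = {'good': 0.4, 'great': 0.5, 'bad': -0.5, 'poor': -0.4}
SYNONYMS = {'good': ['bad'], 'great': ['poor']}  # hypothetical substitutions

def prob(text):
    # Toy P(true class = positive), clamped to [0, 1].
    return min(max(0.5 + sum(WEIGHTS.get(w, 0) for w in text.split()), 0), 1)

def classify(text):
    return int(prob(text) >= 0.5)

def greedy_attack(text, true_y):
    toks = text.split()
    # Score each candidate substitution by the drop in true-class probability.
    scored = []
    for i, w in enumerate(toks):
        for cand in SYNONYMS.get(w, []):
            drop = prob(' '.join(toks)) - prob(' '.join(toks[:i] + [cand] + toks[i + 1:]))
            scored.append((drop, i, cand))
    # Apply substitutions best-first; halt once the prediction flips.
    for drop, i, cand in sorted(scored, reverse=True):
        if drop <= 0:
            continue
        toks[i] = cand
        if classify(' '.join(toks)) != true_y:
            break
    return ' '.join(toks)

print(greedy_attack('a good movie with a great cast', true_y=1))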
Example #7
def halt_condition_fn(perturbed_text):
    # Halt once the model's prediction no longer matches the true label.
    perturbed_vector = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
    predict = net.predict_class(perturbed_vector)[0]
    return predict != true_y
Example #8
def textfool_perturb_text(
        sentence,
        net,
        vocab,
        tokenizer,
        maxlen,
        true_y,
        use_typos=False,
        rank_fn=None,
        heuristic_fn=None,
        verbose=False,
        sub_rate_limit=None):
    '''
    Perturb the text by replacing some words with their WordNet synonyms,
    sorting by GloVe similarity between the synonym and the original context
    window, and an optional heuristic. The attack halts as soon as the
    model's prediction flips.

    :param sentence: text to perturb; parsed into a spaCy Doc internally.
    :param rank_fn: see `_generate_synonym_candidates`.
    :param heuristic_fn: ranks the best synonyms using the heuristic.
            If the value of the heuristic is negative, the candidate
            substitution is rejected.
    :param verbose: print progress information.
    ATTENTION:
    This function originally comes from https://github.com/bogdan-kulynych/textfool
    '''
    
    def halt_condition_fn(perturbed_text):
        # Halt once the model's prediction no longer matches the true label.
        perturbed_vector = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
        predict = net.predict_class(perturbed_vector)[0]
        return predict != true_y

    doc = nlp(sentence)
    heuristic_fn = heuristic_fn or (lambda _, candidate: candidate.similarity_rank)
    candidates = textfool_generate_synonym_candidates(doc, rank_fn=rank_fn)
    if use_typos:
        candidates.extend(_generate_typo_candidates(doc))

    if sub_rate_limit:
        sub_rate_limit = int(sub_rate_limit * len(doc))
    else:
        sub_rate_limit = len(doc)

    perturbed_positions = set()
    accepted_candidates = []
    perturbed_text = doc.text
    if verbose:
        print('Got {} candidates'.format(len(candidates)))

    sorted_candidates = zip(
            map(partial(heuristic_fn, perturbed_text), candidates),
            candidates)
    sorted_candidates = list(sorted(sorted_candidates,
            key=lambda t: t[0]))
    change_tuple_list = []

    while len(sorted_candidates) > 0 and len(change_tuple_list) < sub_rate_limit:
        score, candidate = sorted_candidates.pop()
        if score < 0: continue
        if candidate.token_position >= maxlen: break
        if candidate.token_position not in perturbed_positions:
            perturbed_positions.add(candidate.token_position)
            accepted_candidates.append(candidate)
            if verbose:
                print('Candidate:', candidate)
                print('Candidate score:', heuristic_fn(perturbed_text, candidate))
                print('Candidate accepted.')

            perturbed_text = ' '.join(_compile_perturbed_tokens(doc, accepted_candidates))

            change_tuple_list.append((candidate.token_position, candidate.original_token, candidate.candidate_word, score, 'TAGNONE'))
            if halt_condition_fn(perturbed_text):
                break
            if len(sorted_candidates) > 0:
                _, candidates = zip(*sorted_candidates)
                sorted_candidates = zip(
                        map(partial(heuristic_fn, perturbed_text),
                            candidates),
                        candidates)
                sorted_candidates = list(sorted(sorted_candidates,
                        key=lambda t: t[0]))
    sub_rate = len(change_tuple_list) / len(doc)
    ne_rate = 0.0  # named-entity replacement rate is not tracked here
    adv_vec = str2seq(perturbed_text, vocab, tokenizer, maxlen).to(config_device)
    adv_y = net.predict_class(adv_vec)[0]
    return perturbed_text, adv_y, sub_rate, ne_rate, change_tuple_list
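
The distinguishing detail of textfool_perturb_text is that after every accepted substitution the remaining candidates are re-scored against the updated text, because a substitution changes the context the heuristic sees. A self-contained sketch of that re-ranking loop (the heuristic and halt predicate here are invented toys, not the repo's scoring):

def rerank_loop(tokens, candidates, heuristic, halt, limit):
    # candidates: list of (position, word); survivors are re-scored
    # against the current text after every accepted substitution.
    toks = list(tokens)
    used = set()
    changes = []
    pool = list(candidates)
    while pool and len(changes) < limit:
        scored = sorted((heuristic(toks, p, w), p, w) for p, w in pool)
        score, pos, word = scored[-1]             # best-scoring candidate
        pool = [(p, w) for _, p, w in scored[:-1]]
        if score < 0 or pos in used:
            continue                              # rejected; try the next best
        used.add(pos)
        toks[pos] = word
        changes.append((pos, word, score))
        if halt(toks):
            break
    return toks, changes

def toy_heuristic(toks, pos, word):
    return 1.0 if (toks[pos] == 'good' and word == 'bad') else -1.0

def toy_halt(toks):
    return 'bad' in toks  # pretend the prediction flips on 'bad'

print(rerank_loop('a good movie'.split(), [(1, 'bad'), (1, 'fine'), (2, 'film')],
                  toy_heuristic, toy_halt, limit=3))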