Example #1
0
    def heuristic_fn(text, candidate):
        '''
        Score a candidate synonym substitution.

        The score is the difference between the model's forward derivative
        at the original token and at the candidate replacement, signed
        toward the target class and damped by the synonym's relevance rank.

        Note: features and gradients are recomputed for every candidate,
        so this is slow by design.
        '''
        parsed = nlp(text)
        features = extract_features([parsed], max_length=max_length)[0]
        original_grads = grad_guide.wordwise_grads(
            features.reshape(1, -1)).squeeze()
        position = candidate.token_position
        original_derivative = original_grads[position]

        swapped_tokens = _compile_perturbed_tokens(parsed, [candidate])
        swapped_doc = nlp(' '.join(swapped_tokens))
        swapped_features = extract_features(
            [swapped_doc], max_length=max_length)[0]
        swapped_grads = grad_guide.wordwise_grads(
            swapped_features.reshape(1, -1)).squeeze()
        swapped_derivative = swapped_grads[position]

        # Less-similar synonyms (higher rank) are penalized proportionally.
        rank_penalty = candidate.similarity_rank + 1
        score = (original_derivative - swapped_derivative) * (-1 * target)
        return score / rank_penalty
Example #2
0
 def halt_condition_fn(perturbed_text):
     '''
     Decide whether the greedy search can stop.

     Returns True once the model classifies the perturbed text as the
     target class, i.e. the adversarial attack has succeeded.
     '''
     perturbed_x = extract_features([nlp(perturbed_text)],
                                    max_length=max_length)[0]
     y = model.predict_classes([perturbed_x.reshape(1, -1)], verbose=0)\
             .squeeze()
     # Return the comparison directly instead of an if/else that returns
     # literal True/False; bool() keeps a plain Python bool even when the
     # prediction is a numpy scalar.
     return bool(y == target)
Example #3
0
 def halt_condition_fn(perturbed_text):
     '''
     Halt if the model output is changed (i.e. the model has been fooled).
     '''
     # Vectorize the perturbed text into the model's input representation.
     perturbed_x = extract_features(
         [nlp(perturbed_text)],
         max_length=max_length)[0]
     # Predict the class of the perturbed input.
     y = model.predict_classes([perturbed_x.reshape(1, -1)],
                               verbose=0).squeeze()
     # Attack succeeds once the predicted class equals the target class.
     # Return the comparison directly (removes redundant if/else);
     # bool() normalizes the numpy scalar to a plain Python bool.
     return bool(y == target)
Example #4
0
def adversarial_paraphrase(doc,
                           grad_guide,
                           target,
                           max_length=1000,
                           verbose=False):
    '''
    Compute an adversarial paraphrase of ``doc``.

    Greedily substitutes synonyms, at each step picking the replacement
    that maximizes the model's forward derivative towards the target
    class, and halts once the model's predicted class flips to ``target``.

    Parameters:
        doc: parsed document (spaCy-style) to perturb.
        grad_guide: object exposing ``model`` and ``wordwise_grads``.
        target: class index/sign the perturbation should push towards.
        max_length: maximum feature length for ``extract_features``.
        verbose: if True, print the model probability before and after.

    Returns:
        ``(perturbed_text, (y, perturbed_y))`` — the perturbed text plus
        the model's probability output before and after perturbation.
    '''

    model = grad_guide.model

    x = extract_features([doc], max_length=max_length)[0]
    y = model.predict(x.reshape(1, -1), verbose=0).squeeze()
    if verbose:
        print('Prob before', y)

    def halt_condition_fn(perturbed_text):
        '''
        Halt once the model's predicted class equals the target class.
        '''
        perturbed_x = extract_features([nlp(perturbed_text)],
                                       max_length=max_length)[0]
        predicted = model.predict_classes([perturbed_x.reshape(1, -1)],
                                          verbose=0).squeeze()
        # Direct comparison instead of redundant if/else returning
        # True/False; bool() yields a plain Python bool.
        return bool(predicted == target)

    def heuristic_fn(text, candidate):
        '''
        Return the difference between the forward derivative of the original
        word and the candidate substitute synonym, amplified by synonym
        relevance rank.

        Yes, this one is pretty bad in terms of performance: features and
        gradients are recomputed for every single candidate.
        '''
        doc = nlp(text)
        x = extract_features([doc], max_length=max_length)[0] \
                .reshape(1, -1)
        grads = grad_guide.wordwise_grads(x).squeeze()
        index = candidate.token_position
        derivative = grads[index]

        # Re-parse the text with the candidate substituted in place.
        perturbed_tokens = _compile_perturbed_tokens(doc, [candidate])
        perturbed_doc = nlp(' '.join(perturbed_tokens))
        perturbed_x = extract_features(
                [perturbed_doc], max_length=max_length)[0] \
                .reshape(1, -1)
        perturbed_grads = grad_guide.wordwise_grads(perturbed_x).squeeze()
        perturbed_derivative = perturbed_grads[index]
        # Less-similar synonyms (higher rank) are penalized proportionally.
        rank = candidate.similarity_rank + 1
        raw_score = derivative - perturbed_derivative
        raw_score *= -1 * target
        return raw_score / rank

    perturbed_text = perturb_text(doc,
                                  heuristic_fn=heuristic_fn,
                                  halt_condition_fn=halt_condition_fn,
                                  verbose=verbose)

    # [0] added for consistency with every other extract_features call
    # site: select the single document's features before reshaping.
    perturbed_x = extract_features([nlp(perturbed_text)],
                                   max_length=max_length)[0].reshape(1, -1)
    perturbed_y = model.predict(perturbed_x, verbose=0).squeeze()
    _stats_probability_shifts.append(perturbed_y - y)
    if verbose:
        print('Prob after:', perturbed_y)

    # Removed dead code: a predict_classes call whose result was bound to
    # an unused local (perturbed_y_class) and never read.
    return perturbed_text, (y, perturbed_y)
Example #5
0
def adversarial_paraphrase(doc,
                           grad_guide,
                           target,
                           max_length=1000,
                           use_typos=False,
                           verbose=False):
    '''
    Compute an adversarial paraphrase of ``doc``.

    Greedily chooses synonym (or typo, when ``use_typos`` is True)
    substitutions that maximize the model's forward derivative towards
    the target class, halting once the predicted class flips to
    ``target``.

    Parameters:
        doc: parsed document (spaCy-style) to perturb.
        grad_guide: object exposing ``model`` and ``wordwise_grads``.
        target: class index/sign the perturbation should push towards.
        max_length: maximum feature length for ``extract_features``.
        use_typos: forwarded to ``perturb_text`` to allow typo candidates.
        verbose: if True, print the model probability before and after.

    Returns:
        ``(perturbed_text, (y, perturbed_y))`` — the perturbed text plus
        the model's probability output before and after perturbation.
    '''

    model = grad_guide.model

    # Vectorize the original document and record the model's output on it.
    x = extract_features([doc],
                         max_length=max_length)[0]
    y = model.predict(x.reshape(1, -1), verbose=0).squeeze()
    if verbose:
        print('Prob before', y)

    def halt_condition_fn(perturbed_text):
        '''
        Halt once the model output has changed, i.e. the model is fooled.
        '''
        # Vectorize the perturbed text.
        perturbed_x = extract_features(
            [nlp(perturbed_text)],
            max_length=max_length)[0]
        # Predict the class of the perturbed input.
        predicted = model.predict_classes([perturbed_x.reshape(1, -1)],
                                          verbose=0).squeeze()
        # Direct comparison instead of redundant if/else returning
        # True/False; bool() yields a plain Python bool.
        return bool(predicted == target)

    def heuristic_fn(text, candidate):
        '''
        Return the difference between the forward derivative of the original
        word and the candidate substitute synonym, amplified by synonym
        relevance rank.

        Yes, this one is pretty bad in terms of performance: features and
        gradients are recomputed for every single candidate.
        '''
        doc = nlp(text)
        x = extract_features([doc], max_length=max_length)[0].reshape(1, -1)
        grads = grad_guide.wordwise_grads(x).squeeze()
        index = candidate.token_position
        derivative = grads[index]

        # Re-parse the text with the candidate substituted in place.
        perturbed_tokens = _compile_perturbed_tokens(doc, [candidate])
        perturbed_doc = nlp(' '.join(perturbed_tokens))
        perturbed_x = extract_features([perturbed_doc],
                                       max_length=max_length)[0].reshape(
                                           1, -1)
        perturbed_grads = grad_guide.wordwise_grads(perturbed_x).squeeze()
        perturbed_derivative = perturbed_grads[index]
        # Less-similar synonyms (higher rank) are penalized proportionally.
        rank = candidate.similarity_rank + 1
        raw_score = derivative - perturbed_derivative
        raw_score *= -1 * target
        return raw_score / rank

    perturbed_text = perturb_text(doc,
                                  use_typos=use_typos,
                                  heuristic_fn=heuristic_fn,
                                  halt_condition_fn=halt_condition_fn,
                                  verbose=verbose)

    # [0] added for consistency with every other extract_features call
    # site: select the single document's features before reshaping.
    perturbed_x = extract_features([nlp(perturbed_text)],
                                   max_length=max_length)[0].reshape(1, -1)
    perturbed_y = model.predict(perturbed_x, verbose=0).squeeze()
    _stats_probability_shifts.append(perturbed_y - y)
    if verbose:
        print('Prob after:', perturbed_y)

    # Removed dead code: a predict_classes call whose result was bound to
    # an unused local (perturbed_y_class) and never read.
    return perturbed_text, (y, perturbed_y)