Example #1
0
    def heuristic_fn(text, candidate):
        '''
        Return the difference between the classification probability of the
        original word and the candidate substitute synonym, which is defined
        in Eq.(4) and Eq.(5).

        Parameters
        ----------
        text : str
            The current (possibly already perturbed) input text.
        candidate
            A substitution candidate produced by the synonym search;
            consumed by ``_compile_perturbed_tokens``.

        Returns
        -------
        The drop in the true class's predicted probability caused by
        applying ``candidate`` (larger values mean the substitution is
        more damaging to the classifier).
        '''
        doc = nlp(text)
        origin_vector = None
        perturbed_vector = None
        if level == 'word':
            origin_vector = text_to_vector(text, tokenizer, dataset)
            perturbed_tokens = _compile_perturbed_tokens(doc, [candidate])
            perturbed_doc = nlp(' '.join(perturbed_tokens))
            perturbed_vector = text_to_vector(perturbed_doc.text, tokenizer,
                                              dataset)
        elif level == 'char':
            max_len = config.char_max_len[dataset]
            # Build the embedding dict once and reuse it for both the
            # original and the perturbed text (was fetched twice before).
            embedding_dict = get_embedding_dict()
            origin_vector = doc_process(text, embedding_dict,
                                        dataset).reshape(1, max_len)
            # BUG FIX: perturb the *current* text (``doc``), not the
            # enclosing scope's ``input_text`` — the word branch above
            # already does this, and reusing ``input_text`` would discard
            # substitutions applied in earlier search steps.
            perturbed_tokens = _compile_perturbed_tokens(doc, [candidate])
            perturbed_text = ' '.join(perturbed_tokens)
            perturbed_vector = doc_process(perturbed_text, embedding_dict,
                                           dataset).reshape(1, max_len)
        else:
            # Previously an unknown level left both vectors as None and
            # failed opaquely inside predict_prob; fail fast instead.
            raise ValueError("level must be 'word' or 'char', got %r" % level)

        origin_prob = grad_guide.predict_prob(input_vector=origin_vector)
        perturbed_prob = grad_guide.predict_prob(input_vector=perturbed_vector)
        delta_p = origin_prob[true_y] - perturbed_prob[true_y]

        return delta_p
    def heuristic_fn(text, candidate):
        '''
        Return the difference between the forward derivative of the original
        word and the candidate substitute synonym, amplified by synonym
        relevance rank.

        Yes, this one is pretty bad in terms of performance.
        '''
        # Gradient at the candidate's position in the unmodified text.
        original_doc = nlp(text)
        original_features = extract_features(
                [original_doc], max_length=max_length)[0].reshape(1, -1)
        position = candidate.token_position
        original_derivative = \
                grad_guide.wordwise_grads(original_features).squeeze()[position]

        # Gradient at the same position after applying the substitution.
        swapped_tokens = _compile_perturbed_tokens(original_doc, [candidate])
        swapped_doc = nlp(' '.join(swapped_tokens))
        swapped_features = extract_features(
                [swapped_doc], max_length=max_length)[0].reshape(1, -1)
        swapped_derivative = \
                grad_guide.wordwise_grads(swapped_features).squeeze()[position]

        # Sign-flip by the target direction, then damp less-similar
        # synonyms by their (1-based) relevance rank.
        score = (original_derivative - swapped_derivative) * (-1 * target)
        return score / (candidate.similarity_rank + 1)