Example #1
def add_more_examples(train,
                      prob=0.1,
                      drop=False,
                      swap=False,
                      key=False,
                      add=False,
                      all=False):
    extra_examples = []
    for line, tag in train:
        if random.random() > prob:
            continue  # perturb only a `prob` fraction of the training set
        gen_attacks = []  # default: no attack flag set, nothing to generate
        if swap:
            gen_attacks = attacks.swap_one_attack(line)
        elif drop:
            gen_attacks = attacks.drop_one_attack(line)
        elif add:
            gen_attacks = attacks.add_one_attack(line)
        elif key:
            gen_attacks = attacks.key_one_attack(line)
        elif all:
            gen_attacks = attacks.all_one_attack(line)

        for _, adversary in gen_attacks:
            w_i, c_i = get_word_and_char_indices(adversary)
            adv_pred = predict(w_i, c_i)
            if adv_pred != t2i[tag]:
                # the attack flipped the prediction: keep it for augmentation
                extra_examples.append((adversary, tag))

    return extra_examples
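
The function above assumes a surrounding module that provides `attacks`, `predict`, `get_word_and_char_indices`, and `t2i`. Below is a minimal, self-contained sketch of the same augmentation idea with toy stand-ins; every name in it is hypothetical, not the repository's API:

import random

def swap_one_attack(line):
    """Yield (index, perturbed line) pairs by swapping each adjacent
    character pair once -- a toy stand-in for attacks.swap_one_attack."""
    for i in range(len(line) - 1):
        chars = list(line)
        chars[i], chars[i + 1] = chars[i + 1], chars[i]
        yield i, "".join(chars)

def toy_predict(text):
    # hypothetical classifier: 1 if the line mentions "good", else 0
    return 1 if "good" in text else 0

def add_more_examples_sketch(train, prob=0.5):
    extra = []
    for line, tag in train:
        if random.random() > prob:
            continue  # perturb only a `prob` fraction of the data
        for _, adversary in swap_one_attack(line):
            if toy_predict(adversary) != tag:
                extra.append((adversary, tag))  # model fooled: keep it
    return extra

print(add_more_examples_sketch([("good movie", 1), ("bad movie", 0)], prob=1.0))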
Example #2
def get_qualitative_examples():
    lines, tags = read_valid_lines("data/classes/test_retokenized.txt")
    c = list(zip(lines, tags))
    random.shuffle(c)
    lines, tags = zip(*c)
    lines = lines[:200]
    tags = tags[:200]

    for line, tag in tqdm(zip(lines, tags), total=len(lines)):

        w_i, c_i = get_word_and_char_indices(line)

        # skip examples the model already misclassifies on the clean input
        model_prediction = predict(w_i, c_i)
        if t2i[tag] != model_prediction:
            continue

        gen_attacks = attacks.all_one_attack(line)

        for idx, adversary in gen_attacks:
            w_i, c_i = get_word_and_char_indices(adversary)

            adv_pred = predict(w_i, c_i)

            if adv_pred == t2i[tag]:
                # this example doesn't break the model...
                continue

            corrected_string = checker.correct_string(adversary)
            w_i, c_i = get_word_and_char_indices(corrected_string)

            post_pred = predict(w_i, c_i)

            if post_pred != t2i[tag]:
                # spell-correction failed to restore the correct prediction
                continue

            log.pr(" -------------- ")
            log.pr("Original line = %s" % (line))
            log.pr("Original label = %s" % (tag))
            log.pr_red("Adversary = %s" % (adversary))
            log.pr_green("Correction = %s" % (corrected_string))
            log.pr(" -------------- ")

    return None
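
Stripped of the model- and repository-specific helpers, the loop above is a three-stage filter: the clean line must be classified correctly, some one-character attack must flip the prediction, and spell-correction must restore it. Here is a self-contained sketch of that filter with caller-supplied stand-ins for `predict`, `attacks.all_one_attack`, and `checker.correct_string` (all hypothetical here):

def find_qualitative_examples(data, predict_fn, attack_fn, correct_fn):
    """Keep (line, adversary, correction, tag) tuples where the clean line
    is classified correctly, an attack flips the prediction, and the
    corrected adversary restores it."""
    hits = []
    for line, tag in data:
        if predict_fn(line) != tag:
            continue  # already wrong on the clean input: not interesting
        for _, adversary in attack_fn(line):
            if predict_fn(adversary) == tag:
                continue  # this attack doesn't break the model
            corrected = correct_fn(adversary)
            if predict_fn(corrected) == tag:
                hits.append((line, adversary, corrected, tag))
                break
    return hits

# toy usage: the "model" just checks for the literal token "good"
data = [("a good film", 1), ("a bad film", 0)]
print(find_qualitative_examples(
    data,
    predict_fn=lambda s: 1 if "good" in s else 0,
    attack_fn=lambda s: ((i, s[:i] + s[i + 1:]) for i in range(len(s))),
    correct_fn=lambda s: "a good film"))  # oracle corrector, toy only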
Example #3
def check_against_spell_mistakes(filename):
    lines, tags = read_valid_lines(filename)

    c = list(zip(lines, tags))
    random.shuffle(c)
    lines, tags = zip(*c)

    # truncate the data in small (or computation-heavy) modes
    if params['small']:
        lines = lines[:200]
        tags = tags[:200]
    if params['small'] and params['sc_atd']:
        lines = lines[:99]
        tags = tags[:99]

    inc_count = 0.0
    inc_count_per_attack = [0.0 for _ in range(NUM_ATTACKS + 1)]
    error_analyser = {}
    for line, tag in tqdm(zip(lines, tags), total=len(lines)):

        w_i, c_i = get_word_and_char_indices(line)
        if params['is_spell_check']:
            w_i, c_i = get_word_and_char_indices(checker.correct_string(line))

        # check if model prediction is incorrect, if yes, continue
        model_prediction = predict(w_i, c_i)
        if t2i[tag] != model_prediction:
            # already incorrect, no attack needed
            inc_count += 1
            inc_count_per_attack[0] += 1.0
            continue

        found_incorrect = False

        worst_example = line
        worst_confidence = 1.0
        worst_idx = -1
        ignore_indices = set()

        for attack_count in range(1, 1 + NUM_ATTACKS):

            ignore_indices.add(worst_idx)

            if 'drop' in type_of_attack:
                gen_attacks = attacks.drop_one_attack(
                    worst_example,
                    ignore_indices,
                    include_ends=params['include_ends'])
            elif 'swap' in type_of_attack:
                gen_attacks = attacks.swap_one_attack(
                    worst_example, include_ends=params['include_ends'])
            elif 'key' in type_of_attack:
                gen_attacks = attacks.key_one_attack(
                    worst_example,
                    ignore_indices,
                    include_ends=params['include_ends'])
            elif 'add' in type_of_attack:
                gen_attacks = attacks.add_one_attack(
                    worst_example,
                    ignore_indices,
                    include_ends=params['include_ends'])
            elif 'all' in type_of_attack:
                gen_attacks = attacks.all_one_attack(
                    worst_example,
                    ignore_indices,
                    include_ends=params['include_ends'])
            else:
                raise ValueError('unknown attack type: %s' % type_of_attack)

            for idx, adversary in gen_attacks:
                original_adv = adversary  # keep the uncorrected adversary
                if params['is_spell_check']:
                    adversary = checker.correct_string(adversary)
                w_i, c_i = get_word_and_char_indices(adversary)
                adv_pred = predict(w_i, c_i)
                confidence = get_confidence(w_i, c_i)

                if confidence < worst_confidence:
                    worst_confidence = confidence
                    worst_idx = idx
                    worst_example = adversary

                if adv_pred != t2i[tag]:
                    # found incorrect prediction
                    found_incorrect = True
                    break

            if found_incorrect:
                inc_count += 1.0
                inc_count_per_attack[attack_count] += 1.0
                if params['analyse']:
                    # idx/original_adv/adversary retain their values from
                    # the iteration that broke the model
                    error_analyser[line] = {}
                    error_analyser[line]['adversary'] = original_adv.split()[idx]
                    error_analyser[line]['correction'] = adversary.split()[idx]
                    error_analyser[line]['idx'] = idx

                break

    for num in range(NUM_ATTACKS + 1):
        log.pr_red(
            'adversarial accuracy of the model after %d attacks = %.2f' %
            (num, 100. *
             (1 - sum(inc_count_per_attack[:num + 1]) / len(lines))))

    if params['analyse']:
        curr_time = datetime.datetime.now().strftime("%B_%d_%I:%M%p")
        pickle.dump(error_analyser,
                    open("error_analyser_" + str(curr_time) + ".p", 'wb'))

    return None
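
The inner search above is greedy: each round either finds a perturbation that flips the prediction or keeps the one that most reduced the model's confidence, freezing that position for later rounds via the ignore set. A minimal sketch of that search logic, where the predict/confidence/attack callables are hypothetical stand-ins for the repository's helpers:

def greedy_multi_attack(line, gold, predict_fn, confidence_fn, attack_fn,
                        num_attacks=2):
    worst_example, worst_confidence, worst_idx = line, 1.0, -1
    ignore = set()
    for _ in range(num_attacks):
        ignore.add(worst_idx)  # don't re-attack the position perturbed last round
        for idx, adversary in attack_fn(worst_example, ignore):
            if predict_fn(adversary) != gold:
                return adversary  # prediction flipped: attack succeeded
            c = confidence_fn(adversary)
            if c < worst_confidence:  # most damaging perturbation so far
                worst_confidence, worst_idx, worst_example = c, idx, adversary
    return None  # no flip within the attack budget

# toy usage: drop-one-character attacks against a keyword classifier
res = greedy_multi_attack(
    "good", 1,
    predict_fn=lambda s: 1 if "good" in s else 0,
    confidence_fn=lambda s: 0.9,  # constant confidence, for illustration
    attack_fn=lambda s, ig: ((i, s[:i] + s[i + 1:]) for i in range(len(s))
                             if i not in ig))
print(res)  # "ood": dropping the first character breaks the toy classifier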
Example #4
def check_against_spell_mistakes(filename):
    if singles:
        lines, tags = read_valid_lines_single(filename)
        spell_check_model = BertForMaskedLM.from_pretrained(
            "bert-base-uncased", cache_dir="/data/kashyap_data/")
        spell_check_model.eval()
        spell_check_model.to('cuda')
        c = list(zip(lines, tags))
        random.shuffle(c)
        lines, tags = zip(*c)

        # truncate the data in small (or computation-heavy) modes
        if params['small']:
            lines = lines[:200]
            tags = tags[:200]
        if params['small'] and params['sc_atd']:
            lines = lines[:99]
            tags = tags[:99]

        inc_count = 0.0
        inc_count_per_attack = [0.0 for _ in range(NUM_ATTACKS+1)]
        error_analyser = {}
        for line, tag in tqdm(zip(lines, tags), total=len(lines)):

            w_i, c_i = get_word_and_char_indices(line)
            if params['is_spell_check']:
                # disabled alternative: BERT masked-LM correction
                # (uses the spell_check_model loaded above)
                """
                pickle_off = open("tokenizer.pkl", "rb")
                bert_tokenizer = pickle.load(pickle_off)
                tokenizer = nlp.Defaults.create_tokenizer(nlp)
                tokens = [t.text for t in tokenizer(line)]
                misspelled = spell.unknown(tokens)
                indices = []
                for i in misspelled:
                    indices.append(tokens.index(i))
                    tokens[tokens.index(i)] = "[MASK]"
                tokens.insert(0, "[CLS]")
                tokens.insert(-1, "[SEP]")
                new_string = " ".join(tokens)
                bert_tokenized_text = bert_tokenizer.tokenize(new_string)
                indexed_tokens = bert_tokenizer.convert_tokens_to_ids(
                    bert_tokenized_text)
                segments_ids = [0]*len(bert_tokenized_text)

                tokens_tensor = torch.tensor([indexed_tokens])
                segments_tensors = torch.tensor([segments_ids])
                tokens_tensor = tokens_tensor.to('cuda')
                segments_tensors = segments_tensors.to('cuda')
                with torch.no_grad():
                    outputs = spell_check_model(tokens_tensor, token_type_ids=segments_tensors)
                    predictions = outputs[0]
                predicted_tokens = []
                indices = [i+1 for i in indices]
                for i in indices:
                    predicted_tokens.append(bert_tokenizer.convert_ids_to_tokens(
                        [torch.argmax(predictions[0, i]).item()])[0])
                for j in range(len(predicted_tokens)):
                    tokens[indices[j]] = predicted_tokens[j]
                tokens.remove("[SEP]")
                tokens.remove("[CLS]")
                final_string = " ".join(tokens)
                """
                # token-by-token correction with the `spell` checker
                new_string = []
                for i in line.split(" "):
                    new_string.append(spell.correction(i))
                line = " ".join(new_string)
                w_i, c_i = get_word_and_char_indices(line)

            # check if model prediction is incorrect, if yes, continue
            model_prediction = predict(w_i, c_i)
            if tag != model_prediction:
                # already incorrect, no attack needed
                inc_count += 1
                inc_count_per_attack[0] += 1.0
                continue

            found_incorrect = False

            worst_example = line
            worst_confidence = 1.0
            worst_idx = -1
            ignore_indices = set()

            for attack_count in range(1, 1 + NUM_ATTACKS):

                ignore_indices.add(worst_idx)

                if 'drop' in type_of_attack:
                    gen_attacks = attacks.drop_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                elif 'swap' in type_of_attack:
                    gen_attacks = attacks.swap_one_attack(worst_example, include_ends=params['include_ends'])
                elif 'key' in type_of_attack:
                    gen_attacks = attacks.key_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                elif 'add' in type_of_attack:
                    gen_attacks = attacks.add_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                elif 'all' in type_of_attack:
                    gen_attacks = attacks.all_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                else:
                    raise ValueError('unknown attack type: %s' % type_of_attack)
                for idx, adversary in gen_attacks:
                    original_adv = adversary  # keep the uncorrected adversary
                    if params['is_spell_check']:
                        # (the same disabled BERT corrector as above would
                        # apply here; token-level correction is used instead)
                        new_string = []
                        for i in adversary.split(" "):
                            new_string.append(spell.correction(i))
                        adversary = " ".join(new_string)
                    w_i, c_i = get_word_and_char_indices(adversary)
                    adv_pred = predict(w_i, c_i)
                    confidence = get_confidence(w_i, c_i)

                    if confidence < worst_confidence:
                        worst_confidence = confidence
                        worst_idx = idx
                        worst_example = adversary

                    if adv_pred != tag:
                        # found incorrect prediction
                        found_incorrect = True
                        break

                if found_incorrect:
                    inc_count += 1.0
                    inc_count_per_attack[attack_count] += 1.0
                    if params['analyse']:
                        error_analyser[line] = {}
                        error_analyser[line]['adversary'] = original_adv.split()[idx]
                        error_analyser[line]['correction'] = adversary.split()[idx]
                        error_analyser[line]['idx'] = idx

                    break
    else:
        lines, tags = read_valid_lines_single(filename)
        spell_check_model = BertForMaskedLM.from_pretrained(
            "bert-base-uncased", cache_dir="/data/kashyap_data/")
        spell_check_model.eval()
        spell_check_model.to('cuda')
        # no shuffling in this branch

        # truncate the data in small (or computation-heavy) modes
        if params['small']:
            lines = lines[:200]
            tags = tags[:200]
        if params['small'] and params['sc_atd']:
            lines = lines[:99]
            tags = tags[:99]

        inc_count = 0.0
        inc_count_per_attack = [0.0 for _ in range(NUM_ATTACKS+1)]
        error_analyser = {}
        for line in tqdm(lines):
            tag = line[2]  # each line is a list: text at [0], gold tag at [2]
            w_i, c_i = get_word_and_char_indices(line)
            if params['is_spell_check']:
                new_string = []
                for i in line[0].split(" "):
                    new_string.append(spell.correction(i))
                line[0] = " ".join(new_string)
                w_i, c_i = get_word_and_char_indices(line)

            # check if model prediction is incorrect, if yes, continue
            model_prediction = predict(w_i, c_i)
            if tag != model_prediction:
                # already incorrect, no attack needed
                inc_count += 1
                inc_count_per_attack[0] += 1.0
                continue

            found_incorrect = False

            worst_example = line[0]  # could also make a random choice
            worst_confidence = 1.0
            worst_idx = -1
            ignore_indices = set()

            for attack_count in range(1, 1 + NUM_ATTACKS):

                ignore_indices.add(worst_idx)

                if 'drop' in type_of_attack:
                    gen_attacks = attacks.drop_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                elif 'swap' in type_of_attack:
                    gen_attacks = attacks.swap_one_attack(worst_example, include_ends=params['include_ends'])
                elif 'key' in type_of_attack:
                    gen_attacks = attacks.key_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                elif 'add' in type_of_attack:
                    gen_attacks = attacks.add_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                elif 'all' in type_of_attack:
                    gen_attacks = attacks.all_one_attack(worst_example, ignore_indices, include_ends=params['include_ends'])
                else:
                    raise ValueError('unknown attack type: %s' % type_of_attack)
                for idx, adversary in gen_attacks:
                    original_adv = adversary  # keep the uncorrected adversary
                    if params['is_spell_check']:
                        new_string = []
                        for i in adversary.split(" "):
                            new_string.append(spell.correction(i))
                        adversary = " ".join(new_string)
                    w_i, c_i = get_word_and_char_indices_adv(adversary)
                    adv_pred = predict(w_i, c_i)
                    confidence = get_confidence(w_i, c_i)

                    if confidence < worst_confidence:
                        worst_confidence = confidence
                        worst_idx = idx
                        worst_example = adversary

                    if adv_pred != tag:
                        # found incorrect prediction
                        found_incorrect = True
                        break

                if found_incorrect:
                    inc_count += 1.0
                    inc_count_per_attack[attack_count] += 1.0
                    if params['analyse']:
                        # key on the text: `line` itself is a list (unhashable)
                        error_analyser[line[0]] = {}
                        error_analyser[line[0]]['adversary'] = original_adv.split()[idx]
                        error_analyser[line[0]]['correction'] = adversary.split()[idx]
                        error_analyser[line[0]]['idx'] = idx

                    break

    for num in range(NUM_ATTACKS + 1):
        log.pr_red(
            'adversarial accuracy of the model after %d attacks = %.2f' %
            (num, 100. * (1 - sum(inc_count_per_attack[:num + 1]) / len(lines))))

    if params['analyse']:
        curr_time = datetime.datetime.now().strftime("%B_%d_%I:%M%p")
        pickle.dump(error_analyser, open("error_analyser_" + str(curr_time) + ".p", 'wb'))

    return None
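
For reference, the accuracy bookkeeping shared by Examples #3 and #4 can be checked in isolation: inc_count_per_attack[k] counts examples first broken at attack k (k = 0 meaning the clean input was already misclassified), so the reported accuracy after k attacks is one minus the cumulative failure fraction. A toy calculation with made-up counts:

n_examples = 10
inc_count_per_attack = [1.0, 2.0, 1.0]  # made-up counts for NUM_ATTACKS = 2
for num in range(len(inc_count_per_attack)):
    acc = 100. * (1 - sum(inc_count_per_attack[:num + 1]) / n_examples)
    print('adversarial accuracy after %d attacks = %.2f' % (num, acc))
# prints 90.00, then 70.00, then 60.00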