def compute_metrics_token(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)  ## (batch_size, seq_length)

    offset_wise_scores = []
    # print(len(predictions))
    for i, prediction in enumerate(predictions):
        ## Batch Wise
        # print(len(prediction))
        ## validation_spans / validation_offsets_mapping are assumed to be
        ## module-level globals holding gold spans and token offset mappings.
        ground_spans = eval(validation_spans[i])
        predicted_spans = []
        for j, tokenwise_prediction in enumerate(
                prediction[:len(validation_offsets_mapping[i])]):
            if tokenwise_prediction == 1:
                predicted_spans += list(
                    range(
                        validation_offsets_mapping[i][j][0],
                        validation_offsets_mapping[i][j][1],
                    ))
        offset_wise_scores.append(f1(predicted_spans, ground_spans))
    results_offset = np.mean(offset_wise_scores)

    true_predictions = [[p for (p, l) in zip(pred, label) if l != -100]
                        for pred, label in zip(predictions, labels)]
    true_labels = [[l for (p, l) in zip(pred, label) if l != -100]
                   for pred, label in zip(predictions, labels)]

    results = np.mean([
        f1_score(true_label, true_preds)
        for true_label, true_preds in zip(true_labels, true_predictions)
    ])
    return {"Token-Wise F1": results, "Offset-Wise F1": results_offset}
Example #2
def calculate_f1(preds_file, ground_file, out_file):
    ground_spans = pd.read_csv(ground_file)["spans"].apply(
        lambda x: eval(x)).values
    pred_spans = []
    with open(preds_file, "r") as f:
        for line in f:
            line_split = line.split("\t")
            pred_spans.append(eval(line_split[1]))

    avg_f1 = np.mean(  ## avoid shadowing the module-level f1_score
        [f1(pred, gold) for pred, gold in zip(pred_spans, ground_spans)])
    with open(out_file, "w") as f:
        f.write(str(avg_f1))
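
Hypothetical usage, assuming the predictions file holds one "<id>\t<span list>" line per example (the format written by the predict snippet below):

# File names here are illustrative placeholders.
calculate_f1("spans-pred-tsd_test.txt", "data/tsd_test.csv", "f1_out.txt")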
Example #3
def predict(train_file,
            dev_file,
            test_files,
            max_length,
            save_dir,
            max_epochs=100):
    # train_file = "./data/tsd_train.csv"
    # dev_file = "./data/tsd_trial.csv"
    # test_file = "./data/tsd_test.csv"
    # clean_train_file = "./data/clean_train.csv"
    # clean_dev_file = "./data/clean_trial.csv"

    train = read_datafile(train_file)
    dev = read_datafile(dev_file)

    # reduced_train = []
    # for i in train:
    #     if i not in dev:
    #         reduced_train.append(i)

    ## Tune Threshold on Dev
    train_token_labels, train_offset_mapping = list(
        zip(*[
            convert_spans_to_token_labels(text, spans) for spans, text in train
        ]))

    dev_token_labels, dev_offset_mapping = list(
        zip(*[
            convert_spans_to_token_labels(text, spans) for spans, text in dev
        ]))
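    ## convert_spans_to_token_labels is assumed to return, per example, a
    ## token-level label sequence plus a (start, end) character-offset mapping
    ## per whitespace token, so word-level predictions can be projected back
    ## to character offsets further down.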

    train_tokens = [
        [
            word.lower().translate(
                str.maketrans("", "", string.punctuation)
            )  ## Remove punctuation and lowercase
            for word in text.split()
        ] for spans, text in train
    ]
    dev_tokens = [
        [
            word.lower().translate(
                str.maketrans("", "", string.punctuation)
            )  ## Remove punctuation and lowercase
            for word in text.split()
        ] for spans, text in dev
    ]
    train_token_labels_oh = [
        to_categorical(train_token_label, num_classes=3)
        for train_token_label in train_token_labels
    ]
    dev_token_labels_oh = [
        to_categorical(dev_token_label, num_classes=3)
        for dev_token_label in dev_token_labels
    ]
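    ## num_classes=3 assumes a padding label alongside the non-toxic and toxic
    ## token classes (an assumption; the encoding comes from
    ## convert_spans_to_token_labels).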

    rnnsl = RNNSL(max_epochs=max_epochs)

    run_df = rnnsl.fit(
        train_tokens,
        train_token_labels_oh,
        validation_data=(dev_tokens, dev_token_labels_oh),
    )
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    run_df.to_csv(os.path.join(save_dir, "RNNSL_Run.csv"), index=False)
    # rnnsl.set_up_preprocessing(reduced_train_tokens)
    # rnnsl.model = rnnsl.build()

    val_data = (dev_tokens, dev_token_labels)
    rnnsl.tune_threshold(val_data, f1_score)
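    ## tune_threshold is assumed to sweep the decision threshold on the dev
    ## set, keeping the value that maximizes the supplied token-level f1_score.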
    print("=" * 80)
    print("Threshold: ", rnnsl.threshold)
    with open(os.path.join(save_dir, "thresh.txt"), "w") as f:
        f.write(str(rnnsl.threshold))

    token_predictions = rnnsl.get_toxic_offsets(
        val_data[0])  ## Word-level toxic offsets
    print("=" * 80)
    print(
        "F1_score Word Wise on Dev Tokens :",
        np.mean([
            f1_score(token_predictions[i], val_data[1][i][:max_length])
            for i in range(len(val_data[1]))
        ]),
    )

    print("=" * 80)

    ## dev_offset_mapping maps each token index to its (start, end) character offsets
    offset_predictions = []
    for example in range(len(dev_tokens)):
        offset_predictions.append([])
        for token in range(len(dev_tokens[example][:max_length])):
            if token_predictions[example][token] == rnnsl.toxic_label:
                offset_predictions[-1] += list(
                    range(
                        dev_offset_mapping[example][token][0],
                        dev_offset_mapping[example][token][1],
                    ))
    dev_spans = [spans for spans, text in dev]
    dev_texts = [text for spans, text in dev]
    new_offset_predictions = [
        clean_predicted_text(text, offsets)
        for offsets, text in zip(offset_predictions, dev_texts)
    ]

    # for i in range(20):
    #     ground_offsets = dev_spans[i]
    #     old_offsets = offset_predictions[i]
    #     new_offsets = new_offset_predictions[i]
    #     text = dev_texts[i]
    #     print("Text: ", text)
    #     print("Ground: ", get_text_spans(text, ground_offsets))
    #     print("Preds: ", get_text_spans(text, old_offsets))
    #     print("Clean Preds: ", get_text_spans(text, new_offsets))

    avg_dice_score = np.mean([
        f1(preds, gold)
        for preds, gold in zip(new_offset_predictions, dev_spans)
    ])

    print("=" * 80)
    print("Avg Dice Score on Dev: ", avg_dice_score)
    print("=" * 80)

    ## Test predictions
    # print("=" * 80)
    # print("Training on both train and dev for predictions!")
    # print("=" * 80)
    # combo = train + dev

    # combo_token_labels, combo_offset_mapping = list(
    #     zip(*[convert_spans_to_token_labels(text, spans) for spans, text in combo])
    # )
    # combo_tokens = [
    #     [
    #         word.lower().translate(
    #             str.maketrans("", "", string.punctuation)
    #         )  ## Remove Punctuation and make into lower case
    #         for word in text.split()
    #     ]
    #     for spans, text in combo
    # ]
    # combo_token_labels_oh = [
    #     to_categorical(combo_token_label, num_classes=3)
    #     for combo_token_label in combo_token_labels
    # ]

    # rnnsl_2 = RNNSL(max_epochs=10)
    # pred_df = rnnsl_2.fit(combo_tokens, combo_token_labels_oh)
    # pred_df.to_csv("RNNSL_Pred.csv", index=False)
    # rnnsl_2.threshold = rnnsl.threshold  ##Replace with tuned threshold
    # rnnsl_2.set_up_preprocessing(combo_tokens)
    # rnnsl_2.model = rnnsl_2.build()

    rnnsl.model.save(os.path.join(save_dir, "model"))
    for test_file in test_files:
        print(f"Predicting on {test_file}")
        test = read_datafile(test_file)
        test_token_labels, test_offset_mapping = list(
            zip(*[
                convert_spans_to_token_labels(text, spans)
                for spans, text in test
            ]))
        test_tokens = [
            [
                word.lower().translate(
                    str.maketrans("", "", string.punctuation)
                )  ## Remove punctuation and lowercase
                for word in text.split()
            ] for spans, text in test
        ]

        test_token_labels_oh = [
            to_categorical(test_token_label, num_classes=3)
            for test_token_label in test_token_labels
        ]
        test_spans = [spans for spans, text in test]
        test_texts = [text for spans, text in test]

        check_for_mismatch(test_tokens, test_texts, test_offset_mapping)
        final_token_predictions = rnnsl.get_toxic_offsets(test_tokens)
        print("=" * 80)
        print(
            f"F1_score Word Wise on {test_file} Tokens :",
            np.mean([
                f1_score(final_token_predictions[i],
                         test_token_labels[i][:max_length])
                for i in range(len(test_token_labels))
            ]),
        )
        print("=" * 80)

        final_offset_predictions = []
        for example in range(len(test_tokens)):
            final_offset_predictions.append([])
            for token in range(len(test_tokens[example][:max_length])):  # max_length: 192
                if final_token_predictions[example][token] == rnnsl.toxic_label:
                    final_offset_predictions[-1] += list(
                        range(
                            test_offset_mapping[example][token][0],
                            test_offset_mapping[example][token][1],
                        ))
        new_final_offset_predictions = [
            clean_predicted_text(text, offsets)
            for offsets, text in zip(final_offset_predictions, test_texts)
        ]

        avg_dice_score = np.mean([
            f1(preds, gold)
            for preds, gold in zip(new_final_offset_predictions, test_spans)
        ])

        print("=" * 80)
        print("Avg Dice Score on Dev: ", avg_dice_score)
        print("=" * 80)
        with open(
                os.path.join(
                    save_dir,
                    f"eval_scores_{test_file.split('/')[-1].split('.')[0]}.txt"
                ),
                "w",
        ) as f:
            f.write(str(avg_dice_score))

        # for i in range(20):
        #     old_offsets = final_offset_predictions[i]
        #     new_offsets = new_final_offset_predictions[i]
        #     text = test_texts[i]
        #     print("Text: ", text)
        #     print("Preds: ", get_text_spans(text, old_offsets))
        #     print("Clean Preds: ", get_text_spans(text, new_offsets))

        with open(
                os.path.join(
                    save_dir,
                    f"spans-pred-{test_file.split('/')[-1].split('.')[0]}.txt"
                ),
                "w",
        ) as f:
            for i, spans in enumerate(new_final_offset_predictions):
                f.write(f"{i}\t{str(spans)}\n")
Example #4
def dev():
    train_file = "./data/tsd_train.csv"
    dev_file = "./data/tsd_trial.csv"

    train = read_datafile(train_file)
    dev = read_datafile(dev_file)

    reduced_train = []
    for i in train:
        if i not in dev:
            reduced_train.append(i)

    ## Tune Threshold on Dev
    reduced_train_token_labels, reduced_train_offset_mapping = list(
        zip(*[
            convert_spans_to_token_labels(text, spans)
            for spans, text in reduced_train
        ]))

    dev_token_labels, dev_offset_mapping = list(
        zip(*[
            convert_spans_to_token_labels(text, spans) for spans, text in dev
        ]))

    reduced_train_tokens = [
        [
            word.lower().translate(
                str.maketrans("", "", string.punctuation)
            )  ## Remove punctuation and lowercase
            for word in text.split()
        ] for spans, text in reduced_train
    ]
    dev_tokens = [
        [
            word.lower().translate(
                str.maketrans("", "", string.punctuation)
            )  ## Remove punctuation and lowercase
            for word in text.split()
        ] for spans, text in dev
    ]
    reduced_train_token_labels_oh = [
        to_categorical(train_token_label, num_classes=3)
        for train_token_label in reduced_train_token_labels
    ]
    dev_token_labels_oh = [
        to_categorical(dev_token_label, num_classes=3)
        for dev_token_label in dev_token_labels
    ]

    rnnsl = RNNSL()

    run_df = rnnsl.fit(
        reduced_train_tokens,
        reduced_train_token_labels_oh,
        validation_data=(dev_tokens, dev_token_labels_oh),
    )
    run_df.to_csv("RNNSL_Run.csv", index=False)
    # rnnsl.set_up_preprocessing(reduced_train_tokens)
    # rnnsl.model = rnnsl.build()

    val_data = (dev_tokens, dev_token_labels)
    rnnsl.tune_threshold(val_data, f1_score)
    print("=" * 80)
    print("Threshold: ", rnnsl.threshold)
    token_predictions = rnnsl.get_toxic_offsets(
        val_data[0])  ## Word-level toxic offsets
    print("=" * 80)
    print(
        "F1_score Word Wise on Dev Tokens :",
        np.mean([
            f1_score(token_predictions[i], val_data[1][i][:192])
            for i in range(len(val_data[1]))
        ]),
    )
    print("=" * 80)

    ## dev_offset_mapping maps each token index to its (start, end) character offsets
    offset_predictions = []
    for example in range(len(dev_tokens)):
        offset_predictions.append([])
        for token in range(len(dev_tokens[example][:192])):
            if token_predictions[example][token] == rnnsl.toxic_label:
                offset_predictions[-1] += list(
                    range(
                        dev_offset_mapping[example][token][0],
                        dev_offset_mapping[example][token][1],
                    ))
    dev_spans = [spans for spans, text in dev]
    dev_texts = [text for spans, text in dev]
    new_offset_predictions = [
        clean_predicted_text(text, offsets)
        for offsets, text in zip(offset_predictions, dev_texts)
    ]

    for i in range(20):
        ground_offsets = dev_spans[i]
        old_offsets = offset_predictions[i]
        new_offsets = new_offset_predictions[i]
        text = dev_texts[i]
        print("Text: ", text)
        print("Ground: ", get_text_spans(text, ground_offsets))
        print("Preds: ", get_text_spans(text, old_offsets))
        print("Clean Preds: ", get_text_spans(text, new_offsets))

    avg_dice_score = np.mean([
        f1(preds, gold)
        for preds, gold in zip(new_offset_predictions, dev_spans)
    ])

    print("=" * 80)
    print("Avg Dice Score on Dev: ", avg_dice_score)
    print("=" * 80)
Example #5
def main():
    """Train and eval a spacy named entity tagger for toxic spans."""
    # Read training data
    print('loading training data')
    train = read_datafile('data/tsd_train.csv')

    # Read test data
    print('loading test data')
    test = read_datafile('data/tsd_test.csv')

    # Convert training data to Spacy Entities
    nlp = spacy.load("en_core_web_sm")

    print('preparing training data')
    training_data = []
    for n, (spans, text) in enumerate(train):
        doc = nlp(text)
        ents = spans_to_ents(doc, set(spans), 'TOXIC')
        training_data.append((doc.text, {'entities': ents}))

    toxic_tagging = spacy.blank('en')
    toxic_tagging.vocab.strings.add('TOXIC')
    ner = nlp.create_pipe("ner")
    toxic_tagging.add_pipe(ner, last=True)
    ner.add_label('TOXIC')

    pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
    unaffected_pipes = [
        pipe for pipe in toxic_tagging.pipe_names
        if pipe not in pipe_exceptions
    ]

    print('training')
    with toxic_tagging.disable_pipes(*unaffected_pipes):
        toxic_tagging.begin_training()
        for iteration in range(30):
            random.shuffle(training_data)
            losses = {}
            batches = spacy.util.minibatch(training_data,
                                           size=spacy.util.compounding(
                                               4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                toxic_tagging.update(texts,
                                     annotations,
                                     drop=0.5,
                                     losses=losses)
            print("Losses", losses)

    # Score on test data
    print('evaluation')
    scores = []
    for spans, text in test:
        pred_spans = []
        doc = toxic_tagging(text)
        for ent in doc.ents:
            pred_spans.extend(
                range(ent.start_char, ent.start_char + len(ent.text)))
        score = f1(pred_spans, spans)
        scores.append(score)

    test_f1 = pd.DataFrame({'spacy_f1': scores})
    print(
        f"Spacy tagging baseline F1 = {test_f1.spacy_f1.mean():.2f} ± {sem(test_f1.spacy_f1):.2f}"
    )
    test_f1.to_csv('spacy_f1.csv')
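
spans_to_ents is imported, not shown; a minimal sketch of its assumed behavior, grouping contiguous toxic character offsets into spaCy-style (start, end, label) entity triples aligned to the doc's tokens:

def spans_to_ents(doc, spans, label):
    """Group contiguous toxic character offsets into (start, end, label) triples."""
    ents, started, left, right = [], False, 0, 0
    for token in doc:
        token_chars = set(range(token.idx, token.idx + len(token.text)))
        if spans.intersection(token_chars):
            if not started:
                left, started = token.idx, True
            right = token.idx + len(token.text)
        elif started:
            ents.append((left, right, label))
            started = False
    if started:
        ents.append((left, right, label))
    return ents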
Example #6
                        break
                    else:
                        if k >= len(preds):
                            break
                        if preds[k] == 1:
                            predicted_spans[-1] += list(
                                range(offsets[0], offsets[1]))
                        k += 1

            spans = [
                eval(temp_dataset[i]["spans"])
                for i in range(len(temp_dataset))
            ]

            avg_f1_score = np.mean([
                f1(preds, ground)
                for preds, ground in zip(predicted_spans, spans)
            ])
            with open(
                    os.path.join(eval_config.save_dir,
                                 f"spans-pred-{key}.txt"), "w") as f:
                for i, pred in enumerate(predicted_spans):
                    if i == len(predicted_spans) - 1:
                        f.write(f"{i}\t{str(pred)}")
                    else:
                        f.write(f"{i}\t{str(pred)}\n")
            with open(
                    os.path.join(eval_config.save_dir,
                                 f"eval_scores_{key}.txt"), "w") as f:
                f.write(str(avg_f1_score))
Example #7
def main():
    """Train and eval a spacy named entity tagger for toxic spans."""
    # Read training data
    print("loading training data")
    datasets = {}
    # datasets["clean_train"] = read_datafile("../data/clean_train.csv")

    # Read trial data for test.
    # print("loading dev data")
    # dev = read_datafile("../data/tsd_trial.csv")

    # train = train + dev

    # datasets["clean_trial"] = read_datafile("../data/clean_trial.csv")
    datasets["tsd_train"] = read_datafile("../data/tsd_train.csv")
    datasets["tsd_trial"] = read_datafile("../data/tsd_trial.csv")
    datasets["tsd_test"] = read_datafile("../data/tsd_test_spans.csv")

    # Convert training data to Spacy Entities
    nlp = spacy.load("en_core_web_sm")

    print("preparing training data")
    training_data = []
    for n, (spans, text) in enumerate(datasets["tsd_train"]):
        doc = nlp(text)
        ents = spans_to_ents(doc, set(spans), "TOXIC")
        training_data.append((doc.text, {"entities": ents}))

    toxic_tagging = spacy.blank("en")
    toxic_tagging.vocab.strings.add("TOXIC")
    ner = nlp.create_pipe("ner")
    toxic_tagging.add_pipe(ner, last=True)
    ner.add_label("TOXIC")

    pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
    unaffected_pipes = [
        pipe for pipe in toxic_tagging.pipe_names
        if pipe not in pipe_exceptions
    ]

    print("training")
    with toxic_tagging.disable_pipes(*unaffected_pipes):
        toxic_tagging.begin_training()
        for iteration in range(30):
            random.shuffle(training_data)
            losses = {}
            batches = spacy.util.minibatch(training_data,
                                           size=spacy.util.compounding(
                                               4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                toxic_tagging.update(texts,
                                     annotations,
                                     drop=0.5,
                                     losses=losses)
            print("Losses", losses)

    # Score on dev data.
    print("evaluation")
    for dataset in datasets.keys():
        scores = []
        with open(f"spans-pred-{dataset}.txt", "w") as f:
            for i, (spans, text) in enumerate(datasets[dataset]):
                pred_spans = []
                doc = toxic_tagging(text)
                for ent in doc.ents:
                    pred_spans.extend(
                        range(ent.start_char, ent.start_char + len(ent.text)))
                score = semeval2021.f1(pred_spans, spans)
                f.write(f"{i}\t{str(pred_spans)}\n")
                scores.append(score)

        with open(f"eval_scores_{dataset}.txt", "w") as f:
            f.write(str(statistics.mean(scores)))