# Example #1
# 0
def run_test(input_doc, model):
    """Score *model* on *input_doc* and return (accuracy, precision, recall, f1).

    The doc is run through feature filtering and embedding enrichment
    before being handed to ``test_model``.
    """
    input_doc = filter_feats(input_doc, load=True)
    print("Filtered feats")
    input_doc = add_embeddings(input_doc)
    print("Added embeds")
    metrics = test_model(model, input_doc)
    acc, prec, rec, f1 = metrics
    return acc, prec, rec, f1
# Example #2
# 0
def run_train(input_doc):
    """Fit and return the ADU model on *input_doc* after preprocessing."""
    prepared = filter_feats(input_doc, load=True)
    print("Finished Feature selection")
    prepared = add_embeddings(prepared)
    print("Added Embeddings")
    model = fit_model(prepared)
    print("Fit ADU Model")
    return model
# Example #3
# 0
def run_clpr_test(input_doc, model):
    """Score a CLPR *model* on *input_doc*; return (acc, prec, rec, f1).

    Only the features of sentences with a positive ADU label (label == 1)
    are exposed to the CLPR model via ``CLPR_Features``.
    """
    input_doc = filter_feats(input_doc, load=True)
    print("Filtered feats")
    input_doc = add_embeddings(input_doc)
    # Select features of positively-labelled (ADU) sentences.
    input_doc._.CLPR_Features = [
        input_doc._.Features[i]
        for i, lab in enumerate(input_doc._.Labels)
        if lab == 1
    ]
    print("Added embeds")
    acc, prec, rec, f1 = test_clpr_model(model, input_doc)
    return acc, prec, rec, f1
# Example #4
# 0
def run_clpr_train(input_doc):
    """Fit and return the CLPR model on *input_doc* after preprocessing.

    Features of sentences with a positive ADU label (label == 1) are
    copied into ``CLPR_Features`` before fitting.
    """
    input_doc = filter_feats(input_doc, load=True)
    print("Finished Feature selection")
    input_doc = add_embeddings(input_doc)
    input_doc._.CLPR_Features = [
        input_doc._.Features[i]
        for i, lab in enumerate(input_doc._.Labels)
        if lab == 1
    ]
    print("Added Embeddings")
    clpr_model = fit_clpr_model(input_doc)
    print("Fit CLPR Model")
    return clpr_model
# Example #5
# 0
def run_production(input_doc):
    """Run the full ADU → CLPR → MC prediction pipeline on *input_doc*."""
    input_doc = filter_feats(input_doc, load=True)
    input_doc = add_embeddings(input_doc)
    result = predict(input_doc)
    # Collect features of sentences predicted as ADUs (label == 1).
    selected = [
        input_doc._.Features[i]
        for i, lab in enumerate(input_doc._.Labels)
        if lab == 1
    ]
    # Fallback: with fewer than two positive ADUs, treat every sentence
    # as an ADU so the CLPR step has something to work with.
    if len(selected) < 2:
        input_doc._.Labels = [1] * len(input_doc._.Labels)
        selected = input_doc._.Features
    input_doc._.CLPR_Features = selected
    result = predict_clpr(input_doc)
    result = predict_mc(input_doc)
    return result
def single_run(input_files,
               label_files,
               test_files,
               test_labels,
               adu_mode="true"):
    """Train a CLPR model once and evaluate it on the test set.

    adu_mode="true": score against gold ADU labels via run_task.run_clpr_test.
    adu_mode="classified": first predict ADUs with classify.predict, then
    score CLPR only on gold-positive ADUs the classifier also found.
    Results are printed; nothing is returned.
    """
    doc = read_files(input_files, label_files)
    print("Read Train Files")
    cl_result = run_task.run_clpr_train(doc)
    print("Finished Training")

    t_doc = read_files(test_files, test_labels)
    print("Read Test Files")
    if adu_mode == "true":
        acc, prec, rec, f1 = run_task.run_clpr_test(t_doc, cl_result)
    elif adu_mode == "classified":
        # Keep the gold labels before the doc is overwritten below.
        orig_adus_labels = t_doc._.Labels
        t_doc = filter_feats(t_doc, load=True)
        print("Filtered feats")
        t_doc = add_embeddings(t_doc)
        print("Added Embdes")
        t_doc = classify.predict(t_doc)
        # Features of sentences the ADU classifier labelled positive.
        clpr_feats = []
        for idx, l in enumerate(t_doc._.Labels):
            if l == 1:
                clpr_feats.append(t_doc._.Features[idx])
        t_doc._.CLPR_Features = clpr_feats
        feature = t_doc._.CLPR_Features
        label = t_doc._.CLPR_Labels
        feature = np.asarray(feature)
        predictions = cl_result.predict(feature).tolist()
        cl_iter = 0
        correct_count = 0
        # cl_iter walks the CLPR predictions in lock-step with the
        # gold-positive ADUs; a hit is counted only when the ADU
        # classifier agreed with the gold label AND the CLPR prediction
        # matched the CLPR label.
        for idx, l in enumerate(orig_adus_labels):
            if cl_iter < len(predictions):
                if orig_adus_labels[idx] == 1:
                    if orig_adus_labels[idx] == t_doc._.Labels[idx]:
                        if label[cl_iter] == predictions[cl_iter]:
                            correct_count += 1
                    cl_iter += 1
            else:
                break
        # NOTE(review): this divides by the count of ALL gold labels, not
        # of predicted positives — it reads more like an accuracy than a
        # precision; confirm the intended metric.
        prec = correct_count / len(orig_adus_labels)
        print("Precision:\tCLPR\t%8.8f" % (prec))
def LOOCV(input_files,
          label_files,
          test_files,
          test_labels,
          n_runs=5,
          adu_mode="true"):
    """Run *n_runs* train/test rounds and print averaged CLPR metrics.

    adu_mode="true": evaluate against gold ADU labels via
    run_task.run_clpr_test and average acc/prec/rec/f1.
    adu_mode="classified": predict ADUs with classify.predict first, then
    score CLPR only on gold-positive ADUs the classifier also found;
    only the averaged accuracy is reported for this mode.
    Results are printed; nothing is returned.
    """
    avg_cl_acc = 0.0
    avg_cl_prec = 0.0
    avg_cl_rec = 0.0
    avg_cl_f1 = 0.0
    for i in range(n_runs):
        print(i)
        input_files, label_files = shuffle(input_files, label_files)
        # NOTE(review): re-indexing with [-1] every iteration means after
        # the first pass test_files/test_labels are no longer lists —
        # looks unintended for a leave-one-out loop; confirm with caller.
        test_files = test_files[-1]
        test_labels = test_labels[-1]

        doc = read_files(input_files, label_files)
        print("Read Train Files")
        cl_result = run_task.run_clpr_train(doc)
        print("Finished Training")

        t_doc = read_files(test_files, test_labels)
        print("Read Test Files")
        if adu_mode == "true":
            acc, prec, rec, f1 = run_task.run_clpr_test(t_doc, cl_result)
            avg_cl_acc += acc
            avg_cl_prec += prec
            avg_cl_rec += rec
            avg_cl_f1 += f1
        elif adu_mode == "classified":
            # Keep the gold labels before the doc is overwritten below.
            orig_adus_labels = t_doc._.Labels
            orig_adus = t_doc._.ADU_Sents
            t_doc = filter_feats(t_doc, load=True)
            print("Filtered feats")
            t_doc = add_embeddings(t_doc)
            print("Added Embdes")
            t_doc = classify.predict(t_doc)
            # Features of sentences the ADU classifier labelled positive.
            clpr_feats = []
            for idx, l in enumerate(t_doc._.Labels):
                if l == 1:
                    clpr_feats.append(t_doc._.Features[idx])
            t_doc._.CLPR_Features = clpr_feats
            feature = t_doc._.CLPR_Features
            label = t_doc._.CLPR_Labels
            feature = np.asarray(feature)
            predictions = cl_result.predict(feature).tolist()
            cl_iter = 0
            correct_count = 0
            # Count a hit only when the ADU classifier agreed with the
            # gold label AND the CLPR prediction matched the CLPR label.
            # NOTE(review): unlike single_run, cl_iter is not bounded by
            # len(predictions) here; may IndexError if predictions is
            # shorter than the gold-positive count — confirm.
            for idx, l in enumerate(orig_adus_labels):
                if orig_adus_labels[idx] == 1:
                    if orig_adus_labels[idx] == t_doc._.Labels[idx]:
                        if label[cl_iter] == predictions[cl_iter]:
                            correct_count += 1
                    cl_iter += 1
            acc = correct_count / len(orig_adus)
            # BUG FIX: previously accumulated the undefined name `prec`
            # (NameError on the first "classified" round); accumulate the
            # accuracy just computed instead.
            avg_cl_acc += acc
    avg_cl_acc *= 1 / n_runs
    print("Avg Accuracy:\tCLPR\t%8.8f" % (avg_cl_acc))
    if adu_mode == "true":
        avg_cl_prec *= 1 / n_runs
        avg_cl_rec *= 1 / n_runs
        avg_cl_f1 *= 1 / n_runs
        print("Avg Precision:\tCLPR\t%8.8f" % (avg_cl_prec))
        print("Avg Recall:\tCLPR\t%8.8f" % (avg_cl_rec))
        print("Avg F1-Score:\tCLPR\t%8.8f" % (avg_cl_f1))
def _preset_adus(
    graph: ag.Graph,
    preset_mc: bool = True
) -> t.Tuple[Doc, t.Dict[str, t.List[Relation]], ag.Graph]:
    """Build a spaCy ``Doc`` from *graph*'s I-nodes and reconstruct the graph.

    Every I-node is treated as a gold ADU (label 1). Major-claim flags
    come from the graph when ``preset_mc`` is True, otherwise from
    ``predict_mc``. Returns the enriched doc, the relation types
    classified between ADU sentences, and the newly constructed graph.
    """
    sents = []
    labels = []
    mc_list = []

    for node in graph.inodes:
        sent_text = node.raw_text

        # if not sent_text.endswith("."):
        #    sent_text += "."

        sents.append(sent_text)
        labels.append(1)
        mc_list.append(1 if node.major_claim else 0)

    # Blank-line separators so the preset segmenter splits on node boundaries.
    doc_text = "\n\n".join(sents)

    parse.add_pipe(_preset_segment, name="preset_segment", before="parser")
    doc = parse(doc_text)
    parse.remove_pipe("preset_segment")

    total_inodes = len(graph.inodes)
    total_sents = len(list(doc.sents))

    # The segmenter may not reproduce the I-node count exactly; pad or
    # trim the label lists so they match the parsed sentence count.
    if total_sents > total_inodes:
        labels += [1] * (total_sents - total_inodes)
        mc_list += [0] * (total_sents - total_inodes)
    elif total_sents < total_inodes:
        labels = labels[:total_sents]
        mc_list = mc_list[:total_sents]

    # Guarantee at least one major claim (fall back to the first sentence).
    # The emptiness guard avoids an IndexError when no sentences were parsed.
    if mc_list and 1 not in mc_list:
        mc_list[0] = 1

    if preset_mc:
        doc._.MC_List = mc_list
    else:
        doc._.MC_List = predict_mc(doc)

    doc._.Labels = labels
    doc._.key = graph.name
    doc = set_features(doc)
    doc = filter_feats(doc, load=True)
    doc = add_embeddings(doc)
    # All sentences are ADUs here, so every feature row feeds the CLPR step.
    doc._.CLPR_Features = doc._.Features
    doc = predict_clpr(doc)

    rel_types = attack_support.classify(doc._.ADU_Sents)

    # Create graph with relationships
    graph = construct_graph.main(doc, rel_types, preset_mc)

    return doc, rel_types, graph