def run_test(input_doc, model):
    """Evaluate a trained ADU *model* on *input_doc*.

    The document is run through feature selection and embedding first,
    mirroring the preprocessing done at training time.

    Returns the (accuracy, precision, recall, f1) tuple from test_model.
    """
    prepared = filter_feats(input_doc, load=True)
    print("Filtered feats")
    prepared = add_embeddings(prepared)
    print("Added embeds")
    accuracy, precision, recall, f1_score = test_model(model, prepared)
    return accuracy, precision, recall, f1_score
def run_train(input_doc):
    """Run feature selection and embedding on *input_doc*, then fit the ADU model.

    Returns the fitted ADU classifier.
    """
    prepared = filter_feats(input_doc, load=True)
    print("Finished Feature selection")
    prepared = add_embeddings(prepared)
    print("Added Embeddings")
    trained = fit_model(prepared)
    print("Fit ADU Model")
    return trained
def run_clpr_test(input_doc, model):
    """Evaluate a trained claim/premise (CLPR) *model* on *input_doc*.

    The document is run through feature selection and embedding, then only
    the feature vectors of sentences labelled as ADUs (label == 1) are kept
    as CLPR features — the CLPR classifier operates on ADU sentences only.

    Returns the (accuracy, precision, recall, f1) tuple from test_clpr_model.
    """
    input_doc = filter_feats(input_doc, load=True)
    print("Filtered feats")
    input_doc = add_embeddings(input_doc)
    # Comprehension replaces the original manual append-loop (same result).
    input_doc._.CLPR_Features = [
        feats
        for lbl, feats in zip(input_doc._.Labels, input_doc._.Features)
        if lbl == 1
    ]
    print("Added embeds")
    acc, prec, rec, f1 = test_clpr_model(model, input_doc)
    return acc, prec, rec, f1
def run_clpr_train(input_doc):
    """Run feature selection/embedding on *input_doc* and fit the CLPR model.

    Only feature vectors of sentences labelled as ADUs (label == 1) are used
    as CLPR training features.

    Returns the fitted CLPR classifier.
    """
    input_doc = filter_feats(input_doc, load=True)
    print("Finished Feature selection")
    input_doc = add_embeddings(input_doc)
    # Comprehension replaces the original manual append-loop (same result).
    input_doc._.CLPR_Features = [
        feats
        for lbl, feats in zip(input_doc._.Labels, input_doc._.Features)
        if lbl == 1
    ]
    print("Added Embeddings")
    # Renamed from the original's misleading `adu_model`: this is the CLPR model.
    clpr_model = fit_clpr_model(input_doc)
    print("Fit CLPR Model")
    return clpr_model
def run_production(input_doc):
    """Apply the full classification pipeline (ADU -> CLPR -> MC) to *input_doc*.

    Returns the document produced by the final major-claim prediction step.
    """
    input_doc = filter_feats(input_doc, load=True)
    input_doc = add_embeddings(input_doc)
    # NOTE(review): each predict* step presumably mutates input_doc in place
    # (only the last return value is used) — statement order kept as-is.
    doc = predict(input_doc)
    clpr_feats = [
        feats
        for lbl, feats in zip(input_doc._.Labels, input_doc._.Features)
        if lbl == 1
    ]
    # Fallback: with fewer than two predicted ADUs there is nothing useful
    # for the CLPR stage, so treat every sentence as an ADU.
    if len(clpr_feats) < 2:
        input_doc._.Labels = [1] * len(input_doc._.Labels)
        clpr_feats = input_doc._.Features
    input_doc._.CLPR_Features = clpr_feats
    doc = predict_clpr(input_doc)
    doc = predict_mc(input_doc)
    return doc
def single_run(input_files, label_files, test_files, test_labels, adu_mode="true"):
    """Train a CLPR model on the train files and evaluate it on the test files.

    adu_mode == "true": evaluate on gold ADU labels via run_clpr_test
    (results are computed but not returned or printed here).
    adu_mode == "classified": first run the ADU classifier, then evaluate the
    CLPR model only on sentences the ADU stage got right.
    """
    doc = read_files(input_files, label_files)
    print("Read Train Files")
    cl_result = run_task.run_clpr_train(doc)
    print("Finished Training")
    t_doc = read_files(test_files, test_labels)
    print("Read Test Files")
    if adu_mode == "true":
        # NOTE(review): these metrics are discarded — confirm whether they
        # should be printed or returned.
        acc, prec, rec, f1 = run_task.run_clpr_test(t_doc, cl_result)
    elif adu_mode == "classified":
        # Keep the gold ADU labels before the classifier overwrites t_doc._.Labels.
        orig_adus_labels = t_doc._.Labels
        t_doc = filter_feats(t_doc, load=True)
        print("Filtered feats")
        t_doc = add_embeddings(t_doc)
        print("Added Embdes")
        # ADU classification: rewrites t_doc._.Labels with predicted labels.
        t_doc = classify.predict(t_doc)
        # Collect features of sentences the ADU classifier marked as ADUs (1).
        clpr_feats = []
        for idx, l in enumerate(t_doc._.Labels):
            if l == 1:
                clpr_feats.append(t_doc._.Features[idx])
        t_doc._.CLPR_Features = clpr_feats
        feature = t_doc._.CLPR_Features
        # presumably gold claim/premise labels aligned with CLPR_Features —
        # TODO confirm how CLPR_Labels is populated for classified ADUs.
        label = t_doc._.CLPR_Labels
        feature = np.asarray(feature)
        predictions = cl_result.predict(feature).tolist()
        # Walk the gold ADU labels; cl_iter indexes into the CLPR
        # label/prediction arrays. A prediction only counts as correct when
        # the sentence is a gold ADU, the ADU classifier agreed, and the
        # CLPR prediction matches the CLPR label.
        cl_iter = 0
        correct_count = 0
        for idx, l in enumerate(orig_adus_labels):
            if cl_iter < len(predictions):
                if orig_adus_labels[idx] == 1:
                    if orig_adus_labels[idx] == t_doc._.Labels[idx]:
                        if label[cl_iter] == predictions[cl_iter]:
                            correct_count += 1
                    # NOTE(review): cl_iter advances per gold ADU, while
                    # `predictions` is aligned to *classified* ADUs — verify
                    # the intended alignment; a false-positive ADU shifts it.
                    cl_iter += 1
            else:
                break
        # NOTE(review): divides by ALL sentences, not ADUs only — this is
        # closer to an accuracy over sentences than a precision; confirm.
        prec = correct_count / len(orig_adus_labels)
        print("Precision:\tCLPR\t%8.8f" % (prec))
def LOOCV(input_files, label_files, test_files, test_labels, n_runs=5, adu_mode="true"):
    """Run *n_runs* train/test rounds of the CLPR model and print averaged scores.

    Each round shuffles the training files, trains a CLPR model, and evaluates
    it on the last test file. adu_mode == "true" evaluates against gold ADU
    labels; adu_mode == "classified" evaluates only on sentences the ADU
    classifier predicted correctly.
    """
    avg_cl_acc = 0.0
    avg_cl_prec = 0.0
    avg_cl_rec = 0.0
    avg_cl_f1 = 0.0
    for i in range(n_runs):
        print(i)
        input_files, label_files = shuffle(input_files, label_files)
        # BUGFIX: the original reassigned test_files/test_labels themselves
        # (test_files = test_files[-1]), so from the second run onward it
        # indexed into a single element instead of the list. Use run-local
        # names so every run selects from the full lists.
        run_test_files = test_files[-1]
        run_test_labels = test_labels[-1]
        doc = read_files(input_files, label_files)
        print("Read Train Files")
        cl_result = run_task.run_clpr_train(doc)
        print("Finished Training")
        t_doc = read_files(run_test_files, run_test_labels)
        print("Read Test Files")
        if adu_mode == "true":
            acc, prec, rec, f1 = run_task.run_clpr_test(t_doc, cl_result)
            avg_cl_acc += acc
            avg_cl_prec += prec
            avg_cl_rec += rec
            avg_cl_f1 += f1
        elif adu_mode == "classified":
            # Keep gold labels/sentences before the ADU classifier overwrites them.
            orig_adus_labels = t_doc._.Labels
            orig_adus = t_doc._.ADU_Sents
            t_doc = filter_feats(t_doc, load=True)
            print("Filtered feats")
            t_doc = add_embeddings(t_doc)
            print("Added Embdes")
            t_doc = classify.predict(t_doc)
            # Features of sentences the ADU classifier marked as ADUs (label 1).
            t_doc._.CLPR_Features = [
                t_doc._.Features[idx]
                for idx, lbl in enumerate(t_doc._.Labels)
                if lbl == 1
            ]
            label = t_doc._.CLPR_Labels
            feature = np.asarray(t_doc._.CLPR_Features)
            predictions = cl_result.predict(feature).tolist()
            cl_iter = 0
            correct_count = 0
            for idx, lbl in enumerate(orig_adus_labels):
                # BUGFIX: bounds guard (present in single_run) — the original
                # could raise IndexError when predictions ran out.
                if cl_iter >= len(predictions):
                    break
                if orig_adus_labels[idx] == 1:
                    if orig_adus_labels[idx] == t_doc._.Labels[idx]:
                        if label[cl_iter] == predictions[cl_iter]:
                            correct_count += 1
                    cl_iter += 1
            acc = correct_count / len(orig_adus)
            # BUGFIX: the original accumulated `prec`, which is undefined in
            # this branch (NameError unless left over from elsewhere);
            # accumulate the accuracy that was just computed.
            avg_cl_acc += acc
    avg_cl_acc *= 1 / n_runs
    print("Avg Accuracy:\tCLPR\t%8.8f" % (avg_cl_acc))
    if adu_mode == "true":
        avg_cl_prec *= 1 / n_runs
        avg_cl_rec *= 1 / n_runs
        avg_cl_f1 *= 1 / n_runs
        print("Avg Precision:\tCLPR\t%8.8f" % (avg_cl_prec))
        print("Avg Recall:\tCLPR\t%8.8f" % (avg_cl_rec))
        print("Avg F1-Score:\tCLPR\t%8.8f" % (avg_cl_f1))
def _preset_adus(
    graph: ag.Graph, preset_mc: bool = True
) -> t.Tuple[Doc, t.Dict[str, t.List[Relation]], ag.Graph]:
    """Build a Doc from *graph*'s ADU nodes and run the CLPR/relation pipeline.

    Every inode of the graph is treated as one sentence (label 1); the
    major-claim list is taken from the graph when *preset_mc* is True,
    otherwise predicted.

    Returns (doc, relation types keyed by string, reconstructed graph).
    """
    sents = []
    labels = []
    mc_list = []
    for node in graph.inodes:
        sents.append(node.raw_text)
        labels.append(1)
        mc_list.append(1 if node.major_claim else 0)
    doc_text = "\n\n".join(sents)
    # Temporarily force sentence segmentation to match the graph's ADU nodes.
    parse.add_pipe(_preset_segment, name="preset_segment", before="parser")
    doc = parse(doc_text)
    parse.remove_pipe("preset_segment")
    total_inodes = len(graph.inodes)
    total_sents = len(list(doc.sents))
    # The parser may still split/merge sentences differently than the graph;
    # pad or truncate the label lists so they line up with doc.sents.
    if total_sents > total_inodes:
        labels += [1] * (total_sents - total_inodes)
        mc_list += [0] * (total_sents - total_inodes)
    elif total_sents < total_inodes:
        labels = labels[:total_sents]
        mc_list = mc_list[:total_sents]
    # Ensure at least one major claim exists (fall back to the first sentence).
    if 1 not in mc_list:
        mc_list[0] = 1
    if preset_mc:
        doc._.MC_List = mc_list
    else:
        doc._.MC_List = predict_mc(doc)
    doc._.Labels = labels
    doc._.key = graph.name
    doc = set_features(doc)
    doc = filter_feats(doc, load=True)
    doc = add_embeddings(doc)
    # Every sentence is an ADU here, so all features feed the CLPR stage.
    doc._.CLPR_Features = doc._.Features
    doc = predict_clpr(doc)
    rel_types = attack_support.classify(doc._.ADU_Sents)
    # Create graph with relationships
    graph = construct_graph.main(doc, rel_types, preset_mc)
    return doc, rel_types, graph