def evaluate_baseline(num_lda_models):
    """Rank topic documents by LDA topic overlap and score against the ground truth."""
    print('In evaluate baseline: building topic models.', flush=True)
    build_model(num_lda_models)
    with open("topic_models_top.pkl", "rb") as f:
        models, dictionary = pickle.load(f)
    # Loaded for parity with the Bayes evaluators; the baseline itself does
    # not use the classifier.
    try:
        with open("classifier.pkl", "rb") as f:
            p_class, p_label = pickle.load(f)
    except FileNotFoundError:
        bayes_EM(models, dictionary)

    tp = defaultdict(int)
    fp = defaultdict(int)
    tn = defaultdict(int)
    fn = defaultdict(int)
    for assignment, questions in ground_truth.get_groundTruth().items():
        for question, ground in questions.items():
            with open('questions/' + assignment + "/" + question, encoding='utf8') as f:
                try:
                    txt1 = f.read()
                except UnicodeDecodeError:
                    continue
            txt1 = process([txt1])
            bow1 = dictionary.doc2bow(txt1[0])
            results = {}
            for file in os.listdir("topics/"):
                with open("topics/" + file, encoding='utf8') as doc2:
                    txt = doc2.read()
                txt = process([txt])
                bow2 = dictionary.doc2bow(txt[0])
                # Overlap score: sum over shared topics of the product of the
                # two documents' topic weights (a dot product, not a true KL
                # divergence).
                similarity = 0
                for lda in models:
                    p = lda[bow1]
                    q = lda[bow2]
                    for y, w1 in p:
                        for x, w0 in q:
                            if x == y:
                                similarity += w0 * w1
                results[file] = similarity
            tops = sorted(results, key=results.get, reverse=True)[:len(ground)]
            for label in labels:
                if label in tops and label in ground:
                    tp[label] += 1
                elif label in tops and label not in ground:
                    fp[label] += 1
                elif label in ground and label not in tops:
                    fn[label] += 1
                else:
                    tn[label] += 1

    precision_dict = defaultdict(int)
    recall_dict = defaultdict(int)
    f1_dict = defaultdict(int)
    for label in labels:
        if tp[label] == 0:
            # No true positives: leave this label's metrics at 0.
            print(label, flush=True)
            continue
        precision_dict[label] = tp[label] / (fp[label] + tp[label])
        recall_dict[label] = tp[label] / (fn[label] + tp[label])
        f1_dict[label] = 2 * (precision_dict[label] * recall_dict[label]) / (
                precision_dict[label] + recall_dict[label])
    # Use len(labels) rather than a hard-coded 5 so the label set can change.
    macro_precision = sum(precision_dict.values()) / len(labels)
    macro_recall = sum(recall_dict.values()) / len(labels)
    macro_f1 = sum(f1_dict.values()) / len(labels)
    micro_precision = sum(tp.values()) / (sum(tp.values()) + sum(fp.values()))
    micro_recall = sum(tp.values()) / (sum(tp.values()) + sum(fn.values()))
    micro_f1 = 2 * (micro_precision * micro_recall) / (micro_recall + micro_precision)
    print("### PER-CLASS METRICS ###", flush=True)
    print("PRECIS RECALL F1    ", flush=True)
    for label in labels:
        print(
            f"{precision_dict[label]:6.4f} {recall_dict[label]:6.4f} {f1_dict[label]:6.4f} {label.upper()}",
            flush=True)
    print()
    print("### AVERAGED METRICS ###", flush=True)
    print()
    print("MICRO AVERAGED", flush=True)
    print("PRECIS RECALL F1    ", flush=True)
    print(f"{micro_precision:6.4f} {micro_recall:6.4f} {micro_f1:6.4f}", flush=True)
    print()
    print("MACRO AVERAGED", flush=True)
    print("PRECIS RECALL F1    ", flush=True)
    print(f"{macro_precision:6.4f} {macro_recall:6.4f} {macro_f1:6.4f}", flush=True)
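# A minimal standalone sketch of the overlap score evaluate_baseline computes
# between two documents' LDA topic distributions: a dot product over the
# topics present in both sparse vectors. The distributions in the example are
# illustrative values, not project output.
def topic_overlap(p, q):
    # p, q: gensim-style sparse topic vectors, i.e. lists of
    # (topic_id, weight) pairs.
    weights = dict(q)
    return sum(w1 * weights.get(topic, 0.0) for topic, w1 in p)

# Example: only topic 2 is shared, so the score is 0.5 * 0.6 = 0.3.
assert abs(topic_overlap([(2, 0.5), (7, 0.5)], [(2, 0.6), (3, 0.4)]) - 0.3) < 1e-9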
# Renamed from evaluate_bayes so it is not shadowed by the definition of the
# same name below; this variant evaluates only on the held-out test_set.
def evaluate_bayes_on_test_set():
    """Evaluate the EM-trained Bayes classifier on the random test split."""
    try:
        with open("topic_models.pkl", "rb") as f:
            models, dictionary = pickle.load(f)
    except FileNotFoundError:
        print('In evaluate bayes (test set): model not found. Building it.')
        build_model()
        with open("topic_models.pkl", "rb") as f:
            models, dictionary = pickle.load(f)
    try:
        with open("classifier.pkl", "rb") as f:
            p_class, p_label = pickle.load(f)
    except FileNotFoundError:
        bayes_EM(models, dictionary)
        with open("classifier.pkl", "rb") as f:
            p_class, p_label = pickle.load(f)

    tp = defaultdict(int)
    fp = defaultdict(int)
    tn = defaultdict(int)
    fn = defaultdict(int)
    for assignment, question in test_set:
        with open('questions/' + assignment + "/" + question, encoding='utf8') as f:
            try:
                txt = f.read()
            except UnicodeDecodeError:
                continue
        results = predict(txt, models, dictionary, p_class, p_label)
        ground = list(ground_truth.get_groundTruth()[assignment][question])
        print(ground)
        tops = sorted(results, key=results.get, reverse=True)[:len(ground)]
        print(tops)
        print()
        for label in labels:
            if label in tops and label in ground:
                tp[label] += 1
            elif label in tops and label not in ground:
                fp[label] += 1
            elif label in ground and label not in tops:
                fn[label] += 1
            else:
                tn[label] += 1

    precision_dict = defaultdict(int)
    recall_dict = defaultdict(int)
    f1_dict = defaultdict(int)
    for label in labels:
        if tp[label] == 0:
            # No true positives: leave this label's metrics at 0.
            print(label)
            continue
        precision_dict[label] = tp[label] / (fp[label] + tp[label])
        recall_dict[label] = tp[label] / (fn[label] + tp[label])
        f1_dict[label] = 2 * (precision_dict[label] * recall_dict[label]) / (
                precision_dict[label] + recall_dict[label])
    macro_precision = sum(precision_dict.values()) / len(labels)
    macro_recall = sum(recall_dict.values()) / len(labels)
    macro_f1 = sum(f1_dict.values()) / len(labels)
    micro_precision = sum(tp.values()) / (sum(tp.values()) + sum(fp.values()))
    micro_recall = sum(tp.values()) / (sum(tp.values()) + sum(fn.values()))
    micro_f1 = 2 * (micro_precision * micro_recall) / (micro_recall + micro_precision)
    print("### PER-CLASS METRICS ###")
    print("PRECIS RECALL F1    ")
    for label in labels:
        print(
            f"{precision_dict[label]:6.4f} {recall_dict[label]:6.4f} {f1_dict[label]:6.4f} {label.upper()}"
        )
    print()
    print("### AVERAGED METRICS ###")
    print()
    print("MICRO AVERAGED")
    print("PRECIS RECALL F1    ")
    print(f"{micro_precision:6.4f} {micro_recall:6.4f} {micro_f1:6.4f}")
    print()
    print("MACRO AVERAGED")
    print("PRECIS RECALL F1    ")
    print(f"{macro_precision:6.4f} {macro_recall:6.4f} {macro_f1:6.4f}")
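# A toy illustration of the multi-label TP/FP/FN/TN counting shared by the
# evaluators above and below. The label names here are hypothetical stand-ins,
# not the project's `labels` list.
def _counting_example():
    toy_labels = ["graphs", "trees", "sorting"]  # hypothetical labels
    tops = {"graphs", "trees"}                   # predicted top labels
    ground = {"graphs", "sorting"}               # ground-truth labels
    for label in toy_labels:
        if label in tops and label in ground:
            outcome = "TP"
        elif label in tops:
            outcome = "FP"
        elif label in ground:
            outcome = "FN"
        else:
            outcome = "TN"
        print(label, outcome)
    # -> graphs TP, trees FP, sorting FN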
def evaluate_bayes(num_topics):
    """Evaluate the EM-trained Bayes classifier over the full ground truth."""
    print('In evaluate bayes: building topic models.', flush=True)
    build_model(num_topics)
    with open("topic_models_top.pkl", "rb") as f:
        models, dictionary = pickle.load(f)
    try:
        with open("classifier.pkl", "rb") as f:
            p_class, p_label = pickle.load(f)
    except FileNotFoundError:
        print('In evaluate bayes: classifier not found. Building it.', flush=True)
        bayes_EM(models, dictionary)
        with open("classifier.pkl", "rb") as f:
            p_class, p_label = pickle.load(f)

    tp = defaultdict(int)
    fp = defaultdict(int)
    tn = defaultdict(int)
    fn = defaultdict(int)
    for assignment, questions in ground_truth.get_groundTruth().items():
        for question, ground in questions.items():
            with open('questions/' + assignment + "/" + question, encoding='utf8') as f:
                try:
                    txt = f.read()
                except UnicodeDecodeError:
                    continue
            results = predict(txt, models, dictionary, p_class, p_label)
            tops = sorted(results, key=results.get, reverse=True)[:len(ground)]
            for label in labels:
                if label in tops and label in ground:
                    tp[label] += 1
                elif label in tops and label not in ground:
                    fp[label] += 1
                elif label in ground and label not in tops:
                    fn[label] += 1
                else:
                    tn[label] += 1

    precision_dict = defaultdict(int)
    recall_dict = defaultdict(int)
    f1_dict = defaultdict(int)
    for label in labels:
        if tp[label] == 0:
            # No true positives: leave this label's metrics at 0.
            print(label, flush=True)
            continue
        precision_dict[label] = tp[label] / (fp[label] + tp[label])
        recall_dict[label] = tp[label] / (fn[label] + tp[label])
        f1_dict[label] = 2 * (precision_dict[label] * recall_dict[label]) / (
                precision_dict[label] + recall_dict[label])
    macro_precision = sum(precision_dict.values()) / len(labels)
    macro_recall = sum(recall_dict.values()) / len(labels)
    macro_f1 = sum(f1_dict.values()) / len(labels)
    micro_precision = sum(tp.values()) / (sum(tp.values()) + sum(fp.values()))
    micro_recall = sum(tp.values()) / (sum(tp.values()) + sum(fn.values()))
    micro_f1 = 2 * (micro_precision * micro_recall) / (micro_recall + micro_precision)
    print("### PER-CLASS METRICS ###", flush=True)
    print("PRECIS RECALL F1    ", flush=True)
    for label in labels:
        print(
            f"{precision_dict[label]:6.4f} {recall_dict[label]:6.4f} {f1_dict[label]:6.4f} {label.upper()}",
            flush=True)
    print()
    print("### AVERAGED METRICS ###", flush=True)
    print()
    print("MICRO AVERAGED", flush=True)
    print("PRECIS RECALL F1    ", flush=True)
    print(f"{micro_precision:6.4f} {micro_recall:6.4f} {micro_f1:6.4f}", flush=True)
    print()
    print("MACRO AVERAGED", flush=True)
    print("PRECIS RECALL F1    ", flush=True)
    print(f"{macro_precision:6.4f} {macro_recall:6.4f} {macro_f1:6.4f}", flush=True)
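# All three evaluators duplicate the same metric block. A hedged sketch of
# that logic factored into one helper (not wired in, so the originals above
# stay untouched); it assumes the same tp/fp/fn defaultdicts and label list.
def _report_metrics(tp, fp, fn, label_list):
    precision, recall, f1 = {}, {}, {}
    for label in label_list:
        if tp[label] == 0:
            precision[label] = recall[label] = f1[label] = 0.0
            continue
        precision[label] = tp[label] / (tp[label] + fp[label])
        recall[label] = tp[label] / (tp[label] + fn[label])
        f1[label] = (2 * precision[label] * recall[label]
                     / (precision[label] + recall[label]))
    # Macro: average per-label metrics; micro: pool counts across labels.
    macro = tuple(sum(d.values()) / len(label_list) for d in (precision, recall, f1))
    micro_p = sum(tp.values()) / (sum(tp.values()) + sum(fp.values()))
    micro_r = sum(tp.values()) / (sum(tp.values()) + sum(fn.values()))
    micro_f = 2 * micro_p * micro_r / (micro_p + micro_r)
    return precision, recall, f1, macro, (micro_p, micro_r, micro_f)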
from collections import defaultdict
import math
import os
import pickle
import random

import nltk
import spacy
from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel
from nltk.corpus import stopwords

import ground_truth  # assumed local module providing get_groundTruth()

# Build the train/test split over (assignment, question) pairs from the
# ground truth, keeping ("a3", "p2") out of the training set.
train_set = []
test_set = []
dataset = []
for assignment, questions in ground_truth.get_groundTruth().items():
    for each in questions.keys():
        dataset.append((assignment, each))
train_num = 2
for i in range(train_num):
    choice = random.choice(dataset)
    while choice == ("a3", "p2"):
        choice = random.choice(dataset)
    train_set.append(choice)
    dataset.remove(choice)
test_set = dataset
print(test_set)

nltk.download("wordnet")
nltk.download("stopwords")
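# A minimal usage sketch (assumed entry point; the real driver may live
# elsewhere in the project). Seeding would make the random train/test split
# above reproducible, and the topic counts are illustrative values only.
if __name__ == "__main__":
    random.seed(0)  # would need to run before the split above to take effect
    evaluate_baseline(num_lda_models=5)  # illustrative topic-model count
    evaluate_bayes(num_topics=5)         # illustrative topic count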