def print_scores(gold, pred):
    """Print macro-F1 and accuracy of *pred* against *gold*.

    Also emits the per-class breakdown (via ``breakdown_evaluation``) and
    the wall-clock time elapsed inside this function.

    NOTE(review): the timer starts at function entry, so the line labelled
    "Predict time" measures scoring/printing time, not model prediction
    time -- confirm this is the intended label.
    """
    t0 = time.time()
    accuracy = accuracy_score(gold, pred)
    macro_f1 = f1_score(gold, pred, average="macro")
    # Per-class breakdown is printed before the summary line, as before.
    breakdown_evaluation(pred, gold)
    print('F1 macro = {:.3f}, Acc = {:.3f}'.format(macro_f1, accuracy))
    print()
    print("Predict time : {}".format(utils.convert_time(time.time() - t0)))
def train_models(train_file, test_file, gold_file):
    """Train a Slide LID model, persist it, and evaluate on held-out data.

    Trains on *train_file*, saves the model to
    ``saved_models/slide_trained_unigram.dat``, predicts on the texts in
    *test_file*, and prints a breakdown evaluation against the gold labels
    read from *gold_file*.
    """
    slide = lid.Slide()
    slide.train(train_file)
    slide.save_model("saved_models/slide_trained_unigram.dat")

    # sep=r"\t+" is a regex separator: it collapses runs of tabs and forces
    # the python parsing engine. Declare engine="python" explicitly --
    # same behavior, but suppresses pandas' ParserWarning about the
    # silent fallback.
    test_data = pd.read_csv(
        test_file,
        encoding="utf-8",
        sep=r"\t+",
        header=None,
        names=["text"],
        engine="python",
    )
    X_test_raw = test_data["text"].values

    # Assumes predictor index 0 selects the freshly trained unigram
    # model -- TODO confirm against lid.Slide.predict.
    predictor_list = [0]
    predictions = slide.predict(X_test_raw, predictor_list)

    gold_labels = Utils.get_y(gold_file)
    # Positional flags appear to be (human_readable=True,
    # overall_only=False, version=2.0), matching the keyword usage
    # elsewhere in this project -- verify against evaluate.breakdown_evaluation.
    evaluate.breakdown_evaluation(predictions, gold_labels, True, False, 2.0)
'submissions/mms/mms-tfidf-close-none-run3.txt' ], 'nlel': ['submissions/nlel/NLEL_UPV_Autoritas-probfwk-close-none-run2.txt'] } gold_file = 'data/DSLCC-v2.0/gold/test-gold.txt' gold_none_file = 'data/DSLCC-v2.0/gold/test-none-gold.txt' print "Close Submission\n=========" for team in close_submissions: for s in close_submissions[team]: results = [i.strip().split('\t')[-1] for i in io.open(s, 'r')] goldtags = [i.strip().split('\t')[-1] for i in io.open(gold_file, 'r')] results = breakdown_evaluation(results, goldtags, version=2.0, overall_only=True) print s + '\t' + str(results) print print "Close Blinded NE Submission\n=========" for team in close_none_submissions: for s in close_none_submissions[team]: results = [i.strip().split('\t')[-1] for i in io.open(s, 'r')] goldtags = [ i.strip().split('\t')[-1] for i in io.open(gold_none_file, 'r') ] results = breakdown_evaluation(results, goldtags, version=2.0, overall_only=True)
def calculate_accuracy(test_file, gold_labels, slide, predictor_list, human_readable, overall_only):
    """Predict with *slide* on *test_file* and score against *gold_labels*.

    Delegates scoring to ``evaluate.breakdown_evaluation`` and returns its
    overall-accuracy result, echoing it to stdout alongside the predictor
    list that produced it.
    """
    preds = slide.predict(test_file, predictor_list)
    score = evaluate.breakdown_evaluation(
        preds, gold_labels, human_readable, overall_only
    )
    print("overall_accuracy", score, "predictor_list", predictor_list)
    return score
"submissions/mms/mms-tfidf-close-none-run1.txt", "submissions/mms/mms-tfidf-close-none-run2.txt", "submissions/mms/mms-tfidf-close-none-run3.txt", ], "nlel": ["submissions/nlel/NLEL_UPV_Autoritas-probfwk-close-none-run2.txt"], } gold_file = "data/DSLCC-v2.0/gold/test-gold.txt" gold_none_file = "data/DSLCC-v2.0/gold/test-none-gold.txt" print "Close Submission\n=========" for team in close_submissions: for s in close_submissions[team]: results = [i.strip().split("\t")[-1] for i in io.open(s, "r")] goldtags = [i.strip().split("\t")[-1] for i in io.open(gold_file, "r")] results = breakdown_evaluation(results, goldtags, version=2.0, overall_only=True) print s + "\t" + str(results) print print "Close Blinded NE Submission\n=========" for team in close_none_submissions: for s in close_none_submissions[team]: results = [i.strip().split("\t")[-1] for i in io.open(s, "r")] goldtags = [i.strip().split("\t")[-1] for i in io.open(gold_none_file, "r")] results = breakdown_evaluation(results, goldtags, version=2.0, overall_only=True) print s + "\t" + str(results) print print "Open Submission\n=========" for team in open_submissions: for s in open_submissions[team]: