import json from evaluation.evaluator import Evaluator if __name__ == '__main__': with open('dataset/dev-predictions-final-it4.json', 'r') as f: bad_format_predictions = json.loads(f.read()) predictions = {} for question_id, predictions_list in bad_format_predictions.iteritems( ): predictions[question_id] = predictions_list[0] evaluator = Evaluator('dataset/dev.json') print evaluator.ExactMatch(predictions) print evaluator.F1(predictions)
# dataFile = "/Users/Jian/Data/research/squad/dataset/proto/dev-annotated.proto" dataFile = "./dev-annotated.proto" # predFile = "/Users/Jian/Data/research/squad/output/non-learning-baseline/uni-bi-1460521688980_new.predict" predFile = "./random-guess.json" agent = RandGuessAgent(randSeed=0, articleLevel=False, topK=1) agent.LoadData(dataFile) agent.LoadStopWords() agent.Predict(debug=False) agent.DumpPrediction(predFile) # jsonDataFile = "/Users/Jian/Data/research/squad/dataset/json/dev.json" jsonDataFile = "./dev-v1.0.json" evaluator = Evaluator(jsonDataFile) exactMatchRate = evaluator.ExactMatch(agent.predictions) F1 = evaluator.F1(agent.predictions) print "exact rate ", exactMatchRate print "F1 rate ", F1 # evalCandidateFile = "/Users/Jian/Data/research/squad/dataset/proto/dev-candidatesal.proto" # evalOrigFile = "/Users/Jian/Data/research/squad/dataset/proto/dev-annotated.proto" # vocabPath = "/Users/Jian/Data/research/squad/dataset/proto/vocab_dict" # evalCandidateFile = "./dev-candidatesal.proto" # evalOrigFile = "./dev-annotated.proto" # vocabPath = "./vocab_dict/proto/vocab_dict" # sampleAgent = Agent(floatType=tf.float32, idType=tf.int32, lossType="max-margin", articleLevel=agent.articleLevel) # sampleAgent.LoadEvalData(evalCandidateFile, evalOrigFile, doDebug=False)
predFile = "./output/dev-predictions-it3.json" jsonDataFile = "./dataset/json/dev.json" # predFile = "./dev-predictions-it3.json" # jsonDataFile = "./dev.json" with open(predFile, "r") as fp: predDict = json.load(fp) evaluator = Evaluator(jsonDataFile) exactMatchRateList = list() F1List = list() for dist in sorted(editDistGroup.keys()): predSubDict = dict() for qaId, _ in editDistGroup[dist]: predSubDict[qaId] = predDict[qaId] exactMatchRate = evaluator.ExactMatch(predSubDict) F1 = evaluator.F1(predSubDict) exactMatchRateList.append(exactMatchRate) F1List.append(F1) print "edit dist ", dist print "number of sample ", len(editDistGroup[dist]) print "exact match ", exactMatchRate print "F1 ", F1 print allDist = [dist for _, dist in editDist.iteritems()] # print np.mean(np.array(allDist) ), np.std(np.array(allDist) ) # print np.max(np.array(allDist) ) fileName = "/Users/Jian/Data/research/squad/paper/figure/edit-dist-hist.pdf" fig = plt.figure(figsize=(10, 5))
# Measure inter-annotator agreement on the dev set: count how many of the
# first three reference answers coincide after normalization, and score one
# held-out human answer per question as if it were a model prediction.
# NOTE(review): this chunk appears truncated — `articles` and `predictions`
# are defined earlier in the file, and the per-tag loop at the bottom
# continues past this excerpt (`num_correct`/`total_f1` are initialized but
# not yet used here).
num_same_counts = Counter()
for article in articles:
    for paragraph in article['paragraphs']:
        for qa in paragraph['qas']:
            if len(qa['answers']) >= 3:
                # Bucket by agreement among the first three answers:
                # 3 distinct cleaned strings -> bucket 1, ..., all
                # identical -> bucket 3.
                num_same_counts[3 - len(
                    set([
                        Evaluator.CleanAnswer(answer['text'])
                        for answer in qa['answers'][0:3]
                    ])) + 1] += 1
            if len(qa['answers']) > 1:
                # Use the second human answer as the "prediction";
                # pop() removes it from qa['answers'], presumably so the
                # evaluator (built from `articles` below) does not also
                # count it as a reference — verify against Evaluator.
                predictions[qa['id']] = qa['answers'].pop(1)['text']
evaluator = Evaluator(articles=articles)
print 'Exact match:', round(evaluator.ExactMatch(predictions), 1)
print 'F1:', round(evaluator.F1(predictions), 1)
# Report the distribution of agreement buckets as percentages.
total_num_same_count = sum(num_same_counts.values())
for num_same, count in sorted(num_same_counts.items()):
    print num_same, 'same:', round(100.0 * count / total_num_same_count, 1)
# Per-answer-type breakdown, using the question-id -> tag map on disk.
with open('dataset/dev-answertypetags.json') as fileobj:
    tags = json.loads(fileobj.read())
print len(tags), 'tagged questions'
for tag, _ in Counter(tags.values()).most_common():
    num_correct = 0
    total_f1 = 0
    num_total = 0
    for question_id, _ in filter(lambda x: x[1] == tag, tags.items()):
        num_total += 1