from evaluation.evaluator import Evaluator

if __name__ == '__main__':
    # Load the articles of the development set.
    with open('dataset/dev.json') as fileobj:
        articles = json.load(fileobj)['data']

    # For every question with more than one reference answer, the second
    # answer is removed from the references and used as the "prediction".
    predictions = {}
    # Histogram over questions with at least three answers, keyed by
    # 3 - (number of distinct cleaned answers among the first three) + 1.
    num_same_counts = Counter()
    for article in articles:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                # The agreement statistic must be computed before the pop()
                # below, which shortens the answer list.
                if len(qa['answers']) >= 3:
                    distinct_answers = set(
                        Evaluator.CleanAnswer(answer['text'])
                        for answer in qa['answers'][0:3])
                    # 3 distinct -> key 1, 2 distinct -> key 2, 1 -> key 3.
                    num_same_counts[3 - len(distinct_answers) + 1] += 1
                if len(qa['answers']) > 1:
                    # pop() mutates `articles` in place, so the Evaluator
                    # built below no longer sees the held-out answer.
                    predictions[qa['id']] = qa['answers'].pop(1)['text']

    evaluator = Evaluator(articles=articles)
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Exact match: %s' % round(evaluator.ExactMatch(predictions), 1))
    print('F1: %s' % round(evaluator.F1(predictions), 1))

    # Percentage of questions falling into each agreement bucket.
    total_num_same_count = sum(num_same_counts.values())
    for num_same, count in sorted(num_same_counts.items()):
        print('%s same: %s' % (
            num_same, round(100.0 * count / total_num_same_count, 1)))

    # NOTE(review): `tags` is not used in the visible part of the script;
    # presumably it is consumed further down - confirm.
    with open('dataset/dev-answertypetags.json') as fileobj:
        tags = json.load(fileobj)
def _CleanedTokenText(tokens):
    """Join the lower-cased words of `tokens` and pass them to Evaluator.CleanAnswer."""
    return Evaluator.CleanAnswer(
        ' '.join(token.word.lower() for token in tokens))


def GetQAPathPairs(origData, candData, anchorFunc=None):
    """Pair question paths with answer paths for exactly covered answers.

    Args:
        origData: mapping from article title to an article object. The code
            reads `article.paragraphs[i].qas` (each qa exposing `id`,
            `question.sentence` and `answers[0].sentence`) and
            `article.paragraphs[i].context.sentence`.
        candData: mapping from article title to a candidates object exposing
            `candidateAnswers` (a sequence of spans with `paragraphIndex`,
            `sentenceIndex`, `spanBeginIndex`, `spanLength`) and `questions`
            (each with `id` and `correctAnswerIndex`). Every title in
            `candData` must also be a key of `origData`.
        anchorFunc: callable taking (question sentence, answer sentence) and
            returning a sized iterable of (anchorQ, anchorA) pairs. It is
            only called when at least one pair survives the filtering, and a
            TypeError is raised at that point if it was left as None.

    Returns:
        A tuple (qPathDict, aPathDict, qSenDict, aSenDict, ansSenDict), all
        keyed by qa id:
          * qPathDict / aPathDict: list of {"depPath", "nodePath"} dicts, one
            per anchor pair whose answer node path is non-empty. An id is
            present as soon as anchorFunc returned at least one pair, so its
            list may be empty.
          * qSenDict: first sentence of the question.
          * aSenDict: context sentence containing the correct candidate span.
          * ansSenDict: first sentence of the first reference answer (per the
            original author's note, the answer constituent span, which may
            not be the exact correct answer).
        Only ids whose cleaned reference answer equals the cleaned text of
        the candidate span are kept in the sentence dicts.
    """
    # First sentence of every question and of its first reference answer.
    qSenDict = dict()
    ansSenDict = dict()
    for title in origData.keys():
        for paragraph in origData[title].paragraphs:
            for qa in paragraph.qas:
                qSenDict[qa.id] = qa.question.sentence[0]
                ansSenDict[qa.id] = qa.answers[0].sentence[0]

    # Context sentence and candidate span flagged as correct per question.
    aSenDict = dict()
    aSpanDict = dict()
    for title in candData.keys():
        candidates = candData[title]
        candAnsList = candidates.candidateAnswers
        article = origData[title]
        for qa in candidates.questions:
            span = candAnsList[qa.correctAnswerIndex]
            paragraph = article.paragraphs[span.paragraphIndex]
            aSenDict[qa.id] = paragraph.context.sentence[span.sentenceIndex]
            aSpanDict[qa.id] = span

    # Drop every question whose reference answer is not exactly the text of
    # its candidate span.
    nQaBeforeRm = len(qSenDict)
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, which raises RuntimeError on Python 3 when done on a live view.
    for qaId in list(ansSenDict.keys()):
        if qaId not in aSpanDict or qaId not in aSenDict:
            del qSenDict[qaId]
            del ansSenDict[qaId]
            continue
        exactAns = _CleanedTokenText(ansSenDict[qaId].token)
        start = aSpanDict[qaId].spanBeginIndex
        end = start + aSpanDict[qaId].spanLength
        spanAns = _CleanedTokenText(aSenDict[qaId].token[start:end])
        if exactAns != spanAns:
            del qSenDict[qaId]
            del ansSenDict[qaId]
            del aSenDict[qaId]
            del aSpanDict[qaId]
    nQaAfterRm = len(qSenDict)

    # Guard the ratio: with no questions at all the division would fail.
    if nQaBeforeRm:
        coverage = nQaAfterRm / float(nQaBeforeRm)
    else:
        coverage = 0.0
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("Got  %s  exactly covered pair!" % coverage)

    qPathDict = dict()
    aPathDict = dict()
    for qaId in qSenDict.keys():
        if qaId not in aSenDict:
            continue
        qSen = qSenDict[qaId]
        aSen = aSenDict[qaId]
        if anchorFunc is None:
            # Same exception type the bare call would raise, clearer message.
            raise TypeError(
                "anchorFunc must be a callable taking (qSen, aSen)")
        coList = anchorFunc(qSen, aSen)
        if len(coList) == 0:
            continue

        qPaths = qPathDict[qaId] = list()
        aPaths = aPathDict[qaId] = list()
        span = aSpanDict[qaId]
        for anchorQ, anchorA in coList:
            # Path inside the question, then inside the answer sentence.
            qNodePath, qDepPath = GetQPath(qSen, anchorQ)
            aNodePath, aDepPath = GetAPath(span, aSen, anchorA)
            # Keep the pair only when an answer-side path was found.
            if len(aNodePath) > 0:
                qPaths.append({"depPath": qDepPath, "nodePath": qNodePath})
                aPaths.append({"nodePath": aNodePath, "depPath": aDepPath})

    return qPathDict, aPathDict, qSenDict, aSenDict, ansSenDict