def computeStatistics(self, errorOut):
    """ Count RPF statistics for each unique OUTCOME entity.

        Side effects: stores the number of annotated outcomes in
        self.nTrueOutcomes, writes every TP/FP/FN decision to errorOut and
        marks each detected primary outcome correct/incorrect through its
        primaryOutcomeEvaluation.

        errorOut = file stream for TPs, FPs, FNs

        return hash keyed by mention type:
          'outcome'         -> result of templates.countMatches()
          'primary outcome' -> IRstats for primary outcome detection
    """
    aOutcomeTemplates = self.abstract.annotatedEntities.getList('outcome')
    self.nTrueOutcomes = len(aOutcomeTemplates)

    errorOut.write('outcome:\n')
    stats = {}
    stats['outcome'] = templates.countMatches(aOutcomeTemplates,
                                              self.outcomeTemplates, errorOut)

    errorOut.write('primary outcome:\n')
    primaryOutcomeStats = irstats.IRstats()

    # Detected templates: each one flagged as primary is either a TP or an FP.
    for oTemplate in self.outcomeTemplates:
        if not oTemplate.isPrimary():
            continue
        matched = oTemplate.matchedTemplate
        if matched is not None and matched.isPrimary(useAnnotated=True):
            primaryOutcomeStats.incTP()
            errorOut.write(' +TP: %s is PRIMARY OUTCOME\n' % oTemplate.name)
            oTemplate.primaryOutcomeEvaluation.markCorrect()
        else:
            primaryOutcomeStats.incFP()
            errorOut.write(' -FP: %s is NOT known to be PRIMARY OUTCOME\n'
                           % oTemplate.name)
            # Single-argument print() produces the same text under both
            # Python 2 (print statement) and Python 3 (print function).
            print('%s %s is not a primary outcome'
                  % (self.abstract.id, oTemplate.name))
            oTemplate.primaryOutcomeEvaluation.markIncorrect()

    # Annotated templates: a primary outcome whose matched detected template
    # was not flagged as primary is a FN. Annotated primary outcomes with no
    # matched template at all are not counted here.
    for aTemplate in aOutcomeTemplates:
        if (aTemplate.isPrimary(useAnnotated=True)
                and aTemplate.matchedTemplate is not None
                and not aTemplate.matchedTemplate.isPrimary()):
            primaryOutcomeStats.incFN()
            errorOut.write(' -FN: %s SHOULD BE PRIMARY OUTCOME\n'
                           % aTemplate.matchedTemplate.name)

    stats['primary outcome'] = primaryOutcomeStats
    return stats
def computeStatistics(self, errorOut):
    """ Count RPF statistics for each unique AGE, CONDITION and GROUP entity,
        plus group sizes.

        Side effects: sets self.nTrueConditions, self.nTrueGroups and
        self.nTrueGroupSizes, writes every TP/FP/FN decision to errorOut and
        marks each detected group size correct/incorrect through the group
        template's groupSizeEvaluation.

        errorOut = file stream for TPs, FPs, FNs

        return hash keyed by mention type
               ('age', 'condition', 'group', 'group size')
    """
    stats = {}
    self.nTrueGroupSizes = 0

    aAgeTemplates = createAnnotatedMergedList(self.abstract, 'age')
    errorOut.write('age:\n')
    stats['age'] = self.ageInfo.countAgeMatches(aAgeTemplates, errorOut)

    errorOut.write('condition:\n')
    aConditionTemplates = self.abstract.annotatedEntities.getList('condition')
    stats['condition'] = countMatches(aConditionTemplates,
                                      self.conditionTemplates, errorOut)

    errorOut.write('group:\n')
    aGroupTemplates = self.abstract.annotatedEntities.getList('group')
    stats['group'] = countMatches(aGroupTemplates, self.groupTemplates,
                                  errorOut)

    self.nTrueConditions = len(aConditionTemplates)
    self.nTrueGroups = len(aGroupTemplates)

    errorOut.write('group size:\n')
    gsStats = IRstats()
    gsFound = set()     # annotated groups whose size was detected correctly

    for gTemplate in self.groupTemplates:
        gSize = gTemplate.getSize(maxSize=True)
        if gSize == 0:
            # no size reported for this detected group, nothing to score
            continue

        # look for group size match in sizes for annotated group
        matched = gTemplate.matchedTemplate
        found = matched is not None and any(
            gSize == trueGSize.value for trueGSize in matched.sizes)

        if found:
            # group size is correct
            gsStats.incTP()
            errorOut.write(' +TP: %s size = %d\n' % (gTemplate.name, gSize))
            gTemplate.groupSizeEvaluation.markCorrect()
            gsFound.add(matched)
        else:
            # group size is incorrect
            gsStats.incFP()
            errorOut.write(' -FP: %s size = %d\n' % (gTemplate.name, gSize))
            gTemplate.groupSizeEvaluation.markIncorrect()

    # look for false negatives: annotated groups that were matched to a
    # detected group and have a size, but whose size was not found above
    for trueTemplate in aGroupTemplates:
        if trueTemplate in gsFound or trueTemplate.matchedTemplate is None:
            continue
        trueSize = trueTemplate.getSize()
        if trueSize > 0:
            # there should be a group size for this group
            gsStats.incFN()
            errorOut.write(' -FN: %s size = %d\n'
                           % (trueTemplate.name, trueSize))

    stats['group size'] = gsStats
    self.nTrueGroupSizes = gsStats.tp + gsStats.fn

    # NOTE: population statistics are currently disabled. Previous code:
    # errorOut.write('population:\n')
    # templates = createAnnotatedMergedList(self.abstract, 'population')
    # aPopulationTemplates = []
    # for pTemplate in templates:
    #     if pTemplate.isInteresting() > 0:
    #         # term is informative, keep it
    #         aPopulationTemplates.append(pTemplate)
    # stats['population'] = self.countMatches(aPopulationTemplates,
    #                                         self.populationTemplates, errorOut)

    return stats