Exemple #1
0
    def computeStatistics(self, errorOut):
        """Score the detected OUTCOME templates against the annotated ones.

        Two sets of statistics are produced:

        * 'outcome'         -- whatever templates.countMatches() returns for
                               the annotated vs. detected outcome templates.
        * 'primary outcome' -- an irstats.IRstats object counting how well the
                               detected templates were flagged as primary.

        Side effects:
            * self.nTrueOutcomes is set to the number of annotated outcomes.
            * primaryOutcomeEvaluation is marked correct/incorrect on every
              detected template that claims to be a primary outcome.
            * each false positive is also reported on stdout.

        errorOut = writable stream that receives the TP / FP / FN log
                   (only its write() method is used here)

        return dict of statistics keyed by 'outcome' and 'primary outcome'
        """
        aOutcomeTemplates = self.abstract.annotatedEntities.getList('outcome')
        self.nTrueOutcomes = len(aOutcomeTemplates)

        stats = {}
        errorOut.write('outcome:\n')
        # NOTE(review): the matchedTemplate links read below are presumably
        # established by countMatches() -- confirm against templates module.
        stats['outcome'] = templates.countMatches(aOutcomeTemplates,
                                                  self.outcomeTemplates,
                                                  errorOut)

        errorOut.write('primary outcome:\n')
        primaryOutcomeStats = irstats.IRstats()

        # Pass 1: every detected template claiming to be primary is either a
        # true positive (its annotated match is primary) or a false positive.
        for oTemplate in self.outcomeTemplates:
            if not oTemplate.isPrimary():
                continue

            matched = oTemplate.matchedTemplate
            if matched is not None and matched.isPrimary(useAnnotated=True):
                primaryOutcomeStats.incTP()
                errorOut.write('  +TP: %s is PRIMARY OUTCOME\n' % oTemplate.name)
                oTemplate.primaryOutcomeEvaluation.markCorrect()
            else:
                primaryOutcomeStats.incFP()
                errorOut.write('  -FP: %s is NOT known to be PRIMARY OUTCOME\n'
                               % oTemplate.name)
                # Single pre-formatted string: prints the same text whether
                # print is the Python 2 statement or the Python 3 function.
                print('%s %s is not a primary outcome'
                      % (self.abstract.id, oTemplate.name))
                oTemplate.primaryOutcomeEvaluation.markIncorrect()

        # Pass 2: an annotated primary outcome whose detected match was not
        # flagged as primary is a false negative.  Annotated primary outcomes
        # with no detected match at all are not counted here.
        for aTemplate in aOutcomeTemplates:
            if (aTemplate.isPrimary(useAnnotated=True)
                    and aTemplate.matchedTemplate is not None
                    and not aTemplate.matchedTemplate.isPrimary()):
                primaryOutcomeStats.incFN()
                errorOut.write('  -FN: %s SHOULD BE PRIMARY OUTCOME\n'
                               % aTemplate.matchedTemplate.name)

        stats['primary outcome'] = primaryOutcomeStats

        return stats
Exemple #2
0
  def computeStatistics(self, errorOut):
    """Score detected AGE, CONDITION, GROUP and GROUP SIZE information.

    Each category is compared with the annotated entities of the abstract
    and a TP / FP / FN log is written to errorOut.

    Side effects:
        * self.nTrueConditions / self.nTrueGroups are set to the number of
          annotated condition / group templates.
        * self.nTrueGroupSizes is set to TP + FN of the group size scoring.
        * groupSizeEvaluation is marked correct/incorrect on every detected
          group template that has a non-zero size.

    errorOut = writable stream that receives the TP / FP / FN log
               (only its write() method is used here)

    return dict of statistics keyed by 'age', 'condition', 'group' and
           'group size'
    """
    stats = {}
    self.nTrueGroupSizes = 0

    aAgeTemplates = createAnnotatedMergedList(self.abstract, 'age')
    errorOut.write('age:\n')
    stats['age'] = self.ageInfo.countAgeMatches(aAgeTemplates, errorOut)

    errorOut.write('condition:\n')
    aConditionTemplates = self.abstract.annotatedEntities.getList('condition')
    stats['condition'] = countMatches(aConditionTemplates,
                                      self.conditionTemplates, errorOut)

    errorOut.write('group:\n')
    aGroupTemplates = self.abstract.annotatedEntities.getList('group')
    stats['group'] = countMatches(aGroupTemplates, self.groupTemplates, errorOut)

    self.nTrueConditions = len(aConditionTemplates)
    self.nTrueGroups = len(aGroupTemplates)

    errorOut.write('group size:\n')
    gsStats = IRstats()
    # annotated group templates whose size was correctly detected
    gsFound = set()
    for gTemplate in self.groupTemplates:
      gSize = gTemplate.getSize(maxSize=True)
      if gSize == 0:
        # a size of 0 is not scored (presumably "no size detected" -- confirm)
        continue

      # the detected size is correct if it equals any of the sizes recorded
      # for the annotated group this template was matched with
      matched = gTemplate.matchedTemplate
      found = matched is not None and any(
          gSize == trueGSize.value for trueGSize in matched.sizes)

      if found:
        gsStats.incTP()
        errorOut.write('  +TP: %s size = %d\n' % (gTemplate.name, gSize))
        gTemplate.groupSizeEvaluation.markCorrect()
        gsFound.add(matched)
      else:
        gsStats.incFP()
        errorOut.write('  -FP: %s size = %d\n' % (gTemplate.name, gSize))
        gTemplate.groupSizeEvaluation.markIncorrect()

    # False negatives: annotated groups that were matched to a detected
    # group and have a size, but whose size was not correctly detected.
    for trueTemplate in aGroupTemplates:
      if trueTemplate in gsFound or trueTemplate.matchedTemplate is None:
        continue
      trueSize = trueTemplate.getSize()
      if trueSize > 0:
        gsStats.incFN()
        errorOut.write('  -FN: %s size = %d\n' % (trueTemplate.name, trueSize))

    stats['group size'] = gsStats
    self.nTrueGroupSizes = gsStats.tp + gsStats.fn

    # NOTE: 'population' statistics are not computed here; the previous
    # implementation had been commented out and was removed as dead code.
    return stats