Esempio n. 1
0
    def countAgeMatches(self, aAgeTemplates, errorOut):
        """ Score the detected age values against the annotated age values.

        aAgeTemplates = annotated age templates; each one exposes a trueValues
                        mapping from age value type to a list of annotated values
        errorOut = file stream that receives one line per TP, FP and FN

        Stores the number of annotated age values in self.nTrueAgeValues.
        Detected values whose source is 'trial_registry' are not scored.

        return the IRstats object holding the TP/FP/FN counts
        """
        # annotated values, grouped by age value type
        truthByKind = {kind: set() for kind in ('min', 'max', 'mean', 'median')}
        for annotated in aAgeTemplates:
            for kind, truths in annotated.trueValues.items():
                truthByKind[kind].update(truths)

        # every annotated value starts out as "not found yet"
        wasFound = {}
        for truths in truthByKind.values():
            wasFound.update(dict.fromkeys(truths, False))
        self.nTrueAgeValues = sum(len(truths) for truths in truthByKind.values())

        # compare each detected value with the annotated values of the same type
        stats = IRstats()
        for kind, detected in self.ageValues.items():
            if detected.source == 'trial_registry':
                continue
            nHits = 0
            for truth in truthByKind[kind]:
                if detected.value == truth.value:
                    # each annotated value with this number counts as its own TP
                    stats.incTP()
                    errorOut.write('  +TP: %s = %d\n' % (kind, detected.value))
                    wasFound[truth] = True
                    detected.evaluation.markCorrect()
                    nHits += 1
            if nHits == 0:
                stats.incFP()
                errorOut.write('  -FP: %s = %d\n' % (kind, detected.value))
                detected.evaluation.markIncorrect()

        # annotated values that no detected value matched are false negatives
        for truth, found in wasFound.items():
            if found:
                continue
            stats.incFN()
            errorOut.write('  -FN: %s = %d\n' % (truth.type, truth.value))

        return stats
  def countAgeMatches(self, aAgeTemplates, errorOut):
    """ Score the detected age values against the annotated age values.

        aAgeTemplates = annotated age templates; each one exposes a trueValues
                        mapping from age value type to a list of annotated values
        errorOut = file stream that receives one line per TP, FP and FN

        Stores the number of annotated age values in self.nTrueAgeValues.
        Detected values whose source is 'trial_registry' are not scored.

        return the IRstats object holding the TP/FP/FN counts
        """
    # annotated values, grouped by age value type
    truthByKind = {kind: set() for kind in ('min', 'max', 'mean', 'median')}
    for annotated in aAgeTemplates:
      for kind, truths in annotated.trueValues.items():
        truthByKind[kind].update(truths)

    # every annotated value starts out as "not found yet"
    wasFound = {}
    for truths in truthByKind.values():
      wasFound.update(dict.fromkeys(truths, False))
    self.nTrueAgeValues = sum(len(truths) for truths in truthByKind.values())

    # compare each detected value with the annotated values of the same type
    stats = IRstats()
    for kind, detected in self.ageValues.items():
      if detected.source == 'trial_registry':
        continue
      nHits = 0
      for truth in truthByKind[kind]:
        if detected.value == truth.value:
          # each annotated value with this number counts as its own TP
          stats.incTP()
          errorOut.write('  +TP: %s = %d\n' % (kind,detected.value))
          wasFound[truth] = True
          detected.evaluation.markCorrect()
          nHits += 1
      if nHits == 0:
        stats.incFP()
        errorOut.write('  -FP: %s = %d\n' % (kind, detected.value))
        detected.evaluation.markIncorrect()

    # annotated values that no detected value matched are false negatives
    for truth, found in wasFound.items():
      if found:
        continue
      stats.incFN()
      errorOut.write('  -FN: %s = %d\n' % (truth.type, truth.value))

    return stats
Esempio n. 3
0
  def computeStatistics(self, errorOut):
    """ Count RPF statistics for each unique AGE, CONDITION, POPULATION entity
        statOut = file stream for RPF stats for all parts of summarization system
        errorOut = file stream for TPs, FPs, FNs
        
        return hash of IRstats, one for each mention type, keyed by mention type
        """
    stats = {}
    self.nTrueGroupSizes = 0

    aAgeTemplates = createAnnotatedMergedList(self.abstract, 'age')
    errorOut.write('age:\n')
    stats['age'] = self.ageInfo.countAgeMatches(aAgeTemplates, errorOut)

    errorOut.write('condition:\n')          
    aConditionTemplates = self.abstract.annotatedEntities.getList('condition')
    stats['condition'] = countMatches(aConditionTemplates, \
                                     self.conditionTemplates, errorOut)
    errorOut.write('group:\n')          
    aGroupTemplates = self.abstract.annotatedEntities.getList('group')
    stats['group'] = countMatches(aGroupTemplates, self.groupTemplates, errorOut)

    self.nTrueConditions = len(aConditionTemplates)
    self.nTrueGroups = len(aGroupTemplates)

    errorOut.write('group size:\n') 
    gsStats = IRstats()
    gsFound = set([])
    for gTemplate in self.groupTemplates:
      gSize = gTemplate.getSize(maxSize=True)
      if gSize != 0:
        # look for group size match in sizes for annotated group 
        found = False
        if gTemplate.matchedTemplate != None:
          for trueGSize in gTemplate.matchedTemplate.sizes:
            if gSize == trueGSize.value:
              found = True
              break
                 
        if found:
          # group size is correct
          gsStats.incTP()        
          errorOut.write('  +TP: %s size = %d\n' % (gTemplate.name, gSize))
          gTemplate.groupSizeEvaluation.markCorrect()
          gsFound.add(gTemplate.matchedTemplate)
        else:
          # group size is incorrect
          gsStats.incFP()  
          errorOut.write('  -FP: %s size = %d\n' % (gTemplate.name, gSize))
          gTemplate.groupSizeEvaluation.markIncorrect()
    # look for false negatives
    for trueTemplate in aGroupTemplates:
      if trueTemplate not in gsFound and trueTemplate.matchedTemplate != None and trueTemplate.getSize() > 0:
        # there should be a group size for this group
        gsStats.incFN()  
        errorOut.write('  -FN: %s size = %d\n' % \
                (trueTemplate.name, trueTemplate.getSize()))
        
    
    stats['group size'] = gsStats
    self.nTrueGroupSizes = gsStats.tp + gsStats.fn
#     errorOut.write('population:\n')
#     templates = createAnnotatedMergedList(self.abstract, 'population')
#     aPopulationTemplates = []
#     for pTemplate in templates:
#       if pTemplate.isInteresting() > 0:
#         # term is informative, keep it
#         aPopulationTemplates.append(pTemplate)
#     stats['population'] = self.countMatches(aPopulationTemplates, \
#                       self.populationTemplates, errorOut)
    return stats