Esempio n. 1
0
    def countAgeMatches(self, aAgeTemplates, errorOut):
        """ Score the detected age values against annotated age templates.

        Annotated values are gathered from each template's ``trueValues``
        mapping and grouped by statistic kind ('min', 'max', 'mean',
        'median').  ``self.nTrueAgeValues`` is set to the number of gathered
        annotated values.

        Each entry of ``self.ageValues`` whose ``source`` is not
        'trial_registry' is then compared, by ``.value``, with the annotated
        values of the same kind:
          * every annotated value it equals counts as one TP (so a single
            detected value can produce several TPs) and the detected value
            is marked correct;
          * if it equals none, it counts as one FP and is marked incorrect.
        Annotated values that were never matched each count as one FN.

        One line per TP/FP/FN is written to ``errorOut``.

        Returns the IRstats object holding the TP/FP/FN counts.
        """
        # Annotated ("true") values, de-duplicated per statistic kind.
        truthByKind = {kind: set() for kind in ('min', 'max', 'mean', 'median')}
        for tmpl in aAgeTemplates:
            for kind, trueList in tmpl.trueValues.items():
                truthByKind[kind].update(trueList)

        # Track which annotated values get matched by some detected value.
        wasMatched = {}
        for trueSet in truthByKind.values():
            wasMatched.update((trueValue, False) for trueValue in trueSet)
        self.nTrueAgeValues = sum(len(trueSet) for trueSet in truthByKind.values())

        # Compare each detected value with the annotated values of its kind.
        stats = IRstats()
        for kind, detected in self.ageValues.items():
            if detected.source == 'trial_registry':
                # Values taken from the trial registry are not scored.
                continue

            matched = False
            for trueValue in truthByKind[kind]:
                if detected.value != trueValue.value:
                    continue
                stats.incTP()
                errorOut.write('  +TP: %s = %d\n' % (kind, detected.value))
                wasMatched[trueValue] = True
                detected.evaluation.markCorrect()
                matched = True

            if not matched:
                stats.incFP()
                errorOut.write('  -FP: %s = %d\n' % (kind, detected.value))
                detected.evaluation.markIncorrect()

        # Whatever annotated value is still unmatched was missed.
        missed = [trueValue for trueValue, hit in wasMatched.items() if not hit]
        for trueValue in missed:
            stats.incFN()
            errorOut.write('  -FN: %s = %d\n' % (trueValue.type, trueValue.value))

        return stats
Esempio n. 2
0
  def computeTupleMentionError(self, recomputeAnnotatedMentions, errorWeights=None):
    """ Compute the weighted count of FP, FN and duplicate mentions in the sentence.

    For each mention type ('group', 'outcome', 'eventrate', 'on', 'gs') the
    detected mentions held by the corresponding labeling object are compared
    with the sentence's annotated mentions via that labeling's finder, and
    the resulting fp / fn / duplicates counts are multiplied by the
    per-type weights and summed.

    recomputeAnnotatedMentions -- forwarded to
        self.sentence.getAnnotatedMentions() as ``recomputeMentions``.
    errorWeights -- optional dict mapping each mention type to a dict with
        'fp', 'fn' and 'dup' weights.  When omitted, None or empty, every
        weight is 1.  The argument is never modified.

    Returns the total weighted error (FP + duplicates + FN).
    """
    if not errorWeights:
      # Build a fresh table on every call.  The previous version filled in
      # the (shared) default dict / the caller's empty dict in place.
      errorWeights = dict(
          (mType, {'fp': 1, 'fn': 1, 'dup': 1})
          for mType in ('group', 'outcome', 'eventrate', 'on', 'gs'))

    # mention type -> (detected mentions, finder used to compare them)
    mentions = {
        'group': (self.groupLabeling.entities['group'], self.groupLabeling.finder),
        'outcome': (self.outcomeLabeling.entities['outcome'], self.outcomeLabeling.finder),
        'eventrate': (self.eventrateLabeling.entities['eventrate'], self.eventrateLabeling.finder),
        'on': (self.numberLabeling.entities['on'], self.numberLabeling.finder),
        'gs': (self.numberLabeling.entities['gs'], self.numberLabeling.finder),
    }

    totalFP = 0
    totalFN = 0
    totalDuplicates = 0
    for mType, (dList, finder) in mentions.items():
      annotated = self.sentence.getAnnotatedMentions(
          mType, recomputeMentions=recomputeAnnotatedMentions)
      stats = IRstats()
      # compareMentionLists is expected to record its counts in `stats`.
      finder.compareMentionLists(dList, annotated, mType, stats)
      weights = errorWeights[mType]
      totalFP += stats.fp * weights['fp']
      totalFN += stats.fn * weights['fn']
      totalDuplicates += stats.duplicates * weights['dup']

    return totalFP + totalDuplicates + totalFN
Esempio n. 3
0
 def __init__(self, entityTypes):
     """ Set up one fresh IRstats accumulator per entity type.

     entityTypes -- iterable of entity type keys; kept on the instance as
         ``self.entityTypes`` and used as the keys of ``self.irstats``.
     """
     self.entityTypes = entityTypes
     self.irstats = dict((mType, IRstats()) for mType in self.entityTypes)
Esempio n. 4
0
#             if token.hasAnnotation(entityType):
#               verbRuleCounts[depToken.lemma].incTP()
#             else:
#               verbRuleCounts[depToken.lemma].incFP()

      for dep in token.governors:
        if dep.isRoot() == False and dep.type == 'pobj':
          depToken = token.sentence[dep.index]
#          print depToken.text, token.text
          for g in depToken.governors:
            if g.isRoot() == False:# and g.type == 'prep':
              gToken = token.sentence[g.index]
#              print gToken.text+'_'+g.type, depToken.text, token.text
              if gToken.pos[0:2] == 'VB':
                if gToken.lemma not in verbRuleCounts:
                  verbRuleCounts[gToken.lemma] = IRstats()  
                if token.hasAnnotation(entityType):
                  verbRuleCounts[gToken.lemma].incTP()
                else:
                  verbRuleCounts[gToken.lemma].incFP()
    
    for token in sentence:
    
      for type in entityTypes:
        if token.hasAnnotation(type):
          entityTokenCounts[type] += 1
        
    # for token in sentence 
#     for token in sentence:
#       if token.text != 'greater' and token.text != 'less':
#         continue