Example #1
0
  def computeTrueStats(self, abstract):
    """ compute summaries statistics using annotations """
    self.outcomeNumbers = []
    self.eventRates = []
    self.stats = []
    self.trueStats = True
    self.abstract = abstract
    
    self.groupsById = {}
    self.outcomesById = {}
    self.timesById = {}
 
    omHash = {}
    
    for s in abstract.sentences:
      # find all of the annotated templates in the sentence
      templates = s.annotatedTemplates
      gList = templates.getList('group')
      oList = templates.getList('outcome')
      gsList = templates.getList('gs')
      onList = templates.getList('on') 
      erList = templates.getList('eventrate')     
      tList = templates.getList('time')
      
#      print abstract.id
#      for er in erList:
#        print er.value,
#      print
      
      for t in tList:
#         times.append(t)
        if t.getAnnotatedId() in self.timesById:
          self.timesById[t.getAnnotatedId()].merge(t)
        else:
          self.timesById[t.getAnnotatedId()] = t
          
      for g in gList:
#         groups.append(g)
        if g.getAnnotatedId() in self.groupsById:
          self.groupsById[g.getAnnotatedId()].merge(g)
        else:
          self.groupsById[g.getAnnotatedId()] = g

      for outcome in oList:
#         outcomes.append(outcome)
        if outcome.getAnnotatedId() != None and len(outcome.getAnnotatedId()) > 0:
          if outcome.getAnnotatedId() in self.outcomesById:
            self.outcomesById[outcome.getAnnotatedId()].merge(outcome)
          else:
            self.outcomesById[outcome.getAnnotatedId()] = outcome
        else:
          print abstract.id, outcome.name, 'does not have an ID.',
          print 'Not using it for summary stats.'

#       for gs in gsList:
#         self.groupSizes.append(gs)

      # link groups and their sizes
      for gs in gsList:
        gid = gs.token.getAnnotationAttribute('gs', 'group')
        if gid in self.groupsById:
          g = self.groupsById[gid]
          gs.group = g
          g.addSize(gs)
        tid = gs.token.getAnnotationAttribute('gs', 'time')
        if tid in self.timesById:
          t = self.timesById[tid]
          gs.time = t
          
#       for gid,g in self.groupsById.items():
#         print 'Group id:', gid, ', name = ', g.name, ', size =', g.getSize()
         
      # link all relevant information needed for each outcome measurement
      for on in onList:
        gid = on.token.getAnnotationAttribute('on', 'group')
        oid = on.token.getAnnotationAttribute('on', 'outcome')
        tid = on.token.getAnnotationAttribute('on', 'time')
        csID = on.token.getAnnotationAttribute('on', 'compareSet')
#        print 'on:',on.value, csID
        
        if oid in self.outcomesById:
          oTemplate = self.outcomesById[oid]
          gTemplate = self.groupsById.get(gid, None)        
          tTemplate = self.timesById.get(tid, None)
          
          if oid not in omHash:
            omHash[oid] = []
            
          om = OutcomeMeasurement(on)              
          om.addGroup(gTemplate)
          om.addOutcome(oTemplate)
          om.addTime(tTemplate)
          omHash[oid].append(om)
        else:
          print abstract.id, '??? Outcome number', on.value, 
          print 'does not have a matching outcome with id =', oid
#         print '-->',
#         om.write(sys.stdout)
        
        

             
      for er in erList:
        gid = er.token.getAnnotationAttribute('eventrate', 'group')
        oid = er.token.getAnnotationAttribute('eventrate', 'outcome')
        tid = er.token.getAnnotationAttribute('eventrate', 'time')
        csID = er.token.getAnnotationAttribute('eventrate', 'compareSet')
#        print abstract.id+': er: ',er.value, csID
        
        if oid in self.outcomesById:
          oTemplate = self.outcomesById[oid]
          gTemplate = self.groupsById.get(gid, None)        
          tTemplate = self.timesById.get(tid, None)
          
#          print abstract.id+': er: ', er.value, gTemplate, tTemplate, csID
          
          if oid not in omHash:
            omHash[oid] = []
          matchFound = False            
          for om in omHash[oid]:
            if om.getGroup() == gTemplate and om.getTime() == tTemplate and om.getCompareSetID() == csID:
              om.addEventRate(er)
#              print 'adding', er.value
#              om.write(sys.stdout)
              matchFound = True
              break
#            else:
#              print om.getGroup(), om.getTime(), om.getCompareSetID
              
          if matchFound == False:
            # event rate not added to existing outcome measurement, create new measurement
            om = OutcomeMeasurement(er)
            om.addGroup(gTemplate)
            om.addOutcome(oTemplate)
            om.addTime(tTemplate)
            omHash[oid].append(om)
        else:
          print 'Event rate missing outcome annotation in abstract ',
          print abstract.id, ':', s.toString()
          er.write(sys.stdout)     
     
        
    for oid in omHash.keys():
      omList = omHash[oid]
      for i in range(0, len(omList)):
        om1 = omList[i]
        csID1 = om1.getCompareSetID()
#        print abstract.id, csID1,':',
        for j in range(i+1, len(omList)):
          om2 = omList[j]
          csID2 = om2.getCompareSetID()
#          print csID2,
          if csID1 == csID2 and om1.isComplete() and om2.isComplete() \
            and om1.getGroup() != om2.getGroup() and om1.getTime() == om2.getTime():
            ssTemplate = SummaryStat(om1, om2, useAnnotated=True)
            self.stats.append(ssTemplate)
            om1.used = True
            om2.used = True
#        print
        if om1.used == False:
          self.unmatchedMeasurements.append(om1)
            
    for om in self.unmatchedMeasurements:
      if om.getOutcome() != None:
        om.getOutcome().unusedNumbers.append(om)