def computeTrueStats(self, abstract): """ compute summaries statistics using annotations """ self.outcomeNumbers = [] self.eventRates = [] self.stats = [] self.trueStats = True self.abstract = abstract self.groupsById = {} self.outcomesById = {} self.timesById = {} omHash = {} for s in abstract.sentences: # find all of the annotated templates in the sentence templates = s.annotatedTemplates gList = templates.getList('group') oList = templates.getList('outcome') gsList = templates.getList('gs') onList = templates.getList('on') erList = templates.getList('eventrate') tList = templates.getList('time') # print abstract.id # for er in erList: # print er.value, # print for t in tList: # times.append(t) if t.getAnnotatedId() in self.timesById: self.timesById[t.getAnnotatedId()].merge(t) else: self.timesById[t.getAnnotatedId()] = t for g in gList: # groups.append(g) if g.getAnnotatedId() in self.groupsById: self.groupsById[g.getAnnotatedId()].merge(g) else: self.groupsById[g.getAnnotatedId()] = g for outcome in oList: # outcomes.append(outcome) if outcome.getAnnotatedId() != None and len(outcome.getAnnotatedId()) > 0: if outcome.getAnnotatedId() in self.outcomesById: self.outcomesById[outcome.getAnnotatedId()].merge(outcome) else: self.outcomesById[outcome.getAnnotatedId()] = outcome else: print abstract.id, outcome.name, 'does not have an ID.', print 'Not using it for summary stats.' # for gs in gsList: # self.groupSizes.append(gs) # link groups and their sizes for gs in gsList: gid = gs.token.getAnnotationAttribute('gs', 'group') if gid in self.groupsById: g = self.groupsById[gid] gs.group = g g.addSize(gs) tid = gs.token.getAnnotationAttribute('gs', 'time') if tid in self.timesById: t = self.timesById[tid] gs.time = t # for gid,g in self.groupsById.items(): # print 'Group id:', gid, ', name = ', g.name, ', size =', g.getSize() # link all relevant information needed for each outcome measurement for on in onList: gid = on.token.getAnnotationAttribute('on', 'group') oid = on.token.getAnnotationAttribute('on', 'outcome') tid = on.token.getAnnotationAttribute('on', 'time') csID = on.token.getAnnotationAttribute('on', 'compareSet') # print 'on:',on.value, csID if oid in self.outcomesById: oTemplate = self.outcomesById[oid] gTemplate = self.groupsById.get(gid, None) tTemplate = self.timesById.get(tid, None) if oid not in omHash: omHash[oid] = [] om = OutcomeMeasurement(on) om.addGroup(gTemplate) om.addOutcome(oTemplate) om.addTime(tTemplate) omHash[oid].append(om) else: print abstract.id, '??? Outcome number', on.value, print 'does not have a matching outcome with id =', oid # print '-->', # om.write(sys.stdout) for er in erList: gid = er.token.getAnnotationAttribute('eventrate', 'group') oid = er.token.getAnnotationAttribute('eventrate', 'outcome') tid = er.token.getAnnotationAttribute('eventrate', 'time') csID = er.token.getAnnotationAttribute('eventrate', 'compareSet') # print abstract.id+': er: ',er.value, csID if oid in self.outcomesById: oTemplate = self.outcomesById[oid] gTemplate = self.groupsById.get(gid, None) tTemplate = self.timesById.get(tid, None) # print abstract.id+': er: ', er.value, gTemplate, tTemplate, csID if oid not in omHash: omHash[oid] = [] matchFound = False for om in omHash[oid]: if om.getGroup() == gTemplate and om.getTime() == tTemplate and om.getCompareSetID() == csID: om.addEventRate(er) # print 'adding', er.value # om.write(sys.stdout) matchFound = True break # else: # print om.getGroup(), om.getTime(), om.getCompareSetID if matchFound == False: # event rate not added to existing outcome measurement, create new measurement om = OutcomeMeasurement(er) om.addGroup(gTemplate) om.addOutcome(oTemplate) om.addTime(tTemplate) omHash[oid].append(om) else: print 'Event rate missing outcome annotation in abstract ', print abstract.id, ':', s.toString() er.write(sys.stdout) for oid in omHash.keys(): omList = omHash[oid] for i in range(0, len(omList)): om1 = omList[i] csID1 = om1.getCompareSetID() # print abstract.id, csID1,':', for j in range(i+1, len(omList)): om2 = omList[j] csID2 = om2.getCompareSetID() # print csID2, if csID1 == csID2 and om1.isComplete() and om2.isComplete() \ and om1.getGroup() != om2.getGroup() and om1.getTime() == om2.getTime(): ssTemplate = SummaryStat(om1, om2, useAnnotated=True) self.stats.append(ssTemplate) om1.used = True om2.used = True # print if om1.used == False: self.unmatchedMeasurements.append(om1) for om in self.unmatchedMeasurements: if om.getOutcome() != None: om.getOutcome().unusedNumbers.append(om)