def linkTemplates(self, sentence):
    """ Group the outcome numbers and event rates of a sentence into
        outcome measurements.

        Every event rate ('eventrate' template) is paired with the first
        outcome number ('on' template) for which
        on.shouldBelongToSameOutcomeMeasurement(er) is true, and the pair
        becomes one OutcomeMeasurement.  Every value that ends up without a
        partner gets an OutcomeMeasurement of its own.  The measurements are
        handed to sentence.templates.addOutcomeMeasurementList() in this
        order: matched pairs, lone event rates, lone outcome numbers.

        It is assumed that mention clustering has not occurred yet.

        sentence -- object whose .templates offers getList(kind) and
                    addOutcomeMeasurementList(list)
    """
    templates = sentence.templates
    onList = templates.getList('on')
    erList = templates.getList('eventrate')
    omList = []

    unmatchedER = []
    for er in erList:
        for on in onList:
            if on.shouldBelongToSameOutcomeMeasurement(er):
                om = OutcomeMeasurement(on)
                om.addEventRate(er)
                omList.append(om)
                # an event rate belongs to at most one measurement
                break
        # NOTE(review): presumably addEventRate() fills in er.outcomeNumber
        # (and on.textEventrate) for a matched pair -- confirm in
        # OutcomeMeasurement.
        if er.outcomeNumber is None:
            unmatchedER.append(er)

    unmatchedON = [on for on in onList if on.textEventrate is None]

    # values without a partner still get a measurement of their own
    for value in unmatchedER + unmatchedON:
        omList.append(OutcomeMeasurement(value))

    sentence.templates.addOutcomeMeasurementList(omList)
def computeTrueStats(self, abstract): """ compute summaries statistics using annotations """ self.outcomeNumbers = [] self.eventRates = [] self.stats = [] self.trueStats = True self.abstract = abstract self.groupsById = {} self.outcomesById = {} self.timesById = {} omHash = {} for s in abstract.sentences: # find all of the annotated templates in the sentence templates = s.annotatedTemplates gList = templates.getList('group') oList = templates.getList('outcome') gsList = templates.getList('gs') onList = templates.getList('on') erList = templates.getList('eventrate') tList = templates.getList('time') # print abstract.id # for er in erList: # print er.value, # print for t in tList: # times.append(t) if t.getAnnotatedId() in self.timesById: self.timesById[t.getAnnotatedId()].merge(t) else: self.timesById[t.getAnnotatedId()] = t for g in gList: # groups.append(g) if g.getAnnotatedId() in self.groupsById: self.groupsById[g.getAnnotatedId()].merge(g) else: self.groupsById[g.getAnnotatedId()] = g for outcome in oList: # outcomes.append(outcome) if outcome.getAnnotatedId() != None and len(outcome.getAnnotatedId()) > 0: if outcome.getAnnotatedId() in self.outcomesById: self.outcomesById[outcome.getAnnotatedId()].merge(outcome) else: self.outcomesById[outcome.getAnnotatedId()] = outcome else: print abstract.id, outcome.name, 'does not have an ID.', print 'Not using it for summary stats.' 
# for gs in gsList: # self.groupSizes.append(gs) # link groups and their sizes for gs in gsList: gid = gs.token.getAnnotationAttribute('gs', 'group') if gid in self.groupsById: g = self.groupsById[gid] gs.group = g g.addSize(gs) tid = gs.token.getAnnotationAttribute('gs', 'time') if tid in self.timesById: t = self.timesById[tid] gs.time = t # for gid,g in self.groupsById.items(): # print 'Group id:', gid, ', name = ', g.name, ', size =', g.getSize() # link all relevant information needed for each outcome measurement for on in onList: gid = on.token.getAnnotationAttribute('on', 'group') oid = on.token.getAnnotationAttribute('on', 'outcome') tid = on.token.getAnnotationAttribute('on', 'time') csID = on.token.getAnnotationAttribute('on', 'compareSet') # print 'on:',on.value, csID if oid in self.outcomesById: oTemplate = self.outcomesById[oid] gTemplate = self.groupsById.get(gid, None) tTemplate = self.timesById.get(tid, None) if oid not in omHash: omHash[oid] = [] om = OutcomeMeasurement(on) om.addGroup(gTemplate) om.addOutcome(oTemplate) om.addTime(tTemplate) omHash[oid].append(om) else: print abstract.id, '??? 
Outcome number', on.value, print 'does not have a matching outcome with id =', oid # print '-->', # om.write(sys.stdout) for er in erList: gid = er.token.getAnnotationAttribute('eventrate', 'group') oid = er.token.getAnnotationAttribute('eventrate', 'outcome') tid = er.token.getAnnotationAttribute('eventrate', 'time') csID = er.token.getAnnotationAttribute('eventrate', 'compareSet') # print abstract.id+': er: ',er.value, csID if oid in self.outcomesById: oTemplate = self.outcomesById[oid] gTemplate = self.groupsById.get(gid, None) tTemplate = self.timesById.get(tid, None) # print abstract.id+': er: ', er.value, gTemplate, tTemplate, csID if oid not in omHash: omHash[oid] = [] matchFound = False for om in omHash[oid]: if om.getGroup() == gTemplate and om.getTime() == tTemplate and om.getCompareSetID() == csID: om.addEventRate(er) # print 'adding', er.value # om.write(sys.stdout) matchFound = True break # else: # print om.getGroup(), om.getTime(), om.getCompareSetID if matchFound == False: # event rate not added to existing outcome measurement, create new measurement om = OutcomeMeasurement(er) om.addGroup(gTemplate) om.addOutcome(oTemplate) om.addTime(tTemplate) omHash[oid].append(om) else: print 'Event rate missing outcome annotation in abstract ', print abstract.id, ':', s.toString() er.write(sys.stdout) for oid in omHash.keys(): omList = omHash[oid] for i in range(0, len(omList)): om1 = omList[i] csID1 = om1.getCompareSetID() # print abstract.id, csID1,':', for j in range(i+1, len(omList)): om2 = omList[j] csID2 = om2.getCompareSetID() # print csID2, if csID1 == csID2 and om1.isComplete() and om2.isComplete() \ and om1.getGroup() != om2.getGroup() and om1.getTime() == om2.getTime(): ssTemplate = SummaryStat(om1, om2, useAnnotated=True) self.stats.append(ssTemplate) om1.used = True om2.used = True # print if om1.used == False: self.unmatchedMeasurements.append(om1) for om in self.unmatchedMeasurements: if om.getOutcome() != None: 
om.getOutcome().unusedNumbers.append(om)
def linkTemplates(self, sentence):
    """ Build at most one outcome measurement per (group, outcome) entity
        pair from the values of a sentence.

        Only event rates ('eventrate') and outcome numbers ('on') that already
        have both a group and an outcome mention are considered; outcome
        numbers additionally need a group size > 0.  The matched mentions are
        recorded on the value as match features and then detached
        (value.group / value.outcome are set to None).

        When two values compete for the same (group, outcome) pair:
          * an outcome number whose event rate is equivalent to the event
            rate already stored is merged into that measurement;
          * otherwise self.closestValue() decides.  The loser is recorded in
            self.incompleteMatches[abstract]; on a tie (closestValue returns
            None) the pair is discarded, nothing is recorded, and later
            values for that pair are ignored.

        The surviving measurements are linked with
        self.linkOutcomeMeasurementAssociations() and stored with
        sentence.templates.addOutcomeMeasurementList().

        sentence -- object providing .templates and .abstract
    """
    templates = sentence.templates
    onList = templates.getList('on')
    erList = templates.getList('eventrate')
    abstract = sentence.abstract

    # losing candidates for this abstract are collected here
    rejected = self.incompleteMatches.setdefault(abstract, [])

    # (groupEntity, outcomeEntity) -> OutcomeMeasurement, or None once the
    # pair has been discarded because of a tie
    omHash = {}

    def claimMentions(value):
        """ remember the mentions matched with the value, detach them from
            the value and return (groupEntity, outcomeEntity) """
        # save this information in feature vectors to be retrieved in
        # linkQuantityAndMention()
        for mention in (value.group, value.outcome):
            fv = FeatureVector(-1, -1, None)
            fv.mTemplate = mention
            fv.qTemplate = value
            value.addMatchFeatures(fv)
        key = (value.group.rootMention(), value.outcome.rootMention())
        value.group = None
        value.outcome = None
        return key

    def reject(key, om):
        """ record a measurement that lost the competition for key """
        # NOTE(review): meaning of the trailing 0 is not visible here
        rejected.append(
            OutcomeMeasurementAssociation(key[0], key[1], om, 0))

    for er in erList:
        if er.group is None or er.outcome is None:
            continue
        key = claimMentions(er)
        om = OutcomeMeasurement(er)

        if key not in omHash:
            omHash[key] = om
            continue
        currentOM = omHash[key]
        if currentOM is None:
            # pair was discarded earlier
            continue

        # there is already an outcome measurement for this group, outcome;
        # check if this one is closer.  Per the original comments: closer if
        # its distance to the closest mention is less; if the same, use total
        # distance; if that is the same, use the value that occurs earlier.
        closest = self.closestValue(er, currentOM.getTextEventRate())
        if closest is None:
            # both same distance, discard both
            omHash[key] = None
        elif closest == er:
            omHash[key] = om
            reject(key, currentOM)
        else:
            reject(key, om)

    for on in onList:
        if on.group is None or on.outcome is None:
            continue
        key = claimMentions(on)

        # check if this ON is useful, can we compute an event rate with it?
        gs = on.getGroupSize()
        if not gs > 0:
            continue
        om = OutcomeMeasurement(on)

        if key not in omHash:
            omHash[key] = om
            continue
        currentOM = omHash[key]
        if currentOM is None:
            # pair was discarded earlier
            continue

        currentON = currentOM.getOutcomeNumber()
        currentER = currentOM.getTextEventRate()

        # check if this on should be merged with an event rate
        if (currentON is None and currentER is not None
                and on.equivalentEventRates(currentER.eventRate())):
            currentOM.addOutcomeNumber(on)
            continue

        # on not compatible with existing value; pick the value that
        # represents the existing measurement in the distance comparison
        if currentON is not None and currentER is not None:
            incumbent = self.closestValue(currentON, currentER)
            if incumbent is None:
                # if both the same distance, just use the ER
                incumbent = currentER
        elif currentON is not None:
            incumbent = currentON
        else:
            incumbent = currentER

        closest = self.closestValue(incumbent, on)
        if closest is None:
            # both same distance, discard both
            omHash[key] = None
        elif closest == on:
            omHash[key] = om
            reject(key, currentOM)
        else:
            reject(key, om)

    omList = []
    for (group, outcome), om in omHash.items():
        if om is not None:
            # NOTE(review): meaning of the 0.5 argument is not visible here
            self.linkOutcomeMeasurementAssociations(om, group, outcome, 0.5)
            omList.append(om)

    sentence.templates.addOutcomeMeasurementList(omList)