def makeTokenSets(self, is_locorg_allowed=True):
    """Build a list of typed TokenSet objects, one per accepted mention.

    is_locorg_allowed - enable/disable the 'locorg' tag. When disabled,
    mentions tagged 'locorg' are treated as 'loc'.

    Mentions whose (possibly remapped) tag is not one of 'org', 'per',
    'loc' (plus 'locorg' when enabled) are skipped. Every token of every
    span of an accepted mention receives the mark returned by
    Tables.getMark for the token set's tag and the span's tag.

    Returns the list of created TokenSet objects, in mention order.
    """
    # determine what tags are allowed
    permitted = {'org', 'per', 'loc'}
    if is_locorg_allowed:
        permitted.add('locorg')

    token_sets = []
    for mention in self.mentions:
        tag = mention.tag
        if tag == 'locorg' and not is_locorg_allowed:
            tag = 'loc'
        if tag not in permitted:
            continue

        # flatten the tokens of all spans, preserving span order
        mention_tokens = []
        for span in mention.spans:
            mention_tokens.extend(span.tokens)
        token_set = TokenSet(mention_tokens, tag, self.text)

        for span in mention.spans:
            for token in span.tokens:
                token_set.setMark(
                    token, Tables.getMark(token_set.tag, span.tag))

        token_sets.append(token_set)

    # find and mark organizations embedded within other organizations;
    # 'locorg' sets take part only when that tag is enabled, and they
    # always come after the plain 'org' sets
    org_like = [ts for ts in token_sets if ts.tag == 'org']
    if is_locorg_allowed:
        org_like += [ts for ts in token_sets if ts.tag == 'locorg']
    for candidate in org_like:
        candidate.findParents(org_like)

    return token_sets
def makeTokenSets(self, standard, is_locorg_allowed=True):
    """Group the entities into typed TokenSet objects, keyed by tag.

    The intervals stored in self.entities are tokenized with the provided
    standard data: each TokenSet holds the tokens of `standard` that lie
    entirely within the interval and are not ignored.

    is_locorg_allowed - enable/disable the 'locorg' tag. When disabled,
    token sets built from 'locorg' intervals are re-tagged as 'loc' and
    the 'locorg' key is removed from the result.

    Returns a dictionary mapping each tag to a list of TokenSet objects.
    """
    grouped = {tag: [] for tag in self.allowed_tags}
    locorg_disabled = not is_locorg_allowed

    for tag in self.allowed_tags:
        for interval in self.entities[tag]:
            covered = []
            for token in standard.tokens:
                if (token.start >= interval.start
                        and token.end <= interval.end
                        and not token.isIgnored()):
                    covered.append(token)

            token_set = TokenSet(covered, tag)
            if locorg_disabled and tag == 'locorg':
                token_set.tag = 'loc'
            # file the set under its final tag, not the source tag
            grouped[token_set.tag].append(token_set)

    if locorg_disabled:
        # every 'locorg' set has been re-tagged, so the bucket must be empty
        assert len(grouped['locorg']) == 0
        grouped.pop('locorg')

    return grouped
def makeTokenSets(self, standard, is_locorg_allowed=True):
    """Build a list of typed TokenSet objects corresponding to the mentions.

    The intervals stored in self.mentions are tokenized with the provided
    standard data: each TokenSet holds the tokens of `standard` that lie
    entirely within the interval and are not ignored.

    is_locorg_allowed - enable/disable the 'locorg' tag. When disabled,
    token sets built from 'locorg' intervals are re-tagged as 'loc'.

    Returns the list of created TokenSet objects.
    """
    token_sets = []
    for tag in self.allowed_tags:
        for interval in self.mentions[tag]:
            covered = []
            for token in standard.tokens:
                if (token.start >= interval.start
                        and token.end <= interval.end
                        and not token.isIgnored()):
                    covered.append(token)

            token_set = TokenSet(covered, tag, standard.text)
            # save the interval within the token set
            # to display it as-is in future
            token_set.interval = interval
            if not is_locorg_allowed and tag == 'locorg':
                token_set.tag = 'loc'
            token_sets.append(token_set)

    return token_sets
def makeTokenSets(self, standard, is_locorg_allowed=True):
    """Build a list of typed TokenSet objects corresponding to the mentions.

    Each interval in self.mentions is tokenized with the provided standard
    data: the resulting TokenSet holds the tokens of `standard` that lie
    entirely within the interval and are not ignored.

    is_locorg_allowed - enable/disable the 'locorg' tag. When disabled,
    token sets built from 'locorg' intervals are re-tagged as 'loc'.

    Returns the list of created TokenSet objects.
    """
    def belongs_to(token, interval):
        # a token is kept when it is fully inside the interval and
        # is not flagged as ignored
        return (token.start >= interval.start
                and token.end <= interval.end
                and not token.isIgnored())

    collected = []
    for tag in self.allowed_tags:
        for interval in self.mentions[tag]:
            entry = TokenSet(
                [tok for tok in standard.tokens if belongs_to(tok, interval)],
                tag,
                standard.text,
            )
            # save the interval within the token set
            # to display it as-is in future
            entry.interval = interval
            if not is_locorg_allowed and tag == 'locorg':
                entry.tag = 'loc'
            collected.append(entry)

    return collected