class LongestMatchGlobalFeature(object):
    """
    Global feature function that flags tokens covered by lookup matches.

    An instance is called with a document, i.e. a sequence of
    ``(token, features)`` pairs where ``token`` has a ``token`` attribute
    holding its text and ``features`` supports item assignment.
    Matching features are written into ``features`` in place.
    """

    def __init__(self, lookup_data, featname):
        """
        Create a global feature function that adds 3 types of features:

        1) B-featname - if current token starts an entity
           from the ``lookup_data``;
        2) I-featname - if current token is inside an entity
           from the ``lookup_data``;
        3) featname - if current token belongs to an entity
           from the ``lookup_data``.

        ``lookup_data`` is used as the matcher directly when it has a
        ``find_ranges`` attribute; otherwise it is wrapped
        in ``LongestMatch``.
        """
        if hasattr(lookup_data, 'find_ranges'):
            # Already a matcher object: use it as-is.
            self.lm = lookup_data
        else:
            self.lm = LongestMatch(lookup_data)

        self.b_featname = 'B-' + featname
        self.i_featname = 'I-' + featname
        self.featname = featname

    def __call__(self, doc):
        """
        Annotate ``doc`` in place with features for every matched range.

        The token texts are collected into a list and handed to
        ``self.lm.find_ranges``; each ``(start, end, matched_text)`` item
        it produces is forwarded to :meth:`process_range`.
        Nothing is returned.
        """
        token_strings = [tok.token for tok, _ in doc]
        for start, end, matched_text in self.lm.find_ranges(token_strings):
            self.process_range(doc, start, end, matched_text)

    def process_range(self, doc, start, end, matched_text):
        """
        Set the features for the tokens at indices ``start`` .. ``end - 1``.

        The token at ``start`` gets B-featname and featname; every later
        token of the range gets I-featname and featname. ``end`` is
        exclusive. ``matched_text`` is not used here.
        """
        doc[start][1][self.b_featname] = True
        doc[start][1][self.featname] = True

        for idx in range(start + 1, end):
            doc[idx][1][self.i_featname] = True
            doc[idx][1][self.featname] = True
class LongestMatchGlobalFeature(object):
    """
    Callable that marks document tokens matched by a lookup.

    The document passed to the instance is a sequence of
    ``(token, features)`` pairs; ``token.token`` is read as the token
    text and ``features`` is updated in place via item assignment.
    """

    def __init__(self, lookup_data, featname):
        """
        Create a global feature function that adds 3 types of features:

        1) B-featname - if current token starts an entity
           from the ``lookup_data``;
        2) I-featname - if current token is inside an entity
           from the ``lookup_data``;
        3) featname - if current token belongs to an entity
           from the ``lookup_data``.

        An object exposing ``find_ranges`` is stored unchanged; any other
        ``lookup_data`` is passed to ``LongestMatch`` to build the matcher.
        """
        has_matcher_api = hasattr(lookup_data, 'find_ranges')
        self.lm = lookup_data if has_matcher_api else LongestMatch(lookup_data)
        self.featname = featname
        self.b_featname = 'B-' + featname
        self.i_featname = 'I-' + featname

    def __call__(self, doc):
        """
        Run the matcher over the token texts of ``doc`` and annotate it.

        Each ``(start, end, matched_text)`` item produced by
        ``self.lm.find_ranges`` is passed to :meth:`process_range`.
        The document is modified in place; nothing is returned.
        """
        token_strings = [tok.token for tok, _ in doc]
        for start, end, matched_text in self.lm.find_ranges(token_strings):
            self.process_range(doc, start, end, matched_text)

    def process_range(self, doc, start, end, matched_text):
        """
        Write the features for one matched range into ``doc``.

        Index ``start`` receives B-featname and featname; indices
        ``start + 1`` up to but excluding ``end`` receive I-featname and
        featname. ``matched_text`` is accepted but not used here.
        """
        first_features = doc[start][1]
        first_features[self.b_featname] = True
        first_features[self.featname] = True

        for idx in range(start + 1, end):
            inner_features = doc[idx][1]
            inner_features[self.i_featname] = True
            inner_features[self.featname] = True
def __init__(self, lookup_data, featname):
    """
    Create a global feature function that adds 3 types of features:

    1) B-featname - if current token starts an entity
       from the ``lookup_data``;
    2) I-featname - if current token is inside an entity
       from the ``lookup_data``;
    3) featname - if current token belongs to an entity
       from the ``lookup_data``.

    ``lookup_data`` is stored as ``self.lm`` unchanged when it has a
    ``find_ranges`` attribute; otherwise ``LongestMatch(lookup_data)``
    is stored instead.
    """
    if hasattr(lookup_data, 'find_ranges'):
        # Already a matcher object: use it as-is.
        self.lm = lookup_data
    else:
        self.lm = LongestMatch(lookup_data)

    self.b_featname = 'B-' + featname
    self.i_featname = 'I-' + featname
    self.featname = featname