def align_span(self, words: List[str], tags: List[str]) -> List[Tuple[int, int, str]]:
    """Convert token-level entity tags into character-level spans.

    Offsets index into the string ``self.delimiter.join(words)``.

    Args:
        words: The tokens of one sequence.
        tags: The tags handed to ``Tokens`` together with ``self.scheme``.

    Returns:
        One ``(start, end, tag)`` tuple per entity reported by ``Tokens``,
        where ``start``/``end`` are character offsets (``end`` exclusive).
    """
    tokens = Tokens(tags, self.scheme)
    delimiter = self.delimiter
    gap = len(delimiter)
    labels = []
    for entity in tokens.entities:
        preceding = words[:entity.start]
        # In the joined text each preceding word is followed by exactly one
        # delimiter. Counting words (instead of testing whether the joined
        # prefix is non-empty) keeps the offset right even when the only
        # preceding word is an empty string.
        start = sum(len(word) for word in preceding) + gap * len(preceding)
        chunk = words[entity.start:entity.end]
        end = start + len(delimiter.join(chunk))
        labels.append((start, end, entity.tag))
    return labels
def get_label(self, words: List[str], tags: List[str], delimiter: str) -> List[Label]:
    """Build character-level labels from token-level tags.

    Offsets index into the string ``delimiter.join(words)``.

    Args:
        words: The tokens of one sequence.
        tags: The tags handed to ``Tokens``.
        delimiter: The string placed between consecutive words.

    Returns:
        One object per entity reported by ``Tokens``, produced by
        ``self.label_class.parse((start, end, tag))`` where ``start``/``end``
        are character offsets (``end`` exclusive).
    """
    # The scheme name comes from self.kwargs and falls back to 'IOB2'.
    scheme = self.get_scheme(self.kwargs.get('scheme', 'IOB2'))
    tokens = Tokens(tags, scheme)
    gap = len(delimiter)
    labels = []
    for entity in tokens.entities:
        preceding = words[:entity.start]
        # In the joined text each preceding word is followed by exactly one
        # delimiter. Counting words (instead of testing whether the joined
        # prefix is non-empty) keeps the offset right even when the only
        # preceding word is an empty string.
        start = sum(len(word) for word in preceding) + gap * len(preceding)
        chunk = words[entity.start:entity.end]
        end = start + len(delimiter.join(chunk))
        labels.append(self.label_class.parse((start, end, entity.tag)))
    return labels
def get_entities(seq):
    """Return ``(tag, start, end)`` for every entity ``Tokens`` finds in *seq* under IOB2."""
    found = []
    for item in Tokens(seq, IOB2).entities:
        found.append((item.tag, item.start, item.end))
    return found
def test_raise_exception_when_iobes_tokens_with_iob2_scheme(self):
    """Reading entities from tags containing E-/S- prefixes under IOB2 must raise ValueError."""
    iobes_tags = ['B-PER', 'E-PER', 'S-PER']
    parsed = Tokens(iobes_tags, IOB2)
    with pytest.raises(ValueError):
        # Only the attribute access is under test; its value is discarded.
        _ = parsed.entities
def test_bilou_tokens_without_tag(tokens, expected):
    """Check entities parsed under BILOU, comparing each entity tuple minus its first element."""
    parsed = Tokens(tokens, BILOU)
    actual = []
    for found in parsed.entities:
        actual.append(found.to_tuple()[1:])
    assert actual == expected
def test_iobes_tokens_without_tag(tokens, expected):
    """Check entities parsed under IOBES, comparing each entity tuple minus its first element."""
    sequence = Tokens(tokens, IOBES)
    observed = list(map(lambda ent: ent.to_tuple()[1:], sequence.entities))
    assert observed == expected
def test_ioe2_tokens(tokens, expected):
    """Check entities parsed under IOE2, comparing each entity tuple minus its first element."""
    result = []
    for span in Tokens(tokens, IOE2).entities:
        fields = span.to_tuple()
        result.append(fields[1:])
    assert result == expected