def test_srl_reader_can_filter_by_domain(self):
    """Passing ``domain_identifier`` restricts reading to the matching subfolder."""
    reader = SrlReader(domain_identifier="subdomain2")
    filtered = ensure_list(reader.read(AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012'))
    # The whole folder would yield 9 instances; only subdomain2's 2 survive.
    assert len(filtered) == 2
def test_srl_reader_can_filter_by_domain(self):
    """Passing ``domain_identifier`` restricts reading to the matching subfolder."""
    reader = SrlReader(domain_identifier="subdomain2")
    filtered = ensure_list(reader.read('tests/fixtures/conll_2012/'))
    # The whole folder would yield 9 instances; only subdomain2's 2 survive.
    assert len(filtered) == 2
def test_read_from_file(self):
    """The reader yields one instance per (sentence, verbal predicate) pair."""
    reader = SrlReader()
    dataset = reader.read('tests/fixtures/conll_2012/')
    instances = dataset.instances

    sentence_one = ["Mali", "government", "officials", "say", "the", "woman",
                    "'s", "confession", "was", "forced", "."]
    sentence_two = ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                    'after', 'four', 'months', 'of', 'hearings', '.']
    headline = ["Denise", "Dillon", "Headline", "News", "."]

    # (words, verb index or None, expected BIO tags) per instance, in file order.
    expected = [
        (sentence_one, 3, ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
        (sentence_one, 8, ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1',
                           'I-ARG1', 'B-V', 'B-ARG2', 'O']),
        (sentence_two, 2, ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'B-ARGM-TMP', 'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP',
                           'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
        (sentence_two, 11, ['O'] * 11 + ['B-V', 'O']),
        # A sentence with no verbal predicates still yields one all-O instance.
        (headline, None, ['O'] * 5),
    ]

    for index, (words, verb_index, tags) in enumerate(expected):
        fields = instances[index].fields()
        assert fields["tokens"].tokens() == words
        if verb_index is None:
            assert fields["verb_indicator"].sequence_index() is None
        else:
            assert fields["verb_indicator"].sequence_index() == verb_index
        assert fields["tags"].tags() == tags
def test_read_from_file(self, lazy):
    """Instances carry tokens, a one-hot verb indicator and BIO tag labels."""
    reader = SrlReader(lazy=lazy)
    instances = ensure_list(reader.read('tests/fixtures/conll_2012/subdomain'))

    sentence_one = ["Mali", "government", "officials", "say", "the", "woman",
                    "'s", "confession", "was", "forced", "."]
    sentence_two = ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                    'after', 'four', 'months', 'of', 'hearings', '.']
    headline = ["Denise", "Dillon", "Headline", "News", "."]

    # (words, verb index or None, expected BIO tags) per instance, in file order.
    expected = [
        (sentence_one, 3, ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
        (sentence_one, 8, ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1',
                           'I-ARG1', 'B-V', 'B-ARG2', 'O']),
        (sentence_two, 2, ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'B-ARGM-TMP', 'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP',
                           'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
        (sentence_two, 11, ['O'] * 11 + ['B-V', 'O']),
        # A sentence with no verbal predicates has an all-zero indicator.
        (headline, None, ['O'] * 5),
    ]

    for index, (words, verb_index, tags) in enumerate(expected):
        fields = instances[index].fields
        assert [token.text for token in fields['tokens'].tokens] == words
        if verb_index is None:
            assert fields["verb_indicator"].labels == [0] * len(words)
        else:
            assert fields["verb_indicator"].labels[verb_index] == 1
        assert fields["tags"].labels == tags
def test_read_from_file(self, lazy):
    """Instances carry tokens, a one-hot verb indicator and BIO tag labels."""
    reader = SrlReader(lazy=lazy)
    fixture = AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012' / 'subdomain'
    instances = ensure_list(reader.read(fixture))

    sentence_one = ["Mali", "government", "officials", "say", "the", "woman",
                    "'s", "confession", "was", "forced", "."]
    sentence_two = ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                    'after', 'four', 'months', 'of', 'hearings', '.']
    headline = ["Denise", "Dillon", "Headline", "News", "."]

    # (words, verb index or None, expected BIO tags) per instance, in file order.
    expected = [
        (sentence_one, 3, ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
        (sentence_one, 8, ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1',
                           'I-ARG1', 'B-V', 'B-ARG2', 'O']),
        (sentence_two, 2, ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'B-ARGM-TMP', 'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP',
                           'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
        (sentence_two, 11, ['O'] * 11 + ['B-V', 'O']),
        # A sentence with no verbal predicates has an all-zero indicator.
        (headline, None, ['O'] * 5),
    ]

    for index, (words, verb_index, tags) in enumerate(expected):
        fields = instances[index].fields
        assert [token.text for token in fields['tokens'].tokens] == words
        if verb_index is None:
            assert fields["verb_indicator"].labels == [0] * len(words)
        else:
            assert fields["verb_indicator"].labels[verb_index] == 1
        assert fields["tags"].labels == tags
def test_read_from_file(self, lazy):
    """Instances carry tokens, a one-hot verb indicator and BIO tag labels."""
    reader = SrlReader(lazy=lazy)
    fixture = AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012' / 'subdomain'
    instances = ensure_list(reader.read(fixture))

    sentence_one = ["Mali", "government", "officials", "say", "the", "woman",
                    "'s", "confession", "was", "forced", "."]
    sentence_two = ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                    'after', 'four', 'months', 'of', 'hearings', '.']
    headline = ["Denise", "Dillon", "Headline", "News", "."]

    # (words, verb index or None, expected BIO tags) per instance, in file order.
    expected = [
        (sentence_one, 3, ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
        (sentence_one, 8, ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1',
                           'I-ARG1', 'B-V', 'B-ARG2', 'O']),
        (sentence_two, 2, ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                           'B-ARGM-TMP', 'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP',
                           'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
        (sentence_two, 11, ['O'] * 11 + ['B-V', 'O']),
        # A sentence with no verbal predicates has an all-zero indicator.
        (headline, None, ['O'] * 5),
    ]

    for index, (words, verb_index, tags) in enumerate(expected):
        fields = instances[index].fields
        assert [token.text for token in fields['tokens'].tokens] == words
        if verb_index is None:
            assert fields["verb_indicator"].labels == [0] * len(words)
        else:
            assert fields["verb_indicator"].labels[verb_index] == 1
        assert fields["tags"].labels == tags
def setUp(self):
    # Build one SrlReader configured with the uncased BERT wordpiece
    # tokenizer; each test in this class reads from self.reader.
    super().setUp()
    self.reader = SrlReader(bert_model_name="bert-base-uncased")
class TestBertSrlReader(AllenNlpTestCase):
    """Tests for the BERT-backed ``SrlReader`` and its wordpiece helpers."""

    def setUp(self):
        # One reader per test, configured with the uncased BERT wordpiece tokenizer.
        super().setUp()
        self.reader = SrlReader(bert_model_name="bert-base-uncased")

    def test_convert_tags_to_wordpiece_tags(self):
        # NOTE: the original test assigned `offsets` twice in a row; the first
        # (1-based duplicate) assignment was dead code and has been removed.
        # Consecutive offsets: every word maps to exactly one wordpiece, so the
        # conversion only pads an 'O' for [CLS] and [SEP].
        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        original = [
            "B-ARG0", "I-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1",
            "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O",
        ]
        wordpiece_tags = [
            "O", "B-ARG0", "I-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1",
            "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O", "O",
        ]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == wordpiece_tags

        # Here one tagged word spans two wordpieces, so its I-ARG1 is repeated.
        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == [
            "O", "B-ARG0", "I-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1",
            "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O",
        ]

        # A verb split across wordpieces continues with I-V after its B-V.
        offsets = [1, 3, 5]
        original = ["B-ARG", "B-V", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ["O", "B-ARG", "B-V", "I-V", "O", "O", "O"]

        # A B-ARG word split across wordpieces continues with I-ARG.
        offsets = [2, 3, 5]
        original = ["B-ARG", "I-ARG", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ["O", "B-ARG", "I-ARG", "I-ARG", "O", "O", "O"]

    def test_wordpiece_tokenize_input(self):
        # `offsets` point at the LAST wordpiece of each word, `start_offsets`
        # at the FIRST; [CLS]/[SEP] are added around the sentence.
        wordpieces, offsets, start_offsets = self.reader._wordpiece_tokenize_input(
            "This is a sentenceandsomepieces with a reallylongword".split(" ")
        )
        assert wordpieces == [
            "[CLS]", "this", "is", "a", "sentence", "##ands", "##ome",
            "##piece", "##s", "with", "a", "really", "##long", "##word", "[SEP]",
        ]
        assert [wordpieces[i] for i in offsets] == [
            "this", "is", "a", "##s", "with", "a", "##word",
        ]
        assert [wordpieces[i] for i in start_offsets] == [
            "this", "is", "a", "sentence", "with", "a", "really",
        ]

    def test_read_from_file(self):
        """Tags and indicators are in wordpiece space, padded for [CLS]/[SEP]."""
        conll_reader = self.reader
        instances = conll_reader.read(
            AllenNlpTestCase.FIXTURES_ROOT / "conll_2012" / "subdomain"
        )
        instances = ensure_list(instances)

        sentence_one = ["Mali", "government", "officials", "say", "the", "woman",
                        "'s", "confession", "was", "forced", "."]
        sentence_two = ["The", "prosecution", "rested", "its", "case", "last",
                        "month", "after", "four", "months", "of", "hearings", "."]

        fields = instances[0].fields
        assert fields["metadata"]["words"] == sentence_one
        assert fields["verb_indicator"].labels[4] == 1
        assert fields["tags"].labels == [
            "O", "B-ARG0", "I-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1",
            "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O", "O",
        ]

        fields = instances[1].fields
        assert fields["metadata"]["words"] == sentence_one
        assert fields["verb_indicator"].labels[10] == 1
        assert fields["tags"].labels == [
            "O", "O", "O", "O", "O", "B-ARG1", "I-ARG1", "I-ARG1",
            "I-ARG1", "I-ARG1", "B-V", "B-ARG2", "O", "O",
        ]

        fields = instances[2].fields
        assert fields["metadata"]["words"] == sentence_two
        assert fields["verb_indicator"].labels[3] == 1
        assert fields["tags"].labels == [
            "O", "B-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1", "B-ARGM-TMP",
            "I-ARGM-TMP", "B-ARGM-TMP", "I-ARGM-TMP", "I-ARGM-TMP",
            "I-ARGM-TMP", "I-ARGM-TMP", "O", "O",
        ]

        fields = instances[3].fields
        assert fields["metadata"]["words"] == sentence_two
        assert fields["verb_indicator"].labels[12] == 1
        assert fields["tags"].labels == ["O"] * 12 + ["B-V", "O", "O"]

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        assert fields["metadata"]["words"] == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0, 0, 0]
        assert fields["tags"].labels == ["O", "O", "O", "O", "O", "O", "O"]
def test_read_from_file(self, lazy):
    """Instances carry tokens, verb indicator, BIO tags and matching metadata."""
    reader = SrlReader(lazy=lazy)
    fixture = AllenNlpTestCase.FIXTURES_ROOT / "conll_2012" / "subdomain"
    instances = ensure_list(reader.read(fixture))

    sentence_one = ["Mali", "government", "officials", "say", "the", "woman",
                    "'s", "confession", "was", "forced", "."]
    sentence_two = ["The", "prosecution", "rested", "its", "case", "last",
                    "month", "after", "four", "months", "of", "hearings", "."]
    headline = ["Denise", "Dillon", "Headline", "News", "."]

    def check(index, words, verb_index, tags):
        # Verify one instance's fields and that metadata mirrors them.
        fields = instances[index].fields
        assert [token.text for token in fields["tokens"].tokens] == words
        if verb_index is None:
            assert fields["verb_indicator"].labels == [0] * len(words)
        else:
            assert fields["verb_indicator"].labels[verb_index] == 1
        assert fields["tags"].labels == tags
        metadata = fields["metadata"].metadata
        assert metadata["words"] == words
        if verb_index is None:
            assert metadata["verb"] is None
        else:
            assert metadata["verb"] == words[verb_index]
        assert metadata["gold_tags"] == tags

    check(0, sentence_one, 3,
          ["B-ARG0", "I-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1",
           "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O"])
    check(1, sentence_one, 8,
          ["O", "O", "O", "O", "B-ARG1", "I-ARG1", "I-ARG1",
           "I-ARG1", "B-V", "B-ARG2", "O"])
    check(2, sentence_two, 2,
          ["B-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1", "B-ARGM-TMP",
           "I-ARGM-TMP", "B-ARGM-TMP", "I-ARGM-TMP", "I-ARGM-TMP",
           "I-ARGM-TMP", "I-ARGM-TMP", "O"])
    check(3, sentence_two, 11, ["O"] * 11 + ["B-V", "O"])
    # Tests a sentence with no verbal predicates.
    check(4, headline, None, ["O"] * 5)
class TestBertSrlReader(AllenNlpTestCase):
    """Tests for the BERT-backed ``SrlReader`` and its wordpiece helpers."""

    def setUp(self):
        # One reader per test, configured with the uncased BERT wordpiece tokenizer.
        super().setUp()
        self.reader = SrlReader(bert_model_name="bert-base-uncased")

    def test_convert_tags_to_wordpiece_tags(self):
        # pylint: disable=protected-access
        # NOTE: the original test assigned `offsets` twice in a row; the first
        # (1-based duplicate) assignment was dead code and has been removed.
        # Consecutive offsets: every word maps to exactly one wordpiece, so the
        # conversion only pads an 'O' for [CLS] and [SEP].
        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        original = ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                    'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']
        wordpiece_tags = ['O', 'B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                          'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O', 'O']
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == wordpiece_tags

        # Here one tagged word spans two wordpieces, so its I-ARG1 is repeated.
        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ['O', 'B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                             'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1',
                             'I-ARG1', 'O']

        # A verb split across wordpieces continues with I-V after its B-V.
        offsets = [1, 3, 5]
        original = ["B-ARG", "B-V", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ['O', 'B-ARG', 'B-V', 'I-V', 'O', 'O', 'O']

        # A B-ARG word split across wordpieces continues with I-ARG.
        offsets = [2, 3, 5]
        original = ["B-ARG", "I-ARG", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ['O', 'B-ARG', 'I-ARG', 'I-ARG', 'O', 'O', 'O']
        # pylint: enable=protected-access

    def test_wordpiece_tokenize_input(self):
        # `offsets` point at the LAST wordpiece of each word, `start_offsets`
        # at the FIRST; [CLS]/[SEP] are added around the sentence.
        wordpieces, offsets, start_offsets = self.reader._wordpiece_tokenize_input(  # pylint: disable=protected-access
            "This is a sentenceandsomepieces with a reallylongword".split(" "))
        assert wordpieces == ['[CLS]', 'this', 'is', 'a', 'sentence', '##ands',
                              '##ome', '##piece', '##s', 'with', 'a', 'really',
                              '##long', '##word', '[SEP]']
        assert [wordpieces[i] for i in offsets] == ['this', 'is', 'a', '##s',
                                                    'with', 'a', '##word']
        assert [wordpieces[i] for i in start_offsets] == ['this', 'is', 'a',
                                                          'sentence', 'with',
                                                          'a', 'really']

    def test_read_from_file(self):
        """Tags and indicators are in wordpiece space, padded for [CLS]/[SEP]."""
        conll_reader = self.reader
        instances = conll_reader.read(AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012' / 'subdomain')
        instances = ensure_list(instances)

        sentence_one = ["Mali", "government", "officials", "say", "the", "woman",
                        "'s", "confession", "was", "forced", "."]
        sentence_two = ['The', 'prosecution', 'rested', 'its', 'case', 'last',
                        'month', 'after', 'four', 'months', 'of', 'hearings', '.']

        fields = instances[0].fields
        assert fields["metadata"]["words"] == sentence_one
        assert fields["verb_indicator"].labels[4] == 1
        assert fields["tags"].labels == ['O', 'B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V',
                                         'B-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1',
                                         'I-ARG1', 'I-ARG1', 'I-ARG1', 'O', 'O']

        fields = instances[1].fields
        assert fields["metadata"]["words"] == sentence_one
        assert fields["verb_indicator"].labels[10] == 1
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O', 'B-ARG1',
                                         'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1',
                                         'B-V', 'B-ARG2', 'O', 'O']

        fields = instances[2].fields
        assert fields["metadata"]["words"] == sentence_two
        assert fields["verb_indicator"].labels[3] == 1
        assert fields["tags"].labels == ['O', 'B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                                         'I-ARG1', 'B-ARGM-TMP', 'I-ARGM-TMP',
                                         'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
                                         'I-ARGM-TMP', 'I-ARGM-TMP', 'O', 'O']

        fields = instances[3].fields
        assert fields["metadata"]["words"] == sentence_two
        assert fields["verb_indicator"].labels[12] == 1
        assert fields["tags"].labels == ['O'] * 12 + ['B-V', 'O', 'O']

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        assert fields["metadata"]["words"] == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0, 0, 0]
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O', 'O', 'O']