Example #1
    def test_srl_reader_can_filter_by_domain(self):

        conll_reader = SrlReader(domain_identifier="subdomain2")
        instances = conll_reader.read(AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012')
        instances = ensure_list(instances)
        # If we'd included the folder, we'd have 9 instances.
        assert len(instances) == 2
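These snippets omit their import lines. Assuming they come from AllenNLP's dataset-reader test suites (the exact module paths vary between AllenNLP releases, so treat the paths below as a best guess rather than a guarantee), a header along these lines would cover the names the examples rely on:

    # Assumed imports for the SrlReader snippets in this listing; module paths
    # may differ depending on the AllenNLP version each example was taken from.
    from allennlp.common.testing import AllenNlpTestCase
    from allennlp.common.util import ensure_list
    from allennlp.data.dataset_readers import SrlReader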
Example #2
    def test_srl_reader_can_filter_by_domain(self):

        conll_reader = SrlReader(domain_identifier="subdomain2")
        instances = conll_reader.read('tests/fixtures/conll_2012/')
        instances = ensure_list(instances)
        # If we'd included the folder, we'd have 9 instances.
        assert len(instances) == 2
Example #3
    def test_read_from_file(self):
        conll_reader = SrlReader()
        dataset = conll_reader.read('tests/fixtures/conll_2012/')
        instances = dataset.instances
        fields = instances[0].fields()
        assert fields["tokens"].tokens() == ["Mali", "government", "officials", "say",
                                             "the", "woman", "'s", "confession", "was", "forced", "."]
        assert fields["verb_indicator"].sequence_index() == 3
        assert fields["tags"].tags() == ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                                         'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']
        fields = instances[1].fields()
        assert fields["tokens"].tokens() == ["Mali", "government", "officials", "say",
                                             "the", "woman", "'s", "confession", "was", "forced", "."]
        assert fields["verb_indicator"].sequence_index() == 8
        assert fields["tags"].tags() == ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1',
                                         'I-ARG1', 'I-ARG1', 'B-V', 'B-ARG2', 'O']
        fields = instances[2].fields()
        assert fields["tokens"].tokens() == ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                                             'after', 'four', 'months', 'of', 'hearings', '.']
        assert fields["verb_indicator"].sequence_index() == 2
        assert fields["tags"].tags() == ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP',
                                         'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
                                         'I-ARGM-TMP', 'I-ARGM-TMP', 'O']
        fields = instances[3].fields()
        assert fields["tokens"].tokens() == ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                                             'after', 'four', 'months', 'of', 'hearings', '.']
        assert fields["verb_indicator"].sequence_index() == 11
        assert fields["tags"].tags() == ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-V', 'O']

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields()
        assert fields["tokens"].tokens() == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].sequence_index() is None
        assert fields["tags"].tags() == ['O', 'O', 'O', 'O', 'O']
Example #4
    def test_srl_reader_can_filter_by_domain(self):

        conll_reader = SrlReader(domain_identifier="subdomain2")
        instances = conll_reader.read('tests/fixtures/conll_2012/')
        instances = ensure_list(instances)
        # If we'd included the folder, we'd have 9 instances.
        assert len(instances) == 2
Example #5
    def test_read_from_file(self, lazy):
        conll_reader = SrlReader(lazy=lazy)
        instances = conll_reader.read('tests/fixtures/conll_2012/subdomain')
        instances = ensure_list(instances)

        fields = instances[0].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == [
            "Mali", "government", "officials", "say", "the", "woman", "'s",
            "confession", "was", "forced", "."
        ]
        assert fields["verb_indicator"].labels[3] == 1
        assert fields["tags"].labels == [
            'B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'I-ARG1',
            'I-ARG1', 'I-ARG1', 'I-ARG1', 'O'
        ]

        fields = instances[1].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == [
            "Mali", "government", "officials", "say", "the", "woman", "'s",
            "confession", "was", "forced", "."
        ]
        assert fields["verb_indicator"].labels[8] == 1
        assert fields["tags"].labels == [
            'O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'B-V',
            'B-ARG2', 'O'
        ]

        fields = instances[2].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == [
            'The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
            'after', 'four', 'months', 'of', 'hearings', '.'
        ]
        assert fields["verb_indicator"].labels[2] == 1
        assert fields["tags"].labels == [
            'B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP',
            'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
            'I-ARGM-TMP', 'I-ARGM-TMP', 'O'
        ]

        fields = instances[3].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == [
            'The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
            'after', 'four', 'months', 'of', 'hearings', '.'
        ]
        assert fields["verb_indicator"].labels[11] == 1
        assert fields["tags"].labels == [
            'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-V', 'O'
        ]

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0]
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O']
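Examples #5, #6, #7, and #10 define test_read_from_file(self, lazy), so they evidently rely on a parametrization mechanism that the listing does not show. A minimal sketch of how such a test could be driven in both lazy and non-lazy mode (the decorator, class name, and import path here are assumptions, not part of the original snippets):

    import pytest

    from allennlp.data.dataset_readers import SrlReader  # assumed import path

    class TestSrlReader:
        # Hypothetical parametrization: a plain class is used because
        # pytest.mark.parametrize does not apply to unittest.TestCase methods.
        @pytest.mark.parametrize("lazy", (True, False))
        def test_read_from_file(self, lazy):
            conll_reader = SrlReader(lazy=lazy)
            ...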
Example #6
    def test_read_from_file(self, lazy):
        conll_reader = SrlReader(lazy=lazy)
        instances = conll_reader.read(AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012' / 'subdomain')
        instances = ensure_list(instances)

        fields = instances[0].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == ["Mali", "government", "officials", "say", "the", "woman", "'s",
                          "confession", "was", "forced", "."]
        assert fields["verb_indicator"].labels[3] == 1
        assert fields["tags"].labels == ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                                         'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']

        fields = instances[1].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == ["Mali", "government", "officials", "say", "the", "woman", "'s",
                          "confession", "was", "forced", "."]
        assert fields["verb_indicator"].labels[8] == 1
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1',
                                         'I-ARG1', 'I-ARG1', 'B-V', 'B-ARG2', 'O']

        fields = instances[2].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month', 'after',
                          'four', 'months', 'of', 'hearings', '.']
        assert fields["verb_indicator"].labels[2] == 1
        assert fields["tags"].labels == ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP',
                                         'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
                                         'I-ARGM-TMP', 'I-ARGM-TMP', 'O']

        fields = instances[3].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month', 'after',
                          'four', 'months', 'of', 'hearings', '.']
        assert fields["verb_indicator"].labels[11] == 1
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-V', 'O']

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0]
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O']
Example #7
    def test_read_from_file(self, lazy):
        conll_reader = SrlReader(lazy=lazy)
        instances = conll_reader.read(AllenNlpTestCase.FIXTURES_ROOT /
                                      u'conll_2012' / u'subdomain')
        instances = ensure_list(instances)

        fields = instances[0].fields
        tokens = [t.text for t in fields[u'tokens'].tokens]
        assert tokens == [
            u"Mali", u"government", u"officials", u"say", u"the", u"woman",
            u"'s", u"confession", u"was", u"forced", u"."
        ]
        assert fields[u"verb_indicator"].labels[3] == 1
        assert fields[u"tags"].labels == [
            u'B-ARG0', u'I-ARG0', u'I-ARG0', u'B-V', u'B-ARG1', u'I-ARG1',
            u'I-ARG1', u'I-ARG1', u'I-ARG1', u'I-ARG1', u'O'
        ]

        fields = instances[1].fields
        tokens = [t.text for t in fields[u'tokens'].tokens]
        assert tokens == [
            u"Mali", u"government", u"officials", u"say", u"the", u"woman",
            u"'s", u"confession", u"was", u"forced", u"."
        ]
        assert fields[u"verb_indicator"].labels[8] == 1
        assert fields[u"tags"].labels == [
            u'O', u'O', u'O', u'O', u'B-ARG1', u'I-ARG1', u'I-ARG1', u'I-ARG1',
            u'B-V', u'B-ARG2', u'O'
        ]

        fields = instances[2].fields
        tokens = [t.text for t in fields[u'tokens'].tokens]
        assert tokens == [
            u'The', u'prosecution', u'rested', u'its', u'case', u'last',
            u'month', u'after', u'four', u'months', u'of', u'hearings', u'.'
        ]
        assert fields[u"verb_indicator"].labels[2] == 1
        assert fields[u"tags"].labels == [
            u'B-ARG0', u'I-ARG0', u'B-V', u'B-ARG1', u'I-ARG1', u'B-ARGM-TMP',
            u'I-ARGM-TMP', u'B-ARGM-TMP', u'I-ARGM-TMP', u'I-ARGM-TMP',
            u'I-ARGM-TMP', u'I-ARGM-TMP', u'O'
        ]

        fields = instances[3].fields
        tokens = [t.text for t in fields[u'tokens'].tokens]
        assert tokens == [
            u'The', u'prosecution', u'rested', u'its', u'case', u'last',
            u'month', u'after', u'four', u'months', u'of', u'hearings', u'.'
        ]
        assert fields[u"verb_indicator"].labels[11] == 1
        assert fields[u"tags"].labels == [
            u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O',
            u'B-V', u'O'
        ]

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = [t.text for t in fields[u'tokens'].tokens]
        assert tokens == [u"Denise", u"Dillon", u"Headline", u"News", u"."]
        assert fields[u"verb_indicator"].labels == [0, 0, 0, 0, 0]
        assert fields[u"tags"].labels == [u'O', u'O', u'O', u'O', u'O']
Example #8
    def setUp(self):
        super().setUp()
        self.reader = SrlReader(bert_model_name="bert-base-uncased")
Example #9
class TestBertSrlReader(AllenNlpTestCase):
    def setUp(self):
        super().setUp()
        self.reader = SrlReader(bert_model_name="bert-base-uncased")

    def test_convert_tags_to_wordpiece_tags(self):

        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        original = [
            "B-ARG0",
            "I-ARG0",
            "I-ARG0",
            "B-V",
            "B-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "O",
        ]
        wordpiece_tags = [
            "O",
            "B-ARG0",
            "I-ARG0",
            "I-ARG0",
            "B-V",
            "B-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "O",
            "O",
        ]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == wordpiece_tags

        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == [
            "O",
            "B-ARG0",
            "I-ARG0",
            "I-ARG0",
            "B-V",
            "B-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "O",
        ]

        offsets = [1, 3, 5]
        original = ["B-ARG", "B-V", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ["O", "B-ARG", "B-V", "I-V", "O", "O", "O"]

        offsets = [2, 3, 5]
        original = ["B-ARG", "I-ARG", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ["O", "B-ARG", "I-ARG", "I-ARG", "O", "O", "O"]

    def test_wordpiece_tokenize_input(self):
        wordpieces, offsets, start_offsets = self.reader._wordpiece_tokenize_input(
            "This is a sentenceandsomepieces with a reallylongword".split(" "))

        assert wordpieces == [
            "[CLS]",
            "this",
            "is",
            "a",
            "sentence",
            "##ands",
            "##ome",
            "##piece",
            "##s",
            "with",
            "a",
            "really",
            "##long",
            "##word",
            "[SEP]",
        ]
        assert [wordpieces[i] for i in offsets
                ] == ["this", "is", "a", "##s", "with", "a", "##word"]
        assert [wordpieces[i] for i in start_offsets] == [
            "this",
            "is",
            "a",
            "sentence",
            "with",
            "a",
            "really",
        ]

    def test_read_from_file(self):
        conll_reader = self.reader
        instances = conll_reader.read(AllenNlpTestCase.FIXTURES_ROOT /
                                      "conll_2012" / "subdomain")
        instances = ensure_list(instances)
        fields = instances[0].fields
        tokens = fields["metadata"]["words"]
        assert tokens == [
            "Mali",
            "government",
            "officials",
            "say",
            "the",
            "woman",
            "'s",
            "confession",
            "was",
            "forced",
            ".",
        ]
        assert fields["verb_indicator"].labels[4] == 1

        assert fields["tags"].labels == [
            "O",
            "B-ARG0",
            "I-ARG0",
            "I-ARG0",
            "B-V",
            "B-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "O",
            "O",
        ]

        fields = instances[1].fields
        tokens = fields["metadata"]["words"]
        assert tokens == [
            "Mali",
            "government",
            "officials",
            "say",
            "the",
            "woman",
            "'s",
            "confession",
            "was",
            "forced",
            ".",
        ]
        assert fields["verb_indicator"].labels[10] == 1
        assert fields["tags"].labels == [
            "O",
            "O",
            "O",
            "O",
            "O",
            "B-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "B-V",
            "B-ARG2",
            "O",
            "O",
        ]

        fields = instances[2].fields
        tokens = fields["metadata"]["words"]
        assert tokens == [
            "The",
            "prosecution",
            "rested",
            "its",
            "case",
            "last",
            "month",
            "after",
            "four",
            "months",
            "of",
            "hearings",
            ".",
        ]
        assert fields["verb_indicator"].labels[3] == 1
        assert fields["tags"].labels == [
            "O",
            "B-ARG0",
            "I-ARG0",
            "B-V",
            "B-ARG1",
            "I-ARG1",
            "B-ARGM-TMP",
            "I-ARGM-TMP",
            "B-ARGM-TMP",
            "I-ARGM-TMP",
            "I-ARGM-TMP",
            "I-ARGM-TMP",
            "I-ARGM-TMP",
            "O",
            "O",
        ]

        fields = instances[3].fields
        tokens = fields["metadata"]["words"]
        assert tokens == [
            "The",
            "prosecution",
            "rested",
            "its",
            "case",
            "last",
            "month",
            "after",
            "four",
            "months",
            "of",
            "hearings",
            ".",
        ]
        assert fields["verb_indicator"].labels[12] == 1
        assert fields["tags"].labels == [
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "B-V",
            "O",
            "O",
        ]

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = fields["metadata"]["words"]
        assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0, 0, 0]
        assert fields["tags"].labels == ["O", "O", "O", "O", "O", "O", "O"]
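The test_convert_tags_to_wordpiece_tags cases in Examples #9 and #11 pin down the helper's contract: each entry of offsets is the index of the last wordpiece of the corresponding word in the full [CLS] ... [SEP] sequence ([CLS] sits at index 0), the word's BIO tag is spread over all of its wordpieces with only the first piece keeping a B- prefix, and [CLS]/[SEP] are tagged O. A self-contained sketch that satisfies those assertions (an illustration of the expected behaviour, not AllenNLP's actual implementation):

    from typing import List

    def convert_tags_to_wordpiece_tags(tags: List[str], end_offsets: List[int]) -> List[str]:
        # Spread each word-level BIO tag over that word's wordpieces.
        # end_offsets[i] is the index of the last wordpiece of word i, counting
        # [CLS] as position 0; only the first wordpiece of a word keeps its
        # B- prefix, later pieces become I- of the same label.
        new_tags = []
        j = 0
        for tag, offset in zip(tags, end_offsets):
            is_start = True
            while j < offset:
                if tag == "O":
                    new_tags.append("O")
                elif is_start:
                    new_tags.append(tag)
                    is_start = False
                else:
                    new_tags.append("I-" + tag.split("-", 1)[1])
                j += 1
        # [CLS] and [SEP] both receive O.
        return ["O"] + new_tags + ["O"]

    assert convert_tags_to_wordpiece_tags(["B-ARG", "B-V", "O"], [1, 3, 5]) == [
        "O", "B-ARG", "B-V", "I-V", "O", "O", "O"]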
Example #10
    def test_read_from_file(self, lazy):
        conll_reader = SrlReader(lazy=lazy)
        instances = conll_reader.read(AllenNlpTestCase.FIXTURES_ROOT /
                                      "conll_2012" / "subdomain")
        instances = ensure_list(instances)

        fields = instances[0].fields
        tokens = [t.text for t in fields["tokens"].tokens]
        assert tokens == [
            "Mali",
            "government",
            "officials",
            "say",
            "the",
            "woman",
            "'s",
            "confession",
            "was",
            "forced",
            ".",
        ]
        assert fields["verb_indicator"].labels[3] == 1
        assert fields["tags"].labels == [
            "B-ARG0",
            "I-ARG0",
            "I-ARG0",
            "B-V",
            "B-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "O",
        ]
        assert fields["metadata"].metadata["words"] == tokens
        assert fields["metadata"].metadata["verb"] == tokens[3]
        assert fields["metadata"].metadata["gold_tags"] == fields[
            "tags"].labels

        fields = instances[1].fields
        tokens = [t.text for t in fields["tokens"].tokens]
        assert tokens == [
            "Mali",
            "government",
            "officials",
            "say",
            "the",
            "woman",
            "'s",
            "confession",
            "was",
            "forced",
            ".",
        ]
        assert fields["verb_indicator"].labels[8] == 1
        assert fields["tags"].labels == [
            "O",
            "O",
            "O",
            "O",
            "B-ARG1",
            "I-ARG1",
            "I-ARG1",
            "I-ARG1",
            "B-V",
            "B-ARG2",
            "O",
        ]
        assert fields["metadata"].metadata["words"] == tokens
        assert fields["metadata"].metadata["verb"] == tokens[8]
        assert fields["metadata"].metadata["gold_tags"] == fields[
            "tags"].labels

        fields = instances[2].fields
        tokens = [t.text for t in fields["tokens"].tokens]
        assert tokens == [
            "The",
            "prosecution",
            "rested",
            "its",
            "case",
            "last",
            "month",
            "after",
            "four",
            "months",
            "of",
            "hearings",
            ".",
        ]
        assert fields["verb_indicator"].labels[2] == 1
        assert fields["tags"].labels == [
            "B-ARG0",
            "I-ARG0",
            "B-V",
            "B-ARG1",
            "I-ARG1",
            "B-ARGM-TMP",
            "I-ARGM-TMP",
            "B-ARGM-TMP",
            "I-ARGM-TMP",
            "I-ARGM-TMP",
            "I-ARGM-TMP",
            "I-ARGM-TMP",
            "O",
        ]
        assert fields["metadata"].metadata["words"] == tokens
        assert fields["metadata"].metadata["verb"] == tokens[2]
        assert fields["metadata"].metadata["gold_tags"] == fields[
            "tags"].labels

        fields = instances[3].fields
        tokens = [t.text for t in fields["tokens"].tokens]
        assert tokens == [
            "The",
            "prosecution",
            "rested",
            "its",
            "case",
            "last",
            "month",
            "after",
            "four",
            "months",
            "of",
            "hearings",
            ".",
        ]
        assert fields["verb_indicator"].labels[11] == 1
        assert fields["tags"].labels == [
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "O",
            "B-V",
            "O",
        ]
        assert fields["metadata"].metadata["words"] == tokens
        assert fields["metadata"].metadata["verb"] == tokens[11]
        assert fields["metadata"].metadata["gold_tags"] == fields[
            "tags"].labels

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = [t.text for t in fields["tokens"].tokens]
        assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0]
        assert fields["tags"].labels == ["O", "O", "O", "O", "O"]
        assert fields["metadata"].metadata["words"] == tokens
        assert fields["metadata"].metadata["verb"] is None
        assert fields["metadata"].metadata["gold_tags"] == fields[
            "tags"].labels
Example #11
class TestBertSrlReader(AllenNlpTestCase):

    def setUp(self):
        super().setUp()
        self.reader = SrlReader(bert_model_name="bert-base-uncased")

    def test_convert_tags_to_wordpiece_tags(self):
        # pylint: disable=protected-access
        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
        original = ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                    'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']
        wordpiece_tags = ['O', 'B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                          'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O', 'O']
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == wordpiece_tags

        offsets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ['O', 'B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                             'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']

        offsets = [1, 3, 5]
        original = ["B-ARG", "B-V", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ['O', 'B-ARG', 'B-V', 'I-V', 'O', 'O', 'O']

        offsets = [2, 3, 5]
        original = ["B-ARG", "I-ARG", "O"]
        converted = _convert_tags_to_wordpiece_tags(original, offsets)
        assert converted == ['O', 'B-ARG', 'I-ARG', 'I-ARG', 'O', 'O', 'O']
        # pylint: enable=protected-access


    def test_wordpiece_tokenize_input(self):
        wordpieces, offsets, start_offsets = self.reader._wordpiece_tokenize_input( # pylint: disable=protected-access
                "This is a sentenceandsomepieces with a reallylongword".split(" "))

        assert wordpieces == ['[CLS]', 'this', 'is', 'a', 'sentence', '##ands', '##ome',
                              '##piece', '##s', 'with', 'a', 'really', '##long', '##word', '[SEP]']
        assert [wordpieces[i] for i in offsets] == ['this', 'is', 'a', '##s', 'with', 'a', '##word']
        assert [wordpieces[i] for i in start_offsets] == ['this', 'is', 'a', 'sentence', 'with', 'a', 'really']


    def test_read_from_file(self):
        conll_reader = self.reader
        instances = conll_reader.read(AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012' / 'subdomain')
        instances = ensure_list(instances)
        fields = instances[0].fields
        tokens = fields["metadata"]["words"]
        assert tokens == ["Mali", "government", "officials", "say", "the", "woman", "'s",
                          "confession", "was", "forced", "."]
        assert fields["verb_indicator"].labels[4] == 1

        assert fields["tags"].labels == ['O', 'B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1',
                                         'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O', 'O']

        fields = instances[1].fields
        tokens = fields["metadata"]["words"]
        assert tokens == ["Mali", "government", "officials", "say", "the", "woman", "'s",
                          "confession", "was", "forced", "."]
        assert fields["verb_indicator"].labels[10] == 1
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1',
                                         'I-ARG1', 'I-ARG1', 'B-V', 'B-ARG2', 'O', 'O']

        fields = instances[2].fields
        tokens = fields["metadata"]["words"]
        assert tokens == ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month', 'after',
                          'four', 'months', 'of', 'hearings', '.']
        assert fields["verb_indicator"].labels[3] == 1
        assert fields["tags"].labels == ['O', 'B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP',
                                         'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
                                         'I-ARGM-TMP', 'I-ARGM-TMP', 'O', 'O']


        fields = instances[3].fields
        tokens = fields["metadata"]["words"]
        assert tokens == ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month', 'after',
                          'four', 'months', 'of', 'hearings', '.']
        assert fields["verb_indicator"].labels[12] == 1
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
                                         'O', 'O', 'O', 'O', 'B-V', 'O', 'O']

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = fields["metadata"]["words"]
        assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0, 0, 0]
        assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O', 'O', 'O']
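In the test_wordpiece_tokenize_input assertions of Examples #9 and #11, offsets holds the index of the last wordpiece of each original word and start_offsets the index of its first wordpiece, with [CLS] occupying index 0. The reader's helper presumably drives a real BERT wordpiece tokenizer, but the alignment itself can be illustrated with a self-contained sketch that recovers both lists from a wordpiece sequence via the '##' continuation convention (the function name here is hypothetical):

    from typing import List, Tuple

    def word_alignment(wordpieces: List[str]) -> Tuple[List[int], List[int]]:
        # For each original word, record the index of its last wordpiece
        # (offsets) and of its first wordpiece (start_offsets). Pieces that
        # start with '##' continue the previous word; [CLS]/[SEP] are skipped.
        offsets: List[int] = []
        start_offsets: List[int] = []
        for i, piece in enumerate(wordpieces):
            if piece in ("[CLS]", "[SEP]"):
                continue
            if piece.startswith("##"):
                offsets[-1] = i          # extend the current word to this piece
            else:
                start_offsets.append(i)  # a new word begins here
                offsets.append(i)
        return offsets, start_offsets

    wordpieces = ['[CLS]', 'this', 'is', 'a', 'sentence', '##ands', '##ome',
                  '##piece', '##s', 'with', 'a', 'really', '##long', '##word', '[SEP]']
    offsets, start_offsets = word_alignment(wordpieces)
    assert [wordpieces[i] for i in offsets] == ['this', 'is', 'a', '##s', 'with', 'a', '##word']
    assert [wordpieces[i] for i in start_offsets] == ['this', 'is', 'a', 'sentence', 'with', 'a', 'really']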