예제 #1
0
    def test_srl_reader_can_filter_by_domain(self):
        """A reader built with ``domain_identifier="subdomain2"`` yields two instances.

        The reader is pointed at the whole ``conll_2012`` fixture folder, and
        the resulting instances are materialized with ``ensure_list`` before
        being counted.
        """
        reader = SrlReader(domain_identifier="subdomain2")
        filtered = ensure_list(reader.read('tests/fixtures/conll_2012/'))
        # Per the original author: had the whole folder been included,
        # there would be 9 instances rather than 2.
        assert len(filtered) == 2
예제 #2
0
    def test_srl_reader_can_filter_by_domain(self):
        """A reader built with ``domain_identifier="subdomain2"`` yields two instances.

        The fixture folder is located relative to
        ``AllenNlpTestCase.FIXTURES_ROOT`` and the instances are materialized
        with ``ensure_list`` before being counted.
        """
        fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012'
        reader = SrlReader(domain_identifier="subdomain2")
        filtered = ensure_list(reader.read(fixture_dir))
        # Per the original author: had the whole folder been included,
        # there would be 9 instances rather than 2.
        assert len(filtered) == 2
예제 #3
0
    def test_read_from_file(self):
        """Read the ``conll_2012`` fixtures and check the first five instances.

        For instances 0-3 the tokens, the verb's sequence index and the tag
        sequence are compared with hard-coded expectations; instance 4 is a
        sentence with no verbal predicate, so its verb index must be ``None``.
        """
        dataset = SrlReader().read('tests/fixtures/conll_2012/')
        instances = dataset.instances

        mali_sentence = ["Mali", "government", "officials", "say", "the",
                         "woman", "'s", "confession", "was", "forced", "."]
        prosecution_sentence = ['The', 'prosecution', 'rested', 'its', 'case',
                                'last', 'month', 'after', 'four', 'months',
                                'of', 'hearings', '.']

        # One row per instance: (expected tokens, verb index, expected tags).
        expectations = [
            (mali_sentence, 3,
             ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
              'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
            (mali_sentence, 8,
             ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1',
              'B-V', 'B-ARG2', 'O']),
            (prosecution_sentence, 2,
             ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP',
              'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
              'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
            (prosecution_sentence, 11,
             ['O'] * 11 + ['B-V', 'O']),
        ]
        for position, (words, verb_index, srl_tags) in enumerate(expectations):
            fields = instances[position].fields()
            assert fields["tokens"].tokens() == words
            assert fields["verb_indicator"].sequence_index() == verb_index
            assert fields["tags"].tags() == srl_tags

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields()
        assert fields["tokens"].tokens() == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].sequence_index() is None
        assert fields["tags"].tags() == ['O'] * 5
    def test_srl_reader_can_filter_by_domain(self):
        """A reader built with ``domain_identifier="subdomain2"`` yields two instances.

        The reader is pointed at the whole ``conll_2012`` fixture folder, and
        the resulting instances are materialized with ``ensure_list`` before
        being counted.
        """
        domain_reader = SrlReader(domain_identifier="subdomain2")
        raw_instances = domain_reader.read('tests/fixtures/conll_2012/')
        instance_count = len(ensure_list(raw_instances))
        # Per the original author: had the whole folder been included,
        # there would be 9 instances rather than 2.
        assert instance_count == 2
예제 #5
0
    def test_read_from_file(self, lazy):
        """Read the ``conll_2012/subdomain`` fixtures and check five instances.

        ``lazy`` is forwarded unchanged to ``SrlReader`` (presumably supplied
        by a test parametrization outside this block -- confirm). For
        instances 0-3 the token texts, the position flagged in
        ``verb_indicator`` and the tag labels are compared with hard-coded
        expectations; instance 4 has no verbal predicate.
        """
        reader = SrlReader(lazy=lazy)
        instances = ensure_list(reader.read('tests/fixtures/conll_2012/subdomain'))

        mali_sentence = ["Mali", "government", "officials", "say", "the",
                         "woman", "'s", "confession", "was", "forced", "."]
        prosecution_sentence = ['The', 'prosecution', 'rested', 'its', 'case',
                                'last', 'month', 'after', 'four', 'months',
                                'of', 'hearings', '.']

        # One row per instance: (expected tokens, verb index, expected tags).
        expectations = [
            (mali_sentence, 3,
             ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
              'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
            (mali_sentence, 8,
             ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1',
              'B-V', 'B-ARG2', 'O']),
            (prosecution_sentence, 2,
             ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP',
              'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
              'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
            (prosecution_sentence, 11,
             ['O'] * 11 + ['B-V', 'O']),
        ]
        for position, (words, verb_index, srl_tags) in enumerate(expectations):
            fields = instances[position].fields
            tokens = [token.text for token in fields['tokens'].tokens]
            assert tokens == words
            # Only the verb position itself is checked, as in the original test.
            assert fields["verb_indicator"].labels[verb_index] == 1
            assert fields["tags"].labels == srl_tags

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = [token.text for token in fields['tokens'].tokens]
        assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0] * 5
        assert fields["tags"].labels == ['O'] * 5
예제 #6
0
    def test_read_from_file(self, lazy):
        """Read the ``conll_2012/subdomain`` fixtures and check five instances.

        ``lazy`` is forwarded unchanged to ``SrlReader`` (presumably supplied
        by a test parametrization outside this block -- confirm). The fixture
        directory is resolved from ``AllenNlpTestCase.FIXTURES_ROOT``.
        """
        reader = SrlReader(lazy=lazy)
        fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012' / 'subdomain'
        instances = ensure_list(reader.read(fixture_dir))

        def text_of(fields):
            # Collect the surface text of every token in the "tokens" field.
            return [token.text for token in fields['tokens'].tokens]

        def check_instance(position, words, verb_index, srl_tags):
            # Compare one instance with its expected tokens, verb position and tags.
            fields = instances[position].fields
            assert text_of(fields) == words
            assert fields["verb_indicator"].labels[verb_index] == 1
            assert fields["tags"].labels == srl_tags

        mali_sentence = ["Mali", "government", "officials", "say", "the",
                         "woman", "'s", "confession", "was", "forced", "."]
        prosecution_sentence = ['The', 'prosecution', 'rested', 'its', 'case',
                                'last', 'month', 'after', 'four', 'months',
                                'of', 'hearings', '.']

        check_instance(0, mali_sentence, 3,
                       ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                        'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O'])
        check_instance(1, mali_sentence, 8,
                       ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1', 'I-ARG1',
                        'I-ARG1', 'B-V', 'B-ARG2', 'O'])
        check_instance(2, prosecution_sentence, 2,
                       ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                        'B-ARGM-TMP', 'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP',
                        'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O'])
        check_instance(3, prosecution_sentence, 11,
                       ['O'] * 11 + ['B-V', 'O'])

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        assert text_of(fields) == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0] * 5
        assert fields["tags"].labels == ['O'] * 5
예제 #7
0
    def test_read_from_file(self, lazy):
        u"""Read the ``conll_2012/subdomain`` fixtures and check five instances.

        ``lazy`` is forwarded unchanged to ``SrlReader`` (presumably supplied
        by a test parametrization outside this block -- confirm). All expected
        strings are written as unicode literals, matching the rest of the test.
        """
        reader = SrlReader(lazy=lazy)
        fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / u'conll_2012' / u'subdomain'
        instances = ensure_list(reader.read(fixture_dir))

        mali_sentence = [u"Mali", u"government", u"officials", u"say", u"the",
                         u"woman", u"'s", u"confession", u"was", u"forced",
                         u"."]
        prosecution_sentence = [u'The', u'prosecution', u'rested', u'its',
                                u'case', u'last', u'month', u'after', u'four',
                                u'months', u'of', u'hearings', u'.']

        # One row per instance: (expected tokens, verb index, expected tags).
        expectations = [
            (mali_sentence, 3,
             [u'B-ARG0', u'I-ARG0', u'I-ARG0', u'B-V', u'B-ARG1', u'I-ARG1',
              u'I-ARG1', u'I-ARG1', u'I-ARG1', u'I-ARG1', u'O']),
            (mali_sentence, 8,
             [u'O', u'O', u'O', u'O', u'B-ARG1', u'I-ARG1', u'I-ARG1',
              u'I-ARG1', u'B-V', u'B-ARG2', u'O']),
            (prosecution_sentence, 2,
             [u'B-ARG0', u'I-ARG0', u'B-V', u'B-ARG1', u'I-ARG1',
              u'B-ARGM-TMP', u'I-ARGM-TMP', u'B-ARGM-TMP', u'I-ARGM-TMP',
              u'I-ARGM-TMP', u'I-ARGM-TMP', u'I-ARGM-TMP', u'O']),
            (prosecution_sentence, 11,
             [u'O'] * 11 + [u'B-V', u'O']),
        ]
        for position, (words, verb_index, srl_tags) in enumerate(expectations):
            fields = instances[position].fields
            tokens = [token.text for token in fields[u'tokens'].tokens]
            assert tokens == words
            # Only the verb position itself is checked, as in the original test.
            assert fields[u"verb_indicator"].labels[verb_index] == 1
            assert fields[u"tags"].labels == srl_tags

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = [token.text for token in fields[u'tokens'].tokens]
        assert tokens == [u"Denise", u"Dillon", u"Headline", u"News", u"."]
        assert fields[u"verb_indicator"].labels == [0] * 5
        assert fields[u"tags"].labels == [u'O'] * 5
예제 #8
0
    def test_read_from_file(self, lazy):
        """Read the ``conll_2012/subdomain`` fixtures and check five instances.

        ``lazy`` is forwarded unchanged to ``SrlReader`` (presumably supplied
        by a test parametrization outside this block -- confirm). Besides the
        token texts, the flagged verb position and the tag labels, each
        instance's ``metadata`` field is checked: its ``"words"`` must equal
        the tokens, ``"verb"`` the token at the verb position (``None`` when
        there is no predicate) and ``"gold_tags"`` the tag labels.
        """
        reader = SrlReader(lazy=lazy)
        fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / "conll_2012" / "subdomain"
        instances = ensure_list(reader.read(fixture_dir))

        mali_sentence = ["Mali", "government", "officials", "say", "the",
                         "woman", "'s", "confession", "was", "forced", "."]
        prosecution_sentence = ["The", "prosecution", "rested", "its", "case",
                                "last", "month", "after", "four", "months",
                                "of", "hearings", "."]

        # One row per instance: (expected tokens, verb index, expected tags).
        expectations = [
            (mali_sentence, 3,
             ["B-ARG0", "I-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1",
              "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O"]),
            (mali_sentence, 8,
             ["O", "O", "O", "O", "B-ARG1", "I-ARG1", "I-ARG1", "I-ARG1",
              "B-V", "B-ARG2", "O"]),
            (prosecution_sentence, 2,
             ["B-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1", "B-ARGM-TMP",
              "I-ARGM-TMP", "B-ARGM-TMP", "I-ARGM-TMP", "I-ARGM-TMP",
              "I-ARGM-TMP", "I-ARGM-TMP", "O"]),
            (prosecution_sentence, 11,
             ["O"] * 11 + ["B-V", "O"]),
        ]
        for position, (words, verb_index, srl_tags) in enumerate(expectations):
            fields = instances[position].fields
            tokens = [token.text for token in fields["tokens"].tokens]
            assert tokens == words
            # Only the verb position itself is checked, as in the original test.
            assert fields["verb_indicator"].labels[verb_index] == 1
            assert fields["tags"].labels == srl_tags
            metadata = fields["metadata"].metadata
            assert metadata["words"] == tokens
            assert metadata["verb"] == tokens[verb_index]
            assert metadata["gold_tags"] == fields["tags"].labels

        # Tests a sentence with no verbal predicates.
        fields = instances[4].fields
        tokens = [token.text for token in fields["tokens"].tokens]
        assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
        assert fields["verb_indicator"].labels == [0] * 5
        assert fields["tags"].labels == ["O"] * 5
        metadata = fields["metadata"].metadata
        assert metadata["words"] == tokens
        assert metadata["verb"] is None
        assert metadata["gold_tags"] == fields["tags"].labels