Esempio n. 1
0
    def test_read_from_file(self, lazy, coding_scheme):
        """Read the CoNLL-2003 fixture and verify its first two instances.

        Both instances are expected to carry the same tag sequence. When
        ``coding_scheme`` is ``'IOB1'`` the entity tags use the ``I-`` prefix;
        for any other value they use the ``U-`` prefix.
        """
        reader = Conll2003DatasetReader(lazy=lazy, coding_scheme=coding_scheme)
        fixture_path = str(
            AllenNlpTestCase.FIXTURES_ROOT / 'data' / 'conll2003.txt')
        instances = ensure_list(reader.read(fixture_path))

        expected_labels = (
            ['I-ORG', 'O', 'I-PER', 'O', 'O', 'I-LOC', 'O']
            if coding_scheme == 'IOB1'
            else ['U-ORG', 'O', 'U-PER', 'O', 'O', 'U-LOC', 'O']
        )

        # Expected token text for instance 0 and instance 1, in that order.
        expected_sentences = [
            ['U.N.', 'official', 'Ekeus', 'heads', 'for', 'Baghdad', '.'],
            ['AI2', 'engineer', 'Joel', 'lives', 'in', 'Seattle', '.'],
        ]

        for position, expected_tokens in enumerate(expected_sentences):
            fields = instances[position].fields
            actual_tokens = [token.text for token in fields['tokens'].tokens]
            assert actual_tokens == expected_tokens
            assert fields["tags"].labels == expected_labels
    def test_read_from_file(self, lazy, coding_scheme):
        """Check tokens and tags of the first two instances in the CoNLL-2003 fixture.

        The expected tags are ``I-`` prefixed when ``coding_scheme`` equals
        ``"IOB1"`` and ``U-`` prefixed otherwise; the same tag sequence is
        expected for both instances.
        """
        dataset_reader = Conll2003DatasetReader(
            lazy=lazy,
            coding_scheme=coding_scheme,
        )
        data_file = AllenNlpTestCase.FIXTURES_ROOT / "data" / "conll2003.txt"
        loaded = ensure_list(dataset_reader.read(str(data_file)))

        # Default to the single-token ("U-") tags; IOB1 overrides them below.
        expected_labels = ["U-ORG", "O", "U-PER", "O", "O", "U-LOC", "O"]
        if coding_scheme == "IOB1":
            expected_labels = ["I-ORG", "O", "I-PER", "O", "O", "I-LOC", "O"]

        checks = (
            (0, ["U.N.", "official", "Ekeus", "heads", "for", "Baghdad", "."]),
            (1, ["AI2", "engineer", "Joel", "lives", "in", "Seattle", "."]),
        )
        for instance_index, wanted_tokens in checks:
            instance_fields = loaded[instance_index].fields
            seen_tokens = [tok.text for tok in instance_fields["tokens"].tokens]
            assert seen_tokens == wanted_tokens
            assert instance_fields["tags"].labels == expected_labels
Esempio n. 3
0
 def test_read_data_from_with_unsupported_coding_scheme(self):
     """Constructing the reader with an unsupported target scheme must fail.

     `IOB1` is not supported in `convert_to_coding_scheme`, so building the
     reader is expected to raise ``ConfigurationError``.
     """
     unsupported_scheme = "IOB1"
     with pytest.raises(ConfigurationError):
         Conll2003DatasetReader(convert_to_coding_scheme=unsupported_scheme)
 def test_read_conll2000_from_file(self):
     """Read the CoNLL-2000 fixture with the CoNLL-2003 reader and check the instance count."""
     # The reader is configured with tag_label='chunk' and ignore_ner_tags=True
     # (presumably: label tokens with chunk tags and skip the NER column —
     # confirm against the reader's documentation).
     conll_reader = Conll2003DatasetReader(tag_label='chunk',
                                           ignore_ner_tags=True)
     instances = conll_reader.read(str(AllenNlpTestCase.FIXTURES_ROOT / 'data' / 'conll2000.txt'))
     # Normalise the reader output to a list so it can be measured with len().
     instances = ensure_list(instances)
     # The fixture is expected to produce exactly two instances.
     assert len(instances) == 2