Esempio n. 1
0
 def test_is_valid_false_on_sentence_with_invalid_elements(self):
     # A sentence holding an element that is invalid on its own is
     # expected to be reported as invalid.
     elements = [
         Multiword(first_index=1, last_index=1),  # invalid: first == last
         Word(index=1),
         Word(index=2),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 2
0
 def test_is_valid_false_if_multiword_last_index_is_too_big(self):
     # The multiword range 2-4 ends past the last word of the sentence.
     elements = [
         Word(index=1),
         Multiword(first_index=2, last_index=4),  # no word has index 4
         Word(index=2),
         Word(index=3),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 3
0
 def test_is_valid_false_if_a_word_index_is_skipped(self):
     # Word indexes jump from 2 straight to 4, leaving a gap at index 3.
     words = [Word(index=1), Word(index=2), Word(index=4)]
     self.assertFalse(Sentence(words).is_valid())
Esempio n. 4
0
 def test_is_valid_false_if_consecutive_multiwords_overlap(self):
     # Two back-to-back multiwords both claim the same range 1-2.
     elements = [
         Multiword(first_index=1, last_index=2),
         Multiword(first_index=1, last_index=2),
         Word(index=1),
         Word(index=2),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 5
0
 def test_is_valid_true_if_first_element_is_multiword_with_index_1(self):
     # A multiword starting at index 1 is expected to be accepted as the
     # first element. The spanned words are listed as well so that the
     # other validations do not fail.
     elements = [
         Multiword(first_index=1, last_index=2),
         Word(index=1),
         Word(index=2),
     ]
     self.assertTrue(Sentence(elements).is_valid())
Esempio n. 6
0
 def test_is_valid_false_if_multiword_index_is_skipped(self):
     # Index 2 is missing: the multiword 3-4 directly follows word 1.
     elements = [
         Word(index=1),
         Multiword(first_index=3, last_index=4),
         Word(index=3),
         Word(index=4),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 7
0
 def test_is_valid_true_if_multiword_index_range_is_within_sentence_bounds(
         self):
     # The multiword range 2-3 is fully covered by the words that follow it.
     elements = [
         Word(index=1),
         Multiword(first_index=2, last_index=3),
         Word(index=2),
         Word(index=3),
     ]
     self.assertTrue(Sentence(elements).is_valid())
Esempio n. 8
0
 def test_is_valid_true_if_emptynodes_indexes_are_valid(self):
     # Empty nodes placed before the first word (main index 0) and after
     # word 2, each group with sub-indexes 1 and 2, are expected to be valid.
     leading_nodes = [
         EmptyNode(main_index=0, sub_index=1),
         EmptyNode(main_index=0, sub_index=2),
     ]
     words = [Word(index=1), Word(index=2)]
     trailing_nodes = [
         EmptyNode(main_index=2, sub_index=1),
         EmptyNode(main_index=2, sub_index=2),
     ]
     sentence = Sentence(leading_nodes + words + trailing_nodes)
     self.assertTrue(sentence.is_valid())
Esempio n. 9
0
 def test_is_valid_false_if_multiwords_are_placed_incorrectly(self):
     # The multiword 3-4 appears at the start instead of right before word 3.
     elements = [
         Multiword(first_index=1, last_index=2),
         Multiword(first_index=3, last_index=4),  # belongs before word 3
         Word(index=1),
         Word(index=2),
         Word(index=3),
         Word(index=4),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 10
0
    def test_is_valid_on_a_sentence_with_foreign_elements(self):
        # Elements of a custom BaseSentenceElement subclass interleaved with
        # regular words are expected not to break sentence validation.
        class Foo(BaseSentenceElement):
            pass

        elements = [
            Word(index=1, head=2),
            Foo(form='Bar'),
            Word(index=2, head=0),
            Foo(form='Baz'),
        ]
        self.assertTrue(Sentence(elements).is_valid())
Esempio n. 11
0
 def test_is_valid_true_with_all_values_set(self):
     # A word built with every field populated is expected to be valid.
     attributes = {
         'index': 1,
         'form': 'Form',
         'lemma': 'Lemma',
         'upos': UposTag.X,
         'xpos': 'XPOS',
         'feats': {'foo': 'bar'},
         'head': 2,
         'deprel': 'DepRel',
         'misc': 'Misc',
         'deps': {'baz': 'qux'},
     }
     self.assertTrue(Word(**attributes).is_valid())
Esempio n. 12
0
 def test_to_conllu_with_many_elements_and_no_comments(self):
     # Expected output: one tab-separated line per element, unset fields
     # rendered as '_', followed by a single blank line.
     expected = (
         '1-2\tFoobar\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '1\tFoo\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '2\tbar\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '2.1\tBaz\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '\n'
     )
     elements = [
         Multiword(first_index=1, last_index=2, form="Foobar"),
         Word(index=1, form='Foo'),
         Word(index=2, form='bar'),
         EmptyNode(main_index=2, sub_index=1, form='Baz'),
     ]
     sentence = Sentence(elements=elements)
     self.assertEqual(expected, sentence.to_conllu())
Esempio n. 13
0
    def test_to_conllu_of_sentence_with_all_attributes(self):
        # Every field of the word is set; the serialized line is expected to
        # list them tab-separated, with deps placed before misc.
        expected = (
            '1\tForm\tLemma\tX\tXPOS\tFeat=Foo\t2\tDepRel\t0:Bar\tMisc'
        )
        word = Word(
            index=1,
            form='Form',
            lemma='Lemma',
            upos=UposTag.X,
            xpos='XPOS',
            feats='Feat=Foo',
            head=2,
            deprel='DepRel',
            deps='0:Bar',
            misc='Misc',
        )

        self.assertEqual(expected, word.to_conllu())
Esempio n. 14
0
 def test_is_valid_true_if_first_element_is_emptynode_with_index_0(self):
     # An empty node with main index 0 is expected to be accepted as the
     # first element. The first word is included as well so that the other
     # validations do not fail.
     elements = [EmptyNode(main_index=0, sub_index=1), Word(index=1)]
     self.assertTrue(Sentence(elements).is_valid())
Esempio n. 15
0
 def test_is_valid_false_if_two_emptynodes_has_the_same_sub_index(self):
     # Both empty nodes carry the identical index pair 0.1.
     elements = [
         EmptyNode(main_index=0, sub_index=1),
         EmptyNode(main_index=0, sub_index=1),
         Word(index=1),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 16
0
    def test_words_on_sentence_wit_mixed_element(self):
        # words() is expected to return a generator that yields only the
        # Word elements, in order, leaving out multiwords and empty nodes.
        expected = [Word(index=1), Word(index=2), Word(index=3), Word(index=4)]
        word_1, word_2, word_3, word_4 = expected

        sentence = Sentence([
            Multiword(first_index=1, last_index=2),
            word_1,
            word_2,
            EmptyNode(main_index=2, sub_index=1),
            EmptyNode(main_index=2, sub_index=2),
            Multiword(first_index=3, last_index=4),
            word_3,
            word_4,
        ])

        words = sentence.words()
        self.assertIsInstance(words, Generator)
        self.assertEqual(expected, list(words))
Esempio n. 17
0
 def test_is_valid_false_if_an_emptynode_main_index_has_no_word_index(self):
     # The empty nodes refer to main index 2, but no word has index 2.
     elements = [
         Word(index=1),
         EmptyNode(main_index=2, sub_index=1),
         EmptyNode(main_index=2, sub_index=2),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 18
0
 def test_is_valid_false_if_an_emptynode_sub_index_is_skipped(self):
     # Sub-indexes jump from 1 straight to 3, leaving a gap at 2.
     elements = [
         EmptyNode(main_index=0, sub_index=1),
         EmptyNode(main_index=0, sub_index=3),
         Word(index=1),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 19
0
    def test_raw_tokens_on_sentence_wit_mixed_element(self):
        # raw_tokens() is expected to return a generator that yields the
        # multiwords plus the words lying outside any multiword range; words
        # inside a multiword range and empty nodes are left out.
        expected = [
            Multiword(first_index=1, last_index=2),
            Word(index=3),
            Multiword(first_index=4, last_index=6),
            Word(index=7),
        ]
        span_1_2, word_3, span_4_6, word_7 = expected

        sentence = Sentence([
            span_1_2,
            Word(index=1),
            Word(index=2),
            word_3,
            EmptyNode(main_index=3, sub_index=1),
            EmptyNode(main_index=3, sub_index=2),
            span_4_6,
            Word(index=4),
            EmptyNode(main_index=4, sub_index=1),
            EmptyNode(main_index=4, sub_index=2),
            Word(index=5),
            Word(index=6),
            word_7,
        ])

        tokens = sentence.raw_tokens()
        self.assertIsInstance(tokens, Generator)
        self.assertEqual(expected, list(tokens))
Esempio n. 20
0
 def test_is_valid_false_if_consecutive_emptynodes_have_different_main_id(
         self):
     # Node 2.2 directly follows node 1.1; the word with index 2 is missing.
     elements = [
         Word(index=1),
         EmptyNode(main_index=1, sub_index=1),
         EmptyNode(main_index=2, sub_index=2),
     ]
     self.assertFalse(Sentence(elements).is_valid())
Esempio n. 21
0
 def p_wordline_word(prod: YaccProduction) -> None:
     'wordline : INTEGER_ID TAB FORM TAB LEMMA TAB UPOS TAB XPOS TAB ' \
         'FEATS TAB HEAD TAB DEPREL TAB DEPS TAB MISC NEWLINE'
     # NOTE(review): the string above is this function's docstring and is
     # presumably the grammar rule read by the parser generator (PLY-style);
     # keep it verbatim.
     #
     # Field values sit at the odd positions of the production; the even
     # positions hold the TAB separators.
     upos_name = prod[7]
     prod[0] = Word(
         index=prod[1],
         form=prod[3],
         lemma=prod[5],
         # A falsy UPOS value maps to None instead of an enum lookup.
         upos=UposTag[upos_name] if upos_name else None,
         xpos=prod[9],
         feats=prod[11],
         head=prod[13],
         deprel=prod[15],
         deps=prod[17],
         misc=prod[19],
     )
Esempio n. 22
0
    def test_init_with_some_elements(self):
        # The sentence is expected to keep the very same list object that was
        # passed in (identity, not a copy).
        given = [Word(index=1), Word(index=2)]

        self.assertIs(Sentence(given).elements, given)
Esempio n. 23
0
 def test_is_valid_true_with_word_heads_within_sentence_bounds(self):
     # Heads 2 and 0 both fall within the range 0..2 of this sentence.
     words = [Word(index=1, head=2), Word(index=2, head=0)]
     self.assertTrue(Sentence(words).is_valid())
Esempio n. 24
0
 def test_is_valid_true_if_a_word_has_head_equals_to_index(self):
     # A word whose head is its own index is expected to be accepted.
     self_headed = Word(index=1, head=1)
     sentence = Sentence([self_headed])
     self.assertTrue(sentence.is_valid())
Esempio n. 25
0
 def test_is_valid_true_if_a_word_has_head_zero(self):
     # A head value of 0 is expected to be accepted.
     zero_headed = Word(index=1, head=0)
     sentence = Sentence([zero_headed])
     self.assertTrue(sentence.is_valid())
Esempio n. 26
0
 def test_is_valid_false_if_a_word_has_head_beyond_last_word_index(self):
     # The only word points at head 2, but no word has index 2.
     words = [Word(index=1, head=2)]
     self.assertFalse(Sentence(words).is_valid())
Esempio n. 27
0
 def test_is_valid_false_if_a_word_has_head_less_than_zero(self):
     # A negative head value is expected to make the sentence invalid.
     words = [Word(index=1, head=-1)]
     self.assertFalse(Sentence(words).is_valid())
Esempio n. 28
0
 def test_is_valid_false_if_first_element_is_word_with_index_not_1(self):
     # The sentence starts with word index 2 instead of 1.
     words = [Word(index=2)]
     self.assertFalse(Sentence(words).is_valid())
Esempio n. 29
0
 def test_is_valid_true_if_first_element_is_word_with_index_1(self):
     # A single word with index 1 is expected to form a valid sentence.
     words = [Word(index=1)]
     self.assertTrue(Sentence(words).is_valid())
Esempio n. 30
0
 def test_is_valid_true_with_two_consecutive_word_indexes(self):
     # Words indexed 1 and 2, in order, are expected to be valid.
     words = [Word(index=1), Word(index=2)]
     self.assertTrue(Sentence(words).is_valid())