Esempio n. 1
0
    def test_raw_tokens_on_sentence_wit_mixed_element(self):
        # raw_tokens() is expected to be lazy (a generator) and to yield only
        # the surface tokens: each multiword token once, plus every word that
        # is not covered by a multiword range.  Words inside a multiword
        # range and empty nodes must be left out.
        multiword_1_2 = Multiword(first_index=1, last_index=2)
        word_3 = Word(index=3)
        multiword_4_6 = Multiword(first_index=4, last_index=6)
        word_7 = Word(index=7)

        elements = [
            multiword_1_2,
            Word(index=1),
            Word(index=2),
            word_3,
            EmptyNode(main_index=3, sub_index=1),
            EmptyNode(main_index=3, sub_index=2),
            multiword_4_6,
            Word(index=4),
            EmptyNode(main_index=4, sub_index=1),
            EmptyNode(main_index=4, sub_index=2),
            Word(index=5),
            Word(index=6),
            word_7,
        ]
        tokens = Sentence(elements).raw_tokens()

        self.assertIsInstance(tokens, Generator)
        self.assertEqual(
            [multiword_1_2, word_3, multiword_4_6, word_7], list(tokens))
Esempio n. 2
0
 def test_is_valid_false_if_consecutive_multiwords_overlap(self):
     # Two back-to-back multiword tokens claim the very same 1-2 range; the
     # sentence must be rejected even though both covered words are present.
     elements = [
         Multiword(first_index=1, last_index=2),
         Multiword(first_index=1, last_index=2),
         Word(index=1),
         Word(index=2),
     ]
     outcome = Sentence(elements).is_valid()
     self.assertFalse(outcome)
Esempio n. 3
0
    def test_to_conllu_of_sentence_with_all_attributes(self):
        # A multiword token with every attribute set must serialise to a
        # ten-column line: the "first-last" ID, the form, seven underscore
        # placeholders, and finally the misc value.
        token = Multiword(
            first_index=1, last_index=2, form='Form', misc='Misc')
        expected_line = '1-2\tForm\t_\t_\t_\t_\t_\t_\t_\tMisc'

        self.assertEqual(expected_line, token.to_conllu())
Esempio n. 4
0
 def test_is_valid_false_if_multiwords_are_placed_incorrectly(self):
     # The 3-4 multiword token sits right after the 1-2 token instead of
     # immediately before word 3, so the sentence must be reported invalid.
     misplaced = Multiword(first_index=3, last_index=4)
     elements = [
         Multiword(first_index=1, last_index=2),
         misplaced,
         Word(index=1),
         Word(index=2),
         Word(index=3),
         Word(index=4),
     ]
     outcome = Sentence(elements).is_valid()
     self.assertFalse(outcome)
Esempio n. 5
0
 def test_is_valid_false_on_sentence_with_invalid_elements(self):
     # A sentence containing an element that is itself invalid (here a
     # multiword token whose first and last index are equal) must be
     # reported invalid as a whole.
     broken_multiword = Multiword(first_index=1, last_index=1)
     elements = [broken_multiword, Word(index=1), Word(index=2)]

     outcome = Sentence(elements).is_valid()
     self.assertFalse(outcome)
Esempio n. 6
0
    def test_words_on_sentence_wit_mixed_element(self):
        # words() is expected to be lazy (a generator) and to yield only the
        # Word elements, in sentence order, skipping multiword tokens and
        # empty nodes.
        word_1, word_2, word_3, word_4 = (
            Word(index=position) for position in range(1, 5))

        elements = [
            Multiword(first_index=1, last_index=2),
            word_1,
            word_2,
            EmptyNode(main_index=2, sub_index=1),
            EmptyNode(main_index=2, sub_index=2),
            Multiword(first_index=3, last_index=4),
            word_3,
            word_4,
        ]
        words = Sentence(elements).words()

        self.assertIsInstance(words, Generator)
        self.assertEqual([word_1, word_2, word_3, word_4], list(words))
Esempio n. 7
0
 def test_is_valid_false_if_multiword_last_index_is_too_big(self):
     # The multiword token claims the range 2-4, but the sentence contains
     # no word with ID 4, so validation must fail.
     overreaching = Multiword(first_index=2, last_index=4)
     elements = [
         Word(index=1),
         overreaching,
         Word(index=2),
         Word(index=3),
     ]
     outcome = Sentence(elements).is_valid()
     self.assertFalse(outcome)
Esempio n. 8
0
 def test_is_valid_true_if_first_element_is_multiword_with_index_1(self):
     # A sentence may open with a multiword token as long as its range
     # starts at 1.  The covered words are supplied as well so that no
     # unrelated validation rule makes the sentence fail.
     leading_multiword = Multiword(first_index=1, last_index=2)
     covered_words = [Word(index=1), Word(index=2)]

     outcome = Sentence([leading_multiword] + covered_words).is_valid()
     self.assertTrue(outcome)
Esempio n. 9
0
 def test_is_valid_true_if_multiword_index_range_is_within_sentence_bounds(
         self):
     # The multiword token spans 2-3 and both of those words exist in the
     # sentence, so validation is expected to succeed.
     elements = [
         Word(index=1),
         Multiword(first_index=2, last_index=3),
         Word(index=2),
         Word(index=3),
     ]
     outcome = Sentence(elements).is_valid()
     self.assertTrue(outcome)
Esempio n. 10
0
 def test_is_valid_false_if_multiword_index_is_skipped(self):
     # Index 2 is missing: the sentence jumps from word 1 straight to a
     # multiword token starting at 3, which must be reported as invalid.
     elements = [
         Word(index=1),
         Multiword(first_index=3, last_index=4),
         Word(index=3),
         Word(index=4),
     ]
     outcome = Sentence(elements).is_valid()
     self.assertFalse(outcome)
Esempio n. 11
0
    def test_words_on_sentence_without_word_elements(self):
        # With no Word element in the sentence, words() must still return a
        # generator, and that generator must yield nothing.
        non_word_elements = [
            EmptyNode(main_index=0, sub_index=1),
            Multiword(first_index=1, last_index=2),
        ]
        words = Sentence(non_word_elements).words()

        self.assertIsInstance(words, Generator)
        self.assertEqual([], list(words))
Esempio n. 12
0
    def p_wordline_multiword(prod: YaccProduction) -> None:
        'wordline : RANGE_ID TAB FORM TAB LEMMA TAB UPOS TAB XPOS TAB FEATS ' \
            'TAB HEAD TAB DEPREL TAB DEPS TAB MISC NEWLINE'
        # NOTE(review): the string above is presumably consumed by the parser
        # generator (PLY-style) as the grammar rule, not as documentation, so
        # it must stay the first statement and must not be reworded.
        #
        # Symbol positions in ``prod`` implied by the rule: 1 = RANGE_ID,
        # 3 = FORM, 5 = LEMMA, 7/9/11/13/15/17 = UPOS/XPOS/FEATS/HEAD/DEPREL/
        # DEPS, 19 = MISC (even positions are the TAB separators).
        #
        # A multiword line may only carry a range ID, a form and a misc
        # value: the lemma must be the literal '_' and the remaining columns
        # must be None, otherwise IllegalMultiwordError is raised.
        columns_that_must_be_unset = range(7, 18, 2)
        if prod[5] != '_' or not all(
                prod[position] is None
                for position in columns_that_must_be_unset):
            raise IllegalMultiwordError(prod)

        # RANGE_ID is indexed as a (first, last) pair.
        index_range = prod[1]
        prod[0] = Multiword(first_index=index_range[0],
                            last_index=index_range[1],
                            form=prod[3],
                            misc=prod[19])
Esempio n. 13
0
 def test_to_conllu_with_many_elements_and_no_comments(self):
     # Every element is expected to be serialised on its own line, in the
     # order given, and the sentence is closed by one extra empty line.
     elements = [
         Multiword(first_index=1, last_index=2, form="Foobar"),
         Word(index=1, form='Foo'),
         Word(index=2, form='bar'),
         EmptyNode(main_index=2, sub_index=1, form='Baz'),
     ]
     sentence = Sentence(elements=elements)

     expected_text = (
         '1-2\tFoobar\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '1\tFoo\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '2\tbar\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '2.1\tBaz\t_\t_\t_\t_\t_\t_\t_\t_\n'
         '\n'
     )
     self.assertEqual(expected_text, sentence.to_conllu())
Esempio n. 14
0
 def test_init_form(self):
     # The form given to the constructor must be readable back unchanged
     # through the ``form`` attribute.
     multiword = Multiword(form='Foo')
     stored_form = multiword.form
     self.assertEqual('Foo', stored_form)
Esempio n. 15
0
 def test_init_misc(self):
     # The misc value given to the constructor must be readable back
     # unchanged through the ``misc`` attribute.
     multiword = Multiword(misc='Foo')
     stored_misc = multiword.misc
     self.assertEqual('Foo', stored_misc)
Esempio n. 16
0
 def test_is_valid_false_with_last_index_equal_to_first_index(self):
     # A range whose first and last index coincide must be reported as
     # invalid.
     same_index = 42
     multiword = Multiword(first_index=same_index, last_index=same_index)
     self.assertFalse(multiword.is_valid())
Esempio n. 17
0
 def test_is_valid_false_with_no_values_set(self):
     # A multiword token built with no arguments at all must be reported
     # as invalid.
     blank_multiword = Multiword()
     outcome = blank_multiword.is_valid()
     self.assertFalse(outcome)
Esempio n. 18
0
 def test_init_last_index(self):
     # The last index given to the constructor must be readable back
     # unchanged through the ``last_index`` attribute.
     multiword = Multiword(last_index=42)
     stored_index = multiword.last_index
     self.assertEqual(42, stored_index)
Esempio n. 19
0
 def test_is_valid_false_with_last_index_is_not_set(self):
     # Supplying only the first index leaves the range incomplete, so the
     # multiword token must be reported as invalid.
     half_open_multiword = Multiword(first_index=42)
     outcome = half_open_multiword.is_valid()
     self.assertFalse(outcome)
Esempio n. 20
0
 def test_to_conllu_of_invalid_sentence_with_no_attributes(self):
     # Serialising a multiword token with nothing set is still expected to
     # produce a line: the ID renders as "None-None" and every other column
     # is the underscore placeholder.
     blank_multiword = Multiword()
     expected_line = 'None-None\t_\t_\t_\t_\t_\t_\t_\t_\t_'

     self.assertEqual(expected_line, blank_multiword.to_conllu())
Esempio n. 21
0
 def test_is_valid_true_with_all_values_set(self):
     # A multiword token with a proper 1-2 range plus form and misc values
     # must be reported as valid.
     multiword = Multiword(
         first_index=1, last_index=2, form='Form', misc='Misc')
     outcome = multiword.is_valid()
     self.assertTrue(outcome)
Esempio n. 22
0
 def test_is_valid_true_with_first_index_greater_than_zero(self):
     # A range starting at 1 (the smallest positive index) and ending at a
     # larger index must be reported as valid.
     multiword = Multiword(first_index=1, last_index=42)
     outcome = multiword.is_valid()
     self.assertTrue(outcome)
Esempio n. 23
0
 def test_is_valid_false_if_first_element_is_multiword_with_index_not_1(
         self):
     # A sentence that opens with a multiword token whose range does not
     # start at 1 must be reported as invalid.
     late_start = Multiword(first_index=2, last_index=5)
     outcome = Sentence([late_start]).is_valid()
     self.assertFalse(outcome)
Esempio n. 24
0
 def test_is_valid_false_with_first_index_less_than_zero(self):
     # A negative first index must make the multiword token invalid.
     negative_start = Multiword(first_index=-1, last_index=42)
     outcome = negative_start.is_valid()
     self.assertFalse(outcome)
Esempio n. 25
0
 def test_is_valid_false_on_sentence_without_word_elements(self):
     # A sentence made up solely of an empty node and a multiword token,
     # with no Word element at all, must be reported as invalid.
     non_word_elements = [
         EmptyNode(main_index=0, sub_index=1),
         Multiword(first_index=1, last_index=2),
     ]
     outcome = Sentence(non_word_elements).is_valid()
     self.assertFalse(outcome)
Esempio n. 26
0
 def test_init_first_index(self):
     # The first index given to the constructor must be readable back
     # unchanged through the ``first_index`` attribute.
     multiword = Multiword(first_index=42)
     stored_index = multiword.first_index
     self.assertEqual(42, stored_index)