def test_is_valid_false_if_two_emptynodes_has_the_same_sub_index(self):
    """Two empty nodes with identical main/sub index make it invalid."""
    duplicated_nodes = [
        EmptyNode(main_index=0, sub_index=1),
        EmptyNode(main_index=0, sub_index=1),
    ]
    sentence = Sentence(duplicated_nodes + [Word(index=1)])

    outcome = sentence.is_valid()

    self.assertFalse(outcome)
def test_raw_tokens_on_sentence_wit_mixed_element(self):
    """raw_tokens() yields multiwords and the words outside their spans.

    The sentence mixes multiwords, words and empty nodes; only the two
    multiwords and the two words not covered by a multiword are expected,
    in sentence order, and the result must be a generator.
    """
    span_1_2 = Multiword(first_index=1, last_index=2)
    word_3 = Word(index=3)
    span_4_6 = Multiword(first_index=4, last_index=6)
    word_7 = Word(index=7)
    expected = [span_1_2, word_3, span_4_6, word_7]

    elements = [
        span_1_2,  # 1-2
        Word(index=1),
        Word(index=2),
        word_3,  # 3
        EmptyNode(main_index=3, sub_index=1),
        EmptyNode(main_index=3, sub_index=2),
        span_4_6,  # 4-6
        Word(index=4),
        EmptyNode(main_index=4, sub_index=1),
        EmptyNode(main_index=4, sub_index=2),
        Word(index=5),
        Word(index=6),
        word_7,  # 7
    ]
    sentence = Sentence(elements)

    tokens = sentence.raw_tokens()

    self.assertIsInstance(tokens, Generator)
    self.assertEqual(expected, list(tokens))
def test_is_valid_false_if_an_emptynode_sub_index_is_skipped(self):
    """A gap in the sub index sequence (1 then 3) makes it invalid."""
    elements = [
        EmptyNode(main_index=0, sub_index=1),
        # no empty node with sub_index 2 comes before this one
        EmptyNode(main_index=0, sub_index=3),
        Word(index=1),
    ]

    outcome = Sentence(elements).is_valid()

    self.assertFalse(outcome)
def test_is_valid_false_if_an_emptynode_main_index_has_no_word_index(self):
    """Empty nodes whose main index matches no word make it invalid."""
    elements = [
        Word(index=1),
        # the sentence contains no word with index 2
        EmptyNode(main_index=2, sub_index=1),
        EmptyNode(main_index=2, sub_index=2),
    ]

    outcome = Sentence(elements).is_valid()

    self.assertFalse(outcome)
def test_is_valid_false_if_consecutive_emptynodes_have_different_main_id(
        self):
    """Adjacent empty nodes with different main indexes make it invalid."""
    elements = [
        Word(index=1),
        EmptyNode(main_index=1, sub_index=1),
        # the word with index 2 is absent
        EmptyNode(main_index=2, sub_index=2),
    ]

    outcome = Sentence(elements).is_valid()

    self.assertFalse(outcome)
def test_to_conllu_with_deps_tuple(self):
    """Deps given as (head, relation) tuples render as ``1:Foo|2:Bar``."""
    deps = ((1, 'Foo'), (2, 'Bar'))
    emptynode = EmptyNode(main_index=1, sub_index=2, deps=deps)

    expected_line = '1.2\t_\t_\t_\t_\t_\t_\t_\t1:Foo|2:Bar\t_'

    self.assertEqual(expected_line, emptynode.to_conllu())
def test_to_conllu_with_deps_str(self):
    """Deps given as a string appear unchanged in the ninth column."""
    emptynode = EmptyNode(main_index=1, sub_index=2, deps='1:Foo|2:Bar')

    expected_line = '1.2\t_\t_\t_\t_\t_\t_\t_\t1:Foo|2:Bar\t_'

    self.assertEqual(expected_line, emptynode.to_conllu())
def test_to_conllu_with_feats_as_tuple(self):
    """Feats given as (name, values) tuples render as ``Name=V1,V2|...``."""
    feats = (
        ('Foo', ('Bar',)),
        ('Baz', ('Qux', 'Zet')),
    )
    emptynode = EmptyNode(main_index=1, sub_index=2, feats=feats)

    expected_line = '1.2\t_\t_\t_\t_\tFoo=Bar|Baz=Qux,Zet\t_\t_\t_\t_'

    self.assertEqual(expected_line, emptynode.to_conllu())
def test_to_conllu_with_feats_as_str(self):
    """Feats given as a string appear unchanged in the sixth column."""
    emptynode = EmptyNode(
        main_index=1,
        sub_index=2,
        feats='Foo=Bar|Baz=Qux',
    )

    expected_line = '1.2\t_\t_\t_\t_\tFoo=Bar|Baz=Qux\t_\t_\t_\t_'

    self.assertEqual(expected_line, emptynode.to_conllu())
def test_is_valid_true_if_emptynodes_indexes_are_valid(self):
    """Empty nodes before the first word and after the last are valid."""
    leading_nodes = [
        EmptyNode(main_index=0, sub_index=1),
        EmptyNode(main_index=0, sub_index=2),
    ]
    words = [Word(index=1), Word(index=2)]
    trailing_nodes = [
        EmptyNode(main_index=2, sub_index=1),
        EmptyNode(main_index=2, sub_index=2),
    ]
    sentence = Sentence(leading_nodes + words + trailing_nodes)

    outcome = sentence.is_valid()

    self.assertTrue(outcome)
def test_is_valid_true_with_all_values_set(self):
    """An empty node with every attribute populated is valid."""
    attributes = dict(
        main_index=1,
        sub_index=2,
        form='Form',
        lemma='Lemma',
        upos=UposTag.X,
        xpos='XPOS',
        feats={'foo': 'bar'},
        deps={'baz': 'qux'},
        misc='Misc',
    )
    element = EmptyNode(**attributes)

    self.assertTrue(element.is_valid())
def test_to_conllu_of_sentence_with_all_attributes(self):
    """A fully populated empty node serialises every column it owns.

    The seventh and eighth columns are expected to stay ``_``.
    """
    attributes = dict(
        main_index=1,
        sub_index=2,
        form='Form',
        lemma='Lemma',
        upos=UposTag.X,
        xpos='XPOS',
        feats='Feat=Foo',
        deps='0:Bar',
        misc='Misc',
    )
    emptynode = EmptyNode(**attributes)

    expected_line = '1.2\tForm\tLemma\tX\tXPOS\tFeat=Foo\t_\t_\t0:Bar\tMisc'

    self.assertEqual(expected_line, emptynode.to_conllu())
def test_is_valid_true_if_first_element_is_emptynode_with_index_0(self):
    """A sentence may open with an empty node whose main index is 0."""
    opening_node = EmptyNode(main_index=0, sub_index=1)
    # A first word is added so that unrelated validations do not fail.
    first_word = Word(index=1)
    sentence = Sentence([opening_node, first_word])

    outcome = sentence.is_valid()

    self.assertTrue(outcome)
def test_raw_tokens_on_sentence_without_word_and_multiwords(self):
    """raw_tokens() is an empty generator when only empty nodes exist."""
    only_empty_node = [EmptyNode(main_index=0, sub_index=1)]
    sentence = Sentence(only_empty_node)

    tokens = sentence.raw_tokens()

    self.assertIsInstance(tokens, Generator)
    self.assertEqual([], list(tokens))
def test_words_on_sentence_wit_mixed_element(self):
    """words() yields only the Word elements, in sentence order.

    Multiwords and empty nodes interleaved with the words must not show
    up in the result, and the result must be a generator.
    """
    expected = [Word(index=position) for position in range(1, 5)]
    word_1, word_2, word_3, word_4 = expected

    elements = [
        Multiword(first_index=1, last_index=2),
        word_1,  # 1
        word_2,  # 2
        EmptyNode(main_index=2, sub_index=1),
        EmptyNode(main_index=2, sub_index=2),
        Multiword(first_index=3, last_index=4),
        word_3,  # 3
        word_4,  # 4
    ]
    sentence = Sentence(elements)

    found = sentence.words()

    self.assertIsInstance(found, Generator)
    self.assertEqual(expected, list(found))
def test_words_on_sentence_without_word_elements(self):
    """words() is an empty generator when the sentence has no Word."""
    elements = [
        EmptyNode(main_index=0, sub_index=1),
        Multiword(first_index=1, last_index=2),
    ]
    sentence = Sentence(elements)

    found = sentence.words()

    self.assertIsInstance(found, Generator)
    self.assertEqual([], list(found))
def test_to_conllu_with_many_elements_and_no_comments(self):
    """Each element becomes one line, followed by a closing blank line."""
    multiword = Multiword(first_index=1, last_index=2, form="Foobar")
    first_word = Word(index=1, form='Foo')
    second_word = Word(index=2, form='bar')
    emptynode = EmptyNode(main_index=2, sub_index=1, form='Baz')
    sentence = Sentence(
        elements=[multiword, first_word, second_word, emptynode])

    expected_output = (
        '1-2\tFoobar\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '1\tFoo\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '2\tbar\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '2.1\tBaz\t_\t_\t_\t_\t_\t_\t_\t_\n'
        '\n'
    )

    self.assertEqual(expected_output, sentence.to_conllu())
def p_wordline_emptynode(prod: YaccProduction) -> None:
    'wordline : DECIMAL_ID TAB FORM TAB LEMMA TAB UPOS TAB XPOS TAB ' \
        'FEATS TAB HEAD TAB DEPREL TAB DEPS TAB MISC NEWLINE'
    # NOTE: the string literal above is the grammar production for this
    # rule and sits in the docstring slot; it must remain the first
    # statement and must not be edited as if it were documentation.
    #
    # Symbol positions used below (TAB separators take the even slots):
    #   1 DECIMAL_ID, 3 FORM, 5 LEMMA, 7 UPOS, 9 XPOS, 11 FEATS,
    #   13 HEAD, 15 DEPREL, 17 DEPS, 19 MISC.
    #
    # Builds an EmptyNode from the matched line and stores it in prod[0].
    # Raises IllegalEmptyNodeError when HEAD or DEPREL carries a value.
    if not (prod[13] is None and prod[15] is None):
        raise IllegalEmptyNodeError(prod)

    decimal_id = prod[1]
    main_index = decimal_id[0]
    sub_index = decimal_id[1]
    form = prod[3]
    lemma = prod[5]
    upos_name = prod[7]
    # A falsy UPOS value means "no tag"; otherwise look it up by name.
    if upos_name:
        upos = UposTag[upos_name]
    else:
        upos = None

    prod[0] = EmptyNode(
        main_index=main_index,
        sub_index=sub_index,
        form=form,
        lemma=lemma,
        upos=upos,
        xpos=prod[9],
        feats=prod[11],
        deps=prod[17],
        misc=prod[19],
    )
def test_init_xpos(self):
    """The xpos passed to the constructor is exposed as ``xpos``."""
    node = EmptyNode(xpos='Foo')

    stored = node.xpos

    self.assertEqual('Foo', stored)
def test_init_feats(self):
    """The feats passed to the constructor are exposed as ``feats``."""
    node = EmptyNode(feats={'foo': 'bar'})

    stored = node.feats

    self.assertEqual({'foo': 'bar'}, stored)
def test_init_deps(self):
    """The deps passed to the constructor are exposed as ``deps``."""
    node = EmptyNode(deps={'foo': 'bar'})

    stored = node.deps

    self.assertEqual({'foo': 'bar'}, stored)
def test_init_misc(self):
    """The misc passed to the constructor is exposed as ``misc``."""
    node = EmptyNode(misc='Foo')

    stored = node.misc

    self.assertEqual('Foo', stored)
def test_is_valid_true_with_sub_index_greater_than_zero(self):
    """An empty node with a positive sub index is valid."""
    node = EmptyNode(main_index=42, sub_index=1)

    outcome = node.is_valid()

    self.assertTrue(outcome)
def test_is_valid_true_with_main_index_equal_to_zero(self):
    """An empty node whose main index is 0 is valid."""
    node = EmptyNode(main_index=0, sub_index=42)

    outcome = node.is_valid()

    self.assertTrue(outcome)
def test_is_valid_false_if_first_element_is_emptynode_with_index_not_0(
        self):
    """A sentence opening with an empty node at main index 1 is invalid."""
    opening_node = EmptyNode(main_index=1, sub_index=1)
    sentence = Sentence([opening_node])

    outcome = sentence.is_valid()

    self.assertFalse(outcome)
def test_is_valid_false_on_sentence_without_word_elements(self):
    """A sentence holding no Word element is invalid."""
    elements = [
        EmptyNode(main_index=0, sub_index=1),
        Multiword(first_index=1, last_index=2),
    ]

    outcome = Sentence(elements).is_valid()

    self.assertFalse(outcome)
def test_is_valid_false_with_sub_index_is_not_set(self):
    """An empty node created without a sub index is invalid."""
    node = EmptyNode(main_index=42)

    outcome = node.is_valid()

    self.assertFalse(outcome)
def test_is_valid_false_with_no_values_set(self):
    """An empty node created with no arguments is invalid."""
    node = EmptyNode()

    outcome = node.is_valid()

    self.assertFalse(outcome)
def test_is_valid_false_with_sub_index_less_than_zero(self):
    """An empty node with a negative sub index is invalid."""
    node = EmptyNode(main_index=42, sub_index=-1)

    outcome = node.is_valid()

    self.assertFalse(outcome)
def test_is_valid_false_with_sub_index_equal_to_zero(self):
    """An empty node with a sub index of 0 is invalid."""
    node = EmptyNode(main_index=42, sub_index=0)

    outcome = node.is_valid()

    self.assertFalse(outcome)