def test_NonterminalButpreterminal(self):
    """Check the attributes gathered from a span that sits below the root.

    The fixture has a binary root (sp1-5) over two spans that each have one
    terminal child. The node at index 1 of the built root is flattened, and
    the result is expected to hold span sp1-4's own attributes plus
    ``child0_``-prefixed attributes of its terminal and that terminal's token.
    """
    # Adjacent raw literals concatenate to one single-line XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token surf="surf1" id="t1_1"/>'
        r' <token surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-5">'
        r' <span terminal="t1_1" category="cat1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" id="sp1-2"/>'
        r' <span child="sp1-1" rule="lex1" category="NP1" id="sp1-3"/>'
        r' <span child="sp1-2" rule="lex2" category="NP2" id="sp1-4"/>'
        r' <span child="sp1-3 sp1-4" rule="rr" category="NPP" id="sp1-5"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence_node = etree.fromstring(xml_source)
    root_node = build_ccg_tree(sentence_node.find("ccg"))
    token_nodes = sentence_node.find("tokens")

    # Only the subtree at index 1 of the root is inspected here.
    actual = get_attributes_from_ccg_node_recursively(root_node[1], token_nodes)

    expected = {
        'category': 'NP2',
        'rule': 'lex2',
        'id': 'sp1-4',
        'child': 'sp1-2',
        'child0_terminal': 't1_2',
        'child0_surf': 'surf2',
        'child0_category': 'cat2',
        'child0_id': 'sp1-2',
    }

    # Size check first, so unexpected extra keys are reported with both dicts.
    self.assertEqual(
        len(expected),
        len(actual),
        '\n{0}\nvs.\n{1}'.format(expected, actual))
    for key, wanted in expected.items():
        self.assertEqual(wanted, actual.get(key, None))
def test_nonterminal1(self):
    """Check attribute flattening for a unary chain of three spans.

    The root (sp1-3) has one child (sp1-2), which in turn has one terminal
    child (sp1-1). The expected mapping nests the prefixes accordingly:
    ``child0_`` for the middle span and ``child0_child0_`` for the terminal
    together with its token's ``surf``.
    """
    # Adjacent raw literals concatenate to one single-line XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token surf="surf1" id="t1_1"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="cat1" id="sp1-1"/>'
        r' <span child="sp1-1" rule="lex" category="NP" id="sp1-2"/>'
        r' <span child="sp1-2" rule="rr" category="NPP" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence_node = etree.fromstring(xml_source)
    root_node = build_ccg_tree(sentence_node.find("ccg"))
    token_nodes = sentence_node.find("tokens")

    actual = get_attributes_from_ccg_node_recursively(root_node, token_nodes)

    expected = {
        # Root span.
        'category': 'NPP',
        'rule': 'rr',
        'id': 'sp1-3',
        'child': 'sp1-2',
        # Its only child.
        'child0_category': 'NP',
        'child0_rule': 'lex',
        'child0_id': 'sp1-2',
        'child0_child': 'sp1-1',
        # The terminal below that child, including the token's surface form.
        'child0_child0_terminal': 't1_1',
        'child0_child0_surf': 'surf1',
        'child0_child0_category': 'cat1',
        'child0_child0_id': 'sp1-1',
    }

    # Equal sizes plus matching expected keys rules out extra attributes.
    self.assertEqual(len(expected), len(actual))
    for key, wanted in expected.items():
        self.assertEqual(wanted, actual.get(key, None))
def test_terminal(self):
    """Check attribute flattening for a tree that is a single terminal span.

    The expected mapping contains the span's own attributes together with
    the ``base``, ``pos`` and ``surf`` attributes of the token it refers to,
    all without any ``childN_`` prefix.
    """
    # NOTE(review): another method named test_terminal appears later in this
    # file. If both live in the same class, the later definition replaces
    # this one and this test never runs -- confirm and rename one of them.

    # Adjacent raw literals concatenate to one single-line XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' </tokens>'
        r' <ccg root="sp1-1">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence_node = etree.fromstring(xml_source)
    root_node = build_ccg_tree(sentence_node.find("ccg"))
    token_nodes = sentence_node.find("tokens")

    actual = get_attributes_from_ccg_node_recursively(root_node, token_nodes)

    expected = {
        'terminal': 't1_1',
        'category': 'cat1',
        'end': '2',
        'begin': '1',
        'id': 'sp1-1',
        'base': 'base1',
        'pos': 'pos1',
        'surf': 'surf1',
    }

    self.assertEqual(len(expected), len(actual))
    # Direct indexing: a missing key surfaces as an error, not a None compare.
    for key, wanted in expected.items():
        self.assertEqual(wanted, actual[key])
def test_preterminal(self):
    """Check attribute flattening for a span whose only child is a terminal.

    The expected mapping holds the root span's own attributes plus the
    terminal's attributes and its token's ``surf`` under the ``child0_``
    prefix.
    """
    # Adjacent raw literals concatenate to one single-line XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token surf="surf1" id="t1_1"/>'
        r' </tokens>'
        r' <ccg root="sp1-2">'
        r' <span terminal="t1_1" category="cat1" id="sp1-1"/>'
        r' <span child="sp1-1" rule="lex" category="NP" id="sp1-2"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence_node = etree.fromstring(xml_source)
    root_node = build_ccg_tree(sentence_node.find("ccg"))
    token_nodes = sentence_node.find("tokens")

    actual = get_attributes_from_ccg_node_recursively(root_node, token_nodes)

    expected = {
        'category': 'NP',
        'rule': 'lex',
        'id': 'sp1-2',
        'child': 'sp1-1',
        'child0_terminal': 't1_1',
        'child0_surf': 'surf1',
        'child0_category': 'cat1',
        'child0_id': 'sp1-1',
    }

    # Equal sizes plus matching expected keys rules out extra attributes.
    self.assertEqual(len(expected), len(actual))
    for key, wanted in expected.items():
        self.assertEqual(wanted, actual.get(key, None))
def test_terminal(self):
    """Check attribute flattening when the whole tree is one terminal span.

    The result is expected to combine the span's attributes (``terminal``,
    ``category``, ``end``, ``begin``, ``id``) with the referenced token's
    ``base``, ``pos`` and ``surf``, with no ``childN_`` prefix on any key.
    """
    # NOTE(review): a method with this same name also appears earlier in this
    # file. If both are in the same class, this definition shadows the
    # earlier one -- confirm and rename one of them.

    # Adjacent raw literals concatenate to one single-line XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' </tokens>'
        r' <ccg root="sp1-1">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence_node = etree.fromstring(xml_source)
    ccg_node = sentence_node.find("ccg")
    root_node = build_ccg_tree(ccg_node)
    token_nodes = sentence_node.find("tokens")

    actual = get_attributes_from_ccg_node_recursively(root_node, token_nodes)

    expected = {
        # Attributes of the span itself.
        'terminal': 't1_1',
        'category': 'cat1',
        'end': '2',
        'begin': '1',
        'id': 'sp1-1',
        # Attributes taken from the token.
        'base': 'base1',
        'pos': 'pos1',
        'surf': 'surf1',
    }

    self.assertEqual(len(expected), len(actual))
    # Direct indexing: a missing key surfaces as an error, not a None compare.
    for key, wanted in expected.items():
        self.assertEqual(wanted, actual[key])
def test_nonterminal2(self):
    """Check attribute flattening for a binary root over two unary spans.

    The root (sp1-5) has two children (sp1-3, sp1-4), each with a single
    terminal child. The expected mapping uses ``child0_`` / ``child1_`` for
    the two branches and a further ``child0_`` level for each terminal,
    including the matching token's ``surf``.
    """
    # Adjacent raw literals concatenate to one single-line XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token surf="surf1" id="t1_1"/>'
        r' <token surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-5">'
        r' <span terminal="t1_1" category="cat1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" id="sp1-2"/>'
        r' <span child="sp1-1" rule="lex1" category="NP1" id="sp1-3"/>'
        r' <span child="sp1-2" rule="lex2" category="NP2" id="sp1-4"/>'
        r' <span child="sp1-3 sp1-4" rule="rr" category="NPP" id="sp1-5"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence_node = etree.fromstring(xml_source)
    root_node = build_ccg_tree(sentence_node.find("ccg"))
    token_nodes = sentence_node.find("tokens")

    actual = get_attributes_from_ccg_node_recursively(root_node, token_nodes)

    expected = {
        # Root span.
        'category': 'NPP',
        'rule': 'rr',
        'id': 'sp1-5',
        'child': 'sp1-3 sp1-4',
        # First branch and its terminal.
        'child0_category': 'NP1',
        'child0_rule': 'lex1',
        'child0_id': 'sp1-3',
        'child0_child': 'sp1-1',
        'child0_child0_terminal': 't1_1',
        'child0_child0_surf': 'surf1',
        'child0_child0_category': 'cat1',
        'child0_child0_id': 'sp1-1',
        # Second branch and its terminal.
        'child1_category': 'NP2',
        'child1_rule': 'lex2',
        'child1_id': 'sp1-4',
        'child1_child': 'sp1-2',
        'child1_child0_terminal': 't1_2',
        'child1_child0_surf': 'surf2',
        'child1_child0_category': 'cat2',
        'child1_child0_id': 'sp1-2',
    }

    # Size check first, so unexpected extra keys are reported with both dicts.
    self.assertEqual(
        len(expected),
        len(actual),
        '\n{0}\nvs.\n{1}'.format(expected, actual))
    for key, wanted in expected.items():
        self.assertEqual(wanted, actual.get(key, None))