Ejemplo n.º 1
0
 def test_NonterminalButpreterminal(self):
     """Check the attributes collected for the node at ``ccg_root[1]``.

     The fixture is a two-token sentence whose root span ``sp1-5`` lists
     the children ``sp1-3 sp1-4``.  The node at index 1 of the built tree
     is expected to yield the attributes of span ``sp1-4`` plus, under the
     ``child0_`` prefix, those of span ``sp1-2`` and the ``surf`` of token
     ``t1_2``.
     """
     xml_source = r"""
   <sentence id="s1">
     <tokens>
       <token surf="surf1" id="t1_1"/>
       <token surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-5">
       <span terminal="t1_1" category="cat1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" id="sp1-2"/>
       <span child="sp1-1" rule="lex1" category="NP1" id="sp1-3"/>
       <span child="sp1-2" rule="lex2" category="NP2" id="sp1-4"/>
       <span child="sp1-3 sp1-4" rule="rr" category="NPP" id="sp1-5"/>
     </ccg>
   </sentence>
 """
     sentence_elem = etree.fromstring(xml_source)
     tree_root = build_ccg_tree(sentence_elem.find("ccg"))
     token_elems = sentence_elem.find("tokens")
     attributes = get_attributes_from_ccg_node_recursively(
         tree_root[1], token_elems)
     expected = {
         'category': 'NP2',
         'rule': 'lex2',
         'id': 'sp1-4',
         'child': 'sp1-2',
         'child0_terminal': 't1_2',
         'child0_surf': 'surf2',
         'child0_category': 'cat2',
         'child0_id': 'sp1-2',
     }
     # Same size first, then every expected key must map to the same value;
     # a key absent from the result is compared as None.
     self.assertEqual(
         len(expected), len(attributes),
         '\n{0}\nvs.\n{1}'.format(expected, attributes))
     for key, wanted in expected.items():
         self.assertEqual(wanted, attributes.get(key, None))
Ejemplo n.º 2
0
 def test_nonterminal1(self):
     """Check attribute collection from the root of a unary three-span tree.

     The fixture has one token and the chain ``sp1-3`` -> ``sp1-2`` ->
     ``sp1-1``.  Starting at the root, the result is expected to hold the
     root span's attributes, its child's under ``child0_``, and the
     grandchild's (including the token's ``surf``) under
     ``child0_child0_``.

     Every assertion carries a self-contained message so that a failure
     shows the mappings or the offending key without re-running the test.
     """
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token surf="surf1" id="t1_1"/>
     </tokens>
     <ccg root="sp1-3">
       <span terminal="t1_1" category="cat1" id="sp1-1"/>
       <span child="sp1-1" rule="lex" category="NP" id="sp1-2"/>
       <span child="sp1-2" rule="rr" category="NPP" id="sp1-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = sentence.find("ccg")
     ccg_root = build_ccg_tree(ccg_tree)
     tokens = sentence.find("tokens")
     attributes = get_attributes_from_ccg_node_recursively(ccg_root, tokens)
     expected_attributes = {
         'category': 'NPP',
         'rule': 'rr',
         'id': 'sp1-3',
         'child': 'sp1-2',
         'child0_category': 'NP',
         'child0_rule': 'lex',
         'child0_id': 'sp1-2',
         'child0_child': 'sp1-1',
         'child0_child0_terminal': 't1_1',
         'child0_child0_surf': 'surf1',
         'child0_child0_category': 'cat1',
         'child0_child0_id': 'sp1-1',
     }
     # Report both mappings on a size mismatch, like the sibling tests do.
     self.assertEqual(
         len(expected_attributes), len(attributes),
         '\n{0}\nvs.\n{1}'.format(expected_attributes, attributes))
     for k, expected_value in expected_attributes.items():
         # A key absent from the result is compared as None.
         actual_value = attributes.get(k, None)
         self.assertEqual(
             expected_value, actual_value,
             'attribute {0!r}: expected {1!r}, got {2!r}'.format(
                 k, expected_value, actual_value))
Ejemplo n.º 3
0
 def test_terminal(self):
     """Check attribute collection for a tree made of one terminal span.

     The fixture has a single token and a single span that references it.
     The result is expected to hold the span's own attributes
     (``terminal``, ``category``, ``end``, ``begin``, ``id``) together
     with the token's ``base``, ``pos`` and ``surf``; ``id`` is expected
     to stay the span's.

     Keys are looked up with ``.get`` so that a missing attribute is
     reported as an assertion failure naming the key, not as a KeyError.
     """
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
     </tokens>
     <ccg root="sp1-1">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = sentence.find("ccg")
     ccg_root = build_ccg_tree(ccg_tree)
     tokens = sentence.find("tokens")
     attributes = get_attributes_from_ccg_node_recursively(ccg_root, tokens)
     expected_attributes = {
         'terminal': 't1_1',
         'category': 'cat1',
         'end': '2',
         'begin': '1',
         'id': 'sp1-1',
         'base': 'base1',
         'pos': 'pos1',
         'surf': 'surf1',
     }
     # Report both mappings when their sizes differ.
     self.assertEqual(
         len(expected_attributes), len(attributes),
         '\n{0}\nvs.\n{1}'.format(expected_attributes, attributes))
     for k, expected_value in expected_attributes.items():
         # A key absent from the result is compared as None.
         actual_value = attributes.get(k, None)
         self.assertEqual(
             expected_value, actual_value,
             'attribute {0!r}: expected {1!r}, got {2!r}'.format(
                 k, expected_value, actual_value))
Ejemplo n.º 4
0
 def test_preterminal(self):
     """Check attribute collection for a span whose only child is terminal.

     The fixture has one token, a terminal span ``sp1-1`` and the root
     span ``sp1-2`` above it.  The result is expected to hold the root's
     attributes plus, under the ``child0_`` prefix, the terminal span's
     attributes and the token's ``surf``.
     """
     xml_source = r"""
   <sentence id="s1">
     <tokens>
       <token surf="surf1" id="t1_1"/>
     </tokens>
     <ccg root="sp1-2">
       <span terminal="t1_1" category="cat1" id="sp1-1"/>
       <span child="sp1-1" rule="lex" category="NP" id="sp1-2"/>
     </ccg>
   </sentence>
 """
     sentence_elem = etree.fromstring(xml_source)
     tree_root = build_ccg_tree(sentence_elem.find("ccg"))
     token_elems = sentence_elem.find("tokens")
     attributes = get_attributes_from_ccg_node_recursively(
         tree_root, token_elems)

     # Attributes of the root span itself ...
     expected = {
         'category': 'NP', 'rule': 'lex', 'id': 'sp1-2', 'child': 'sp1-1',
     }
     # ... followed by those of its single (terminal) child.
     expected.update({
         'child0_terminal': 't1_1',
         'child0_surf': 'surf1',
         'child0_category': 'cat1',
         'child0_id': 'sp1-1',
     })

     self.assertEqual(len(expected), len(attributes))
     for key, wanted in expected.items():
         # A key absent from the result is compared as None.
         self.assertEqual(wanted, attributes.get(key, None))
Ejemplo n.º 5
0
 def test_terminal(self):
     """Check attribute collection for a tree made of one terminal span.

     The fixture has a single token and a single span that references it.
     The result is expected to hold the span's own attributes
     (``terminal``, ``category``, ``end``, ``begin``, ``id``) together
     with the token's ``base``, ``pos`` and ``surf``; ``id`` is expected
     to stay the span's.

     Keys are looked up with ``.get`` so that a missing attribute is
     reported as an assertion failure naming the key, not as a KeyError.
     """
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
     </tokens>
     <ccg root="sp1-1">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = sentence.find("ccg")
     ccg_root = build_ccg_tree(ccg_tree)
     tokens = sentence.find("tokens")
     attributes = get_attributes_from_ccg_node_recursively(ccg_root, tokens)
     expected_attributes = {
         'terminal': 't1_1',
         'category': 'cat1',
         'end': '2',
         'begin': '1',
         'id': 'sp1-1',
         'base': 'base1',
         'pos': 'pos1',
         'surf': 'surf1'
     }
     # Report both mappings when their sizes differ.
     self.assertEqual(
         len(expected_attributes), len(attributes),
         '\n{0}\nvs.\n{1}'.format(expected_attributes, attributes))
     for k, expected_value in expected_attributes.items():
         # A key absent from the result is compared as None.
         actual_value = attributes.get(k, None)
         self.assertEqual(
             expected_value, actual_value,
             'attribute {0!r}: expected {1!r}, got {2!r}'.format(
                 k, expected_value, actual_value))
Ejemplo n.º 6
0
 def test_nonterminal2(self):
     """Check attribute collection from the root of a binary CCG tree.

     The fixture has two tokens; the root span ``sp1-5`` lists the
     children ``sp1-3 sp1-4``, each of which sits above one terminal span.
     The result is expected to hold the root's attributes, each child's
     under ``child0_`` / ``child1_``, and each terminal's (including the
     token's ``surf``) under ``child0_child0_`` / ``child1_child0_``.
     """
     xml_source = r"""
   <sentence id="s1">
     <tokens>
       <token surf="surf1" id="t1_1"/>
       <token surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-5">
       <span terminal="t1_1" category="cat1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" id="sp1-2"/>
       <span child="sp1-1" rule="lex1" category="NP1" id="sp1-3"/>
       <span child="sp1-2" rule="lex2" category="NP2" id="sp1-4"/>
       <span child="sp1-3 sp1-4" rule="rr" category="NPP" id="sp1-5"/>
     </ccg>
   </sentence>
 """
     sentence_elem = etree.fromstring(xml_source)
     tree_root = build_ccg_tree(sentence_elem.find("ccg"))
     token_elems = sentence_elem.find("tokens")
     attributes = get_attributes_from_ccg_node_recursively(
         tree_root, token_elems)

     # Root span.
     expected = {
         'category': 'NPP', 'rule': 'rr', 'id': 'sp1-5',
         'child': 'sp1-3 sp1-4',
     }
     # First child subtree (sp1-3 over sp1-1 / t1_1).
     expected.update({
         'child0_category': 'NP1',
         'child0_rule': 'lex1',
         'child0_id': 'sp1-3',
         'child0_child': 'sp1-1',
         'child0_child0_terminal': 't1_1',
         'child0_child0_surf': 'surf1',
         'child0_child0_category': 'cat1',
         'child0_child0_id': 'sp1-1',
     })
     # Second child subtree (sp1-4 over sp1-2 / t1_2).
     expected.update({
         'child1_category': 'NP2',
         'child1_rule': 'lex2',
         'child1_id': 'sp1-4',
         'child1_child': 'sp1-2',
         'child1_child0_terminal': 't1_2',
         'child1_child0_surf': 'surf2',
         'child1_child0_category': 'cat2',
         'child1_child0_id': 'sp1-2',
     })

     size_report = '\n{0}\nvs.\n{1}'.format(expected, attributes)
     self.assertEqual(len(expected), len(attributes), size_report)
     for key, wanted in expected.items():
         # A key absent from the result is compared as None.
         self.assertEqual(wanted, attributes.get(key, None))