def test_lexical_binary_two_types(self):
    """Two typed leaves under a binary ``lex`` node give two Coq declarations.

    Both leaf rules carry a ``coq_type``; ``get_coq_types`` is expected to
    return one ``Parameter`` line per leaf, in the order asserted below.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'cat1', r'\P.P', {'coq_type': 'Entity -> Prop'}),
        SemanticRule(r'cat2', r'\P.P', {'coq_type': 'Entity -> Prop -> Prop'}),
        SemanticRule(r'NP', r'\P Q.(Q -> P)', {'rule': 'lex'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    tree = assign_semantics_to_ccg(etree.fromstring(xml_source), index)
    declared = get_coq_types(tree)
    wanted = [
        "Parameter _base1 : Entity -> Prop.",
        "Parameter _base2 : Entity -> Prop -> Prop.",
    ]
    self.assertEqual(wanted, declared)
def test_lexical_binary_one_type(self):
    """Only the leaf whose rule has a ``coq_type`` produces a Coq declaration.

    The ``cat1`` rule has no ``coq_type`` and the ``cat2`` rule has one, so
    the expected list holds a single ``Parameter`` line for ``_base2``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'cat1', r'\P.P'),
        SemanticRule(r'cat2', r'\Q x.Q(x)', {'coq_type': 'Entity -> Prop'}),
        SemanticRule(r'NP', r'\P Q x.(P -> Q(x))', {'rule': 'lex'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    tree = assign_semantics_to_ccg(etree.fromstring(xml_source), index)
    declared = get_coq_types(tree)
    self.assertEqual(["Parameter _base2 : Entity -> Prop."], declared)
def test_inner_node_child_categoryWithFeats(self):
    """A ``child1_category`` constraint is tested against a featured category.

    The NP rule requires ``child1_category`` to be ``NP/NP`` while the tree's
    second leaf is tagged ``NP/NP[mod=xx]``; the root semantics is expected
    to be ``_base2 -> _base1``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'cat1', r'\P.P'),
        SemanticRule(r'NP/NP', r'\P.P'),
        SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child1_category': 'NP/NP'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="NP/NP[mod=xx]" end="3" begin="2" id="sp1-2"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    tree = assign_semantics_to_ccg(etree.fromstring(xml_source), index)
    root_sem = tree.get('sem', None)
    wanted = lexpr(r'_base2 -> _base1')
    self.assertEqual(wanted, lexpr(root_sem))
def test_match_any2(self):
    """Rules constrained by ``child_any_pos`` / ``child_any_category``.

    Three NP rules compete (``rule``, ``child_any_pos`` and
    ``child_any_category`` constraints) over a two-level tree; the expected
    root semantics is ``_base3 -> (_base2 | _base1)``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'cat1', r'\P.P'),
        SemanticRule(r'cat2', r'\P.P'),
        SemanticRule(r'cat3', r'\P.P'),
        SemanticRule(r'NP', r'\P Q.(Q & P)', {'rule': 'lex'}),
        SemanticRule(r'NP', r'\P Q.(Q | P)', {'child_any_pos': 'pos1'}),
        SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child_any_category': 'cat3'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' <token base="base3" pos="pos3" surf="surf3" id="t1_3"/>'
        r' </tokens>'
        r' <ccg root="sp1-5">'
        r' <span terminal="t1_1" category="cat1" pos="pos1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" pos="pos2" end="3" begin="2" id="sp1-2"/>'
        r' <span terminal="t1_3" category="cat3" pos="pos3" end="4" begin="3" id="sp1-3"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-4"/>'
        r' <span child="sp1-4 sp1-3" rule="lex" category="NP" end="4" begin="1" id="sp1-5"/>'
        r' </ccg>'
        r' </sentence> '
    )
    tree = assign_semantics_to_ccg(etree.fromstring(xml_source), index)
    root_sem = tree.get('sem', None)
    wanted = lexpr(r'_base3 -> (_base2 | _base1)')
    self.assertEqual(wanted, lexpr(root_sem))
def test_func_combination_backwardComplexTwoArgs(self):
    """Combine a two-argument verb with a past-tense auxiliary.

    The root span is produced by the rule named ``<B2`` from a verb leaf and
    an auxiliary leaf whose categories carry feature structures; the expected
    root semantics wraps the verb predicate in ``AND(past(e), ...)``.

    The rule name is written as ``&lt;B2`` in the fixture because a literal
    ``<`` is not allowed inside an XML attribute value and would make the
    parser reject the document before the test logic runs.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'S\NP\NP', r'\P y x e. P(e, x, y)'),
        SemanticRule(r'S\S', r'\P Q e. AND(past(e), Q(e))'),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token id="s1_4" surf="ほめ" pos="動詞" pos1="自立" pos2="*" pos3="*"'
        r' inflectionType="一段" inflectionForm="連用形" base="ほめる" reading="ホメ"/>'
        r' <token id="s1_5" surf="た" pos="助動詞" pos1="*" pos2="*" pos3="*"'
        r' inflectionType="特殊・タ" inflectionForm="基本形" base="た" reading="タ"/>'
        r' </tokens>'
        r' <ccg root="s1_sp9">'
        r' <span id="s1_sp9" begin="4" end="6"'
        r' category="(S[mod=nm,form=base]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]"'
        r' rule="&lt;B2" child="s1_sp10 s1_sp11"/>'
        r' <span id="s1_sp10" begin="4" end="5"'
        r' category="(S[mod=nm,form=cont]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]"'
        r' terminal="s1_4"/>'
        r' <span id="s1_sp11" begin="5" end="6"'
        r' category="S[mod=nm,form=base]\S[mod=nm,form=cont]" terminal="s1_5"/>'
        r' </ccg>'
        r' </sentence> '
    )
    tree = assign_semantics_to_ccg(etree.fromstring(xml_source), index)
    root_sem = tree.get('sem', None)
    wanted = lexpr(r'\y x e.AND(past(e), _ほめる(x, y, e))')
    self.assertEqual(wanted, lexpr(root_sem))
def test_inner_node_child_categoryWithFeats(self):
    """Inner-node rule selected through ``child1_category`` with features.

    The tree's second leaf has category ``NP/NP[mod=xx]`` and the NP rule
    asks for ``child1_category`` equal to ``NP/NP``. The test expects the
    root semantics ``_base2 -> _base1``.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'cat1', r'\P.P'),
        SemanticRule(r'NP/NP', r'\P.P'),
        SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child1_category': 'NP/NP'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    fixture = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="NP/NP[mod=xx]" end="3" begin="2" id="sp1-2"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    parsed = etree.fromstring(fixture)
    annotated = assign_semantics_to_ccg(parsed, rule_index)
    actual = annotated.get('sem', None)
    self.assertEqual(lexpr(r'_base2 -> _base1'), lexpr(actual))
def test_vertical_bar(self):
    """A rule category written with ``|`` is tried against a backslash category.

    The root span has a backslash category and rule ``>``. Two rules with
    rule ``>`` are registered, one using ``|`` and one using ``/``; the
    expected root semantics is the implication taken from the ``|`` rule.
    """
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="N" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="N" end="3" begin="2" id="sp1-2"/>'
        r' <span child="sp1-1 sp1-2" category="NP\NP" rule=">" end="3" begin="1" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    parsed = etree.fromstring(xml_source)
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP|NP', r'\F1 F2.(F1 -> F2)', {'rule': '>'}),
        SemanticRule(r'NP/NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
    ]
    tree = assign_semantics_to_ccg(parsed, index)
    actual = lexpr(tree.get('sem', None))
    wanted = lexpr(r'(_base1 -> _base2)')
    self.assertEqual(wanted, actual)
def test_match_any2(self):
    """Selection among NP rules using ``child_any_*`` constraints.

    The fixture nests one NP span inside another. With the three NP rules
    registered below, the root semantics is expected to equal
    ``_base3 -> (_base2 | _base1)``.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'cat1', r'\P.P'),
        SemanticRule(r'cat2', r'\P.P'),
        SemanticRule(r'cat3', r'\P.P'),
        SemanticRule(r'NP', r'\P Q.(Q & P)', {'rule': 'lex'}),
        SemanticRule(r'NP', r'\P Q.(Q | P)', {'child_any_pos': 'pos1'}),
        SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child_any_category': 'cat3'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    fixture = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' <token base="base3" pos="pos3" surf="surf3" id="t1_3"/>'
        r' </tokens>'
        r' <ccg root="sp1-5">'
        r' <span terminal="t1_1" category="cat1" pos="pos1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" pos="pos2" end="3" begin="2" id="sp1-2"/>'
        r' <span terminal="t1_3" category="cat3" pos="pos3" end="4" begin="3" id="sp1-3"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-4"/>'
        r' <span child="sp1-4 sp1-3" rule="lex" category="NP" end="4" begin="1" id="sp1-5"/>'
        r' </ccg>'
        r' </sentence> '
    )
    parsed = etree.fromstring(fixture)
    annotated = assign_semantics_to_ccg(parsed, rule_index)
    actual = annotated.get('sem', None)
    self.assertEqual(lexpr(r'_base3 -> (_base2 | _base1)'), lexpr(actual))
def test_lexical_binary_two_coq_complex_type(self):
    """Two-argument Coq types survive extraction and library merging.

    First checks the ``Parameter`` lines returned by ``get_coq_types``, then
    feeds them through ``convert_coq_signatures_to_nltk``,
    ``build_dynamic_library`` and ``merge_dynamic_libraries`` and checks the
    merged library contains the same two declarations in any order.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'cat1', r'\P x R.P(x, R)',
                     {'coq_type': 'Entity -> Prop -> Prop'}),
        SemanticRule(r'cat2', r'\Q S T.Q(S, T)',
                     {'coq_type': 'Prop -> Entity -> Prop'}),
        SemanticRule(r'NP', r'\P Q x R S T.(Q(x, R) -> P(S, T))',
                     {'rule': 'lex'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence = etree.fromstring(xml_source)
    tree = assign_semantics_to_ccg(sentence, index)

    declared = get_coq_types(tree)
    self.assertEqual(
        ['Parameter _base1 : Entity -> Prop -> Prop.',
         'Parameter _base2 : Prop -> Entity -> Prop.'],
        declared)

    formulas = [tree.get('sem')]
    signatures = convert_coq_signatures_to_nltk(declared)
    dynamic_lib = build_dynamic_library(formulas, signatures)
    merged = merge_dynamic_libraries(
        declared, dynamic_lib, './coqlib.v', sentence)
    # assertCountEqual: the merged library is compared without regard to order.
    self.assertCountEqual(
        ["Parameter _base2 : Prop -> Entity -> Prop.",
         "Parameter _base1 : Entity -> Prop -> Prop."],
        merged)
def test_RTG3Paths2Vars(self):
    """Three ``var_paths`` with a two-variable formula must not parse.

    The ``NPNP`` rule lists three paths while its formula binds only ``F1``
    and ``F2``; parsing the resulting root semantics with ``lexpr`` is
    expected to raise ``LogicalExpressionException``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                     {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'}),
    ]
    ccg_tree = assign_semantics_to_ccg(self.sentence, index)
    with self.assertRaises(nltk.sem.logic.LogicalExpressionException):
        # Only the raise matters; the parsed value is intentionally discarded.
        lexpr(ccg_tree.get('sem', None))
def test_CFG(self):
    """Plain rules without ``var_paths`` compose the shared fixture tree.

    Uses ``self.sentence`` and expects the root semantics
    ``(_base1 & _base2) -> (_base3 & _base4)``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)', {'rule': '>'}),
    ]
    tree = assign_semantics_to_ccg(self.sentence, index)
    actual = lexpr(tree.get('sem', None))
    wanted = lexpr(r'(_base1 & _base2) -> (_base3 & _base4)')
    self.assertEqual(wanted, actual)
def test_RTG3Paths3Vars(self):
    """Three ``var_paths`` feeding a three-variable formula.

    Uses ``self.sentence`` and expects the root semantics
    ``((_base3 & _base2) -> _base1)``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2 F3.((F3 & F2) -> F1)',
                     {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'}),
    ]
    tree = assign_semantics_to_ccg(self.sentence, index)
    actual = lexpr(tree.get('sem', None))
    wanted = lexpr(r'((_base3 & _base2) -> _base1)')
    self.assertEqual(wanted, actual)
def test_RTG1Path(self):
    """A single ``var_paths`` entry with a two-variable formula.

    Uses ``self.sentence``; the expected root semantics still abstracts
    over ``F2``, with ``_base2`` in the antecedent position.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                     {'var_paths': [[0, 1]], 'rule': '>'}),
    ]
    tree = assign_semantics_to_ccg(self.sentence, index)
    actual = lexpr(tree.get('sem', None))
    wanted = lexpr(r'\F2.(_base2 -> F2)')
    self.assertEqual(wanted, actual)
def test_CFG(self):
    """Rules with only a ``rule`` constraint applied to the shared fixture.

    The root semantics of ``self.sentence`` is expected to equal
    ``(_base1 & _base2) -> (_base3 & _base4)``.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)', {'rule': '>'}),
    ]
    annotated = assign_semantics_to_ccg(self.sentence, rule_index)
    root_formula = lexpr(annotated.get('sem', None))
    self.assertEqual(
        lexpr(r'(_base1 & _base2) -> (_base3 & _base4)'), root_formula)
def test_RTG3Paths2Vars(self):
    """More ``var_paths`` than bound variables must fail to parse.

    The ``NPNP`` rule declares three paths but its formula binds two
    variables; ``lexpr`` on the root semantics is expected to raise
    ``LogicalExpressionException``.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)', {
            'var_paths': [[0, 0], [0, 1], [1, 0]],
            'rule': '>',
        }),
    ]
    annotated = assign_semantics_to_ccg(self.sentence, rule_index)
    with self.assertRaises(nltk.sem.logic.LogicalExpressionException):
        # Only the raise matters; the parsed value is intentionally discarded.
        lexpr(annotated.get('sem', None))
def test_RTG1Path(self):
    """One ``var_paths`` entry leaves the second variable abstracted.

    With ``self.sentence`` the root semantics is expected to be a lambda
    over ``F2`` whose body is ``_base2 -> F2``.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)', {
            'var_paths': [[0, 1]],
            'rule': '>',
        }),
    ]
    annotated = assign_semantics_to_ccg(self.sentence, rule_index)
    root_formula = lexpr(annotated.get('sem', None))
    self.assertEqual(lexpr(r'\F2.(_base2 -> F2)'), root_formula)
def test_RTG3Paths3Vars(self):
    """Three ``var_paths`` matched by three bound variables.

    With ``self.sentence`` the root semantics is expected to equal
    ``((_base3 & _base2) -> _base1)``.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'N', r'\P.P', {}),
        SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
        SemanticRule(r'NPNP', r'\F1 F2 F3.((F3 & F2) -> F1)', {
            'var_paths': [[0, 0], [0, 1], [1, 0]],
            'rule': '>',
        }),
    ]
    annotated = assign_semantics_to_ccg(self.sentence, rule_index)
    root_formula = lexpr(annotated.get('sem', None))
    self.assertEqual(lexpr(r'((_base3 & _base2) -> _base1)'), root_formula)
def test_np_feature_no(self):
    """A single NP leaf without category features gets its base predicate.

    The only rule is the identity rule for ``NP``; the expected root
    semantics is ``_basepred``.
    """
    index = SemanticIndex(None)
    index.rules = [SemanticRule(r'NP', r'\P.P')]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s0">'
        r' <tokens>'
        r' <token base="basepred" pos="pos1" surf="surfpred" id="t0_0"/>'
        r' </tokens>'
        r' <ccg root="sp0-3">'
        r' <span terminal="t0_0" category="NP" end="1" begin="0" id="sp0-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    tree = assign_semantics_to_ccg(etree.fromstring(xml_source), index)
    root_sem = tree.get('sem', None)
    wanted = lexpr(r'_basepred')
    self.assertEqual(wanted, lexpr(root_sem))
def test_lexical_binary_two_coq_complex_type(self):
    """Two-argument Coq types through extraction and library merging.

    Checks the ``Parameter`` lines from ``get_coq_types``, converts them with
    ``convert_coq_signatures_to_nltk``, builds the dynamic library (only the
    first element of the returned pair is used) and checks that the merged
    library holds the two parenthesised declarations in any order.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'cat1', r'\P x R.P(x, R)',
                     {'coq_type': 'Entity -> Prop -> Prop'}),
        SemanticRule(r'cat2', r'\Q S T.Q(S, T)',
                     {'coq_type': 'Prop -> Entity -> Prop'}),
        SemanticRule(r'NP', r'\P Q x R S T.(Q(x, R) -> P(S, T))',
                     {'rule': 'lex'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    fixture = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>'
        r' </tokens>'
        r' <ccg root="sp1-3">'
        r' <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>'
        r' <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>'
        r' <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>'
        r' </ccg>'
        r' </sentence> '
    )
    sentence = etree.fromstring(fixture)
    annotated = assign_semantics_to_ccg(sentence, rule_index)

    declared = get_coq_types(annotated)
    self.assertEqual(
        ['Parameter _base1 : Entity -> Prop -> Prop.',
         'Parameter _base2 : Prop -> Entity -> Prop.'],
        declared)

    formulas = [annotated.get('sem')]
    signatures = convert_coq_signatures_to_nltk(declared)
    dynamic_lib, _ = build_dynamic_library(formulas, signatures)
    merged = merge_dynamic_libraries(
        signatures, dynamic_lib, './coqlib.v', sentence)
    # assertCountEqual: the merged library is compared without regard to order.
    self.assertCountEqual(
        ["Parameter _base2 : Prop -> (Entity -> Prop).",
         "Parameter _base1 : Entity -> (Prop -> Prop)."],
        merged)
def setUp(self):
    """Give the test case a ``SemanticIndex`` with a fixed set of rules.

    The index is built with ``None`` as its constructor argument and then
    has its ``rules`` attribute replaced by the list below; it is stored on
    ``self.semantic_index``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'NP', r'\P.P'),
        SemanticRule(r'NP/NP', r'\P Q x.(Q(x) & P(x))', {'rule': 'ADN'}),
        SemanticRule(r'S\NP', r'\P x.P(x)'),
        SemanticRule(r'S\NP\NP', r'\P y x.P(x, y)'),
        SemanticRule(r'S\NP\NP\NP', r'\P z y x.P(x, y, z)'),
        SemanticRule(r'default', r'\P x.x'),
    ]
    self.semantic_index = index
def test_lexical_unary(self):
    """A unary ``lex`` node applies its rule to the single child.

    The NP rule duplicates its argument into an implication, so the expected
    root semantics is ``_base1 -> _base1``.
    """
    index = SemanticIndex(None)
    index.rules = [
        SemanticRule(r'N', r'\P.P'),
        SemanticRule(r'NP', r'\P.(P -> P)', {'rule': 'lex'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    xml_source = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' </tokens>'
        r' <ccg root="sp1-2">'
        r' <span terminal="t1_1" category="N" end="2" begin="1" id="sp1-1"/>'
        r' <span child="sp1-1" rule="lex" category="NP" end="2" begin="1" id="sp1-2"/>'
        r' </ccg>'
        r' </sentence> '
    )
    tree = assign_semantics_to_ccg(etree.fromstring(xml_source), index)
    root_sem = tree.get('sem', None)
    wanted = lexpr(r'_base1 -> _base1')
    self.assertEqual(wanted, lexpr(root_sem))
def test_func_combination_backwardComplexTwoArgs(self):
    """Combination of a two-argument verb with a past-tense auxiliary.

    The root span uses the rule named ``<B2`` over a verb leaf and an
    auxiliary leaf with featured categories; the expected root semantics
    places the verb predicate inside ``AND(past(e), ...)``.

    The fixture spells the rule name as ``&lt;B2``: a raw ``<`` inside an
    XML attribute value is not well-formed and would make the parser fail
    before any semantics is assigned.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'S\NP\NP', r'\P y x e. P(e, x, y)'),
        SemanticRule(r'S\S', r'\P Q e. AND(past(e), Q(e))'),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    fixture = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token id="s1_4" surf="ほめ" pos="動詞" pos1="自立" pos2="*" pos3="*"'
        r' inflectionType="一段" inflectionForm="連用形" base="ほめる" reading="ホメ"/>'
        r' <token id="s1_5" surf="た" pos="助動詞" pos1="*" pos2="*" pos3="*"'
        r' inflectionType="特殊・タ" inflectionForm="基本形" base="た" reading="タ"/>'
        r' </tokens>'
        r' <ccg root="s1_sp9">'
        r' <span id="s1_sp9" begin="4" end="6"'
        r' category="(S[mod=nm,form=base]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]"'
        r' rule="&lt;B2" child="s1_sp10 s1_sp11"/>'
        r' <span id="s1_sp10" begin="4" end="5"'
        r' category="(S[mod=nm,form=cont]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]"'
        r' terminal="s1_4"/>'
        r' <span id="s1_sp11" begin="5" end="6"'
        r' category="S[mod=nm,form=base]\S[mod=nm,form=cont]" terminal="s1_5"/>'
        r' </ccg>'
        r' </sentence> '
    )
    parsed = etree.fromstring(fixture)
    annotated = assign_semantics_to_ccg(parsed, rule_index)
    actual = annotated.get('sem', None)
    self.assertEqual(
        lexpr(r'\y x e.AND(past(e), _ほめる(x, y, e))'), lexpr(actual))
def test_lexical_unary(self):
    """Unary ``lex`` rule over a single ``N`` leaf.

    The fixture has one token; the root NP span has one child. The root
    semantics is expected to equal ``_base1 -> _base1``.
    """
    rule_index = SemanticIndex(None)
    rule_index.rules = [
        SemanticRule(r'N', r'\P.P'),
        SemanticRule(r'NP', r'\P.(P -> P)', {'rule': 'lex'}),
    ]
    # Adjacent raw literals are joined by the parser into one XML document.
    fixture = (
        r' <sentence id="s1">'
        r' <tokens>'
        r' <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>'
        r' </tokens>'
        r' <ccg root="sp1-2">'
        r' <span terminal="t1_1" category="N" end="2" begin="1" id="sp1-1"/>'
        r' <span child="sp1-1" rule="lex" category="NP" end="2" begin="1" id="sp1-2"/>'
        r' </ccg>'
        r' </sentence> '
    )
    parsed = etree.fromstring(fixture)
    annotated = assign_semantics_to_ccg(parsed, rule_index)
    actual = annotated.get('sem', None)
    self.assertEqual(lexpr(r'_base1 -> _base1'), lexpr(actual))
def main(args=None):
    """Command-line entry point: attach semantics to CCG-parsed sentences.

    Parses the CCG XML file, and for every ``<sentence>`` and every selected
    tree index builds a ``<semantics>`` element (status ``success`` or
    ``failed``) that is appended to the sentence. The serialized document is
    then written to the output path.

    Args:
        args: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Exits with status 1 when the templates file or the CCG file is missing.
    """
    # The backslash is escaped so the help text keeps a literal backslash
    # without relying on an invalid escape sequence.
    DESCRIPTION = textwrap.dedent("""\
        categories_template.yaml should contain the semantic templates
          in YAML format.
        parsed_sentence.xml contains the CCG-parsed sentences.
        If --arbi-types is specified, then the arbitrary specification of
          types is enabled, thus using the argument as the field of the semantic
          template that is used. E.g, by specifying "--arbi-types coq_type"
          and a semantic template:
        - semantics: \\P x.P(x)
          category: NP
          coq_type: Animal
        The type "Animal" will be used for this expression. Otherwise,
        types of the sem/logic module of NLTK are used.
        """)

    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    arg_parser.add_argument("ccg")
    arg_parser.add_argument("templates")
    arg_parser.add_argument("sem")
    # NOTE(review): the help text describes --arbi-types as taking a field
    # name, but it is declared as a boolean flag — confirm the intent.
    arg_parser.add_argument("--arbi-types", action="store_true", default=False)
    # NOTE(review): store_true with default=True makes this option always True.
    arg_parser.add_argument("--gold_trees", action="store_true", default=True)
    arg_parser.add_argument("--nbest", nargs='?', type=int, default="0")
    # Honour the caller-supplied argument list; None falls back to sys.argv.
    args = arg_parser.parse_args(args)

    # Templates are checked before the CCG file, as in the original order.
    for required_path in (args.templates, args.ccg):
        if not os.path.exists(required_path):
            print('File does not exist: {0}'.format(required_path))
            sys.exit(1)

    logging.basicConfig(level=logging.WARNING)

    semantic_index = SemanticIndex(args.templates)

    xml_parser = etree.XMLParser(remove_blank_text=True)
    root = etree.parse(args.ccg, xml_parser)

    for sentence in root.findall('.//sentence'):
        if args.gold_trees:
            tree_indices = [int(sentence.get('gold_tree', '0')) + 1]
        # Any --nbest value other than 1 overrides the gold-tree selection.
        if args.nbest != 1:
            tree_indices = get_tree_indices(sentence, args.nbest)
        for tree_index in tree_indices:
            sem_node = etree.Element('semantics')
            sem_node.set(
                'ccg_id',
                sentence.xpath('./ccg[{0}]/@id'.format(tree_index))[0])
            try:
                # Optimistically marked as success; overwritten on failure.
                sem_node.set('status', 'success')
                sem_tree = assign_semantics_to_ccg(
                    sentence, semantic_index, tree_index)
                sem_node.set(
                    'root',
                    sentence.xpath('./ccg[{0}]/@root'.format(tree_index))[0])
                filter_attributes(sem_tree)
                sem_node.extend(sem_tree.xpath('.//descendant-or-self::span'))
            except LogicalExpressionException as e:
                sem_node.set('status', 'failed')
                logging.error('An error occurred: {0}'.format(e))
            sentence.append(sem_node)

    root_xml_str = serialize_tree(root)
    with codecs.open(args.sem, 'wb') as fout:
        fout.write(root_xml_str)
def main(args=None):
    """Command-line entry point: attach semantics to CCG-parsed sentences.

    Parses the command line into the module global ``ARGS``, loads the
    templates into ``SEMANTIC_INDEX`` and the ``<sentence>`` elements into
    ``SENTENCES``, obtains one list of semantic nodes per sentence from
    ``semantic_parse_sentences``, appends those nodes to their sentences and
    writes the serialized document to the output path.

    Args:
        args: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Exits with status 1 when the templates file or the CCG file is missing.
    """
    # NOTE(review): these globals are presumably read by the workers behind
    # semantic_parse_sentences, which is not visible here — confirm.
    global SEMANTIC_INDEX
    global ARGS
    global SENTENCES

    # The backslash is escaped so the help text keeps a literal backslash
    # without relying on an invalid escape sequence.
    DESCRIPTION = textwrap.dedent("""\
        categories_template.yaml should contain the semantic templates
          in YAML format.
        parsed_sentence.xml contains the CCG-parsed sentences.
        If --arbi-types is specified, then the arbitrary specification of
          types is enabled, thus using the argument as the field of the semantic
          template that is used. E.g, by specifying "--arbi-types coq_type"
          and a semantic template:
        - semantics: \\P x.P(x)
          category: NP
          coq_type: Animal
        The type "Animal" will be used for this expression. Otherwise,
        types of the sem/logic module of NLTK are used.
        """)

    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    arg_parser.add_argument("ccg")
    arg_parser.add_argument("templates")
    arg_parser.add_argument("sem")
    # NOTE(review): the help text describes --arbi-types as taking a field
    # name, but it is declared as a boolean flag — confirm the intent.
    arg_parser.add_argument("--arbi-types", action="store_true", default=False)
    # NOTE(review): store_true with default=True makes this option always True.
    arg_parser.add_argument("--gold_trees", action="store_true", default=True)
    arg_parser.add_argument("--nbest", nargs='?', type=int, default="0")
    arg_parser.add_argument("--ncores", nargs='?', type=int, default="3",
                            help="Number of cores for multiprocessing.")
    # Honour the caller-supplied argument list; None falls back to sys.argv.
    ARGS = arg_parser.parse_args(args)

    # Templates are checked before the CCG file, as in the original order.
    for required_path in (ARGS.templates, ARGS.ccg):
        if not os.path.exists(required_path):
            print('File does not exist: {0}'.format(required_path))
            sys.exit(1)

    logging.basicConfig(level=logging.WARNING)

    SEMANTIC_INDEX = SemanticIndex(ARGS.templates)

    xml_parser = etree.XMLParser(remove_blank_text=True)
    root = etree.parse(ARGS.ccg, xml_parser)

    SENTENCES = root.findall('.//sentence')
    sentence_inds = range(len(SENTENCES))
    sem_nodes_lists = semantic_parse_sentences(sentence_inds, ARGS.ncores)
    assert len(sem_nodes_lists) == len(SENTENCES), \
        'Element mismatch: {0} vs {1}'.format(
            len(sem_nodes_lists), len(SENTENCES))

    logging.info('Adding XML semantic nodes to sentences...')
    for sentence, sem_nodes in zip(SENTENCES, sem_nodes_lists):
        sentence.extend(sem_nodes)
    logging.info('Finished adding XML semantic nodes to sentences.')

    root_xml_str = serialize_tree(root)
    with codecs.open(ARGS.sem, 'wb') as fout:
        fout.write(root_xml_str)