def test_inner_node_child_categoryWithFeats(self):
     """The bare rule pattern 'NP/NP' in 'child1_category' also matches the
     feature-annotated child category 'NP/NP[mod=xx]' of the parse tree."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [
         SemanticRule(r'cat1', r'\P.P'),
         SemanticRule(r'NP/NP', r'\P.P'),
         SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child1_category': 'NP/NP'})
     ]
     semantic_index.rules = semantic_rules
     # Second terminal carries category features ([mod=xx]).
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-3">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="NP/NP[mod=xx]" end="3" begin="2" id="sp1-2"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_base2 -> _base1')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_match_any2(self):
     """'child_any_pos' / 'child_any_category' attributes: the expected
     result shows the '|' rule applied at the inner node (a child has pos1)
     and the '->' rule at the root (a child has category cat3)."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [
         SemanticRule(r'cat1', r'\P.P'),
         SemanticRule(r'cat2', r'\P.P'),
         SemanticRule(r'cat3', r'\P.P'),
         SemanticRule(r'NP', r'\P Q.(Q & P)', {'rule': 'lex'}),
         SemanticRule(r'NP', r'\P Q.(Q | P)', {'child_any_pos': 'pos1'}),
         SemanticRule(r'NP', r'\P Q.(Q -> P)',
                      {'child_any_category': 'cat3'})
     ]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
       <token base="base3" pos="pos3" surf="surf3" id="t1_3"/>
     </tokens>
     <ccg root="sp1-5">
       <span terminal="t1_1" category="cat1" pos="pos1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" pos="pos2" end="3" begin="2" id="sp1-2"/>
       <span terminal="t1_3" category="cat3" pos="pos3" end="4" begin="3" id="sp1-3"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-4"/>
       <span child="sp1-4 sp1-3" rule="lex" category="NP" end="4" begin="1" id="sp1-5"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_base3 -> (_base2 | _base1)')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_lexical_binary_one_type(self):
     """Only the cat2 rule carries a 'coq_type' attribute, so
     get_coq_types yields exactly one Parameter declaration (_base2)."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [
         SemanticRule(r'cat1', r'\P.P'),
         SemanticRule(r'cat2', r'\Q x.Q(x)',
                      {'coq_type': 'Entity -> Prop'}),
         SemanticRule(r'NP', r'\P Q x.(P -> Q(x))', {'rule': 'lex'})
     ]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-3">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     coq_types = get_coq_types(ccg_tree)
     expected_coq_types = ["Parameter _base2 : Entity -> Prop."]
     self.assertEqual(expected_coq_types, coq_types)
Esempio n. 4
0
 def test_lexical_binary_two_types(self):
     """Both terminal rules carry 'coq_type'; get_coq_types returns one
     Parameter declaration per base, in order."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [SemanticRule(r'cat1', r'\P.P', {'coq_type' : 'Entity -> Prop'}),
                       SemanticRule(r'cat2', r'\P.P', {'coq_type' : 'Entity -> Prop -> Prop'}),
                       SemanticRule(r'NP', r'\P Q.(Q -> P)', {'rule' : 'lex'})]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-3">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     coq_types = get_coq_types(ccg_tree)
     expected_coq_types = ["Parameter _base1 : Entity -> Prop.",
                           "Parameter _base2 : Entity -> Prop -> Prop."]
     self.assertEqual(expected_coq_types, coq_types)
Esempio n. 5
0
 def test_match_any2(self):
     """Duplicate of the 'child_any_*' matching test: the expected result
     shows the '|' rule applied at the inner node (a child has pos1) and
     the '->' rule at the root (a child has category cat3)."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [SemanticRule(r'cat1', r'\P.P'),
                       SemanticRule(r'cat2', r'\P.P'),
                       SemanticRule(r'cat3', r'\P.P'),
                       SemanticRule(r'NP', r'\P Q.(Q & P)', {'rule' : 'lex'}),
                       SemanticRule(r'NP', r'\P Q.(Q | P)', {'child_any_pos' : 'pos1'}),
                       SemanticRule(r'NP', r'\P Q.(Q -> P)', {'child_any_category' : 'cat3'})]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
       <token base="base3" pos="pos3" surf="surf3" id="t1_3"/>
     </tokens>
     <ccg root="sp1-5">
       <span terminal="t1_1" category="cat1" pos="pos1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" pos="pos2" end="3" begin="2" id="sp1-2"/>
       <span terminal="t1_3" category="cat3" pos="pos3" end="4" begin="3" id="sp1-3"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-4"/>
       <span child="sp1-4 sp1-3" rule="lex" category="NP" end="4" begin="1" id="sp1-5"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_base3 -> (_base2 | _base1)')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 6
0
def main(args=None):
    """Command-line entry point: attach semantic representations to
    CCG-parsed sentences and write the augmented XML.

    Positional arguments: the CCG XML file, the semantic-template file
    (YAML), and the output path for the semantics XML.

    Args:
        args: optional argument list for argparse; defaults to
            sys.argv[1:].  (Bug fix: the original ignored this parameter
            and always read sys.argv.)
    """
    DESCRIPTION=textwrap.dedent("""\
            categories_template.yaml should contain the semantic templates
              in YAML format.
            parsed_sentence.xml contains the CCG-parsed sentences.
            If --arbi-types is specified, then the arbitrary specification of
              types is enabled, thus using the argument as the field of the semantic
              template that is used. E.g, by specifying "--arbi-types coq_type"
              and a semantic template:
            - semantics: \P x.P(x)
              category: NP
              coq_type: Animal
            The type "Animal" will be used for this expression. Otherwise,
            types of the sem/logic module of NLTK are used.
      """)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    parser.add_argument("ccg")
    parser.add_argument("templates")
    parser.add_argument("sem")
    parser.add_argument("--arbi-types", action="store_true", default=False)
    # NOTE(review): action="store_true" with default=True makes this flag a
    # no-op (it is True whether or not it is passed).  Kept as-is so that
    # existing invocations keep using gold trees; confirm intent upstream.
    parser.add_argument("--gold_trees", action="store_true", default=True)
    # Bug fix: honor the `args` parameter instead of always parsing sys.argv.
    args = parser.parse_args(args)

    if not os.path.exists(args.templates):
        print('File does not exist: {0}'.format(args.templates))
        sys.exit(1)
    if not os.path.exists(args.ccg):
        print('File does not exist: {0}'.format(args.ccg))
        sys.exit(1)

    logging.basicConfig(level=logging.WARNING)

    semantic_index = SemanticIndex(args.templates)

    # Renamed from `parser` to avoid shadowing the argparse parser above.
    xml_parser = etree.XMLParser(remove_blank_text=True)
    root = etree.parse(args.ccg, xml_parser)

    for sentence in root.findall('.//sentence'):
        sem_node = etree.Element('semantics')
        try:
            sem_node.set('status', 'success')
            tree_index = 1
            if args.gold_trees:
                # gold_tree annotations are zero-indexed, xpath is 1-indexed.
                tree_index = int(sentence.get('gold_tree', '0')) + 1
            sem_tree = assign_semantics_to_ccg(
                sentence, semantic_index, tree_index)
            sem_node.set('root',
                sentence.xpath('./ccg[{0}]/@root'.format(tree_index))[0])
            filter_attributes(sem_tree)
            sem_node.extend(sem_tree.xpath('.//descendant-or-self::span'))
        except LogicalExpressionException:
            # Record the failure on the sentence and keep going.
            sem_node.set('status', 'failed')
        sentence.append(sem_node)

    root_xml_str = serialize_tree(root)
    with codecs.open(args.sem, 'wb') as fout:
        fout.write(root_xml_str)
Esempio n. 7
0
    def test_vertical_bar(self):
        """A vertical bar in the rule category ('NP|NP') matches the
        parse's backslash category, so the implication template applies
        rather than the 'NP/NP' conjunction template."""
        sentence_str = r"""
      <sentence id="s1">
        <tokens>
          <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
          <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
        </tokens>
        <ccg root="sp1-3">
          <span terminal="t1_1" category="N" end="2" begin="1" id="sp1-1"/>
          <span terminal="t1_2" category="N" end="3" begin="2" id="sp1-2"/>
          <span child="sp1-1 sp1-2" category="NP\NP" rule=">" end="3" begin="1" id="sp1-3"/>
        </ccg>
      </sentence>
    """
        sentence = etree.fromstring(sentence_str)

        semantic_index = SemanticIndex(None)
        semantic_rules = [
            SemanticRule(r'N', r'\P.P', {}),
            SemanticRule(r'NP|NP', r'\F1 F2.(F1 -> F2)', {'rule': '>'}),
            SemanticRule(r'NP/NP', r'\F1 F2.(F1 & F2)', {'rule': '>'})
        ]
        semantic_index.rules = semantic_rules
        ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
        semantics = lexpr(ccg_tree.get('sem', None))
        expected_semantics = lexpr(r'(_base1 -> _base2)')
        self.assertEqual(expected_semantics, semantics)
Esempio n. 8
0
 def test_inner_node_child_categoryWithFeats(self):
     """Duplicate: the bare rule pattern 'NP/NP' in 'child1_category'
     also matches the feature-annotated child category 'NP/NP[mod=xx]'."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [SemanticRule(r'cat1', r'\P.P'),
                       SemanticRule(r'NP/NP', r'\P.P'),
                       SemanticRule(r'NP', r'\P Q.(Q -> P)',
                                    {'child1_category' : 'NP/NP'})]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-3">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="NP/NP[mod=xx]" end="3" begin="2" id="sp1-2"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_base2 -> _base1')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_func_combination_backwardComplexTwoArgs(self):
     """Backward composition (rule &lt;B2) of a two-argument verb with a
     past-tense auxiliary: the auxiliary template wraps the verb's event
     predicate in AND(past(e), .)."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [
         SemanticRule(r'S\NP\NP', r'\P y x e. P(e, x, y)'),
         SemanticRule(r'S\S', r'\P Q e. AND(past(e), Q(e))')
     ]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token id="s1_4" surf="ほめ" pos="動詞" pos1="自立" pos2="*" pos3="*" inflectionType="一段" inflectionForm="連用形" base="ほめる" reading="ホメ"/>
       <token id="s1_5" surf="た" pos="助動詞" pos1="*" pos2="*" pos3="*" inflectionType="特殊・タ" inflectionForm="基本形" base="た" reading="タ"/>
     </tokens>
     <ccg root="s1_sp9">
       <span id="s1_sp9" begin="4" end="6" category="(S[mod=nm,form=base]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]" rule="&lt;B2" child="s1_sp10 s1_sp11"/>
       <span id="s1_sp10" begin="4" end="5" category="(S[mod=nm,form=cont]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]" terminal="s1_4"/>
       <span id="s1_sp11" begin="5" end="6" category="S[mod=nm,form=base]\S[mod=nm,form=cont]" terminal="s1_5"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\y x e.AND(past(e), _ほめる(x, y, e))')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 10
0
 def test_lexical_binary_two_coq_complex_type(self):
     """Complex coq types on both terminals: checks the Parameter
     declarations from get_coq_types and that the merged dynamic library
     contains both declarations (order-insensitive)."""
     semantic_index = SemanticIndex(None)
     semantic_rules = [SemanticRule(r'cat1', r'\P x R.P(x, R)', {'coq_type' : 'Entity -> Prop -> Prop'}),
                       SemanticRule(r'cat2', r'\Q S T.Q(S, T)', {'coq_type' : 'Prop -> Entity -> Prop'}),
                       SemanticRule(r'NP', r'\P Q x R S T.(Q(x, R) -> P(S, T))', {'rule' : 'lex'})]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-3">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     coq_lib = get_coq_types(ccg_tree)
     expected_coq_lib = ['Parameter _base1 : Entity -> Prop -> Prop.',
                         'Parameter _base2 : Prop -> Entity -> Prop.']
     self.assertEqual(expected_coq_lib, coq_lib)
     expression = [ccg_tree.get('sem')]
     coq_sig =  convert_coq_signatures_to_nltk(coq_lib)
     nltk_lib = build_dynamic_library(expression, coq_sig)
     lib = merge_dynamic_libraries(coq_lib, nltk_lib, './coqlib.v', sentence)
     expected_lib = ["Parameter _base2 : Prop -> Entity -> Prop.",
                     "Parameter _base1 : Entity -> Prop -> Prop."]
     self.assertCountEqual(expected_lib, lib)
Esempio n. 11
0
 def test_RTG3Paths2Vars(self):
     """Three var_paths but only two lambda variables in the template:
     the produced expression is malformed and lexpr raises."""
     index = SemanticIndex(None)
     index.rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                      {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'}),
     ]
     tree = assign_semantics_to_ccg(self.sentence, index)
     with self.assertRaises(nltk.sem.logic.LogicalExpressionException):
         lexpr(tree.get('sem', None))
Esempio n. 12
0
 def test_CFG(self):
     """CFG-style combination over self.sentence: N children are joined
     with '&' and the top NPNP rule with '->'."""
     index = SemanticIndex(None)
     index.rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)', {'rule': '>'}),
     ]
     tree = assign_semantics_to_ccg(self.sentence, index)
     self.assertEqual(
         lexpr(r'(_base1 & _base2) -> (_base3 & _base4)'),
         lexpr(tree.get('sem', None)))
Esempio n. 13
0
 def test_RTG3Paths3Vars(self):
     """Three var_paths feeding three lambda variables F1..F3; each
     variable is instantiated from the subtree at its path."""
     index = SemanticIndex(None)
     index.rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2 F3.((F3 & F2) -> F1)',
                      {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'}),
     ]
     tree = assign_semantics_to_ccg(self.sentence, index)
     self.assertEqual(
         lexpr(r'((_base3 & _base2) -> _base1)'),
         lexpr(tree.get('sem', None)))
Esempio n. 14
0
 def test_RTG1Path(self):
     """A single var_path: only the first lambda variable is instantiated
     (from the subtree at [0, 1]); F2 stays abstracted in the result."""
     index = SemanticIndex(None)
     index.rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                      {'var_paths': [[0, 1]], 'rule': '>'}),
     ]
     tree = assign_semantics_to_ccg(self.sentence, index)
     self.assertEqual(
         lexpr(r'\F2.(_base2 -> F2)'),
         lexpr(tree.get('sem', None)))
Esempio n. 15
0
 def test_CFG(self):
     """CFG-style combination over self.sentence: N children are joined
     with '&' and the top NPNP rule with '->'."""
     index = SemanticIndex(None)
     rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)', {'rule': '>'}),
     ]
     index.rules = rules
     tree = assign_semantics_to_ccg(self.sentence, index)
     actual = lexpr(tree.get('sem', None))
     self.assertEqual(lexpr(r'(_base1 & _base2) -> (_base3 & _base4)'), actual)
Esempio n. 16
0
 def test_RTG3Paths2Vars(self):
     """Three var_paths but only two lambda variables in the template:
     the produced expression is malformed and lexpr raises."""
     index = SemanticIndex(None)
     rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                      {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'}),
     ]
     index.rules = rules
     tree = assign_semantics_to_ccg(self.sentence, index)
     with self.assertRaises(nltk.sem.logic.LogicalExpressionException):
         lexpr(tree.get('sem', None))
Esempio n. 17
0
def semantic_parse_sentence(sentence_ind):
    """Assign semantics to the sentence at index `sentence_ind` of SENTENCES.

    `SENTENCES[sentence_ind]` is an lxml tree with tokens and ccg nodes.
    Returns a list of serialized lxml <semantics> nodes, one per CCG tree
    considered (gold tree, or the ARGS.nbest best trees).
    """
    global lock
    sentence = SENTENCES[sentence_ind]
    sem_nodes = []
    # TODO: try to prevent semantic parsing for fragmented CCG trees.
    # Otherwise, produce fragmented semantics.
    # Bug fix: `tree_indices` was previously unbound (NameError) when
    # ARGS.gold_trees was false and ARGS.nbest == 1; default to the first tree.
    tree_indices = [1]
    if ARGS.gold_trees:
        # In xpath, elements are 1-indexed.
        # However, gold_tree annotations assumed zero-index.
        # This line fixes it.
        tree_indices = [int(sentence.get('gold_tree', '0')) + 1]
    if ARGS.nbest != 1:
        tree_indices = get_tree_indices(sentence, ARGS.nbest)
    for tree_index in tree_indices:
        sem_node = etree.Element('semantics')
        try:
            sem_tree = assign_semantics_to_ccg(sentence, SEMANTIC_INDEX,
                                               tree_index)
            filter_attributes(sem_tree)
            sem_node.extend(sem_tree.xpath('.//descendant-or-self::span'))
            sem_node.set('status', 'success')
            sem_node.set(
                'ccg_id',
                sentence.xpath('./ccg[{0}]/@id'.format(tree_index))[0])
            sem_node.set(
                'root',
                sentence.xpath('./ccg[{0}]/@root'.format(tree_index))[0])
            # print('.', end='', file=sys.stdout)
            sys.stdout.flush()
        except Exception as e:
            # Record the failure and log the offending sentence; the lock
            # serializes log output across worker processes/threads.
            sem_node.set('status', 'failed')
            # from pudb import set_trace; set_trace()
            sentence_surf = ' '.join(sentence.xpath('tokens/token/@surf'))
            lock.acquire()
            logging.error(
                'An error occurred: {0}\nSentence: {1}\nTree XML:\n{2}'.format(
                    e, sentence_surf,
                    etree.tostring(sentence,
                                   encoding='utf-8',
                                   pretty_print=True).decode('utf-8')))
            lock.release()
            # print('x', end='', file=sys.stdout)
            sys.stdout.flush()
        sem_nodes.append(sem_node)
    return [etree.tostring(sem_node) for sem_node in sem_nodes]
Esempio n. 18
0
 def test_RTG1Path(self):
     """A single var_path: only the first lambda variable is instantiated
     (from the subtree at [0, 1]); F2 stays abstracted in the result."""
     index = SemanticIndex(None)
     rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2.(F1 -> F2)',
                      {'var_paths': [[0, 1]], 'rule': '>'}),
     ]
     index.rules = rules
     tree = assign_semantics_to_ccg(self.sentence, index)
     actual = lexpr(tree.get('sem', None))
     self.assertEqual(lexpr(r'\F2.(_base2 -> F2)'), actual)
Esempio n. 19
0
 def test_RTG3Paths3Vars(self):
     """Three var_paths feeding three lambda variables F1..F3; each
     variable is instantiated from the subtree at its path."""
     index = SemanticIndex(None)
     rules = [
         SemanticRule(r'N', r'\P.P', {}),
         SemanticRule(r'NP', r'\F1 F2.(F1 & F2)', {'rule': '>'}),
         SemanticRule(r'NPNP', r'\F1 F2 F3.((F3 & F2) -> F1)',
                      {'var_paths': [[0, 0], [0, 1], [1, 0]], 'rule': '>'}),
     ]
     index.rules = rules
     tree = assign_semantics_to_ccg(self.sentence, index)
     actual = lexpr(tree.get('sem', None))
     self.assertEqual(lexpr(r'((_base3 & _base2) -> _base1)'), actual)
 def test_token_to_const_latin(self):
     """A Latin-script NP token whose base is '*' yields a constant named
     after its surface form (_Scala), using the class-level index."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
     </tokens>
     <ccg root="sp0-3">
       <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_Scala')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_token_to_const_japanese(self):
     """A Japanese NP token yields a constant named after its base form
     (_言語), using the class-level index."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="言語" pos="名詞-一般" surf="言語" id="t0_3"/>
     </tokens>
     <ccg root="sp0-9">
       <span terminal="t0_3" category="NP[mod=nm,case=nc]" end="4" begin="3" id="sp0-9"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_言語')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 22
0
 def test_token_to_const_latin(self):
     """Duplicate: a Latin-script NP token whose base is '*' yields a
     constant named after its surface form (_Scala)."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
     </tokens>
     <ccg root="sp0-3">
       <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_Scala')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 23
0
 def test_token_to_function_2args(self):
     """A terminal with a two-argument functor category becomes a
     two-place predicate with reversed argument order: \x y._は(y, x)."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
     </tokens>
     <ccg root="sp0-4">
       <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\x y._は(y, x)')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 24
0
 def test_token_to_const_japanese(self):
     """Duplicate: a Japanese NP token yields a constant named after its
     base form (_言語)."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="言語" pos="名詞-一般" surf="言語" id="t0_3"/>
     </tokens>
     <ccg root="sp0-9">
       <span terminal="t0_3" category="NP[mod=nm,case=nc]" end="4" begin="3" id="sp0-9"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_言語')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_token_to_function_1arg(self):
     """A terminal with a one-argument functor category becomes a
     one-place predicate over its argument: \x._です(x)."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="です" katsuyou="基本形" pos="助動詞" surf="です" id="t0_4"/>
     </tokens>
     <ccg root="sp0-10">
       <span terminal="t0_4" category="S[mod=nm,form=base]\NP[mod=nm,case=nc]" end="5" begin="4" id="sp0-10"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\x._です(x)')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 26
0
 def test_token_to_function_1arg(self):
     """Duplicate: a terminal with a one-argument functor category becomes
     a one-place predicate over its argument: \x._です(x)."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="です" katsuyou="基本形" pos="助動詞" surf="です" id="t0_4"/>
     </tokens>
     <ccg root="sp0-10">
       <span terminal="t0_4" category="S[mod=nm,form=base]\NP[mod=nm,case=nc]" end="5" begin="4" id="sp0-10"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\x._です(x)')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_token_to_function_2args(self):
     """Duplicate: a terminal with a two-argument functor category becomes
     a two-place predicate with reversed argument order: \x y._は(y, x)."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
     </tokens>
     <ccg root="sp0-4">
       <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\x y._は(y, x)')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 28
0
 def test_typeraising_for_unary_pred(self):
     """The ADN unary rule type-raises an adnominal sentence to a noun
     modifier that conjoins the noun predicate with _良い."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="良い" katsuyou="基本形" pos="形容詞-自立" surf="良い" id="t0_2"/>
     </tokens>
     <ccg root="sp0-7">
       <span child="sp0-8" rule="ADN" category="NP[case=nc]/NP[case=nc]" end="3" begin="2" id="sp0-7"/>
       <span terminal="t0_2" category="S[mod=adn,form=base]" end="3" begin="2" id="sp0-8"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\P x.(P(x) & _良い(x))')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_typeraising_for_unary_pred(self):
     """Duplicate: the ADN unary rule type-raises an adnominal sentence to
     a noun modifier that conjoins the noun predicate with _良い."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="良い" katsuyou="基本形" pos="形容詞-自立" surf="良い" id="t0_2"/>
     </tokens>
     <ccg root="sp0-7">
       <span child="sp0-8" rule="ADN" category="NP[case=nc]/NP[case=nc]" end="3" begin="2" id="sp0-7"/>
       <span terminal="t0_2" category="S[mod=adn,form=base]" end="3" begin="2" id="sp0-8"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\P x.(P(x) & _良い(x))')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 30
0
 def test_func_combination_backwardSimpleTwoArgs(self):
     """Backward composition (rule &lt;B2): the one-place _G composes over
     the two-argument _F, yielding \y x._G(_F(x, y))."""
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="F" pos="pos1" surf="F" id="t1_3"/>
       <token base="G" katsuyou="katsuyou2" pos="pos2" surf="G" id="t1_4"/>
     </tokens>
     <ccg root="sp1-7">
       <span child="sp1-8 sp1-9" rule="&lt;B2" category="S[mod=nm,form=base]\NP[mod=nm,case=ga]\NP" end="5" begin="3" id="sp1-7"/>
       <span terminal="t1_3" category="S[mod=nm,form=da]\NP[mod=nm,case=ga]\NP" end="4" begin="3" id="sp1-8"/>
       <span terminal="t1_4" category="S[mod=nm,form=base]\S[mod=nm,form=da]" end="5" begin="4" id="sp1-9"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\y x._G(_F(x, y))')
     self.assertEqual(expected_semantics, lexpr(semantics))
 def test_func_combination_backward(self):
     """Backward composition (rule &lt;B): _です composes over the
     one-argument predicate _簡潔, yielding \x._です(_簡潔(x))."""
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="簡潔" pos="名詞-形容動詞語幹" surf="簡潔" id="t1_3"/>
       <token base="です" katsuyou="基本形" pos="助動詞" surf="です" id="t1_4"/>
     </tokens>
     <ccg root="sp1-7">
       <span child="sp1-8 sp1-9" rule="&lt;B" category="S[mod=nm,form=base]\NP[mod=nm,case=ga]" end="5" begin="3" id="sp1-7"/>
       <span terminal="t1_3" category="S[mod=nm,form=da]\NP[mod=nm,case=ga]" end="4" begin="3" id="sp1-8"/>
       <span terminal="t1_4" category="S[mod=nm,form=base]\S[mod=nm,form=da]" end="5" begin="4" id="sp1-9"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\x._です(_簡潔(x))')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 32
0
 def test_func_application_backward(self):
     """Backward function application (rule &lt;): the NP argument _Scala
     is applied to the two-place predicate _は."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
       <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
     </tokens>
     <ccg root="sp0-2">
       <span child="sp0-3 sp0-4" rule="&lt;" category="S/S" end="2" begin="0" id="sp0-2"/>
       <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
       <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\y._は(y, _Scala)')
     self.assertEqual(expected_semantics, lexpr(semantics))
    def test_np_feature_no(self):
        """An NP terminal without feature annotations is mapped to its
        base constant via the plain 'NP' rule pattern."""
        semantic_index = SemanticIndex(None)
        semantic_index.rules = [SemanticRule(r'NP', r'\P.P')]

        sentence_str = r"""
      <sentence id="s0">
        <tokens>
          <token base="basepred" pos="pos1" surf="surfpred" id="t0_0"/>
        </tokens>
        <ccg root="sp0-3">
          <span terminal="t0_0" category="NP" end="1" begin="0" id="sp0-3"/>
        </ccg>
      </sentence>
    """
        sentence = etree.fromstring(sentence_str)
        ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
        semantics = ccg_tree.get('sem', None)
        expected_semantics = lexpr(r'_basepred')
        self.assertEqual(expected_semantics, lexpr(semantics))
 def test_func_application_backward(self):
     """Duplicate: backward function application (rule &lt;) applies the
     NP argument _Scala to the two-place predicate _は."""
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
       <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
     </tokens>
     <ccg root="sp0-2">
       <span child="sp0-3 sp0-4" rule="&lt;" category="S/S" end="2" begin="0" id="sp0-2"/>
       <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
       <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\y._は(y, _Scala)')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 35
0
 def test_func_combination_backward(self):
     r"""Backward composition (<B): the auxiliary です (S\S) composes over the
     adjectival predicate 簡潔 (S\NP), giving \x._です(_簡潔(x)).
     """
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="簡潔" pos="名詞-形容動詞語幹" surf="簡潔" id="t1_3"/>
       <token base="です" katsuyou="基本形" pos="助動詞" surf="です" id="t1_4"/>
     </tokens>
     <ccg root="sp1-7">
       <span child="sp1-8 sp1-9" rule="&lt;B" category="S[mod=nm,form=base]\NP[mod=nm,case=ga]" end="5" begin="3" id="sp1-7"/>
       <span terminal="t1_3" category="S[mod=nm,form=da]\NP[mod=nm,case=ga]" end="4" begin="3" id="sp1-8"/>
       <span terminal="t1_4" category="S[mod=nm,form=base]\S[mod=nm,form=da]" end="5" begin="4" id="sp1-9"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     # self.semantic_index is presumably set up in setUp() — not visible here.
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\x._です(_簡潔(x))')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 36
0
    def test_np_feature_no(self):
        """A bare NP terminal with the identity template yields its base form.

        NOTE(review): this appears to duplicate an identical test earlier in
        this file — consider removing one copy.
        """
        semantic_index = SemanticIndex(None)
        semantic_index.rules = [SemanticRule(r'NP', r'\P.P')]

        sentence_str = r"""
      <sentence id="s0">
        <tokens>
          <token base="basepred" pos="pos1" surf="surfpred" id="t0_0"/>
        </tokens>
        <ccg root="sp0-3">
          <span terminal="t0_0" category="NP" end="1" begin="0" id="sp0-3"/>
        </ccg>
      </sentence>
    """
        sentence = etree.fromstring(sentence_str)
        ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
        semantics = ccg_tree.get('sem', None)
        expected_semantics = lexpr(r'_basepred')
        self.assertEqual(expected_semantics, lexpr(semantics))
 def test_func_combination_backwardSimpleTwoArgs(self):
     r"""Generalized backward composition (<B2): G composes over the
     two-argument functor F, giving \y x._G(_F(x, y))."""
     xml = r"""
   <sentence id="s1">
     <tokens>
       <token base="F" pos="pos1" surf="F" id="t1_3"/>
       <token base="G" katsuyou="katsuyou2" pos="pos2" surf="G" id="t1_4"/>
     </tokens>
     <ccg root="sp1-7">
       <span child="sp1-8 sp1-9" rule="&lt;B2" category="S[mod=nm,form=base]\NP[mod=nm,case=ga]\NP" end="5" begin="3" id="sp1-7"/>
       <span terminal="t1_3" category="S[mod=nm,form=da]\NP[mod=nm,case=ga]\NP" end="4" begin="3" id="sp1-8"/>
       <span terminal="t1_4" category="S[mod=nm,form=base]\S[mod=nm,form=da]" end="5" begin="4" id="sp1-9"/>
     </ccg>
   </sentence>
 """
     tree = assign_semantics_to_ccg(etree.fromstring(xml), self.semantic_index)
     expected = lexpr(r'\y x._G(_F(x, y))')
     self.assertEqual(expected, lexpr(tree.get('sem', None)))
Esempio n. 38
0
 def test_lexical_binary_two_coq_complex_type(self):
     """Coq types declared on semantic templates propagate into the libraries.

     Two terminals carry explicit `coq_type` declarations; the test checks
     that get_coq_types() collects them as Parameter declarations, and that
     the dynamic library rebuilt from the final expression yields compatible
     (parenthesized) signatures after merging.
     """
     semantic_index = SemanticIndex(None)
     semantic_rules = [
         SemanticRule(r'cat1', r'\P x R.P(x, R)',
                      {'coq_type': 'Entity -> Prop -> Prop'}),
         SemanticRule(r'cat2', r'\Q S T.Q(S, T)',
                      {'coq_type': 'Prop -> Entity -> Prop'}),
         SemanticRule(r'NP', r'\P Q x R S T.(Q(x, R) -> P(S, T))',
                      {'rule': 'lex'})
     ]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
       <token base="base2" pos="pos2" surf="surf2" id="t1_2"/>
     </tokens>
     <ccg root="sp1-3">
       <span terminal="t1_1" category="cat1" end="2" begin="1" id="sp1-1"/>
       <span terminal="t1_2" category="cat2" end="3" begin="2" id="sp1-2"/>
       <span child="sp1-1 sp1-2" rule="lex" category="NP" end="3" begin="1" id="sp1-3"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     # The declared coq_types should appear verbatim, in tree order.
     coq_lib = get_coq_types(ccg_tree)
     expected_coq_lib = [
         'Parameter _base1 : Entity -> Prop -> Prop.',
         'Parameter _base2 : Prop -> Entity -> Prop.'
     ]
     self.assertEqual(expected_coq_lib, coq_lib)
     expression = [ccg_tree.get('sem')]
     coq_sig = convert_coq_signatures_to_nltk(coq_lib)
     nltk_lib, _ = build_dynamic_library(expression, coq_sig)
     # './coqlib.v' is presumably the static Coq library path — TODO confirm.
     lib = merge_dynamic_libraries(coq_sig, nltk_lib, './coqlib.v',
                                   sentence)
     expected_lib = [
         "Parameter _base2 : Prop -> (Entity -> Prop).",
         "Parameter _base1 : Entity -> (Prop -> Prop)."
     ]
     # Order-insensitive: merge order is not part of the contract here.
     self.assertCountEqual(expected_lib, lib)
 def test_func_application_forward(self):
     r"""Forward application (>): the ADN-derived NP/NP modifier applies to the
     noun, yielding the conjunction \x.(_言語(x) & _良い(x))."""
     xml = r"""
   <sentence id="s0">
     <tokens>
       <token base="良い" katsuyou="基本形" pos="形容詞-自立" surf="良い" id="t0_2"/>
       <token base="言語" pos="名詞-一般" surf="言語" id="t0_3"/>
     </tokens>
     <ccg root="sp0-6">
       <span child="sp0-7 sp0-9" rule="&gt;" category="NP[mod=nm,case=nc]" end="4" begin="2" id="sp0-6"/>
       <span child="sp0-8" rule="ADN" category="NP[case=nc]/NP[case=nc]" end="3" begin="2" id="sp0-7"/>
       <span terminal="t0_2" category="S[mod=adn,form=base]" end="3" begin="2" id="sp0-8"/>
       <span terminal="t0_3" category="NP[mod=nm,case=nc]" end="4" begin="3" id="sp0-9"/>
     </ccg>
   </sentence>
 """
     tree = assign_semantics_to_ccg(etree.fromstring(xml), self.semantic_index)
     expected = lexpr(r'\x.(_言語(x) & _良い(x))')
     self.assertEqual(expected, lexpr(tree.get('sem', None)))
Esempio n. 40
0
 def test_func_application_forward(self):
     r"""Forward application (>): the ADN-derived NP/NP modifier applies to the
     noun, yielding \x.(_言語(x) & _良い(x)).

     NOTE(review): this appears to duplicate an identical test earlier in
     this file — consider removing one copy.
     """
     sentence_str = r"""
   <sentence id="s0">
     <tokens>
       <token base="良い" katsuyou="基本形" pos="形容詞-自立" surf="良い" id="t0_2"/>
       <token base="言語" pos="名詞-一般" surf="言語" id="t0_3"/>
     </tokens>
     <ccg root="sp0-6">
       <span child="sp0-7 sp0-9" rule="&gt;" category="NP[mod=nm,case=nc]" end="4" begin="2" id="sp0-6"/>
       <span child="sp0-8" rule="ADN" category="NP[case=nc]/NP[case=nc]" end="3" begin="2" id="sp0-7"/>
       <span terminal="t0_2" category="S[mod=adn,form=base]" end="3" begin="2" id="sp0-8"/>
       <span terminal="t0_3" category="NP[mod=nm,case=nc]" end="4" begin="3" id="sp0-9"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, self.semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\x.(_言語(x) & _良い(x))')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 41
0
 def test_lexical_unary(self):
     """A unary 'lex' rule rewrites the child's meaning via the NP template."""
     index = SemanticIndex(None)
     index.rules = [
         SemanticRule(r'N', r'\P.P'),
         SemanticRule(r'NP', r'\P.(P -> P)', {'rule': 'lex'}),
     ]
     xml = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
     </tokens>
     <ccg root="sp1-2">
       <span terminal="t1_1" category="N" end="2" begin="1" id="sp1-1"/>
       <span child="sp1-1" rule="lex" category="NP" end="2" begin="1" id="sp1-2"/>
     </ccg>
   </sentence>
 """
     tree = assign_semantics_to_ccg(etree.fromstring(xml), index)
     expected = lexpr(r'_base1 -> _base1')
     self.assertEqual(expected, lexpr(tree.get('sem', None)))
 def test_lexical_unary(self):
     """A unary 'lex' rule rewrites the child's meaning via the NP template.

     NOTE(review): this appears to duplicate an identical test earlier in
     this file — consider removing one copy.
     """
     semantic_index = SemanticIndex(None)
     semantic_rules = [
         SemanticRule(r'N', r'\P.P'),
         SemanticRule(r'NP', r'\P.(P -> P)', {'rule': 'lex'})
     ]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token base="base1" pos="pos1" surf="surf1" id="t1_1"/>
     </tokens>
     <ccg root="sp1-2">
       <span terminal="t1_1" category="N" end="2" begin="1" id="sp1-1"/>
       <span child="sp1-1" rule="lex" category="NP" end="2" begin="1" id="sp1-2"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'_base1 -> _base1')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 43
0
    def test_func_application_backward(self):
        r"""'は' has category (S/S)\NP[mod=nm,case=nc], which is not covered by
        the unittest semantic templates; it is therefore assigned the default
        \E O.O and 'Scala' becomes the final meaning representation.
        """
        xml = r"""
      <sentence id="s0">
        <tokens>
          <token base="*" pos="名詞-固有名詞-組織" surf="Scala" id="t0_0"/>
          <token base="は" pos="助詞-係助詞" surf="は" id="t0_1"/>
        </tokens>
        <ccg root="sp0-2">
          <span child="sp0-3 sp0-4" rule="&lt;" category="S/S" end="2" begin="0" id="sp0-2"/>
          <span terminal="t0_0" category="NP[mod=nm,case=nc]" end="1" begin="0" id="sp0-3"/>
          <span terminal="t0_1" category="(S/S)\NP[mod=nm,case=nc]" end="2" begin="1" id="sp0-4"/>
        </ccg>
      </sentence>
    """
        tree = assign_semantics_to_ccg(etree.fromstring(xml), self.semantic_index)
        expected = lexpr(r'_Scala')
        self.assertEqual(expected, lexpr(tree.get('sem', None)))
Esempio n. 44
0
 def test_func_combination_backwardComplexTwoArgs(self):
     r"""Generalized backward composition (<B2) with event semantics: the past
     auxiliary た (S\S) composes over the two-argument verb ほめる (S\NP\NP),
     giving \y x e.AND(past(e), _ほめる(x, y, e)).
     """
     semantic_index = SemanticIndex(None)
     semantic_rules = [SemanticRule(r'S\NP\NP', r'\P y x e. P(e, x, y)'),
                       SemanticRule(r'S\S', r'\P Q e. AND(past(e), Q(e))')]
     semantic_index.rules = semantic_rules
     sentence_str = r"""
   <sentence id="s1">
     <tokens>
       <token id="s1_4" surf="ほめ" pos="動詞" pos1="自立" pos2="*" pos3="*" inflectionType="一段" inflectionForm="連用形" base="ほめる" reading="ホメ"/>
       <token id="s1_5" surf="た" pos="助動詞" pos1="*" pos2="*" pos3="*" inflectionType="特殊・タ" inflectionForm="基本形" base="た" reading="タ"/>
     </tokens>
     <ccg root="s1_sp9">
       <span id="s1_sp9" begin="4" end="6" category="(S[mod=nm,form=base]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]" rule="&lt;B2" child="s1_sp10 s1_sp11"/>
       <span id="s1_sp10" begin="4" end="5" category="(S[mod=nm,form=cont]\NP[mod=nm,case=ga])\NP[mod=nm,case=o]" terminal="s1_4"/>
       <span id="s1_sp11" begin="5" end="6" category="S[mod=nm,form=base]\S[mod=nm,form=cont]" terminal="s1_5"/>
     </ccg>
   </sentence>
 """
     sentence = etree.fromstring(sentence_str)
     ccg_tree = assign_semantics_to_ccg(sentence, semantic_index)
     semantics = ccg_tree.get('sem', None)
     expected_semantics = lexpr(r'\y x e.AND(past(e), _ほめる(x, y, e))')
     self.assertEqual(expected_semantics, lexpr(semantics))
Esempio n. 45
0
def main(args=None):
    """Assign semantics to CCG-parsed sentences and run theorem proving.

    Parameters
    ----------
    args : list[str] | None
        Command-line arguments; ``None`` means use ``sys.argv[1:]``.

    Reads the semantic templates and the parsed-sentence XML named on the
    command line, assigns a logical form to every sentence, proves the
    entailment, prints the result to stdout and an HTML rendering to stderr.
    """
    # NOTE: '\\P' is escaped so the literal need not rely on an invalid
    # '\P' escape sequence (deprecated in recent Python versions).
    DESCRIPTION = textwrap.dedent("""\
            categories_template.yaml should contain the semantic templates
              in YAML format.
            parsed_sentence.xml contains the parsed sentences. All CCG trees correspond
            to the premises, except the last one, which is the hypothesis.
            If --arbi-types flag is specified, then the arbitrary specification of
            coq_types is enabled. Thus, semantic rule assignments should contain a
            a field such as:
            - semantics: \\P x.P(x)
              category: NP
              coq_type: Animal
            If --auto-types is enabled, or no flag is specified, then the automatic
            inference of types is enabled. This automatic inference relies on the naive
            implementation in the sem/logic module of NLTK.
      """)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    parser.add_argument("expression_templates_filename")
    parser.add_argument("parsed_sentences_filename")
    parser.add_argument("--sem_out_fname", dest="sem_out_fname", nargs='?',
                        type=str, default="")
    parser.add_argument("--theorem_template", dest="theorem_template", nargs='?',
                        type=str, default="")
    parser.add_argument("--arbi-types", action="store_true", default=False)
    parser.add_argument("--abduction", action="store_true", default=False)
    parser.add_argument("--gold_trees", action="store_true", default=False)
    # Honor the `args` parameter (None -> sys.argv[1:]) instead of
    # unconditionally re-reading sys.argv.
    args = parser.parse_args(args)

    # Fail fast on missing inputs (the original only printed and carried on,
    # which would crash later anyway).
    if not os.path.exists(args.expression_templates_filename):
        print('File does not exist: {0}'.format(args.expression_templates_filename))
        sys.exit(1)
    if not os.path.exists(args.parsed_sentences_filename):
        print('File does not exist: {0}'.format(args.parsed_sentences_filename))
        sys.exit(1)

    logging.basicConfig(level=logging.WARNING)

    semantic_index = SemanticIndex(args.expression_templates_filename)

    parser = etree.XMLParser(remove_blank_text=True)
    ccg_xml_trees = etree.parse(
        args.parsed_sentences_filename, parser).findall('.//sentence')

    logical_interpretations = []
    ccg_tree_list = []
    ccg_tokens_list = []
    for ccg_xml in ccg_xml_trees:
        ccg_tree = assign_semantics_to_ccg(ccg_xml, semantic_index)
        ccg_tree_list.append(ccg_tree)
        assert 'sem' in ccg_tree.attrib, \
            'The assignment of semantics to CCG tree may have failed. Tree: {0}'\
            .format(etree.tostring(ccg_tree, pretty_print=True, encoding='utf-8')
                    .decode('utf-8'))
        logical_interpretations.append(ccg_tree.get('sem'))
        ccg_tokens_list.append(ccg_xml.find("tokens"))
    # Bug fix: the original referenced an undefined name `arbi_types_requested`
    # (NameError at runtime); the flag parsed above is `args.arbi_types`.
    if args.arbi_types:
        inference_result, coq_scripts = prove_from_ccg(
            logical_interpretations, ccg_trees=ccg_tree_list,
            ccg_xml_trees=ccg_xml_trees)
    else:
        inference_result, coq_scripts = prove_from_ccg(
            logical_interpretations, ccg_xml_trees=ccg_xml_trees)
    print(inference_result, file=sys.stdout)
    html_str = convert_trees_to_mathml(ccg_tree_list, ccg_tokens_list, coq_scripts)
    print(html_str, file=sys.stderr)
Esempio n. 46
0
def main(args=None):
    """Assign semantics to the CCG trees in an XML file and write the result.

    Parameters
    ----------
    args : list[str] | None
        Command-line arguments; ``None`` means use ``sys.argv[1:]``.

    For each sentence, picks the tree indices to process (gold tree and/or
    n-best list), attaches a <semantics> node with the assigned spans, and
    serializes the augmented XML to the output path.
    """
    # NOTE: '\\P' is escaped so the literal need not rely on an invalid
    # '\P' escape sequence (deprecated in recent Python versions).
    DESCRIPTION = textwrap.dedent("""\
            categories_template.yaml should contain the semantic templates
              in YAML format.
            parsed_sentence.xml contains the CCG-parsed sentences.
            If --arbi-types is specified, then the arbitrary specification of
              types is enabled, thus using the argument as the field of the semantic
              template that is used. E.g, by specifying "--arbi-types coq_type"
              and a semantic template:
            - semantics: \\P x.P(x)
              category: NP
              coq_type: Animal
            The type "Animal" will be used for this expression. Otherwise,
            types of the sem/logic module of NLTK are used.
      """)

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION)
    parser.add_argument("ccg")
    parser.add_argument("templates")
    parser.add_argument("sem")
    parser.add_argument("--arbi-types", action="store_true", default=False)
    parser.add_argument("--gold_trees", action="store_true", default=True)
    parser.add_argument("--nbest", nargs='?', type=int, default="0")
    # Honor the `args` parameter (None -> sys.argv[1:]) instead of
    # unconditionally re-reading sys.argv.
    args = parser.parse_args(args)

    if not os.path.exists(args.templates):
        print('File does not exist: {0}'.format(args.templates))
        sys.exit(1)
    if not os.path.exists(args.ccg):
        print('File does not exist: {0}'.format(args.ccg))
        sys.exit(1)

    logging.basicConfig(level=logging.WARNING)

    semantic_index = SemanticIndex(args.templates)

    parser = etree.XMLParser(remove_blank_text=True)
    root = etree.parse(args.ccg, parser)

    for sentence in root.findall('.//sentence'):
        # Default to the first tree so tree_indices is always bound (the
        # original could hit UnboundLocalError with --nbest 1 and no
        # --gold_trees).
        tree_indices = [1]
        if args.gold_trees:
            tree_indices = [int(sentence.get('gold_tree', '0')) + 1]
        # NOTE(review): with the default --nbest of 0 this always overrides
        # the gold-tree choice above; confirm that is intended.
        if args.nbest != 1:
            tree_indices = get_tree_indices(sentence, args.nbest)
        for tree_index in tree_indices:
            sem_node = etree.Element('semantics')
            sem_node.set(
                'ccg_id',
                sentence.xpath('./ccg[{0}]/@id'.format(tree_index))[0])
            try:
                sem_node.set('status', 'success')
                sem_tree = assign_semantics_to_ccg(sentence, semantic_index,
                                                   tree_index)
                sem_node.set(
                    'root',
                    sentence.xpath('./ccg[{0}]/@root'.format(tree_index))[0])
                filter_attributes(sem_tree)
                sem_node.extend(sem_tree.xpath('.//descendant-or-self::span'))
            except LogicalExpressionException as e:
                # Record the failure on the node rather than aborting the run.
                sem_node.set('status', 'failed')
                logging.error('An error occurred: {0}'.format(e))
            sentence.append(sem_node)

    root_xml_str = serialize_tree(root)
    with codecs.open(args.sem, 'wb') as fout:
        fout.write(root_xml_str)