Exemple #1
0
def add_to_shrg_rules(shrg_rules, lhs, rhs_prev, rhs_next, s, t):
    """Register the (prev, next) rule pair built from *rhs_prev* / *rhs_next*.

    The pair is filed in *shrg_rules* under a ``grammar.Nonterminal`` whose
    name is ``str(len(lhs))``.  When a stored pair is isomorphic to the new
    one on both sides (``nx.is_isomorphic`` with ``edge_isomorph`` and
    ``node_isomorph``), both stored rules get their ``weight`` doubled and
    ``iso`` set to True; every such stored pair is updated.  When no stored
    pair matches, the new pair is added to the list.

    *shrg_rules* is modified in place; nothing is returned.
    """
    lhs_he = grammar.Nonterminal(str(len(lhs)))
    rule_prev = grammar.Rule(lhs_he, make_rule(rhs_prev, lhs, s), t)
    rule_next = grammar.Rule(lhs_he, make_rule(rhs_next, lhs, s), t)

    # First pair seen for this nonterminal: start a new list and stop here.
    if lhs_he not in shrg_rules:
        shrg_rules[lhs_he] = [(rule_prev, rule_next)]
        return

    def same_graph(left, right):
        return nx.is_isomorphic(left, right,
                                edge_match=edge_isomorph,
                                node_match=node_isomorph)

    match = None
    for known_pair in shrg_rules[lhs_he]:
        # The "next" side is only compared once the "prev" side matched.
        if (same_graph(known_pair[0].rhs, rule_prev.rhs)
                and same_graph(known_pair[1].rhs, rule_next.rhs)):
            match = known_pair
            for stored_rule in (match[0], match[1]):
                stored_rule.weight *= 2
                stored_rule.iso = True

    if not match:
        shrg_rules[lhs_he] += [(rule_prev, rule_next)]
Exemple #2
0
    def test_next_category(self):
        """``next_category`` is checked for two dot positions of ``S -> VP``."""
        # (span_stop, dot_position, expected next_category)
        scenarios = (
            # Dot in front of 'VP': 'VP' is the next category.
            (0, 0, 'VP'),
            # Dot behind 'VP': the next category is the empty string.
            (1, 1, ''),
        )
        for span_stop, dot_position, expected in scenarios:
            state = psr.State(rule=gmr.Rule('S', ['VP']),
                              span_start=0,
                              span_stop=span_stop,
                              dot_position=dot_position)
            self.assertEqual(expected, state.next_category)
Exemple #3
0
    def test_incomplete(self):
        """``incomplete`` is true with the dot at 0 and false with the dot at 1."""

        def state_with_dot_at(dot_position):
            return psr.State(rule=gmr.Rule('S', ['VP']),
                             span_start=0,
                             span_stop=0,
                             dot_position=dot_position)

        # Dot in front of 'VP'.
        self.assertTrue(state_with_dot_at(0).incomplete)

        # Dot behind 'VP'.
        self.assertFalse(state_with_dot_at(1).incomplete)
Exemple #4
0
    def R():
        """Parse one rule: a nonterminal, ``->``, productions split by ``|``, then ``;``.

        Returns False when ``Nonterminal()`` fails.  Otherwise the finished
        ``grammar.Rule`` is pushed onto ``stack`` and True is returned.
        Syntax problems are reported through ``error``.
        """
        if not Nonterminal():
            return False

        arrow_seen = lexer.token(r'\->')
        if not arrow_seen:
            error('rules LHSs must be followed by "->"')

        rule = grammar.Rule(stack.pop())

        def attach_production():
            # The stack top is unpacked as (rhs, actions, prec, assoc).
            rhs, actions, prec, assoc = stack.pop()
            rule.addProduction(rhs=rhs,
                               actions=actions,
                               prec=prec,
                               assoc=assoc)

        # The first production is mandatory.
        if not Production():
            error('rule "{0}" has no productions'.format(rule.lhs))
        attach_production()

        # Further productions, each introduced by "|".
        while lexer.token(r'\|'):
            if not Production():
                error('(%s) "|" must be followed by a production' % (rule.lhs))
            attach_production()

        terminated = lexer.token(';')
        if not terminated:
            error('(%s) rules must be ended by ";"' % (rule.lhs))

        stack.append(rule)
        return True
Exemple #5
0
 def test_empty_words(self):
     """Parsing an empty word list gives back an empty list of trees."""
     only_rule = gmr.Rule('N', ['Nothing'], preterminal=True)
     trees = psr.EarleyParser(gmr.Grammar(only_rule)).parse([])
     self.assertEqual(0, len(trees))
     self.assertEqual([], trees)
Exemple #6
0
 def test_regex_rule(self):
     """A preterminal rule whose right side is a ``gmr.Regex`` parses 'hello'."""
     lowercase = gmr.Regex(r'[a-z]')
     regex_grammar = gmr.Grammar(
         gmr.Rule('S', [lowercase], preterminal=True))
     trees = psr.EarleyParser(regex_grammar).parse(['hello'])
     self.assertEqual(1, len(trees))
     self.assertEqual([['S', 'hello']], trees)
Exemple #7
0
 def test_programming_language_parsing(self):
     """``x = 599993949`` parses as program -> variable operator value."""
     program_rule = gmr.Rule('program', ['variable', 'operator', 'value'])
     # One regex-backed preterminal per token kind.
     token_patterns = (
         ('variable', r'x'),
         ('operator', r'[+\-=*/]'),
         ('value', r'\d+'),
     )
     token_rules = [
         gmr.Rule(name, [gmr.Regex(pattern)], preterminal=True)
         for name, pattern in token_patterns
     ]
     toy_grammar = gmr.Grammar(program_rule,
                               *token_rules,
                               distinguished_symbol='program')

     trees = psr.EarleyParser(toy_grammar).parse(['x', '=', '599993949'])

     expected_tree = [
         'program',
         ['variable', 'x'],
         ['operator', '='],
         ['value', '599993949'],
     ]
     self.assertEqual([expected_tree], trees)
Exemple #8
0
def markNodesInMatrix(t, cky_matrix, displacement):
    """Write the nodes of tree *t* into *cky_matrix* and return the matrix.

    A pre-terminal node stores its ``root`` at ``[0, displacement]``.  Any
    other node stores ``grammar.Rule(root, [first.root, second.root])`` at
    ``[conta_terminali(t) - 1, displacement]`` and then recurses into its
    first two children; the second child is shifted right by
    ``conta_terminali`` of the first.
    """
    if t.isPreTerminal():
        cky_matrix[0, displacement] = t.root
        return cky_matrix

    first, second = t.children[0], t.children[1]
    node_rule = grammar.Rule(t.root, [first.root, second.root])
    cky_matrix[conta_terminali(t) - 1, displacement] = node_rule

    markNodesInMatrix(first, cky_matrix, displacement)
    markNodesInMatrix(second, cky_matrix,
                      displacement + conta_terminali(first))
    return cky_matrix
def augment_grammar(g):
    """
    Augment grammar g in place with a new start symbol.

    The new start symbol is the old one followed by an apostrophe (S -> S').
    It becomes g.start, is appended to g.nonterm, and a rule built from
    [S', [S]] is appended to g.rules.
    """
    old_start = g.start
    new_start = old_start + "'"
    g.start = new_start
    g.nonterm.append(new_start)
    g.rules.append(grammar.Rule([new_start, [old_start]]))
Exemple #10
0
 def test_initializer(self):
     """A grammar built from three rules contains each of them and has length 3."""

     def fresh_rules():
         # New Rule objects on every call, so membership relies on equality.
         return [
             gmr.Rule('S', ['VP']),
             gmr.Rule('VP', ['V']),
             gmr.Rule('V', ['initialize'], preterminal=True),
         ]

     grammar = gmr.Grammar(*fresh_rules())
     for expected_rule in fresh_rules():
         self.assertIn(expected_rule, grammar)
     self.assertEqual(3, len(grammar))
Exemple #11
0
def markNodesInMatrix(t, cky_matrix, displacement, returnTree=False):
    """Append the nodes of tree *t* to the cells of *cky_matrix*; return the matrix.

    Each node goes into the list at ``[displacement, column]``: column 0 for
    a pre-terminal, ``conta_terminali(t) - 1`` otherwise.  The appended entry
    is the node itself when *returnTree* is true, else a ``grammar.Rule``
    made from the node's root and its children's roots.  Children of a
    non-pre-terminal are visited left to right, each shifted by the summed
    ``conta_terminali`` of the siblings before it.
    """
    is_leaf = t.isPreTerminal()
    column = 0 if is_leaf else conta_terminali(t) - 1
    cell = cky_matrix[displacement, column]
    if returnTree:
        cell.append(t)
    else:
        cell.append(grammar.Rule(t.root, [child.root for child in t.children]))

    if not is_leaf:
        for index, child in enumerate(t.children):
            # The first child has no preceding siblings, so its shift is 0.
            shift = sum(
                conta_terminali(sibling) for sibling in t.children[:index])
            markNodesInMatrix(child, cky_matrix, displacement + shift,
                              returnTree)
    return cky_matrix
Exemple #12
0
 def test_multiple_parses(self):
     """'duck' is both V and N, so the sentence has two parses in a fixed order."""
     lexicon = (
         ('N', 'I'),
         ('V', 'made'),
         ('N', 'her'),
         ('V', 'duck'),
         ('N', 'duck'),
     )
     rules = [gmr.Rule(tag, [word], preterminal=True) for tag, word in lexicon]
     rules.append(gmr.Rule('S', ['N', 'V', 'N', 'V']))
     rules.append(gmr.Rule('S', ['N', 'V', 'N', 'N']))
     grammar = gmr.Grammar(*rules)

     trees = psr.EarleyParser(grammar).parse(['I', 'made', 'her', 'duck'])
     self.assertEqual(2, len(trees))

     # Both parses agree on the first three words and differ on 'duck'.
     shared_prefix = [['N', 'I'], ['V', 'made'], ['N', 'her']]
     verb_reading = ['S'] + shared_prefix + [['V', 'duck']]
     noun_reading = ['S'] + shared_prefix + [['N', 'duck']]
     self.assertEqual([verb_reading, noun_reading], trees)
 def R(self, states, s: earley.Situation, j: int):
     """Append rule numbers to ``self.pi``, starting from situation *s*, and return it.

     The rule ``s.left -> s.beforeDot`` is looked up with
     ``find_rule_number`` and its number appended to ``self.pi``.  The
     symbols before the dot are then walked from last to first (``k``),
     while ``c`` starts at *j* and tracks an index into *states*:

     * a terminal moves both ``k`` and ``c`` back by one;
     * for a nonterminal ``Xk``, ``states[c]`` is searched for a situation
       ``st`` with nothing after the dot and left side ``Xk``; with
       ``r = st.get_k()``, ``states[r]`` is searched for a situation with
       the same left side as *s* and ``Xk`` right after its dot.  On the
       first such pair the method recurses on ``(st, c)``, moves ``k`` back
       by one and sets ``c`` to ``r``.

     Progress is printed to stdout along the way.

     NOTE(review): looks like the right-parse reconstruction step of an
     Earley recognizer — confirm against the caller.
     NOTE(review): when no matching pair is found for a nonterminal, or a
     symbol is neither terminal nor nonterminal, ``k`` is left unchanged and
     the ``while`` loop does not appear to terminate.
     """
     breaked = False
     # Copies of the left side and of the symbols before the dot form the rule.
     rule = gr.Rule(copy.deepcopy(s.left), copy.deepcopy(s.beforeDot))
     self.pi.append(self.__grammar.find_rule_number(rule))
     print(self.pi)
     k = len(s.beforeDot)
     print("k = " + str(k))
     c = j
     print("c = " + str(c))
     if k == 0:
         return self.pi
     while k != 0:
         # Reset on every pass so the searches below run again for each symbol.
         breaked = False
         rightterm = rule.right[k - 1]
         print(rightterm.value)
         if self.__grammar.is_terminal(rightterm):
             k = k - 1
             c = c - 1
             print("k = " + str(k))
             print("c = " + str(c))
         elif self.__grammar.is_nonterminal(rightterm):
             # find the situation in I[c]
             Xk = s.beforeDot[k - 1].value
             A = s.left.value
             for st in states[c]:
                 if breaked:
                     break
                 # st has nothing after its dot and its left side is Xk
                 if not st.afterDot and st.left.value == Xk:
                     r = st.get_k()
                     print("r = " + str(r))
                     # find the situation in I[r]
                     print("-------")
                     for nst in states[r]:
                         if breaked:
                             break
                         if nst.left.value == A and nst.afterDot and nst.afterDot[
                                 0].value == Xk:
                             # Recurse into st first, then step back one symbol.
                             self.R(states, st, c)
                             k = k - 1
                             c = r
                             breaked = True
     return self.pi
Exemple #14
0
 def R(self, pi, states, s: earley.Situation, j: int):
     """Append rule numbers to *pi*, starting from situation *s*, and return it.

     The rule ``s.left -> s.beforeDot`` is looked up with
     ``find_rule_number`` and its number appended to *pi*.  The symbols
     before the dot are then walked from last to first (``k``), while ``c``
     starts at *j* and tracks an index into *states*:

     * a terminal moves both ``k`` and ``c`` back by one;
     * for a nonterminal ``Xk``, ``states[c]`` is searched for a situation
       ``st`` with nothing after the dot and left side ``Xk``; with
       ``r = st.get_k()``, ``states[r]`` is searched for a situation with
       the same left side as *s*, ``Xk`` right after its dot and exactly
       ``k - 1`` symbols before it.  The first ``st`` with such a partner
       is recursed into, then ``k`` moves back by one and ``c`` becomes ``r``.

     Progress is printed to stdout along the way.

     NOTE(review): when the search finds nothing, ``searchstate`` is still
     None when passed to the recursive call (which reads ``s.left``), and
     ``r`` is either unbound or left over from a non-matching candidate —
     confirm the caller guarantees a match.
     NOTE(review): a symbol that is neither terminal nor nonterminal leaves
     ``k`` unchanged, so the loop would not terminate.
     """
     # Copies of the left side and of the symbols before the dot form the rule.
     rule = gr.Rule(copy.deepcopy(s.left), copy.deepcopy(s.beforeDot))
     pi.append(self.__grammar.find_rule_number(rule))
     # print(pi)
     k = len(s.beforeDot)
     print("k = " + str(k))
     c = j
     print("c = " + str(c))
     while k > 0:
         rightterm = rule.right[k - 1]
         print(rightterm.value)
         if self.__grammar.is_terminal(rightterm):
             k = k - 1
             c = c - 1
             print("k = " + str(k))
             print("c = " + str(c))
         elif self.__grammar.is_nonterminal(rightterm):
             # find the situation in I[c]
             Xk = s.beforeDot[k - 1].value
             A = s.left.value  # not read below; the comparison uses s.left.value directly
             searchstate = None
             searchflag = False
             for st in states[c]:
                 if searchflag:
                     break
                 # st has nothing after its dot and its left side is Xk
                 if not st.afterDot and st.left.value == Xk:
                     r = st.get_k()
                     print("r = " + str(r))
                     # find the situation in I[r]
                     print("-------")
                     for nst in states[r]:
                         if nst.left.value == s.left.value \
                                 and nst.afterDot and nst.afterDot[0].value == Xk \
                                 and len(nst.beforeDot) == k - 1:
                             searchstate = st
                             searchflag = True
                             break
             # Recurse into the situation found, then step back one symbol.
             self.R(pi, states, searchstate, c)
             k = k - 1
             c = r
     return pi
Exemple #15
0
    def test_parse(self):
        """'Book that flight' parses to a single S -> VP -> V NP tree."""
        phrase_rules = [
            gmr.Rule('S', ['VP']),
            gmr.Rule('VP', ['V', 'NP']),
            gmr.Rule('NP', ['Det', 'Nominal']),
        ]
        lexicon = (('Det', 'that'), ('Nominal', 'flight'), ('V', 'Book'))
        word_rules = [
            gmr.Rule(tag, [word], preterminal=True) for tag, word in lexicon
        ]
        grammar = gmr.Grammar(*(phrase_rules + word_rules))

        trees = psr.EarleyParser(grammar).parse(['Book', 'that', 'flight'])

        # Expected tree, assembled bottom-up.
        noun_phrase = ['NP', ['Det', 'that'], ['Nominal', 'flight']]
        verb_phrase = ['VP', ['V', 'Book'], noun_phrase]
        self.assertEqual([['S', verb_phrase]], trees)
Exemple #16
0
    # Right-hand-side graph `frhs`, created with root="i1": node "i1" has
    # edges labelled "id", "agent" and "theme" to "want", "x" and "i2".
    frhs = networkx.DiGraph(root="i1")
    frhs.add_node("i1", label="instance")
    frhs.add_node("want", label="want")
    frhs.add_edge("i1", "want", label="id")
    frhs.add_node("x", label="instance")
    frhs.add_edge("i1", "x", label="agent")
    # Nonterminal hyperedge "Entity" over ("x",), tagged link=0.
    hypergraphs.add_hyperedge(frhs, ("x", ),
                              label=grammar.Nonterminal("Entity"),
                              link=0)
    frhs.add_node("i2", label="instance")
    frhs.add_edge("i1", "i2", label="theme")
    # Nonterminal hyperedge "Truth" over ("i2", "x"), tagged link=1.
    hypergraphs.add_hyperedge(frhs, ("i2", "x"),
                              label=grammar.Nonterminal("Truth"),
                              link=1)
    # `lhs` and `frules` are defined outside this excerpt.
    frules.append(grammar.Rule(lhs, frhs, id=0))

    # Second right-hand-side graph `erhs`, created with root="0"; its nodes
    # carry syntactic category labels (S, NP, VP, VBP, SBAR) plus "want".
    # NOTE(review): the link=0 "Entity" hyperedge presumably pairs with the
    # one in `frhs` above — confirm against the grammar module.
    erhs = networkx.DiGraph(root="0")
    erhs.add_node("0", label="S")
    erhs.add_node("1", label="NP")
    erhs.add_node("2", label="VP")
    # Unlabelled hyperedge over ("0", "1", "2").
    hypergraphs.add_hyperedge(erhs, ("0", "1", "2"))
    hypergraphs.add_hyperedge(erhs, ("1", ),
                              label=grammar.Nonterminal("Entity"),
                              link=0)
    erhs.add_node("21", label="VBP")
    erhs.add_node("22", label="SBAR")
    hypergraphs.add_hyperedge(erhs, ("2", "21", "22"))
    erhs.add_node("211", label="want")
    hypergraphs.add_hyperedge(erhs, ("21", "211"))
    hypergraphs.add_hyperedge(erhs, ("22", ),
Exemple #17
0
 def test_equality(self):
     """Rules with the same sides are equal; a reordered right side is not."""
     reference, twin, swapped = (
         gmr.Rule('S', right_side)
         for right_side in (['NP', 'VP'], ['NP', 'VP'], ['VP', 'NP'])
     )
     self.assertEqual(reference, twin)
     self.assertNotEqual(reference, swapped)
Exemple #18
0
def fromPosListToRule(posList):
    """Return one ``grammar.Rule(pos, ['None'])`` per entry of *posList*, in order."""
    rules = []
    for pos in posList:
        # Every rule gets its own fresh ['None'] right-hand side.
        rules.append(grammar.Rule(pos, ['None']))
    return rules
Exemple #19
0
def parser_with_reconstruction3(sentence, grammar, k_best, distributed_vector=None, dtk_generator=None, referenceTable=None, rule_filter=2):
    """Fill a chart of scored trees for *sentence* and return the full-span trees.

    ``P[start][length - 1]`` holds the list of trees that cover ``length``
    words beginning at word ``start``.  Length-1 cells get one tree per
    grammar symbol and word, scored by the dot product of
    ``dtk_generator.sn(tree)`` with *distributed_vector* divided by the norm
    of ``sn(tree)``, and keep the two best.  Longer cells combine every pair
    of trees from a left and a right sub-span through the rules in
    ``grammar.nonterminalrules["<left root> <right root>"]`` that pass
    ``filterRule``; each cell is then trimmed to the *k_best* best trees plus
    up to *k_best* trees whose root differs from the best tree's root.

    Args:
        sentence: whitespace-separated words.
        grammar: object providing ``symbols`` and ``nonterminalrules``.
        k_best: number of trees kept per cell (see above).
        distributed_vector: vector the tree encodings are scored against.
            Defaults to None but is used unconditionally.
        dtk_generator: object providing ``sn``, ``dtf`` and ``dt``.
            Defaults to None but is used unconditionally.
        referenceTable: optional table enabling the debug printing section.
        rule_filter: forwarded to ``filterRule``.

    Returns:
        ``(True, trees, P)`` with the full-span trees sorted by the dot
        product of ``dtk_generator.dt(tree)`` with *distributed_vector*, or
        ``(False, None, P)`` when the full-span cell is empty or when
        ``psutil.virtual_memory().percent`` exceeds 95 at the start of a
        span length.
    """
    # uses the new grammar (grammar_2)
    words = sentence.split()
    n = len(words)

    #initialize TABLE
    # one independent empty list per cell of the n x n object array
    P = numpy.zeros((n, n), dtype=object)
    for i, _ in numpy.ndenumerate(P):
        P[i] = []

    #unit production
    for i, word in enumerate(words):
        # to prevent uncovered words we create rule of the form X -> w
        # for each symbol X in the grammar and for each word w in the sentence
        for symbol in grammar.symbols:
            rule = gramm.Rule(symbol,[word])    # create a new rule
            rt = rule.toTree()                  # and transform into tree

            score = numpy.dot(dtk_generator.sn(rt), distributed_vector)
            ## NORMALIZATION
            score = score/numpy.sqrt(numpy.dot(dtk_generator.sn(rt), dtk_generator.sn(rt)))
            rt.score = score

            #P[i][0].append(((rule, None),(rt, score)))
            P[i][0].append(rt)


        #P[i][0] = sorted(P[i][0], key=lambda x: x[1][1], reverse=True)[:2]
        # keep only the two best-scoring trees per word (fixed 2, not k_best)
        P[i][0] = sorted(P[i][0], key = lambda x: x.score, reverse=True)[:2]

    #non terminal rules
    numero_dtk = 0 #count iterations for debugging purpose
    # i is the span length, j the 1-based start position, k the length of the left part
    for i in range(2, n + 1):
        #TODO:
        #add a check if numero_dtk is too high and break returning "not parsed"
        # total_size = len(dtk_generator.dt_cache) + len(dtk_generator.sn_cache) + len(dtk_generator.dtf_cache)
        # total_size_mbytes = (total_size*8*dtk_generator.dimension)/1048576
        # print (i, total_size_mbytes)
        # give up when system memory usage is above 95%
        if psutil.virtual_memory().percent > 95:
            return False, None, P

        for j in range(1, n - i + 2):
            for k in range(1, i):
                # look for combination of a tree in leftCell with a tree in rightCell
                leftCell = P[j - 1][k - 1]
                rightCell = P[j + k - 1][i - k - 1]

                for (subtree1, subtree2) in itertools.product(leftCell, rightCell):
                    # rules are looked up by the two roots joined with a space
                    stringa = subtree1.root + " " + subtree2.root
                    for rule in grammar.nonterminalrules[stringa]:
                        #FILTER on rules with too low score
                        passed, ruleScore = filterRule(rule, dtk_generator, distributed_vector, rule_filter)
                        if passed:
                            rtt = tree(root=rule.left, children=[subtree1, subtree2])
                            score = numpy.dot(dtk_generator.sn(rtt), distributed_vector)
                            ## NORMALIZATION
                            score = score/ruleScore
                            rtt.score = score

                            P[j-1][i-1].append(rtt)

                            numero_dtk = numero_dtk + 1

            #sort rules
            #P[j-1][i-1] = sorted(P[j-1][i-1], key=lambda x: x[1][1], reverse=True)
            P[j-1][i-1] = sorted(P[j-1][i-1], key=lambda x: x.score, reverse=True)
            #another k_best rules where the root is different than the first rule selected before
            #lista_diversi = [x for x in P[j-1][i-1] if x[0][0].left != P[j-1][i-1][0][0][0].left][:k_best]

            lista_diversi = [x for x in P[j-1][i-1] if x.root != P[j-1][i-1][0].root][:k_best]

            P[j-1][i-1] = P[j-1][i-1][:k_best]
            #if the new rules weren't already selected, add them
            if lista_diversi:
                for a in lista_diversi:
                    if a not in P[j-1][i-1]:
                        P[j-1][i-1].append(a)


            # DEBUG SECTION
            # if a reference is given, print the list of rules held in the cell after trimming, and the corresponding reference cell
            # at the first error return Pp (pretty-printed so it can be compared with referenceTable)
            # NOTE(review): the cells hold tree objects (see the append calls
            # above), yet this section indexes entries as x[0][0] / x[1][0],
            # which matches the tuple layout of the commented-out lines —
            # presumably stale; confirm before passing a referenceTable.
            # NOTE(review): referenceTable is indexed [i-1][j-1] while P is
            # indexed [j-1][i-1] — presumably the reference is transposed; verify.

            if referenceTable is not None:
                if P[j-1][i-1] and referenceTable[i-1][j-1]:
                    lista_alberi = [x[0][0] for x in P[j-1][i-1]]
                    if referenceTable[i-1][j-1] not in lista_alberi:
                        #rule = P[j-1][i-1][0][0][0]

                        print ("cella: ", (i-1, j-1))

                        print ([x[0][0] for x in P[j-1][i-1]], referenceTable[i-1][j-1]) # <- this case is a FAIL

                        #albero_sbagliato = P[j-1][i-1][0][1][0]
                        #score1 = P[j-1][i-1][0][1][1]
                        alberi_sbagliati = [x[1][0] for x in P[j-1][i-1]]



                        # reset the cache before printing the sn-based scores
                        dtk_generator.dt_cache = {}
                        print ("SN: ")

                        for albero_sbagliato in alberi_sbagliati:

                            # tree with the reference rule's left side as root and the first wrong tree's children
                            rtt = tree(root = referenceTable[i-1][j-1].left, children=alberi_sbagliati[0].children)

                            score1 = numpy.dot(dtk_generator.sn(albero_sbagliato), distributed_vector)
                            print (score1, albero_sbagliato)
                        score2 = numpy.dot(dtk_generator.sn(rtt), distributed_vector)
                        print (score2, rtt)

                        # reset the cache before printing the dtf-based scores
                        dtk_generator.dtf_cache = {}
                        print ("DTF: ")
                        for albero_sbagliato in alberi_sbagliati:
                            score1 = numpy.dot(dtk_generator.dtf(albero_sbagliato), distributed_vector)
                            # depth-one tree made of the root and the roots of its first two children
                            regola = tree(root=albero_sbagliato.root, children=[tree(albero_sbagliato.children[0].root, None),tree(albero_sbagliato.children[1].root, None)])
                            print ("punteggio regola: ", numpy.dot(dtk_generator.dtf(regola), distributed_vector), regola)
                            print (score1, albero_sbagliato)
                        score2 = numpy.dot(dtk_generator.dtf(rtt), distributed_vector)
                        print (score2, rtt)
                        #return False, None, P
                else:
                    if referenceTable[i-1][j-1]: # and P[][] is empty
                        pass
                        #print (P[j-1][i-1],referenceTable[i-1][j-1] ) # <- this case is a FAIL
                        #return False, None, P
                    if P[j-1][i-1]: # and referenceTable is 0
                        pass
                        #print ("ok?", P[j-1][i-1],referenceTable[i-1][j-1] ) # <- this case may be fine

            # END DEBUG

    #print (numero_dtk) #number of iteration

    #list of tree in the final cell of the table
    # start 0, last column: the trees covering the whole sentence
    finalList = P[0][-1]
    if finalList:

        #final sort (by DTK)
        finalList = sorted(finalList, key=lambda x: numpy.dot(dtk_generator.dt(x),distributed_vector), reverse=True)
        return True, finalList , P
    else:
        #treeToCYKMatrix.printCYKMatrix(simpleTable(P))
        return False, None, P
Exemple #20
0
 def test_ambiguity(self):
     """The telescope sentence yields exactly two parses, in a fixed order."""
     phrase_rules = [
         gmr.Rule('S', ['NP', 'VP']),
         gmr.Rule('NP', ['Det', 'Nominal']),
         gmr.Rule('NP', ['Det', 'Nominal', 'PP']),
         gmr.Rule('NP', ['Nominal']),
         gmr.Rule('VP', ['VP', 'PP']),
         gmr.Rule('VP', ['V', 'NP']),
         gmr.Rule('PP', ['Prep', 'NP']),
     ]
     lexicon = (
         ('Det', 'a'),
         ('Nominal', 'I'),
         ('Nominal', 'man'),
         ('Nominal', 'telescope'),
         ('V', 'saw'),
         ('Prep', 'with'),
     )
     word_rules = [
         gmr.Rule(tag, [word], preterminal=True) for tag, word in lexicon
     ]
     grammar = gmr.Grammar(*(phrase_rules + word_rules))

     words = ['I', 'saw', 'a', 'man', 'with', 'a', 'telescope']
     trees = psr.EarleyParser(grammar).parse(words)
     self.assertEqual(2, len(trees))

     # Pieces shared by both expected trees.
     subject = ['NP', ['Nominal', 'I']]
     verb = ['V', 'saw']
     a_man = [['Det', 'a'], ['Nominal', 'man']]
     with_a_telescope = [
         'PP', ['Prep', 'with'],
         ['NP', ['Det', 'a'], ['Nominal', 'telescope']]
     ]

     # ... saw ... with a telescope
     pp_under_vp = [
         'S', subject,
         ['VP', ['VP', verb, ['NP'] + a_man], with_a_telescope]
     ]
     # ... man with a telescope
     pp_under_np = [
         'S', subject,
         ['VP', verb, ['NP'] + a_man + [with_a_telescope]]
     ]
     self.assertEqual([pp_under_vp, pp_under_np], trees)
Exemple #21
0
    def parse(self, sentence, k_best, distributed_vector=None, referenceTable=None):
        """Fill a chart of scored trees for *sentence*; return ``(parsed, trees, chart)``.

        ``chart[start, length - 1]`` is the list of trees covering ``length``
        words from word ``start``.  Single-word cells receive one tree per
        grammar symbol, scored by ``sn(tree) . distributed_vector`` divided by
        the norm of ``sn(tree)``, and keep the two best.  Longer cells pair
        trees of a left and a right sub-span through the rules stored under
        ``"<left root> <right root>"`` that pass ``self.filterRule``; each
        cell keeps its *k_best* best trees plus up to *k_best* trees whose
        root differs from the best one's.

        Returns ``(True, trees, chart)`` with the full-span trees ordered by
        ``dt(tree) . distributed_vector``, or ``(False, None, chart)`` when
        that cell is empty or system memory usage exceeds 95%.
        *referenceTable* is accepted but not used.
        """
        words = sentence.split()
        n = len(words)

        # An n x n object array with an independent empty list in every cell.
        chart = numpy.zeros((n, n), dtype=object)
        for position, _ in numpy.ndenumerate(chart):
            chart[position] = []

        def by_score(candidate):
            return candidate.score

        # Single-word spans: to avoid uncovered words, every grammar symbol
        # is tried as the parent of every word.
        for start, word in enumerate(words):
            for symbol in self.grammar.symbols:
                leaf = gramm.Rule(symbol, [word]).toTree()
                raw_score = numpy.dot(self.dtk_generator.sn(leaf),
                                      distributed_vector)
                norm = numpy.sqrt(
                    numpy.dot(self.dtk_generator.sn(leaf),
                              self.dtk_generator.sn(leaf)))
                leaf.score = raw_score / norm
                chart[start, 0].append(leaf)

            chart[start, 0] = sorted(chart[start, 0], key=by_score,
                                     reverse=True)[:2]

        # Longer spans, shortest first.
        for length in range(2, n + 1):
            if psutil.virtual_memory().percent > 95:
                return False, None, chart

            for start in range(n - length + 1):
                target = (start, length - 1)

                for left_length in range(1, length):
                    left_cell = chart[start, left_length - 1]
                    right_cell = chart[start + left_length,
                                       length - left_length - 1]

                    for left_tree, right_tree in itertools.product(
                            left_cell, right_cell):
                        lookup_key = left_tree.root + " " + right_tree.root
                        for rule in self.grammar.nonterminalrules[lookup_key]:
                            # Skip rules rejected by the score filter.
                            passed, rule_score = self.filterRule(
                                rule, distributed_vector, self.filter)
                            if not passed:
                                continue
                            candidate = tree(root=rule.left,
                                             children=[left_tree, right_tree])
                            raw_score = numpy.dot(
                                self.dtk_generator.sn(candidate),
                                distributed_vector)
                            candidate.score = raw_score / rule_score
                            chart[target].append(candidate)

                # Trim the cell: the k_best best trees, then up to k_best
                # trees rooted differently from the best one, skipping any
                # already kept.
                ranked = sorted(chart[target], key=by_score, reverse=True)
                other_roots = [
                    candidate for candidate in ranked
                    if candidate.root != ranked[0].root
                ][:k_best]
                kept = ranked[:k_best]
                for extra in other_roots:
                    if extra not in kept:
                        kept.append(extra)
                chart[target] = kept

        # Trees spanning the whole sentence live in the last column of row 0.
        full_span = chart[0, -1]
        if not full_span:
            return False, None, chart

        full_span = sorted(
            full_span,
            key=lambda candidate: numpy.dot(
                self.dtk_generator.dt(candidate), distributed_vector),
            reverse=True)
        return True, full_span, chart
Exemple #22
0
    def parse(self,
              sentence,
              k_best=2,
              distributed_vector=None,
              referenceTable=None,
              rule_filter=2):
        """return the k-best parse"""
        words = sentence.split()
        n = len(words)

        #initialize TABLE
        C = numpy.zeros((n, n), dtype=object)
        for i, _ in numpy.ndenumerate(C):
            #each cell has a type1 list and a type2 list (C is matrix of completed (up to that point) trees)
            #elements of type1 are complete trees: A -> B C D ... (B, C, D ... sono alberi completi)
            #elements of type2 are LIST of partial trees: [B, C, ..., •] (B, C ... sono ancora alberi completi, ma esiste una regola A -> B C D .... )
            #each element in C should also have a score attached to it (<dtk(element), dtk(reference_tree)> <- o qualche variazione sul tema )
            C[i] = [[], []]

        #parsing step
        numero_dtk = 0
        for span in range(0, n):
            for i in range(0, n - span):
                j = i + span
                if i == j:
                    # to prevent uncovered words we create rule of the form X -> w
                    # for each symbol X in the grammar and for each word w in the sentence
                    for sym in self.grammar.symbols:
                        rule = gramm.Rule(sym, [words[i]])
                        rt = rule.toTree()

                        score = numpy.dot(self.dtk_generator.sn(rt),
                                          distributed_vector)
                        #score = numpy.dot(dtk_generator.dtf(rt), distributed_vector)
                        #score = sorting_method(dtk_generator, rt, distributed_vector)
                        ## NORMALIZATION
                        score = score / numpy.sqrt(
                            numpy.dot(self.dtk_generator.sn(rt),
                                      self.dtk_generator.sn(rt)))
                        rt.score = score

                        C[i][j][0].append(rt)

                        #return None, []

                    C[i, j][0] = sorted(C[i, j][0],
                                        key=lambda x: x.score,
                                        reverse=True)[:k_best]

                    #self-filling part
                    #print ("prima: ", len(C[i, j][0]))
                    for B in C[i, j][0]:  #B = A -> B C
                        B_string = B.root
                        rules = self.grammar.nonterminalrules[
                            B_string]  #X -> A •

                        for r in rules:
                            if B_string != " ".join(r.right):
                                if [B, "•"] not in C[i, j][
                                        1]:  # <- devo dare uno score a questo (o forse no?)
                                    C[i, j][1].append([B, "•"])
                            else:
                                new_tree = tree(root=r.left, children=[B])
                                score = numpy.dot(
                                    self.dtk_generator.sn(new_tree),
                                    distributed_vector)
                                numero_dtk = numero_dtk + 1
                                #print (score, B.score, score > B.score)
                                if score > B.score:  #pensare ad un filtro più stringente....
                                    new_tree.score = score
                                    #print (new_tree)
                                    C[i, j][0].append(new_tree)
                        if len(C[i, j][0]) > 10:
                            break
                    #print ("dopo: ", len(C[i][j][0]))

                    #sort and trimming (credo che non serva sortare l'altra lista...)
                    C[i, j][0] = sorted(C[i, j][0],
                                        key=lambda x: x.score,
                                        reverse=True)[:k_best]
                    #C[i,j][1] = sorted(C[i,j][1], key=lambda x: self.scorePartialRule(x, distributed_vector), reverse=True)[:k_best]

                if j > i:
                    for k in range(0, j):
                        first_cell_C = C[i, k]
                        second_cell_C = C[k + 1, j]
                        #print (len(first_cell_C[1]), len(second_cell_C[0]))
                        for (x,
                             y) in itertools.product(first_cell_C[1],
                                                     second_cell_C[0]):
                            xx = " ".join(c.root for c in x[:-1])
                            yy = y.root

                            string = xx + " " + yy

                            rules = self.grammar.nonterminalrules[string]
                            #print ("regole: ", len(rules), end=" ---- ")

                            for r in rules:
                                #rule filtering
                                passed, ruleScore = self.filterRule(
                                    r, distributed_vector, self.filter)
                                if passed:
                                    if " ".join(r.right) == string:
                                        #print (r, "empty")
                                        children = x[:-1]
                                        children.append(y)
                                        new_tree = tree(root=r.left,
                                                        children=children)
                                        score = numpy.dot(
                                            self.dtk_generator.sn(new_tree),
                                            distributed_vector)
                                        numero_dtk = numero_dtk + 1
                                        new_tree.score = score
                                        if new_tree not in C[i, j][0]:
                                            C[i, j][0].append(new_tree)
                                    else:
                                        new_list = x[:-1] + [y] + ["•"]
                                        if new_list not in C[i, j][1]:
                                            C[i, j][1].append(new_list)

                    # TODO: devo vedere dove mettere il sorting... se qui, dopo il self-filling o in entrambi i posti. (o eventualmente con k diversi)
                    # TODO: sembra vada bene metterlo solo qui
                    # C[i, j][0] = sorted(C[i, j][0], key=lambda x: x.score, reverse=True)[:k_best]

                    #self-filling part
                    #print ("prima: ", len(C[i, j][0]))
                    for B in C[i, j][0]:
                        B_string = B.root  #B = A -> B C
                        rules = self.grammar.nonterminalrules[B_string]
                        for r in rules:
                            #TODO: add another rule filter here?
                            passed, ruleScore = self.filterRule(
                                r, distributed_vector, self.filter)
                            if passed:
                                if B_string != " ".join(r.right):
                                    if [B, "•"] not in C[i, j][1]:
                                        C[i, j][1].append([B, "•"])
                                else:
                                    # per evitare loop infiniti aggiungo un albero solo se il suo score è maggiore di quello precedente
                                    new_tree = tree(root=r.left, children=[B])
                                    score = numpy.dot(
                                        self.dtk_generator.sn(new_tree),
                                        distributed_vector)
                                    numero_dtk = numero_dtk + 1
                                    if score > B.score:  #TODO: pensare ad un filtro più stringente (e che sicuro non crei loop infiniti) ??
                                        new_tree.score = score
                                        C[i, j][0].append(new_tree)
                                        #print ("dopo: ", len(C[i, j][0]), r)
                        if len(
                                C[i, j][0]
                        ) > 20:  # se ne sto aggiungendo troppi lascio perdere...
                            break

                    # sort (no trimming) la prima lista
                    C[i, j][0] = sorted(C[i, j][0],
                                        key=lambda x: x.score,
                                        reverse=True)

                    # as in cyk normale, add a list of "different" rules
                    lista_diversi = [
                        x for x in C[i, j][0] if x.root != C[i, j][0][0].root
                    ][:k_best]

                    #e solo dopo trimmare a k_best
                    C[i, j][0] = C[i, j][0][:k_best]
                    #if the new rules weren't already selected, add them
                    if lista_diversi:
                        for a in lista_diversi:
                            if a not in C[j, i][0]:
                                C[i, j][0].append(a)

                    #infine sorto e trimmo l'altra lista
                    #C[i, j][1] = sorted(C[i, j][1], key=lambda x: self.scorePartialRule(x, distributed_vector), reverse=True)[:k_best]

        print(numero_dtk)
        #rendo l'ouput come quello di CYK_easy
        finalList = C[0][-1][0]
        if finalList:
            #final sort (by DTK)
            finalList = sorted(
                finalList,
                key=lambda x: numpy.dot(self.dtk_generator.dt(x),
                                        distributed_vector),
                reverse=True)
            return True, finalList, C
        else:
            #treeToCYKMatrix.printCYKMatrix(simpleTable(P))
            return False, None, C
Exemple #23
0
    def parse(self,
              sentence,
              k_best=2,
              distributed_vector=None,
              referenceTable=None,
              rule_filter=2,
              realTree=None):
        """Return the k-best parses of ``sentence``, guided by a distributed vector.

        The sentence is split on whitespace and parsed bottom-up in a
        CYK-like table ``C`` (an ``n x n`` numpy object array).  Cell
        ``C[start, length - 1]`` covers ``length`` words beginning at word
        ``start`` and holds two lists:

        * ``cell[0]`` -- complete trees; each one gets a ``score`` attribute
          computed as the dot product of a ``self.dtk_generator`` vector of
          the tree with ``distributed_vector``;
        * ``cell[1]`` -- partial right-hand sides: lists of complete trees
          whose roots form a proper prefix of some grammar rule.

        Args:
            sentence: whitespace-separated string of tokens.
            k_best: number of entries kept per list when a cell is trimmed.
            distributed_vector: vector every candidate is scored against
                (passed to ``numpy.dot`` and to the ``self.filter*`` helpers).
            referenceTable: not used by this method.
            rule_filter: not used by this method (``self.filter`` is used).
            realTree: not used by this method.

        Returns:
            ``(True, trees, C)`` when the cell spanning the whole sentence has
            at least one complete tree; ``trees`` is that cell's list sorted
            by ``numpy.dot(self.dtk_generator.dt(tree), distributed_vector)``
            in descending order.  ``(False, None, C)`` otherwise, including
            for an empty or whitespace-only sentence.
        """
        words = sentence.split()
        n = len(words)

        # Initialise the table: every cell is [complete_trees, partial_rules].
        C = numpy.zeros((n, n), dtype=object)
        for i, _ in numpy.ndenumerate(C):
            C[i] = [[], []]

        # An empty sentence has no cells at all; without this guard the
        # final ``C[0][-1]`` lookup raises IndexError.
        if n == 0:
            return False, None, C

        # ------------------------------------------------------------------
        # Unit production: fill the length-1 cells C[i, 0].
        # ------------------------------------------------------------------
        for i, word in enumerate(words):
            # To avoid uncovered words, a rule X -> w is tried for every POS
            # tag X of the grammar and every word w of the sentence.
            # TODO: be smarter, e.g. always use CD -> w when w is a number.

            # 1) parsing step
            if word == ",":
                # Special case: the comma is tagged deterministically, so no
                # score filter is applied.
                # NOTE(review): the right-hand side is passed as a bare string
                # here but as a list ([word]) in the general branch -- confirm
                # gramm.Rule accepts both.
                rt = gramm.Rule(",", word).toTree()
                rt.score = numpy.dot(self.dtk_generator.dtf(rt),
                                     distributed_vector)
                C[i, 0][0].append(rt)

            elif word in "`'":
                # Special case: a quote character w gets the tag "ww".
                # NOTE(review): ``in`` on a string is a substring test, so the
                # two-character token "`'" also takes this branch.
                rt = gramm.Rule(2 * word, word).toTree()
                rt.score = numpy.dot(self.dtk_generator.dtf(rt),
                                     distributed_vector)
                C[i, 0][0].append(rt)

            else:
                for symbol in self.grammar.posTags:
                    rt = gramm.Rule(symbol, [word]).toTree()
                    # Score is deliberately not normalised.
                    score = numpy.dot(self.dtk_generator.dtf(rt),
                                      distributed_vector)
                    rt.score = score
                    if score > self.LAMBDA / self.filter:
                        C[i, 0][0].append(rt)

            C[i, 0][0] = sorted(C[i, 0][0],
                                key=lambda x: x.score,
                                reverse=True)[:k_best]

            # 2) self-filling step: apply unary rules X -> A on top of the
            # trees already in the cell.  The list is extended while it is
            # being iterated on purpose, so that chains of unary rules are
            # followed.
            for tree in C[i, 0][0]:  # tree = A -> w
                treeString = tree.root
                rules = self.grammar.nonterminalrules[treeString]

                # Split the retrieved rules: those whose right-hand side is
                # exactly "A" are complete, all others only start with it.
                incompleteRules = False
                completeRules = []
                for rule in rules:
                    if treeString != " ".join(rule.right):
                        incompleteRules = True
                    else:
                        completeRules.append(rule)

                if incompleteRules:
                    C[i, 0][1].append([tree])

                for completeRule in completeRules:
                    passed, _ = self.filterRule(completeRule,
                                                distributed_vector,
                                                self.filter)
                    if passed:
                        # Complete rule of the form X -> A.
                        newTree = Tree(root=completeRule.left, children=[tree])
                        newTreescore = numpy.dot(
                            self.dtk_generator.sn(newTree), distributed_vector)
                        passed, _ = self.filterTree(newTree,
                                                    distributed_vector,
                                                    self.filter)

                        if passed:  # TODO: think about a stricter filter
                            newTree.score = newTreescore
                            C[i, 0][0].append(newTree)

                            # Safety valve against runaway unary chains; this
                            # only leaves the loop over completeRules.
                            if len(C[i, 0][0]) > 100:
                                print('aiuto')
                                break

            # 3) sort and trim the complete trees
            if len(C[i, 0][0]) > k_best:
                C[i, 0][0] = sorted(C[i, 0][0],
                                    key=lambda x: x.score,
                                    reverse=True)[:k_best]

        # ------------------------------------------------------------------
        # Longer spans: i is the span length, j the 1-based start position,
        # so the cell being filled is C[j - 1, i - 1].
        # ------------------------------------------------------------------
        for i in range(2, n + 1):
            for j in range(1, n - i + 2):

                # 1) parsing: extend a partial rule of the left part (first k
                # words) with a complete tree of the right part.
                for k in range(1, i):
                    leftCell = C[j - 1, k - 1]
                    rightCell = C[j + k - 1, i - k - 1]
                    for (partialRule, completeRule) in itertools.product(
                            leftCell[1], rightCell[0]):
                        ruleString = " ".join(
                            c.root
                            for c in partialRule) + " " + completeRule.root
                        rules = self.grammar.nonterminalrules[ruleString]

                        # Split into rules completed by this sequence and
                        # rules for which it is still only a prefix.
                        newPartialRule = False
                        newCompleteRule = []
                        for rule in rules:
                            if " ".join(rule.right) == ruleString:
                                newCompleteRule.append(rule)
                            else:
                                newPartialRule = True

                        children = partialRule + [completeRule]

                        if newPartialRule:
                            C[j - 1, i - 1][1].append(children)

                        for rule in newCompleteRule:
                            passed, _ = self.filterRule(
                                rule, distributed_vector, self.filter)
                            # NOTE(review): leftover debugging aid that prints
                            # the norm of one hard-coded rule; it uses the bare
                            # name ``Rule`` while the rest of the method uses
                            # ``gramm.Rule`` -- confirm it is still wanted.
                            if rule == Rule(
                                    left="NP",
                                    right=[
                                        "NP , NP , NP , NP , NP , NP CC NP"
                                    ]):
                                t = rule.toTree()
                                v = numpy.linalg.norm(
                                    self.dtk_generator.dtf(t))
                                print(v)

                            if passed:
                                newTree = Tree(root=rule.left,
                                               children=children)
                                newTree.score = numpy.dot(
                                    self.dtk_generator.sn(newTree),
                                    distributed_vector)
                                if newTree not in C[j - 1, i - 1][0]:
                                    C[j - 1, i - 1][0].append(newTree)

                # 2) self-filling: unary rules on top of the trees of this
                # cell.  As above, the list grows while it is iterated.
                for tree in C[j - 1, i - 1][0]:
                    ruleString = tree.root
                    rules = self.grammar.nonterminalrules[ruleString]

                    incompleteRules = False
                    completeRules = []
                    for rule in rules:
                        if ruleString != " ".join(rule.right):
                            incompleteRules = True
                        else:
                            completeRules.append(rule)

                    if incompleteRules:
                        C[j - 1, i - 1][1].append([tree])

                    for completeRule in completeRules:
                        # Drop rules with a low score.
                        passed, _ = self.filterRule(
                            completeRule, distributed_vector, self.filter)
                        if passed:
                            # Do not stack X -> X on a tree that already is
                            # X -> X.
                            # TODO: generalise to longer unary chains.
                            if (len(tree.children)
                                    == 1) and (completeRule.left == tree.root
                                               == tree.children[0].root):
                                continue
                            newTree = Tree(root=completeRule.left,
                                           children=[tree])
                            newTree.score = numpy.dot(
                                self.dtk_generator.sn(newTree),
                                distributed_vector)

                            C[j - 1, i - 1][0].append(newTree)
                    # Stop expanding once the cell has grown too large.
                    if len(C[j - 1, i - 1][0]) > 50:
                        break

                # 3) sorting and trimming
                if len(C[j - 1, i - 1][0]) > k_best:
                    C[j - 1, i - 1][0] = sorted(C[j - 1, i - 1][0],
                                                key=lambda x: x.score,
                                                reverse=True)

                # As in the plain CYK variant, also keep up to k_best trees
                # whose root differs from the root of the first tree.
                lista_diversi = [
                    x for x in C[j - 1, i - 1][0]
                    if x.root != C[j - 1, i - 1][0][0].root
                ][:k_best]

                # Only now trim to k_best ...
                C[j - 1, i - 1][0] = C[j - 1, i - 1][0][:k_best]
                # ... and re-add the "different" trees that were cut away.
                for a in lista_diversi:
                    if a not in C[j - 1, i - 1][0]:
                        C[j - 1, i - 1][0].append(a)

                # Finally sort and trim the list of partial rules.
                if len(C[j - 1, i - 1][1]) > k_best:
                    C[j - 1, i - 1][1] = sorted(
                        C[j - 1, i - 1][1],
                        key=lambda x: self.scorePartialRule(
                            x, self.filter, distributed_vector),
                        reverse=True)[:k_best]

        # Same output format as the plain CYK parser: the complete trees of
        # the cell that spans the whole sentence.
        finalList = C[0][-1][0]

        if finalList:
            # Final sort (by DTK).
            finalList = sorted(
                finalList,
                key=lambda x: numpy.dot(self.dtk_generator.dt(x),
                                        distributed_vector),
                reverse=True)
            return True, finalList, C
        else:
            return False, None, C
def set_rules():
    terms_str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZабвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ0123456789(){}[]+-*/%><=!&|;“‘,_#@$^~№:?"
    rules = []
    # Программа ->
    # rules.append(gr.Rule(gr.Term("программа"), [gr.Term("объявление переменной"), gr.Term("программа")]))
    # rules.append(gr.Rule(gr.Term("программа"), [gr.Term("объявление функции"), gr.Term("программа")]))
    # rules.append(gr.Rule(gr.Term("программа"), [gr.Term("объявление константы"), gr.Term("программа")]))
    rules.append(gr.Rule(gr.Term("программа"), [gr.Term("главная функция")]))

    # главная функция
    rules.append(
        gr.Rule(gr.Term("главная функция"), [
            gr.Term("R3"),
            gr.Term("ID"),
            gr.Term("D6"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("блок кода"),
            gr.Term("возврат значения"),
            gr.Term("D5")
        ]))

    # объявление переменной
    rules.append(
        gr.Rule(
            gr.Term("объявление переменной"),
            [gr.Term("тип данных"),
             gr.Term("идентификатор"),
             gr.Term("D3")]))
    rules.append(
        gr.Rule(gr.Term("объявление переменной"), [
            gr.Term("тип данных"),
            gr.Term("идентификатор"),
            gr.Term("O15"),
            gr.Term("значение"),
            gr.Term("D3")
        ]))
    rules.append(
        gr.Rule(gr.Term("объявление переменной"), [
            gr.Term("тип данных"),
            gr.Term("идентификатор"),
            gr.Term("O15"),
            gr.Term("выражение"),
            gr.Term("D3")
        ]))
    # в документе "тип данных переменной"

    # объявление константы
    # rules.append(
    #     gr.Rule(gr.Term("объявление константы"), [gr.Term("c"), gr.Term("o"), gr.Term("n"), gr.Term("s"), gr.Term("t"),
    #                                               gr.Term("тип данных"), gr.Term("идентификатор"), gr.Term("="),
    #                                               gr.Term("значение"), gr.Term(";")]))

    # значение
    # rules.append(gr.Rule(gr.Term("значение"), [gr.Term("число")]))
    # rules.append(gr.Rule(gr.Term("значение"), [gr.Term("символьное значение")]))
    # rules.append(gr.Rule(gr.Term("значение"), [gr.Term("логическое значение")]))
    # rules.append(gr.Rule(gr.Term("значение"), [gr.Term("идентификатор")]))
    # rules.append(gr.Rule(gr.Term("значение"), [gr.Term("вызов функции")]))

    # объявление функции
    rules.append(
        gr.Rule(gr.Term("объявление функции"), [
            gr.Term("тип данных функции"),
            gr.Term("идентификатор"),
            gr.Term("D6"),
            gr.Term("параметры функции"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("тело функции"),
            gr.Term("D5")
        ]))
    rules.append(
        gr.Rule(gr.Term("объявление функции"), [
            gr.Term("тип данных функции"),
            gr.Term("идентификатор"),
            gr.Term("D6"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("тело функции"),
            gr.Term("D5")
        ]))

    # параметры функции
    rules.append(
        gr.Rule(gr.Term("параметры функции"),
                [gr.Term("тип данных"),
                 gr.Term("идентификатор")]))
    rules.append(
        gr.Rule(gr.Term("параметры функции"), [
            gr.Term("тип данных"),
            gr.Term("идентификатор"),
            gr.Term("D2"),
            gr.Term("параметры функции")
        ]))

    # Значимый тип данных
    rules.append(gr.Rule(gr.Term("значимый тип данных"), [gr.Term("R1")]))
    rules.append(gr.Rule(gr.Term("значимый тип данных"), [gr.Term("R2")]))
    rules.append(gr.Rule(gr.Term("значимый тип данных"), [gr.Term("R3")]))
    rules.append(gr.Rule(gr.Term("значимый тип данных"), [gr.Term("R4")]))
    rules.append(gr.Rule(gr.Term("значимый тип данных"), [gr.Term("R5")]))

    # модификатор типа данных
    rules.append(gr.Rule(gr.Term("модификатор типа данных"), [gr.Term("K6")]))
    rules.append(gr.Rule(gr.Term("модификатор типа данных"), [gr.Term("K5")]))
    rules.append(gr.Rule(gr.Term("модификатор типа данных"), [gr.Term("K7")]))
    rules.append(gr.Rule(gr.Term("модификатор типа данных"), [gr.Term("K8")]))

    # тип данных
    rules.append(
        gr.Rule(gr.Term("тип данных"), [
            gr.Term("модификатор типа данных"),
            gr.Term("значимый тип данных")
        ]))
    rules.append(
        gr.Rule(gr.Term("тип данных"), [gr.Term("значимый тип данных")]))

    # тип данных функции
    rules.append(
        gr.Rule(gr.Term("тип данных функции"), [gr.Term("тип данных")]))
    rules.append(gr.Rule(gr.Term("тип данных функции"), [gr.Term("R6")]))

    # буква
    for i in range(0, 52):
        rules.append(gr.Rule(gr.Term("буква"), [gr.Term(terms_str[i])]))

    # цифра
    for i in range(10):
        rules.append(gr.Rule(gr.Term("цифра"), [gr.Term(str(i))]))

    # целое число
    rules.append(
        gr.Rule(gr.Term("целое число"),
                [gr.Term("цифра"), gr.Term("целое число")]))
    rules.append(gr.Rule(gr.Term("целое число"), [gr.Term("цифра")]))

    # вещественное число
    rules.append(
        gr.Rule(gr.Term("вещественное число"),
                [gr.Term("N"), gr.Term("D1"),
                 gr.Term("N")]))

    # число
    rules.append(gr.Rule(gr.Term("число"), [gr.Term("целое число")]))
    rules.append(gr.Rule(gr.Term("число"), [gr.Term("вещественное число")]))

    # прочие символы
    for i in range(52, 158):
        rules.append(
            gr.Rule(gr.Term("прочие символы"), [gr.Term(terms_str[i])]))

    # символ идентификатора
    # rules.append(gr.Rule(gr.Term("символ идентификатора"), gr.Term("буква")))
    # rules.append(gr.Rule(gr.Term("символ идентификатора"), gr.Term("_")))

    # идентификатор
    rules.append(gr.Rule(gr.Term("идентификатор"), [gr.Term("ID")]))

    # ид
    # rules.append(gr.Rule(gr.Term("ид"), [gr.Term("символ идентификатора"), gr.Term("ид")]))
    # rules.append(gr.Rule(gr.Term("ид"), [gr.Term("цифра"), gr.Term("ид")]))
    # rules.append(gr.Rule(gr.Term("ид"), gr.Term("символ идентификатора")))
    # rules.append(gr.Rule(gr.Term("ид"), gr.Term("цифра")))

    # тело функции
    rules.append(
        gr.Rule(gr.Term("тело функции"),
                [gr.Term("блок кода"),
                 gr.Term("возврат значения")]))
    rules.append(gr.Rule(gr.Term("тело функции"), [gr.Term("блок кода")]))

    # возврат значения
    rules.append(
        gr.Rule(gr.Term("возврат значения"),
                [gr.Term("K10"),
                 gr.Term("выражение"),
                 gr.Term("D3")]))
    rules.append(
        gr.Rule(gr.Term("возврат значения"),
                [gr.Term("K10"), gr.Term("ID"),
                 gr.Term("D3")]))
    rules.append(
        gr.Rule(gr.Term("возврат значения"),
                [gr.Term("K10"),
                 gr.Term("имя константы"),
                 gr.Term("D3")]))

    # блок кода
    rules.append(
        gr.Rule(gr.Term("блок кода"),
                [gr.Term("инструкция"),
                 gr.Term("блок кода")]))
    rules.append(gr.Rule(gr.Term("блок кода"), [gr.Term("инструкция")]))

    # цикл
    rules.append(
        gr.Rule(gr.Term("цикл"), [
            gr.Term("K9"),
            gr.Term("D6"),
            gr.Term("выражение"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("тело цикла"),
            gr.Term("D5")
        ]))
    rules.append(
        gr.Rule(gr.Term("цикл"), [
            gr.Term("K1"),
            gr.Term("D4"),
            gr.Term("тело цикла"),
            gr.Term("D5"),
            gr.Term("K9"),
            gr.Term("D6"),
            gr.Term("выражение"),
            gr.Term("D7")
        ]))

    rules.append(
        gr.Rule(gr.Term("цикл"), [
            gr.Term("K3"),
            gr.Term("D6"),
            gr.Term("инструкция"),
            gr.Term("лог выражение"),
            gr.Term("D3"),
            gr.Term("присваивание"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("тело цикла"),
            gr.Term("D5")
        ]))

    rules.append(
        gr.Rule(gr.Term("цикл"), [
            gr.Term("K3"),
            gr.Term("D6"),
            gr.Term("ID"),
            gr.Term("D3"),
            gr.Term("лог выражение"),
            gr.Term("D3"),
            gr.Term("присваивание"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("тело цикла"),
            gr.Term("D5")
        ]))

    # тело цикла
    # rules.append(
    #    gr.Rule(gr.Term("тело цикла"), [gr.Term("блок кода"), gr.Term("оператор цикла"), gr.Term("блок кода")]))
    # rules.append(gr.Rule(gr.Term("тело цикла"), [gr.Term("оператор цикла"), gr.Term("блок кода")]))
    # rules.append(gr.Rule(gr.Term("тело цикла"), [gr.Term("блок кода"), gr.Term("оператор цикла")]))
    rules.append(gr.Rule(gr.Term("тело цикла"), [gr.Term("блок кода")]))
    # rules.append(gr.Rule(gr.Term("тело цикла"), [gr.Term("оператор цикла")]))

    # оператор цикла
    # rules.append(gr.Rule(gr.Term("оператор цикла"),
    #                      [gr.Term("b"), gr.Term("r"), gr.Term("e"), gr.Term("a"), gr.Term("k"), gr.Term("D3")]))
    # rules.append(gr.Rule(gr.Term("оператор цикла"),
    #                      [gr.Term("c"), gr.Term("o"), gr.Term("n"), gr.Term("t"), gr.Term("i"), gr.Term("n"),
    #                       gr.Term("u"), gr.Term("e"), gr.Term(";")]))

    # ветвление
    rules.append(
        gr.Rule(gr.Term("ветвление"), [
            gr.Term("K4"),
            gr.Term("D6"),
            gr.Term("выражение"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("блок кода"),
            gr.Term("D5")
        ]))
    rules.append(
        gr.Rule(gr.Term("ветвление"), [
            gr.Term("K4"),
            gr.Term("D6"),
            gr.Term("выражение"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("блок кода"),
            gr.Term("D5"),
            gr.Term("K2"),
            gr.Term("D4"),
            gr.Term("блок кода"),
            gr.Term("D5")
        ]))
    rules.append(
        gr.Rule(gr.Term("ветвление"), [
            gr.Term("K4"),
            gr.Term("D6"),
            gr.Term("выражение"),
            gr.Term("D7"),
            gr.Term("D4"),
            gr.Term("блок кода"),
            gr.Term("D5"),
            gr.Term("K2"),
            gr.Term("ветвление")
        ]))

    # символьное значение
    rules.append(gr.Rule(gr.Term("символьное значение"), [gr.Term("C")]))
    # rules.append(gr.Rule(gr.Term("символьное значение"), [gr.Term("'"), gr.Term("цифра"), gr.Term("'")]))
    # rules.append(gr.Rule(gr.Term("символьное значение"), [gr.Term("'"), gr.Term("прочие символы"), gr.Term("'")]))

    # оператор сравнения
    rules.append(gr.Rule(gr.Term("оператор сравнения"), [gr.Term("O13")]))
    rules.append(gr.Rule(gr.Term("оператор сравнения"), [gr.Term("O14")]))
    rules.append(gr.Rule(gr.Term("оператор сравнения"), [gr.Term("O11")]))
    rules.append(gr.Rule(gr.Term("оператор сравнения"), [gr.Term("O12")]))
    rules.append(gr.Rule(gr.Term("оператор сравнения"), [gr.Term("O9")]))
    rules.append(gr.Rule(gr.Term("оператор сравнения"), [gr.Term("O10")]))

    # мат знак типа сложения
    rules.append(gr.Rule(gr.Term("мат знак типа сложения"), [gr.Term("O1")]))
    rules.append(gr.Rule(gr.Term("мат знак типа сложения"), [gr.Term("O2")]))

    # мат знак типа умножения
    rules.append(gr.Rule(gr.Term("мат знак типа умножения"), [gr.Term("O3")]))
    rules.append(gr.Rule(gr.Term("мат знак типа умножения"), [gr.Term("O4")]))
    rules.append(gr.Rule(gr.Term("мат знак типа умножения"), [gr.Term("O5")]))

    # мат выражение
    rules.append(gr.Rule(gr.Term("мат выражение"), [gr.Term("E1")]))

    # E1
    rules.append(
        gr.Rule(
            gr.Term("E1"),
            [gr.Term("T1"),
             gr.Term("мат знак типа сложения"),
             gr.Term("E1")]))
    rules.append(gr.Rule(gr.Term("E1"), [gr.Term("T1")]))

    # T1
    rules.append(
        gr.Rule(
            gr.Term("T1"),
            [gr.Term("F1"),
             gr.Term("мат знак типа умножения"),
             gr.Term("T1")]))
    rules.append(gr.Rule(gr.Term("T1"), [gr.Term("F1")]))
    # разве тут не должно быть наподобие предыдущего, F1 мат знак T1 ?

    # F1
    rules.append(
        gr.Rule(gr.Term("F1"),
                [gr.Term("("), gr.Term("E1"),
                 gr.Term(")")]))
    rules.append(gr.Rule(gr.Term("F1"), [gr.Term("N")]))
    rules.append(gr.Rule(gr.Term("F1"), [gr.Term("вещественное число")]))
    rules.append(gr.Rule(gr.Term("F1"), [gr.Term("ID")]))

    # логическое значение
    rules.append(gr.Rule(gr.Term("логическое значение"), [gr.Term("R10")]))
    rules.append(gr.Rule(gr.Term("логическое значение"), [gr.Term("R11")]))

    # лог знак типа сложения
    rules.append(gr.Rule(gr.Term("лог знак типа сложения"), [gr.Term("O6")]))
    rules.append(
        gr.Rule(gr.Term("лог знак типа сложения"),
                [gr.Term("оператор сравнения")]))

    # лог знак типа умножения
    rules.append(gr.Rule(gr.Term("лог знак типа умножения"), [gr.Term("O7")]))

    # лог знак унарной операции
    rules.append(gr.Rule(gr.Term("лог знак унарной операции"),
                         [gr.Term("O8")]))

    # лог выражение
    rules.append(
        gr.Rule(gr.Term("лог выражение"), [
            gr.Term("мат выражение"),
            gr.Term("оператор сравнения"),
            gr.Term("мат выражение")
        ]))
    rules.append(
        gr.Rule(gr.Term("лог выражение"), [
            gr.Term("символьное значение"),
            gr.Term("оператор сравнения"),
            gr.Term("символьное значение")
        ]))
    rules.append(gr.Rule(gr.Term("лог выражение"), [gr.Term("лог значение")]))

    # # E2
    # rules.append(gr.Rule(gr.Term("E2"), [gr.Term("T2"), gr.Term("лог знак типа сложения"), gr.Term("E2")]))
    # rules.append(gr.Rule(gr.Term("E2"), [gr.Term("T2")]))
    # #rules.append(gr.Rule(gr.Term("E2"), [gr.Term("лог знак унарной операции"), gr.Term("T2")]))
    #
    # # T2
    # rules.append(gr.Rule(gr.Term("T2"), [gr.Term("T2"), gr.Term("лог знак типа умножения"), gr.Term("F2")]))
    # rules.append(gr.Rule(gr.Term("T2"), [gr.Term("F2")]))
    # #rules.append(gr.Rule(gr.Term("T2"), [gr.Term("лог знак унарной операции"), gr.Term("F2")]))
    # # разве тут не должно быть наподобие предыдущего, F2 лог знак T2 ?
    #
    # # F2
    # rules.append(gr.Rule(gr.Term("F2"), [gr.Term("D6"), gr.Term("E2"), gr.Term("D7")]))
    # rules.append(gr.Rule(gr.Term("F2"), [gr.Term("лог значение")]))
    # rules.append(gr.Rule(gr.Term("F2"), [gr.Term("мат выражение")]))

    # выражение
    rules.append(gr.Rule(gr.Term("выражение"), [gr.Term("лог выражение")]))
    rules.append(gr.Rule(gr.Term("выражение"), [gr.Term("мат выражение")]))
    rules.append(
        gr.Rule(gr.Term("выражение"), [gr.Term("символьное значение")]))

    # инструкция
    rules.append(
        gr.Rule(
            gr.Term("инструкция"),
            [gr.Term("присваивание"), gr.Term("D3")]))
    rules.append(
        gr.Rule(gr.Term("инструкция"), [gr.Term("объявление переменной")]))
    rules.append(
        gr.Rule(gr.Term("инструкция"), [gr.Term("объявление константы")]))
    rules.append(
        gr.Rule(
            gr.Term("инструкция"),
            [gr.Term("вызов функции"), gr.Term("D3")]))
    rules.append(
        gr.Rule(gr.Term("инструкция"),
                [gr.Term("выражение"), gr.Term("D3")]))
    rules.append(gr.Rule(gr.Term("инструкция"), [gr.Term("цикл")]))
    rules.append(gr.Rule(gr.Term("инструкция"), [gr.Term("ветвление")]))

    # вызов функции
    rules.append(
        gr.Rule(gr.Term("вызов функции"),
                [gr.Term("имя функции"),
                 gr.Term("D6"),
                 gr.Term("D7")]))
    rules.append(
        gr.Rule(gr.Term("вызов функции"), [
            gr.Term("имя функции"),
            gr.Term("D6"),
            gr.Term("параметры вызова функции"),
            gr.Term("D7")
        ]))

    # параметры вызова функции
    rules.append(
        gr.Rule(gr.Term("параметры вызова функции"), [gr.Term("выражение")]))
    rules.append(
        gr.Rule(gr.Term("параметры вызова функции"), [
            gr.Term("выражение"),
            gr.Term(","),
            gr.Term("параметры вызова функции")
        ]))

    # оператор присваивания
    rules.append(gr.Rule(gr.Term("оператор присваивания"), [gr.Term("O15")]))
    # rules.append(gr.Rule(gr.Term("оператор присваивания"), [gr.Term("+"), gr.Term("=")]))
    # rules.append(gr.Rule(gr.Term("оператор присваивания"), [gr.Term("-"), gr.Term("=")]))
    # rules.append(gr.Rule(gr.Term("оператор присваивания"), [gr.Term("*"), gr.Term("=")]))
    # rules.append(gr.Rule(gr.Term("оператор присваивания"), [gr.Term("/"), gr.Term("=")]))
    # rules.append(gr.Rule(gr.Term("оператор присваивания"), [gr.Term("%"), gr.Term("=")]))

    # присваивание
    rules.append(
        gr.Rule(gr.Term("присваивание"), [
            gr.Term("идентификатор"),
            gr.Term("оператор присваивания"),
            gr.Term("выражение")
        ]))
    rules.append(
        gr.Rule(gr.Term("присваивание"), [
            gr.Term("идентификатор"),
            gr.Term("оператор присваивания"),
            gr.Term("идентификатор")
        ]))

    return rules
Exemple #25
0
def topRule(t):
    """Return a ``grammar.Rule`` describing the top level of tree ``t``.

    The rule is constructed from ``t.root`` and a list holding the ``root``
    attribute of every element of ``t.children``, in iteration order.
    """
    parent_root = t.root
    child_roots = [child.root for child in t.children]
    return grammar.Rule(parent_root, child_roots)