Ejemplo n.º 1
0
def shorten(production):
    """Split off the first symbol of a production that is longer than two.

    For a production ``lhs -> s0 s1 ... sn`` with more than two symbols on
    its right-hand side, a new nonterminal ``N`` is created (named by
    ``create_nonterminal()``) and two rules are appended to the module-level
    list ``R``: ``lhs -> s0 N`` followed by ``N -> s1 ... sn``.

    The original production is not removed from ``R`` here; it is only
    recorded in ``marked_for_deletion``.

    Productions with two or fewer right-hand-side symbols are left alone.
    Only one symbol is split off per call, so the second new rule can itself
    still have more than two symbols.
    """
    head = production.lhs()
    symbols = production.rhs()
    if len(symbols) <= 2:
        # Already short enough: nothing to do.
        return

    fresh = grammar.Nonterminal(create_nonterminal())
    leading_rule = grammar.Production(head, (symbols[0], fresh))
    remainder_rule = grammar.Production(fresh, tuple(symbols[1:]))

    marked_for_deletion.append(production)
    R.extend((leading_rule, remainder_rule))
Ejemplo n.º 2
0
def remove_rhs_terminals(production):
    """Replace terminals in a multi-symbol right-hand side by nonterminals.

    Productions whose right-hand side has at most one symbol are left
    untouched. Otherwise every symbol for which ``grammar.is_terminal`` is
    true is swapped for a newly created nonterminal ``N`` (named by
    ``create_nonterminal()``), and a defining rule ``N -> terminal`` is
    appended to the module-level list ``R``. A new nonterminal is created
    for each terminal occurrence.

    Finally the entry for ``production`` in ``R`` is overwritten in place
    with the rewritten rule; ``R.index`` raises ``ValueError`` if the
    production is not in ``R``.
    """
    symbols = production.rhs()
    if len(symbols) <= 1:
        return

    rewritten = []
    for symbol in symbols:
        if not grammar.is_terminal(symbol):
            rewritten.append(symbol)
            continue

        # Stand-in nonterminal for this terminal occurrence ...
        proxy = grammar.Nonterminal(create_nonterminal())
        rewritten.append(proxy)
        # ... together with the rule that defines it.
        R.append(grammar.Production(proxy, (symbol,)))

    # Swap the original rule for the rewritten one, keeping its position.
    replacement = grammar.Production(production.lhs(), tuple(rewritten))
    R[R.index(production)] = replacement
Ejemplo n.º 3
0
def _rules_input_prompt(nonterminals, terminals):
    """
    Given a list of nonterminals and a list of terminals, this function creates a list of rules
    (aka grammar.Production instances) and returns it. There must be a whitespace between all terminals and nonterminals symbols. Extra whitespaces are ignored/conside.
    """
    print("You will now enter the rules of the grammar.")
    print("Rule must be respect the following format : "
          "\n 1. the two characters \"->\" are used for separating the right hand side from the left hand side"
          "\n 2. There must be a whitespace between all symbols in the grammar (including non-terminals)"
          "\n 3. The character \'|\' means \"or\" "
          "\n ex. A -> b c|A b|d")
    print("Press enter when you are done.")
    grammar_rules = []
    while (True):
        try:
            x = input()
            if x == '':
                break
            elif '->' in x:

                y = x.split('->')
                left_hand_side = y[0].replace(' ', '')

                assert len(y) == 2
                assert left_hand_side in nonterminals
                left_hand_side = grammar.Nonterminal(left_hand_side)
                y[1] = y[1].split('|')
                for rhs in y[1]:
                    right_hand_side = []
                    for y in rhs.split(' '):
                        if y != '':
                            if y in nonterminals:
                                right_hand_side.append(grammar.Nonterminal(y))


                            elif y in terminals:
                                right_hand_side.append(y)
                            else:
                                print(y + " is not a valid input.")
                                raise Exception

                    grammar_rules.append(grammar.Production(left_hand_side, right_hand_side))
            else:
                print("Invalid input. no \'->\'  in the input.")
                raise Exception

        except Exception as e:
            print("Invalid Input.")
            pass

    return grammar_rules
Ejemplo n.º 4
0
def generate_grammar(phrases):
    """Build a context-free grammar whose lexical rules cover ``phrases``.

    Every phrase receives one production ``tag -> [phrase]`` per tag:

    * a phrase listed in ``known_functions`` uses exactly the tags given
      there;
    * otherwise it gets ``lit``, ``d``, ``syn`` and ``first`` plus one
      nonterminal for each kind in ``INDICATORS`` that has a word matching
      the phrase;
    * if no indicator kind matches, it gets ``lit``, ``d``, ``syn``,
      ``first``, ``ana_``, ``sub_`` and ``rev_``.

    The result is ``gram.ContextFreeGrammar(top, ...)`` over ``base_prods``
    followed by these lexical productions.
    """

    def resembles(word, phrase):
        # Exact match, or a word longer than 5 characters whose length is
        # within 3 of the phrase's and whose stem (the word minus its last
        # three characters) is a prefix of the phrase.
        if word == phrase:
            return True
        return (len(word) > 5
                and abs(len(word) - len(phrase)) <= 3
                and phrase.startswith(word[:-3]))

    lexical = []
    for phrase in phrases:
        if phrase in known_functions:
            categories = known_functions[phrase]
        else:
            indicated = [
                gram.Nonterminal(kind)
                for kind in INDICATORS
                if any(resembles(word, phrase) for word in INDICATORS[kind])
            ]
            if indicated:
                categories = [lit, d, syn, first] + indicated
            else:
                categories = [lit, d, syn, first, ana_, sub_, rev_]

        lexical.extend(gram.Production(tag, [phrase]) for tag in categories)

    return gram.ContextFreeGrammar(top, base_prods + lexical)
Ejemplo n.º 5
0
        [d, clue_arg, clue_arg],
        [d, clue_arg, clue_arg, clue_arg],
    ]
}

# Extra right-hand sides: `sub_init_` followed by 3 to 7 `first` symbols,
# then 3 to 7 `first` symbols followed by `sub_init_`.
additional_clue_rules = []
for i in range(3, 8):
    additional_clue_rules.append([sub_init_] + [first] * i)
for i in range(3, 8):
    additional_clue_rules.append([first] * i + [sub_init_])

# Each extra rule is registered under `top` twice: once with `d` at the end
# and once with `d` at the start.
for r in additional_clue_rules:
    production_rules[top].extend([r + [d], [d] + r])

# Flatten the {lhs: [rhs, ...]} table into a list of Production objects.
base_prods = []
for n, rules in production_rules.items():
    base_prods.extend(gram.Production(n, r) for r in rules)

# Hand-written tag lists for specific words: each key is a word and each
# value is the list of symbols (defined elsewhere in this module) assigned
# to that word. NOTE(review): presumably consulted before any automatic
# tagging of a word -- confirm against the code that reads this table.
known_functions = {
    'in': [ins_, lit, null, sub_],
    'a': [lit, syn, null],
    'is': [null, lit],
    'for': [null, syn],
    'large': [first, syn],
    'primarily': [sub_init_],
    'and': [null, lit],
    'of': [null],
    'on': [ins_, null, lit, syn],
    'with': [null, ins_]
}

Ejemplo n.º 6
0
import nltk.parse as parse
import nltk.grammar as grammar
from pycryptics.grammar.memo_chart import MemoChart, ClueTree

# Nonterminals of a tiny, deliberately ambiguous grammar.
top, bar, baz, foo = map(grammar.Nonterminal, ('top', 'bar', 'baz', 'foo'))

# The token 'foo' is derivable from `top` through either `bar` or `baz`,
# and both routes pass through the same `foo` nonterminal.
prods = [
    grammar.Production(lhs, rhs)
    for lhs, rhs in (
        (top, [bar]),
        (top, [baz]),
        (bar, [foo]),
        (baz, [foo]),
        (foo, ['foo']),
    )
]

g = grammar.ContextFreeGrammar(top, prods)

parser = parse.EarleyChartParser(g, trace=True, chart_class=MemoChart)

p = parser.nbest_parse(['foo'])

# Single-argument print(...) with %-formatting produces the same output as
# the Python 2 print statement and is also valid syntax on Python 3.
print(p)
print(p[0][0][0])
print(p[1][0][0])
# Compare the innermost subtree of the first two parses by value and by
# identity.
print("== %s" % (p[0][0][0] == p[1][0][0],))
print("is %s" % (p[0][0][0] is p[1][0][0],))
print(isinstance(p[0], ClueTree))
Ejemplo n.º 7
0
def repl_unit(A, B):
    """Give ``A`` a copy of every rule whose left-hand side is ``B``.

    For each production in the module-level list ``R`` with
    ``lhs() == B``, a new production ``A -> rhs`` is appended to ``R``.
    Existing rules are not removed.

    The copies are built from ``R`` as it is before anything is appended,
    so the scan never visits the rules it has just added. Appending while
    iterating the same list would never terminate when ``A == B``, because
    every appended copy would match again.
    """
    copies = [
        grammar.Production(A, rule.rhs())
        for rule in R
        if rule.lhs() == B
    ]
    R.extend(copies)