def shorten(production):
    """Split a production whose right-hand side has more than two symbols.

    A rule ``A -> X1 X2 ... Xn`` with ``n > 2`` is queued for deletion and
    replaced by the pair ``A -> X1 N`` and ``N -> X2 ... Xn``, where ``N`` is
    a new nonterminal named by ``create_nonterminal()``.  The second rule may
    itself still be longer than two symbols.  Productions with two or fewer
    right-hand-side symbols are left untouched.

    Side effects: appends ``production`` to ``marked_for_deletion`` and the
    two replacement rules to ``R`` (both taken from the surrounding scope).
    """
    head = production.lhs()
    symbols = production.rhs()
    if len(symbols) <= 2:
        return

    fresh = grammar.Nonterminal(create_nonterminal())
    leading_rule = grammar.Production(head, (symbols[0], fresh))
    # Everything after the first symbol moves under the fresh nonterminal.
    trailing_rule = grammar.Production(fresh, tuple(symbols[1:]))

    marked_for_deletion.append(production)
    R.extend((leading_rule, trailing_rule))
def remove_rhs_terminals(production):
    """Replace terminals in a multi-symbol right-hand side by nonterminals.

    For a production with more than one right-hand-side symbol, every
    terminal ``t`` is swapped for a new dummy nonterminal ``N`` (named by
    ``create_nonterminal()``), and the defining rule ``N -> t`` is appended to
    ``R``.  The original production is then overwritten in place inside ``R``
    by the rewritten one.  Productions with a single (or no) right-hand-side
    symbol are left untouched.
    """
    symbols = production.rhs()
    if len(symbols) <= 1:
        return

    rewritten = []
    for symbol in symbols:
        if not grammar.is_terminal(symbol):
            rewritten.append(symbol)
            continue
        # Introduce a dummy nonterminal that derives exactly this terminal.
        dummy = grammar.Nonterminal(create_nonterminal())
        rewritten.append(dummy)
        R.append(grammar.Production(dummy, (symbol,)))

    # Swap the original rule for its rewritten form at the same position.
    R[R.index(production)] = grammar.Production(production.lhs(),
                                                tuple(rewritten))
def _parse_rule_line(line, nonterminals, terminals):
    """Parse one ``lhs -> alt | alt ...`` line into a list of productions.

    Raises:
        ValueError: if the line is malformed.  The exception message is the
            specific text to show the user; it is empty when only the
            generic error line should be printed.
    """
    if '->' not in line:
        raise ValueError("Invalid input. no '->' in the input.")

    parts = line.split('->')
    head = parts[0].replace(' ', '')
    # Reject more than one "->" and left-hand sides that were never declared.
    # Explicit checks are used instead of ``assert`` so they survive ``-O``.
    if len(parts) != 2 or head not in nonterminals:
        raise ValueError('')

    lhs = grammar.Nonterminal(head)
    productions = []
    for alternative in parts[1].split('|'):
        rhs = []
        for symbol in alternative.split(' '):
            if symbol == '':
                # Extra whitespace between symbols is ignored.
                continue
            if symbol in nonterminals:
                rhs.append(grammar.Nonterminal(symbol))
            elif symbol in terminals:
                rhs.append(symbol)
            else:
                raise ValueError(symbol + " is not a valid input.")
        productions.append(grammar.Production(lhs, rhs))
    return productions


def _rules_input_prompt(nonterminals, terminals):
    """Interactively read grammar rules and return them as productions.

    Given a collection of nonterminal symbols and a collection of terminal
    symbols, prompt on standard input for rules of the form
    ``A -> b c|A b|d`` and return the resulting list of
    ``grammar.Production`` instances.  Symbols must be separated by
    whitespace; extra whitespace is ignored.

    Reading stops at the first empty line or at end of input.  An invalid
    line is reported and skipped as a whole: none of its alternatives are
    added, even if some of them were well formed.
    """
    print("You will now enter the rules of the grammar.")
    print("Rule must be respect the following format : "
          "\n 1. the two characters \"->\" are used for separating the right hand side from the left hand side"
          "\n 2. There must be a whitespace between all symbols in the grammar (including non-terminals)"
          "\n 3. The character \'|\' means \"or\" "
          "\n ex. A -> b c|A b|d")
    print("Press enter when you are done.")

    grammar_rules = []
    while True:
        try:
            line = input()
        except EOFError:
            # Exhausted stdin (e.g. piped input) means "done"; retrying would
            # loop forever because every further read fails the same way.
            break
        if line == '':
            break
        try:
            # Parse the whole line first so a bad alternative cannot leave a
            # partially registered rule behind.
            grammar_rules.extend(
                _parse_rule_line(line, nonterminals, terminals))
        except ValueError as err:
            detail = str(err)
            if detail:
                print(detail)
            print("Invalid Input.")
    return grammar_rules
def generate_grammar(phrases):
    """Build a context-free grammar with one lexical rule per phrase and tag.

    Each phrase gets the tags listed for it in ``known_functions`` if it is
    a key there.  Otherwise it gets ``lit``, ``d``, ``syn`` and ``first``
    plus a nonterminal for every ``INDICATORS`` kind containing a word that
    resembles the phrase; when no kind matches, ``ana_``, ``sub_`` and
    ``rev_`` are added instead.  The returned grammar starts at ``top`` and
    consists of ``base_prods`` followed by the generated lexical rules.
    """
    def resembles(word, phrase):
        # Exact match, or a word longer than five letters whose stem (all but
        # its last three letters) prefixes a phrase of similar length.
        if word == phrase:
            return True
        return (len(word) > 5
                and abs(len(word) - len(phrase)) <= 3
                and phrase.startswith(word[:-3]))

    lexical_rules = []
    for phrase in phrases:
        if phrase in known_functions:
            tags = known_functions[phrase]
        else:
            matching_kinds = [
                kind for kind in INDICATORS
                if any(resembles(word, phrase) for word in INDICATORS[kind])
            ]
            if matching_kinds:
                tags = [lit, d, syn, first] + [
                    gram.Nonterminal(kind) for kind in matching_kinds
                ]
            else:
                tags = [lit, d, syn, first, ana_, sub_, rev_]
        lexical_rules.extend(gram.Production(tag, [phrase]) for tag in tags)
    return gram.ContextFreeGrammar(top, base_prods + lexical_rules)
# NOTE(review): the next four lines close a literal whose opening lies above
# this excerpt; presumably it is the production_rules mapping used below.
[d, clue_arg, clue_arg],
[d, clue_arg, clue_arg, clue_arg],
]
}

# Extra rule bodies: sub_init_ followed by 3..7 copies of first, and 3..7
# copies of first followed by sub_init_.
additional_clue_rules = [[sub_init_] + [first] * i for i in range(3, 8)] + [
    [first] * i + [sub_init_] for i in range(3, 8)]
# Register each extra body under top twice: once with d appended and once
# with d prepended.
for r in additional_clue_rules:
    production_rules[top].append(r + [d])
    production_rules[top].append([d] + r)

# Flatten the mapping into a list with one Production per (key, rule) pair.
base_prods = []
for n, rules in production_rules.items():
    for r in rules:
        base_prods.append(gram.Production(n, r))

# Fixed tag lists for specific words, keyed by the word itself.
known_functions = {
    'in': [ins_, lit, null, sub_],
    'a': [lit, syn, null],
    'is': [null, lit],
    'for': [null, syn],
    'large': [first, syn],
    'primarily': [sub_init_],
    'and': [null, lit],
    'of': [null],
    'on': [ins_, null, lit, syn],
    'with': [null, ins_]
}
# Demo script: parse the single token 'foo' with an ambiguous toy grammar
# using MemoChart as the chart class, then compare the 'foo' subtrees found
# under the two resulting parses.
import nltk.parse as parse
import nltk.grammar as grammar

from pycryptics.grammar.memo_chart import MemoChart, ClueTree

top = grammar.Nonterminal('top')
bar = grammar.Nonterminal('bar')
baz = grammar.Nonterminal('baz')
foo = grammar.Nonterminal('foo')

# Two derivations exist for 'foo': top -> bar -> foo and top -> baz -> foo.
prods = [
    grammar.Production(top, [bar]),
    grammar.Production(top, [baz]),
    grammar.Production(bar, [foo]),
    grammar.Production(baz, [foo]),
    grammar.Production(foo, ['foo'])
]

g = grammar.ContextFreeGrammar(top, prods)
parser = parse.EarleyChartParser(g, trace=True, chart_class=MemoChart)
p = parser.nbest_parse(['foo'])

# Single-argument print(...) calls are valid on both Python 2 and Python 3
# and produce the same text as the former print statements; the labelled
# lines are formatted explicitly so Python 2 does not print a tuple.
print(p)
print(p[0][0][0])
print(p[1][0][0])
# Equality versus identity of the two parses' 'foo' subtrees.
print("== %s" % (p[0][0][0] == p[1][0][0],))
print("is %s" % (p[0][0][0] is p[1][0][0],))
print(isinstance(p[0], ClueTree))
def repl_unit(A, B):
    """Give ``A`` a copy of every rule currently defined for ``B``.

    For each production ``B -> rhs`` present in ``R`` when the call starts,
    ``A -> rhs`` is appended to ``R``.

    The new productions are built in full before ``R`` is modified.
    Appending to ``R`` while iterating over it meant that, when ``A == B``,
    every appended rule matched again and the loop never finished.  With the
    snapshot, that case appends one copy of each matching rule and returns.
    """
    # The list comprehension is fully evaluated before extend() mutates R.
    R.extend([grammar.Production(A, rule.rhs())
              for rule in R if rule.lhs() == B])