def cky_parse(grammar, tokens):
    '''Fill and return a CKY parse table for ``tokens`` using ``grammar``.

    For every column ``j`` the cell ``(j-1, j)`` is seeded with one entry per
    production returned by ``grammar.productions(rhs=tokens[j-1])``.  Then,
    moving up the rows, each cell ``(i, j)`` collects every production
    ``A -> B C`` where ``B`` is a symbol of cell ``(i, k)`` and ``C`` is a
    symbol of cell ``(k, j)`` for some split point ``i < k < j``.

    The grammar's start symbol is only admitted in the root cell
    ``(0, table.n)``.

    Returns the populated ``ParseTable``.  Each entry added to a cell is a
    dict ``{lhs: (production, (left_cell_key, right_cell_key))}``.
    '''
    # Initialize parse table
    table = ParseTable(tokens)
    n = table.n
    start = grammar.start()

    # Index the productions by right-hand side once, instead of scanning the
    # whole grammar for every (left, down) pair inside the innermost loop.
    # Insertion order is kept, so rules are visited in the same order as a
    # linear scan of grammar.productions().
    rules_by_rhs = {}
    for production in grammar.productions():
        rules_by_rhs.setdefault(production.rhs(), []).append(production)

    # Move left to right across table
    for j in range(1, n + 1):
        # Add nonterminal symbols that correspond to terminal symbol
        lexical_rules = grammar.productions(rhs=tokens[j - 1])
        table[(j - 1, j)] = TableEntry()
        for rule in lexical_rules:
            # NOTE(review): presumably ParseTable keeps the token for column j
            # under key (table.n, j) -- confirm against ParseTable.
            new_rule = nltk.grammar.Production(
                nltk.grammar.Nonterminal(rule.lhs()),
                table[(n, j)].symbols + [None])
            table[(j - 1, j)].add_entry(
                {rule.lhs(): (new_rule, ((n, j), None))})

        # Iterate over all non-(nonterminal -> terminal) cells of table
        # Move up rows
        for i in reversed(range(0, j)):
            # Reset new symbols list and children list for this cell
            symbols = []
            children = []

            # Iterate over all possible split points
            for k in range(i + 1, j):
                # Cells to the left in the row and below in the column; a
                # missing cell means no constituent spans that range.
                try:
                    left_possibilities = table[i, k].symbols
                    down_possibilities = table[k, j].symbols
                except KeyError:
                    continue

                for left in left_possibilities:
                    for down in down_possibilities:
                        # All rules A -> B C where B is left and C is down
                        for rule in rules_by_rhs.get((left, down), ()):
                            lhs = rule.lhs()
                            # NOTE(review): the original source was collapsed
                            # onto one line; its dangling `else` is read as
                            # pairing with "lhs not in symbols", so repeated
                            # symbols still record an alternative derivation.
                            if lhs not in symbols:
                                # The start symbol may only span the whole
                                # input, i.e. live in the root cell.
                                if lhs == start and (i, j) != (0, n):
                                    continue
                                symbols.append(lhs)
                            new_rule = nltk.grammar.Production(
                                lhs, [left, down])
                            children.append(
                                {lhs: (new_rule, ((i, k), (k, j)))})

            # Add new entry to table
            if symbols:
                table[(i, j)] = TableEntry()
                for child in children:
                    table[(i, j)].add_entry(child)

    return table
def main():
    '''Load the grammar named by ``sys.argv[1]`` and print a generated sentence.

    The grammar's productions are grouped into a dict that maps each
    left-hand-side symbol name to the list of its right-hand sides; that dict
    and the start symbol's name are handed to ``generate`` and the result is
    printed.
    '''
    # Load and sort grammar.
    grammar = nltk.data.load(sys.argv[1])

    # A plain dict is used on purpose (not a defaultdict) so that lookups of
    # unknown symbols inside generate() still raise KeyError / test False.
    grammar_dict = {}
    for production in grammar.productions():
        nt = production.lhs().symbol()
        grammar_dict.setdefault(nt, []).append(production.rhs())

    # Generate sentence from start symbol.
    print(generate(grammar.start().symbol(), grammar_dict))
def add_sub_nonterminal(grammar, n_gram, freq):
    '''Replaces an n-gram in a grammar with a new non-terminal.

    Every occurrence of the symbol sequence ``n_gram`` on the right-hand side
    of a production is replaced (scanning left to right) by a fresh
    nonterminal named ``sub_(<sym>)_..._<counter>``, and a production
    ``<new nonterminal> -> n_gram`` with probability 1.0 and frequency
    ``freq`` is appended.

    Side effect: ``grammar.new_symbol_count`` is incremented; the returned
    grammar carries the incremented value.

    Returns a new ``WeightedGrammar`` with the same start symbol.
    Raises ``ValueError`` if ``n_gram`` is empty.
    '''
    n_gram = list(n_gram)
    if not n_gram:
        # An empty n-gram matches at every position and each "replacement"
        # inserts a symbol, so the scan below would never terminate.
        raise ValueError("n_gram must contain at least one symbol")
    width = len(n_gram)

    prods = []
    nt = nltk.grammar.Nonterminal(
        'sub_%s_%s' % ('_'.join("(%s)" % symbol for symbol in n_gram),
                       grammar.new_symbol_count))
    grammar.new_symbol_count += 1

    for prod in grammar.productions():
        rhs = list(prod.rhs())
        i = 0
        # len(rhs) shrinks as matches are collapsed, so re-check every step.
        while i < len(rhs):
            if rhs[i:i + width] == n_gram:
                rhs[i:i + width] = [nt]
            i += 1
        new_prod = nltk.grammar.WeightedProduction(
            prod.lhs(), rhs, prob=prod.prob())
        new_prod.freq = prod.freq
        prods.append(new_prod)

    # The production that expands the new nonterminal back into the n-gram.
    new_prod = nltk.grammar.WeightedProduction(nt, n_gram, prob=1.0)
    new_prod.freq = freq
    prods.append(new_prod)

    new_grammar = nltk.grammar.WeightedGrammar(grammar.start(), prods)
    new_grammar.new_symbol_count = grammar.new_symbol_count
    return new_grammar
def add_join_nonterminal(grammar, diff_index, base_prod):
    '''Merge productions that differ from ``base_prod`` only at ``diff_index``.

    A production is selected when its right-hand side has the same length as
    ``base_prod``'s and agrees with it at every position except possibly
    ``diff_index`` (left-hand sides are not compared).  Each selected
    production is replaced by two productions:

    * ``lhs -> base rhs with a fresh nonterminal at diff_index`` (keeping the
      original probability and frequency), and
    * ``<fresh nonterminal> -> <the symbol it had at diff_index>`` with
      probability ``freq / total freq of the selected productions``.

    All other productions are carried over unchanged.

    Side effect: ``grammar.new_symbol_count`` is incremented; the returned
    grammar carries the incremented value.

    Returns a new ``WeightedGrammar`` with the same start symbol.
    '''
    base_rhs = base_prod.rhs()

    prods = []      # productions that get merged
    new_prods = []  # productions of the resulting grammar
    for prod in grammar.productions():
        rhs = prod.rhs()
        if len(rhs) == len(base_rhs) and all(
                rhs[i] == base_rhs[i] or i == diff_index
                for i in range(len(rhs))):
            prods.append(prod)
        else:
            new_prods.append(prod)

    nt = nltk.grammar.Nonterminal(
        'join_%s_%s' % ('_'.join("(%s)" % prod.rhs()[diff_index]
                                 for prod in prods),
                        grammar.new_symbol_count))
    grammar.new_symbol_count += 1

    total_freq = sum(prod.freq for prod in prods)
    new_rhs = list(base_rhs)
    new_rhs[diff_index] = nt

    for prod in prods:
        new_prod = nltk.grammar.WeightedProduction(
            prod.lhs(), new_rhs, prob=prod.prob())
        new_prod.freq = prod.freq
        new_prods.append(new_prod)

        # float() forces true division: with integer frequencies under
        # Python 2, freq / total_freq would floor to 0.
        new_prod = nltk.grammar.WeightedProduction(
            nt, [prod.rhs()[diff_index]],
            prob=float(prod.freq) / total_freq)
        new_prod.freq = prod.freq
        new_prods.append(new_prod)

    new_grammar = nltk.grammar.WeightedGrammar(grammar.start(), new_prods)
    new_grammar.new_symbol_count = grammar.new_symbol_count
    return new_grammar