def add_verb(self, form, root, past, present, ppart=None):
    """Add lexical rules for a verb's inflected forms.

    ``form`` is a ``(pos, proc)`` pair: ``pos`` is a feature category
    template for the verb and ``proc`` builds the semantic procedure for
    a given root and tense.  Rules are added for the untensed root, the
    past and present tenses, and the past participle.  If ``ppart`` is
    not given, the past form is reused for the participle.
    """
    if ppart is None:
        ppart = past
    (pos, proc) = form

    # Untensed and tensed variants of the verb's category.
    untensed = pos.copy()
    untensed["tense"] = False
    tensed = pos.copy()
    tensed["tense"] = True

    self.add_rule(cfg.Production(untensed.copy(), [root]),
                  proc(root, False))
    self.add_rule(cfg.Production(tensed.copy(), [past]),
                  proc(root, "past"))
    self.add_rule(cfg.Production(tensed.copy(), [present]),
                  proc(root, "present"))

    # The participle gets its own V_part category, keyed on the verb's POS.
    participle = GrammarCategory(pos="V_part", form=pos["pos"])
    self.add_rule(cfg.Production(participle, [ppart]),
                  proc(root, "past-participle"))
def add_lexicon(self, preterminal, terminals):
    """Add one lexical production per terminal under ``preterminal``.

    The preterminal may be given as a string (parsed to a Category) or
    as any value (wrapped in a Nonterminal if it is not one already).
    Invalidates the cached parser, since the grammar has changed.
    """
    self.parser = None  # grammar changed; drop any cached parser
    if isinstance(preterminal, str):
        preterminal = Category.parse(preterminal)
    if not isinstance(preterminal, cfg.Nonterminal):
        preterminal = cfg.Nonterminal(preterminal)
    for word in terminals:
        production = cfg.Production(preterminal, [word])
        self.lexicon.append(production)
        self.productions.append(production)
def parse_rule(self, text):
    """Parse a textual grammar rule into a ``cfg.Production``.

    Tokens ending in "," or ":" belong to a multi-token feature
    specification, so they are re-joined with the following token
    before the left- and right-hand sides are parsed as categories.
    """
    tokens = text.split()
    # Merge any token whose trailing "," or ":" continues onto the next.
    idx = 0
    while idx < len(tokens):
        if tokens[idx].endswith((",", ":")):
            tokens[idx:idx + 2] = [" ".join(tokens[idx:idx + 2])]
        else:
            idx += 1
    lhs = GrammarCategory.parse(tokens[0])
    # tokens[1] is the arrow; the rest form the right-hand side.
    rhs = map(GrammarCategory.parse, tokens[2:])
    return cfg.Production(lhs, rhs)
def parse_rule(self, text):
    """Parse a textual grammar rule into a ``cfg.Production``.

    Like the plain rule parser, but a right-hand side wrapped in single
    quotes is treated as one literal terminal (the quotes are stripped
    and the quoted text kept intact, spaces and all).
    """
    tokens = text.split()
    # Quoted terminal on the RHS: keep LHS + arrow, replace the rest
    # with the text between the first pair of single quotes.
    if len(tokens) > 3 and "'" in text:
        tokens = text.split()[:2] + [text.split("'")[1]]
    # Merge any token whose trailing "," or ":" continues onto the next.
    idx = 0
    while idx < len(tokens):
        if tokens[idx].endswith((",", ":")):
            tokens[idx:idx + 2] = [" ".join(tokens[idx:idx + 2])]
        else:
            idx += 1
    lhs = GrammarCategory.parse(tokens[0])
    # tokens[1] is the arrow; the rest form the right-hand side.
    rhs = map(GrammarCategory.parse, tokens[2:])
    return cfg.Production(lhs, rhs)
def demo(): import sys, time S = GrammarCategory.parse('S') VP = GrammarCategory.parse('VP') NP = GrammarCategory.parse('NP') PP = GrammarCategory.parse('PP') V = GrammarCategory.parse('V') N = GrammarCategory.parse('N') P = GrammarCategory.parse('P') Name = GrammarCategory.parse('Name') Det = GrammarCategory.parse('Det') DetSg = GrammarCategory.parse('Det[-pl]') DetPl = GrammarCategory.parse('Det[+pl]') NSg = GrammarCategory.parse('N[-pl]') NPl = GrammarCategory.parse('N[+pl]') # Define some grammatical productions. grammatical_productions = [ cfg.Production(S, (NP, VP)), cfg.Production(PP, (P, NP)), cfg.Production(NP, (NP, PP)), cfg.Production(VP, (VP, PP)), cfg.Production(VP, (V, NP)), cfg.Production(VP, (V, )), cfg.Production(NP, (DetPl, NPl)), cfg.Production(NP, (DetSg, NSg)) ] # Define some lexical productions. lexical_productions = [ cfg.Production(NP, ('John', )), cfg.Production(NP, ('I', )), cfg.Production(Det, ('the', )), cfg.Production(Det, ('my', )), cfg.Production(Det, ('a', )), cfg.Production(NSg, ('dog', )), cfg.Production(NSg, ('cookie', )), cfg.Production(V, ('ate', )), cfg.Production(V, ('saw', )), cfg.Production(P, ('with', )), cfg.Production(P, ('under', )), ] earley_grammar = cfg.Grammar(S, grammatical_productions) earley_lexicon = {} for prod in lexical_productions: earley_lexicon.setdefault(prod.rhs()[0].upper(), []).append(prod.lhs()) def lexicon(word): return earley_lexicon.get(word.upper(), []) sent = 'I saw John with a dog with my cookie' print "Sentence:\n", sent from nltk import tokenize tokens = list(tokenize.whitespace(sent)) t = time.time() cp = FeatureEarleyChartParse(earley_grammar, lexicon, trace=1) trees = cp.get_parse_list(tokens) print "Time: %s" % (time.time() - t) for tree in trees: print tree