def __init__(self, pcfg=None):
    """Set up the parser with a PCFG model and a Penn Treebank tokenizer.

    When *pcfg* is omitted, a model is built from scratch via
    ``build_model()``.  ``self.parse`` is bound to the NLTK-backed
    variant when NLTK is importable, otherwise to the raw-tree variant.
    """
    self.pcfg = build_model() if pcfg is None else pcfg
    self.tokenizer = PennTreebankTokenizer()
    # Prefer NLTK tree output when the library is available.
    self.parse = self.nltk_parse if nltk_is_available else self.raw_parse
class Parser:
    """PCFG-based constituency parser.

    Pipeline: tokenize with Penn Treebank conventions, normalize rare
    words via the grammar's ``norm_word``, run CKY, then undo the
    Chomsky normal form (and optionally convert to an NLTK tree).
    """

    def __init__(self, pcfg=None):
        """Store the grammar (building one if none is given) and pick the
        parse entry point based on NLTK availability."""
        if pcfg is None:
            pcfg = build_model()
        self.pcfg = pcfg
        self.tokenizer = PennTreebankTokenizer()
        # Bind the public entry point once, depending on NLTK availability.
        if nltk_is_available:
            self.parse = self.nltk_parse
        else:
            self.parse = self.raw_parse

    def norm_parse(self, sentence):
        """Tokenize *sentence*, normalize rare words, and return the CKY
        parse tree (still in Chomsky normal form).

        The leading word is lowercased when it is a capitalized word, so
        sentence-initial capitalization does not hurt lexical lookup.
        """
        words = self.tokenizer.tokenize(sentence)
        # FIX: guard against an empty token list (empty / whitespace-only
        # sentence) — the original unconditionally indexed words[0] and
        # raised IndexError before reaching the grammar at all.
        if words and is_cap_word(words[0]):
            words[0] = words[0].lower()
        norm_words = []
        for word in words:
            if isinstance(word, tuple):
                # This is already a word normalized to the Treebank conventions
                norm_words.append(word)
            else:
                # rare words normalization: pair (normalized form, surface form)
                norm_words.append((self.pcfg.norm_word(word), word))
        return CKY(self.pcfg, norm_words)

    def raw_parse(self, sentence):
        """Parse *sentence* and return the tree with CNF transformations
        undone (in place) via ``un_chomsky_normal_form``."""
        tree = self.norm_parse(sentence)
        un_chomsky_normal_form(tree)
        return tree

    def nltk_parse(self, sentence):
        """Parse *sentence* and return the result as an NLTK tree object."""
        return nltk_tree(self.raw_parse(sentence))
class Parser:
    """Minimal PCFG parser: tokenize, normalize rare words, run CKY."""

    def __init__(self, pcfg):
        """Keep a reference to the grammar and build a tokenizer."""
        self.pcfg = pcfg
        self.tokenizer = PennTreebankTokenizer()

    def parse(self, sentence):
        """Parse *sentence* and return the CKY tree.

        The root label is truncated at the first ``"|"`` (presumably a
        binarization/markovization marker — inherited from the grammar's
        label scheme).
        """
        tokens = self.tokenizer.tokenize(sentence)
        # rare words normalization + keep the original surface form
        pairs = [(self.pcfg.norm_word(tok), tok) for tok in tokens]
        tree = CKY(self.pcfg, pairs)
        # Strip anything after "|" from the root symbol.
        tree[0] = tree[0].split("|")[0]
        return tree
def __init__(self, pcfg):
    """Store the grammar and create a Penn Treebank tokenizer."""
    self.tokenizer = PennTreebankTokenizer()
    self.pcfg = pcfg