def norm_parse(self, sentence):
    """Tokenize *sentence*, normalize its tokens, and parse it with CKY.

    The sentence-initial token is lowercased when it is capitalized, so
    ordinary words are not mistaken for rare/unknown ones merely because
    of sentence-initial capitalization.

    Args:
        sentence: Raw sentence string to be tokenized by ``self.tokenizer``.

    Returns:
        Whatever ``CKY(self.pcfg, norm_words)`` returns — presumably a
        parse tree/chart (TODO confirm against the CKY implementation).
    """
    words = self.tokenizer.tokenize(sentence)
    # Guard against an empty token list (empty/whitespace-only input):
    # without it, words[0] raises IndexError.
    if words and is_cap_word(words[0]):
        words[0] = words[0].lower()
    norm_words = []
    for word in words:
        if isinstance(word, tuple):
            # Already a word normalized to the Treebank conventions.
            norm_words.append(word)
        else:
            # Rare-word normalization: keep a (normalized, original) pair
            # so the original surface form survives for output.
            norm_words.append((self.pcfg.norm_word(word), word))
    return CKY(self.pcfg, norm_words)
def lower_first_word(tree):
    """Lowercase the sentence-initial word of *tree*, mutating it in place.

    Walks down the second element of each node until it reaches a
    length-2 leaf (assumed shape: ``[tag, word]`` — TODO confirm), then
    lowercases the word there if ``is_cap_word`` says it is capitalized.
    """
    node = tree
    # Descend along the leftmost branch until we hit the leaf node.
    while len(node) != 2:
        node = node[1]
    if is_cap_word(node[1]):
        node[1] = node[1].lower()