def find_ngram(self, wordclass):
    """
    Return the ngram corresponding to the wordclass specified, whether
    this is a Penn wordclass or a native wordclass ('NOUN', 'VERB', etc.).

    Lookup order:
      1. `wordclass` itself as a key of `self.ngrams_by_penn()`;
      2. `wordclass_base(wordclass)` as a key of the same mapping;
      3. the first entry of `self.ngrams_list` whose `.wordclass`
         attribute equals `wordclass`.

    Returns None if none of the three lookups succeeds.
    """
    # 1. Exact key match.
    if wordclass in self.ngrams_by_penn():
        return self.ngrams_by_penn()[wordclass]

    # 2. Retry with the base form of the wordclass.
    if wordclass_base(wordclass) in self.ngrams_by_penn():
        return self.ngrams_by_penn()[wordclass_base(wordclass)]

    # 3. Fall back to a linear scan; None when nothing matches.
    return next(
        (candidate for candidate in self.ngrams_list
         if candidate.wordclass == wordclass),
        None,
    )
def subcategory_ratio(self, pos):
    """
    Return the ratio for a specific part of speech.

    The starting point is `self.ratio()` of the base wordclass of `pos`.
    If `pos` has an entry in `self.pos_to_lemma_ratios`, that value is
    divided by the entry's second element (index 1); otherwise it is
    returned unchanged.
    """
    ratio_of_base = self.ratio(wordclass_base(pos))

    # No pos-specific entry: the base ratio is used as-is.
    if pos not in self.pos_to_lemma_ratios:
        return ratio_of_base

    divisor = self.pos_to_lemma_ratios[pos][1]
    return ratio_of_base / divisor
def base_ratios(self):
    """
    Return the values of `self.ratios()` summed per base wordclass.

    Each key of `self.ratios()` is reduced with `wordclass_base()`, and
    values that share a base wordclass are added together. The result
    is a defaultdict (missing keys read as 0) that is computed on the
    first call and cached on `self._base_ratios` for later calls.
    """
    try:
        return self._base_ratios
    except AttributeError:
        pass

    # Build into a local and publish it only once complete. Assigning
    # the cache attribute up front would leave a partial (or empty)
    # result behind if ratios() or wordclass_base() raised, and every
    # later call would silently return it.
    totals = defaultdict(int)
    for pos, value in self.ratios().items():
        totals[wordclass_base(pos)] += value

    self._base_ratios = totals
    return totals
def sum_subcategories(self, parts):
    """
    Return the combined ratio for a sequence of parts of speech.

    The base wordclass is taken from the first element of `parts` only,
    and its `self.ratio()` value is used for every element. Each part
    contributes that base ratio, divided by the second element (index 1)
    of its `self.pos_to_lemma_ratios` entry when it has one. The total
    is capped at the base ratio itself.

    Returns 0 when `parts` is empty (or otherwise falsy).
    """
    if not parts:
        return 0

    ceiling = self.ratio(wordclass_base(parts[0]))

    running = 0
    for part in parts:
        if part not in self.pos_to_lemma_ratios:
            running += ceiling
            continue
        running += ceiling / self.pos_to_lemma_ratios[part][1]

    # The sum over subcategories may not exceed the base ratio.
    return min(running, ceiling)
def add_np(self):
    """
    Check if a dummy "NP" wordclass needs to be added, and add it if so.

    Nothing happens unless all of the following hold:
      - 'NP' is not already in `self.full_set_of_wordclasses()`;
      - `self.wordform` differs from its lower-cased form;
      - `is_proper_name(self.wordform)` is true.

    When they do, null entries for 'NP' are inserted into the model
    (creating the group with `Group('core')` if it is missing) and
    `self.np_added` is set to True.
    """
    # The checks run in this order so that a failing early check skips
    # the later ones.
    if 'NP' in self.full_set_of_wordclasses():
        return
    if self.wordform.lower() == self.wordform:
        return
    if not is_proper_name(self.wordform):
        return

    np_group = wordclass_group('NP')
    np_base = wordclass_base('NP')

    if np_group not in self.model():
        self.model()[np_group] = Group('core')

    self.model()[np_group].add_null(np_base)
    self.model()[np_group].model()[np_base].add_null('NP')
    self.np_added = True
def base_set_of_wordclasses(self):
    """
    Return the set of base wordclasses, obtained by applying
    `wordclass_base()` to every member of
    `self.full_set_of_wordclasses()`.
    """
    return {
        wordclass_base(full_wc)
        for full_wc in self.full_set_of_wordclasses()
    }
def __init__(self, wordclass):
    """
    Initialise an empty container for the given wordclass.

    Attributes set:
      wordclass -- the wordclass as passed in
      baseclass -- `wordclass_base(wordclass)`
      items     -- an empty list
      ratio     -- 1.0, a default placeholder
    """
    self.items = []
    self.ratio = 1.0  # default placeholder
    self.wordclass = wordclass
    self.baseclass = wordclass_base(wordclass)
def add(self, lex_item):
    """
    Add `lex_item` to the model under its base wordclass.

    The key is `wordclass_base(lex_item.wordclass)`. If the model has
    no entry for that key yet, a new `Base` is created for it first;
    the item is then passed to that entry's own `add()`.
    """
    base_key = wordclass_base(lex_item.wordclass)
    if base_key not in self.model():
        self.model()[base_key] = Base(base_key)
    self.model()[base_key].add(lex_item)