예제 #1
0
 def find_ngram(self, wordclass):
     """
     Return the ngram corresponding to the wordclass specified, or None.

     The wordclass may be a Penn wordclass or a native wordclass
     ('NOUN', 'VERB', etc.). Lookup order:

       1. ``wordclass`` itself as a key of ``self.ngrams_by_penn()``;
       2. ``wordclass_base(wordclass)`` as a key of the same mapping;
       3. the first entry of ``self.ngrams_list`` whose ``wordclass``
          attribute equals ``wordclass``.

     None is returned when none of the three steps finds a match.
     """
     # Fetch the mapping once instead of once per membership test/lookup.
     by_penn = self.ngrams_by_penn()
     if wordclass in by_penn:
         return by_penn[wordclass]

     # Derive the base class only when the direct lookup has missed,
     # and reuse it for both the test and the lookup.
     base = wordclass_base(wordclass)
     if base in by_penn:
         return by_penn[base]

     for ngram in self.ngrams_list:
         if ngram.wordclass == wordclass:
             return ngram
     return None
예제 #2
0
 def subcategory_ratio(self, pos):
     """
     Return the ratio for a single part-of-speech subcategory.

     The ratio of the base wordclass, ``self.ratio(wordclass_base(pos))``,
     is divided by element 1 of ``self.pos_to_lemma_ratios[pos]`` when
     ``pos`` has an entry there; otherwise the base ratio is returned
     unchanged.
     """
     parent_ratio = self.ratio(wordclass_base(pos))
     if pos not in self.pos_to_lemma_ratios:
         return parent_ratio
     divisor = self.pos_to_lemma_ratios[pos][1]
     return parent_ratio / divisor
예제 #3
0
 def base_ratios(self):
     """
     Return the ratios summed per base wordclass.

     On the first call the mapping is built from ``self.ratios()`` by
     adding each value to the bucket for ``wordclass_base(pos)``; it is
     stored on the instance as ``_base_ratios`` and that same object is
     returned on every later call. Missing keys default to 0.
     """
     if not hasattr(self, '_base_ratios'):
         # Attach the cache before filling it; the local name is just a
         # shorter handle on the same object.
         totals = self._base_ratios = defaultdict(lambda: 0)
         for pos, value in self.ratios().items():
             totals[wordclass_base(pos)] += value
     return self._base_ratios
예제 #4
0
 def sum_subcategories(self, parts):
     """
     Return the combined ratio of several subcategories, capped at the
     ratio of their base wordclass.

     The base wordclass is derived from the first element of ``parts``
     only. Each element contributes the base ratio divided by element 1
     of its ``self.pos_to_lemma_ratios`` entry, or the whole base ratio
     when it has no entry. An empty (falsy) ``parts`` yields 0.
     """
     if not parts:
         return 0

     ceiling = self.ratio(wordclass_base(parts[0]))
     running = 0
     for pos in parts:
         if pos in self.pos_to_lemma_ratios:
             share = ceiling / self.pos_to_lemma_ratios[pos][1]
         else:
             share = ceiling
         running += share
     # The sum of the parts may not exceed the base class's own ratio.
     return min(running, ceiling)
예제 #5
0
 def add_np(self):
     """
     Add a dummy "NP" wordclass to the model when one is needed.

     Nothing happens unless all of the following hold, checked in this
     order: 'NP' is not already in ``self.full_set_of_wordclasses()``,
     the wordform is not entirely lower-case, and
     ``is_proper_name(self.wordform)`` is true. When they do, null
     entries for 'NP' are added under its group and base class (creating
     a 'core' group first if the group is absent) and ``self.np_added``
     is set to True.
     """
     if 'NP' in self.full_set_of_wordclasses():
         return
     if self.wordform.lower() == self.wordform:
         return
     if not is_proper_name(self.wordform):
         return

     group_key = wordclass_group('NP')
     base_key = wordclass_base('NP')
     if group_key not in self.model():
         self.model()[group_key] = Group('core')
     self.model()[group_key].add_null(base_key)
     self.model()[group_key].model()[base_key].add_null('NP')
     self.np_added = True
예제 #6
0
 def base_set_of_wordclasses(self):
     """
     Return the set of base wordclasses obtained by applying
     ``wordclass_base`` to every member of
     ``self.full_set_of_wordclasses()``.
     """
     return {wordclass_base(wordclass)
             for wordclass in self.full_set_of_wordclasses()}
예제 #7
0
 def __init__(self, wordclass):
     """
     Initialise an empty container for one wordclass.

     Stores the wordclass as given, its base class as computed by
     ``wordclass_base``, an empty ``items`` list and a ``ratio`` of 1.0.
     """
     self.wordclass = wordclass
     self.baseclass = wordclass_base(wordclass)
     self.items = []
     # Default placeholder value.
     self.ratio = 1.0
예제 #8
0
 def add(self, lex_item):
     """
     File a lexical item under the base class of its wordclass.

     A ``Base`` entry is created in ``self.model()`` the first time a
     given base class is seen; the item is then passed to that entry's
     own ``add`` method.
     """
     base_key = wordclass_base(lex_item.wordclass)
     if base_key not in self.model():
         self.model()[base_key] = Base(base_key)
     self.model()[base_key].add(lex_item)