Beispiel #1
0
    def find_top_word(self, _class, topk=None):
        """Rank the index terms by their estimated score for ``_class``.

        Every term returned by ``self.ciindex.get_terms()`` is paired with
        ``self.estimate_mi(_class, term)`` as a ``(score, term)`` tuple.

        Args:
          _class: the class whose keywords are wanted.
          topk: how many pairs to keep. Any falsy value (``None`` or ``0``)
            means that every term is returned.

        Returns:
          If ``topk`` is truthy, the result of ``TopkHeap(topk).topk()``
          after all pairs have been pushed (presumably the ``topk`` best
          pairs -- confirm against ``TopkHeap``). Otherwise a list with all
          ``(score, term)`` tuples in descending order.
        """
        if not topk:
            # No limit requested: score everything, then order high-to-low.
            ranking = [
                (self.estimate_mi(_class, word), word)
                for word in self.ciindex.get_terms()
            ]
            ranking.sort()
            ranking.reverse()
            return ranking

        best = TopkHeap(topk)
        for word in self.ciindex.get_terms():
            score = self.estimate_mi(_class, word)
            best.push((score, word))
        return best.topk()
Beispiel #2
0
  def find_top_word(self, _class, topk=None):
    """Collect ``(score, term)`` pairs describing the keywords of ``_class``.

    The score of a term is ``self.estimate_mi(_class, term)``; the terms come
    from ``self.ciindex.get_terms()``.

    Args:
      _class: the class to look up keywords for.
      topk: number of pairs to keep; a falsy value (``None`` or ``0``)
        selects all terms.

    Returns:
      With a truthy ``topk``: whatever ``TopkHeap(topk).topk()`` returns once
      every pair has been pushed (presumably the best ``topk`` pairs --
      verify against ``TopkHeap``). Otherwise: a list of all
      ``(score, term)`` tuples, largest first.
    """
    def score_pair(word):
      # Score first so that tuple ordering is driven by the score.
      return (self.estimate_mi(_class, word), word)

    if topk:
      collector = TopkHeap(topk)
      for word in self.ciindex.get_terms():
        collector.push(score_pair(word))
      return collector.topk()

    pairs = [score_pair(word) for word in self.ciindex.get_terms()]
    return list(reversed(sorted(pairs)))
Beispiel #3
0
 def get_class_keywords(self, _class, top_k):
   """Return the terms selected by a ``TopkHeap`` for ``_class``.

   Each key of ``self.term_num_docs`` is pushed onto a ``TopkHeap(top_k)``
   as ``(self.term_cdf[term][_class], term)``.

   Args:
     _class: key used to index the per-term entry of ``self.term_cdf``.
     top_k: size handed to ``TopkHeap``.

   Returns:
     A list holding element ``[1]`` (the term) of every item returned by
     the heap's ``topk()``, in the order the heap returns them.
   """
   ranked = TopkHeap(top_k)
   for word in self.term_num_docs:
     weight = self.term_cdf[word][_class]
     ranked.push((weight, word))

   keywords = []
   for entry in ranked.topk():
     keywords.append(entry[1])
   return keywords
Beispiel #4
0
 def top_k_appear(self, k):
   """Feed every term into a ``TopkHeap(k)`` keyed by ``get_word_appear``.

   Args:
     k: size handed to ``TopkHeap``.

   Returns:
     The result of the heap's ``topk()`` after a
     ``(self.get_word_appear(term), term)`` tuple has been pushed for each
     term of ``self.get_terms()``.
   """
   best = TopkHeap(k)
   for word in self.get_terms():
     occurrences = self.get_word_appear(word)
     best.push((occurrences, word))
   return best.topk()