예제 #1
0
 def term2idf_unstemmed(self, term):
     """Return a smoothed IDF weight for a raw (not yet analyzed) term.

     ``term`` is first run through this object's analyzer. Only when the
     analysis yields exactly one token is the index consulted; the weight is
     then ``log((numDocs + 1) / (df + 1))`` where ``df`` is the document
     frequency of that token in the ``J.A_IndexArgs.CONTENTS`` field.

     Any other token count (zero tokens or several) yields ``0.``.
     """
     tokens = J.A_AnalyzerUtils.analyze(self._get_analyzer(), term).toArray()
     if len(tokens) != 1:
         # Original author's rationale: the analyzer dropped the term because
         # it is a stop word, i.e. very common, so it carries no weight.
         return 0.

     contents_term = J.L_Term(J.A_IndexArgs.CONTENTS, tokens[0])
     df = self._reader().docFreq(contents_term)
     smoothed_total = self._reader().numDocs() + 1
     return math.log(smoothed_total / (df + 1))
 def term2idf_unstemmed(self, term):
     """Return an IDF weight for a raw (not yet analyzed) term.

     ``term`` is tokenized with this object's analyzer. Only when that
     produces exactly one token is the index consulted; the weight is then
     ``log(1 + (N - df + 0.5) / (df + 0.5))`` where ``df`` is the document
     frequency of the token in the primary field and ``N`` is
     ``collection_stats().docCount()``.

     Any other token count (zero tokens or several) yields ``0.``.
     """
     tokens = J.A_AnalyzerUtils.tokenize(self._get_analyzer(), term).toArray()
     if len(tokens) != 1:
         # Original author's rationale: the analyzer dropped the term because
         # it is a stop word, i.e. very common, so it carries no weight.
         return 0.

     field_term = J.L_Term(self._primary_field, tokens[0])
     df = self._reader().docFreq(field_term)
     total_docs = self.collection_stats().docCount()
     odds = (total_docs - df + 0.5) / (df + 0.5)
     return math.log(1 + odds)
예제 #3
0
 def doc_freq(self, term):
     """Return the reader's document frequency for ``term`` in the primary field.

     ``term`` is passed to the index as given; no analysis is applied here.
     """
     field_term = J.L_Term(self._primary_field, term)
     return self._reader().docFreq(field_term)