Exemple #1
0
    def __init__(self, lexicon):
        """Initialise the base index and reset the total document length.

        :param lexicon: passed straight through to ``BaseIndex.__init__``.
        """
        BaseIndex.__init__(self, lexicon)

        # ._wordinfo for Okapi is
        # wid -> {docid -> frequency}; t -> D -> f(D, t)

        # ._docweight for Okapi is
        # docid -> # of words in the doc
        # This is just len(self._docwords[docid]), but _docwords is stored
        # in compressed form, so uncompressing it just to count the list
        # length would be ridiculously expensive.

        # sum(self._docweight.values()), the total # of words in all docs.
        # A plain int is enough: Python integers grow to arbitrary size
        # (automatically promoted to long on Python 2), and the old ``0L``
        # literal is a syntax error on Python 3.
        self._totaldoclen = 0
Exemple #2
0
    def __init__(self, lexicon, family=None):
        """Initialise the base index, then start the word total at zero.

        :param lexicon: forwarded to ``BaseIndex.__init__``.
        :param family: forwarded to ``BaseIndex.__init__`` as a keyword.
        """
        BaseIndex.__init__(self, lexicon, family=family)

        # How Okapi uses the mappings it shares with the base index:
        #
        #   ._wordinfo   wid -> {docid -> frequency}, i.e. t -> D -> f(D, t)
        #   ._docweight  docid -> number of words in that document
        #
        # The per-document count equals len(self._docwords[docid]), but
        # _docwords is kept compressed, and decompressing it merely to
        # measure its length would be far too costly.

        # Equals sum(self._docweight.values()): the number of words across
        # every indexed document, held in a Length object.  It is consulted
        # rarely enough that its speed is not a concern.
        initial_total = Length(0)
        self._totaldoclen = initial_total
Exemple #3
0
 def unindex_doc(self, docid):
     """Remove *docid* from the index; unknown docids are ignored.

     For a known docid, the negated value of ``self._docweight[docid]`` is
     handed to ``_change_doc_len`` before ``BaseIndex.unindex_doc`` does
     the actual removal.  Always returns ``None``.
     """
     if docid in self._docwords:
         doc_len = self._docweight[docid]
         self._change_doc_len(-doc_len)
         BaseIndex.unindex_doc(self, docid)
Exemple #4
0
 def _reindex_doc(self, docid, text):
     """Re-index *docid* with new *text*.

     The currently recorded ``self._docweight[docid]`` is first passed,
     negated, to ``_change_doc_len``; the work is then delegated to
     ``BaseIndex._reindex_doc``, whose result is returned unchanged.

     Raises ``KeyError`` if *docid* has no entry in ``_docweight``
     (assuming ``_docweight`` behaves like a mapping).
     """
     previous_len = self._docweight[docid]
     self._change_doc_len(-previous_len)
     return BaseIndex._reindex_doc(self, docid, text)
Exemple #5
0
 def index_doc(self, docid, text):
     """Index *text* under *docid* and account for its length.

     Delegates to ``BaseIndex.index_doc``, forwards the value it returns
     (presumably the document's word count -- confirm against BaseIndex)
     to ``_change_doc_len``, and returns that same value.
     """
     indexed_len = BaseIndex.index_doc(self, docid, text)
     self._change_doc_len(indexed_len)
     return indexed_len
Exemple #6
0
 def __init__(self, lexicon, family=None):
     """Initialise the index by delegating to ``BaseIndex.__init__``.

     :param lexicon: forwarded positionally to the base initialiser.
     :param family: forwarded as the ``family`` keyword (default ``None``).
     """
     # No additional state is set up here; everything is left to BaseIndex.
     BaseIndex.__init__(self, lexicon, family=family)
Exemple #7
0
 def unindex_doc(self, docid):
     """Remove *docid* from the index and shrink the running word total.

     The weight recorded for *docid* in ``_docweight`` (``0`` when there
     is no entry) is subtracted from ``_totaldoclen``; the removal itself
     is then delegated to ``BaseIndex.unindex_doc``.
     """
     removed_len = self._docweight.get(docid, 0)
     self._totaldoclen -= removed_len
     BaseIndex.unindex_doc(self, docid)
Exemple #8
0
 def _reindex_doc(self, docid, text):
     """Re-index *docid* with new *text*.

     The weight currently stored in ``_docweight`` for *docid* is taken
     off ``_totaldoclen`` before delegating to ``BaseIndex._reindex_doc``,
     whose result is returned unchanged.
     """
     previous_len = self._docweight[docid]
     self._totaldoclen -= previous_len
     return BaseIndex._reindex_doc(self, docid, text)
Exemple #9
0
 def index_doc(self, docid, text):
     """Index *text* under *docid* and grow the running word total.

     The value returned by ``BaseIndex.index_doc`` is added to
     ``_totaldoclen`` and then returned to the caller.
     """
     indexed_len = BaseIndex.index_doc(self, docid, text)
     self._totaldoclen += indexed_len
     return indexed_len
Exemple #10
0
 def __init__(self, lexicon):
     """Initialise the index by delegating to ``BaseIndex.__init__``.

     :param lexicon: forwarded unchanged to the base initialiser.
     """
     # No additional state is set up here; everything is left to BaseIndex.
     BaseIndex.__init__(self, lexicon)
Exemple #11
0
 def __init__(self, lexicon, family=None):
     """Initialise the index by delegating to ``BaseIndex.__init__``.

     :param lexicon: forwarded positionally to the base initialiser.
     :param family: forwarded as the ``family`` keyword (default ``None``).
     """
     # No additional state is set up here; everything is left to BaseIndex.
     BaseIndex.__init__(self, lexicon, family=family)
 def unindex_doc(self, docid):
     """Remove *docid* from the index; unknown docids are ignored.

     For a known docid, the negated value of ``self._docweight[docid]`` is
     handed to ``_change_doc_len`` before ``BaseIndex.unindex_doc`` does
     the actual removal.  Always returns ``None``.
     """
     if docid in self._docwords:
         doc_len = self._docweight[docid]
         self._change_doc_len(-doc_len)
         BaseIndex.unindex_doc(self, docid)
 def _reindex_doc(self, docid, text):
     """Re-index *docid* with new *text*.

     The currently recorded ``self._docweight[docid]`` is first passed,
     negated, to ``_change_doc_len``; the work is then delegated to
     ``BaseIndex._reindex_doc``, whose result is returned unchanged.
     """
     previous_len = self._docweight[docid]
     self._change_doc_len(-previous_len)
     return BaseIndex._reindex_doc(self, docid, text)
 def index_doc(self, docid, text):
     """Index *text* under *docid* and account for its length.

     Delegates to ``BaseIndex.index_doc``, forwards the value it returns
     (presumably the document's word count -- confirm against BaseIndex)
     to ``_change_doc_len``, and returns that same value.
     """
     indexed_len = BaseIndex.index_doc(self, docid, text)
     self._change_doc_len(indexed_len)
     return indexed_len