def __init__(self, lexicon):
    """Initialise the base index state and the total-document-length counter.

    ``lexicon`` is forwarded unchanged to ``BaseIndex.__init__``.
    """
    BaseIndex.__init__(self, lexicon)

    # ._wordinfo for Okapi is
    #     wid -> {docid -> frequency}; t -> D -> f(D, t)

    # ._docweight for Okapi is
    #     docid -> # of words in the doc
    # This is just len(self._docwords[docid]), but _docwords is stored
    # in compressed form, so uncompressing it just to count the list
    # length would be ridiculously expensive.

    # sum(self._docweight.values()), the total # of words in all docs.
    # A plain 0 is enough: integers grow to arbitrary precision on their
    # own, and the old ``0L`` spelling is a syntax error on Python 3.
    self._totaldoclen = 0
def __init__(self, lexicon, family=None):
    """Initialise the base index state, then the Okapi length counter.

    ``lexicon`` and ``family`` are forwarded unchanged to
    ``BaseIndex.__init__``.
    """
    BaseIndex.__init__(self, lexicon, family=family)

    # How Okapi interprets the index mappings:
    #   ._wordinfo:  wid -> {docid -> frequency}, i.e. t -> D -> f(D, t)
    #   ._docweight: docid -> number of words in the doc
    # The word count equals len(self._docwords[docid]), but _docwords is
    # kept in compressed form, so decompressing it merely to measure the
    # list would be far too expensive.

    # Total number of words over all docs, i.e.
    # sum(self._docweight.values()), kept in a Length object
    # (Length is defined outside this block).
    self._totaldoclen = Length(0)
def unindex_doc(self, docid):
    """Remove ``docid`` from the index, adjusting the total doc length.

    A docid that is not present in ``self._docwords`` is ignored
    silently.  Always returns None.
    """
    is_indexed = docid in self._docwords
    if is_indexed:
        # Take this doc's weight out of the running total first; the
        # lookup must happen before delegating (presumably the base
        # class discards the _docweight entry -- confirm in BaseIndex).
        self._change_doc_len(-self._docweight[docid])
        BaseIndex.unindex_doc(self, docid)
def _reindex_doc(self, docid, text):
    """Subtract the doc's current weight, then delegate the re-indexing.

    Returns whatever ``BaseIndex._reindex_doc`` returns.  A failed
    ``self._docweight[docid]`` lookup propagates to the caller.
    """
    previous_weight = self._docweight[docid]
    # Back the old length out of the total via _change_doc_len
    # (defined outside this block) before the base class re-indexes.
    self._change_doc_len(-previous_weight)
    return BaseIndex._reindex_doc(self, docid, text)
def index_doc(self, docid, text):
    """Index ``text`` under ``docid`` and grow the total doc length.

    Returns the value produced by ``BaseIndex.index_doc`` (presumably
    the document's word count -- confirm in BaseIndex).
    """
    doc_len = BaseIndex.index_doc(self, docid, text)
    # Add the newly indexed length to the running total.
    self._change_doc_len(doc_len)
    return doc_len
def __init__(self, lexicon, family=None):
    """Initialise the index by delegating entirely to ``BaseIndex``.

    ``lexicon`` and ``family`` are forwarded unchanged; no additional
    state is set up here.
    """
    BaseIndex.__init__(self, lexicon, family=family)
def unindex_doc(self, docid):
    """Remove ``docid`` from the index, shrinking the total doc length.

    A docid with no entry in ``self._docweight`` contributes 0 to the
    adjustment; the base-class removal is attempted regardless.
    """
    removed_weight = self._docweight.get(docid, 0)
    self._totaldoclen -= removed_weight
    BaseIndex.unindex_doc(self, docid)
def _reindex_doc(self, docid, text):
    """Subtract the doc's current weight, then delegate the re-indexing.

    Returns whatever ``BaseIndex._reindex_doc`` returns.  A failed
    ``self._docweight[docid]`` lookup propagates to the caller.
    """
    previous_weight = self._docweight[docid]
    # Back the old length out of the running total before re-indexing.
    self._totaldoclen -= previous_weight
    return BaseIndex._reindex_doc(self, docid, text)
def index_doc(self, docid, text):
    """Index ``text`` under ``docid`` and grow the total doc length.

    Returns the value produced by ``BaseIndex.index_doc`` (presumably
    the document's word count -- confirm in BaseIndex).
    """
    doc_len = BaseIndex.index_doc(self, docid, text)
    # Add the newly indexed length to the running total.
    self._totaldoclen += doc_len
    return doc_len
def __init__(self, lexicon):
    """Initialise the index by delegating entirely to ``BaseIndex``.

    ``lexicon`` is forwarded unchanged; no additional state is set up
    here.
    """
    BaseIndex.__init__(self, lexicon)