예제 #1
0
    def __init__(self, lexicon):
        BaseIndex.__init__(self, lexicon)

        # ._wordinfo for Okapi is
        # wid -> {docid -> frequency}; t -> D -> f(D, t)

        # ._docweight for Okapi is
        # docid -> # of words in the doc
        # This is just len(self._docwords[docid]), but _docwords is stored
        # in compressed form, so uncompressing it just to count the list
        # length would be ridiculously expensive.

        # sum(self._docweight.values()), the total # of words in all docs
        # This is a long for "better safe than sorry" reasons.  It isn't
        # used often enough that speed should matter.
        self._totaldoclen = 0L
예제 #2
0
    def __init__(self, lexicon):
        BaseIndex.__init__(self, lexicon)

        # ._wordinfo for Okapi is
        # wid -> {docid -> frequency}; t -> D -> f(D, t)

        # ._docweight for Okapi is
        # docid -> # of words in the doc
        # This is just len(self._docwords[docid]), but _docwords is stored
        # in compressed form, so uncompressing it just to count the list
        # length would be ridiculously expensive.

        # sum(self._docweight.values()), the total # of words in all docs
        # This is a long for "better safe than sorry" reasons.  It isn't
        # used often enough that speed should matter.
        # Use a BTree.Length.Length object to avoid concurrent write conflicts
        self._totaldoclen = Length(0L)
예제 #3
0
 def __init__(self, lexicon):
     BaseIndex.__init__(self, lexicon)
예제 #4
0
 def __init__(self, lexicon):
     BaseIndex.__init__(self, lexicon)