コード例 #1
0
    def term_info(self, fieldname, text):
        """Return term statistics for ``(fieldname, text)`` across sub-readers.

        Each reader in ``self.readers`` is paired with the matching entry of
        ``self.doc_offsets``; only readers that contain the term contribute.

        :param fieldname: field name half of the term.
        :param text: text half of the term.
        :returns: the single contributing reader's term info with its min/max
            IDs shifted by that reader's offset, or a new ``TermInfo``
            combining the statistics of all contributing readers.
        :raises TermNotFound: if no sub-reader contains the term.
        """
        key = (fieldname, text)

        # Gather (terminfo, offset) pairs from the readers that have the term
        found = []
        for reader, base in zip_(self.readers, self.doc_offsets):
            if key in reader:
                found.append((reader.term_info(fieldname, text), base))

        if not found:
            raise TermNotFound(key)

        if len(found) == 1:
            # Only one reader has the term: reuse its terminfo object.
            # NOTE(review): this shifts the IDs on the object the sub-reader
            # returned, in place -- confirm sub-readers hand out a fresh
            # object per call, otherwise the offset would be applied again
            # on the next lookup.
            info, base = found[0]
            info._minid += base
            info._maxid += base
            return info

        # Several readers have the term: fold their statistics together
        total_weight = sum(info.weight() for info, _ in found)
        doc_freq = sum(info.doc_frequency() for info, _ in found)
        shortest = min(info.min_length() for info, _ in found)
        longest = max(info.max_length() for info, _ in found)
        top_weight = max(info.max_weight() for info, _ in found)

        # Document IDs are local to each reader, so shift them by the
        # reader's offset before comparing
        lowest_id = min(info.min_id() + base for info, base in found)
        highest_id = max(info.max_id() + base for info, base in found)

        return TermInfo(total_weight, doc_freq, shortest, longest, top_weight,
                        lowest_id, highest_id)
コード例 #2
0
ファイル: reading.py プロジェクト: intabeta/inta
    def term_info(self, fieldname, text):
        """Look up a term in every sub-reader and merge the results.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :returns: a term info object whose minimum and maximum IDs have the
            owning reader's document offset added. When exactly one reader
            has the term its own object is returned; otherwise a new
            ``TermInfo`` is built from the combined statistics.
        :raises TermNotFound: if the term is in none of ``self.readers``.
        """
        wanted = (fieldname, text)
        pairs = zip_(self.readers, self.doc_offsets)

        # Keep (terminfo, offset) only for readers that contain the term
        hits = [(rdr.term_info(fieldname, text), off)
                for rdr, off in pairs
                if wanted in rdr]

        count = len(hits)
        if count == 0:
            raise TermNotFound(wanted)

        if count == 1:
            # A lone hit is returned as-is apart from the ID shift.
            # NOTE(review): the shift is written onto the sub-reader's own
            # object -- verify that object is not cached and reused by the
            # sub-reader.
            only, off = hits[0]
            only._minid += off
            only._maxid += off
            return only

        # Sums for the additive statistics, extremes for the bounded ones
        weight = sum(t.weight() for t, _ in hits)
        docfreq = sum(t.doc_frequency() for t, _ in hits)
        minlen = min(t.min_length() for t, _ in hits)
        maxlen = max(t.max_length() for t, _ in hits)
        maxweight = max(t.max_weight() for t, _ in hits)

        # IDs must be offset per reader before taking the overall min/max
        minid = min(t.min_id() + off for t, off in hits)
        maxid = max(t.max_id() + off for t, off in hits)

        return TermInfo(weight, docfreq, minlen, maxlen, maxweight, minid,
                        maxid)
コード例 #3
0
ファイル: ramindex.py プロジェクト: skrieder/microblog
    def vector(self, docnum, fieldname):
        """Return a matcher over the stored vector for one field of a document.

        :param docnum: document number; used together with ``fieldname`` as
            the key into ``self.vectors``.
        :param fieldname: name of the field whose vector is requested.
        :returns: a ``ListMatcher`` built from the stored entries, using the
            field's vector format.
        :raises TermNotFound: if ``fieldname`` is not in ``self.schema``.
        :raises Exception: if the field's ``vector`` attribute is falsy.
        """
        if fieldname not in self.schema:
            raise TermNotFound("No  field %r" % fieldname)

        # Look the vector format up once; it serves both as the "are vectors
        # stored?" check and as the format handed to the matcher.
        vformat = self.schema[fieldname].vector
        if not vformat:
            raise Exception("No vectors are stored for field %r" % fieldname)

        # Transpose the stored three-item entries into parallel sequences
        ids, weights, values = zip_(*self.vectors[docnum, fieldname])
        return ListMatcher(ids, weights, values, format=vformat)
コード例 #4
0
ファイル: reading.py プロジェクト: sudhir-12/spoken-website
    def term_info(self, fieldname, text):
        """Return combined term statistics for ``(fieldname, text)``.

        Collects a ``(terminfo, offset)`` pair from every reader in
        ``self.readers`` that contains the term (offsets come from the
        parallel ``self.doc_offsets``) and passes the list to
        ``combine_terminfos``.

        :param fieldname: field name half of the term.
        :param text: text half of the term.
        :returns: whatever ``combine_terminfos`` returns for the collected
            pairs.
        :raises TermNotFound: if no sub-reader contains the term.
        """
        key = (fieldname, text)

        # Visit each sub-reader alongside its document offset, keeping only
        # those that contain the term
        found = []
        for reader, base in zip_(self.readers, self.doc_offsets):
            if key in reader:
                found.append((reader.term_info(fieldname, text), base))

        if found:
            # Merging (including the single-reader case) is delegated
            return combine_terminfos(found)
        raise TermNotFound(key)
コード例 #5
0
ファイル: reading.py プロジェクト: Apophus/microblog
    def term_info(self, fieldname, text):
        """Gather a term's info from the sub-readers and combine it.

        :param fieldname: name of the field the term belongs to.
        :param text: the term text.
        :returns: the result of ``combine_terminfos`` applied to the list of
            ``(terminfo, doc_offset)`` pairs from the readers that contain
            the term.
        :raises TermNotFound: if the term is in none of ``self.readers``.
        """
        wanted = (fieldname, text)
        pairs = zip_(self.readers, self.doc_offsets)

        # One (terminfo, offset) entry per reader that has the term
        hits = [(rdr.term_info(fieldname, text), off)
                for rdr, off in pairs
                if wanted in rdr]

        if len(hits) == 0:
            raise TermNotFound(wanted)

        # combine_terminfos receives the offsets alongside the term infos
        return combine_terminfos(hits)
コード例 #6
0
ファイル: ramindex.py プロジェクト: skrieder/microblog
    def postings(self, fieldname, text, scorer=None):
        """Return a matcher over the postings of ``(fieldname, text)``.

        :param fieldname: name of the field; checked with
            ``self._test_field`` first.
        :param text: the term text.
        :param scorer: optional scorer passed through to the matcher.
        :returns: a ``ListMatcher`` over the term's postings, with postings
            whose first item is in ``self.deleted`` removed, or a
            ``NullMatcher`` when no postings remain.
        :raises TermNotFound: if ``self.term_info`` raises ``KeyError`` for
            the term.
        """
        self._test_field(fieldname)
        try:
            terminfo = self.term_info(fieldname, text)
        except KeyError:
            raise TermNotFound((fieldname, text))

        # Named to avoid shadowing the ``format`` builtin
        postformat = self.schema[fieldname].format
        postings = self.invindex[fieldname][text]
        excludeset = self.deleted
        if excludeset:
            # Drop postings whose first item is in the deleted set
            postings = [x for x in postings if x[0] not in excludeset]

        # Guard every empty case, not only the one produced by filtering:
        # unpacking zip_(*[]) into three names would fail.
        if not postings:
            return NullMatcher()

        ids, weights, values = zip_(*postings)
        return ListMatcher(ids, weights, values, format=postformat,
                           scorer=scorer, term=(fieldname, text),
                           terminfo=terminfo)
コード例 #7
0
ファイル: reading.py プロジェクト: intabeta/inta
 def leaf_readers(self):
     """Pair each reader in ``self.readers`` with its document offset.

     :returns: the result of ``zip_`` over ``self.readers`` and
         ``self.doc_offsets``.
     """
     readers, offsets = self.readers, self.doc_offsets
     return zip_(readers, offsets)
コード例 #8
0
ファイル: reading.py プロジェクト: sudhir-12/spoken-website
 def leaf_readers(self):
     """Return the sub-readers zipped with their document offsets.

     :returns: ``zip_(self.readers, self.doc_offsets)``, i.e. one
         ``(reader, offset)`` pairing per sub-reader.
     """
     sources = (self.readers, self.doc_offsets)
     return zip_(*sources)