Ejemplo n.º 1
0
    def _write_block(self):
        """Write the currently buffered block of postings to the posting file.

        Output order, as emitted below: the last ID of the block, a
        placeholder for the offset of the next block, the posting count (one
        byte), the IDs, an array of value lengths (only when the format's
        ``posting_size`` is negative) and the concatenated values (only when
        ``posting_size`` is non-zero). The placeholder is then overwritten
        with the offset at which this block ends, and the running totals and
        block buffers are updated.
        """
        postfile = self.postfile
        block_ids = self.blockids
        block_values = self.blockvalues
        string_ids = self.stringids
        value_size = self.format.posting_size
        count = len(block_ids)

        # The block is prefixed with its last ID.
        # NOTE(review): presumably lets a reader skip the block without
        # decoding it -- confirm against the reading side.
        final_id = block_ids[-1]
        if string_ids:
            postfile.write_string(utf8encode(final_id)[0])
        else:
            postfile.write_uint(final_id)

        # Reserve room for the next-block pointer; it is patched further down
        # once the end offset of this block is known.
        pointer_pos = postfile.tell()
        postfile.write_uint(0)

        # The number of postings in this block is stored in a single byte.
        postfile.write_byte(count)
        if string_ids:
            for item_id in block_ids:
                postfile.write_string(utf8encode(item_id)[0])
        else:
            postfile.write_array(block_ids)

        if value_size != 0:
            if value_size < 0:
                # Negative size: emit the length of every value first.
                postfile.write_array(
                    array("I", [len(value) for value in block_values]))
            postfile.write("".join(block_values))

        # Go back and fill in the pointer, then return to the end of the block.
        postfile.flush()
        block_end = postfile.tell()
        postfile.seek(pointer_pos)
        postfile.write_uint(block_end)
        postfile.seek(block_end)

        self.posttotal += count
        self._reset_block()
        self.blockcount += 1
Ejemplo n.º 2
0
    def _write_block(self):
        """Flush the buffered IDs and values to the posting file as one block.

        The block is written as: the last buffered ID, a placeholder for the
        next-block offset, a one-byte posting count, the IDs themselves, then
        (when the format's ``posting_size`` is negative) an array of value
        lengths and (when ``posting_size`` is not zero) the joined values.
        Afterwards the placeholder is replaced by the offset at which the
        block ends, ``posttotal`` and ``blockcount`` are advanced and the
        block buffers are reset.
        """
        fmt_size = self.format.posting_size
        ids_are_strings = self.stringids
        out = self.postfile
        buffered_ids = self.blockids
        buffered_values = self.blockvalues
        n_postings = len(buffered_ids)

        # Header: the last ID in the block.
        if not ids_are_strings:
            out.write_uint(buffered_ids[-1])
        else:
            out.write_string(utf8encode(buffered_ids[-1])[0])

        # Remember where the next-block pointer lives and write a dummy value.
        placeholder_at = out.tell()
        out.write_uint(0)

        # Posting count (single byte), followed by the IDs.
        out.write_byte(n_postings)
        if not ids_are_strings:
            out.write_array(buffered_ids)
        else:
            for entry in buffered_ids:
                out.write_string(utf8encode(entry)[0])

        # A negative posting size means the length of each value is recorded.
        if fmt_size < 0:
            value_lengths = array("I")
            value_lengths.extend([len(chunk) for chunk in buffered_values])
            out.write_array(value_lengths)

        # A posting size of zero means there are no values to store.
        if fmt_size != 0:
            out.write("".join(buffered_values))

        # Patch the placeholder with the end-of-block offset, then restore
        # the write position to the end of the block.
        out.flush()
        end_of_block = out.tell()
        out.seek(placeholder_at)
        out.write_uint(end_of_block)
        out.seek(end_of_block)

        self.posttotal += n_postings
        self._reset_block()
        self.blockcount += 1
Ejemplo n.º 3
0
def encode_posting(fieldnum, text, doc, freq, datastring):
    """Encode a posting as a single string, for sorting.

    The result is the concatenation, in this order, of: the packed field
    number, the UTF-8 encoded term text, a NUL character, the packed
    (doc, freq) pair, and ``datastring`` unchanged.
    """

    field_part = pack_ushort(fieldnum)
    text_part = utf8encode(text)[0]
    # A NUL character sits between the term text and the packed numbers.
    separator = chr(0)
    numbers_part = pack2ints(doc, freq)

    pieces = (field_part, text_part, separator, numbers_part, datastring)
    return "".join(pieces)
Ejemplo n.º 4
0
def encode_posting(fieldnum, text, doc, freq, datastring):
    """Build the sortable string form of a posting.

    Layout of the returned string: packed ``fieldnum``, UTF-8 encoded
    ``text``, one NUL character, packed ``doc`` and ``freq``, then
    ``datastring`` as given.
    """

    parts = []
    parts.append(pack_ushort(fieldnum))
    parts.append(utf8encode(text)[0])
    # NUL character placed right after the encoded term text.
    parts.append(chr(0))
    parts.append(pack2ints(doc, freq))
    parts.append(datastring)
    return "".join(parts)
Ejemplo n.º 5
0
def encode_termkey(term):
    """Encode a ``(fieldnum, text)`` term as a key string.

    The key is the packed field number followed by the UTF-8 encoded text.
    """
    (field_number, term_text) = term
    packed_field = packushort(field_number)
    encoded_text = utf8encode(term_text)[0]
    return packed_field + encoded_text