Esempio n. 1
0
    def from_file(cls, postfile, postingsize, stringids=False):
        """Build a block from the header at the current position of
        ``postfile``.

        The fixed-size header is unpacked with ``cls._struct``, a new block is
        constructed from it, and the "maximum ID" value that follows the
        header is read. On return the file is positioned just past that value.

        :param postfile: posting file object; must provide ``tell()``,
            ``read()`` and ``read_uint()``.
        :param postingsize: passed through unchanged to the constructor.
        :param stringids: when true, the maximum ID is read with ``load()``
            rather than ``postfile.read_uint()``.
        :returns: the newly created block.
        """
        block_start = postfile.tell()
        fields = cls._struct.unpack(postfile.read(cls._struct.size))

        # Header fields 7, 8, 11 and 12 go straight to the constructor
        block = cls(postfile, postingsize, stringids=stringids,
                    maxweight=fields[7], maxwol=fields[8],
                    maxlength=fields[11], minlength=fields[12])

        # Header field 0 is added to the block's own start position
        block.nextoffset = block_start + fields[0]
        block.compression = fields[1]
        block.postcount = fields[2]
        # Under PY3 the unpacked typecode is decoded to text
        block.typecode = fields[3].decode('latin-1') if PY3 else fields[3]
        block.idslen = fields[5]
        block.weightslen = fields[6]

        # The maximum ID follows the fixed header; its encoding depends on
        # whether string IDs are in use
        block.maxid = load(postfile) if stringids else postfile.read_uint()

        # Everything after this position is block data
        block.dataoffset = postfile.tell()
        return block
Esempio n. 2
0
 def _add_file(self, args):
     writer = self.writer
     filename, length = args
     f = open(filename, "rb")
     for _ in xrange(length):
         writer.add_document(**load(f))
     f.close()
     os.remove(filename)
Esempio n. 3
0
 def _add_file(self, args):
     writer = self.writer
     filename, length = args
     f = open(filename, "rb")
     for _ in xrange(length):
         writer.add_document(**load(f))
     f.close()
     os.remove(filename)
Esempio n. 4
0
 def _read_run(self, path):
     f = self._open_run(path)
     try:
         while True:
             yield load(f)
     except EOFError:
         return
     finally:
         f.close()
         self._remove_run(path)
Esempio n. 5
0
 def _read_run(self, path):
     f = self._open_run(path)
     try:
         while True:
             yield load(f)
     except EOFError:
         return
     finally:
         f.close()
         self._remove_run(path)
Esempio n. 6
0
 def _read_run(path):
     import os.path
     f = open(path, "rb")
     try:
         while True:
             yield load(f)
     except EOFError:
         return
     finally:
         f.close()
         os.remove(path)
Esempio n. 7
0
 def _read_run(path):
     import os.path
     f = open(path, "rb")
     try:
         while True:
             yield load(f)
     except EOFError:
         return
     finally:
         f.close()
         os.remove(path)
Esempio n. 8
0
 def _add_file(self, filename, length):
     subpool = self.subpool
     f = open(filename, "rb")
     for _ in xrange(length):
         code, args = load(f)
         if code == 0:
             subpool.add_content(*args)
         elif code == 1:
             subpool.add_posting(*args)
         elif code == 2:
             subpool.add_field_length(*args)
     f.close()
     os.remove(filename)
Esempio n. 9
0
 def _add_file(self, filename, length):
     subpool = self.subpool
     f = open(filename, "rb")
     for _ in xrange(length):
         code, args = load(f)
         if code == 0:
             subpool.add_content(*args)
         elif code == 1:
             subpool.add_posting(*args)
         elif code == 2:
             subpool.add_field_length(*args)
     f.close()
     os.remove(filename)
Esempio n. 10
0
    def from_file(cls, postfile, postingsize, stringids=False):
        """Create a block from the header at the current position of
        ``postfile``.

        A new block is constructed, attached to ``postfile``, and filled in
        from the fixed-size header unpacked with ``cls._struct`` plus the
        "maximum ID" value that follows it. On return the file is positioned
        just past that value.

        :param postfile: posting file object; must provide ``tell()``,
            ``read()`` and ``read_uint()``.
        :param postingsize: passed through unchanged to the constructor.
        :param stringids: when true, the maximum ID is read with ``load()``
            rather than ``postfile.read_uint()``.
        :returns: the populated block.
        """
        origin = postfile.tell()

        block = cls(postingsize, stringids=stringids)
        block.postfile = postfile

        hdr = cls._struct.unpack(postfile.read(cls._struct.size))

        # Header field 0 is added to the block's own start position
        block.nextoffset = origin + hdr[0]
        block.cmp = hdr[1]
        block.count = hdr[2]
        block.idcode = hdr[3]
        block.idslen = hdr[5]
        block.wtslen = hdr[6]
        block.maxweight = hdr[7]
        # Fields 11 and 12 are converted with byte_to_length() before storing
        block.maxlength = byte_to_length(hdr[11])
        block.minlength = byte_to_length(hdr[12])

        # The maximum ID follows the fixed header
        if stringids:
            block.maxid = load(postfile)
        else:
            block.maxid = postfile.read_uint()

        # Everything after this position is block data
        block.dataoffset = postfile.tell()
        return block
Esempio n. 11
0
    def from_file(cls, postfile, postingsize, stringids=False):
        """Create a block from the header at the current position of
        ``postfile``.

        A new block is constructed, attached to ``postfile``, and filled in
        from the fixed-size header unpacked with ``cls._struct`` plus the
        "maximum ID" value that follows it. The ID type code from the header
        is decoded as Latin-1 text. On return the file is positioned just
        past the maximum ID.

        :param postfile: posting file object; must provide ``tell()``,
            ``read()`` and ``read_uint()``.
        :param postingsize: passed through unchanged to the constructor.
        :param stringids: when true, the maximum ID is read with ``load()``
            rather than ``postfile.read_uint()``.
        :returns: the populated block.
        """
        origin = postfile.tell()

        block = cls(postingsize, stringids=stringids)
        block.postfile = postfile

        hdr = cls._struct.unpack(postfile.read(cls._struct.size))

        # Header field 0 is added to the block's own start position
        block.nextoffset = origin + hdr[0]
        block.cmp = hdr[1]
        block.count = hdr[2]
        # The unpacked type code is decoded to text
        block.idcode = hdr[3].decode("Latin1")
        block.idslen = hdr[5]
        block.wtslen = hdr[6]
        block.maxweight = hdr[7]
        # Fields 11 and 12 are converted with byte_to_length() before storing
        block.maxlength = byte_to_length(hdr[11])
        block.minlength = byte_to_length(hdr[12])

        # The maximum ID follows the fixed header
        if stringids:
            block.maxid = load(postfile)
        else:
            block.maxid = postfile.read_uint()

        # Everything after this position is block data
        block.dataoffset = postfile.tell()
        return block
Esempio n. 12
0
    def from_file(cls, postfile, postingsize, stringids=False):
        """Read a block header from ``postfile`` and return the block it
        describes.

        The header is unpacked with ``cls._struct`` starting at the file's
        current position. The block is created from the header values, then
        the "maximum ID" stored after the header is read, leaving the file
        positioned immediately after it.

        :param postfile: posting file object; must provide ``tell()``,
            ``read()`` and ``read_uint()``.
        :param postingsize: passed through unchanged to the constructor.
        :param stringids: when true, the maximum ID is read with ``load()``
            rather than ``postfile.read_uint()``.
        :returns: the newly created block.
        """
        offset = postfile.tell()
        raw_header = postfile.read(cls._struct.size)
        info = cls._struct.unpack(raw_header)

        # Header fields 7, 8, 11 and 12 are handed to the constructor
        block = cls(postfile, postingsize, stringids=stringids,
                    maxweight=info[7], maxwol=info[8],
                    maxlength=info[11], minlength=info[12])

        # Remaining header fields are stored as attributes on the block;
        # field 0 is added to the block's own start position
        block.nextoffset = offset + info[0]
        block.compression = info[1]
        block.postcount = info[2]
        # Under PY3 the unpacked typecode is decoded to text
        block.typecode = info[3].decode('latin-1') if PY3 else info[3]
        block.idslen = info[5]
        block.weightslen = info[6]

        # How the maximum ID is read depends on whether string IDs are used
        block.maxid = load(postfile) if stringids else postfile.read_uint()

        # Position immediately after the header and maximum ID
        block.dataoffset = postfile.tell()
        return block