Beispiel #1
0
    def from_bytes(cls, s):
        """Build a ``cls`` instance from its serialized byte form ``s``.

        The first ``cls._struct.size`` bytes are unpacked as a fixed header
        whose fields are, in order: flags, weight, document frequency,
        minimum length byte, maximum length byte, maximum weight, minimum ID
        and maximum ID. The bytes after the header hold either the inlined
        postings (flags truthy) or an offset/length pair (flags falsy).
        """
        layout = cls._struct
        header_end = layout.size
        fields = layout.unpack(s[:header_end])
        info = cls()

        # An ID field equal to this value is reported as None.
        no_id = 0xffffffff

        has_inline_postings = fields[0]
        info._weight = fields[1]
        info._df = fields[2]
        info._minlength = byte_to_length(fields[3])
        info._maxlength = byte_to_length(fields[4])
        info._maxweight = fields[5]
        info._minid = fields[6] if fields[6] != no_id else None
        info._maxid = fields[7] if fields[7] != no_id else None

        if not has_inline_postings:
            # The trailing bytes are a pointer into the posting file
            # followed by a length.
            offset_end = header_end + _LONG_SIZE
            info._offset = unpack_long(s[header_end:offset_end])[0]
            # NOTE(review): unlike the offset above, the unpack_int result is
            # stored without [0] indexing -- confirm whether unpack_int
            # returns a bare int or a one-element tuple.
            info._length = unpack_int(s[offset_end:offset_end + _INT_SIZE])
            return info

        # Everything after the header is the inlined postings payload.
        info._inlined = loads(s[header_end:])
        return info
    def from_string(cls, s):
        """Build a ``cls`` instance from the serialized string ``s``.

        The first byte selects the format. Values 0 and 1 mean a fixed
        header (``cls.struct``) follows, then the postings: a packed long
        for 0, a ``loads``-decoded payload for 1. Any other leading byte is
        treated as the old format, a variable-length pickled tuple, and the
        statistics that format did not store are filled with placeholders.
        """
        header_byte = ord(s[0:1])
        if header_byte >= 2:
            # Old format was encoded as a variable length pickled tuple
            legacy = loads(s + b("."))
            item_count = len(legacy)
            if item_count == 1:
                freq = docfreq = 1
                postings = legacy[0]
            elif item_count == 2:
                freq = docfreq = legacy[1]
                postings = legacy[0]
            else:
                freq, postings, docfreq = legacy
            # Fake values for stats which weren't stored before
            minlen = 1
            maxlen = 106374
            maxweight = 999999999
            maxwol = 999999999
            minid = -1
            maxid = -1
        else:
            layout = cls.struct
            body_start = layout.size + 1
            # Freq, doc freq, min len, max len, max weight, max WOL,
            # min ID, max ID
            (freq, docfreq, minlen, maxlen,
             maxweight, maxwol, minid, maxid) = layout.unpack(s[1:body_start])
            if minid == NO_ID:
                minid = None
            if maxid == NO_ID:
                maxid = None
            # Postings follow the fixed header
            raw_postings = s[body_start:]
            if header_byte == 0:
                postings = unpack_long(raw_postings)[0]
            else:
                postings = loads(raw_postings + b("."))

        return cls(freq, docfreq, minlen, maxlen, maxweight, maxwol,
                   minid, maxid, postings)
    def from_bytes(cls, s):
        """Deserialize ``s`` into a new ``cls`` instance.

        ``s`` starts with a fixed header of ``cls._struct.size`` bytes
        (flags, weight, document frequency, min/max length bytes, max weight,
        min/max ID). When the flags field is truthy the remainder is the
        inlined postings; otherwise it is an offset followed by a length.
        """
        header_struct = cls._struct
        header_size = header_struct.size
        unpacked = header_struct.unpack(s[:header_size])
        result = cls()

        # Sentinel stored in the ID fields; it is mapped to None.
        missing_id = 0xffffffff

        inline_flag = unpacked[0]
        result._weight = unpacked[1]
        result._df = unpacked[2]
        result._minlength = byte_to_length(unpacked[3])
        result._maxlength = byte_to_length(unpacked[4])
        result._maxweight = unpacked[5]
        result._minid = unpacked[6] if unpacked[6] != missing_id else None
        result._maxid = unpacked[7] if unpacked[7] != missing_id else None

        if not inline_flag:
            # Last bytes are a pointer into the posting file and a length.
            length_start = header_size + _LONG_SIZE
            length_end = length_start + _INT_SIZE
            result._offset = unpack_long(s[header_size:length_start])[0]
            # NOTE(review): the unpack_int result is kept as returned (no
            # [0]), whereas unpack_long is indexed -- confirm that
            # unpack_int does not return a tuple.
            result._length = unpack_int(s[length_start:length_end])
            return result

        # Postings are stored inline after the header.
        result._inlined = loads(s[header_size:])
        return result
Beispiel #4
0
 def from_file(file, stringids=False):
     """Read a block header from ``file`` and return it as a ``BlockInfo``.

     Reading starts at the file's current position. After the fixed-size
     header (``BlockInfo._struct``) the maximum ID is read, and the position
     right after it is recorded as ``dataoffset``.

     :param file: object providing ``tell()``, ``read(n)``,
         ``read_string()`` and ``read_uint()``.
     :param stringids: if true, the maximum ID is read with
         ``read_string()`` and UTF-8 decoded; otherwise it is read with
         ``read_uint()``.
     :returns: a ``BlockInfo`` built from the header fields.
     """
     here = file.tell()

     encoded_header = file.read(BlockInfo._struct.size)
     header = BlockInfo._struct.unpack(encoded_header)
     (flags, _, _, nextoffset, idslen, weightslen, postcount, maxweight,
      maxwol, _, minlength) = header

     if not flags:
         # With no flags set, the next-block offset is the long stored in
         # the first 8 header bytes. unpack_long returns a sequence, so take
         # its first item; otherwise nextoffset would be a tuple here but an
         # int in the other branch.
         nextoffset = unpack_long(encoded_header[:8])[0]
     else:
         # The header field is relative to where this header started.
         nextoffset = here + nextoffset

     assert postcount > 0
     minlength = byte_to_length(minlength)

     if stringids:
         maxid = utf8decode(file.read_string())[0]
     else:
         maxid = file.read_uint()

     # Whatever follows the header and the max ID starts here.
     dataoffset = file.tell()
     return BlockInfo(flags=flags, nextoffset=nextoffset,
                      postcount=postcount, maxweight=maxweight,
                      maxwol=maxwol, maxid=maxid, minlength=minlength,
                      dataoffset=dataoffset, idslen=idslen,
                      weightslen=weightslen)
Beispiel #5
0
    def from_file(file, stringids=False):
        """Read a block header from ``file`` and return it as a ``BlockInfo``.

        Reading starts at the file's current position. After the fixed-size
        header (``BlockInfo._struct``) the maximum ID is read, and the
        position right after it is recorded as ``dataoffset``.

        :param file: object providing ``tell()``, ``read(n)``,
            ``read_string()`` and ``read_uint()``.
        :param stringids: if true, the maximum ID is read with
            ``read_string()`` and UTF-8 decoded; otherwise it is read with
            ``read_uint()``.
        :returns: a ``BlockInfo`` built from the header fields.
        """
        here = file.tell()

        encoded_header = file.read(BlockInfo._struct.size)
        header = BlockInfo._struct.unpack(encoded_header)
        (flags, _, _, nextoffset, idslen, weightslen, postcount, maxweight,
         maxwol, _, minlength) = header

        if not flags:
            # With no flags set, the next-block offset is the long stored in
            # the first 8 header bytes. unpack_long returns a sequence, so
            # take its first item; otherwise nextoffset would be a tuple
            # here but an int in the other branch.
            nextoffset = unpack_long(encoded_header[:8])[0]
        else:
            # The header field is relative to where this header started.
            nextoffset = here + nextoffset

        assert postcount > 0
        minlength = byte_to_length(minlength)

        if stringids:
            maxid = utf8decode(file.read_string())[0]
        else:
            maxid = file.read_uint()

        # Whatever follows the header and the max ID starts here.
        dataoffset = file.tell()
        return BlockInfo(flags=flags,
                         nextoffset=nextoffset,
                         postcount=postcount,
                         maxweight=maxweight,
                         maxwol=maxwol,
                         maxid=maxid,
                         minlength=minlength,
                         dataoffset=dataoffset,
                         idslen=idslen,
                         weightslen=weightslen)
Beispiel #6
0
    def from_string(cls, s):
        """Build a ``cls`` instance from the serialized bytes ``s``.

        The first byte selects the format. Values 0 and 1 mean a fixed
        header (``cls.struct``) follows, then the postings: a packed long
        for 0, a ``loads``-decoded payload for 1. Any other leading byte is
        treated as the old format, a variable-length pickled tuple, with
        placeholder statistics. The decoded postings are attached to the
        returned object as ``postings``.
        """
        assert isinstance(s, bytes_type)

        # Indexing gives a 1-char str on Python 2.x and an int on Python 3.
        first = s[0]
        header_byte = ord(first) if isinstance(s, string_type) else first

        if header_byte >= 2:
            # Old format was encoded as a variable length pickled tuple
            legacy = loads(s + b("."))
            item_count = len(legacy)
            if item_count == 1:
                weight = docfreq = 1
                postings = legacy[0]
            elif item_count == 2:
                weight = docfreq = legacy[1]
                postings = legacy[0]
            else:
                weight, postings, docfreq = legacy
            # Fake values for stats which weren't stored before
            minlen_byte = 1
            maxlen_byte = 255
            maxweight = 999999999
            minid = -1
            maxid = -1
        else:
            layout = cls.struct
            body_start = layout.size + 1
            # Weight, doc freq, min len, max len, max weight, unused,
            # min ID, max ID
            (weight, docfreq, minlen_byte, maxlen_byte,
             maxweight, _, minid, maxid) = layout.unpack(s[1:body_start])
            if minid == NO_ID:
                minid = None
            if maxid == NO_ID:
                maxid = None
            # Postings follow the fixed header
            raw_postings = s[body_start:]
            if header_byte == 0:
                postings = unpack_long(raw_postings)[0]
            else:
                postings = loads(raw_postings + b("."))

        # Lengths are kept in byte form up to here; convert on construction.
        obj = cls(weight, docfreq, byte_to_length(minlen_byte),
                  byte_to_length(maxlen_byte), maxweight, minid, maxid)
        obj.postings = postings
        return obj
Beispiel #7
0
 def valuedecoder(self, v):
     """Decode ``v`` with ``unpack_long`` and return the first item."""
     fields = unpack_long(v)
     return fields[0]
Beispiel #8
0
 def get_long(self, position):
     """Return the long decoded from ``_LONG_SIZE`` items of ``self.map`` at ``position``."""
     source = self.map
     raw = source[position:position + _LONG_SIZE]
     fields = unpack_long(raw)
     return fields[0]
Beispiel #9
0
 def read_long(self):
     """Read ``_LONG_SIZE`` from ``self.file`` and return the decoded long."""
     raw = self.file.read(_LONG_SIZE)
     fields = unpack_long(raw)
     return fields[0]
Beispiel #10
0
 def valuedecoder(self, v):
     """Return the first item produced by ``unpack_long`` for ``v``."""
     unpacked = unpack_long(v)
     return unpacked[0]
Beispiel #11
0
 def get_long(self, position):
     """Return the long decoded from ``self.get(position, _LONG_SIZE)``."""
     raw = self.get(position, _LONG_SIZE)
     fields = unpack_long(raw)
     return fields[0]
 def get_long(self, position):
     """Decode a long from the ``_LONG_SIZE``-wide slice of ``self.map`` starting at ``position``."""
     mapped = self.map
     chunk = mapped[position:position + _LONG_SIZE]
     unpacked = unpack_long(chunk)
     return unpacked[0]
 def read_long(self):
     """Return the long decoded from the next ``_LONG_SIZE`` read from ``self.file``."""
     chunk = self.file.read(_LONG_SIZE)
     unpacked = unpack_long(chunk)
     return unpacked[0]
Beispiel #14
0
 def get_long(self, position):
     """Fetch ``_LONG_SIZE`` at ``position`` via ``self.get`` and return the decoded long."""
     chunk = self.get(position, _LONG_SIZE)
     unpacked = unpack_long(chunk)
     return unpacked[0]