예제 #1
0
    def _btexts(self, ixreader):
        """Yield the byte-string terms of ``self.fieldname`` inside this range.

        The bounds ``self.start`` and ``self.end`` are converted with the
        field's ``to_bytes``. A bound of ``None`` leaves that side of the
        range open. If either bound cannot be converted (``to_bytes`` raises
        ``ValueError``) nothing is yielded.

        :param ixreader: the reader whose ``schema`` and ``terms_from`` are
            used to look up the field and walk its terms.
        """

        fieldname = self.fieldname
        field = ixreader.schema[fieldname]
        startexcl = self.startexcl
        endexcl = self.endexcl

        if self.start is None:
            start = b("")
        else:
            try:
                start = field.to_bytes(self.start)
            except ValueError:
                return

        # ``None`` marks an open upper bound. A fixed byte sentinel would
        # silently drop any term that happens to sort after the sentinel.
        end = None
        if self.end is not None:
            try:
                end = field.to_bytes(self.end)
            except ValueError:
                return

        for fname, t in ixreader.terms_from(fieldname, start):
            # terms_from may continue into the following fields
            if fname != fieldname:
                break
            if startexcl and t == start:
                continue
            if end is not None:
                if t > end or (endexcl and t == end):
                    break
            yield t
예제 #2
0
 def decode_positions(self, valuestring):
     """Return the list of absolute positions encoded in ``valuestring``.

     Everything after the first ``_INT_SIZE`` bytes is deserialized with
     ``loads`` into a sequence of codes. The first item of each code is the
     distance from the previous position (starting from 0).

     :param valuestring: the encoded posting value (a bytestring).
     :returns: a list of absolute positions, in decoded order.
     """

     stop = b(".")
     # The payload may have been stored without its trailing "." byte
     # (presumably the pickle STOP opcode), so put it back before loading.
     if valuestring.endswith(stop):
         payload = valuestring
     else:
         payload = valuestring + stop

     positions = []
     current = 0
     for entry in loads(payload[_INT_SIZE:]):
         current = entry[0] + current
         positions.append(current)
     return positions
예제 #3
0
 def decode_position_boosts(self, valuestring):
     """Return ``(position, boost)`` pairs encoded in ``valuestring``.

     Everything after the first ``_INT_SIZE + _FLOAT_SIZE`` bytes is
     deserialized with ``loads`` into a sequence of codes. In each code,
     item 0 is the distance from the previous position (starting from 0)
     and item 1 is passed through unchanged as the boost.

     :param valuestring: the encoded posting value (a bytestring).
     :returns: a list of ``(position, boost)`` tuples, in decoded order.
     """

     stop = b(".")
     # The payload may have been stored without its trailing "." byte
     # (presumably the pickle STOP opcode), so put it back before loading.
     if valuestring.endswith(stop):
         payload = valuestring
     else:
         payload = valuestring + stop

     header_size = _INT_SIZE + _FLOAT_SIZE
     pairs = []
     current = 0
     for entry in loads(payload[header_size:]):
         current = entry[0] + current
         pairs.append((current, entry[1]))
     return pairs
예제 #4
0
 def decode_character_boosts(self, valuestring):
     """Return ``(position, startchar, endchar, boost)`` tuples encoded in
     ``valuestring``.

     Everything after the first ``_INT_SIZE + _FLOAT_SIZE`` bytes is
     deserialized with ``loads`` into a sequence of codes. In each code:

     * item 0 is the distance from the previous position (starting from 0);
     * item 1 is the distance from the previous end character (starting
       from 0) to this start character;
     * item 2 is the distance from this start character to its end
       character;
     * item 3 is passed through unchanged as the boost.

     :param valuestring: the encoded posting value (a bytestring).
     :returns: a list of 4-tuples, in decoded order.
     """

     stop = b(".")
     # The payload may have been stored without its trailing "." byte
     # (presumably the pickle STOP opcode), so put it back before loading.
     if valuestring.endswith(stop):
         payload = valuestring
     else:
         payload = valuestring + stop

     header_size = _INT_SIZE + _FLOAT_SIZE
     decoded = []
     current = 0
     last_end = 0
     for entry in loads(payload[header_size:]):
         current = current + entry[0]
         first = last_end + entry[1]
         last_end = first + entry[2]
         decoded.append((current, first, last_end, entry[3]))
     return decoded
예제 #5
0
 def decode_characters(self, valuestring):
     """Return ``(position, startchar, endchar)`` tuples encoded in
     ``valuestring``.

     Everything after the first ``_INT_SIZE`` bytes is deserialized with
     ``loads`` into a sequence of codes. In each code:

     * item 0 is the distance from the previous position (starting from 0);
     * item 1 is the distance from the previous end character (starting
       from 0) to this start character;
     * item 2 is the distance from this start character to its end
       character.

     :param valuestring: the encoded posting value (a bytestring).
     :returns: a list of 3-tuples, in decoded order.
     """

     stop = b(".")
     # The payload may have been stored without its trailing "." byte
     # (presumably the pickle STOP opcode), so put it back before loading.
     if valuestring.endswith(stop):
         payload = valuestring
     else:
         payload = valuestring + stop

     decoded = []
     current = 0
     last_end = 0
     for entry in loads(payload[_INT_SIZE:]):
         current = entry[0] + current
         first = entry[1] + last_end
         last_end = entry[2] + first
         decoded.append((current, first, last_end))
     return decoded
예제 #6
0
    def __init__(self, fixedlen, default=None):
        """
        :param fixedlen: the fixed length of byte strings in this column.
        :param default: the default value to use for documents that don't
            specify a value. If you don't specify a default, the column will
            use ``b'\\x00' * fixedlen``.
        :raises ValueError: if ``default`` is given and its length is not
            equal to ``fixedlen``.
        """

        self._fixedlen = fixedlen

        if default is None:
            # No explicit default: fall back to a run of zero bytes
            default = b("\x00") * fixedlen
        elif len(default) != fixedlen:
            raise ValueError(
                "default must be %r bytes long, got %r" % (fixedlen, len(default))
            )
        self._default = default
예제 #7
0
    def __init__(self, fixedlen=0, default=None):
        """
        :param fixedlen: an optional fixed length for the values. If you
            specify a number other than 0, the column will require all values
            to be the specified length.
        :param default: a default value to use for documents that don't specify
            one. If you don't specify a default, the column will use an empty
            bytestring (``b''``), or if you specify a fixed length,
            ``b'\\x00' * fixedlen``.
        :raises ValueError: if a non-zero ``fixedlen`` is given together with
            a ``default`` whose length is not equal to ``fixedlen``.
        """

        self._fixedlen = fixedlen

        if default is None:
            # No explicit default: zero bytes for fixed-length columns,
            # otherwise the empty bytestring
            default = b("\x00") * fixedlen if fixedlen else emptybytes
        elif fixedlen and len(default) != fixedlen:
            raise ValueError(
                "default must be %r bytes long, got %r" % (fixedlen, len(default))
            )
        self._default = default
예제 #8
0
 def _prep_vectors(self):
     """Create the vector postings file and write its header bytes.

     The file is created with ``self._create_file`` using the
     ``W3Codec.VPOSTS_EXT`` extension and kept on ``self._vpostfile``.
     """

     # An offset of 0 is reserved to mean "no vectors", so real postings
     # must not begin at the very start of the file. Writing a short header
     # first pushes them past offset 0.
     header = b("VPST")
     self._vpostfile = self._create_file(W3Codec.VPOSTS_EXT)
     self._vpostfile.write(header)
예제 #9
0
    pack_ushort,
    unpack_int,
    unpack_long,
    unpack_ushort,
)
from whoosh.util.numeric import byte_to_length, length_to_byte
from whoosh.util.numlists import delta_decode, delta_encode

# zlib is treated as optional: when it cannot be imported the name is bound to
# None instead (presumably so later code can check whether compression is
# available -- confirm against the code that uses it).
try:
    import zlib
except ImportError:
    zlib = None

# This byte sequence is written at the start of a posting list to identify the
# codec/version
WHOOSH3_HEADER_MAGIC = b("W3Bl")

# Column type to store field length info (type code "B", defaulting to 0 for
# documents that have no value)
LENGTHS_COLUMN = columns.NumericColumn("B", default=0)
# Column type to store pointers to vector posting lists (type code "I")
VECTOR_COLUMN = columns.NumericColumn("I")
# Column type to store vector posting list lengths (type code "i")
VECTOR_LEN_COLUMN = columns.NumericColumn("i")
# Column type to store values of stored fields: a PickleColumn wrapped around
# a CompressedBytesColumn
STORED_COLUMN = columns.PickleColumn(columns.CompressedBytesColumn())


class W3Codec(base.Codec):
    # File extensions
    TERMS_EXT = ".trm"  # Term index
    POSTS_EXT = ".pst"  # Term postings