def _initialize(self, path, start, length):
    """Open the stream for *path* and parse the file header.

    Populates the reader state from the header: ``_version``,
    ``_key_class_name``, ``_value_class_name``, ``_decompress``,
    ``_block_compressed``, ``_codec`` (only when the file is compressed),
    ``_metadata``, ``_sync`` and ``_header_end``.

    Args:
        path: Location handed to ``self.getStream`` to obtain the stream.
        start: Not used by this method.
        length: Number of bytes, counted from the stream's current
            position, that mark the end of the readable region. ``0``
            means use the stream's own ``length()``.

    Raises:
        VersionPrefixException: The header does not start with
            ``VERSION_PREFIX``.
        VersionMismatchException: The file's version is newer than the
            version in ``VERSION``.
        NotImplementedError: The file's version is older than
            ``BLOCK_COMPRESS_VERSION``.
    """
    self._stream = self.getStream(path)

    if length == 0:
        self._end = self._stream.getPos() + self._stream.length()
    else:
        self._end = self._stream.getPos() + length

    # Parse Header
    version_block = self._stream.read(len(VERSION))

    # The prefix must be validated on the whole header block. Checking the
    # single extracted version character against the prefix can never match
    # and would reject every file.
    if not version_block.startswith(VERSION_PREFIX):
        raise VersionPrefixException(VERSION_PREFIX,
                                     version_block[0:len(VERSION_PREFIX)])

    # The version is the element immediately following the prefix.
    self._version = version_block[len(VERSION_PREFIX)]

    if self._version > VERSION[len(VERSION_PREFIX)]:
        raise VersionMismatchException(VERSION[len(VERSION_PREFIX)],
                                       self._version)

    if self._version < BLOCK_COMPRESS_VERSION:
        # Same as below, but with UTF8 Deprecated Class
        raise NotImplementedError
    else:
        self._key_class_name = Text.readString(self._stream)
        self._value_class_name = Text.readString(self._stream)

    if ord(self._version) > 2:
        self._decompress = self._stream.readBoolean()
    else:
        self._decompress = False

    if self._version >= BLOCK_COMPRESS_VERSION:
        self._block_compressed = self._stream.readBoolean()
    else:
        self._block_compressed = False

    # setup compression codec
    if self._decompress:
        if self._version >= CUSTOM_COMPRESS_VERSION:
            # The header names the codec class to use.
            codec_class = Text.readString(self._stream)
            self._codec = CodecPool().getDecompressor(codec_class)
        else:
            self._codec = CodecPool().getDecompressor()

    self._metadata = Metadata()
    if self._version >= VERSION_WITH_METADATA:
        self._metadata.readFields(self._stream)

    # Compare numerically, as the ``ord(...) > 2`` check above does, rather
    # than comparing the version character against an int directly.
    if ord(self._version) > 1:
        self._sync = self._stream.read(SYNC_HASH_SIZE)
        self._header_end = self._stream.getPos()
def __init__(self, path, key_class, value_class, metadata, compress=False, block_compress=False):
    """Create a new file at *path* and write its header.

    Args:
        path: Destination file; it must not exist yet.
        key_class: Class stored as ``_key_class``.
        value_class: Class stored as ``_value_class``.
        metadata: Metadata for the file; a falsy value is replaced by a
            fresh ``Metadata()``.
        compress: Stored as ``_compress``; when truthy a compressor is
            obtained from ``CodecPool``.
        block_compress: Stored as ``_block_compress``; when truthy a
            compressor is obtained from ``CodecPool``.

    Raises:
        IOError: *path* already exists.
    """
    if os.path.exists(path):
        raise IOError("File %s already exists." % path)

    self._key_class = key_class
    self._value_class = value_class
    self._compress = compress
    self._block_compress = block_compress
    self._metadata = metadata if metadata else Metadata()

    # A compressor is only fetched when some form of compression is on.
    wants_codec = self._compress or self._block_compress
    self._codec = CodecPool().getCompressor() if wants_codec else None

    self._last_sync = 0
    self._block = None

    file_stream = FileOutputStream(path)
    self._stream = DataOutputStream(file_stream)

    # The sync marker is the 16-byte MD5 digest of a uuid1/timestamp seed.
    sync_seed = '%s@%d' % (uuid1().bytes, int(time() * 1000))
    self._sync = md5(sync_seed).digest()

    self._writeFileHeader()