def _initialize(self, path, start, length):
    """Open *path* and parse the SequenceFile header, positioning the
    stream at the first record.

    Sets up: the input stream and logical end offset, the format version,
    key/value class names and resolved classes, compression flags and
    codec, file metadata, the sync marker, and ``self._header_end``.

    :param path: path of the SequenceFile to open.
    :param start: start offset (currently unused here; presumably the
        caller seeks separately — TODO confirm).
    :param length: number of bytes to read; ``0`` means "to end of file".
    :raises VersionMismatchException: if the file's version byte is newer
        than the library's supported ``VERSION``.
    :raises NotImplementedError: for pre-block-compression files that
        encode class names with the deprecated UTF8 class.
    """
    self._stream = DataInputStream(FileInputStream(path))
    # length == 0 is the "read everything" sentinel.
    if length == 0:
        self._end = self._stream.getPos() + self._stream.length()
    else:
        self._end = self._stream.getPos() + length

    # Parse header: magic/version block is len(VERSION) bytes; the 4th
    # byte is the format version (a 1-char string under Python 2 — hence
    # the ord() below when comparing against a plain int).
    version_block = self._stream.read(len(VERSION))
    self._version = version_block[3]
    if self._version > VERSION[3]:
        raise VersionMismatchException(VERSION[3], self._version)

    if self._version < BLOCK_COMPRESS_VERSION:
        # Same as below, but with UTF8 Deprecated Class
        raise NotImplementedError
    else:
        # Store the names on self so the lazy getKeyClass()/getValueClass()
        # fallbacks (which read self._key_class_name / self._value_class_name)
        # do not raise AttributeError.
        self._key_class_name = Text.readString(self._stream)
        self._value_class_name = Text.readString(self._stream)
        self._key_class = hadoopClassFromName(self._key_class_name)
        self._value_class = hadoopClassFromName(self._value_class_name)

    # Record-level compression flag (versions > 2 only).
    if ord(self._version) > 2:
        self._decompress = self._stream.readBoolean()
    else:
        self._decompress = False

    # Block-level compression flag.
    if self._version >= BLOCK_COMPRESS_VERSION:
        self._block_compressed = self._stream.readBoolean()
    else:
        self._block_compressed = False

    # setup compression codec
    if self._decompress:
        if self._version >= CUSTOM_COMPRESS_VERSION:
            # Newer files name their codec explicitly in the header.
            codec_class = Text.readString(self._stream)
            self._codec = CodecPool().getDecompressor(codec_class)
        else:
            self._codec = CodecPool().getDecompressor()

    self._metadata = Metadata()
    if self._version >= VERSION_WITH_METADATA:
        self._metadata.readFields(self._stream)

    # Sync marker used to delimit/resynchronize on record boundaries.
    if self._version > 1:
        self._sync = self._stream.read(SYNC_HASH_SIZE)

    self._header_end = self._stream.getPos()
def getValueClass(self):
    """Return the value class, resolving it lazily from its name on first use."""
    cached = self._value_class
    if cached:
        return cached
    self._value_class = hadoopClassFromName(self._value_class_name)
    return self._value_class
def getKeyClass(self):
    """Return the key class, resolving it lazily from its name on first use."""
    cached = self._key_class
    if cached:
        return cached
    self._key_class = hadoopClassFromName(self._key_class_name)
    return self._key_class