Exemple #1
0
    def _initialize(self, path, start, length):
        """Open the file at *path* and parse its header into reader state.

        Header fields are consumed strictly in stream order: the version
        block, the key and value class names, the compression flags, the
        optional codec class name, the metadata and finally the sync marker.

        Args:
            path: Handed to ``FileInputStream`` to open the file.
            start: Accepted but not used by this method.
                NOTE(review): no seek to ``start`` is performed here --
                confirm whether callers expect one.
            length: Number of bytes, counted from the stream position right
                after opening, that bound ``self._end``.  ``0`` means "use
                the stream's full length".

        Raises:
            VersionMismatchException: The file's version element is greater
                than ``VERSION[3]``.
            NotImplementedError: The file's version is lower than
                ``BLOCK_COMPRESS_VERSION``.
        """
        self._stream = DataInputStream(FileInputStream(path))

        if length == 0:
            self._end = self._stream.getPos() + self._stream.length()
        else:
            self._end = self._stream.getPos() + length

        # Parse Header
        # NOTE(review): only element 3 of the version block is inspected; the
        # leading elements are never compared with the start of VERSION.
        version_block = self._stream.read(len(VERSION))

        self._version = version_block[3]
        if self._version > VERSION[3]:
            raise VersionMismatchException(VERSION[3], self._version)

        if self._version < BLOCK_COMPRESS_VERSION:
            # Same as below, but with UTF8 Deprecated Class
            raise NotImplementedError

        # Keep the names on the instance as well: getKeyClass/getValueClass
        # resolve from self._key_class_name / self._value_class_name when the
        # cached class is missing.
        self._key_class_name = Text.readString(self._stream)
        self._value_class_name = Text.readString(self._stream)
        self._key_class = hadoopClassFromName(self._key_class_name)
        self._value_class = hadoopClassFromName(self._value_class_name)

        # The version is compared with plain integers below, so convert it
        # once instead of mixing ord()-based and raw comparisons.
        version_number = ord(self._version)

        if version_number > 2:
            self._decompress = self._stream.readBoolean()
        else:
            self._decompress = False

        if self._version >= BLOCK_COMPRESS_VERSION:
            self._block_compressed = self._stream.readBoolean()
        else:
            self._block_compressed = False

        # setup compression codec
        if self._decompress:
            if self._version >= CUSTOM_COMPRESS_VERSION:
                codec_class = Text.readString(self._stream)
                self._codec = CodecPool().getDecompressor(codec_class)
            else:
                self._codec = CodecPool().getDecompressor()
        else:
            # Always define the attribute so readers of self._codec do not
            # depend on the file being compressed.
            self._codec = None

        self._metadata = Metadata()
        if self._version >= VERSION_WITH_METADATA:
            self._metadata.readFields(self._stream)

        if version_number > 1:
            self._sync = self._stream.read(SYNC_HASH_SIZE)
            self._header_end = self._stream.getPos()
Exemple #2
0
 def getValueClass(self):
     """Return the value class, resolving and caching it on first use.

     When ``self._value_class`` is falsy it is looked up from
     ``self._value_class_name`` with ``hadoopClassFromName`` and stored
     back on the instance before being returned.
     """
     resolved = self._value_class
     if resolved:
         return resolved

     resolved = hadoopClassFromName(self._value_class_name)
     self._value_class = resolved
     return resolved
Exemple #3
0
 def getKeyClass(self):
     """Return the key class, resolving and caching it on first use.

     When ``self._key_class`` is falsy it is looked up from
     ``self._key_class_name`` with ``hadoopClassFromName`` and stored
     back on the instance before being returned.
     """
     resolved = self._key_class
     if resolved:
         return resolved

     resolved = hadoopClassFromName(self._key_class_name)
     self._key_class = resolved
     return resolved