Python HuffcdicReader.loadCdicの例

プログラミング言語: Python

名前空間/パッケージ名: mobi_uncompress

クラス/型: HuffcdicReader

メソッド/関数: loadCdic

hotexamples.comのコード掲載数: 4

Python HuffcdicReader.loadCdic - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのmobi_uncompress.HuffcdicReader.loadCdicの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

HuffcdicReader(2)

loadCdic(2)

loadHuff(2)

コード例 #1

ファイルを表示

ファイル: mobi_header.py プロジェクト: ywzhaiqi/MyKindleTools

    def __init__(self, sect, sectNumber):
        """Parse the PalmDOC/Mobipocket header stored in one section.

        Loads section ``sectNumber`` from ``sect``, installs PalmDOC
        defaults, picks the decompressor named by the header's compression
        field and, for Mobipocket headers only, overwrites the defaults
        with values read from the header (codec, title, EXTH block and the
        various section indexes).  Section indexes read from the header
        are stored with ``self.start`` added unless they equal 0xffffffff.

        Args:
            sect: section container.  This method uses its ``loadSection``,
                ``setsectiondescription``, ``ident`` and ``palmname``
                members.
            sectNumber: number of the section that holds this header.

        Raises:
            unpackException: if the section is neither tagged 'MOBI' nor
                part of a 'TEXtREAd' file, or if the compression field is
                not 1, 2 or 0x4448.
        """
        self.sect = sect
        self.start = sectNumber
        self.header = self.sect.loadSection(self.start)
        # A Mobipocket header carries the 'MOBI' tag in bytes 16..19; otherwise
        # only a 'TEXtREAd' file is accepted and treated as plain PalmDOC.
        if len(self.header)>20 and self.header[16:20] == 'MOBI':
            self.sect.setsectiondescription(0,"Mobipocket Header")
            self.palm = False
        elif self.sect.ident == 'TEXtREAd':
            self.sect.setsectiondescription(0, "PalmDOC Header")
            self.palm = True
        else:
            raise unpackException('Unknown File Format')

        # big-endian unsigned short at offset 0x8 (presumably the number of
        # text records); used below for the default resource boundaries
        self.records, = struct.unpack_from('>H', self.header, 0x8)

        # set defaults in case this is a PalmDOC
        self.title = self.sect.palmname
        self.length = len(self.header)-16
        self.type = 3
        self.codepage = 1252
        self.codec = 'windows-1252'
        self.unique_id = 0
        self.version = 0
        self.hasExth = False
        self.exth = ''
        self.exth_offset = self.length + 16
        self.exth_length = 0
        self.crypto_type = 0
        self.firstnontext = self.start+self.records + 1
        self.firstresource = self.start+self.records + 1
        # 0xffffffff is the value every index check below compares against
        # before adding self.start, i.e. it marks "index not set"
        self.ncxidx = 0xffffffff
        self.metaOrthIndex = 0xffffffff
        self.metaInflIndex = 0xffffffff
        self.skelidx = 0xffffffff
        self.fragidx = 0xffffffff
        self.guideidx = 0xffffffff
        self.fdst = 0xffffffff
        # first four bytes of the section that follows the header
        self.mlstart = self.sect.loadSection(self.start+1)[:4]
        self.rawSize = 0
        self.metadata = {}

        # set up for decompression/unpacking
        self.compression, = struct.unpack_from('>H', self.header, 0x0)
        if self.compression == 0x4448:
            # 0x4448 is ASCII 'DH': Huffman/CDIC compression
            reader = HuffcdicReader()
            # two big-endian 32-bit values at 0x70: first huff section
            # (relative to this header) and the number of huff/cdic sections
            huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70)
            huffoff = huffoff + self.start
            self.sect.setsectiondescription(huffoff,"Huffman Compression Seed")
            reader.loadHuff(self.sect.loadSection(huffoff))
            # the first of the huffnum sections was passed to loadHuff above;
            # the remaining huffnum-1 sections are passed to loadCdic
            for i in xrange(1, huffnum):
                self.sect.setsectiondescription(huffoff+i,"Huffman CDIC Compression Seed %d" % i)
                reader.loadCdic(self.sect.loadSection(huffoff+i))
            self.unpack = reader.unpack
        elif self.compression == 2:
            self.unpack = PalmdocReader().unpack
        elif self.compression == 1:
            self.unpack = UncompressedReader().unpack
        else:
            raise unpackException('invalid compression type: 0x%4x' % self.compression)

        # PalmDOC headers keep the defaults set above; nothing more to read
        if self.palm:
            return

        # five consecutive big-endian 32-bit fields in bytes 20..39
        self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack('>LLLLL', self.header[20:40])
        codec_map = {
            1252 : 'windows-1252',
            65001: 'utf-8',
        }
        # unknown codepages keep the 'windows-1252' default
        if self.codepage in codec_map.keys():
            self.codec = codec_map[self.codepage]

        # title
        # offset and length of the title within the header, stored at 0x54
        toff, tlen = struct.unpack('>II', self.header[0x54:0x5c])
        tend = toff + tlen
        self.title=self.header[toff:tend]

        # bit 0x40 of the 32-bit flag word at 0x80 signals an EXTH block
        exth_flag, = struct.unpack('>L', self.header[0x80:0x84])
        self.hasExth = exth_flag & 0x40
        self.exth_offset = self.length + 16
        self.exth_length = 0
        if self.hasExth:
            # the EXTH length is the 32-bit value 4 bytes into the block
            self.exth_length, = struct.unpack_from('>L', self.header, self.exth_offset+4)
            self.exth_length = ((self.exth_length + 3)>>2)<<2  # round to next 4 byte boundary
            self.exth = self.header[self.exth_offset:self.exth_offset+self.exth_length]

        # parse the exth / metadata
        self.parseMetaData()

        # self.mlstart = self.sect.loadSection(self.start+1)
        # self.mlstart = self.mlstart[0:4]
        self.crypto_type, = struct.unpack_from('>H', self.header, 0xC)

        # Start sector for additional files such as images, fonts, resources, etc
        # Can be missing so fall back to default set previously
        ofst, = struct.unpack_from('>L', self.header, 0x6C)
        if ofst != 0xffffffff:
            self.firstresource = ofst + self.start
        ofst, = struct.unpack_from('>L', self.header, 0x50)
        if ofst != 0xffffffff:
            self.firstnontext = ofst + self.start

        # none of the index fields below are read for Print Replica files
        if self.isPrintReplica():
            return

        if self.version < 8:
            # Dictionary metaOrthIndex
            self.metaOrthIndex, = struct.unpack_from('>L', self.header, 0x28)
            if self.metaOrthIndex != 0xffffffff:
                self.metaOrthIndex += self.start

            # Dictionary metaInflIndex
            self.metaInflIndex, = struct.unpack_from('>L', self.header, 0x2C)
            if self.metaInflIndex != 0xffffffff:
                self.metaInflIndex += self.start

        # handle older headers without any ncxindex info and later
        # specifically 0xe4 headers
        if self.length + 16 < 0xf8:
            return

        # NCX Index
        self.ncxidx, = struct.unpack('>L', self.header[0xf4:0xf8])
        if self.ncxidx != 0xffffffff:
            self.ncxidx += self.start

        # K8 specific Indexes
        # (read when the header is not in section 0 or reports version 8)
        if self.start != 0 or self.version == 8:
            # Index into <xml> file skeletons in RawML
            self.skelidx, = struct.unpack_from('>L', self.header, 0xfc)
            if self.skelidx != 0xffffffff:
                self.skelidx += self.start

            # Index into <div> sections in RawML
            self.fragidx, = struct.unpack_from('>L', self.header, 0xf8)
            if self.fragidx != 0xffffffff:
                self.fragidx += self.start

            # Index into Other files
            self.guideidx, = struct.unpack_from('>L', self.header, 0x104)
            if self.guideidx != 0xffffffff:
                self.guideidx += self.start

            # dictionaries do not seem to use the same approach in K8's
            # so disable them
            self.metaOrthIndex = 0xffffffff
            self.metaInflIndex = 0xffffffff

            # need to use the FDST record to find out how to properly unpack
            # the rawML into pieces
            # it is simply a table of start and end locations for each flow piece
            self.fdst, = struct.unpack_from('>L', self.header, 0xc0)
            self.fdstcnt, = struct.unpack_from('>L', self.header, 0xc4)
            # if cnt is 1 or less, fdst section number can be garbage
            if self.fdstcnt <= 1:
                self.fdst = 0xffffffff
            if self.fdst != 0xffffffff:
                self.fdst += self.start

コード例 #2

ファイルを表示

ファイル: mobi_unpack.py プロジェクト: mlitwin/hesperian-tools

    def __init__(self, sect, sectNumber):
        self.sect = sect
        self.start = sectNumber
        self.header = self.sect.loadSection(self.start)
        self.records, = struct.unpack_from(">H", self.header, 0x8)
        self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(
            ">LLLLL", self.header[20:40]
        )
        print "Mobi Version: ", self.version

        # codec
        self.codec = "windows-1252"
        codec_map = {1252: "windows-1252", 65001: "utf-8"}
        if self.codepage in codec_map.keys():
            self.codec = codec_map[self.codepage]
        print "Codec: ", self.codec

        # title
        toff, tlen = struct.unpack(">II", self.header[0x54:0x5C])
        tend = toff + tlen
        self.title = self.header[toff:tend]
        print "Title: ", self.title

        # set up for decompression/unpacking
        compression, = struct.unpack_from(">H", self.header, 0x0)
        if compression == 0x4448:
            print "Huffdic compression"
            reader = HuffcdicReader()
            huffoff, huffnum = struct.unpack_from(">LL", self.header, 0x70)
            huffoff = huffoff + self.start
            reader.loadHuff(self.sect.loadSection(huffoff))
            for i in xrange(1, huffnum):
                reader.loadCdic(self.sect.loadSection(huffoff + i))
            self.unpack = reader.unpack
        elif compression == 2:
            print "Palmdoc compression"
            self.unpack = PalmdocReader().unpack
        elif compression == 1:
            print "No compression"
            self.unpack = UncompressedReader().unpack
        else:
            raise unpackException("invalid compression type: 0x%4x" % compression)

        exth_flag, = struct.unpack(">L", self.header[0x80:0x84])
        self.hasExth = exth_flag & 0x40
        self.mlstart = self.sect.loadSection(self.start + 1)
        self.mlstart = self.mlstart[0:4]
        self.crypto_type, = struct.unpack_from(">H", self.header, 0xC)

        # default initial values set to disable these advanced features not found in TEXtREAd
        self.firstaddl = self.records + 1
        self.ncxidx = 0xFFFFFFFF
        self.metaOrthIndex = 0xFFFFFFFF
        self.metaInflIndex = 0xFFFFFFFF
        self.skelidx = 0xFFFFFFFF
        self.dividx = 0xFFFFFFFF
        self.othidx = 0xFFFFFFF
        self.fdst = 0xFFFFFFFF

        if self.sect.ident == "TEXtREAd":
            return

        # Start sector for additional files such as images, fonts, resources, etc
        self.firstaddl, = struct.unpack_from(">L", self.header, 0x6C)
        if self.firstaddl != 0xFFFFFFFF:
            self.firstaddl += self.start

        if self.mlstart == "%MOP":
            return

        if self.version < 8:
            # Dictionary metaOrthIndex
            self.metaOrthIndex, = struct.unpack_from(">L", self.header, 0x28)
            if self.metaOrthIndex != 0xFFFFFFFF:
                self.metaOrthIndex += self.start

            # Dictionary metaInflIndex
            self.metaInflIndex, = struct.unpack_from(">L", self.header, 0x2C)
            if self.metaInflIndex != 0xFFFFFFFF:
                self.metaInflIndex += self.start

        # handle older headers without any ncxindex info and later
        # specifically 0xe4 headers
        if self.length + 16 < 0xF8:
            return

        # NCX Index
        self.ncxidx, = struct.unpack(">L", self.header[0xF4:0xF8])
        if self.ncxidx != 0xFFFFFFFF:
            self.ncxidx += self.start

        # K8 specific Indexes
        if self.start != 0 or self.version == 8:
            # Index into <xml> file skeletons in RawML
            self.skelidx, = struct.unpack_from(">L", self.header, 0xFC)
            if self.skelidx != 0xFFFFFFFF:
                self.skelidx += self.start

            # Index into <div> sections in RawML
            self.dividx, = struct.unpack_from(">L", self.header, 0xF8)
            if self.dividx != 0xFFFFFFFF:
                self.dividx += self.start

            # Index into Other files
            self.othidx, = struct.unpack_from(">L", self.header, 0x104)
            if self.othidx != 0xFFFFFFFF:
                self.othidx += self.start

            # dictionaries do not seem to use the same approach in K8's
            # so disable them
            self.metaOrthIndex = 0xFFFFFFFF
            self.metaInflIndex = 0xFFFFFFFF

            # need to use the FDST record to find out how to properly unpack
            # the rawML into pieces
            # it is simply a table of start and end locations for each flow piece
            self.fdst, = struct.unpack_from(">L", self.header, 0xC0)
            self.fdstcnt, = struct.unpack_from(">L", self.header, 0xC4)
            # if cnt is 1 or less, fdst section mumber can be garbage
            if self.fdstcnt <= 1:
                self.fdst = 0xFFFFFFFF
            if self.fdst != 0xFFFFFFFF:
                self.fdst += self.start

        if DEBUG:
            print "firstaddl %0x" % self.firstaddl
            print "ncxidx %0x" % self.ncxidx
            print "exth flags %0x" % exth_flag
            if self.version == 8 or self.start != 0:
                print "skelidx %0x" % self.skelidx
                print "dividx %0x" % self.dividx
                print "othidx %0x" % self.othidx
                print "fdst %0x" % self.fdst

コード例 #3

ファイルを表示

ファイル: mobi_unpack.py プロジェクト: tarsbase/bookworm-1

    def __init__(self, sect, sectNumber):
        self.sect = sect
        self.start = sectNumber
        self.header = self.sect.loadSection(self.start)
        self.records, = struct.unpack_from('>H', self.header, 0x8)
        self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(
            '>LLLLL', self.header[20:40])
        print "Mobi Version: ", self.version

        # codec
        self.codec = 'windows-1252'
        codec_map = {
            1252: 'windows-1252',
            65001: 'utf-8',
        }
        if self.codepage in codec_map.keys():
            self.codec = codec_map[self.codepage]
        print "Codec: ", self.codec

        # title
        toff, tlen = struct.unpack('>II', self.header[0x54:0x5c])
        tend = toff + tlen
        self.title = self.header[toff:tend]
        print "Title: ", self.title

        # set up for decompression/unpacking
        compression, = struct.unpack_from('>H', self.header, 0x0)
        if compression == 0x4448:
            print "Huffdic compression"
            reader = HuffcdicReader()
            huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70)
            huffoff = huffoff + self.start
            reader.loadHuff(self.sect.loadSection(huffoff))
            for i in xrange(1, huffnum):
                reader.loadCdic(self.sect.loadSection(huffoff + i))
            self.unpack = reader.unpack
        elif compression == 2:
            print "Palmdoc compression"
            self.unpack = PalmdocReader().unpack
        elif compression == 1:
            print "No compression"
            self.unpack = UncompressedReader().unpack
        else:
            raise unpackException('invalid compression type: 0x%4x' %
                                  compression)

        exth_flag, = struct.unpack('>L', self.header[0x80:0x84])
        self.hasExth = exth_flag & 0x40
        self.mlstart = self.sect.loadSection(self.start + 1)
        self.mlstart = self.mlstart[0:4]
        self.crypto_type, = struct.unpack_from('>H', self.header, 0xC)

        # default initial values set to disable these advanced features not found in TEXtREAd
        self.firstaddl = self.records + 1
        self.ncxidx = 0xffffffff
        self.metaOrthIndex = 0xffffffff
        self.metaInflIndex = 0xffffffff
        self.skelidx = 0xffffffff
        self.dividx = 0xffffffff
        self.othidx = 0xfffffff
        self.fdst = 0xffffffff

        if self.sect.ident == 'TEXtREAd':
            return

        # Start sector for additional files such as images, fonts, resources, etc
        self.firstaddl, = struct.unpack_from('>L', self.header, 0x6C)
        if self.firstaddl != 0xffffffff:
            self.firstaddl += self.start

        if self.mlstart == '%MOP':
            return

        if self.version < 8:
            # Dictionary metaOrthIndex
            self.metaOrthIndex, = struct.unpack_from('>L', self.header, 0x28)
            if self.metaOrthIndex != 0xffffffff:
                self.metaOrthIndex += self.start

            # Dictionary metaInflIndex
            self.metaInflIndex, = struct.unpack_from('>L', self.header, 0x2C)
            if self.metaInflIndex != 0xffffffff:
                self.metaInflIndex += self.start

        # handle older headers without any ncxindex info and later
        # specifically 0xe4 headers
        if self.length + 16 < 0xf8:
            return

        # NCX Index
        self.ncxidx, = struct.unpack('>L', self.header[0xf4:0xf8])
        if self.ncxidx != 0xffffffff:
            self.ncxidx += self.start

        # K8 specific Indexes
        if self.start != 0 or self.version == 8:
            # Index into <xml> file skeletons in RawML
            self.skelidx, = struct.unpack_from('>L', self.header, 0xfc)
            if self.skelidx != 0xffffffff:
                self.skelidx += self.start

            # Index into <div> sections in RawML
            self.dividx, = struct.unpack_from('>L', self.header, 0xf8)
            if self.dividx != 0xffffffff:
                self.dividx += self.start

            # Index into Other files
            self.othidx, = struct.unpack_from('>L', self.header, 0x104)
            if self.othidx != 0xffffffff:
                self.othidx += self.start

            # dictionaries do not seem to use the same approach in K8's
            # so disable them
            self.metaOrthIndex = 0xffffffff
            self.metaInflIndex = 0xffffffff

            # need to use the FDST record to find out how to properly unpack
            # the rawML into pieces
            # it is simply a table of start and end locations for each flow piece
            self.fdst, = struct.unpack_from('>L', self.header, 0xc0)
            self.fdstcnt, = struct.unpack_from('>L', self.header, 0xc4)
            # if cnt is 1 or less, fdst section mumber can be garbage
            if self.fdstcnt <= 1:
                self.fdst = 0xffffffff
            if self.fdst != 0xffffffff:
                self.fdst += self.start

        if DEBUG:
            print "firstaddl %0x" % self.firstaddl
            print "ncxidx %0x" % self.ncxidx
            print "exth flags %0x" % exth_flag
            if self.version == 8 or self.start != 0:
                print "skelidx %0x" % self.skelidx
                print "dividx %0x" % self.dividx
                print "othidx %0x" % self.othidx
                print "fdst %0x" % self.fdst

コード例 #4

ファイルを表示

    def __init__(self, sect, sectNumber):
        """Parse the PalmDOC/Mobipocket header stored in one section.

        Loads section ``sectNumber`` from ``sect``, installs PalmDOC
        defaults, picks the decompressor named by the header's compression
        field and, for Mobipocket headers only, overwrites the defaults
        with values read from the header (codec, title, EXTH block and the
        various section indexes).  Section indexes read from the header
        are stored with ``self.start`` added unless they equal 0xffffffff.

        Args:
            sect: section container.  This method uses its ``loadSection``,
                ``setsectiondescription``, ``ident`` and ``palmname``
                members.
            sectNumber: number of the section that holds this header.

        Raises:
            unpackException: if the section is neither tagged 'MOBI' nor
                part of a 'TEXtREAd' file, or if the compression field is
                not 1, 2 or 0x4448.
        """
        self.sect = sect
        self.start = sectNumber
        self.header = self.sect.loadSection(self.start)
        # A Mobipocket header carries the 'MOBI' tag in bytes 16..19; otherwise
        # only a 'TEXtREAd' file is accepted and treated as plain PalmDOC.
        if len(self.header) > 20 and self.header[16:20] == 'MOBI':
            self.sect.setsectiondescription(0, "Mobipocket Header")
            self.palm = False
        elif self.sect.ident == 'TEXtREAd':
            self.sect.setsectiondescription(0, "PalmDOC Header")
            self.palm = True
        else:
            raise unpackException('Unknown File Format')

        # big-endian unsigned short at offset 0x8 (presumably the number of
        # text records); used below for the default resource boundaries
        self.records, = struct.unpack_from('>H', self.header, 0x8)

        # set defaults in case this is a PalmDOC
        self.title = self.sect.palmname
        self.length = len(self.header) - 16
        self.type = 3
        self.codepage = 1252
        self.codec = 'windows-1252'
        self.unique_id = 0
        self.version = 0
        self.hasExth = False
        self.exth = ''
        self.exth_offset = self.length + 16
        self.exth_length = 0
        self.crypto_type = 0
        self.firstnontext = self.start + self.records + 1
        self.firstresource = self.start + self.records + 1
        # 0xffffffff is the value every index check below compares against
        # before adding self.start, i.e. it marks "index not set"
        self.ncxidx = 0xffffffff
        self.metaOrthIndex = 0xffffffff
        self.metaInflIndex = 0xffffffff
        self.skelidx = 0xffffffff
        self.fragidx = 0xffffffff
        self.guideidx = 0xffffffff
        self.fdst = 0xffffffff
        # first four bytes of the section that follows the header
        self.mlstart = self.sect.loadSection(self.start + 1)[:4]
        self.rawSize = 0
        self.metadata = {}

        # set up for decompression/unpacking
        self.compression, = struct.unpack_from('>H', self.header, 0x0)
        if self.compression == 0x4448:
            # 0x4448 is ASCII 'DH': Huffman/CDIC compression
            reader = HuffcdicReader()
            # two big-endian 32-bit values at 0x70: first huff section
            # (relative to this header) and the number of huff/cdic sections
            huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70)
            huffoff = huffoff + self.start
            self.sect.setsectiondescription(huffoff,
                                            "Huffman Compression Seed")
            reader.loadHuff(self.sect.loadSection(huffoff))
            # the first of the huffnum sections was passed to loadHuff above;
            # the remaining huffnum-1 sections are passed to loadCdic
            for i in xrange(1, huffnum):
                self.sect.setsectiondescription(
                    huffoff + i, "Huffman CDIC Compression Seed %d" % i)
                reader.loadCdic(self.sect.loadSection(huffoff + i))
            self.unpack = reader.unpack
        elif self.compression == 2:
            self.unpack = PalmdocReader().unpack
        elif self.compression == 1:
            self.unpack = UncompressedReader().unpack
        else:
            raise unpackException('invalid compression type: 0x%4x' %
                                  self.compression)

        # PalmDOC headers keep the defaults set above; nothing more to read
        if self.palm:
            return

        # five consecutive big-endian 32-bit fields in bytes 20..39
        self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(
            '>LLLLL', self.header[20:40])
        codec_map = {
            1252: 'windows-1252',
            65001: 'utf-8',
        }
        # unknown codepages keep the 'windows-1252' default
        if self.codepage in codec_map.keys():
            self.codec = codec_map[self.codepage]

        # title
        # offset and length of the title within the header, stored at 0x54
        toff, tlen = struct.unpack('>II', self.header[0x54:0x5c])
        tend = toff + tlen
        self.title = self.header[toff:tend]

        # bit 0x40 of the 32-bit flag word at 0x80 signals an EXTH block
        exth_flag, = struct.unpack('>L', self.header[0x80:0x84])
        self.hasExth = exth_flag & 0x40
        self.exth_offset = self.length + 16
        self.exth_length = 0
        if self.hasExth:
            # the EXTH length is the 32-bit value 4 bytes into the block
            self.exth_length, = struct.unpack_from('>L', self.header,
                                                   self.exth_offset + 4)
            self.exth_length = ((self.exth_length + 3) >>
                                2) << 2  # round to next 4 byte boundary
            self.exth = self.header[self.exth_offset:self.exth_offset +
                                    self.exth_length]

        # parse the exth / metadata
        self.parseMetaData()

        # self.mlstart = self.sect.loadSection(self.start+1)
        # self.mlstart = self.mlstart[0:4]
        self.crypto_type, = struct.unpack_from('>H', self.header, 0xC)

        # Start sector for additional files such as images, fonts, resources, etc
        # Can be missing so fall back to default set previously
        ofst, = struct.unpack_from('>L', self.header, 0x6C)
        if ofst != 0xffffffff:
            self.firstresource = ofst + self.start
        ofst, = struct.unpack_from('>L', self.header, 0x50)
        if ofst != 0xffffffff:
            self.firstnontext = ofst + self.start

        # none of the index fields below are read for Print Replica files
        if self.isPrintReplica():
            return

        if self.version < 8:
            # Dictionary metaOrthIndex
            self.metaOrthIndex, = struct.unpack_from('>L', self.header, 0x28)
            if self.metaOrthIndex != 0xffffffff:
                self.metaOrthIndex += self.start

            # Dictionary metaInflIndex
            self.metaInflIndex, = struct.unpack_from('>L', self.header, 0x2C)
            if self.metaInflIndex != 0xffffffff:
                self.metaInflIndex += self.start

        # handle older headers without any ncxindex info and later
        # specifically 0xe4 headers
        if self.length + 16 < 0xf8:
            return

        # NCX Index
        self.ncxidx, = struct.unpack('>L', self.header[0xf4:0xf8])
        if self.ncxidx != 0xffffffff:
            self.ncxidx += self.start

        # K8 specific Indexes
        # (read when the header is not in section 0 or reports version 8)
        if self.start != 0 or self.version == 8:
            # Index into <xml> file skeletons in RawML
            self.skelidx, = struct.unpack_from('>L', self.header, 0xfc)
            if self.skelidx != 0xffffffff:
                self.skelidx += self.start

            # Index into <div> sections in RawML
            self.fragidx, = struct.unpack_from('>L', self.header, 0xf8)
            if self.fragidx != 0xffffffff:
                self.fragidx += self.start

            # Index into Other files
            self.guideidx, = struct.unpack_from('>L', self.header, 0x104)
            if self.guideidx != 0xffffffff:
                self.guideidx += self.start

            # dictionaries do not seem to use the same approach in K8's
            # so disable them
            self.metaOrthIndex = 0xffffffff
            self.metaInflIndex = 0xffffffff

            # need to use the FDST record to find out how to properly unpack
            # the rawML into pieces
            # it is simply a table of start and end locations for each flow piece
            self.fdst, = struct.unpack_from('>L', self.header, 0xc0)
            self.fdstcnt, = struct.unpack_from('>L', self.header, 0xc4)
            # if cnt is 1 or less, fdst section number can be garbage
            if self.fdstcnt <= 1:
                self.fdst = 0xffffffff
            if self.fdst != 0xffffffff:
                self.fdst += self.start