def __init__(self, sect, sectNumber):
    """Parse the PalmDOC / Mobipocket header held in section ``sectNumber``.

    Args:
        sect: section container.  This method uses its ``loadSection``,
            ``setsectiondescription``, ``ident`` and ``palmname`` members.
        sectNumber: number of the section holding the header.  It is kept
            as ``self.start`` and added to every section index read from
            the header.

    Raises:
        unpackException: if the section is neither a 'MOBI' header nor
            part of a 'TEXtREAd' file, or if the compression type is not
            one of 1, 2 or 0x4448.
    """
    # marker the header uses for "this index / section is not present"
    NULL_INDEX = 0xffffffff

    def read_index(offset):
        # Read the unsigned 32-bit big-endian value at ``offset`` and add
        # self.start to it, unless it is the "not present" marker.
        value, = struct.unpack_from('>L', self.header, offset)
        if value != NULL_INDEX:
            value += self.start
        return value

    self.sect = sect
    self.start = sectNumber
    self.header = self.sect.loadSection(self.start)

    if len(self.header) > 20 and self.header[16:20] == 'MOBI':
        self.sect.setsectiondescription(0, "Mobipocket Header")
        self.palm = False
    elif self.sect.ident == 'TEXtREAd':
        self.sect.setsectiondescription(0, "PalmDOC Header")
        self.palm = True
    else:
        raise unpackException('Unknown File Format')

    self.records, = struct.unpack_from('>H', self.header, 0x8)

    # set defaults in case this is a PalmDOC
    self.title = self.sect.palmname
    self.length = len(self.header) - 16
    self.type = 3
    self.codepage = 1252
    self.codec = 'windows-1252'
    self.unique_id = 0
    self.version = 0
    self.hasExth = False
    self.exth = ''
    self.exth_offset = self.length + 16
    self.exth_length = 0
    self.crypto_type = 0
    self.firstnontext = self.start + self.records + 1
    self.firstresource = self.start + self.records + 1
    self.ncxidx = NULL_INDEX
    self.metaOrthIndex = NULL_INDEX
    self.metaInflIndex = NULL_INDEX
    self.skelidx = NULL_INDEX
    self.fragidx = NULL_INDEX
    self.guideidx = NULL_INDEX
    self.fdst = NULL_INDEX
    # always defined, so the attribute exists even when the K8 branch
    # at the end of this method is never reached
    self.fdstcnt = 0
    self.mlstart = self.sect.loadSection(self.start + 1)[:4]
    self.rawSize = 0
    self.metadata = {}

    # set up for decompression/unpacking
    self.compression, = struct.unpack_from('>H', self.header, 0x0)
    if self.compression == 0x4448:
        reader = HuffcdicReader()
        huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70)
        huffoff = huffoff + self.start
        self.sect.setsectiondescription(huffoff, "Huffman Compression Seed")
        reader.loadHuff(self.sect.loadSection(huffoff))
        # the first section is the HUFF data, the following ones are CDIC
        for i in xrange(1, huffnum):
            self.sect.setsectiondescription(
                huffoff + i, "Huffman CDIC Compression Seed %d" % i)
            reader.loadCdic(self.sect.loadSection(huffoff + i))
        self.unpack = reader.unpack
    elif self.compression == 2:
        self.unpack = PalmdocReader().unpack
    elif self.compression == 1:
        self.unpack = UncompressedReader().unpack
    else:
        # zero-pad so the value reads as a normal 4-digit hex number
        raise unpackException(
            'invalid compression type: 0x%04x' % self.compression)

    # a PalmDOC file has nothing more to parse; keep the defaults
    if self.palm:
        return

    (self.length, self.type, self.codepage,
     self.unique_id, self.version) = struct.unpack(
        '>LLLLL', self.header[20:40])
    codec_map = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }
    # unknown codepages keep the default codec set above
    self.codec = codec_map.get(self.codepage, self.codec)

    # title
    toff, tlen = struct.unpack('>II', self.header[0x54:0x5c])
    tend = toff + tlen
    self.title = self.header[toff:tend]

    exth_flag, = struct.unpack('>L', self.header[0x80:0x84])
    self.hasExth = exth_flag & 0x40
    self.exth_offset = self.length + 16
    self.exth_length = 0
    if self.hasExth:
        self.exth_length, = struct.unpack_from(
            '>L', self.header, self.exth_offset + 4)
        # round to next 4 byte boundary
        self.exth_length = ((self.exth_length + 3) >> 2) << 2
        self.exth = self.header[
            self.exth_offset:self.exth_offset + self.exth_length]

    # parse the exth / metadata
    self.parseMetaData()

    self.crypto_type, = struct.unpack_from('>H', self.header, 0xC)

    # Start sector for additional files such as images, fonts, resources, etc
    # Can be missing so fall back to default set previously
    ofst, = struct.unpack_from('>L', self.header, 0x6C)
    if ofst != NULL_INDEX:
        self.firstresource = ofst + self.start
    ofst, = struct.unpack_from('>L', self.header, 0x50)
    if ofst != NULL_INDEX:
        self.firstnontext = ofst + self.start

    if self.isPrintReplica():
        return

    if self.version < 8:
        # Dictionary metaOrthIndex
        self.metaOrthIndex = read_index(0x28)
        # Dictionary metaInflIndex
        self.metaInflIndex = read_index(0x2C)

    # handle older headers without any ncxindex info and later
    # specifically 0xe4 headers
    if self.length + 16 < 0xf8:
        return

    # NCX Index
    self.ncxidx = read_index(0xf4)

    # K8 specific Indexes
    if self.start != 0 or self.version == 8:
        # Index into <xml> file skeletons in RawML
        self.skelidx = read_index(0xfc)
        # Index into <div> sections in RawML
        self.fragidx = read_index(0xf8)
        # Index into Other files
        self.guideidx = read_index(0x104)

        # dictionaries do not seem to use the same approach in K8's
        # so disable them
        self.metaOrthIndex = NULL_INDEX
        self.metaInflIndex = NULL_INDEX

        # need to use the FDST record to find out how to properly unpack
        # the rawML into pieces
        # it is simply a table of start and end locations for each flow piece
        self.fdst, = struct.unpack_from('>L', self.header, 0xc0)
        self.fdstcnt, = struct.unpack_from('>L', self.header, 0xc4)
        # if cnt is 1 or less, fdst section number can be garbage
        if self.fdstcnt <= 1:
            self.fdst = NULL_INDEX
        if self.fdst != NULL_INDEX:
            self.fdst += self.start
def __init__(self, sect, sectNumber): self.sect = sect self.start = sectNumber self.header = self.sect.loadSection(self.start) self.records, = struct.unpack_from(">H", self.header, 0x8) self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack( ">LLLLL", self.header[20:40] ) print "Mobi Version: ", self.version # codec self.codec = "windows-1252" codec_map = {1252: "windows-1252", 65001: "utf-8"} if self.codepage in codec_map.keys(): self.codec = codec_map[self.codepage] print "Codec: ", self.codec # title toff, tlen = struct.unpack(">II", self.header[0x54:0x5C]) tend = toff + tlen self.title = self.header[toff:tend] print "Title: ", self.title # set up for decompression/unpacking compression, = struct.unpack_from(">H", self.header, 0x0) if compression == 0x4448: print "Huffdic compression" reader = HuffcdicReader() huffoff, huffnum = struct.unpack_from(">LL", self.header, 0x70) huffoff = huffoff + self.start reader.loadHuff(self.sect.loadSection(huffoff)) for i in xrange(1, huffnum): reader.loadCdic(self.sect.loadSection(huffoff + i)) self.unpack = reader.unpack elif compression == 2: print "Palmdoc compression" self.unpack = PalmdocReader().unpack elif compression == 1: print "No compression" self.unpack = UncompressedReader().unpack else: raise unpackException("invalid compression type: 0x%4x" % compression) exth_flag, = struct.unpack(">L", self.header[0x80:0x84]) self.hasExth = exth_flag & 0x40 self.mlstart = self.sect.loadSection(self.start + 1) self.mlstart = self.mlstart[0:4] self.crypto_type, = struct.unpack_from(">H", self.header, 0xC) # default initial values set to disable these advanced features not found in TEXtREAd self.firstaddl = self.records + 1 self.ncxidx = 0xFFFFFFFF self.metaOrthIndex = 0xFFFFFFFF self.metaInflIndex = 0xFFFFFFFF self.skelidx = 0xFFFFFFFF self.dividx = 0xFFFFFFFF self.othidx = 0xFFFFFFF self.fdst = 0xFFFFFFFF if self.sect.ident == "TEXtREAd": return # Start sector for additional files such as images, 
fonts, resources, etc self.firstaddl, = struct.unpack_from(">L", self.header, 0x6C) if self.firstaddl != 0xFFFFFFFF: self.firstaddl += self.start if self.mlstart == "%MOP": return if self.version < 8: # Dictionary metaOrthIndex self.metaOrthIndex, = struct.unpack_from(">L", self.header, 0x28) if self.metaOrthIndex != 0xFFFFFFFF: self.metaOrthIndex += self.start # Dictionary metaInflIndex self.metaInflIndex, = struct.unpack_from(">L", self.header, 0x2C) if self.metaInflIndex != 0xFFFFFFFF: self.metaInflIndex += self.start # handle older headers without any ncxindex info and later # specifically 0xe4 headers if self.length + 16 < 0xF8: return # NCX Index self.ncxidx, = struct.unpack(">L", self.header[0xF4:0xF8]) if self.ncxidx != 0xFFFFFFFF: self.ncxidx += self.start # K8 specific Indexes if self.start != 0 or self.version == 8: # Index into <xml> file skeletons in RawML self.skelidx, = struct.unpack_from(">L", self.header, 0xFC) if self.skelidx != 0xFFFFFFFF: self.skelidx += self.start # Index into <div> sections in RawML self.dividx, = struct.unpack_from(">L", self.header, 0xF8) if self.dividx != 0xFFFFFFFF: self.dividx += self.start # Index into Other files self.othidx, = struct.unpack_from(">L", self.header, 0x104) if self.othidx != 0xFFFFFFFF: self.othidx += self.start # dictionaries do not seem to use the same approach in K8's # so disable them self.metaOrthIndex = 0xFFFFFFFF self.metaInflIndex = 0xFFFFFFFF # need to use the FDST record to find out how to properly unpack # the rawML into pieces # it is simply a table of start and end locations for each flow piece self.fdst, = struct.unpack_from(">L", self.header, 0xC0) self.fdstcnt, = struct.unpack_from(">L", self.header, 0xC4) # if cnt is 1 or less, fdst section mumber can be garbage if self.fdstcnt <= 1: self.fdst = 0xFFFFFFFF if self.fdst != 0xFFFFFFFF: self.fdst += self.start if DEBUG: print "firstaddl %0x" % self.firstaddl print "ncxidx %0x" % self.ncxidx print "exth flags %0x" % exth_flag if self.version 
== 8 or self.start != 0: print "skelidx %0x" % self.skelidx print "dividx %0x" % self.dividx print "othidx %0x" % self.othidx print "fdst %0x" % self.fdst
def __init__(self, sect, sectNumber): self.sect = sect self.start = sectNumber self.header = self.sect.loadSection(self.start) self.records, = struct.unpack_from('>H', self.header, 0x8) self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack( '>LLLLL', self.header[20:40]) print "Mobi Version: ", self.version # codec self.codec = 'windows-1252' codec_map = { 1252: 'windows-1252', 65001: 'utf-8', } if self.codepage in codec_map.keys(): self.codec = codec_map[self.codepage] print "Codec: ", self.codec # title toff, tlen = struct.unpack('>II', self.header[0x54:0x5c]) tend = toff + tlen self.title = self.header[toff:tend] print "Title: ", self.title # set up for decompression/unpacking compression, = struct.unpack_from('>H', self.header, 0x0) if compression == 0x4448: print "Huffdic compression" reader = HuffcdicReader() huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70) huffoff = huffoff + self.start reader.loadHuff(self.sect.loadSection(huffoff)) for i in xrange(1, huffnum): reader.loadCdic(self.sect.loadSection(huffoff + i)) self.unpack = reader.unpack elif compression == 2: print "Palmdoc compression" self.unpack = PalmdocReader().unpack elif compression == 1: print "No compression" self.unpack = UncompressedReader().unpack else: raise unpackException('invalid compression type: 0x%4x' % compression) exth_flag, = struct.unpack('>L', self.header[0x80:0x84]) self.hasExth = exth_flag & 0x40 self.mlstart = self.sect.loadSection(self.start + 1) self.mlstart = self.mlstart[0:4] self.crypto_type, = struct.unpack_from('>H', self.header, 0xC) # default initial values set to disable these advanced features not found in TEXtREAd self.firstaddl = self.records + 1 self.ncxidx = 0xffffffff self.metaOrthIndex = 0xffffffff self.metaInflIndex = 0xffffffff self.skelidx = 0xffffffff self.dividx = 0xffffffff self.othidx = 0xfffffff self.fdst = 0xffffffff if self.sect.ident == 'TEXtREAd': return # Start sector for additional files such as images, 
fonts, resources, etc self.firstaddl, = struct.unpack_from('>L', self.header, 0x6C) if self.firstaddl != 0xffffffff: self.firstaddl += self.start if self.mlstart == '%MOP': return if self.version < 8: # Dictionary metaOrthIndex self.metaOrthIndex, = struct.unpack_from('>L', self.header, 0x28) if self.metaOrthIndex != 0xffffffff: self.metaOrthIndex += self.start # Dictionary metaInflIndex self.metaInflIndex, = struct.unpack_from('>L', self.header, 0x2C) if self.metaInflIndex != 0xffffffff: self.metaInflIndex += self.start # handle older headers without any ncxindex info and later # specifically 0xe4 headers if self.length + 16 < 0xf8: return # NCX Index self.ncxidx, = struct.unpack('>L', self.header[0xf4:0xf8]) if self.ncxidx != 0xffffffff: self.ncxidx += self.start # K8 specific Indexes if self.start != 0 or self.version == 8: # Index into <xml> file skeletons in RawML self.skelidx, = struct.unpack_from('>L', self.header, 0xfc) if self.skelidx != 0xffffffff: self.skelidx += self.start # Index into <div> sections in RawML self.dividx, = struct.unpack_from('>L', self.header, 0xf8) if self.dividx != 0xffffffff: self.dividx += self.start # Index into Other files self.othidx, = struct.unpack_from('>L', self.header, 0x104) if self.othidx != 0xffffffff: self.othidx += self.start # dictionaries do not seem to use the same approach in K8's # so disable them self.metaOrthIndex = 0xffffffff self.metaInflIndex = 0xffffffff # need to use the FDST record to find out how to properly unpack # the rawML into pieces # it is simply a table of start and end locations for each flow piece self.fdst, = struct.unpack_from('>L', self.header, 0xc0) self.fdstcnt, = struct.unpack_from('>L', self.header, 0xc4) # if cnt is 1 or less, fdst section mumber can be garbage if self.fdstcnt <= 1: self.fdst = 0xffffffff if self.fdst != 0xffffffff: self.fdst += self.start if DEBUG: print "firstaddl %0x" % self.firstaddl print "ncxidx %0x" % self.ncxidx print "exth flags %0x" % exth_flag if self.version 
== 8 or self.start != 0: print "skelidx %0x" % self.skelidx print "dividx %0x" % self.dividx print "othidx %0x" % self.othidx print "fdst %0x" % self.fdst
def __init__(self, sect, sectNumber):
    """Parse the PalmDOC / Mobipocket header held in section ``sectNumber``.

    Args:
        sect: section container.  This method uses its ``loadSection``,
            ``setsectiondescription``, ``ident`` and ``palmname`` members.
        sectNumber: number of the section holding the header.  It is kept
            as ``self.start`` and added to every section index read from
            the header.

    Raises:
        unpackException: if the section is neither a 'MOBI' header nor
            part of a 'TEXtREAd' file, or if the compression type is not
            one of 1, 2 or 0x4448.
    """
    # marker the header uses for "this index / section is not present"
    NULL_INDEX = 0xffffffff

    def read_index(offset):
        # Read the unsigned 32-bit big-endian value at ``offset`` and add
        # self.start to it, unless it is the "not present" marker.
        value, = struct.unpack_from('>L', self.header, offset)
        if value != NULL_INDEX:
            value += self.start
        return value

    self.sect = sect
    self.start = sectNumber
    self.header = self.sect.loadSection(self.start)

    if len(self.header) > 20 and self.header[16:20] == 'MOBI':
        self.sect.setsectiondescription(0, "Mobipocket Header")
        self.palm = False
    elif self.sect.ident == 'TEXtREAd':
        self.sect.setsectiondescription(0, "PalmDOC Header")
        self.palm = True
    else:
        raise unpackException('Unknown File Format')

    self.records, = struct.unpack_from('>H', self.header, 0x8)

    # set defaults in case this is a PalmDOC
    self.title = self.sect.palmname
    self.length = len(self.header) - 16
    self.type = 3
    self.codepage = 1252
    self.codec = 'windows-1252'
    self.unique_id = 0
    self.version = 0
    self.hasExth = False
    self.exth = ''
    self.exth_offset = self.length + 16
    self.exth_length = 0
    self.crypto_type = 0
    self.firstnontext = self.start + self.records + 1
    self.firstresource = self.start + self.records + 1
    self.ncxidx = NULL_INDEX
    self.metaOrthIndex = NULL_INDEX
    self.metaInflIndex = NULL_INDEX
    self.skelidx = NULL_INDEX
    self.fragidx = NULL_INDEX
    self.guideidx = NULL_INDEX
    self.fdst = NULL_INDEX
    # always defined, so the attribute exists even when the K8 branch
    # at the end of this method is never reached
    self.fdstcnt = 0
    self.mlstart = self.sect.loadSection(self.start + 1)[:4]
    self.rawSize = 0
    self.metadata = {}

    # set up for decompression/unpacking
    self.compression, = struct.unpack_from('>H', self.header, 0x0)
    if self.compression == 0x4448:
        reader = HuffcdicReader()
        huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70)
        huffoff = huffoff + self.start
        self.sect.setsectiondescription(huffoff, "Huffman Compression Seed")
        reader.loadHuff(self.sect.loadSection(huffoff))
        # the first section is the HUFF data, the following ones are CDIC
        for i in xrange(1, huffnum):
            self.sect.setsectiondescription(
                huffoff + i, "Huffman CDIC Compression Seed %d" % i)
            reader.loadCdic(self.sect.loadSection(huffoff + i))
        self.unpack = reader.unpack
    elif self.compression == 2:
        self.unpack = PalmdocReader().unpack
    elif self.compression == 1:
        self.unpack = UncompressedReader().unpack
    else:
        # zero-pad so the value reads as a normal 4-digit hex number
        raise unpackException(
            'invalid compression type: 0x%04x' % self.compression)

    # a PalmDOC file has nothing more to parse; keep the defaults
    if self.palm:
        return

    (self.length, self.type, self.codepage,
     self.unique_id, self.version) = struct.unpack(
        '>LLLLL', self.header[20:40])
    codec_map = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }
    # unknown codepages keep the default codec set above
    self.codec = codec_map.get(self.codepage, self.codec)

    # title
    toff, tlen = struct.unpack('>II', self.header[0x54:0x5c])
    tend = toff + tlen
    self.title = self.header[toff:tend]

    exth_flag, = struct.unpack('>L', self.header[0x80:0x84])
    self.hasExth = exth_flag & 0x40
    self.exth_offset = self.length + 16
    self.exth_length = 0
    if self.hasExth:
        self.exth_length, = struct.unpack_from(
            '>L', self.header, self.exth_offset + 4)
        # round to next 4 byte boundary
        self.exth_length = ((self.exth_length + 3) >> 2) << 2
        self.exth = self.header[
            self.exth_offset:self.exth_offset + self.exth_length]

    # parse the exth / metadata
    self.parseMetaData()

    self.crypto_type, = struct.unpack_from('>H', self.header, 0xC)

    # Start sector for additional files such as images, fonts, resources, etc
    # Can be missing so fall back to default set previously
    ofst, = struct.unpack_from('>L', self.header, 0x6C)
    if ofst != NULL_INDEX:
        self.firstresource = ofst + self.start
    ofst, = struct.unpack_from('>L', self.header, 0x50)
    if ofst != NULL_INDEX:
        self.firstnontext = ofst + self.start

    if self.isPrintReplica():
        return

    if self.version < 8:
        # Dictionary metaOrthIndex
        self.metaOrthIndex = read_index(0x28)
        # Dictionary metaInflIndex
        self.metaInflIndex = read_index(0x2C)

    # handle older headers without any ncxindex info and later
    # specifically 0xe4 headers
    if self.length + 16 < 0xf8:
        return

    # NCX Index
    self.ncxidx = read_index(0xf4)

    # K8 specific Indexes
    if self.start != 0 or self.version == 8:
        # Index into <xml> file skeletons in RawML
        self.skelidx = read_index(0xfc)
        # Index into <div> sections in RawML
        self.fragidx = read_index(0xf8)
        # Index into Other files
        self.guideidx = read_index(0x104)

        # dictionaries do not seem to use the same approach in K8's
        # so disable them
        self.metaOrthIndex = NULL_INDEX
        self.metaInflIndex = NULL_INDEX

        # need to use the FDST record to find out how to properly unpack
        # the rawML into pieces
        # it is simply a table of start and end locations for each flow piece
        self.fdst, = struct.unpack_from('>L', self.header, 0xc0)
        self.fdstcnt, = struct.unpack_from('>L', self.header, 0xc4)
        # if cnt is 1 or less, fdst section number can be garbage
        if self.fdstcnt <= 1:
            self.fdst = NULL_INDEX
        if self.fdst != NULL_INDEX:
            self.fdst += self.start