Example #1
0
 def parsetag(self, s):
     """Split one markup tag string into its type, name and attributes.

     Args:
         s: The tag text. Scanning starts at index 1, so the first
             character (presumably the opening '<') is never examined.

     Returns:
         A ``(ttype, tname, tattr)`` tuple.  ``ttype`` is 'end' when the
         name is preceded by '/', 'single' for a '!--' tag or when a '/'
         is found after the attributes, and 'begin' otherwise.  ``tname``
         is the lower-cased tag name ('?xml' is reported as 'xml').
         ``tattr`` is a ``dict_`` mapping lower-cased attribute names to
         values; for a '!--' tag it holds the text under 'comment'.
     """
     n = len(s)
     p = 1
     ttype = None
     tattr = dict_()
     while s[p:p+1] == ' ':
         p += 1
     if s[p:p+1] == '/':
         ttype = 'end'
         p += 1
         while s[p:p+1] == ' ':
             p += 1
     b = p
     # the p < n bound stops the scan at the end of an unterminated tag;
     # without it the empty slice never matches and the loop never ends
     while p < n and s[p:p+1] not in ('>', '/', ' ', '"', "'", '\r', '\n'):
         p += 1
     tname = s[b:p].lower()
     # some special cases
     if tname == '?xml':
         tname = 'xml'
     if tname == '!--':
         ttype = 'single'
         # drop the trailing three characters (presumably '-->')
         tattr['comment'] = s[p:-3].strip()
     if ttype is None:
         # parse any attributes of begin or single tags
         while s.find('=', p) != -1:
             while s[p:p+1] == ' ':
                 p += 1
             b = p
             # an '=' is known to exist at or after p, so index() cannot fail
             p = s.index('=', b)
             aname = s[b:p].lower().rstrip(' ')
             p += 1
             while s[p:p+1] == ' ':
                 p += 1
             quote = s[p:p+1]
             if quote in ('"', "'"):
                 # the value ends at the SAME quote character that opened
                 # it, so the other kind of quote may appear inside it
                 b = p + 1
                 p = s.find(quote, b)
                 if p == -1:
                     # unterminated value: take the rest of the string
                     p = n
                 val = s[b:p]
                 p += 1
             else:
                 b = p
                 while p < n and s[p:p+1] not in ('>', '/', ' '):
                     p += 1
                 val = s[b:p]
             tattr[aname] = val
     if ttype is None:
         ttype = 'begin'
         if s.find('/', p) >= 0:
             ttype = 'single'
     return ttype, tname, tattr
Example #2
0
 def parsetag(self, s):
     """Parse a single tag string into (type, name, attributes).

     Args:
         s: The tag text.  Index 0 (presumably the opening '<') is
             skipped; parsing begins at index 1.

     Returns:
         ``(ttype, tname, tattr)`` where ``ttype`` is one of 'begin',
         'end' or 'single', ``tname`` is the lower-cased tag name (with
         '?xml' mapped to 'xml'), and ``tattr`` is a ``dict_`` of
         lower-cased attribute names to values.  A '!--' tag is reported
         as 'single' with its text stored under the 'comment' key.
     """
     end = len(s)
     p = 1
     ttype = None
     tattr = dict_()
     while s[p:p+1] == ' ':
         p += 1
     if s[p:p+1] == '/':
         ttype = 'end'
         p += 1
         while s[p:p+1] == ' ':
             p += 1
     b = p
     # bounded by the string length: an empty slice is never in the stop
     # set, so an unterminated tag would otherwise spin forever
     while p < end and s[p:p+1] not in ('>', '/', ' ', '"', "'", '\r', '\n'):
         p += 1
     tname = s[b:p].lower()
     # some special cases
     if tname == '?xml':
         tname = 'xml'
     if tname == '!--':
         ttype = 'single'
         # the last three characters (presumably '-->') are cut off
         tattr['comment'] = s[p:-3].strip()
     if ttype is None:
         # parse any attributes of begin or single tags
         while s.find('=', p) != -1:
             while s[p:p+1] == ' ':
                 p += 1
             b = p
             # the loop condition guarantees an '=' at or after this point
             p = s.index('=', b)
             aname = s[b:p].lower().rstrip(' ')
             p += 1
             while s[p:p+1] == ' ':
                 p += 1
             opener = s[p:p+1]
             if opener in ('"', "'"):
                 # close only on the matching quote so that values such as
                 # "it's" are not cut short at the apostrophe
                 b = p + 1
                 p = s.find(opener, b)
                 if p == -1:
                     # no closing quote: the value runs to the end
                     p = end
                 val = s[b:p]
                 p += 1
             else:
                 b = p
                 while p < end and s[p:p+1] not in ('>', '/', ' '):
                     p += 1
                 val = s[b:p]
             tattr[aname] = val
     if ttype is None:
         ttype = 'begin'
         if s.find('/', p) >= 0:
             ttype = 'single'
     return ttype, tname, tattr
Example #3
0
 def parsetag(self, s):
     """Break a tag string down into its type, name and attribute map.

     Args:
         s: The tag text.  The character at index 0 (presumably '<') is
             not inspected; parsing starts at index 1.

     Returns:
         A tuple ``(ttype, tname, tattr)``:

         * ``ttype`` -- "end" if a "/" precedes the name, "single" for a
           "!--" tag or when a "/" follows the attributes, else "begin".
         * ``tname`` -- lower-cased tag name; "?xml" becomes "xml".
         * ``tattr`` -- ``dict_`` of lower-cased attribute name to value;
           a "!--" tag stores its text under "comment".
     """
     size = len(s)
     p = 1
     ttype = None
     tattr = dict_()
     while s[p:p + 1] == " ":
         p += 1
     if s[p:p + 1] == "/":
         ttype = "end"
         p += 1
         while s[p:p + 1] == " ":
             p += 1
     b = p
     # stop at the end of the string as well: past the end the slice is
     # empty, which is not a terminator, and the scan would never finish
     while p < size and s[p:p + 1] not in (">", "/", " ", '"', "'", "\r", "\n"):
         p += 1
     tname = s[b:p].lower()
     # some special cases
     if tname == "?xml":
         tname = "xml"
     if tname == "!--":
         ttype = "single"
         # strip the final three characters (presumably "-->")
         tattr["comment"] = s[p:-3].strip()
     if ttype is None:
         # parse any attributes of begin or single tags
         while s.find("=", p) != -1:
             while s[p:p + 1] == " ":
                 p += 1
             b = p
             # safe: the loop condition proved an "=" exists from here on
             p = s.index("=", b)
             aname = s[b:p].lower().rstrip(" ")
             p += 1
             while s[p:p + 1] == " ":
                 p += 1
             quote = s[p:p + 1]
             if quote in ('"', "'"):
                 # only the quote character that opened the value closes
                 # it; the other kind is ordinary content
                 b = p + 1
                 p = s.find(quote, b)
                 if p == -1:
                     # missing closing quote: consume the remainder
                     p = size
                 val = s[b:p]
                 p += 1
             else:
                 b = p
                 while p < size and s[p:p + 1] not in (">", "/", " "):
                     p += 1
                 val = s[b:p]
             tattr[aname] = val
     if ttype is None:
         ttype = "begin"
         if s.find("/", p) >= 0:
             ttype = "single"
     return ttype, tname, tattr
Example #4
0
    def __init__(self, fpath, debug):
        """Store the file path and debug flag and reset all parsed state.

        Nothing is opened or read here; every header, EXTH and metadata
        attribute is simply given its initial empty or zero value.

        Args:
            fpath: stored unchanged as ``self.fpath``.
            debug: stored unchanged as ``self.debug``.
        """
        # caller-supplied values
        self.fpath = fpath
        self.debug = debug
        self.f = None  # presumably the open file object, set later

        # byte-string fields start out empty
        self.type = b''
        self.header = b''
        self.exth = b''

        # section table position and count
        self.sec_offset = 0
        self.sec_count = 0

        # header and EXTH locations and sizes
        self.header_offset = 0
        self.header_size = 0
        self.mobi_header_offset = 0x10
        self.exth_offset = 0
        self.exth_size = 0
        self.first_resc_offset = 0

        # version and text-encoding defaults
        self.version = 0
        self.codepage = 1252
        self.codec = 'windows-1252'

        # two independent, initially empty mappings
        self.meta_data = dict_()
        self.image_data = dict_()
Example #5
0
    def __init__(self, sect, sectNumber):
        """Parse the header stored in section ``sectNumber`` of ``sect``.

        Loads the section, decides whether it holds a Mobipocket or a
        PalmDOC header, selects the matching text decompressor and, for
        Mobipocket headers, reads the title, codec, EXTH block and the
        section numbers of the various indexes.  Index attributes keep
        the value 0xffffffff when they are absent.

        Args:
            sect: section container; this method uses its
                ``loadSection()``, ``setsectiondescription()``, ``ident``
                and ``palmname`` members.
            sectNumber: number of the section holding this header; all
                section numbers read from the header are made relative
                to it.

        Raises:
            unpackException: if the section is neither a MOBI nor a
                PalmDOC header, or the compression type is not one of
                1, 2 or 0x4448.
        """
        self.sect = sect
        self.start = sectNumber
        self.header = self.sect.loadSection(self.start)
        if len(self.header) > 20 and self.header[16:20] == b'MOBI':
            self.sect.setsectiondescription(0, "Mobipocket Header")
            self.palm = False
        elif self.sect.ident == b'TEXtREAd':
            self.sect.setsectiondescription(0, "PalmDOC Header")
            self.palm = True
        else:
            raise unpackException('Unknown File Format')

        self.records, = struct.unpack_from(b'>H', self.header, 0x8)

        # set defaults in case this is a PalmDOC
        self.title = self.sect.palmname.decode('latin-1', errors='replace')
        self.length = len(self.header) - 16
        self.type = 3
        self.codepage = 1252
        self.codec = 'windows-1252'
        self.unique_id = 0
        self.version = 0
        self.hasExth = False
        self.exth = b''
        self.exth_offset = self.length + 16
        self.exth_length = 0
        self.crypto_type = 0
        self.firstnontext = self.start + self.records + 1
        self.firstresource = self.start + self.records + 1
        self.ncxidx = 0xffffffff
        self.metaOrthIndex = 0xffffffff
        self.metaInflIndex = 0xffffffff
        self.skelidx = 0xffffffff
        self.fragidx = 0xffffffff
        self.guideidx = 0xffffffff
        self.fdst = 0xffffffff
        # default so the attribute exists on every path, not only after
        # the K8 branch below has run
        self.fdstcnt = 0
        self.mlstart = self.sect.loadSection(self.start + 1)[:4]
        self.rawSize = 0
        self.metadata = dict_()

        # set up for decompression/unpacking
        self.compression, = struct.unpack_from(b'>H', self.header, 0x0)
        if self.compression == 0x4448:
            reader = HuffcdicReader()
            huffoff, huffnum = struct.unpack_from(b'>LL', self.header, 0x70)
            huffoff = huffoff + self.start
            self.sect.setsectiondescription(huffoff,
                                            "Huffman Compression Seed")
            reader.loadHuff(self.sect.loadSection(huffoff))
            # the first of the huffnum sections is the HUFF record loaded
            # above; the remaining ones are CDIC records
            for i in range(1, huffnum):
                self.sect.setsectiondescription(
                    huffoff + i, "Huffman CDIC Compression Seed %d" % i)
                reader.loadCdic(self.sect.loadSection(huffoff + i))
            self.unpack = reader.unpack
        elif self.compression == 2:
            self.unpack = PalmdocReader().unpack
        elif self.compression == 1:
            self.unpack = UncompressedReader().unpack
        else:
            # %04x zero-pads the value; %4x would pad with spaces
            raise unpackException('invalid compression type: 0x%04x' %
                                  self.compression)

        # a PalmDOC header has none of the fields read below
        if self.palm:
            return

        self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(
            b'>LLLLL', self.header[20:40])
        codec_map = {
            1252: 'windows-1252',
            65001: 'utf-8',
        }
        # unknown codepages keep the windows-1252 default
        if self.codepage in codec_map:
            self.codec = codec_map[self.codepage]

        # title
        toff, tlen = struct.unpack(b'>II', self.header[0x54:0x5c])
        tend = toff + tlen
        self.title = self.header[toff:tend].decode(self.codec,
                                                   errors='replace')

        exth_flag, = struct.unpack(b'>L', self.header[0x80:0x84])
        self.hasExth = exth_flag & 0x40
        self.exth_offset = self.length + 16
        self.exth_length = 0
        if self.hasExth:
            self.exth_length, = struct.unpack_from(b'>L', self.header,
                                                   self.exth_offset + 4)
            self.exth_length = ((self.exth_length + 3) >>
                                2) << 2  # round to next 4 byte boundary
            self.exth = self.header[self.exth_offset:self.exth_offset +
                                    self.exth_length]

        # parse the exth / metadata
        self.parseMetaData()

        # self.mlstart = self.sect.loadSection(self.start+1)
        # self.mlstart = self.mlstart[0:4]
        self.crypto_type, = struct.unpack_from(b'>H', self.header, 0xC)

        # Start sector for additional files such as images, fonts, resources, etc
        # Can be missing so fall back to default set previously
        ofst, = struct.unpack_from(b'>L', self.header, 0x6C)
        if ofst != 0xffffffff:
            self.firstresource = ofst + self.start
        ofst, = struct.unpack_from(b'>L', self.header, 0x50)
        if ofst != 0xffffffff:
            self.firstnontext = ofst + self.start

        # Print Replica files keep the default index values set above
        if self.isPrintReplica():
            return

        if self.version < 8:
            # Dictionary metaOrthIndex
            self.metaOrthIndex, = struct.unpack_from(b'>L', self.header, 0x28)
            if self.metaOrthIndex != 0xffffffff:
                self.metaOrthIndex += self.start

            # Dictionary metaInflIndex
            self.metaInflIndex, = struct.unpack_from(b'>L', self.header, 0x2C)
            if self.metaInflIndex != 0xffffffff:
                self.metaInflIndex += self.start

        # handle older headers without any ncxindex info and later
        # specifically 0xe4 headers
        if self.length + 16 < 0xf8:
            return

        # NCX Index
        self.ncxidx, = struct.unpack(b'>L', self.header[0xf4:0xf8])
        if self.ncxidx != 0xffffffff:
            self.ncxidx += self.start

        # K8 specific Indexes
        if self.start != 0 or self.version == 8:
            # Index into <xml> file skeletons in RawML
            self.skelidx, = struct.unpack_from(b'>L', self.header, 0xfc)
            if self.skelidx != 0xffffffff:
                self.skelidx += self.start

            # Index into <div> sections in RawML
            self.fragidx, = struct.unpack_from(b'>L', self.header, 0xf8)
            if self.fragidx != 0xffffffff:
                self.fragidx += self.start

            # Index into Other files
            self.guideidx, = struct.unpack_from(b'>L', self.header, 0x104)
            if self.guideidx != 0xffffffff:
                self.guideidx += self.start

            # dictionaries do not seem to use the same approach in K8's
            # so disable them
            self.metaOrthIndex = 0xffffffff
            self.metaInflIndex = 0xffffffff

            # need to use the FDST record to find out how to properly unpack
            # the rawML into pieces
            # it is simply a table of start and end locations for each flow piece
            self.fdst, = struct.unpack_from(b'>L', self.header, 0xc0)
            self.fdstcnt, = struct.unpack_from(b'>L', self.header, 0xc4)
            # if cnt is 1 or less, fdst section number can be garbage
            if self.fdstcnt <= 1:
                self.fdst = 0xffffffff
            if self.fdst != 0xffffffff:
                self.fdst += self.start
Example #6
0
    def __init__(self, sect, sectNumber):
        """Read and decode the header found in section ``sectNumber``.

        The section is loaded from ``sect`` and classified as either a
        Mobipocket or a PalmDOC header.  A decompressor is chosen from
        the compression field, and for Mobipocket headers the title,
        codec, EXTH block and index section numbers are extracted.  An
        index attribute left at 0xffffffff means "not present".

        Args:
            sect: section container; ``loadSection()``,
                ``setsectiondescription()``, ``ident`` and ``palmname``
                are used here.
            sectNumber: section number of this header; section numbers
                stored in the header are offset by it.

        Raises:
            unpackException: for an unrecognised header format or a
                compression type other than 1, 2 or 0x4448.
        """
        self.sect = sect
        self.start = sectNumber
        self.header = self.sect.loadSection(self.start)
        if len(self.header)>20 and self.header[16:20] == b'MOBI':
            self.sect.setsectiondescription(0,"Mobipocket Header")
            self.palm = False
        elif self.sect.ident == b'TEXtREAd':
            self.sect.setsectiondescription(0, "PalmDOC Header")
            self.palm = True
        else:
            raise unpackException('Unknown File Format')

        self.records, = struct.unpack_from(b'>H', self.header, 0x8)

        # set defaults in case this is a PalmDOC
        self.title = self.sect.palmname.decode('latin-1', errors='replace')
        self.length = len(self.header)-16
        self.type = 3
        self.codepage = 1252
        self.codec = 'windows-1252'
        self.unique_id = 0
        self.version = 0
        self.hasExth = False
        self.exth = b''
        self.exth_offset = self.length + 16
        self.exth_length = 0
        self.crypto_type = 0
        self.firstnontext = self.start+self.records + 1
        self.firstresource = self.start+self.records + 1
        self.ncxidx = 0xffffffff
        self.metaOrthIndex = 0xffffffff
        self.metaInflIndex = 0xffffffff
        self.skelidx = 0xffffffff
        self.fragidx = 0xffffffff
        self.guideidx = 0xffffffff
        self.fdst = 0xffffffff
        # give fdstcnt a value on every path; it is otherwise only
        # assigned inside the K8 branch at the end of this method
        self.fdstcnt = 0
        self.mlstart = self.sect.loadSection(self.start+1)[:4]
        self.rawSize = 0
        self.metadata = dict_()

        # set up for decompression/unpacking
        self.compression, = struct.unpack_from(b'>H', self.header, 0x0)
        if self.compression == 0x4448:
            reader = HuffcdicReader()
            huffoff, huffnum = struct.unpack_from(b'>LL', self.header, 0x70)
            huffoff = huffoff + self.start
            self.sect.setsectiondescription(huffoff,"Huffman Compression Seed")
            reader.loadHuff(self.sect.loadSection(huffoff))
            # section huffoff is the HUFF record; the following
            # huffnum - 1 sections are CDIC records
            for i in range(1, huffnum):
                self.sect.setsectiondescription(huffoff+i,"Huffman CDIC Compression Seed %d" % i)
                reader.loadCdic(self.sect.loadSection(huffoff+i))
            self.unpack = reader.unpack
        elif self.compression == 2:
            self.unpack = PalmdocReader().unpack
        elif self.compression == 1:
            self.unpack = UncompressedReader().unpack
        else:
            # zero-pad to four hex digits (plain %4x pads with spaces)
            raise unpackException('invalid compression type: 0x%04x' % self.compression)

        # nothing below applies to a PalmDOC header
        if self.palm:
            return

        self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(b'>LLLLL', self.header[20:40])
        codec_map = {
            1252 : 'windows-1252',
            65001: 'utf-8',
        }
        # any other codepage leaves the windows-1252 default in place
        if self.codepage in codec_map:
            self.codec = codec_map[self.codepage]

        # title
        toff, tlen = struct.unpack(b'>II', self.header[0x54:0x5c])
        tend = toff + tlen
        self.title=self.header[toff:tend].decode(self.codec, errors='replace')

        exth_flag, = struct.unpack(b'>L', self.header[0x80:0x84])
        self.hasExth = exth_flag & 0x40
        self.exth_offset = self.length + 16
        self.exth_length = 0
        if self.hasExth:
            self.exth_length, = struct.unpack_from(b'>L', self.header, self.exth_offset+4)
            self.exth_length = ((self.exth_length + 3)>>2)<<2  # round to next 4 byte boundary
            self.exth = self.header[self.exth_offset:self.exth_offset+self.exth_length]

        # parse the exth / metadata
        self.parseMetaData()

        # self.mlstart = self.sect.loadSection(self.start+1)
        # self.mlstart = self.mlstart[0:4]
        self.crypto_type, = struct.unpack_from(b'>H', self.header, 0xC)

        # Start sector for additional files such as images, fonts, resources, etc
        # Can be missing so fall back to default set previously
        ofst, = struct.unpack_from(b'>L', self.header, 0x6C)
        if ofst != 0xffffffff:
            self.firstresource = ofst + self.start
        ofst, = struct.unpack_from(b'>L', self.header, 0x50)
        if ofst != 0xffffffff:
            self.firstnontext = ofst + self.start

        # Print Replica files keep the index defaults assigned above
        if self.isPrintReplica():
            return

        if self.version < 8:
            # Dictionary metaOrthIndex
            self.metaOrthIndex, = struct.unpack_from(b'>L', self.header, 0x28)
            if self.metaOrthIndex != 0xffffffff:
                self.metaOrthIndex += self.start

            # Dictionary metaInflIndex
            self.metaInflIndex, = struct.unpack_from(b'>L', self.header, 0x2C)
            if self.metaInflIndex != 0xffffffff:
                self.metaInflIndex += self.start

        # handle older headers without any ncxindex info and later
        # specifically 0xe4 headers
        if self.length + 16 < 0xf8:
            return

        # NCX Index
        self.ncxidx, = struct.unpack(b'>L', self.header[0xf4:0xf8])
        if self.ncxidx != 0xffffffff:
            self.ncxidx += self.start

        # K8 specific Indexes
        if self.start != 0 or self.version == 8:
            # Index into <xml> file skeletons in RawML
            self.skelidx, = struct.unpack_from(b'>L', self.header, 0xfc)
            if self.skelidx != 0xffffffff:
                self.skelidx += self.start

            # Index into <div> sections in RawML
            self.fragidx, = struct.unpack_from(b'>L', self.header, 0xf8)
            if self.fragidx != 0xffffffff:
                self.fragidx += self.start

            # Index into Other files
            self.guideidx, = struct.unpack_from(b'>L', self.header, 0x104)
            if self.guideidx != 0xffffffff:
                self.guideidx += self.start

            # dictionaries do not seem to use the same approach in K8's
            # so disable them
            self.metaOrthIndex = 0xffffffff
            self.metaInflIndex = 0xffffffff

            # need to use the FDST record to find out how to properly unpack
            # the rawML into pieces
            # it is simply a table of start and end locations for each flow piece
            self.fdst, = struct.unpack_from(b'>L', self.header, 0xc0)
            self.fdstcnt, = struct.unpack_from(b'>L', self.header, 0xc4)
            # if cnt is 1 or less, fdst section number can be garbage
            if self.fdstcnt <= 1:
                self.fdst = 0xffffffff
            if self.fdst != 0xffffffff:
                self.fdst += self.start
Example #7
0
 def number_to_field_sorted(self) -> "dict_[int, MessageField]":
     """Return a mapping of field number to field, built from sorted_fields().

     Entries are inserted in the order sorted_fields() yields them; if two
     fields share a number, the later one replaces the earlier value.
     """
     by_number = dict_()
     for member in self.sorted_fields():
         by_number[member.number] = member
     return by_number
Example #8
0
 def number_to_field(self) -> "dict_[int, MessageField]":
     """Return a mapping of field number to field, built from fields().

     Entries are inserted in the order fields() yields them; a repeated
     number keeps its first position but takes the later field.
     """
     lookup = dict_()
     for item in self.fields():
         lookup[item.number] = item
     return lookup
Example #9
0
 def value_to_names(self) -> "dict_[int, str]":
     """Return a mapping of each field's value to that field's name.

     Built from fields() in iteration order; when several fields share a
     value, the name of the last one is kept.
     """
     names = dict_()
     for entry in self.fields():
         names[entry.value] = entry.name
     return names
Example #10
0
 def name_to_values(self) -> "dict_[str, int]":
     """Return a mapping of each field's name to that field's value.

     Built from fields() in iteration order; a repeated name takes the
     value of the last field carrying it.
     """
     values = dict_()
     for entry in self.fields():
         values[entry.name] = entry.value
     return values
Example #11
0
 def options_as_dict(self) -> "dict_[str, Option]":
     """Collect the (name, option) pairs yielded by options() into a dict_.

     Pairs are inserted in iteration order; a repeated name takes the
     option from the last pair.
     """
     collected = dict_()
     for key, opt in self.options():
         collected[key] = opt
     return collected