Beispiel #1
0
    def parseMetaData(self):
        """Fill ``self.metadata`` from the EXTH records plus a few header basics.

        Every metadata value is stored as a list so that keys occurring more
        than once in the EXTH block accumulate instead of overwriting.

        When ``self.hasExth`` is true the record count is read from bytes
        4..12 of ``self.exth`` and the records are walked starting at byte 12.
        Each record starts with an 8 byte (id, size) pair, where ``size``
        covers the pair itself as well as the payload.

        Afterwards 'Language', 'Title', 'Codec' and 'UniqueID' are always
        (re)set, and 'ASIN' / 'cdeType' get defaults when no record supplied
        them.
        """
        def addValue(name, value):
            # create the list on first sight of a key, then append
            self.metadata.setdefault(name, []).append(value)

        # struct format of a numeric payload, keyed by the total record size
        # (8 byte record header + 1, 2 or 4 payload bytes)
        numeric_formats = {9: b'B', 10: b'>H', 12: b'>L'}

        codec = self.codec
        if self.hasExth:
            _length, num_items = struct.unpack(b'>LL', self.exth[4:12])
            records = self.exth[12:]
            offset = 0
            for _ in range(num_items):
                exth_id, size = struct.unpack(b'>LL',
                                              records[offset:offset + 8])
                content = records[offset + 8:offset + size]
                if exth_id in MobiHeader.id_map_strings:
                    # text record: decode with the book's codec
                    addValue(MobiHeader.id_map_strings[exth_id],
                             content.decode(codec, errors='replace'))
                elif exth_id in MobiHeader.id_map_values:
                    name = MobiHeader.id_map_values[exth_id]
                    payload_format = numeric_formats.get(size)
                    if payload_format is None:
                        # numeric key with an unexpected payload width:
                        # warn and keep the raw bytes as hex
                        print(
                            "Warning: Bad key, size, value combination detected in EXTH ",
                            exth_id, size, hexlify(content))
                        addValue(name, hexlify(content))
                    else:
                        value, = struct.unpack(payload_format, content)
                        # a 4 byte CoverOffset (201) / ThumbOffset (202) of
                        # 0xffffffff means "missing" and is not recorded
                        is_missing_offset = (size == 12
                                             and exth_id in (201, 202)
                                             and value == 0xffffffff)
                        if not is_missing_offset:
                            addValue(name, unicode_str(str(value)))
                elif exth_id in MobiHeader.id_map_hexstrings:
                    addValue(MobiHeader.id_map_hexstrings[exth_id],
                             hexlify(content))
                else:
                    # unknown key: file it under its numeric id as hex
                    addValue(unicode_str(str(exth_id)) + ' (hex)',
                             hexlify(content))
                offset += size

        # the basics always come from the header itself, one list element each
        self.metadata['Language'] = [self.Language()]
        self.metadata['Title'] = [unicode_str(self.title, self.codec)]
        self.metadata['Codec'] = [self.codec]
        self.metadata['UniqueID'] = [unicode_str(str(self.unique_id))]
        # no ASIN in the EXTH: invent one from a random uuid
        if 'ASIN' not in self.metadata:
            self.metadata['ASIN'] = [unicode_str(str(uuid.uuid4()))]
        # no cdeType in the EXTH: default to "EBOK"
        if 'cdeType' not in self.metadata:
            self.metadata['cdeType'] = ['EBOK']
Beispiel #2
0
 def dump_exth(self):
     """Print one line per EXTH record: key id, payload size, name, value.

     Nothing is printed when there is no EXTH block, its length is zero or
     its data is empty.  The record count is read from bytes 8..12 of
     ``self.exth`` and the records start at byte 12; each record begins
     with an 8 byte (id, size) pair whose ``size`` includes those 8 bytes.
     """
     # text records are decoded with the book's codec
     codec = self.codec
     has_records = (self.hasExth and self.exth_length != 0
                    and self.exth != b'')
     if not has_records:
         return

     # numeric records by total record size: (struct format, output template)
     numeric_layouts = {
         9: (b'B', '{0:3d} byte {1:<30s} {2:d}'),
         10: (b'>H', '{0:3d} word {1:<30s} 0x{2:0>4X} ({2:d})'),
         12: (b'>L', '{0:3d} long {1:<30s} 0x{2:0>8X} ({2:d})'),
     }

     num_items, = struct.unpack(b'>L', self.exth[8:12])
     print("Key Size Decription                     Value")
     offset = 12
     for _ in range(num_items):
         key_id, size = struct.unpack(b'>LL', self.exth[offset:offset + 8])
         payload_size = size - 8
         content = self.exth[offset + 8:offset + size]
         if key_id in MobiHeader.id_map_strings:
             label = MobiHeader.id_map_strings[key_id]
             text = content.decode(codec, errors='replace')
             print('{0: >3d} {1: >4d} {2: <30s} {3:s}'.format(
                 key_id, payload_size, label, text))
         elif key_id in MobiHeader.id_map_values:
             label = MobiHeader.id_map_values[key_id]
             layout = numeric_layouts.get(size)
             if layout is None:
                 # numeric key whose payload is not 1, 2 or 4 bytes wide
                 print('{0: >3d} {1: >4d} {2: <30s} (0x{3:s})'.format(
                     key_id, payload_size, "Bad size for " + label,
                     hexlify(content)))
             else:
                 payload_format, template = layout
                 value, = struct.unpack(payload_format, content)
                 print(template.format(key_id, label, value))
         elif key_id in MobiHeader.id_map_hexstrings:
             label = MobiHeader.id_map_hexstrings[key_id]
             print('{0:3d} {1:4d} {2:<30s} 0x{3:s}'.format(
                 key_id, payload_size, label, hexlify(content)))
         else:
             label = "Unknown EXTH ID {0:d}".format(key_id)
             print("{0: >3d} {1: >4d} {2: <30s} 0x{3:s}".format(
                 key_id, payload_size, label, hexlify(content)))
         offset += size
Beispiel #3
0
def describe(data):
    """Return ``data`` as a quoted text rendering followed by its hex form.

    Every element whose ordinal is below 32 or above 127 is shown as '?';
    all others are decoded as latin-1.  The result has the shape
    ``"<text>" 0x<hex>``.
    """
    hex_text = hexlify(data)
    shown = ''.join(
        bchar(item).decode('latin-1') if 32 <= bord(item) <= 127 else '?'
        for item in data)
    return '"' + shown + '"' + ' 0x' + hex_text
Beispiel #4
0
def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr):
    """Extract the font held in section ``i`` and write it to the image dir.

    Five big-endian 32-bit fields are read from ``data`` starting at byte
    offset 4: an unused size value, the flags, the offset at which the font
    data starts, and the length and start offset of the XOR key.

    Flag bit 0x0002 marks the font as obfuscated: the first (at most) 1040
    bytes of the font data are XORed with the repeating key.  Flag bit
    0x0001 marks the (de-obfuscated) font data as zlib compressed.

    The file extension is chosen from the first four bytes of the resulting
    font data ('.ttf', '.otf', or '.dat' when unrecognised).

    Args:
        i: section number; used for the file name and section description.
        files: object whose ``imgdir`` attribute is the output directory.
        rscnames: list that receives the written file name.
        sect: object whose ``setsectiondescription`` is called for ``i``.
        data: raw bytes of the section.
        obfuscate_data: list that receives the file name when an obfuscated
            font was recognised as '.ttf' or '.otf'.
        beg: section number subtracted from ``i`` to form ``rsc_ptr``.
        rsc_ptr: current resource pointer; set to ``i - beg`` when it is -1.

    Returns:
        The tuple ``(rscnames, obfuscate_data, rsc_ptr)``.  When the header
        cannot be unpacked a message is printed and the three values are
        returned unchanged, with nothing written.
    """
    fontname = "font%05d" % i
    try:
        _usize, fflags, dstart, xor_len, xor_start = struct.unpack_from(
            b'>LLLLL', data, 4)
    except struct.error:
        # section too short to hold the font header: report it and skip,
        # but let unrelated errors (and Ctrl-C) propagate
        print("Failed to extract font: {0:s} from section {1:d}".format(fontname, i))
        return rscnames, obfuscate_data, rsc_ptr

    print("Extracting font:", fontname)
    ext = '.dat'
    font_data = data[dstart:]
    extent = min(len(font_data), 1040)
    if fflags & 0x0002:
        # obfuscated so need to de-obfuscate the first 1040 bytes
        key = bytearray(data[xor_start:xor_start + xor_len])
        buf = bytearray(font_data)
        for n in range(extent):
            buf[n] ^= key[n % xor_len]
        font_data = bytes(buf)
    if fflags & 0x0001:
        # ZLIB compressed data
        font_data = zlib.decompress(font_data)

    hdr = font_data[0:4]
    if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf':
        ext = '.ttf'
    elif hdr == b'OTTO':
        ext = '.otf'
    else:
        print("Warning: unknown font header %s" % hexlify(hdr))
    if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002):
        obfuscate_data.append(fontname + ext)

    fontname += ext
    outfnt = os.path.join(files.imgdir, fontname)
    with open(pathof(outfnt), 'wb') as f:
        f.write(font_data)
    rscnames.append(fontname)
    sect.setsectiondescription(i, "Font {0:s}".format(fontname))
    if rsc_ptr == -1:
        rsc_ptr = i - beg
    return rscnames, obfuscate_data, rsc_ptr
Beispiel #5
0
    def dumpheader(self):
        """Print a readable dump of the header fields and what follows them.

        Sets ``self.hdr`` (field name -> unpacked value),
        ``self.mobi_header`` / ``self.mobi_header_sorted_keys`` (the field
        layout chosen for ``self.version``), ``self.title``,
        ``self.extra1`` (bytes between the EXTH block and the title) and
        ``self.extra2`` (bytes after the padded title).  Then prints the
        field table, the EXTH records via ``dump_exth``, any extra data and
        the title.
        """
        # the first 16 bytes are not part of the official mobiheader but are
        # treated as such here, so the dumped length is self.length + 16
        header_end = self.length + 16
        print(
            "Dumping section %d, Mobipocket Header version: %d, total length %d"
            % (self.start, self.version, header_end))
        self.hdr = {}

        # pick the field layout that matches the header version
        if self.version == 0:
            layout = MobiHeader.palmdoc_header
            ordered_keys = MobiHeader.palmdoc_header_sorted_keys
        elif self.version < 8:
            layout = MobiHeader.mobi6_header
            ordered_keys = MobiHeader.mobi6_header_sorted_keys
        else:
            layout = MobiHeader.mobi8_header
            ordered_keys = MobiHeader.mobi8_header_sorted_keys
        self.mobi_header = layout
        self.mobi_header_sorted_keys = ordered_keys

        # unpack every field that starts before the end of the header
        for key in ordered_keys:
            offset, field_format, _width = layout[key]
            if offset < header_end:
                field, = struct.unpack_from(field_format, self.header, offset)
                self.hdr[key] = field

        if 'title_offset' in self.hdr:
            title_offset = self.hdr['title_offset']
            title_length = self.hdr['title_length']
        else:
            title_offset = title_length = 0

        if title_offset != 0:
            title_end = title_offset + title_length
            self.title = self.header[title_offset:title_end].decode(
                self.codec, errors='replace')
            # title record always padded with two nul bytes and then padded
            # with nuls to the next 4 byte boundary
            title_length = (title_length + 2 + 3) // 4 * 4
        else:
            # no title in the header: fall back to the palm database name
            title_offset = len(self.header)
            title_length = 0
            self.title = self.sect.palmname.decode('latin-1',
                                                   errors='replace')

        exth_end = self.exth_offset + self.exth_length
        self.extra1 = self.header[exth_end:title_offset]
        self.extra2 = self.header[title_offset + title_length:]

        print("Mobipocket header from section %d" % self.start)
        print("     Offset  Value Hex Dec        Description")
        for key in ordered_keys:
            offset, _field_format, width = layout[key]
            if offset >= header_end:
                continue
            if key == 'magic':
                # the magic is text, not a number: show it as a string
                self.hdr[key] = unicode_str(self.hdr[key])
                template = "0x{0:0>3X} ({0:3d}){2:>11s}            {3:s}"
            else:
                # hex column is two digits per byte; the filler before it
                # shrinks by the same amount
                template = (
                    "0x{0:0>3X} ({0:3d}){1: >%ds}0x{2:0>%dX} {2:10d} {3:s}"
                    % (9 - 2 * width, 2 * width))
            print(template.format(offset, " ", self.hdr[key], key))
        print("")

        if self.exth_length > 0:
            print("EXTH metadata, offset %d, padded length %d" %
                  (self.exth_offset, self.exth_length))
            self.dump_exth()
            print("")

        if self.extra1:
            print("Extra data between EXTH and Title, length %d" %
                  len(self.extra1))
            print(hexlify(self.extra1))
            print("")

        if title_length > 0:
            print("Title in header at offset %d, padded length %d: '%s'" %
                  (title_offset, title_length, self.title))
            print("")

        if self.extra2:
            print("Extra data between Title and end of header, length %d" %
                  len(self.extra2))
            print(hexlify(self.extra2))
            print("")
Beispiel #6
0
def dump_contexth(cpage, extheader):
    """Print every record of a container EXTH block as key / value pairs.

    ``cpage`` selects the codec used for text records (1252 ->
    'windows-1252', 65001 -> 'utf-8'; anything else falls back to
    'windows-1252').  Nothing is printed when ``extheader`` is empty.

    The record count is read from bytes 4..12 of ``extheader`` and the
    records start at byte 12.  Each record begins with an 8 byte (id, size)
    pair whose ``size`` includes those 8 bytes.
    """
    # determine text encoding
    codec_map = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }
    codec = codec_map.get(cpage, 'windows-1252')
    if extheader == b'':
        return

    # records whose payload is text
    id_map_strings = {
        1: 'Drm Server Id',
        2: 'Drm Commerce Id',
        3: 'Drm Ebookbase Book Id',
        4: 'Drm Ebookbase Dep Id',
        100: 'Creator',
        101: 'Publisher',
        102: 'Imprint',
        103: 'Description',
        104: 'ISBN',
        105: 'Subject',
        106: 'Published',
        107: 'Review',
        108: 'Contributor',
        109: 'Rights',
        110: 'SubjectCode',
        111: 'Type',
        112: 'Source',
        113: 'ASIN',
        114: 'versionNumber',
        117: 'Adult',
        118: 'Retail-Price',
        119: 'Retail-Currency',
        120: 'TSC',
        122: 'fixed-layout',
        123: 'book-type',
        124: 'orientation-lock',
        126: 'original-resolution',
        127: 'zero-gutter',
        128: 'zero-margin',
        129: 'MetadataResourceURI',
        132: 'RegionMagnification',
        150: 'LendingEnabled',
        200: 'DictShortName',
        501: 'cdeType',
        502: 'last_update_time',
        503: 'Updated_Title',
        504: 'CDEContentKey',
        505: 'AmazonContentReference',
        506: 'Title-Language',
        507: 'Title-Display-Direction',
        508: 'Title-Pronunciation',
        509: 'Title-Collation',
        510: 'Secondary-Title',
        511: 'Secondary-Title-Language',
        512: 'Secondary-Title-Direction',
        513: 'Secondary-Title-Pronunciation',
        514: 'Secondary-Title-Collation',
        515: 'Author-Language',
        516: 'Author-Display-Direction',
        517: 'Author-Pronunciation',
        518: 'Author-Collation',
        519: 'Author-Type',
        520: 'Publisher-Language',
        521: 'Publisher-Display-Direction',
        522: 'Publisher-Pronunciation',
        523: 'Publisher-Collation',
        524: 'Content-Language-Tag',
        525: 'primary-writing-mode',
        526: 'NCX-Ingested-By-Software',
        527: 'page-progression-direction',
        528: 'override-kindle-fonts',
        529: 'Compression-Upgraded',
        530: 'Soft-Hyphens-In-Content',
        531: 'Dictionary_In_Langague',
        532: 'Dictionary_Out_Language',
        533: 'Font_Converted',
        534: 'Amazon_Creator_Info',
        535: 'Creator-Build-Tag',
        # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?)
        536: 'HD-Media-Containers-Info',
        538: 'Resource-Container-Fidelity',
        539: 'HD-Container-Mimetype',
        540: 'Sample-For_Special-Purpose',
        541: 'Kindletool-Operation-Information',
        542: 'Container_Id',
        543: 'Asset-Type',  # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER
        544: 'Unknown_544',
    }
    # records whose payload is a 1, 2 or 4 byte number
    id_map_values = {
        115: 'sample',
        116: 'StartOffset',
        121: 'Mobi8-Boundary-Section',
        125: 'Embedded-Record-Count',
        130: 'Offline-Sample',
        131: 'Metadata-Record-Offset',
        201: 'CoverOffset',
        202: 'ThumbOffset',
        203: 'HasFakeCover',
        204: 'Creator-Software',
        205: 'Creator-Major-Version',
        206: 'Creator-Minor-Version',
        207: 'Creator-Build-Number',
        401: 'Clipping-Limit',
        402: 'Publisher-Limit',
        404: 'Text-to-Speech-Disabled',
        406: 'Rental-Expiration-Time',
    }
    # records whose payload is shown as raw hex
    id_map_hexstrings = {
        208: 'Watermark_(hex)',
        209: 'Tamper-Proof-Keys_(hex)',
        300: 'Font-Signature_(hex)',
        403: 'Unknown_(403)_(hex)',
        405: 'Ownership-Type_(hex)',
        407: 'Unknown_(407)_(hex)',
        420: 'Multimedia-Content-Reference_(hex)',
        450: 'Locations_Match_(hex)',
        451: 'Full-Story-Length_(hex)',
        452: 'Sample-Start_Location_(hex)',
        453: 'Sample-End-Location_(hex)',
    }
    # numeric records by total record size: (struct format, value format)
    numeric_layouts = {
        9: (b'B', '0x%01x'),
        10: (b'>H', '0x%02x'),
        12: (b'>L', '0x%04x'),
    }

    _length, num_items = struct.unpack(b'>LL', extheader[4:12])
    records = extheader[12:]
    offset = 0
    for _ in range(num_items):
        key_id, size = struct.unpack(b'>LL', records[offset:offset + 8])
        content = records[offset + 8:offset + size]
        if key_id in id_map_strings:
            text = content.decode(codec, errors='replace')
            print('\n    Key: "%s"\n        Value: "%s"' %
                  (id_map_strings[key_id], text))
        elif key_id in id_map_values:
            name = id_map_values[key_id]
            layout = numeric_layouts.get(size)
            if layout is None:
                print("\nError: Value for %s has unexpected size of %s" %
                      (name, size))
            else:
                payload_format, value_format = layout
                value, = struct.unpack(payload_format, content)
                print(('\n    Key: "%s"\n        Value: ' + value_format) %
                      (name, value))
        elif key_id in id_map_hexstrings:
            print('\n    Key: "%s"\n        Value: 0x%s' %
                  (id_map_hexstrings[key_id], hexlify(content)))
        else:
            print("\nWarning: Unknown metadata with id %s found" % key_id)
            name = str(key_id) + ' (hex)'
            print('    Key: "%s"\n        Value: 0x%s' %
                  (name, hexlify(content)))
        offset += size