Esempio n. 1
0
def _parse_ereader_header132(raw):
    h = Storage()
    h.compression, _unknown1, h.encoding, h.number_small_pages, h.number_large_pages, h.non_text_records, h.number_chapters, h.number_small_index, h.number_large_index, h.number_images, h.number_links, h.metadata_available, _unknown2, h.number_footnotes, h.number_sidebars, h.chapter_index_records, h.magic_2560, h.small_page_index_record, h.large_page_index_record, h.image_data_record, h.links_record, h.metadata_record, _unknown3, h.footnote_record, h.sidebar_record, h.last_data_record, = struct.unpack(
        ">HLHHHHHHHHHHHHHHHHHHHHHHH", raw[:54]
    )

    return h
Esempio n. 2
0
def _parse_palmdoc_header(raw):
    h = Storage()

    h.compression, _unused, h.text_length, h.record_count, h.record_size, h.current_position, = struct.unpack(
        ">HHLHHL", raw[0:0x10]
    )

    return h
Esempio n. 3
0
def parse_opf_xml(rawxml):
    """Parse OPF package XML into a Storage of per-section child lists.

    The input is first normalised with ``xml_to_unicode`` and anything
    before the first ``<`` is dropped.  It is then parsed with a
    recovering XML parser.  For each of the ``metadata``, ``manifest``,
    ``spine`` and ``guide`` sections found under the root, the result maps
    the section name to a list of ``(tag, attrib, text)`` tuples, one per
    direct child.  Sections that are missing or have no children are not
    present in the result.

    :param rawxml: the OPF document as passed to ``xml_to_unicode``.
    :returns: Storage keyed by section name.
    """
    rawxml, _encoding = xml_to_unicode(rawxml, strip_encoding_pats=True, resolve_entities=True, assume_utf8=True)
    rawxml = rawxml[rawxml.find('<'):]
    tree = etree.fromstring(rawxml, etree.XMLParser(recover=True))

    opf = Storage()

    for section in ('metadata', 'manifest', 'spine', 'guide'):
        subtree = tree.find('opf:%s' % section, namespaces=NAMESPACES)
        if subtree is not None:
            # Iterate the element directly: getchildren() is deprecated and
            # iteration yields the same direct children in document order.
            for el in subtree:
                opf.setdefault(section, []).append((el.tag, el.attrib, el.text))

    return opf
Esempio n. 4
0
def _parse_plucker_header(raw):
    h = Storage()

    h.uid, h.compression, h.records, = struct.unpack(">HHH", raw[0:6])
    h.home_html = None

    reserved = {}
    for i in xrange(h.records):
        adv = 4 * i
        name, = struct.unpack(">H", raw[6 + adv : 8 + adv])
        id, = struct.unpack(">H", raw[8 + adv : 10 + adv])
        reserved[id] = name
        if name == 0:
            h.home_html = id
    h.reserved = reserved

    return h
Esempio n. 5
0
def _parse_pdb_header(stream):
    pdbheader = Storage()

    # PDB fields
    pdbheader.name, pdbheader.attributes, pdbheader.version, pdbheader.creation_timestamp, pdbheader.modification_timestamp, pdbheader.last_backup_timestamp, pdbheader.modification_number, pdbheader.appinfo_offset, pdbheader.sortinfo_offset, pdbheader.type, pdbheader.creator, pdbheader.uniqueidseed, pdbheader.nextrecordlistid, pdbheader.num_records, = struct.unpack(
        ">32sHHLLLLLL4s4sLLH", stream.read(78)
    )

    # record offsets and lengths
    records = []
    start = struct.unpack(">LBBBB", stream.read(8))[0]
    for n in range(1, pdbheader.num_records):
        next_start = struct.unpack(">LBBBB", stream.read(8))[0]
        records.append((start, next_start - start))
        start = next_start
    stream.seek(0, 2)
    end = stream.tell()
    records.append((start, end - start))
    pdbheader.records = records

    # Clean up some of the fields
    pdbheader.name = re.sub("[^-A-Za-z0-9'\";:,. ]+", "_", pdbheader.name.replace("\x00", ""))

    return pdbheader
Esempio n. 6
0
def _parse_exth_header (raw):
    exth = Storage()

    exth.identifier, \
    exth.header_length, \
    exth.record_count, \
        = struct.unpack('>4sLL', raw[:12])

    exthdata = raw[12:]
    pos = 0

    records = []
    records_left = exth.record_count
    while records_left > 0:
        records_left -= 1
        record = Storage()
        record.type, \
        record.length, \
            = struct.unpack('>LL', exthdata[pos:pos + 8])
        record.data = exthdata[pos+8:pos+record.length]
        pos += record.length
        records.append(record)
    exth.records = records
    return exth
Esempio n. 7
0
def _parse_ztxt_header(raw):
    h = Storage()
    h.version, h.record_count, h.data_size, h.record_size, h.number_bookmarks, h.bookmark_record, h.number_annotations, h.annotation_record, h.flags, _reserved, h.crc32, = struct.unpack(
        ">HHLHHHHHBBL", raw[0:24]
    )
    return h
Esempio n. 8
0
def _parse_ereader_header202(raw):
    # Unfortunately, this header format is mostly unknown
    h = Storage()
    h.version, _unknown, h.non_text_records, = struct.unpack(">H6sH", raw[:10])

    return h
Esempio n. 9
0
def _parse_mobi_header (raw):
    mobiheader = Storage()

    mobiheader.compression, \
    _unused, \
    mobiheader.text_length, \
    mobiheader.record_count, \
    mobiheader.record_size, \
    mobiheader.encryption, \
    _unknown, \
        = struct.unpack('>HHLHHHH', raw[0:0x10])

    # Some ancient MOBI files have no more metadata than this
    if len(raw) <= 16:
        return mobiheader

    mobiheader.identifier, \
    mobiheader.header_length, \
    mobiheader.mobi_type, \
    mobiheader.text_encoding, \
    mobiheader.unique_id, \
    mobiheader.file_version, \
    mobiheader.ortographic_index_record, \
    mobiheader.inflection_index_record, \
    mobiheader.index_names_record, \
    mobiheader.index_keys_record, \
    mobiheader.extra_index0_record, \
    mobiheader.extra_index1_record, \
    mobiheader.extra_index2_record, \
    mobiheader.extra_index3_record, \
    mobiheader.extra_index4_record, \
    mobiheader.extra_index5_record, \
    mobiheader.first_nonbook_record, \
    mobiheader.fullname_offset, \
    mobiheader.fullname_length, \
    mobiheader.locale, \
    mobiheader.dictionary_input_language, \
    mobiheader.dictionary_output_language, \
    mobiheader.min_version, \
    mobiheader.first_image_record, \
    mobiheader.huffman_record, \
    mobiheader.huffman_record_count, \
    mobiheader.huffman_table_record, \
    mobiheader.huffman_table_length, \
    mobiheader.exth_flags, \
        = struct.unpack('>4sLLLLLLLLLLLLLLLLLLLLLLLLLLLL', raw[0x10:0x84])

    if len(raw) >= 0xb4:
        mobiheader.drm_offset, \
        mobiheader.drm_count, \
        mobiheader.drm_size, \
        mobiheader.drm_flags, \
            = struct.unpack('>LLLL', raw[0xa4:0xb4])

    if mobiheader.header_length < 0xe4 or \
       mobiheader.header_length > 0xf8:
        mobiheader.extra_flags = 0
    else:
        mobiheader.extra_flags, = struct.unpack('>H', raw[0xf2:0xf4])

    fullname_end = mobiheader.fullname_offset + mobiheader.fullname_length
    if fullname_end < len(raw):
        mobiheader.fullname = raw[mobiheader.fullname_offset:fullname_end]
    else:
        mobiheader.fullname = None

    if mobiheader.exth_flags & 0x40:
        mobiheader.exth = _parse_exth_header(raw[16 + mobiheader.header_length:])

    return mobiheader