Example 1
    def __call__(self, **kwargs):
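        # Serialize the header: apply keyword overrides to known fields, write
        # HEADER_NAME followed by each formatted field (integers packed
        # big-endian as H or I), then backpatch the offsets named in POSITIONS
        # and optionally align the result.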
        positions = {}
        for name, val in kwargs.iteritems():
            if name not in self:
                raise KeyError('Not a valid header field: %r' % name)
            self[name] = val

        buf = BytesIO()
        buf.write(bytes(self.HEADER_NAME))
        for name, val in self.iteritems():
            val = self.format_value(name, val)
            positions[name] = buf.tell()
            if val is None:
                raise ValueError('Dynamic field %r not set' % name)
            if isinstance(val, (int, long)):
                fmt = b'H' if name in self.SHORT_FIELDS else b'I'
                val = pack(b'>' + fmt, val)
            buf.write(val)

        for pos_field, field in self.POSITIONS.iteritems():
            buf.seek(positions[pos_field])
            buf.write(pack(b'>I', positions[field]))

        ans = buf.getvalue()
        if self.ALIGN_BLOCK:
            ans = align_block(ans)
        return ans
Example 2
    def __call__(self, **kwargs):
        positions = {}
        for name, val in kwargs.iteritems():
            if name not in self:
                raise KeyError('Not a valid header field: %r'%name)
            self[name] = val

        buf = BytesIO()
        buf.write(bytes(self.HEADER_NAME))
        for name, val in self.iteritems():
            val = self.format_value(name, val)
            positions[name] = buf.tell()
            if val is None:
                raise ValueError('Dynamic field %r not set'%name)
            if isinstance(val, (int, long)):
                fmt = b'H' if name in self.SHORT_FIELDS else b'I'
                val = pack(b'>'+fmt, val)
            buf.write(val)

        for pos_field, field in self.POSITIONS.iteritems():
            buf.seek(positions[pos_field])
            buf.write(pack(b'>I', positions[field]))

        ans = buf.getvalue()
        if self.ALIGN_BLOCK:
            ans = align_block(ans)
        return ans
Example 3
    def create_index_record(self, secondary=False):  # {{{
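        # Build a single index record: concatenate the index entries, follow
        # them with an IDXT block of 2-byte entry offsets (measured from the
        # start of the record, i.e. past the 192-byte header), prepend the
        # INDX header and reject results larger than 64KB.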
        header_length = 192
        buf = io.BytesIO()
        indices = list(
            SecondaryIndexEntry.entries()) if secondary else self.indices

        # Write index entries
        offsets = []
        for i in indices:
            offsets.append(buf.tell())
            buf.write(i.bytestring)

        index_block = align_block(buf.getvalue())

        # Write offsets to index entries as an IDXT block
        idxt_block = b'IDXT'
        buf.seek(0), buf.truncate(0)
        for offset in offsets:
            buf.write(pack(b'>H', header_length + offset))
        idxt_block = align_block(idxt_block + buf.getvalue())
        body = index_block + idxt_block

        header = b'INDX'
        buf.seek(0), buf.truncate(0)
        buf.write(pack(b'>I', header_length))
        buf.write(b'\0' * 4)  # Unknown
        buf.write(pack(b'>I', 1))  # Header type? Or index record number?
        buf.write(b'\0' * 4)  # Unknown
        # IDXT block offset
        buf.write(pack(b'>I', header_length + len(index_block)))
        # Number of index entries
        buf.write(pack(b'>I', len(offsets)))
        # Unknown
        buf.write(b'\xff' * 8)
        # Unknown
        buf.write(b'\0' * 156)

        header += buf.getvalue()

        ans = header + body
        if len(ans) > 0x10000:
            raise ValueError('Too many entries (%d) in the TOC' % len(offsets))
        return ans
Example 4
    def create_index_record(self, secondary=False):  # {{{
        header_length = 192
        buf = StringIO()
        indices = list(
            SecondaryIndexEntry.entries()) if secondary else self.indices

        # Write index entries
        offsets = []
        for i in indices:
            offsets.append(buf.tell())
            buf.write(i.bytestring)

        index_block = align_block(buf.getvalue())

        # Write offsets to index entries as an IDXT block
        idxt_block = b'IDXT'
        buf.seek(0), buf.truncate(0)
        for offset in offsets:
            buf.write(pack(b'>H', header_length + offset))
        idxt_block = align_block(idxt_block + buf.getvalue())
        body = index_block + idxt_block

        header = b'INDX'
        buf.seek(0), buf.truncate(0)
        buf.write(pack(b'>I', header_length))
        buf.write(b'\0' * 4)  # Unknown
        buf.write(pack(b'>I', 1))  # Header type? Or index record number?
        buf.write(b'\0' * 4)  # Unknown
        # IDXT block offset
        buf.write(pack(b'>I', header_length + len(index_block)))
        # Number of index entries
        buf.write(pack(b'>I', len(offsets)))
        # Unknown
        buf.write(b'\xff' * 8)
        # Unknown
        buf.write(b'\0' * 156)

        header += buf.getvalue()

        ans = header + body
        if len(ans) > 0x10000:
            raise ValueError('Too many entries (%d) in the TOC' % len(offsets))
        return ans
Example 5
    def __call__(self):
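        # Render every index entry, starting a new index record whenever the
        # next entry plus its IDXT offset would push the record past
        # RECORD_LIMIT. Each record gets its own INDX header; the per-record
        # geometry (last entry text and entry count) and the IDXT offsets are
        # then fed to IndexHeader to build the index header record, which is
        # prepended, with the CNCX records appended at the end.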
        self.control_bytes = self.calculate_control_bytes_for_each_entry(
            self.entries)

        index_blocks, idxt_blocks, record_counts, last_indices = [BytesIO()], [
            BytesIO()
        ], [0], [b'']
        buf = BytesIO()

        RECORD_LIMIT = 0x10000 - self.HEADER_LENGTH - 1048  # kindlegen uses 1048 (there has to be some margin because of block alignment)

        for i, (index_num, tags) in enumerate(self.entries):
            control_bytes = self.control_bytes[i]
            buf.seek(0), buf.truncate(0)
            index_num = (index_num.encode('utf-8') if isinstance(
                index_num, unicode) else index_num)
            raw = bytearray(index_num)
            raw.insert(0, len(index_num))
            buf.write(bytes(raw))
            buf.write(bytes(bytearray(control_bytes)))
            for tag in self.tag_types:
                values = tags.get(tag.name, None)
                if values is None:
                    continue
                try:
                    len(values)
                except TypeError:
                    values = [values]
                if values:
                    for val in values:
                        try:
                            buf.write(encint(val))
                        except ValueError:
                            raise ValueError('Invalid values for %r: %r' %
                                             (tag, values))
            raw = buf.getvalue()
            offset = index_blocks[-1].tell()
            idxt_pos = idxt_blocks[-1].tell()
            if offset + idxt_pos + len(raw) + 2 > RECORD_LIMIT:
                index_blocks.append(BytesIO())
                idxt_blocks.append(BytesIO())
                record_counts.append(0)
                offset = idxt_pos = 0
                last_indices.append(b'')
            record_counts[-1] += 1
            idxt_blocks[-1].write(pack(b'>H', self.HEADER_LENGTH + offset))
            index_blocks[-1].write(raw)
            last_indices[-1] = index_num

        index_records = []
        for index_block, idxt_block, record_count in zip(
                index_blocks, idxt_blocks, record_counts):
            index_block = align_block(index_block.getvalue())
            idxt_block = align_block(b'IDXT' + idxt_block.getvalue())
            # Create header for this index record
            header = b'INDX'
            buf.seek(0), buf.truncate(0)
            buf.write(pack(b'>I', self.HEADER_LENGTH))
            buf.write(b'\0' * 4)  # Unknown
            buf.write(
                pack(b'>I', 1)
            )  # Header type (0 for Index header record and 1 for Index records)
            buf.write(b'\0' * 4)  # Unknown

            # IDXT block offset
            buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))

            # Number of index entries in this record
            buf.write(pack(b'>I', record_count))

            buf.write(b'\xff' * 8)  # Unknown

            buf.write(b'\0' * 156)  # Unknown

            header += buf.getvalue()
            index_records.append(header + index_block + idxt_block)
            if len(index_records[-1]) > 0x10000:
                raise ValueError(
                    'Failed to rollover index blocks for very large index.')

        # Create the Index Header record
        tagx = self.generate_tagx()

        # Geometry of the index records is written as index entries pointed to
        # by the IDXT records
        buf.seek(0), buf.truncate()
        idxt = [b'IDXT']
        pos = IndexHeader.HEADER_LENGTH + len(tagx)
        for last_idx, num in zip(last_indices, record_counts):
            start = buf.tell()
            idxt.append(pack(b'>H', pos))
            buf.write(bytes(bytearray([len(last_idx)])) + last_idx)
            buf.write(pack(b'>H', num))
            pos += buf.tell() - start

        header = {
            'num_of_entries': sum(r for r in record_counts),
            'num_of_records': len(index_records),
            'num_of_cncx': len(self.cncx),
            'tagx': align_block(tagx),
            'geometry': align_block(buf.getvalue()),
            'idxt': align_block(b''.join(idxt)),
        }
        header = IndexHeader()(**header)
        self.records = [header] + index_records
        self.records.extend(self.cncx.records)
        return self.records
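
The tag values above are serialized with an encint helper that is not shown. Assuming it emits MOBI's forward-encoded variable-width integers (7-bit groups, most significant first, with the high bit set only on the final byte), a sketch might look like this:

def encint(value):
    # Hypothetical sketch of a forward-encoded variable-width integer as used
    # in the MOBI format: split the value into 7-bit groups, write the most
    # significant group first, and set the top bit of the last byte as a
    # terminator.
    if value < 0:
        raise ValueError('Cannot encode a negative number')
    byts = bytearray()
    while True:
        byts.append(value & 0b01111111)
        value >>= 7
        if not value:
            break
    byts.reverse()          # most significant 7-bit group first
    byts[-1] |= 0b10000000  # terminator bit on the final byte
    return bytes(byts)
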
Example 6
    def __call__(self):
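        # Single-record variant: render every entry into one index record and
        # raise if the result would exceed the 64KB record size; the index
        # header record is then built from TAGX, the last entry text and the
        # entry count, and the CNCX records are appended.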
        self.control_bytes = self.calculate_control_bytes_for_each_entry(
                self.entries)

        rendered_entries = []
        index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
        IndexEntry = namedtuple('IndexEntry', 'offset length raw')
        last_lead_text = b''
        too_large = ValueError('Index has too many entries, calibre does not'
                    ' support generating multiple index records at this'
                    ' time.')

        for i, x in enumerate(self.entries):
            control_bytes = self.control_bytes[i]
            leading_text, tags = x
            buf.seek(0), buf.truncate(0)
            leading_text = (leading_text.encode('utf-8') if
                    isinstance(leading_text, unicode) else leading_text)
            raw = bytearray(leading_text)
            raw.insert(0, len(leading_text))
            buf.write(bytes(raw))
            buf.write(bytes(bytearray(control_bytes)))
            for tag in self.tag_types:
                values = tags.get(tag.name, None)
                if values is None: continue
                try:
                    len(values)
                except TypeError:
                    values = [values]
                if values:
                    for val in values:
                        try:
                            buf.write(encint(val))
                        except ValueError:
                            raise ValueError('Invalid values for %r: %r'%(
                                tag, values))
            raw = buf.getvalue()
            offset = index.tell()
            if offset + self.HEADER_LENGTH >= 0x10000:
                raise too_large
            rendered_entries.append(IndexEntry(offset, len(raw), raw))
            idxt.write(pack(b'>H', self.HEADER_LENGTH+offset))
            index.write(raw)
            last_lead_text = leading_text

        index_block = align_block(index.getvalue())
        idxt_block = align_block(b'IDXT' + idxt.getvalue())
        body = index_block + idxt_block
        if len(body) + self.HEADER_LENGTH >= 0x10000:
            raise too_large
        header = b'INDX'
        buf.seek(0), buf.truncate(0)
        buf.write(pack(b'>I', self.HEADER_LENGTH))
        buf.write(b'\0'*4) # Unknown
        buf.write(pack(b'>I', 1)) # Header type? Or index record number?
        buf.write(b'\0'*4) # Unknown

        # IDXT block offset
        buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))

        # Number of index entries
        buf.write(pack(b'>I', len(rendered_entries)))

        buf.write(b'\xff'*8) # Unknown

        buf.write(b'\0'*156) # Unknown

        header += buf.getvalue()
        index_record = header + body

        tagx = self.generate_tagx()
        idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
                b'\0')
        # Last index
        idx = bytes(bytearray([len(last_lead_text)])) + last_lead_text
        idx += pack(b'>H', len(rendered_entries))

        header = {
                'num_of_entries': len(rendered_entries),
                'num_of_cncx': len(self.cncx),
                'tagx':tagx,
                'last_index':align_block(idx),
                'idxt':idxt
        }
        header = IndexHeader()(**header)
        self.records = [header, index_record]
        self.records.extend(self.cncx.records)
        return self.records
Example 7
    def generate_record0(self):  # MOBI header {{{
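        # Assemble record 0: append the FLIS, FCIS and EOF records, then write
        # the compression/text-length fields (bytes 0-15), the MOBI header,
        # the EXTH block and the title, and pad with 8KB of NULs so that
        # encryption data can be added later.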
        metadata = self.oeb.metadata
        bt = 0x002
        if self.primary_index_record_idx is not None:
            if False and self.indexer.is_flat_periodical:
                # Disabled as setting this to 0x102 causes the Kindle to not
                # auto archive the issues
                bt = 0x102
            elif self.indexer.is_periodical:
                # If you change this, remember to change the cdetype in the EXTH
                # header as well
                bt = 0x103 if self.indexer.is_flat_periodical else 0x101

        from calibre.ebooks.mobi.writer8.exth import build_exth
        exth = build_exth(metadata,
                prefer_author_sort=self.opts.prefer_author_sort,
                is_periodical=self.is_periodical,
                share_not_sync=self.opts.share_not_sync,
                cover_offset=self.cover_offset,
                thumbnail_offset=self.thumbnail_offset,
                start_offset=self.serializer.start_offset, mobi_doctype=bt
                )
        first_image_record = None
        if self.resources:
            used_images = self.serializer.used_images
            first_image_record  = len(self.records)
            self.resources.serialize(self.records, used_images)
        last_content_record = len(self.records) - 1

        # FCIS/FLIS (Seems to serve no purpose)
        flis_number = len(self.records)
        self.records.append(FLIS)
        fcis_number = len(self.records)
        self.records.append(fcis(self.text_length))

        # EOF record
        self.records.append(b'\xE9\x8E\x0D\x0A')

        record0 = io.BytesIO()
        # The MOBI Header
        record0.write(pack(b'>HHIHHHH',
            self.compression,  # compression type
            0,  # Unused
            self.text_length,  # Text length
            self.last_text_record_idx,  # Number of text records or last tr idx
            RECORD_SIZE,  # Text record size
            0,  # Unused
            0  # Unused
        ))  # 0 - 15 (0x0 - 0xf)
        uid = random.randint(0, 0xffffffff)
        title = normalize(unicode_type(metadata.title[0])).encode('utf-8')

        # 0x0 - 0x3
        record0.write(b'MOBI')

        # 0x4 - 0x7   : Length of header
        # 0x8 - 0x11  : MOBI type
        #   type    meaning
        #   0x002   MOBI book (chapter - chapter navigation)
        #   0x101   News - Hierarchical navigation with sections and articles
        #   0x102   News feed - Flat navigation
        #   0x103   News magazine - same as 0x101
        # 0xC - 0xF   : Text encoding (65001 is utf-8)
        # 0x10 - 0x13 : UID
        # 0x14 - 0x17 : Generator version

        record0.write(pack(b'>IIIII',
            0xe8, bt, 65001, uid, 6))

        # 0x18 - 0x1f : Unknown
        record0.write(b'\xff' * 8)

        # 0x20 - 0x23 : Secondary index record
        sir = 0xffffffff
        if (self.primary_index_record_idx is not None and
                self.indexer.secondary_record_offset is not None):
            sir = (self.primary_index_record_idx +
                    self.indexer.secondary_record_offset)
        record0.write(pack(b'>I', sir))

        # 0x24 - 0x3f : Unknown
        record0.write(b'\xff' * 28)

        # 0x40 - 0x43 : Offset of first non-text record
        record0.write(pack(b'>I',
            self.first_non_text_record_idx))

        # 0x44 - 0x4b : title offset, title length
        record0.write(pack(b'>II',
            0xe8 + 16 + len(exth), len(title)))

        # 0x4c - 0x4f : Language specifier
        record0.write(iana2mobi(
            unicode_type(metadata.language[0])))

        # 0x50 - 0x57 : Input language and Output language
        record0.write(b'\0' * 8)

        # 0x58 - 0x5b : Format version
        # 0x5c - 0x5f : First image record number
        record0.write(pack(b'>II',
            6, first_image_record if first_image_record else len(self.records)))

        # 0x60 - 0x63 : First HUFF/CDIC record number
        # 0x64 - 0x67 : Number of HUFF/CDIC records
        # 0x68 - 0x6b : First DATP record number
        # 0x6c - 0x6f : Number of DATP records
        record0.write(b'\0' * 16)

        # 0x70 - 0x73 : EXTH flags
        # Bit 6 (0b1000000) being set indicates the presence of an EXTH header
        # Bit 12 being set indicates the presence of embedded fonts
        # The purpose of the other bits is unknown
        exth_flags = 0b1010000
        if self.is_periodical:
            exth_flags |= 0b1000
        if self.resources.has_fonts:
            exth_flags |= 0b1000000000000
        record0.write(pack(b'>I', exth_flags))

        # 0x74 - 0x93 : Unknown
        record0.write(b'\0' * 32)

        # 0x94 - 0x97 : DRM offset
        # 0x98 - 0x9b : DRM count
        # 0x9c - 0x9f : DRM size
        # 0xa0 - 0xa3 : DRM flags
        record0.write(pack(b'>IIII',
            0xffffffff, 0xffffffff, 0, 0))

        # 0xa4 - 0xaf : Unknown
        record0.write(b'\0'*12)

        # 0xb0 - 0xb1 : First content record number
        # 0xb2 - 0xb3 : last content record number
        # (Includes Image, DATP, HUFF, DRM)
        record0.write(pack(b'>HH', 1, last_content_record))

        # 0xb4 - 0xb7 : Unknown
        record0.write(b'\0\0\0\x01')

        # 0xb8 - 0xbb : FCIS record number
        record0.write(pack(b'>I', fcis_number))

        # 0xbc - 0xbf : Unknown (FCIS record count?)
        record0.write(pack(b'>I', 1))

        # 0xc0 - 0xc3 : FLIS record number
        record0.write(pack(b'>I', flis_number))

        # 0xc4 - 0xc7 : Unknown (FLIS record count?)
        record0.write(pack(b'>I', 1))

        # 0xc8 - 0xcf : Unknown
        record0.write(b'\0'*8)

        # 0xd0 - 0xdf : Unknown
        record0.write(pack(b'>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))

        # 0xe0 - 0xe3 : Extra record data
        # Extra record data flags:
        #   - 0b1  : <extra multibyte bytes><size>
        #   - 0b10 : <TBS indexing description of this HTML record><size>
        #   - 0b100: <uncrossable breaks><size>
        # Setting bit 2 (0x2) disables <guide><reference type="start"> functionality
        extra_data_flags = 0b1  # Has multibyte overlap bytes
        if self.primary_index_record_idx is not None:
            extra_data_flags |= 0b10
        if WRITE_UNCROSSABLE_BREAKS:
            extra_data_flags |= 0b100
        record0.write(pack(b'>I', extra_data_flags))

        # 0xe4 - 0xe7 : Primary index record
        record0.write(pack(b'>I', 0xffffffff if self.primary_index_record_idx
            is None else self.primary_index_record_idx))

        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
        # Add some buffer so that Amazon can add encryption information if this
        # MOBI is submitted for publication
        record0 += (b'\0' * (1024*8))
        self.records[0] = align_block(record0)
Example 8
    def create_header(self, secondary=False):  # {{{
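        # Build the INDX header record for the primary (or secondary) index:
        # a fixed 192-byte header, the TAGX block, the last entry's index and
        # the entry count, then an IDXT section whose offset is backfilled
        # into bytes 20-24 of the header.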
        buf = io.BytesIO()
        if secondary:
            tagx_block = TAGX().secondary
        else:
            tagx_block = (TAGX().periodical
                          if self.is_periodical else TAGX().flat_book)
        header_length = 192

        # Ident 0 - 4
        buf.write(b'INDX')

        # Header length 4 - 8
        buf.write(pack(b'>I', header_length))

        # Unknown 8-16
        buf.write(b'\0' * 8)

        # Index type: 0 - normal, 2 - inflection 16 - 20
        buf.write(pack(b'>I', 2))

        # IDXT offset 20-24
        buf.write(pack(b'>I', 0))  # Filled in later

        # Number of index records 24-28
        buf.write(pack(b'>I', 1 if secondary else len(self.records)))

        # Index Encoding 28-32
        buf.write(pack(b'>I', 65001))  # utf-8

        # Unknown 32-36
        buf.write(b'\xff' * 4)

        # Number of index entries 36-40
        indices = list(
            SecondaryIndexEntry.entries()) if secondary else self.indices
        buf.write(pack(b'>I', len(indices)))

        # ORDT offset 40-44
        buf.write(pack(b'>I', 0))

        # LIGT offset 44-48
        buf.write(pack(b'>I', 0))

        # Number of LIGT entries 48-52
        buf.write(pack(b'>I', 0))

        # Number of CNCX records 52-56
        buf.write(pack(b'>I', 0 if secondary else len(self.cncx.records)))

        # Unknown 56-180
        buf.write(b'\0' * 124)

        # TAGX offset 180-184
        buf.write(pack(b'>I', header_length))

        # Unknown 184-192
        buf.write(b'\0' * 8)

        # TAGX block
        buf.write(tagx_block)

        num = len(indices)

        # The index of the last entry in the NCX
        idx = indices[-1].index
        if isinstance(idx, numbers.Integral):
            idx = encode_number_as_hex(idx)
        else:
            idx = idx.encode('ascii')
            idx = (bytes(bytearray([len(idx)]))) + idx
        buf.write(idx)

        # The number of entries in the NCX
        buf.write(pack(b'>H', num))

        # Padding
        pad = (4 - (buf.tell() % 4)) % 4
        if pad:
            buf.write(b'\0' * pad)

        idxt_offset = buf.tell()

        buf.write(b'IDXT')
        buf.write(pack(b'>H', header_length + len(tagx_block)))
        buf.write(b'\0')
        buf.seek(20)
        buf.write(pack(b'>I', idxt_offset))

        return align_block(buf.getvalue())
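
All of these examples rely on an align_block helper that is not shown. Judging from the explicit padding logic near the end of create_header above (pad = (4 - (buf.tell() % 4)) % 4), it presumably pads a byte string out to a multiple of four bytes; a minimal sketch under that assumption:

def align_block(raw, multiple=4, pad=b'\0'):
    # Hypothetical stand-in for the align_block helper used throughout these
    # examples: pad raw with NUL bytes until its length is a multiple of
    # `multiple` (assumed to be 4, matching the manual padding above).
    extra = len(raw) % multiple
    if extra == 0:
        return raw
    return raw + pad * (multiple - extra)
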
Example 9
    def create_header(self, secondary=False):  # {{{
        buf = io.BytesIO()
        if secondary:
            tagx_block = TAGX().secondary
        else:
            tagx_block = (TAGX().periodical if self.is_periodical else
                                TAGX().flat_book)
        header_length = 192

        # Ident 0 - 4
        buf.write(b'INDX')

        # Header length 4 - 8
        buf.write(pack(b'>I', header_length))

        # Unknown 8-16
        buf.write(b'\0'*8)

        # Index type: 0 - normal, 2 - inflection 16 - 20
        buf.write(pack(b'>I', 2))

        # IDXT offset 20-24
        buf.write(pack(b'>I', 0))  # Filled in later

        # Number of index records 24-28
        buf.write(pack(b'>I', 1 if secondary else len(self.records)))

        # Index Encoding 28-32
        buf.write(pack(b'>I', 65001))  # utf-8

        # Unknown 32-36
        buf.write(b'\xff'*4)

        # Number of index entries 36-40
        indices = list(SecondaryIndexEntry.entries()) if secondary else self.indices
        buf.write(pack(b'>I', len(indices)))

        # ORDT offset 40-44
        buf.write(pack(b'>I', 0))

        # LIGT offset 44-48
        buf.write(pack(b'>I', 0))

        # Number of LIGT entries 48-52
        buf.write(pack(b'>I', 0))

        # Number of CNCX records 52-56
        buf.write(pack(b'>I', 0 if secondary else len(self.cncx.records)))

        # Unknown 56-180
        buf.write(b'\0'*124)

        # TAGX offset 180-184
        buf.write(pack(b'>I', header_length))

        # Unknown 184-192
        buf.write(b'\0'*8)

        # TAGX block
        buf.write(tagx_block)

        num = len(indices)

        # The index of the last entry in the NCX
        idx = indices[-1].index
        if isinstance(idx, numbers.Integral):
            idx = encode_number_as_hex(idx)
        else:
            idx = idx.encode('ascii')
            idx = (bytes(bytearray([len(idx)]))) + idx
        buf.write(idx)

        # The number of entries in the NCX
        buf.write(pack(b'>H', num))

        # Padding
        pad = (4 - (buf.tell()%4))%4
        if pad:
            buf.write(b'\0'*pad)

        idxt_offset = buf.tell()

        buf.write(b'IDXT')
        buf.write(pack(b'>H', header_length + len(tagx_block)))
        buf.write(b'\0')
        buf.seek(20)
        buf.write(pack(b'>I', idxt_offset))

        return align_block(buf.getvalue())
Example 10
    def __call__(self):
        self.control_bytes = self.calculate_control_bytes_for_each_entry(
                self.entries)

        index_blocks, idxt_blocks, record_counts, last_indices = [BytesIO()], [BytesIO()], [0], [b'']
        buf = BytesIO()

        RECORD_LIMIT = 0x10000 - self.HEADER_LENGTH - 1048  # kindlegen uses 1048 (there has to be some margin because of block alignment)

        for i, (index_num, tags) in enumerate(self.entries):
            control_bytes = self.control_bytes[i]
            buf.seek(0), buf.truncate(0)
            index_num = (index_num.encode('utf-8') if isinstance(index_num, unicode) else index_num)
            raw = bytearray(index_num)
            raw.insert(0, len(index_num))
            buf.write(bytes(raw))
            buf.write(bytes(bytearray(control_bytes)))
            for tag in self.tag_types:
                values = tags.get(tag.name, None)
                if values is None:
                    continue
                try:
                    len(values)
                except TypeError:
                    values = [values]
                if values:
                    for val in values:
                        try:
                            buf.write(encint(val))
                        except ValueError:
                            raise ValueError('Invalid values for %r: %r'%(
                                tag, values))
            raw = buf.getvalue()
            offset = index_blocks[-1].tell()
            idxt_pos = idxt_blocks[-1].tell()
            if offset + idxt_pos + len(raw) + 2 > RECORD_LIMIT:
                index_blocks.append(BytesIO())
                idxt_blocks.append(BytesIO())
                record_counts.append(0)
                offset = idxt_pos = 0
                last_indices.append(b'')
            record_counts[-1] += 1
            idxt_blocks[-1].write(pack(b'>H', self.HEADER_LENGTH+offset))
            index_blocks[-1].write(raw)
            last_indices[-1] = index_num

        index_records = []
        for index_block, idxt_block, record_count in zip(index_blocks, idxt_blocks, record_counts):
            index_block = align_block(index_block.getvalue())
            idxt_block = align_block(b'IDXT' + idxt_block.getvalue())
            # Create header for this index record
            header = b'INDX'
            buf.seek(0), buf.truncate(0)
            buf.write(pack(b'>I', self.HEADER_LENGTH))
            buf.write(b'\0'*4)  # Unknown
            buf.write(pack(b'>I', 1))  # Header type (0 for Index header record and 1 for Index records)
            buf.write(b'\0'*4)  # Unknown

            # IDXT block offset
            buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))

            # Number of index entries in this record
            buf.write(pack(b'>I', record_count))

            buf.write(b'\xff'*8)  # Unknown

            buf.write(b'\0'*156)  # Unknown

            header += buf.getvalue()
            index_records.append(header + index_block + idxt_block)
            if len(index_records[-1]) > 0x10000:
                raise ValueError('Failed to rollover index blocks for very large index.')

        # Create the Index Header record
        tagx = self.generate_tagx()

        # Geometry of the index records is written as index entries pointed to
        # by the IDXT records
        buf.seek(0), buf.truncate()
        idxt = [b'IDXT']
        pos = IndexHeader.HEADER_LENGTH + len(tagx)
        for last_idx, num in zip(last_indices, record_counts):
            start = buf.tell()
            idxt.append(pack(b'>H', pos))
            buf.write(bytes(bytearray([len(last_idx)])) + last_idx)
            buf.write(pack(b'>H', num))
            pos += buf.tell() - start

        header = {
                'num_of_entries': sum(r for r in record_counts),
                'num_of_records': len(index_records),
                'num_of_cncx': len(self.cncx),
                'tagx':align_block(tagx),
                'geometry':align_block(buf.getvalue()),
                'idxt':align_block(b''.join(idxt)),
        }
        header = IndexHeader()(**header)
        self.records = [header] + index_records
        self.records.extend(self.cncx.records)
        return self.records
Example 11
    def generate_record0(self): #  MOBI header {{{
        metadata = self.oeb.metadata
        bt = 0x002
        if self.primary_index_record_idx is not None:
            if False and self.indexer.is_flat_periodical:
                # Disabled as setting this to 0x102 causes the Kindle to not
                # auto archive the issues
                bt = 0x102
            elif self.indexer.is_periodical:
                # If you change this, remember to change the cdetype in the EXTH
                # header as well
                bt = 0x103 if self.indexer.is_flat_periodical else 0x101

        from calibre.ebooks.mobi.writer8.exth import build_exth
        exth = build_exth(metadata,
                prefer_author_sort=self.opts.prefer_author_sort,
                is_periodical=self.is_periodical,
                share_not_sync=self.opts.share_not_sync,
                cover_offset=self.cover_offset,
                thumbnail_offset=self.thumbnail_offset,
                start_offset=self.serializer.start_offset, mobi_doctype=bt
                )
        first_image_record = None
        if self.resources:
            used_images = self.serializer.used_images
            first_image_record  = len(self.records)
            self.resources.serialize(self.records, used_images)
        last_content_record = len(self.records) - 1

        # FCIS/FLIS (Seems to serve no purpose)
        flis_number = len(self.records)
        self.records.append(FLIS)
        fcis_number = len(self.records)
        self.records.append(fcis(self.text_length))

        # EOF record
        self.records.append(b'\xE9\x8E\x0D\x0A')

        record0 = StringIO()
        # The MOBI Header
        record0.write(pack(b'>HHIHHHH',
            self.compression, # compression type
            0, # Unused
            self.text_length, # Text length
            self.last_text_record_idx, # Number of text records or last tr idx
            RECORD_SIZE, # Text record size
            0, # Unused
            0  # Unused
        )) # 0 - 15 (0x0 - 0xf)
        uid = random.randint(0, 0xffffffff)
        title = normalize(unicode(metadata.title[0])).encode('utf-8')

        # 0x0 - 0x3
        record0.write(b'MOBI')

        # 0x4 - 0x7   : Length of header
        # 0x8 - 0x11  : MOBI type
        #   type    meaning
        #   0x002   MOBI book (chapter - chapter navigation)
        #   0x101   News - Hierarchical navigation with sections and articles
        #   0x102   News feed - Flat navigation
        #   0x103   News magazine - same as 0x101
        # 0xC - 0xF   : Text encoding (65001 is utf-8)
        # 0x10 - 0x13 : UID
        # 0x14 - 0x17 : Generator version

        record0.write(pack(b'>IIIII',
            0xe8, bt, 65001, uid, 6))

        # 0x18 - 0x1f : Unknown
        record0.write(b'\xff' * 8)

        # 0x20 - 0x23 : Secondary index record
        sir = 0xffffffff
        if (self.primary_index_record_idx is not None and
                self.indexer.secondary_record_offset is not None):
            sir = (self.primary_index_record_idx +
                    self.indexer.secondary_record_offset)
        record0.write(pack(b'>I', sir))

        # 0x24 - 0x3f : Unknown
        record0.write(b'\xff' * 28)

        # 0x40 - 0x43 : Offset of first non-text record
        record0.write(pack(b'>I',
            self.first_non_text_record_idx))

        # 0x44 - 0x4b : title offset, title length
        record0.write(pack(b'>II',
            0xe8 + 16 + len(exth), len(title)))

        # 0x4c - 0x4f : Language specifier
        record0.write(iana2mobi(
            str(metadata.language[0])))

        # 0x50 - 0x57 : Input language and Output language
        record0.write(b'\0' * 8)

        # 0x58 - 0x5b : Format version
        # 0x5c - 0x5f : First image record number
        record0.write(pack(b'>II',
            6, first_image_record if first_image_record else len(self.records)))

        # 0x60 - 0x63 : First HUFF/CDIC record number
        # 0x64 - 0x67 : Number of HUFF/CDIC records
        # 0x68 - 0x6b : First DATP record number
        # 0x6c - 0x6f : Number of DATP records
        record0.write(b'\0' * 16)

        # 0x70 - 0x73 : EXTH flags
        # Bit 6 (0b1000000) being set indicates the presence of an EXTH header
        # Bit 12 being set indicates the presence of embedded fonts
        # The purpose of the other bits is unknown
        exth_flags = 0b1010000
        if self.is_periodical:
            exth_flags |= 0b1000
        if self.resources.has_fonts:
            exth_flags |= 0b1000000000000
        record0.write(pack(b'>I', exth_flags))

        # 0x74 - 0x93 : Unknown
        record0.write(b'\0' * 32)

        # 0x94 - 0x97 : DRM offset
        # 0x98 - 0x9b : DRM count
        # 0x9c - 0x9f : DRM size
        # 0xa0 - 0xa3 : DRM flags
        record0.write(pack(b'>IIII',
            0xffffffff, 0xffffffff, 0, 0))

        # 0xa4 - 0xaf : Unknown
        record0.write(b'\0'*12)

        # 0xb0 - 0xb1 : First content record number
        # 0xb2 - 0xb3 : last content record number
        # (Includes Image, DATP, HUFF, DRM)
        record0.write(pack(b'>HH', 1, last_content_record))

        # 0xb4 - 0xb7 : Unknown
        record0.write(b'\0\0\0\x01')

        # 0xb8 - 0xbb : FCIS record number
        record0.write(pack(b'>I', fcis_number))

        # 0xbc - 0xbf : Unknown (FCIS record count?)
        record0.write(pack(b'>I', 1))

        # 0xc0 - 0xc3 : FLIS record number
        record0.write(pack(b'>I', flis_number))

        # 0xc4 - 0xc7 : Unknown (FLIS record count?)
        record0.write(pack(b'>I', 1))

        # 0xc8 - 0xcf : Unknown
        record0.write(b'\0'*8)

        # 0xd0 - 0xdf : Unknown
        record0.write(pack(b'>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))

        # 0xe0 - 0xe3 : Extra record data
        # Extra record data flags:
        #   - 0b1  : <extra multibyte bytes><size>
        #   - 0b10 : <TBS indexing description of this HTML record><size>
        #   - 0b100: <uncrossable breaks><size>
        # Setting bit 2 (0x2) disables <guide><reference type="start"> functionality
        extra_data_flags = 0b1 # Has multibyte overlap bytes
        if self.primary_index_record_idx is not None:
            extra_data_flags |= 0b10
        if WRITE_UNCROSSABLE_BREAKS:
            extra_data_flags |= 0b100
        record0.write(pack(b'>I', extra_data_flags))

        # 0xe4 - 0xe7 : Primary index record
        record0.write(pack(b'>I', 0xffffffff if self.primary_index_record_idx
            is None else self.primary_index_record_idx))

        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
        # Add some buffer so that Amazon can add encryption information if this
        # MOBI is submitted for publication
        record0 += (b'\0' * (1024*8))
        self.records[0] = align_block(record0)
Example 12
    def __call__(self):
        self.control_bytes = self.calculate_control_bytes_for_each_entry(
            self.entries)

        rendered_entries = []
        index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
        IndexEntry = namedtuple('IndexEntry', 'offset length raw')
        last_lead_text = b''
        too_large = ValueError(
            'Index has too many entries, calibre does not'
            ' support generating multiple index records at this'
            ' time.')

        for i, x in enumerate(self.entries):
            control_bytes = self.control_bytes[i]
            leading_text, tags = x
            buf.seek(0), buf.truncate(0)
            leading_text = (leading_text.encode('utf-8') if isinstance(
                leading_text, unicode) else leading_text)
            raw = bytearray(leading_text)
            raw.insert(0, len(leading_text))
            buf.write(bytes(raw))
            buf.write(bytes(bytearray(control_bytes)))
            for tag in self.tag_types:
                values = tags.get(tag.name, None)
                if values is None: continue
                try:
                    len(values)
                except TypeError:
                    values = [values]
                if values:
                    for val in values:
                        try:
                            buf.write(encint(val))
                        except ValueError:
                            raise ValueError('Invalid values for %r: %r' %
                                             (tag, values))
            raw = buf.getvalue()
            offset = index.tell()
            if offset + self.HEADER_LENGTH >= 0x10000:
                raise too_large
            rendered_entries.append(IndexEntry(offset, len(raw), raw))
            idxt.write(pack(b'>H', self.HEADER_LENGTH + offset))
            index.write(raw)
            last_lead_text = leading_text

        index_block = align_block(index.getvalue())
        idxt_block = align_block(b'IDXT' + idxt.getvalue())
        body = index_block + idxt_block
        if len(body) + self.HEADER_LENGTH >= 0x10000:
            raise too_large
        header = b'INDX'
        buf.seek(0), buf.truncate(0)
        buf.write(pack(b'>I', self.HEADER_LENGTH))
        buf.write(b'\0' * 4)  # Unknown
        buf.write(pack(b'>I', 1))  # Header type? Or index record number?
        buf.write(b'\0' * 4)  # Unknown

        # IDXT block offset
        buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))

        # Number of index entries
        buf.write(pack(b'>I', len(rendered_entries)))

        buf.write(b'\xff' * 8)  # Unknown

        buf.write(b'\0' * 156)  # Unknown

        header += buf.getvalue()
        index_record = header + body

        tagx = self.generate_tagx()
        idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) +
                b'\0')
        # Last index
        idx = bytes(bytearray([len(last_lead_text)])) + last_lead_text
        idx += pack(b'>H', len(rendered_entries))

        header = {
            'num_of_entries': len(rendered_entries),
            'num_of_cncx': len(self.cncx),
            'tagx': tagx,
            'last_index': align_block(idx),
            'idxt': idxt
        }
        header = IndexHeader()(**header)
        self.records = [header, index_record]
        self.records.extend(self.cncx.records)
        return self.records