Example #1
    def create_indices(self):
        self.skel_records = SkelIndex(self.skel_table)()
        self.chunk_records = ChunkIndex(self.chunk_table)()
        self.ncx_records = []
        toc = self.oeb.toc
        entries = []
        is_periodical = self.opts.mobi_periodical
        if toc.count() < 1:
            self.log.warn('Document has no ToC, MOBI will have no NCX index')
            return

        # Flatten the ToC into a depth-first list
        fl = toc.iterdescendants()
        for i, item in enumerate(fl):
            entry = {
                'id': id(item),
                'index': i,
                'label': (item.title or _('Unknown')),
                'children': []
            }
            entry['depth'] = getattr(item, 'ncx_hlvl', 0)
            p = getattr(item, 'ncx_parent', None)
            if p is not None:
                entry['parent_id'] = p
            for child in item:
                child.ncx_parent = entry['id']
                child.ncx_hlvl = entry['depth'] + 1
                entry['children'].append(id(child))
            if is_periodical:
                if item.author:
                    entry['author'] = item.author
                if item.description:
                    entry['description'] = item.description
            entries.append(entry)
            href = item.href or ''
            href, frag = href.partition('#')[0::2]
            aid = self.id_map.get((href, frag), None)
            if aid is None:
                aid = self.id_map.get((href, ''), None)
            if aid is None:
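                # Still no matching anchor for this href; fall back to the
                # very start of the text, i.e. the first chunk in the chunk
                # table.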
                pos, fid = 0, 0
                chunk = self.chunk_table[pos]
                offset = chunk.insert_pos + fid
            else:
                pos, fid, offset = self.aid_offset_map[aid]

            entry['pos_fid'] = (pos, fid)
            entry['offset'] = offset

        # The Kindle requires entries to be sorted by (depth, playorder).
        # However, I cannot figure out how to deal with non-linear ToCs, i.e.
        # ToCs whose nth entry at depth d has an offset after its (n+k)th
        # entry at the same depth, so we sort on (depth, offset) instead. This
        # re-orders the ToC to be linear. A non-linear ToC breaks
        # section-to-section jumping. kindlegen somehow handles non-linear
        # ToCs, but I cannot figure out how.
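        # For example, three same-depth entries whose ToC order gives offsets
        # 100, 400, 250 form a non-linear ToC; sorting on (depth, offset)
        # yields 100, 250, 400, i.e. reading order.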
        original = sorted(entries,
                          key=lambda entry: (entry['depth'], entry['index']))
        linearized = sorted(entries,
                            key=lambda entry:
                            (entry['depth'], entry['offset']))
        is_non_linear = original != linearized
        entries = linearized
        is_non_linear = False  # False as we are using the linearized entries

        if is_non_linear:
            for entry in entries:
                entry['kind'] = 'chapter'

        for i, entry in enumerate(entries):
            entry['index'] = i
        id_to_index = {entry['id']: entry['index'] for entry in entries}

        # Write the hierarchical information
        for entry in entries:
            children = entry.pop('children')
            if children:
                entry['first_child'] = id_to_index[children[0]]
                entry['last_child'] = id_to_index[children[-1]]
            if 'parent_id' in entry:
                entry['parent'] = id_to_index[entry.pop('parent_id')]

        # Write the lengths
        def get_next_start(entry):
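            # An entry extends to the next entry at the same or a shallower
            # depth; if there is none, it runs to the end of the main text
            # flow (flows[0]).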
            enders = [
                e['offset'] for e in entries if e['depth'] <= entry['depth']
                and e['offset'] > entry['offset']
            ]
            if enders:
                return min(enders)
            return len(self.flows[0])

        for entry in entries:
            entry['length'] = get_next_start(entry) - entry['offset']

        self.has_tbs = apply_trailing_byte_sequences(
            entries, self.records, self.uncompressed_record_lengths)
        idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
        self.ncx_records = idx_type(entries)()
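
The linearization and length logic above can be exercised in isolation. The
following is a minimal, standalone sketch; the helper name, the toy entry
dicts and the text length are invented for illustration, whereas in the method
they come from the ToC, id_map and flows[0]:

def linearize_and_measure(entries, text_length):
    # Sort on (depth, offset) so entries at each depth appear in reading
    # order, mirroring the linearization step above.
    entries = sorted(entries, key=lambda e: (e['depth'], e['offset']))
    for i, e in enumerate(entries):
        e['index'] = i
    # Each entry runs to the next entry at the same or a shallower depth,
    # or to the end of the text.
    for e in entries:
        enders = [x['offset'] for x in entries
                  if x['depth'] <= e['depth'] and x['offset'] > e['offset']]
        e['length'] = (min(enders) if enders else text_length) - e['offset']
    return entries

# Toy data: a chapter with two sections listed out of reading order.
toy = [
    {'label': 'Chapter 1', 'depth': 0, 'offset': 0},
    {'label': 'Section 1.2', 'depth': 1, 'offset': 900},
    {'label': 'Section 1.1', 'depth': 1, 'offset': 300},
]
for e in linearize_and_measure(toy, text_length=1500):
    print(e['index'], e['label'], e['offset'], e['length'])
# 0 Chapter 1 0 1500
# 1 Section 1.1 300 600
# 2 Section 1.2 900 600
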
Example #2
    def create_indices(self):
        self.skel_records = SkelIndex(self.skel_table)()
        self.chunk_records = ChunkIndex(self.chunk_table)()
        self.ncx_records = []
        toc = self.oeb.toc
        entries = []
        is_periodical = self.opts.mobi_periodical
        if toc.count() < 2:
            self.log.warn('Document has no ToC, MOBI will have no NCX index')
            return

        # Flatten the ToC into a depth-first list
        fl = toc.iterdescendants()
        for i, item in enumerate(fl):
            entry = {'id': id(item), 'index': i,
                     'label': (item.title or _('Unknown')), 'children': []}
            entry['depth'] = getattr(item, 'ncx_hlvl', 0)
            p = getattr(item, 'ncx_parent', None)
            if p is not None:
                entry['parent_id'] = p
            for child in item:
                child.ncx_parent = entry['id']
                child.ncx_hlvl = entry['depth'] + 1
                entry['children'].append(id(child))
            if is_periodical:
                if item.author:
                    entry['author'] = item.author
                if item.description:
                    entry['description'] = item.description
            entries.append(entry)
            href = item.href or ''
            href, frag = href.partition('#')[0::2]
            aid = self.id_map.get((href, frag), None)
            if aid is None:
                aid = self.id_map.get((href, ''), None)
            if aid is None:
                pos, fid = 0, 0
                chunk = self.chunk_table[pos]
                offset = chunk.insert_pos + fid
            else:
                pos, fid, offset = self.aid_offset_map[aid]

            entry['pos_fid'] = (pos, fid)
            entry['offset'] = offset

        # The Kindle requires entries to be sorted by (depth, playorder).
        # However, I cannot figure out how to deal with non-linear ToCs, i.e.
        # ToCs whose nth entry at depth d has an offset after its (n+k)th
        # entry at the same depth, so we sort on (depth, offset) instead. This
        # re-orders the ToC to be linear. A non-linear ToC breaks
        # section-to-section jumping. kindlegen somehow handles non-linear
        # ToCs, but I cannot figure out how.
        original = sorted(entries,
                key=lambda entry: (entry['depth'], entry['index']))
        linearized = sorted(entries,
                key=lambda entry: (entry['depth'], entry['offset']))
        is_non_linear = original != linearized
        entries = linearized
        is_non_linear = False # False as we are using the linearized entries

        if is_non_linear:
            for entry in entries:
                entry['kind'] = 'chapter'

        for i, entry in enumerate(entries):
            entry['index'] = i
        id_to_index = {entry['id']:entry['index'] for entry in entries}

        # Write the hierarchical information
        for entry in entries:
            children = entry.pop('children')
            if children:
                entry['first_child'] = id_to_index[children[0]]
                entry['last_child'] = id_to_index[children[-1]]
            if 'parent_id' in entry:
                entry['parent'] = id_to_index[entry.pop('parent_id')]

        # Write the lengths
        def get_next_start(entry):
            enders = [e['offset'] for e in entries if e['depth'] <=
                    entry['depth'] and e['offset'] > entry['offset']]
            if enders:
                return min(enders)
            return len(self.flows[0])
        for entry in entries:
            entry['length'] = get_next_start(entry) - entry['offset']

        self.has_tbs = apply_trailing_byte_sequences(entries, self.records,
                self.uncompressed_record_lengths)
        idx_type = NonLinearNCXIndex if is_non_linear else NCXIndex
        self.ncx_records = idx_type(entries)()
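
The hierarchy wiring (first_child, last_child, parent) can be illustrated the
same way. This is a minimal sketch with invented names and toy ids; in the
method above the ids come from id(item) and the indices from the re-indexing
loop:

def wire_hierarchy(entries):
    # Map each entry's stable id to its position, then replace the id-based
    # links with index-based ones, as the method does after re-indexing.
    id_to_index = {e['id']: i for i, e in enumerate(entries)}
    for e in entries:
        children = e.pop('children')
        if children:
            e['first_child'] = id_to_index[children[0]]
            e['last_child'] = id_to_index[children[-1]]
        if 'parent_id' in e:
            e['parent'] = id_to_index[e.pop('parent_id')]
    return entries

# Toy data: one parent with two children.
toy = [
    {'id': 'a', 'children': ['b', 'c']},
    {'id': 'b', 'children': [], 'parent_id': 'a'},
    {'id': 'c', 'children': [], 'parent_id': 'a'},
]
for e in wire_hierarchy(toy):
    print(e)
# {'id': 'a', 'first_child': 1, 'last_child': 2}
# {'id': 'b', 'parent': 0}
# {'id': 'c', 'parent': 0}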