Exemple #1
0
def import_book_as_epub(srcpath, destpath, log=default_log):
    if not destpath.lower().endswith('.epub'):
        raise ValueError('Can only import books into the EPUB format, not %s' %
                         (os.path.basename(destpath)))
    with TemporaryDirectory('eei') as tdir:
        tdir = os.path.abspath(
            os.path.realpath(tdir)
        )  # Needed to handle the multiple levels of symlinks for /tmp on OS X
        plumber = Plumber(srcpath, tdir, log)
        plumber.setup_options()
        if srcpath.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            pathtoopf = plumber.input_plugin(inf, plumber.opts,
                                             plumber.input_fmt, log, {}, tdir)
        if hasattr(pathtoopf, 'manifest'):
            from calibre.ebooks.oeb.iterator.book import write_oebbook
            pathtoopf = write_oebbook(pathtoopf, tdir)

        c = Container(tdir, pathtoopf, log)
        auto_fill_manifest(c)
        # Auto fix all HTML/CSS
        for name, mt in iteritems(c.mime_map):
            if mt in set(OEB_DOCS) | set(OEB_STYLES):
                c.parsed(name)
                c.dirty(name)
        c.commit()

        zf = initialize_container(destpath, opf_name=c.opf_name)
        with zf:
            for name in c.name_path_map:
                zf.writestr(name, c.raw_data(name, decode=False))
Exemple #2
0
def import_book_as_epub(srcpath, destpath, log=default_log):
    if not destpath.lower().endswith('.epub'):
        raise ValueError('Can only import books into the EPUB format, not %s' % (os.path.basename(destpath)))
    with TemporaryDirectory('eei') as tdir:
        tdir = os.path.abspath(os.path.realpath(tdir))  # Needed to handle the multiple levels of symlinks for /tmp on OS X
        plumber = Plumber(srcpath, tdir, log)
        plumber.setup_options()
        if srcpath.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            pathtoopf = plumber.input_plugin(inf, plumber.opts, plumber.input_fmt, log, {}, tdir)
        if hasattr(pathtoopf, 'manifest'):
            from calibre.ebooks.oeb.iterator.book import write_oebbook
            pathtoopf = write_oebbook(pathtoopf, tdir)

        c = Container(tdir, pathtoopf, log)
        auto_fill_manifest(c)
        # Auto fix all HTML/CSS
        for name, mt in c.mime_map.iteritems():
            if mt in set(OEB_DOCS) | set(OEB_STYLES):
                c.parsed(name)
                c.dirty(name)
        c.commit()

        zf = initialize_container(destpath, opf_name=c.opf_name)
        with zf:
            for name in c.name_path_map:
                zf.writestr(name, c.raw_data(name, decode=False))
Exemple #3
0
def extract_book(pathtoebook, tdir, log=None, view_kepub=False, processed=False, only_input_plugin=False):
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    from calibre.utils.logging import default_log
    log = log or default_log
    plumber = Plumber(pathtoebook, tdir, log, view_kepub=view_kepub)
    plumber.setup_options()
    if pathtoebook.lower().endswith('.opf'):
        plumber.opts.dont_package = True
    if hasattr(plumber.opts, 'no_process'):
        plumber.opts.no_process = True

    plumber.input_plugin.for_viewer = True
    with plumber.input_plugin, open(plumber.input, 'rb') as inf:
        pathtoopf = plumber.input_plugin(inf,
            plumber.opts, plumber.input_fmt, log, {}, tdir)

        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                    not hasattr(pathtoopf, 'manifest')):
                if hasattr(pathtoopf, 'manifest'):
                    pathtoopf = write_oebbook(pathtoopf, tdir)
                pathtoopf = create_oebbook(log, pathtoopf, plumber.opts)

        if hasattr(pathtoopf, 'manifest'):
            pathtoopf = write_oebbook(pathtoopf, tdir)

    book_format = os.path.splitext(pathtoebook)[1][1:].upper()
    if getattr(plumber.input_plugin, 'is_kf8', False):
        fs = ':joint' if getattr(plumber.input_plugin, 'mobi_is_joint', False) else ''
        book_format = 'KF8' + fs
    return book_format, pathtoopf, plumber.input_fmt
Exemple #4
0
def extract_book(pathtoebook, tdir, log=None, view_kepub=False, processed=False, only_input_plugin=False):
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    from calibre.utils.logging import default_log
    log = log or default_log
    plumber = Plumber(pathtoebook, tdir, log, view_kepub=view_kepub)
    plumber.setup_options()
    if pathtoebook.lower().endswith('.opf'):
        plumber.opts.dont_package = True
    if hasattr(plumber.opts, 'no_process'):
        plumber.opts.no_process = True

    plumber.input_plugin.for_viewer = True
    with plumber.input_plugin, open(plumber.input, 'rb') as inf:
        pathtoopf = plumber.input_plugin(inf,
            plumber.opts, plumber.input_fmt, log, {}, tdir)

        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'} and
                    not hasattr(pathtoopf, 'manifest')):
                if hasattr(pathtoopf, 'manifest'):
                    pathtoopf = write_oebbook(pathtoopf, tdir)
                pathtoopf = create_oebbook(log, pathtoopf, plumber.opts)

        if hasattr(pathtoopf, 'manifest'):
            pathtoopf = write_oebbook(pathtoopf, tdir)

    book_format = os.path.splitext(pathtoebook)[1][1:].upper()
    if getattr(plumber.input_plugin, 'is_kf8', False):
        fs = ':joint' if getattr(plumber.input_plugin, 'mobi_is_joint', False) else ''
        book_format = 'KF8' + fs
    return book_format, pathtoopf, plumber.input_fmt
Exemple #5
0
def opf_to_azw3(opf, outpath, log):
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    plumber = Plumber(opf, outpath, log)
    plumber.setup_options()
    inp = plugin_for_input_format('azw3')
    outp = plugin_for_output_format('azw3')
    plumber.opts.mobi_passthrough = True
    oeb = create_oebbook(log, opf, plumber.opts)
    set_cover(oeb)
    outp.convert(oeb, outpath, inp, plumber.opts, log)
Exemple #6
0
def opf_to_azw3(opf, outpath, log):
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    plumber = Plumber(opf, outpath, log)
    plumber.setup_options()
    inp = plugin_for_input_format('azw3')
    outp = plugin_for_output_format('azw3')
    plumber.opts.mobi_passthrough = True
    oeb = create_oebbook(log, opf, plumber.opts)
    set_cover(oeb)
    outp.convert(oeb, outpath, inp, plumber.opts, log)
Exemple #7
0
def do_rebuild(opf, dest_path):
    plumber = Plumber(opf, dest_path, default_log)
    plumber.setup_options()
    inp = plugin_for_input_format('azw3')
    outp = plugin_for_output_format('azw3')

    plumber.opts.mobi_passthrough = True
    oeb = create_oebbook(default_log, opf, plumber.opts)
    set_cover(oeb)
    outp.convert(oeb, dest_path, inp, plumber.opts, default_log)
Exemple #8
0
def do_rebuild(opf, dest_path):
    plumber = Plumber(opf, dest_path, default_log)
    plumber.setup_options()
    inp = plugin_for_input_format('azw3')
    outp = plugin_for_output_format('azw3')

    plumber.opts.mobi_passthrough = True
    oeb = create_oebbook(default_log, opf, plumber.opts)
    set_cover(oeb)
    outp.convert(oeb, dest_path, inp, plumber.opts, default_log)
 def commit(self, outpath=None, keep_parsed=False):
     super(AZW3Container, self).commit(keep_parsed=keep_parsed)
     if outpath is None:
         outpath = self.pathtoazw3
     from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
     opf = self.name_path_map[self.opf_name]
     plumber = Plumber(opf, outpath, self.log)
     plumber.setup_options()
     inp = plugin_for_input_format('azw3')
     outp = plugin_for_output_format('azw3')
     plumber.opts.mobi_passthrough = True
     oeb = create_oebbook(default_log, opf, plumber.opts)
     set_cover(oeb)
     outp.convert(oeb, outpath, inp, plumber.opts, default_log)
Exemple #10
0
 def commit(self, outpath=None, keep_parsed=False):
     super(AZW3Container, self).commit(keep_parsed=keep_parsed)
     if outpath is None:
         outpath = self.pathtoazw3
     from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
     opf = self.name_path_map[self.opf_name]
     plumber = Plumber(opf, outpath, self.log)
     plumber.setup_options()
     inp = plugin_for_input_format('azw3')
     outp = plugin_for_output_format('azw3')
     plumber.opts.mobi_passthrough = True
     oeb = create_oebbook(default_log, opf, plumber.opts)
     set_cover(oeb)
     outp.convert(oeb, outpath, inp, plumber.opts, default_log)
def opf_to_book(opf, outpath, container):
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    class Item(Manifest.Item):
        def _parse_css(self, data):
            # The default CSS parser used by oeb.base inserts the h namespace
            # and resolves all @import rules. We dont want that.
            return container.parse_css(data)
    def specialize(oeb):
        oeb.manifest.Item = Item

    plumber = Plumber(opf, outpath, container.log)
    plumber.setup_options()

    class Reader(OEBReader):
        def _metadata_from_opf(self, opf):
            for e in xpath(opf, 'o2:metadata//o2:meta'):
                if e.attrib.get('name') == 'original-resolution':
                    comic_book_exth_values['original-resolution'] = e.attrib.get('content', '660x800')
            return OEBReader._metadata_from_opf(self, opf)


    oeb = create_oebbook(container.log, opf, plumber.opts, specialize=specialize, reader=Reader)

    fixup_metadata(oeb)
    set_cover_image(oeb)

    plumber.opts.dont_compress = True
    plumber.opts.toc_title = None
    plumber.opts.mobi_toc_at_start = False
    plumber.opts.no_inline_toc = True
    plumber.opts.mobi_periodical = False

    res = Resources(oeb, plumber.opts, False, process_images=False)
    
    if path.splitext(outpath)[1] != '.azw3':
        plumber.run()
    else:
        book = create_kf8_book(oeb, plumber.opts, res)        
        book.opts.prefer_author_sort = False
        book.opts.share_not_sync = False
        print ('\nWriting out: {}\n'.format(outpath))
        book.write(outpath)
Exemple #12
0
def create_book(mi,
                path,
                fmt='epub',
                opf_name='metadata.opf',
                html_name='start.xhtml',
                toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(as_bytes(mi.title))
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)).replace(
            '_TITLE_', prepare_string_for_xml(mi.title)).replace(
                '_AUTHORS_',
                prepare_string_for_xml(authors_to_string(
                    mi.authors))).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang),
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf,
                         encoding='utf-8',
                         xml_declaration=True,
                         pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name,
                                                                    ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name,
                          DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype',
                        b'application/epub+zip',
                        compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0o755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Exemple #13
0
    def __enter__(self,
                  processed=False,
                  only_input_plugin=False,
                  run_char_count=True,
                  read_anchor_map=True,
                  extract_embedded_fonts_for_qt=False):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc. '''

        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook

        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf, plumber.opts,
                                                  plumber.input_fmt, self.log,
                                                  {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion pipeline as
                # well
                if (processed
                        or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                        and not hasattr(self.pathtoopf, 'manifest')):
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf,
                                                       self.base)
                    self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                                                    plumber.opts)

            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem,
                        read_anchor_map=read_anchor_map,
                        run_char_count=run_char_count)
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {
                'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3'
        }:
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE %
                     prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except:
                import traceback
                traceback.print_exc()

        sizes = [i.character_count for i in self.spine]
        self.pages = [
            math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes
        ]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()

        if extract_embedded_fonts_for_qt:
            from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts
            try:
                extract_fonts(self.opf, self.log)
            except:
                ol = self.log.filter_level
                self.log.filter_level = self.log.DEBUG
                self.log.exception('Failed to extract fonts')
                self.log.filter_level = ol

        return self
Exemple #14
0
def create_book(mi, path, fmt='epub', opf_name='metadata.opf', html_name='start.xhtml', toc_name='toc.ncx'):
    ''' Create an empty book in the specified format at the specified location. '''
    if fmt not in valid_empty_formats:
        raise ValueError('Cannot create empty book in the %s format' % fmt)
    if fmt == 'txt':
        with open(path, 'wb') as f:
            if not mi.is_null('title'):
                f.write(mi.title)
        return
    if fmt == 'docx':
        from calibre.ebooks.conversion.plumber import Plumber
        from calibre.ebooks.docx.writer.container import DOCX
        from calibre.utils.logging import default_log
        p = Plumber('a.docx', 'b.docx', default_log)
        p.setup_options()
        # Use the word default of one inch page margins
        for x in 'left right top bottom'.split():
            setattr(p.opts, 'margin_' + x, 72)
        DOCX(p.opts, default_log).write(path, mi, create_empty_document=True)
        return
    path = os.path.abspath(path)
    lang = 'und'
    opf = metadata_to_opf(mi, as_string=False)
    for l in opf.xpath('//*[local-name()="language"]'):
        if l.text:
            lang = l.text
            break
    lang = lang_as_iso639_1(lang) or lang

    opfns = OPF_NAMESPACES['opf']
    m = opf.makeelement('{%s}manifest' % opfns)
    opf.insert(1, m)
    i = m.makeelement('{%s}item' % opfns, href=html_name, id='start')
    i.set('media-type', guess_type('a.xhtml'))
    m.append(i)
    i = m.makeelement('{%s}item' % opfns, href=toc_name, id='ncx')
    i.set('media-type', guess_type(toc_name))
    m.append(i)
    s = opf.makeelement('{%s}spine' % opfns, toc="ncx")
    opf.insert(2, s)
    i = s.makeelement('{%s}itemref' % opfns, idref='start')
    s.append(i)
    CONTAINER = '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
   <rootfiles>
      <rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
   </rootfiles>
</container>
    '''.format(prepare_string_for_xml(opf_name, True)).encode('utf-8')
    HTML = P('templates/new_book.html', data=True).decode('utf-8').replace(
        '_LANGUAGE_', prepare_string_for_xml(lang, True)
    ).replace(
        '_TITLE_', prepare_string_for_xml(mi.title)
    ).replace(
        '_AUTHORS_', prepare_string_for_xml(authors_to_string(mi.authors))
    ).encode('utf-8')
    h = parse(HTML)
    pretty_html_tree(None, h)
    HTML = serialize(h, 'text/html')
    ncx = etree.tostring(create_toc(mi, opf, html_name, lang), encoding='utf-8', xml_declaration=True, pretty_print=True)
    pretty_xml_tree(opf)
    opf = etree.tostring(opf, encoding='utf-8', xml_declaration=True, pretty_print=True)
    if fmt == 'azw3':
        with TemporaryDirectory('create-azw3') as tdir, CurrentDir(tdir):
            for name, data in ((opf_name, opf), (html_name, HTML), (toc_name, ncx)):
                with open(name, 'wb') as f:
                    f.write(data)
            c = Container(os.path.dirname(os.path.abspath(opf_name)), opf_name, DevNull())
            opf_to_azw3(opf_name, path, c)
    else:
        with ZipFile(path, 'w', compression=ZIP_STORED) as zf:
            zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
            zf.writestr('META-INF/', b'', 0755)
            zf.writestr('META-INF/container.xml', CONTAINER)
            zf.writestr(opf_name, opf)
            zf.writestr(html_name, HTML)
            zf.writestr(toc_name, ncx)
Exemple #15
0
    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc. '''

        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook

        self.delete_on_exit = []
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base  = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log, view_kepub=view_kepub)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf,
                plumber.opts, plumber.input_fmt, self.log,
                {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion pipeline as
                # well
                if (processed or plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                        and not hasattr(self.pathtoopf, 'manifest')):
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                    self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                            plumber.opts)

            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            fs = ':joint' if getattr(plumber.input_plugin, 'mobi_is_joint', False) else ''
            self.book_format = 'KF8' + fs

        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
                run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except:
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except:
                import traceback
                traceback.print_exc()

        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self