Exemple #1
0
def import_book_as_epub(srcpath, destpath, log=default_log):
    """Import the book at ``srcpath`` and write it out as an EPUB file.

    The source is run through the plumber's input plugin into a temporary
    directory, loaded into a ``Container``, every HTML/CSS entry is parsed
    and marked dirty before the container is committed, and finally every
    file known to the container is written into a new archive at
    ``destpath``.

    :param srcpath: Path of the book to import.
    :param destpath: Output path; must end in ``.epub`` (case-insensitive).
    :param log: Log object passed to the plumber, the input plugin and the
        container.
    :raises ValueError: If ``destpath`` does not end in ``.epub``.
    """
    if not destpath.lower().endswith('.epub'):
        raise ValueError('Can only import books into the EPUB format, not %s' %
                         (os.path.basename(destpath)))
    with TemporaryDirectory('eei') as tdir:
        # Needed to handle the multiple levels of symlinks for /tmp on OS X
        tdir = os.path.abspath(os.path.realpath(tdir))
        plumber = Plumber(srcpath, tdir, log)
        plumber.setup_options()
        if srcpath.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            pathtoopf = plumber.input_plugin(inf, plumber.opts,
                                             plumber.input_fmt, log, {}, tdir)
        if hasattr(pathtoopf, 'manifest'):
            # The plugin result carries a manifest instead of being a plain
            # path: write it out and continue with what write_oebbook returns.
            from calibre.ebooks.oeb.iterator.book import write_oebbook
            pathtoopf = write_oebbook(pathtoopf, tdir)

        c = Container(tdir, pathtoopf, log)
        auto_fill_manifest(c)
        # Auto fix all HTML/CSS
        # The set of affected MIME types does not change while looping, so
        # build it once instead of once per manifest entry.
        fixable_types = set(OEB_DOCS) | set(OEB_STYLES)
        for name, mt in iteritems(c.mime_map):
            if mt in fixable_types:
                c.parsed(name)
                c.dirty(name)
        c.commit()

        zf = initialize_container(destpath, opf_name=c.opf_name)
        with zf:
            for name in c.name_path_map:
                zf.writestr(name, c.raw_data(name, decode=False))
Exemple #2
0
def import_book_as_epub(srcpath, destpath, log=default_log):
    """Import the book at ``srcpath`` and write it out as an EPUB file.

    The source is run through the plumber's input plugin into a temporary
    directory, loaded into a ``Container``, every HTML/CSS entry is parsed
    and marked dirty before the container is committed, and finally every
    file known to the container is written into a new archive at
    ``destpath``.

    :param srcpath: Path of the book to import.
    :param destpath: Output path; must end in ``.epub`` (case-insensitive).
    :param log: Log object passed to the plumber, the input plugin and the
        container.
    :raises ValueError: If ``destpath`` does not end in ``.epub``.
    """
    if not destpath.lower().endswith('.epub'):
        raise ValueError('Can only import books into the EPUB format, not %s' % (os.path.basename(destpath)))
    with TemporaryDirectory('eei') as tdir:
        # Needed to handle the multiple levels of symlinks for /tmp on OS X
        tdir = os.path.abspath(os.path.realpath(tdir))
        plumber = Plumber(srcpath, tdir, log)
        plumber.setup_options()
        if srcpath.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            pathtoopf = plumber.input_plugin(inf, plumber.opts, plumber.input_fmt, log, {}, tdir)
        if hasattr(pathtoopf, 'manifest'):
            # The plugin result carries a manifest instead of being a plain
            # path: write it out and continue with what write_oebbook returns.
            from calibre.ebooks.oeb.iterator.book import write_oebbook
            pathtoopf = write_oebbook(pathtoopf, tdir)

        c = Container(tdir, pathtoopf, log)
        auto_fill_manifest(c)
        # Auto fix all HTML/CSS
        # The set of affected MIME types does not change while looping, so
        # build it once instead of once per manifest entry.
        fixable_types = set(OEB_DOCS) | set(OEB_STYLES)
        # .items() exists on both Python 2 and Python 3 mappings, whereas
        # .iteritems() is Python 2 only.
        for name, mt in c.mime_map.items():
            if mt in fixable_types:
                c.parsed(name)
                c.dirty(name)
        c.commit()

        zf = initialize_container(destpath, opf_name=c.opf_name)
        with zf:
            for name in c.name_path_map:
                zf.writestr(name, c.raw_data(name, decode=False))
Exemple #3
0
def extract_book(pathtoebook, tdir, log=None, view_kepub=False, processed=False, only_input_plugin=False):
    """Run the input plugin for ``pathtoebook`` and unpack the book into ``tdir``.

    :param pathtoebook: Path of the book to extract.
    :param tdir: Directory handed to the plumber and the input plugin.
    :param log: Log object; ``default_log`` is used when this is falsy.
    :param view_kepub: Forwarded to ``Plumber``.
    :param processed: When true (and ``only_input_plugin`` is false), the
        plugin result is additionally passed through ``create_oebbook``.
    :param only_input_plugin: When true, the ``create_oebbook`` step is
        skipped entirely.
    :return: ``(book_format, pathtoopf, input_fmt)`` where ``book_format`` is
        the upper-cased file extension of ``pathtoebook`` (or ``'KF8'`` /
        ``'KF8:joint'`` when the plugin reports ``is_kf8``).
    """
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    from calibre.utils.logging import default_log
    if not log:
        log = default_log

    plumber = Plumber(pathtoebook, tdir, log, view_kepub=view_kepub)
    plumber.setup_options()
    opts = plumber.opts
    if pathtoebook.lower().endswith('.opf'):
        opts.dont_package = True
    if hasattr(opts, 'no_process'):
        opts.no_process = True

    plugin = plumber.input_plugin
    plugin.for_viewer = True
    with plugin, open(plumber.input, 'rb') as inf:
        pathtoopf = plugin(inf, opts, plumber.input_fmt, log, {}, tdir)

        if not only_input_plugin:
            # Run the HTML preprocess/parsing from the conversion pipeline as
            # well
            run_pipeline = processed or (
                plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                and not hasattr(pathtoopf, 'manifest'))
            if run_pipeline:
                if hasattr(pathtoopf, 'manifest'):
                    pathtoopf = write_oebbook(pathtoopf, tdir)
                pathtoopf = create_oebbook(log, pathtoopf, opts)

        # Whatever produced it, a result carrying a manifest is written out
        # and replaced by the return value of write_oebbook.
        if hasattr(pathtoopf, 'manifest'):
            pathtoopf = write_oebbook(pathtoopf, tdir)

    extension = os.path.splitext(pathtoebook)[1]
    book_format = extension[1:].upper()
    if getattr(plugin, 'is_kf8', False):
        fs = ''
        if getattr(plugin, 'mobi_is_joint', False):
            fs = ':joint'
        book_format = 'KF8' + fs
    return book_format, pathtoopf, plumber.input_fmt
Exemple #4
0
def extract_book(pathtoebook, tdir, log=None, view_kepub=False, processed=False, only_input_plugin=False):
    """Explode the book at ``pathtoebook`` into ``tdir`` via its input plugin.

    The plugin output is optionally fed through ``create_oebbook`` (when
    ``only_input_plugin`` is false and either ``processed`` is true or the
    input format is pdb/pdf/rb without a manifest-bearing result), and any
    manifest-bearing result is written out with ``write_oebbook``.

    Returns a 3-tuple: the book format string, the final ``pathtoopf`` value
    and the plumber's ``input_fmt``. ``log`` falls back to ``default_log``
    when falsy; ``view_kepub`` is passed straight to ``Plumber``.
    """
    from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
    from calibre.utils.logging import default_log
    log = log if log else default_log
    plumber = Plumber(pathtoebook, tdir, log, view_kepub=view_kepub)
    plumber.setup_options()
    is_opf_source = pathtoebook.lower().endswith('.opf')
    if is_opf_source:
        plumber.opts.dont_package = True
    if hasattr(plumber.opts, 'no_process'):
        plumber.opts.no_process = True

    plumber.input_plugin.for_viewer = True
    with plumber.input_plugin, open(plumber.input, 'rb') as source:
        pathtoopf = plumber.input_plugin(
            source, plumber.opts, plumber.input_fmt, log, {}, tdir)

        # Run the HTML preprocess/parsing from the conversion pipeline as
        # well, unless the caller asked for the input plugin only.
        if not only_input_plugin and (
                processed or (
                    plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                    and not hasattr(pathtoopf, 'manifest'))):
            if hasattr(pathtoopf, 'manifest'):
                pathtoopf = write_oebbook(pathtoopf, tdir)
            pathtoopf = create_oebbook(log, pathtoopf, plumber.opts)

        if hasattr(pathtoopf, 'manifest'):
            pathtoopf = write_oebbook(pathtoopf, tdir)

    # Default format label: the file extension without its dot, upper-cased.
    book_format = os.path.splitext(pathtoebook)[1][1:].upper()
    if getattr(plumber.input_plugin, 'is_kf8', False):
        if getattr(plumber.input_plugin, 'mobi_is_joint', False):
            fs = ':joint'
        else:
            fs = ''
        book_format = 'KF8' + fs
    return (book_format, pathtoopf, plumber.input_fmt)
Exemple #5
0
    def __enter__(self,
                  processed=False,
                  only_input_plugin=False,
                  run_char_count=True,
                  read_anchor_map=True,
                  extract_embedded_fonts_for_qt=False):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        Populates ``self.base``, ``self.pathtoopf``, ``self.book_format``,
        ``self.opf``, ``self.language``, ``self.spine``, ``self.pages`` and
        ``self.toc``, then reads the bookmarks.

        :param processed: When true (and ``only_input_plugin`` is false) the
            input plugin output is also passed through ``create_oebbook``.
        :param only_input_plugin: When true, skip the ``create_oebbook`` step.
        :param run_char_count: Forwarded to every ``SpineItem``.
        :param read_anchor_map: Forwarded to every ``SpineItem``; also
            controls whether ``create_indexing_data`` is called.
        :param extract_embedded_fonts_for_qt: When true, call
            ``extract_fonts`` on the OPF; a failure there is logged, not
            raised.
        :return: ``self``
        '''

        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook

        self.delete_on_exit = []
        # NOTE(review): if anything below raises, this method does not exit
        # the temporary directory entered here -- confirm callers clean up.
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf, plumber.opts,
                                                  plumber.input_fmt, self.log,
                                                  {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion pipeline as
                # well
                if (processed
                        or (plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                            and not hasattr(self.pathtoopf, 'manifest'))):
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf,
                                                       self.base)
                    self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                                                    plumber.opts)

            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            self.book_format = 'KF8'

        # Reuse an OPF object supplied by the plugin when there is one,
        # otherwise parse the OPF file from disk.
        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
        # Linear spine items first, non-linear ones after them.
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem,
                        read_anchor_map=read_anchor_map,
                        run_char_count=run_char_count)
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                # Best effort: skip spine items that cannot be loaded, but do
                # not swallow KeyboardInterrupt/SystemExit.
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {
                'lit', 'mobi', 'prc', 'opf', 'fb2', 'azw', 'azw3'
        }:
            # Generate a title page HTML file referencing the cover and put it
            # at the very start of the spine.
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE %
                     prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile, mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Page counts are derived from each spine item's character count.
        sizes = [i.character_count for i in self.spine]
        self.pages = [
            math.ceil(i / float(self.CHARACTERS_PER_PAGE)) for i in sizes
        ]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        # Assign consecutive, 1-based page ranges to the spine items.
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.read_bookmarks()

        if extract_embedded_fonts_for_qt:
            from calibre.ebooks.oeb.iterator.extract_fonts import extract_fonts
            try:
                extract_fonts(self.opf, self.log)
            except Exception:
                # Temporarily switch the filter level to DEBUG while logging
                # (presumably so the message is not filtered out), and always
                # restore the previous level afterwards.
                ol = self.log.filter_level
                self.log.filter_level = self.log.DEBUG
                try:
                    self.log.exception('Failed to extract fonts')
                finally:
                    self.log.filter_level = ol

        return self
Exemple #6
0
    def __enter__(self, processed=False, only_input_plugin=False,
                  run_char_count=True, read_anchor_map=True, view_kepub=False, read_links=True):
        ''' Convert an ebook file into an exploded OEB book suitable for
        display in viewers/preprocessing etc.

        Populates ``self.base``, ``self.pathtoopf``, ``self.book_format``,
        ``self.opf``, ``self.language``, ``self.spine``, ``self.pages`` and
        ``self.toc``, then verifies links and reads the bookmarks.

        :param processed: When true (and ``only_input_plugin`` is false) the
            input plugin output is also passed through ``create_oebbook``.
        :param only_input_plugin: When true, skip the ``create_oebbook`` step.
        :param run_char_count: Forwarded to every ``SpineItem``.
        :param read_anchor_map: Forwarded to every ``SpineItem``; also
            controls whether ``create_indexing_data`` is called.
        :param view_kepub: Forwarded to ``Plumber``.
        :param read_links: Forwarded to every ``SpineItem``.
        :return: ``self``
        '''

        from calibre.ebooks.conversion.plumber import Plumber, create_oebbook

        self.delete_on_exit = []
        # NOTE(review): if anything below raises, this method does not exit
        # the temporary directory entered here -- confirm callers clean up.
        self._tdir = TemporaryDirectory('_ebook_iter')
        self.base  = self._tdir.__enter__()
        plumber = Plumber(self.pathtoebook, self.base, self.log, view_kepub=view_kepub)
        plumber.setup_options()
        if self.pathtoebook.lower().endswith('.opf'):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True

        plumber.input_plugin.for_viewer = True
        with plumber.input_plugin, open(plumber.input, 'rb') as inf:
            self.pathtoopf = plumber.input_plugin(inf,
                plumber.opts, plumber.input_fmt, self.log,
                {}, self.base)

            if not only_input_plugin:
                # Run the HTML preprocess/parsing from the conversion pipeline as
                # well
                if (processed or (plumber.input_fmt.lower() in {'pdb', 'pdf', 'rb'}
                        and not hasattr(self.pathtoopf, 'manifest'))):
                    if hasattr(self.pathtoopf, 'manifest'):
                        self.pathtoopf = write_oebbook(self.pathtoopf, self.base)
                    self.pathtoopf = create_oebbook(self.log, self.pathtoopf,
                            plumber.opts)

            if hasattr(self.pathtoopf, 'manifest'):
                self.pathtoopf = write_oebbook(self.pathtoopf, self.base)

        self.book_format = os.path.splitext(self.pathtoebook)[1][1:].upper()
        if getattr(plumber.input_plugin, 'is_kf8', False):
            fs = ':joint' if getattr(plumber.input_plugin, 'mobi_is_joint', False) else ''
            self.book_format = 'KF8' + fs

        # Reuse an OPF object supplied by the plugin when there is one,
        # otherwise parse the OPF file from disk.
        self.opf = getattr(plumber.input_plugin, 'optimize_opf_parsing', None)
        if self.opf is None:
            self.opf = OPF(self.pathtoopf, os.path.dirname(self.pathtoopf))
        self.language = self.opf.language
        if self.language:
            self.language = self.language.lower()
        # Linear spine items first, non-linear ones after them.
        ordered = [i for i in self.opf.spine if i.is_linear] + \
                  [i for i in self.opf.spine if not i.is_linear]
        self.spine = []
        Spiny = partial(SpineItem, read_anchor_map=read_anchor_map, read_links=read_links,
                run_char_count=run_char_count, from_epub=self.book_format == 'EPUB')
        is_comic = plumber.input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'}
        for i in ordered:
            spath = i.path
            mt = None
            if i.idref is not None:
                mt = self.opf.manifest.type_for_id(i.idref)
            if mt is None:
                mt = guess_type(spath)[0]
            try:
                self.spine.append(Spiny(spath, mime_type=mt))
                if is_comic:
                    self.spine[-1].is_single_page = True
            except Exception:
                # Best effort: skip spine items that cannot be loaded, but do
                # not swallow KeyboardInterrupt/SystemExit.
                self.log.warn('Missing spine item:', repr(spath))

        cover = self.opf.cover
        if cover and self.ebook_ext in {'lit', 'mobi', 'prc', 'opf', 'fb2',
                                        'azw', 'azw3', 'docx', 'htmlz'}:
            # Generate a title page HTML file referencing the cover and put it
            # at the very start of the spine.
            cfile = os.path.join(self.base, 'calibre_iterator_cover.html')
            rcpath = os.path.relpath(cover, self.base).replace(os.sep, '/')
            chtml = (TITLEPAGE%prepare_string_for_xml(rcpath, True)).encode('utf-8')
            with open(cfile, 'wb') as f:
                f.write(chtml)
            self.spine[0:0] = [Spiny(cfile,
                mime_type='application/xhtml+xml')]
            self.delete_on_exit.append(cfile)

        if self.opf.path_to_html_toc is not None and \
           self.opf.path_to_html_toc not in self.spine:
            try:
                self.spine.append(Spiny(self.opf.path_to_html_toc))
            except Exception:
                import traceback
                traceback.print_exc()

        # Page counts are derived from each spine item's character count.
        sizes = [i.character_count for i in self.spine]
        self.pages = [math.ceil(i/float(self.CHARACTERS_PER_PAGE)) for i in sizes]
        for p, s in zip(self.pages, self.spine):
            s.pages = p
        start = 1

        # Assign consecutive, 1-based page ranges to the spine items.
        for s in self.spine:
            s.start_page = start
            start += s.pages
            s.max_page = s.start_page + s.pages - 1
        self.toc = self.opf.toc
        if read_anchor_map:
            create_indexing_data(self.spine, self.toc)

        self.verify_links()

        self.read_bookmarks()

        return self