Esempio n. 1
0
def test():  # {{{
    """Smoke-test the image helpers and the external image tools.

    Raises SystemExit with a short message as soon as one of the
    optimize/encode helpers returns something other than None, when ``*.bak``
    files are found in the working directory, or when the output of
    ``JxrDecApp -h`` does not contain the expected banner.

    NOTE(review): ``img`` is never assigned inside this function (the line
    that used to create it is commented out below) and nothing visible here
    creates ``test.png``. Unless both are provided from elsewhere, this test
    cannot run as written -- confirm before relying on it.
    """
    # TODO(gryf): move this test to separate file.
    from ebook_converter.ptempfile import TemporaryDirectory
    from ebook_converter import CurrentDir
    from glob import glob
    # TODO(gryf): make the sample image out of pillow or smth
    # img = image_from_data(I('lt.png', data=True, allow_user_override=False))
    # Work inside a scratch directory so relative file names can be used.
    with TemporaryDirectory() as tdir, CurrentDir(tdir):
        save_image(img, 'test.jpg')
        # The helpers below are treated as failed when they return anything
        # other than None; the returned value is used as the error text.
        ret = optimize_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('optimize_jpeg failed: %s' % ret)
        ret = encode_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('encode_jpeg failed: %s' % ret)
        # TODO(gryf): make the sample image out of pillow or smth. for sure
        # tempfile would be better idea.
        #shutil.copyfile(I('lt.png'), 'test.png')
        ret = optimize_png('test.png')
        if ret is not None:
            raise SystemExit('optimize_png failed: %s' % ret)
        # No backup files may remain in the working directory.
        if glob('*.bak'):
            raise SystemExit('Spurious .bak files left behind')
    # The remaining calls only check that each operation runs without
    # raising; their return values are discarded.
    quantize_image(img)
    oil_paint_image(img)
    gaussian_sharpen_image(img)
    gaussian_blur_image(img)
    despeckle_image(img)
    remove_borders_from_image(img)
    image_to_data(img, fmt='GIF')
    # NOTE(review): the Popen object is never waited on and its stdout pipe is
    # never closed explicitly.
    raw = subprocess.Popen([get_exe_path('JxrDecApp'), '-h'], creationflags=0, stdout=subprocess.PIPE).stdout.read()
    if b'JPEG XR Decoder Utility' not in raw:
        raise SystemExit('Failed to run JxrDecApp')
Esempio n. 2
0
    def convert_text(self, oeb_book):
        """Render ``oeb_book`` to PDF.

        Cover data and fonts are prepared first. When document margins are
        requested and stored margins exist, each matching manifest item's
        root element is tagged with its margins as JSON. The book is then
        written as OEB into a temporary directory and the first ``*.opf``
        found there is passed to the PDF HTML writer.
        """
        import json
        from ebook_converter.ebooks.pdf.html_writer import convert
        self.get_cover_data()
        self.process_fonts()

        if self.opts.pdf_use_document_margins and self.stored_page_margins:
            for href, margins in self.stored_page_margins.items():
                manifest_item = oeb_book.manifest.hrefs.get(href)
                if manifest_item is None:
                    continue
                root = manifest_item.data
                # Only parsed (element-like) documents with non-empty margins
                # get the attribute.
                if not (hasattr(root, 'xpath') and margins):
                    continue
                root.set('data-calibre-pdf-output-page-margins',
                         json.dumps(margins))

        with TemporaryDirectory('_pdf_out') as tmp_dir:
            from ebook_converter.customize.ui import plugin_for_output_format
            out_dir = os.path.realpath(tmp_dir)
            oeb_writer = plugin_for_output_format('oeb')
            oeb_writer.convert(oeb_book, out_dir, self.input_plugin,
                               self.opts, self.log)
            opf_files = glob.glob(os.path.join(out_dir, '*.opf'))
            convert(opf_files[0],
                    self.opts,
                    metadata=self.metadata,
                    output_path=self.output_path,
                    log=self.log,
                    cover_data=self.cover_data,
                    report_progress=self.report_progress)
Esempio n. 3
0
 def __enter__(self, *args):
     """
     Add this plugin to the python path so that its contents become
     directly importable.  Useful when bundling large python libraries into
     the plugin. Use it like this::
         with plugin:
             import something

     If the plugin archive contains no binary extension modules (pyd, so,
     dll, dylib) the archive itself is put on ``sys.path``. Otherwise it is
     extracted into a temporary directory, which is put on ``sys.path``
     instead. Nothing happens when ``self.plugin_path`` is None.
     """
     if self.plugin_path is None:
         return
     from ebook_converter.utils.zipfile import ZipFile
     zf = ZipFile(self.plugin_path)
     # Close the archive even when inspecting or extracting it fails.
     try:
         extensions = {x.rpartition('.')[-1].lower() for x in
                       zf.namelist()}
         zip_safe = extensions.isdisjoint(('pyd', 'so', 'dll', 'dylib'))
         if zip_safe:
             sys.path.insert(0, self.plugin_path)
             self.sys_insertion_path = self.plugin_path
         else:
             from ebook_converter.ptempfile import TemporaryDirectory
             self._sys_insertion_tdir = TemporaryDirectory('plugin_unzip')
             self.sys_insertion_path = (self._sys_insertion_tdir.
                                        __enter__(*args))
             zf.extractall(self.sys_insertion_path)
             sys.path.insert(0, self.sys_insertion_path)
     finally:
         zf.close()
Esempio n. 4
0
def render_html_data(path_to_html, width, height):
    """Render an HTML file to JPEG in a worker and return the image bytes.

    The ``ebook_converter.ebooks.render_html`` ``main`` job is forked with a
    temporary output directory. On success the content of ``rendered.jpeg``
    is returned. On failure a diagnostic is printed and None is returned.

    ``width`` and ``height`` are accepted but not used by this function.
    """
    from ebook_converter.ptempfile import TemporaryDirectory
    from ebook_converter.utils.ipc.simple_worker import fork_job, WorkerError
    job = {}

    def report_error(text=''):
        # Print the failure, an optional detail text and, when a job result
        # is available, the captured worker output.
        print(f'Failed to render {path_to_html}')
        if text:
            print(text)
        log_path = job and job['stdout_stderr']
        if log_path:
            with open(log_path, 'rb') as f:
                print(f.read())

    with TemporaryDirectory('-render-html') as tdir:
        try:
            job = fork_job('ebook_converter.ebooks.render_html',
                           'main',
                           args=(path_to_html, tdir, 'jpeg'))
        except WorkerError as e:
            report_error(e.orig_tb)
            return None

        if not job['result']:
            report_error()
            return None

        with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as f:
            return f.read()
Esempio n. 5
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert a PML/PMLZ ``stream`` to HTML pages plus OPF and NCX files.

        For ``pmlz`` input the archive is extracted into a temporary
        directory and every ``*.pml`` file in it is converted to an HTML file
        in the current working directory. Any other input is processed as a
        single PML stream into ``index.html``. ``metadata.opf`` and
        ``toc.ncx`` are then written to the current working directory.

        Returns the path of ``metadata.opf`` joined onto the current working
        directory.
        """
        from ebook_converter.ebooks.metadata.toc import TOC
        from ebook_converter.ebooks.metadata.opf2 import OPFCreator
        from ebook_converter.utils.zipfile import ZipFile

        self.options = options
        self.log = log
        pages = []
        images = []
        toc = TOC()

        if file_ext != 'pmlz':
            toc = self.process_pml(stream, 'index.html')
            pages.append('index.html')

            if hasattr(stream, 'name'):
                source_dir = os.path.abspath(os.path.dirname(stream.name))
                images = self.get_images(stream, source_dir)
        else:
            log.debug('De-compressing content to temporary directory...')
            with TemporaryDirectory('_unpmlz') as tdir:
                archive = ZipFile(stream)
                archive.extractall(tdir)

                for pml in glob.glob(os.path.join(tdir, '*.pml')):
                    stem = os.path.splitext(os.path.basename(pml))[0]
                    html_name = stem + '.html'
                    html_path = os.path.join(os.getcwd(), html_name)

                    pages.append(html_name)
                    log.debug('Processing PML item %s...', pml)
                    toc += self.process_pml(pml, html_path)
                images = self.get_images(stream, tdir, True)

        # We want pages to be ordered alphabetically.
        pages.sort()

        manifest_items = [(name, None) for name in pages + images]

        from ebook_converter.ebooks.metadata.meta import get_metadata
        log.debug('Reading metadata from input file...')
        mi = get_metadata(stream, 'pml')
        if 'images/cover.png' in images:
            mi.cover = 'images/cover.png'
        opf = OPFCreator(os.getcwd(), mi)
        log.debug('Generating manifest...')
        opf.create_manifest(manifest_items)
        opf.create_spine(pages)
        opf.set_toc(toc)
        with open('metadata.opf', 'wb') as opffile, \
                open('toc.ncx', 'wb') as tocfile:
            opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(os.getcwd(), 'metadata.opf')
Esempio n. 6
0
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from a PDF ``stream``.

    The stream is copied to ``src.pdf`` in a temporary directory which is
    passed to ``read_info``. The returned info dict is mapped onto a
    ``MetaInformation`` object: title, authors, producer, tags, ISBN/DOI
    identifiers and, when present, XMP metadata.

    Raises ValueError when ``read_info`` returns None.
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as dest:
            shutil.copyfileobj(stream, dest)
        info = read_info(pdfpath, bool(cover))
        if info is None:
            raise ValueError('Could not read info dict from PDF')
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title') or 'Unknown'
    author = info.get('Author')
    authors = ['Unknown'] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get('Creator')
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get('Keywords')
    if keywords:
        mi.tags = [kw.strip() for kw in keywords.split(',')]
        # Keywords that validate as an ISBN become the book's ISBN and are
        # dropped from the tag list.
        isbn = [check_isbn(tag) for tag in mi.tags if check_isbn(tag)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get('Subject')
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from ebook_converter.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata
    for scheme, checker in (('doi', check_doi), ('isbn', check_isbn)):
        if scheme in mi.get_identifiers():
            continue
        for key, value in info.items():
            if key == 'xmp_metadata':
                continue
            found = checker(value)
            if found:
                mi.set_identifier(scheme, found)
                break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
Esempio n. 7
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Convert a CHM file to an OEB book and return it.

        The CHM is unpacked to HTML in a temporary directory via
        ``self._chmtohtml``. A root HTML file is created from the main page
        and turned into an OEB book via ``self._create_oebbook_html``. When
        the table of contents returned by ``self._create_html_root`` has more
        than one entry, the first spine item is parsed as the book's TOC and
        removed from the manifest.

        ``stream`` is closed by this method; only its ``name`` is used.
        ``options`` is modified in place: the html input plugin's recommended
        values are applied to it and ``input_encoding`` is set to 'utf-8'.
        ``debug_pipeline`` is cleared temporarily and restored afterwards.
        """
        # NOTE(gryf): for some reason, those import cannot be moved to the top
        # of module.
        from ebook_converter.ebooks.chm.metadata import get_metadata_from_reader
        from ebook_converter.customize.ui import plugin_for_input_format
        self.opts = options

        log.debug('Processing CHM...')
        with TemporaryDirectory('_chm2oeb') as tdir:
            if not isinstance(tdir, str):
                tdir = tdir.decode(filesystem_encoding)
            html_input = plugin_for_input_format('html')
            # Apply the html input plugin's recommended option values.
            for opt in html_input.options:
                setattr(options, opt.option.name, opt.recommended_value)
            no_images = False  # options.no_images
            chm_name = stream.name
            # chm_data = stream.read()

            # closing stream so CHM can be opened by external library
            stream.close()
            log.debug('tdir=%s', tdir)
            log.debug('stream.name=%s', stream.name)
            # debug_dump is either False or the 'input' sub directory of the
            # debug pipeline directory.
            debug_dump = False
            odi = options.debug_pipeline
            if odi:
                debug_dump = os.path.join(odi, 'input')
            mainname = self._chmtohtml(tdir,
                                       chm_name,
                                       no_images,
                                       log,
                                       debug_dump=debug_dump)
            mainpath = os.path.join(tdir, mainname)

            # NOTE(review): self._chm_reader is not assigned in this method;
            # presumably _chmtohtml() sets it -- confirm.
            try:
                metadata = get_metadata_from_reader(self._chm_reader)
            except Exception:
                log.exception('Failed to read metadata, using filename')
                from ebook_converter.ebooks.metadata.book.base import Metadata
                metadata = Metadata(os.path.basename(chm_name))
            # Encoding preference: reader's own, then the user option, then
            # cp1252 as the last resort.
            encoding = (self._chm_reader.get_encoding()
                        or options.input_encoding or 'cp1252')
            self._chm_reader.CloseCHM()

            # debug_pipeline is disabled while the nested HTML conversion
            # runs and restored from ``odi`` afterwards.
            options.debug_pipeline = None
            options.input_encoding = 'utf-8'
            uenc = encoding
            # Files listed in re_encoded_files are read as utf-8 instead of
            # the detected encoding.
            if os.path.abspath(mainpath) in self._chm_reader.re_encoded_files:
                uenc = 'utf-8'
            htmlpath, toc = self._create_html_root(mainpath, log, uenc)
            oeb = self._create_oebbook_html(htmlpath, tdir, options, log,
                                            metadata)
            options.debug_pipeline = odi
            if toc.count() > 1:
                # The first spine item is used as the TOC source and then
                # dropped from the book.
                oeb.toc = self.parse_html_toc(oeb.spine[0])
                oeb.manifest.remove(oeb.spine[0])
                oeb.auto_generated_toc = False
        return oeb
Esempio n. 8
0
def load_jxr_data(data):
    """Decode JPEG-XR ``data`` (bytes) and return it as a QImage.

    The data is written to ``input.jxr`` in a temporary directory and
    converted to ``output.tif`` with the external ``JxrDecApp`` tool, whose
    output is discarded. The tool's exit status is not checked; success is
    determined by whether the resulting TIFF can be loaded.

    Raises NotImage when ``output.tif`` cannot be loaded.
    """
    with TemporaryDirectory() as tdir:
        with open(os.path.join(tdir, 'input.jxr'), 'wb') as f:
            f.write(data)
        cmd = [get_exe_path('JxrDecApp'), '-i', 'input.jxr', '-o',
               'output.tif']
        # subprocess.DEVNULL is used rather than open(os.devnull) so that no
        # file handle is left open after the call.
        subprocess.call(cmd, cwd=tdir, stdout=subprocess.DEVNULL,
                        stderr=subprocess.STDOUT, creationflags=0)
        i = QImage()
        if not i.load(os.path.join(tdir, 'output.tif')):
            raise NotImage('Failed to convert JPEG-XR image')
        return i
Esempio n. 9
0
 def _cover_from_html(self, hcover):
     """Render the HTML cover ``hcover`` and add it as 'cover.jpg'.

     The book is written to a temporary directory, the file referenced by
     ``hcover.href`` is rendered, and the result (or empty bytes when
     rendering produced nothing) is added to the manifest as a JPEG item.

     Returns the newly added manifest item.
     """
     from ebook_converter.ebooks import render_html_svg_workaround
     with TemporaryDirectory('_html_cover') as tdir:
         OEBWriter()(self.oeb, tdir)
         html_path = os.path.join(tdir, unquote(hcover.href))
         data = render_html_svg_workaround(html_path, self.logger) or b''
     cover_id, cover_href = self.oeb.manifest.generate('cover', 'cover.jpg')
     return self.oeb.manifest.add(cover_id, cover_href, base.JPEG_MIME,
                                  data=data)
Esempio n. 10
0
        def test_shared_file(self):
            """Check file sharing semantics of share_open().

            A file opened for reading must stay readable after it has been
            removed, and a new file created under the same name must be
            independent from the removed one.

            All handles are opened through ``with`` so they are closed before
            the temporary directory is left, even when an assertion fails.
            """
            eq = self.assertEqual

            with TemporaryDirectory() as tdir:
                fname = os.path.join(tdir, 'test.txt')
                with share_open(fname, 'wb') as f:
                    f.write(b'a' * 20 * 1024)
                    eq(fname, f.name)
                with share_open(fname, 'rb') as f:
                    eq(f.read(1), b'a')
                    # Removing the file must not invalidate the open handle.
                    os.remove(fname)
                    eq(f.read(1), b'a')
                    with share_open(fname, 'w+b') as f2:
                        f2.write(b'b' * 10 * 1024)
                        f2.seek(0)
                        # The old handle still sees the old content ...
                        eq(f.read(10000), b'a'*10000)
                        # ... while new handles see the new file.
                        eq(f2.read(100), b'b' * 100)
                        with share_open(fname, 'rb') as f3:
                            eq(f3.read(100), b'b' * 100)
Esempio n. 11
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write ``oeb_book`` as a PMLZ archive to ``output_path``.

        The PML text is written to ``index.pml`` and the images to
        ``index_img`` inside a temporary directory, which is then added to a
        new zip archive at ``output_path``. Text is encoded with
        ``opts.pml_output_encoding``; unencodable characters are replaced.
        """
        from ebook_converter.ebooks.pml.pmlml import PMLMLizer
        from ebook_converter.utils.zipfile import ZipFile

        with TemporaryDirectory('_pmlz_output') as tdir:
            pmlmlizer = PMLMLizer(log)
            pml = str(pmlmlizer.extract_content(oeb_book, opts))
            with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
                out.write(pml.encode(opts.pml_output_encoding, 'replace'))

            img_path = os.path.join(tdir, 'index_img')
            os.makedirs(img_path, exist_ok=True)
            self.write_images(oeb_book.manifest, pmlmlizer.image_hrefs,
                              img_path, opts)

            log.debug('Compressing output...')
            pmlz = ZipFile(output_path, 'w')
            # Close explicitly so the archive is finalised here rather than
            # whenever the object happens to be garbage collected.
            try:
                pmlz.add_dir(tdir)
            finally:
                pmlz.close()
Esempio n. 12
0
    def convert(self, oeb, output_path, input_plugin, opts, log):
        """Convert ``oeb`` to LRF.

        Image collections are handed to ``convert_images`` directly. Any
        other book is written as OEB into a temporary directory and the
        first ``.opf`` file found there is passed to ``process_file``.
        """
        self.log = log
        self.opts = opts
        self.oeb = oeb

        lrf_opts = LRFOptions(output_path, opts, oeb)

        if input_plugin.is_image_collection:
            images = input_plugin.get_images()
            wide = getattr(opts, 'wide', False)
            self.convert_images(images, lrf_opts, wide)
            return

        self.flatten_toc()

        from ebook_converter.ptempfile import TemporaryDirectory
        with TemporaryDirectory('_lrf_output') as tdir:
            from ebook_converter.customize.ui import plugin_for_output_format
            oeb_writer = plugin_for_output_format('oeb')
            oeb_writer.convert(oeb, tdir, input_plugin, opts, log)
            opf_names = [name for name in os.listdir(tdir)
                         if name.endswith('.opf')]
            opf_path = os.path.join(tdir, opf_names[0])
            from ebook_converter.ebooks.lrf.html.convert_from import process_file
            process_file(opf_path, lrf_opts, self.log)
Esempio n. 13
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write ``oeb_book`` as a TXTZ archive to ``output_path``.

        The text (``index.txt``, or ``index.text`` for textile formatting),
        the images and ``metadata.opf`` are collected in a temporary
        directory, which is then added to a new zip archive at
        ``output_path``.
        """
        from ebook_converter.ebooks.oeb.base import OEB_IMAGES
        from ebook_converter.utils.zipfile import ZipFile
        from lxml import etree

        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
            txt_name = 'index.txt'
            if opts.txt_output_formatting.lower() == 'textile':
                txt_name = 'index.text'
            with TemporaryFile(txt_name) as tf:
                TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
                shutil.copy(tf, os.path.join(tdir, txt_name))

            # Images
            for item in oeb_book.manifest:
                if item.media_type not in OEB_IMAGES:
                    continue
                if hasattr(self.writer, 'images'):
                    # The writer tracks the images it references; anything
                    # it does not know about is left out.
                    if item.href not in self.writer.images:
                        continue
                    path = os.path.join(tdir, 'images')
                    href = self.writer.images[item.href]
                else:
                    path = os.path.join(tdir, os.path.dirname(item.href))
                    href = os.path.basename(item.href)
                os.makedirs(path, exist_ok=True)
                with open(os.path.join(path, href), 'wb') as imgf:
                    imgf.write(item.data)

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))

            txtz = ZipFile(output_path, 'w')
            # Close explicitly so the archive is finalised here rather than
            # whenever the object happens to be garbage collected.
            try:
                txtz.add_dir(tdir)
            finally:
                txtz.close()
Esempio n. 14
0
    def run(self, htmlfile):
        """Bundle ``htmlfile`` and the files it links to into a zip container.

        The HTML input stage of the conversion pipeline is run with its debug
        output directed to a temporary directory. The resulting ``input``
        folder is packed into a container created by ``initialize_container``
        after any NCX file has been removed from it.

        ``self.site_customization`` may hold ``<encoding>|bf``: an input
        encoding (ignored with a message when unknown to ``codecs``) and/or
        the ``bf`` flag to enable breadth-first traversal.

        Returns the path of the created file.
        """
        import codecs
        from ebook_converter.ptempfile import TemporaryDirectory
        from ebook_converter.gui2.convert.gui_conversion import gui_convert
        from ebook_converter.customize.conversion import OptionRecommendation
        from ebook_converter.ebooks.epub import initialize_container

        with TemporaryDirectory('_plugin_html2zip') as tdir:
            recs = [('debug_pipeline', tdir, OptionRecommendation.HIGH)]
            recs.append(['keep_ligatures', True, OptionRecommendation.HIGH])
            if self.site_customization and self.site_customization.strip():
                sc = self.site_customization.strip()
                enc, _, bf = sc.partition('|')
                if enc:
                    try:
                        codecs.lookup(enc)
                    except Exception:
                        # Interpolate the encoding into the message; passing
                        # it as a second print() argument left '%s' verbatim.
                        print('Ignoring invalid input encoding for HTML: %s'
                              % enc)
                    else:
                        recs.append(['input_encoding', enc,
                                     OptionRecommendation.HIGH])
                if bf == 'bf':
                    recs.append(['breadth_first', True,
                                 OptionRecommendation.HIGH])
            gui_convert(htmlfile, tdir, recs, abort_after_input_dump=True)
            of = self.temporary_file('_plugin_html2zip.zip')
            tdir = os.path.join(tdir, 'input')
            opf = glob.glob(os.path.join(tdir, '*.opf'))[0]
            ncx = glob.glob(os.path.join(tdir, '*.ncx'))
            if ncx:
                os.remove(ncx[0])
            epub = initialize_container(of.name, os.path.basename(opf))
            epub.add_dir(tdir)
            epub.close()

        return of.name
Esempio n. 15
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write ``oeb_book`` as an SNB file to ``output_path``.

        Steps, all performed inside one temporary directory:

        1. rasterize SVG content (skipped with a warning when unavailable),
        2. write ``snbf/book.snbf`` with the book information,
        3. write ``snbf/toc.snbf`` with one chapter entry per TOC item,
        4. convert every spine document to ``.snbc`` files in ``snbc``,
           merging documents that are not referenced by the TOC into the
           previously written chapter,
        5. hand every manifest image to ``self.HandleImage`` for
           ``snbc/images``,
        6. package the directory with ``SNBFile``.
        """
        from lxml import etree
        from ebook_converter.ebooks.snb.snbfile import SNBFile
        from ebook_converter.ebooks.snb.snbml import SNBMLizer, ProcessFileName

        self.opts = opts
        from ebook_converter.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        try:
            rasterizer = SVGRasterizer()
            rasterizer(oeb_book, opts)
        except Unavailable:
            log.warn('SVG rasterizer unavailable, SVG will not be converted')

        # Create temp dir
        with TemporaryDirectory('_snb_output') as tdir:
            # Create stub directories
            snbfDir = os.path.join(tdir, 'snbf')
            snbcDir = os.path.join(tdir, 'snbc')
            snbiDir = os.path.join(tdir, 'snbc/images')
            os.mkdir(snbfDir)
            os.mkdir(snbcDir)
            os.mkdir(snbiDir)

            # Process Meta data
            # Each field falls back to an empty string when the book does not
            # provide it; only the first entry of each field is used.
            meta = oeb_book.metadata
            if meta.title:
                title = str(meta.title[0])
            else:
                title = ''
            authors = [str(x) for x in meta.creator if x.role == 'aut']
            if meta.publisher:
                publishers = str(meta.publisher[0])
            else:
                publishers = ''
            if meta.language:
                lang = str(meta.language[0]).upper()
            else:
                lang = ''
            if meta.description:
                abstract = str(meta.description[0])
            else:
                abstract = ''

            # Process Cover
            # The guide's cover reference is only used when the guide has no
            # 'titlepage' entry.
            g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
            href = None
            if 'titlepage' not in g:
                if 'cover' in g:
                    href = g['cover'].href

            # Output book info file
            bookInfoTree = etree.Element("book-snbf", version="1.0")
            headTree = etree.SubElement(bookInfoTree, "head")
            etree.SubElement(headTree, "name").text = title
            etree.SubElement(headTree, "author").text = ' '.join(authors)
            etree.SubElement(headTree, "language").text = lang
            etree.SubElement(headTree, "rights")
            etree.SubElement(headTree, "publisher").text = publishers
            etree.SubElement(
                headTree, "generator").text = __appname__ + ' ' + __version__
            etree.SubElement(headTree, "created")
            etree.SubElement(headTree, "abstract").text = abstract
            if href is not None:
                etree.SubElement(headTree,
                                 "cover").text = ProcessFileName(href)
            else:
                etree.SubElement(headTree, "cover")
            with open(os.path.join(snbfDir, 'book.snbf'), 'wb') as f:
                f.write(
                    etree.tostring(bookInfoTree,
                                   pretty_print=True,
                                   encoding='utf-8'))

            # Output TOC
            tocInfoTree = etree.Element("toc-snbf")
            tocHead = etree.SubElement(tocInfoTree, "head")
            tocBody = etree.SubElement(tocInfoTree, "body")
            # Maps a document href to a list of (fragment, title) tuples; an
            # empty fragment stands for the start of the document.
            outputFiles = {}
            if oeb_book.toc.count() == 0:
                log.warn('This SNB file has no Table of Contents. '
                         'Creating a default TOC')
                first = next(iter(oeb_book.spine))
                oeb_book.toc.add('Start page', first.href)
            else:
                first = next(iter(oeb_book.spine))
                if oeb_book.toc[0].href != first.href:
                    # The pages before the first item in toc will be stored as
                    # "Cover Pages".
                    # oeb_book.toc does not support "insert", so we generate
                    # the tocInfoTree directly instead of modifying the toc
                    ch = etree.SubElement(tocBody, "chapter")
                    ch.set("src", ProcessFileName(first.href) + ".snbc")
                    ch.text = 'Cover pages'
                    outputFiles[first.href] = []
                    outputFiles[first.href].append(("", "Cover pages"))

            for tocitem in oeb_book.toc:
                if tocitem.href.find('#') != -1:
                    # TOC entry pointing at a fragment inside a document.
                    item = tocitem.href.split('#')
                    if len(item) != 2:
                        log.error('Error in TOC item: %s' % tocitem)
                    else:
                        if item[0] in outputFiles:
                            outputFiles[item[0]].append(
                                (item[1], tocitem.title))
                        else:
                            outputFiles[item[0]] = []
                            # NOTE(review): the list was created on the line
                            # above, so this condition is always true.
                            if "" not in outputFiles[item[0]]:
                                outputFiles[item[0]].append(
                                    ("", tocitem.title + " (Preface)"))
                                ch = etree.SubElement(tocBody, "chapter")
                                ch.set("src",
                                       ProcessFileName(item[0]) + ".snbc")
                                ch.text = tocitem.title + " (Preface)"
                            outputFiles[item[0]].append(
                                (item[1], tocitem.title))
                else:
                    if tocitem.href in outputFiles:
                        outputFiles[tocitem.href].append(("", tocitem.title))
                    else:
                        outputFiles[tocitem.href] = []
                        outputFiles[tocitem.href].append(("", tocitem.title))
                # A chapter entry is emitted for every TOC item, including
                # those reported as erroneous above.
                ch = etree.SubElement(tocBody, "chapter")
                ch.set("src", ProcessFileName(tocitem.href) + ".snbc")
                ch.text = tocitem.title

            etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)

            with open(os.path.join(snbfDir, 'toc.snbf'), 'wb') as f:
                f.write(
                    etree.tostring(tocInfoTree,
                                   pretty_print=True,
                                   encoding='utf-8'))

            # Output Files
            # oldTree/lastName track the most recently written chapter so
            # that documents missing from the TOC can be merged into it;
            # mergeLast marks that such a merge is pending/was done and the
            # chapter has to be written again.
            oldTree = None
            mergeLast = False
            lastName = None
            for item in s:
                # NOTE(review): OEB_IMAGES is also used by the image loop
                # further down; it is only bound if this loop body ran at
                # least once (i.e. the spine is not empty).
                from ebook_converter.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES
                if m.hrefs[item.href].media_type in OEB_DOCS:
                    if item.href not in outputFiles:
                        log.debug(
                            'File %s is unused in TOC. Continue in last chapter'
                            % item.href)
                        mergeLast = True
                    else:
                        if oldTree is not None and mergeLast:
                            log.debug('Output the modified chapter again: %s' %
                                      lastName)
                            with open(os.path.join(snbcDir, lastName),
                                      'wb') as f:
                                f.write(
                                    etree.tostring(oldTree,
                                                   pretty_print=True,
                                                   encoding='utf-8'))
                            mergeLast = False

                    log.debug('Converting %s to snbc...' % item.href)
                    snbwriter = SNBMLizer(log)
                    snbcTrees = None
                    if not mergeLast:
                        snbcTrees = snbwriter.extract_content(
                            oeb_book, item, outputFiles[item.href], opts)
                        # One .snbc file per returned tree; a non-empty key
                        # is appended to the file name as a postfix.
                        for subName in snbcTrees:
                            postfix = ''
                            if subName != '':
                                postfix = '_' + subName
                            lastName = ProcessFileName(item.href + postfix +
                                                       ".snbc")
                            oldTree = snbcTrees[subName]
                            with open(os.path.join(snbcDir, lastName),
                                      'wb') as f:
                                f.write(
                                    etree.tostring(oldTree,
                                                   pretty_print=True,
                                                   encoding='utf-8'))
                    else:
                        log.debug('Merge %s with last TOC item...' % item.href)
                        snbwriter.merge_content(oldTree, oeb_book, item,
                                                [('', "Start")], opts)

            # Output the last one if needed
            # NOTE(review): this message is logged unconditionally, even when
            # nothing is written again below.
            log.debug('Output the last modified chapter again: %s' % lastName)
            if oldTree is not None and mergeLast:
                with open(os.path.join(snbcDir, lastName), 'wb') as f:
                    f.write(
                        etree.tostring(oldTree,
                                       pretty_print=True,
                                       encoding='utf-8'))
                mergeLast = False

            for item in m:
                if m.hrefs[item.href].media_type in OEB_IMAGES:
                    log.debug('Converting image: %s ...' % item.href)
                    content = m.hrefs[item.href].data
                    # Convert & Resize image
                    self.HandleImage(
                        content,
                        os.path.join(snbiDir, ProcessFileName(item.href)))

            # Package as SNB File
            snbFile = SNBFile()
            snbFile.FromDir(tdir)
            snbFile.Output(output_path)
Esempio n. 16
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write ``oeb_book`` as an HTMLZ archive to ``output_path``.

        The HTML file, an optional external ``style.css``, the referenced
        images, an optional ``cover.jpg`` and ``metadata.opf`` are collected
        in a temporary directory, which is then added to a new zip archive at
        ``output_path``.

        ``opts.htmlz_css_type`` selects the HTML generator: 'inline', 'tag'
        or anything else for class based CSS. A failure while writing the
        cover is printed as a traceback and otherwise ignored.
        """
        from lxml import etree
        from ebook_converter.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from ebook_converter.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from ebook_converter.utils.zipfile import ZipFile
        from ebook_converter.utils.filenames import ascii_filename

        # HTML
        if opts.htmlz_css_type == 'inline':
            from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer

        with TemporaryDirectory('_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)

            fname = 'index'
            if opts.htmlz_title_filename:
                from ebook_converter.utils.filenames import shorten_components_to
                fname = shorten_components_to(100, (ascii_filename(str(oeb_book.metadata.title[0])),))[0]
            with open(os.path.join(tdir, fname + '.html'), 'wb') as tf:
                if isinstance(html, str):
                    html = html.encode('utf-8')
                tf.write(html)

            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                css = htmlizer.get_css(oeb_book)
                # The file is opened in binary mode, so text has to be
                # encoded first (same treatment as the HTML above).
                if isinstance(css, str):
                    css = css.encode('utf-8')
                with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
                    tf.write(css)

            # Images
            images = htmlizer.images
            if images:
                os.makedirs(os.path.join(tdir, 'images'), exist_ok=True)
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
                        if item.media_type == SVG_MIME:
                            # SVG items are serialised to text and encoded,
                            # because the target file is opened in binary
                            # mode.
                            data = etree.tostring(
                                item.data, encoding='unicode').encode('utf-8')
                        else:
                            data = item.data
                        img_path = os.path.join(tdir, 'images',
                                                images[item.href])
                        with open(img_path, 'wb') as img:
                            img.write(data)

            # Cover
            cover_path = None
            try:
                cover_data = None
                if oeb_book.metadata.cover:
                    term = oeb_book.metadata.cover[0].term
                    cover_data = oeb_book.guide[term].item.data
                if cover_data:
                    from ebook_converter.utils.img import save_cover_data_to
                    cover_path = os.path.join(tdir, 'cover.jpg')
                    # Create the (empty) target file before saving into it.
                    with open(cover_path, 'w') as cf:
                        cf.write('')
                    save_cover_data_to(cover_data, cover_path)
            except Exception:
                # Best effort: a broken cover must not abort the conversion,
                # but KeyboardInterrupt/SystemExit are no longer swallowed.
                import traceback
                traceback.print_exc()

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                opf = OPF(io.BytesIO(etree.tostring(oeb_book.metadata.to_opf1(), encoding='UTF-8')))
                mi = opf.to_book_metadata()
                if cover_path:
                    mi.cover = 'cover.jpg'
                mdataf.write(metadata_to_opf(mi))

            htmlz = ZipFile(output_path, 'w')
            # Close explicitly so the archive is finalised here rather than
            # whenever the object happens to be garbage collected.
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
Esempio n. 17
0
    def convert(self, oeb, output_path, input_plugin, opts, log):
        """Write the OEB book ``oeb`` to ``output_path`` as an EPUB file.

        The book is run through a series of transforms (optional inline TOC,
        filename flattening/uniquifying, reader workarounds, image rescaling,
        splitting, cover handling), serialized with the 'oeb' output plugin
        into a temporary directory, and that directory is then added to the
        container created by ``initialize_container``.

        :param oeb: the OEB book to write; it is modified in place (a default
            TOC entry and a UUID identifier may be added).
        :param output_path: path of the EPUB file to create.
        :param input_plugin: the input plugin; its optional
            ``encrypted_fonts`` attribute is consulted.
        :param opts: conversion options; some ``mobi_*`` options are
            overwritten when ``epub_inline_toc`` is set.
        :param log: logger used for warnings and informational messages.
        """
        self.log, self.opts, self.oeb = log, opts, oeb

        if self.opts.epub_inline_toc:
            from ebook_converter.ebooks.mobi.writer8.toc import TOCAdder
            # The mobi_* options are set right before handing opts to
            # TOCAdder -- presumably those are the names it reads.
            opts.mobi_toc_at_start = not opts.epub_toc_at_end
            opts.mobi_passthrough = False
            opts.no_inline_toc = False
            TOCAdder(oeb, opts, replace_previous_inline_toc=True, ignore_existing_toc=True)

        if self.opts.epub_flatten:
            from ebook_converter.ebooks.oeb.transforms.filenames import FlatFilenames
            FlatFilenames()(oeb, opts)
        else:
            from ebook_converter.ebooks.oeb.transforms.filenames import UniqueFilenames
            UniqueFilenames()(oeb, opts)

        self.workaround_ade_quirks()
        self.workaround_webkit_quirks()
        self.upshift_markup()
        from ebook_converter.ebooks.oeb.transforms.rescale import RescaleImages
        RescaleImages(check_colorspaces=True)(oeb, opts)

        from ebook_converter.ebooks.oeb.transforms.split import Split
        split = Split(not self.opts.dont_split_on_page_breaks,
                max_flow_size=self.opts.flow_size*1024
                )
        split(self.oeb, self.opts)

        from ebook_converter.ebooks.oeb.transforms.cover import CoverManager
        cm = CoverManager(
                no_default_cover=self.opts.no_default_epub_cover,
                no_svg_cover=self.opts.no_svg_cover,
                preserve_aspect_ratio=self.opts.preserve_cover_aspect_ratio)
        cm(self.oeb, self.opts, self.log)

        self.workaround_sony_quirks()

        if self.oeb.toc.count() == 0:
            self.log.warn('This EPUB file has no Table of Contents. '
                    'Creating a default TOC')
            first = next(iter(self.oeb.spine))
            self.oeb.toc.add('Start', first.href)

        # Look for an existing UUID identifier, recognised either by its
        # scheme attribute or by a urn:uuid: prefix in its value.
        identifiers = oeb.metadata['identifier']
        scheme_attr = base.tag('opf', 'scheme')
        _uuid = None
        for x in identifiers:
            # The scheme attribute may be absent; fall back to '' so that
            # .lower() is never called on None and the urn:uuid: check below
            # still gets a chance to match.
            scheme = x.get(scheme_attr, None) or ''
            if (scheme.lower() == 'uuid' or
                    str(x).startswith('urn:uuid:')):
                _uuid = str(x).split(':')[-1]
                break
        encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

        if _uuid is None:
            self.log.warn('No UUID identifier found')
            _uuid = str(uuid.uuid4())
            oeb.metadata.add('identifier', _uuid, scheme='uuid', id=_uuid)

        if encrypted_fonts and not _uuid.startswith('urn:uuid:'):
            # Apparently ADE requires this value to start with urn:uuid:
            # for some absurd reason, or it will throw a hissy fit and refuse
            # to use the obfuscated fonts.
            for x in identifiers:
                if str(x) == _uuid:
                    x.content = 'urn:uuid:' + _uuid

        with TemporaryDirectory('_epub_output') as tdir:
            from ebook_converter.customize.ui import plugin_for_output_format
            metadata_xml = None
            extra_entries = []
            if self.is_periodical:
                if self.opts.output_profile.epub_periodical_format == 'sony':
                    from ebook_converter.ebooks.epub.periodical import sony_metadata
                    metadata_xml, atom_xml = sony_metadata(oeb)
                    extra_entries = [('atom.xml', 'application/atom+xml', atom_xml)]
            # Serialize the book into tdir using the 'oeb' output plugin.
            oeb_output = plugin_for_output_format('oeb')
            oeb_output.convert(oeb, tdir, input_plugin, opts, log)
            # Take the first .opf and .ncx file found in tdir; an IndexError
            # is raised if either is missing.
            opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
            self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)
                    if x.endswith('.ncx')][0])
            if self.opts.epub_version == '3':
                self.upgrade_to_epub3(tdir, opf)
            encryption = None
            if encrypted_fonts:
                encryption = self.encrypt_fonts(encrypted_fonts, tdir, _uuid)

            from ebook_converter.ebooks.epub import initialize_container
            with initialize_container(output_path, os.path.basename(opf),
                    extra_entries=extra_entries) as epub:
                epub.add_dir(tdir)
                if encryption is not None:
                    epub.writestr('META-INF/encryption.xml', as_bytes(encryption))
                if metadata_xml is not None:
                    epub.writestr('META-INF/metadata.xml',
                            metadata_xml.encode('utf-8'))
            if opts.extract_to is not None:
                from ebook_converter.utils.zipfile import ZipFile
                # Replace whatever currently exists at extract_to (directory
                # or file) with a fresh directory holding the EPUB contents.
                if os.path.exists(opts.extract_to):
                    if os.path.isdir(opts.extract_to):
                        shutil.rmtree(opts.extract_to)
                    else:
                        os.remove(opts.extract_to)
                os.mkdir(opts.extract_to)
                with ZipFile(output_path) as zf:
                    zf.extractall(path=opts.extract_to)
                self.log.info('EPUB extracted to', opts.extract_to)
Esempio n. 18
0
    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Convert the SNB file read from ``stream`` into an OEB book.

        Metadata is taken from ``snbf/book.snbf`` and the chapter list from
        ``snbf/toc.snbf``. Each chapter is written as an HTML file into a
        temporary directory (kept after the ``with`` block exits, since it
        is passed to the book's ``DirContainer``) and registered in the
        manifest, spine and TOC. Image files output by the SNB file are
        added to the manifest.

        :param stream: stream passed to ``SNBFile.Parse``.
        :param options: conversion options; ``input_encoding`` is read.
        :param file_ext: not used by this method.
        :param log: logger used for debug messages.
        :param accelerators: not used by this method.
        :returns: the populated OEB book.
        :raises ValueError: if the SNB file cannot be parsed or is invalid.
        """
        import uuid

        from ebook_converter.ebooks.oeb.base import DirContainer
        from ebook_converter.ebooks.snb.snbfile import SNBFile

        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except Exception as err:
            # Keep the original failure attached as the explicit cause.
            raise ValueError("Invalid SNB file") from err
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
            raise ValueError("Invalid SNB file")
        log.debug("Handle meta data ...")
        from ebook_converter.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, None, options,
                             encoding=options.input_encoding, populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)
            item_map = {'title': './/head/name',
                        'creator': './/head/author',
                        'language': './/head/language',
                        'generator': './/head/generator',
                        'publisher': './/head/publisher',
                        'cover': './/head/cover'}
            # Missing nodes and nodes without text both map to ''.
            d = {}
            for key, xpath in item_map.items():
                node = meta.find(xpath)
                text = node.text if node is not None else None
                d[key] = text if text is not None else ''

            oeb.metadata.add('title', d['title'])
            oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
            oeb.metadata.add('language',
                             d['language'].lower().replace('_', '-'))
            oeb.metadata.add('generator', d['generator'])
            oeb.metadata.add('publisher', d['publisher'])
            if d['cover'] != '':
                oeb.guide.add('cover', 'Cover', d['cover'])

        bookid = str(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                # NOTE(review): this assigns the first identifier rather than
                # the matched ``ident``; confirm whether that is intended.
                oeb.uid = oeb.metadata.identifier[0]
                break

        with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = etree.fromstring(toc)
                # Output files are numbered by chapters actually written;
                # chapters without data are skipped and do not consume a
                # number, so enumerate() cannot be used here.
                chapter_no = 1
                for ch in toc.find('.//body'):
                    chapterName = ch.text
                    chapterSrc = ch.get('src')
                    fname = 'ch_%d.htm' % chapter_no
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
                    snbc = etree.fromstring(data)
                    lines = []
                    for line in snbc.find('.//body'):
                        if line.tag == 'text':
                            lines.append('<p>%s</p>' % html_encode(line.text))
                        elif line.tag == 'img':
                            lines.append('<p><img src="%s" /></p>' %
                                         html_encode(line.text))
                    chapter_html = HTML_TEMPLATE % (chapterName,
                                                    '\n'.join(lines))
                    with open(os.path.join(tdir, fname), 'wb') as chapter_file:
                        chapter_file.write(
                            chapter_html.encode('utf-8', 'replace'))
                    oeb.toc.add(ch.text, fname)
                    item_id, href = oeb.manifest.generate(
                        id='html', href=ascii_filename(fname))
                    item = oeb.manifest.add(item_id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)
                    chapter_no += 1
                imageFiles = snbFile.OutputImageFiles(tdir)
                for image_name, media_type in imageFiles:
                    item_id, href = oeb.manifest.generate(
                        id='image', href=ascii_filename(image_name))
                    item = oeb.manifest.add(item_id, href, media_type)
                    item.html_input_href = image_name

        return oeb