Example #1
0
    def generate_toc(self, oeb_book, ref_url, output_dir):
        '''
        Build the table of contents of ``oeb_book`` as nested ``<ul>`` lists.

        Entry hrefs are resolved while ``output_dir`` is the current
        directory and then made relative to the directory of ``ref_url``.
        Returns a ``<div>`` element wrapping the top-level list.
        '''

        def as_text(value):
            # hrefs and titles may arrive as bytes; normalise them to str.
            if isinstance(value, bytes):
                return value.decode('utf-8')
            return value

        def build_node(toc_node, container=None):
            # The root call creates the outermost list; nested calls only
            # open a new list when the entry actually has children.
            if container is None:
                container = etree.Element('ul')
            elif len(toc_node.nodes):
                container = element(container, 'ul')

            for child in toc_node.nodes:
                entry = element(container, 'li')
                target = os.path.abspath(polyglot.unquote(child.href))
                href = as_text(relpath(target, os.path.dirname(ref_url)))
                anchor = element(entry, 'a', href=clean_xml_chars(href))

                label = as_text(child.title)
                if label:
                    # Collapse runs of whitespace into single spaces.
                    label = re.sub(r'\s+', ' ', label)
                anchor.text = clean_xml_chars(label)

                build_node(child, entry)
            return container

        with directory.CurrentDir(output_dir):
            wrapper = etree.Element('div')
            wrapper.append(build_node(oeb_book.toc))
            return wrapper
Example #2
0
    def extract_content(self, output_dir):
        '''
        Extract the book text and images into ``output_dir``.

        All text pages are concatenated, converted from PML to HTML and
        written to ``index.html``; images go to the ``images/``
        sub-directory. Returns the path of the OPF produced by
        ``self.create_opf``.
        '''
        from ebook_converter.ebooks.pml.pmlconverter import pml_to_html

        output_dir = os.path.abspath(output_dir)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Collect every text page (pages are numbered from 1).
        pages = []
        for page_no in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i', page_no)
            pages.append(self.get_text_page(page_no))
        pml = ''.join(pages)

        title = self.mi.title
        if not isinstance(title, str):
            title = title.decode('utf-8', 'replace')

        template = '<html><head><title>%s</title></head><body>%s</body></html>'
        html = template % (title, pml_to_html(pml))

        with directory.CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        images_dir = os.path.join(output_dir, 'images/')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)

        images = []
        with directory.CurrentDir(images_dir):
            first_section = self.header_record.non_text_offset
            for section_no in range(first_section, len(self.sections)):
                name, img = self.get_image(section_no)
                if not name:
                    # Sections for which get_image gives no name are skipped.
                    continue
                images.append(name)
                with open(name, 'wb') as imgf:
                    self.log.debug('Writing image %s to images/', name)
                    imgf.write(img)

        return self.create_opf(output_dir, images)
Example #3
0
    def dump_images(self, output_dir):
        '''
        Write every image page of the file into ``output_dir``.

        This is primarily used for debugging and 3rd party tools to
        get the images in the file. The directory is created when missing.
        '''
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with directory.CurrentDir(output_dir):
            for page in range(self.header_record.num_image_pages):
                # Image sections start at image_data_offset.
                section_no = self.header_record.image_data_offset + page
                name, img = self.get_image(section_no)
                with open(name, 'wb') as imgf:
                    imgf.write(img)
Example #4
0
    def create_opf(self, output_dir, images):
        '''
        Write ``metadata.opf`` into ``output_dir`` and return its path.

        The manifest lists ``index.html`` followed by each name in
        ``images`` under ``images/``; the spine contains only ``index.html``.
        '''
        with directory.CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]
            manifest.extend(
                (os.path.join('images/', name), None) for name in images)
            opf.create_manifest(manifest)

            opf.create_spine(['index.html'])

            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')
Example #5
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Write ``oeb_book`` into the directory ``output_path``.

        The OPF, NCX and page-map documents returned by
        ``oeb_book.to_opf2(page_map=True)`` are serialised first, then every
        manifest item is written to the location given by its href and
        unloaded from memory. ``output_path`` is created when missing.
        '''

        self.log, self.opts = log, opts
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(output_path, exist_ok=True)
        with directory.CurrentDir(output_path):
            results = oeb_book.to_opf2(page_map=True)
            for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
                href, root = results.pop(key, [None, None])
                if root is None:
                    continue
                if key == OPF_MIME:
                    # The cover workarounds are best effort: a failure is
                    # logged and ignored. Only Exception is caught so that
                    # KeyboardInterrupt/SystemExit still propagate.
                    try:
                        self.workaround_nook_cover_bug(root)
                    except Exception:
                        self.log.exception('Something went wrong while '
                                           'trying to workaround Nook '
                                           'cover bug, ignoring')
                    try:
                        self.workaround_pocketbook_cover_bug(root)
                    except Exception:
                        self.log.exception('Something went wrong while '
                                           'trying to workaround '
                                           'Pocketbook cover bug, '
                                           'ignoring')
                    self.migrate_lang_code(root)
                raw = etree.tostring(root,
                                     pretty_print=True,
                                     encoding='utf-8',
                                     xml_declaration=True)
                if key == OPF_MIME:
                    # Needed as I can't get lxml to output opf:role and
                    # not output <opf:metadata> as well
                    raw = re.sub(br'(<[/]{0,1})opf:', br'\1', raw)
                with open(href, 'wb') as f:
                    f.write(raw)

            for item in oeb_book.manifest:
                if (not self.opts.expand_css and item.media_type in OEB_STYLES
                        and hasattr(item.data, 'cssText')
                        and 'nook' not in self.opts.output_profile.short_name):
                    condense_sheet(item.data)
                path = os.path.abspath(polyglot.unquote(item.href))
                os.makedirs(os.path.dirname(path), exist_ok=True)
                with open(path, 'wb') as f:
                    f.write(item.bytes_representation)
                item.unload_data_from_memory(memory=path)
Example #6
0
 def encrypt_fonts(self, uris, tdir, _uuid):  # {{{
     '''
     Obfuscate the fonts named by ``uris`` in place and describe them.

     The key is built from the hex digits of ``_uuid``. For every font
     found below ``tdir`` the first 1024 bytes are XORed with the 16 byte
     key. Returns the text of an ``<encryption>`` XML document listing the
     fonts, or ``None`` when none of the URIs exists on disk.

     Raises ``ValueError`` when ``_uuid`` has fewer than 16 hex digits.
     '''
     # Local import: only needed to escape URIs placed in XML attributes.
     from xml.sax.saxutils import escape

     key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
     if len(key) < 16:
         raise ValueError('UUID identifier %r is invalid' % _uuid)
     # Repeat the digits so that 32 hex characters (16 bytes) are available.
     key = bytearray(polyglot.from_hex_bytes((key + key)[:32]))
     with directory.CurrentDir(tdir):
         paths = [os.path.join(*x.split('/')) for x in uris]
         uris = dict(zip(uris, paths))
         fonts = []
         for uri in list(uris.keys()):
             path = uris[uri]
             if not os.path.exists(path):
                 uris.pop(uri)
                 continue
             self.log.debug('Encrypting font: %s', uri)
             with open(path, 'r+b') as f:
                 data = f.read(1024)
                 if len(data) >= 1024:
                     data = bytearray(data)
                     f.seek(0)
                     f.write(
                         bytes(
                             bytearray(data[i] ^ key[i % 16]
                                       for i in range(1024))))
                 else:
                     # NOTE(review): the font is left untouched here but is
                     # still listed below as encrypted - confirm intended.
                     self.log.warning('Font %s is invalid, ignoring', path)
             if not isinstance(uri, str):
                 uri = uri.decode('utf-8')
             # XML has no backslash escapes: quotes, ampersands and angle
             # brackets must become entities for the attribute to stay valid.
             fonts.append('''
             <enc:EncryptedData>
                 <enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>
                 <enc:CipherData>
                 <enc:CipherReference URI="%s"/>
                 </enc:CipherData>
             </enc:EncryptedData>
             ''' % (escape(uri, {'"': '&quot;'})))
         if fonts:
             ans = '''<encryption
                 xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
                 xmlns:enc="http://www.w3.org/2001/04/xmlenc#"
                 xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">
                 '''
             ans += '\n'.join(fonts)
             ans += '\n</encryption>'
             return ans
Example #7
0
    def __call__(self, stream, odir, log):
        '''
        Convert the ODT file in ``stream`` into XHTML inside ``odir``.

        Writes ``index.xhtml``, the extracted pictures and ``metadata.opf``
        into ``odir`` (created when missing) and returns the absolute path
        of the OPF file.
        '''
        from ebook_converter.utils.zipfile import ZipFile
        from ebook_converter.ebooks.metadata.odt import get_metadata
        from ebook_converter.ebooks.metadata.opf2 import OPFCreator

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(odir, exist_ok=True)
        with directory.CurrentDir(odir):
            log.info('Extracting ODT file...')
            stream.seek(0)
            mi = get_metadata(stream, 'odt')
            if not mi.title:
                mi.title = 'Unknown'
            if not mi.authors:
                mi.authors = ['Unknown']
            self.filter_load(stream, mi, log)

            # NOTE(gryf): Here is a workaround for ODF2XHTML.xhtml() method,
            # which expects, that all lines are strings.
            html = ''.join(str(line) for line in self.lines)

            # A blanket img specification like this causes problems
            # with EPUB output as the containing element often has
            # an absolute height and width set that is larger than
            # the available screen real estate
            html = html.replace('img { width: 100%; height: 100%; }', '')
            # odf2xhtml creates empty title tag
            html = html.replace('<title></title>',
                                '<title>%s</title>' % (mi.title, ))
            try:
                html = self.fix_markup(html, log)
            except Exception:
                # Best effort: keep the unfiltered markup. Only Exception is
                # caught so KeyboardInterrupt/SystemExit still propagate.
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
                f.write(polyglot.as_bytes(html))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            opf = OPFCreator(os.path.abspath(os.getcwd()), mi)
            # The manifest lists every file now present below odir.
            opf.create_manifest([(os.path.abspath(os.path.join(r, f2)), None)
                                 for r, _, fnames in os.walk(os.getcwd())
                                 for f2 in fnames])
            opf.create_spine([os.path.abspath('index.xhtml')])
            with open('metadata.opf', 'wb') as f:
                opf.render(f)
            return os.path.abspath('metadata.opf')
Example #8
0
    def __call__(self, stream, options, file_ext, log,
                 accelerators, output_dir):
        '''
        Empty ``output_dir`` and run ``self.convert`` inside it.

        Every entry of ``output_dir`` is deleted before the conversion
        starts. Returns whatever ``self.convert`` returns.
        '''
        try:
            log.info('InputFormatPlugin: %s running', self.name)
            if hasattr(stream, 'name'):
                log.info('on: %s', stream.name)
        except Exception:
            # In case stdout is broken. Only Exception is swallowed so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass

        with directory.CurrentDir(output_dir):
            for x in os.listdir('.'):
                # shutil.rmtree() refuses to operate on symbolic links, so a
                # link to a directory is unlinked like a regular file.
                if os.path.isdir(x) and not os.path.islink(x):
                    shutil.rmtree(x)
                else:
                    os.remove(x)

            ret = self.convert(stream, options, file_ext,
                               log, accelerators)

        return ret
Example #9
0
    def create_opf(self, output_dir, images, toc):
        '''
        Write ``metadata.opf`` and ``toc.ncx`` into ``output_dir``.

        ``images/cover.png`` becomes the cover when ``cover.png`` is among
        ``images``. The manifest lists ``index.html`` and every image; the
        spine contains only ``index.html``. Returns the path of the OPF.
        '''
        with directory.CurrentDir(output_dir):
            if 'cover.png' in images:
                self.mi.cover = os.path.join('images', 'cover.png')

            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]
            manifest.extend(
                (os.path.join('images', name), None) for name in images)
            opf.create_manifest(manifest)

            opf.create_spine(['index.html'])
            opf.set_toc(toc)

            with open('metadata.opf', 'wb') as opffile, \
                    open('toc.ncx', 'wb') as tocfile:
                opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(output_dir, 'metadata.opf')
Example #10
0
def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
    '''
    Convert the pdf into html using the pdftohtml app.
    This will write the html as index.html into output_dir.
    It will also write all extracted images to the output_dir

    With ``as_xml`` the output is ``index.xml`` and no post-processing is
    done. Otherwise the HTML is rewritten in place and the first page is
    converted a second time to XML, which is handed to ``parse_outline``.

    Raises ``ConversionError`` when pdftohtml cannot be found or exits with
    a non-zero status, and ``DRMError`` when the produced index file is
    missing or smaller than 100 bytes.
    '''

    # Paths built from output_dir are used both before and after changing
    # into it, so they must not depend on the current directory.
    output_dir = os.path.abspath(output_dir)
    pdfsrc = os.path.join(output_dir, 'src.pdf')
    index = os.path.join(output_dir, 'index.' + ('xml' if as_xml else 'html'))

    with open(pdf_path, 'rb') as src, open(pdfsrc, 'wb') as dest:
        shutil.copyfileobj(src, dest)

    try:
        with directory.CurrentDir(output_dir):
            cmd = [
                'pdftohtml', '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
                '-nodrm',
                os.path.basename(pdfsrc),
                os.path.basename(index)
            ]

            if no_images:
                cmd.append('-i')
            if as_xml:
                cmd.append('-xml')

            # Both output streams of the tool are captured in one log file.
            logf = PersistentTemporaryFile('pdftohtml_log')

            try:
                ret = subprocess.call(cmd, stderr=logf._fd, stdout=logf._fd)
            except OSError as err:
                if err.errno == errno.ENOENT:
                    raise ConversionError('Could not find pdftohtml, check '
                                          'it is in your PATH') from err
                else:
                    raise

            logf.flush()
            logf.close()

            with open(logf.name) as fobj:
                out = fobj.read().strip()

            if ret != 0:
                raise ConversionError('pdftohtml failed with return code: '
                                      '%d\n%s' % (ret, out))
            if out:
                print("pdftohtml log:")
                print(out)
            if not os.path.exists(index) or os.stat(index).st_size < 100:
                raise DRMError()

            if not as_xml:
                with open(index, 'r+b') as i:
                    raw = i.read().decode('utf-8', 'replace')
                    raw = flip_images(raw)
                    raw = raw.replace(
                        '<head', '<!-- created by ebook-converter\'s'
                        ' pdftohtml -->\n  <head', 1)
                    i.seek(0)
                    i.truncate()
                    # versions of pdftohtml >= 0.20 output self closing <br>
                    # tags, this breaks the pdf heuristics regexps, so
                    # replace them
                    raw = raw.replace('<br/>', '<br>')
                    # Numeric anchor names become ids prefixed with "p" and
                    # links into index.html become local fragment links.
                    raw = re.sub(r'<a\s+name=(\d+)',
                                 r'<a id="\1"',
                                 raw,
                                 flags=re.I)
                    raw = re.sub(r'<a id="(\d+)"', r'<a id="p\1"', raw,
                                 flags=re.I)
                    raw = re.sub(r'<a href="index.html#(\d+)"',
                                 r'<a href="#p\1"',
                                 raw,
                                 flags=re.I)
                    raw = entities.xml_replace_entities(raw)
                    raw = raw.replace('\u00a0', ' ')

                    i.write(raw.encode('utf-8'))

                # Second run: first page only, as XML on stdout.
                cmd = [
                    'pdftohtml', '-f', '1', '-l', '1', '-xml', '-i', '-enc',
                    'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q',
                    '-stdout',
                    os.path.basename(pdfsrc)
                ]

                raw = subprocess.check_output(cmd).strip()
                if raw:
                    parse_outline(raw, output_dir)
    finally:
        # Remove the temporary copy of the PDF on failure as well as on
        # success; failing to delete it is not an error.
        try:
            os.remove(pdfsrc)
        except OSError:
            pass
Example #11
0
    def extract_content(self, output_dir):
        '''
        Extract text, footnotes, sidebars and images into ``output_dir``.

        The text pages are converted from PML to HTML; footnote and sidebar
        sections are appended under their own headings. The result is
        written to ``index.html``, images go to ``images/``. Returns the
        path of the OPF produced by ``self.create_opf``.
        '''
        from ebook_converter.ebooks.pml.pmlconverter import footnote_to_html, sidebar_to_html
        from ebook_converter.ebooks.pml.pmlconverter import PML_HTMLizer

        output_dir = os.path.abspath(output_dir)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        title = self.mi.title
        if not isinstance(title, str):
            title = title.decode('utf-8', 'replace')
        html = '<html><head><title>%s</title></head><body>' % title

        # Collect every text page (pages are numbered from 1).
        pages = []
        for page_no in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i', page_no)
            pages.append(self.get_text_page(page_no))
        pml = ''.join(pages)

        hizer = PML_HTMLizer()
        html += hizer.parse_pml(pml, 'index.html')
        toc = hizer.get_toc()

        header = self.header_record

        if header.footnote_count > 0:
            html += '<br /><h1>%s</h1>' % 'Footnotes'
            # The section at footnote_offset holds the NUL-terminated ids;
            # the sections after it hold the footnote texts.
            codec = 'cp1252' if self.encoding is None else self.encoding
            id_blob = self.section_data(header.footnote_offset).decode(codec)
            footnote_ids = re.findall('\\w+(?=\x00)', id_blob)
            sections = range(header.footnote_offset + 1,
                             header.footnote_offset + header.footnote_count)
            for pos, section_no in enumerate(sections):
                self.log.debug('Extracting footnote page %i', section_no)
                note_id = footnote_ids[pos] if pos < len(footnote_ids) else ''
                html += footnote_to_html(note_id,
                                         self.decompress_text(section_no))

        if header.sidebar_count > 0:
            html += '<br /><h1>%s</h1>' % 'Sidebar'
            # Same layout as the footnotes: an id section, then the texts.
            codec = 'cp1252' if self.encoding is None else self.encoding
            id_blob = self.section_data(header.sidebar_offset).decode(codec)
            sidebar_ids = re.findall('\\w+(?=\x00)', id_blob)
            sections = range(header.sidebar_offset + 1,
                             header.sidebar_offset + header.sidebar_count)
            for pos, section_no in enumerate(sections):
                self.log.debug('Extracting sidebar page %i', section_no)
                side_id = sidebar_ids[pos] if pos < len(sidebar_ids) else ''
                html += sidebar_to_html(side_id,
                                        self.decompress_text(section_no))

        html += '</body></html>'

        with directory.CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        images_dir = os.path.join(output_dir, 'images/')
        if not os.path.exists(images_dir):
            os.makedirs(images_dir)

        images = []
        with directory.CurrentDir(images_dir):
            for page in range(self.header_record.num_image_pages):
                section_no = self.header_record.image_data_offset + page
                name, img = self.get_image(section_no)
                images.append(name)
                with open(name, 'wb') as imgf:
                    self.log.debug('Writing image %s to images/', name)
                    imgf.write(img)

        return self.create_opf(output_dir, images, toc)
Example #12
0
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Render ``oeb_book`` as templated HTML and zip it to ``output_path``.

        An index page, a stylesheet and one page per spine item are written
        into a temporary directory, packed into the zip file at
        ``output_path`` and, when ``opts.extract_to`` is set, also extracted
        there (replacing any existing content). The temporary directory is
        removed afterwards.
        '''
        from lxml import etree
        from ebook_converter.utils import zipfile
        from templite import Templite
        from ebook_converter.ebooks.html.meta import EasyMeta

        def read_template(user_path, default_resource):
            # Templates are always read as bytes and decoded exactly once,
            # whether they come from the user or from the package data.
            path = user_path
            if path is None:
                path = pkg_resources.resource_filename('ebook_converter',
                                                       default_resource)
            with open(path, 'rb') as fobj:
                return fobj.read().decode('utf-8')

        # read template files
        template_html_index_data = read_template(
            opts.template_html_index, 'data/html_export_default_index.tmpl')
        template_html_data = read_template(
            opts.template_html, 'data/html_export_default.tmpl')
        template_css_data = read_template(
            opts.template_css, 'data/html_export_default.css')

        self.log = log
        self.opts = opts
        meta = EasyMeta(oeb_book.metadata)

        tempdir = os.path.realpath(PersistentTemporaryDirectory())
        output_file = os.path.join(
            tempdir,
            os.path.basename(re.sub(r'\.zip', '', output_path) + '.html'))
        output_dir = re.sub(r'\.html', '', output_file) + '_files'

        os.makedirs(output_dir, exist_ok=True)

        css_path = output_dir + os.sep + 'calibreHtmlOutBasicCss.css'
        with open(css_path, 'wb') as f:
            f.write(template_css_data.encode('utf-8'))

        with open(output_file, 'wb') as f:
            html_toc = self.generate_html_toc(oeb_book, output_file,
                                              output_dir)
            templite = Templite(template_html_index_data)
            nextLink = oeb_book.spine[0].href
            nextLink = relpath(output_dir + os.sep + nextLink,
                               os.path.dirname(output_file))
            cssLink = relpath(os.path.abspath(css_path),
                              os.path.dirname(output_file))
            tocUrl = relpath(output_file, os.path.dirname(output_file))
            t = templite.render(has_toc=bool(oeb_book.toc.count()),
                                toc=html_toc,
                                meta=meta,
                                nextLink=nextLink,
                                tocUrl=tocUrl,
                                cssLink=cssLink,
                                firstContentPageLink=nextLink)
            if isinstance(t, str):
                t = t.encode('utf-8')
            f.write(t)

        with directory.CurrentDir(output_dir):
            for item in oeb_book.manifest:
                path = os.path.abspath(polyglot.unquote(item.href))
                os.makedirs(os.path.dirname(path), exist_ok=True)
                if item.spine_position is not None:
                    # Spine items only get an empty placeholder here; their
                    # content is rendered through the template below.
                    with open(path, 'wb') as f:
                        pass
                else:
                    with open(path, 'wb') as f:
                        f.write(item.bytes_representation)
                    item.unload_data_from_memory(memory=path)

            for item in oeb_book.spine:
                path = os.path.abspath(polyglot.unquote(item.href))
                item_dir = os.path.dirname(path)
                root = item.data.getroottree()

                # get & clean HTML <HEAD>-data
                head = root.xpath(
                    '//h:head',
                    namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                head_content = etree.tostring(head,
                                              pretty_print=True,
                                              encoding='unicode')
                head_content = re.sub(r'\<\/?head.*\>', '', head_content)
                head_content = re.sub(
                    re.compile(r'\<style.*\/style\>', re.M | re.S), '',
                    head_content)
                head_content = re.sub(r'<(title)([^>]*)/>', r'<\1\2></\1>',
                                      head_content)

                # get & clean HTML <BODY>-data
                body = root.xpath(
                    '//h:body',
                    namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0]
                ebook_content = etree.tostring(body,
                                               pretty_print=True,
                                               encoding='unicode')
                ebook_content = re.sub(r'\<\/?body.*\>', '', ebook_content)
                ebook_content = re.sub(r'<(div|a|span)([^>]*)/>',
                                       r'<\1\2></\1>', ebook_content)

                # generate link to next page
                if item.spine_position + 1 < len(oeb_book.spine):
                    nextLink = oeb_book.spine[item.spine_position + 1].href
                    nextLink = relpath(os.path.abspath(nextLink), item_dir)
                else:
                    nextLink = None

                # generate link to previous page
                if item.spine_position > 0:
                    prevLink = oeb_book.spine[item.spine_position - 1].href
                    prevLink = relpath(os.path.abspath(prevLink), item_dir)
                else:
                    prevLink = None

                cssLink = relpath(os.path.abspath(css_path), item_dir)
                tocUrl = relpath(output_file, item_dir)
                firstContentPageLink = oeb_book.spine[0].href

                # render template
                templite = Templite(template_html_data)

                def toc(path=path):
                    # Passed as a callable so the template decides whether
                    # the TOC is generated; path is bound per item.
                    return self.generate_html_toc(oeb_book, path, output_dir)

                t = templite.render(ebookContent=ebook_content,
                                    prevLink=prevLink,
                                    nextLink=nextLink,
                                    has_toc=bool(oeb_book.toc.count()),
                                    toc=toc,
                                    tocUrl=tocUrl,
                                    head_content=head_content,
                                    meta=meta,
                                    cssLink=cssLink,
                                    firstContentPageLink=firstContentPageLink)

                # write html to file
                with open(path, 'wb') as f:
                    f.write(t.encode('utf-8'))
                item.unload_data_from_memory(memory=path)

        zfile = zipfile.ZipFile(output_path, "w")
        try:
            zfile.add_dir(output_dir, os.path.basename(output_dir))
            zfile.write(output_file, os.path.basename(output_file),
                        zipfile.ZIP_DEFLATED)

            if opts.extract_to:
                if os.path.exists(opts.extract_to):
                    shutil.rmtree(opts.extract_to)
                os.makedirs(opts.extract_to)
                zfile.extractall(opts.extract_to)
                self.log.info('Zip file extracted to %s', opts.extract_to)
        finally:
            # Close the archive even when packing or extracting fails.
            zfile.close()

        # cleanup temp dir
        shutil.rmtree(tempdir)
Example #13
0
    def extract_content(self, output_dir):
        '''
        Extract text records and images, then convert them via HTML input.

        Every text record is written to ``<uid>.html`` in ``output_dir``;
        single and composite images are written as ``<uid>.jpg`` below
        ``images/``. The home record is then passed to the HTML input
        plugin and the resulting OEB book is returned.

        Raises ``Exception`` when the home record cannot be determined.
        '''
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text record into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with directory.CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html', uid,
                               uid)
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(
                            section_data.data,
                            section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        # NOTE(review): only this branch decodes the result
                        # of process_phtml - confirm its return type.
                        html += self.process_phtml(
                            d, section_data.header.paragraph_offsets).decode(
                                self.get_text_uid_encoding(section_header.uid),
                                'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Cache the image sizes in case they are used by a composite image.
        images = set()
        images_dir = os.path.join(output_dir, 'images/')
        os.makedirs(images_dir, exist_ok=True)
        with directory.CurrentDir(images_dir):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        save_cover_data_to(idata,
                                           '%s.jpg' % uid,
                                           compression_quality=70)
                        images.add(uid)
                        self.log.debug(
                            'Wrote image with uid %s to '
                            'images/%s.jpg', uid, uid)
                    except Exception as e:
                        self.log.error('Failed to write image with uid %s: %s',
                                       uid, e)
                else:
                    self.log.error(
                        'Failed to write image with uid %s: '
                        'No data.', uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in images:
                                raise Exception('Image with uid: %s missing.' %
                                                col)
                            with open('%s.jpg' % col, 'rb') as part:
                                w, h = identify(part)[1:]
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    with Canvas(width, height) as canvas:
                        y_off = 0
                        for row in section_data.layout:
                            x_off = 0
                            largest_height = 0
                            for col in row:
                                with open('%s.jpg' % col, 'rb') as part:
                                    im = image_from_data(part.read())
                                canvas.compose(im, x_off, y_off)
                                w, h = im.width(), im.height()
                                x_off += w
                                if largest_height < h:
                                    largest_height = h
                            y_off += largest_height
                    # The file must be opened for binary writing; the
                    # default read mode cannot create or write it.
                    with open('%s.jpg' % uid, 'wb') as out:
                        out.write(canvas.export(compression_quality=70))
                    self.log.debug(
                        'Wrote composite image with uid %s to '
                        'images/%s.jpg', uid, uid)
                except Exception as e:
                    self.log.error(
                        'Failed to write composite image with '
                        'uid %s: %s', uid, e)

        # Run the HTML through the html processing plugin.
        from ebook_converter.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata record. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                # Dict views cannot be indexed; take the first key instead.
                home_html = next(iter(self.uid_text_secion_number))
        except Exception as err:
            raise Exception('Could not determine home.html') from err
        # Generate oeb from html conversion.
        # NOTE(review): the path is relative to the current directory, not
        # to output_dir - presumably the caller already runs inside it.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'),
                                 self.options, 'html', self.log, {})
        self.options.debug_pipeline = odi

        return oeb