Esempio n. 1
0
    def __call__(self, oeb, opts):
        '''
        Run ``self.url_replacer`` over the links of every manifest item, then
        remap guide references through ``self.rename_map`` and pass the TOC to
        ``self.fix_toc_entry``.

        The book, its logger and the options are stored on the instance as
        ``self.oeb``, ``self.log`` and ``self.opts``.
        '''
        import css_parser
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        for manifest_item in oeb.manifest.items:
            # Recorded before any rewriting happens; presumably read by
            # url_replacer, which is defined outside this block.
            self.current_item = manifest_item
            data = manifest_item.data
            if etree.iselement(data):
                # Parsed XML tree
                rewrite_links(data, self.url_replacer)
            elif hasattr(data, 'cssText'):
                # Stylesheet object
                css_parser.replaceUrls(data, self.url_replacer)

        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                base_href, fragment = urllib.parse.urldefrag(
                    urlnormalize(ref.href))
                target = self.rename_map.get(base_href)
                if target is None:
                    # Not renamed, leave the reference untouched
                    continue
                # Re-attach the fragment that was split off for the lookup
                ref.href = (target + '#' + fragment) if fragment else target

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)
Esempio n. 2
0
 def fix_links(self):
     '''
     Fix references to the split files in other content files.

     Every manifest item whose data is a parsed element tree is made the
     current item and has its links passed through ``self.rewrite_links``.
     '''
     for manifest_item in self.oeb.manifest:
         if not etree.iselement(manifest_item.data):
             # Only parsed trees carry links that can be rewritten here
             continue
         self.current_item = manifest_item
         base.rewrite_links(manifest_item.data, self.rewrite_links)
Esempio n. 3
0
 def mlize_spine(self, oeb_book):
     '''
     Convert every spine item of ``oeb_book`` and return the concatenated
     text.

     For each item the ids and links are rewritten in place, then the body
     element is dumped with ``self.dump_text`` and followed by a blank line.
     '''
     chunks = ['']
     for page in oeb_book.spine:
         self.log.debug('Converting %s to Markdown formatted TXT...' % page.href)
         self.rewrite_ids(page.data, page)
         link_rewriter = partial(self.rewrite_link, page=page)
         rewrite_links(page.data, link_rewriter)
         stylizer = Stylizer(page.data, page.href, oeb_book, self.opts,
                             self.opts.output_profile)
         body = page.data.find(XHTML('body'))
         chunks.extend(self.dump_text(body, stylizer))
         chunks.append('\n\n')
     return ''.join(chunks)
Esempio n. 4
0
 def mlize_spine(self, oeb_book):
     '''
     Convert every spine item of ``oeb_book`` and return one HTML document.

     The document starts with a head holding the XML-escaped
     ``self.book_title``; each spine item has its ids and links rewritten
     in place, and its body is dumped with ``self.dump_text`` followed by a
     blank line.
     '''
     header = (
         u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>'
         % prepare_string_for_xml(self.book_title)
     )
     parts = [header]
     for page in oeb_book.spine:
         self.log.debug('Converting %s to HTML...' % page.href)
         self.rewrite_ids(page.data, page)
         link_rewriter = partial(self.rewrite_link, page=page)
         base.rewrite_links(page.data, link_rewriter)
         stylizer = Stylizer(page.data, page.href, oeb_book, self.opts)
         body = page.data.find(base.tag('xhtml', 'body'))
         parts.extend(self.dump_text(body, stylizer, page))
         parts.append('\n\n')
     parts.append('</body></html>')
     return ''.join(parts)
Esempio n. 5
0
 def mlize_spine(self, oeb_book):
     '''
     Convert every spine item of ``oeb_book`` and return one HTML document.

     The body is produced first (ids and links rewritten in place, each
     item dumped with ``self.dump_text``). The head is assembled afterwards:
     a link to ``style.css`` when ``opts.htmlz_class_style`` is
     ``'external'``, otherwise an inline ``<style>`` filled from
     ``self.get_css``, followed by the XML-escaped book title.
     '''
     body_parts = []
     for page in oeb_book.spine:
         self.log.debug('Converting %s to HTML...' % page.href)
         self.rewrite_ids(page.data, page)
         link_rewriter = partial(self.rewrite_link, page=page)
         base.rewrite_links(page.data, link_rewriter)
         stylizer = Stylizer(page.data, page.href, oeb_book, self.opts)
         body = page.data.find(base.tag('xhtml', 'body'))
         body_parts.extend(self.dump_text(body, stylizer, page))
         body_parts.append('\n\n')

     # The stylesheet is resolved only after the whole spine was dumped.
     if self.opts.htmlz_class_style == 'external':
         css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
     else:
         css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
     title = u'<title>%s</title>' % prepare_string_for_xml(self.book_title)

     head = [
         u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />',
         css,
         title,
         u'</head><body>',
     ]
     return ''.join(head + body_parts + [u'</body></html>'])
Esempio n. 6
0
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''
        Build an OEB book from an HTML file and the files found with it.

        The book is created unpopulated and its metadata is filled from
        ``mi``, with fallbacks for a missing language, creator and title.
        Every non-binary file returned by ``get_filelist`` is added to the
        manifest and spine. Links in the HTML items and in the stylesheet
        items are passed through ``self.resource_adder``. Finally a table of
        contents is generated from the document titles, or from the first
        heading of each document when titles repeat.

        :param htmlpath: Path of the input HTML file; passed to
                         ``get_filelist``.
        :param basedir: Passed through to ``get_filelist``.
        :param opts: Conversion options. ``input_encoding`` is read
                     directly, ``language`` and ``authors`` only if present.
        :param log: Logger; also stored as ``self.log``.
        :param mi: Metadata merged into the book's metadata.
        :return: The OEB book, which is also stored as ``self.oeb``.
        '''
        import uuid
        from ebook_converter.ebooks.conversion.plumber import create_oebbook
        from ebook_converter.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES,
            xpath, urlquote)
        from ebook_converter.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from ebook_converter.ebooks.html.input import get_filelist
        from ebook_converter.ebooks.metadata import string_to_authors
        from ebook_converter.utils.localization import canonicalize_lang
        import css_parser
        import logging
        css_parser.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        # --- Metadata, with fallbacks for the mandatory fields ---
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            lang = canonicalize_lang(getattr(opts, 'language', None))
            if not lang:
                oeb.logger.warn('Language not specified')
                lang = get_lang().replace('_', '-')
            metadata.add('language', lang)
        if not metadata.creator:
            authors = getattr(opts, 'authors', None)
            if authors:
                authors = string_to_authors(authors)
            if not authors:
                oeb.logger.warn('Creator not specified')
                authors = [self.oeb.translate('Unknown')]
            for author in authors:
                metadata.add('creator', author)
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate('Unknown'))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the loop looks for an identifier carrying an ``id``
        # attribute but then stores the *first* identifier, not the one that
        # matched. Left unchanged; confirm whether ``ident`` was intended.
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break

        # --- Manifest and spine: one entry per non-binary input file ---
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            item_id, href = oeb.manifest.generate(
                id='html', href=sanitize_file_name(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(item_id, href, 'text/html')
            if path == htmlpath and '%' in path:
                bname = urlquote(bname)
            item.html_input_href = bname
            oeb.spine.add(item, True)

        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        self.added_resources.update(htmlfile_map)
        # Exposed on the instance; presumably for resource_adder, which is
        # defined outside this block.
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urllib.parse.urldefrag
        self.BINARY_MIME = BINARY_MIME

        # --- Link rewriting in HTML items ---
        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            href = htmlfile_map[path]
            try:
                item = oeb.manifest.hrefs[href]
            except KeyError:
                item = oeb.manifest.hrefs[urlnormalize(href)]
            rewrite_links(item.data,
                          functools.partial(self.resource_adder, base=dpath))

        # --- Link rewriting in stylesheet items ---
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                # Directory of the source file registered for this href, or
                # None when the href is not in added_resources.
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                css_parser.replaceUrls(item.data,
                        functools.partial(self.resource_adder, base=dpath))

        # --- Auto-generated table of contents ---
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        # Title, header and item are kept together per linear spine item so
        # that a document without a <title> (or a non-linear item) cannot
        # shift the labels onto the wrong hrefs.
        entries = []
        for item in self.oeb.spine:
            if not item.linear:
                continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            header = '(unlabled)'
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                text = ''.join(xpath(html, expr % tag))
                text = re.sub(r'\s+', ' ', text.strip())
                if text:
                    header = text
                    break
            entries.append((title, header, item))

        # Titles are preferred; headers are used only when titles repeat.
        titles = [title for title, _, _ in entries if title]
        use_headers = len(titles) > len(set(titles))
        for title, header, item in entries:
            if use_headers:
                toc.add(header, item.href)
            elif title:
                # Untitled documents get no entry, as before; the titled
                # ones now point at their own document.
                toc.add(title, item.href)

        oeb.container = DirContainer(os.getcwd(), oeb.log, ignore_opf=True)
        return oeb