def workaround_fire_bugs(self, jacket):
    """Replace the jacket's layout table with div/span markup.

    The Fire crashes when trying to render the table used to lay out the
    jacket, so every ``table`` and ``tr`` element is retagged as an XHTML
    ``div`` and every ``td`` as an XHTML ``span``. Elements are matched by
    local name only.

    :param jacket: object whose ``data`` attribute is the parsed jacket
        tree; the tree is modified in place.
    """
    from ebook_converter.ebooks.oeb.base import XHTML

    for table in jacket.data.xpath('//*[local-name()="table"]'):
        table.tag = XHTML('div')
        for tr in table.xpath('descendant::*[local-name()="tr"]'):
            # Collect the cells before retagging the row itself.
            cols = tr.xpath('descendant::*[local-name()="td"]')
            tr.tag = XHTML('div')
            for td in cols:
                # This body only runs when cols is non-empty, so a cell is
                # always turned into a span.
                td.tag = XHTML('span')
def remove_links_to(container, predicate):
    """Remove every link in the container that *predicate* selects.

    predicate must be a function that takes the arguments
    (name, href, fragment=None) and returns True iff the link should be
    removed.

    Documents are scanned for links in markup, in ``<style>`` elements of
    type text/css and in ``style`` attributes; stylesheets are scanned
    directly. Every file that was modified is marked dirty on the
    container.

    :return: set of names of the files that were changed.
    """
    from ebook_converter.ebooks.oeb.base import (
        iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML)

    find_style_tags = XPath('//h:style')
    find_style_attrs = XPath('//*[@style]')
    changed = set()

    for name, mt in container.mime_map.items():
        removed = False
        if mt in OEB_DOCS:
            root = container.parsed(name)

            # Links in the markup itself.
            for el, attr, href, _pos in iterlinks(root,
                                                  find_links_in_css=False):
                hname = container.href_to_name(href, name)
                frag = href.partition('#')[-1]
                if not predicate(hname, href, frag):
                    continue
                if attr is None:
                    # The link lives in the element text, not an attribute.
                    el.text = None
                elif el.tag == XHTML('link') or el.tag == XHTML('img'):
                    # These elements are meaningless without their target.
                    extract(el)
                else:
                    del el.attrib[attr]
                removed = True

            # Links inside embedded <style> sheets.
            for tag in find_style_tags(root):
                if not tag.text:
                    continue
                if (tag.get('type') or 'text/css').lower() != 'text/css':
                    continue
                sheet = container.parse_css(tag.text)
                resolver = partial(container.href_to_name, base=name)
                if remove_links_in_sheet(resolver, sheet, predicate):
                    tag.text = css_text(sheet)
                    removed = True

            # Links inside inline style="" declarations.
            for tag in find_style_attrs(root):
                raw = tag.get('style')
                if not raw:
                    continue
                declaration = container.parse_css(raw, is_declaration=True)
                resolver = partial(container.href_to_name, base=name)
                if remove_links_in_declaration(resolver, declaration,
                                               predicate):
                    removed = True
                    tag.set('style', css_text(declaration))
        elif mt in OEB_STYLES:
            removed = remove_links_in_sheet(
                partial(container.href_to_name, base=name),
                container.parsed(name), predicate)

        if removed:
            changed.add(name)

    for changed_name in changed:
        container.dirty(changed_name)
    return changed
def mlize_spine(self, oeb_book):
    """Convert every spine item to Markdown text and return it joined.

    For each item the ids and links are rewritten, a Stylizer is built
    for it, and the text dumped from its XHTML body is collected,
    followed by a blank-line separator.

    :param oeb_book: book whose ``spine`` is iterated.
    :return: the concatenated text of all spine items.
    """
    pieces = ['']
    for item in oeb_book.spine:
        self.log.debug(
            'Converting %s to Markdown formatted TXT...' % item.href)
        self.rewrite_ids(item.data, item)
        link_rewriter = partial(self.rewrite_link, page=item)
        rewrite_links(item.data, link_rewriter)
        stylizer = Stylizer(item.data, item.href, oeb_book, self.opts,
                            self.opts.output_profile)
        body = item.data.find(XHTML('body'))
        pieces.extend(self.dump_text(body, stylizer))
        pieces.append('\n\n')
    return ''.join(pieces)
def postprocess_book(self, oeb, opts, log):
    """Clean up each spine document after conversion.

    Removes ``metadata`` and ``guide`` elements from every parsed spine
    item. When a document body consists of a single ``<pre>`` element,
    its text is re-parsed as paragraphs (optionally with smartened
    punctuation when ``opts.smarten_punctuation`` is set), the ``<pre>``
    is retagged as a ``div``, and a deep copy of each ``body`` element
    found in the regenerated document is appended to it.

    :param oeb: book whose ``spine`` items are processed in place.
    :param opts: conversion options; ``smarten_punctuation`` is read.
    :param log: not used here; messages go through ``self.log``.
    """
    from ebook_converter.ebooks.oeb.base import XPath, XHTML

    for item in oeb.spine:
        root = item.data
        if not hasattr(root, 'xpath'):
            # Not an XML tree, nothing to clean.
            continue

        for unwanted in ('metadata', 'guide'):
            for node in XPath('//h:' + unwanted)(root):
                node.getparent().remove(node)

        bodies = XPath('//h:body')(root)
        if not bodies:
            continue
        body = bodies[0]
        if len(body) != 1 or body[0].tag != XHTML('pre'):
            continue

        pre = body[0]
        from ebook_converter.ebooks.txt.processor import \
            convert_basic, separate_paragraphs_single_line
        from ebook_converter.ebooks.chardet import xml_to_unicode
        self.log('LIT file with all text in singe <pre> tag detected')

        paragraphs = separate_paragraphs_single_line(pre.text)
        markup = convert_basic(paragraphs)
        markup = markup.replace('<html>',
                                '<html xmlns="%s">' % const.XHTML_NS)
        markup = xml_to_unicode(markup, strip_encoding_pats=True,
                                resolve_entities=True)[0]
        if opts.smarten_punctuation:
            # SmartyPants skips text inside <pre> tags
            from ebook_converter.ebooks.conversion import preprocess
            markup = preprocess.smarten_punctuation(markup, self.log)

        new_root = etree.fromstring(markup)
        new_bodies = XPath('//h:body')(new_root)
        pre.tag = XHTML('div')
        pre.text = ''
        for elem in new_bodies:
            pre.append(copy.deepcopy(elem))
def link_stylesheets(container, names, sheets, remove=False, mtype='text/css'):
    """Make sure each file in *names* links to every sheet in *sheets*.

    :param container: container providing ``parsed``, ``dirty``,
        ``href_to_name``, ``name_to_href``, ``insert_into_xml`` and
        ``remove_from_xml``.
    :param names: names of the files to update.
    :param sheets: names of the stylesheets to link. Any iterable of
        hashable names is accepted; duplicates are ignored and the
        first-seen order determines the order of inserted links.
    :param remove: when True, existing ``<link>`` elements whose type is
        *mtype* (or missing/empty) are removed first.
    :param mtype: MIME type used both to recognise existing stylesheet
        links and for the ``type`` attribute of new ones.
    :return: set of names of the files that were changed.
    """
    from ebook_converter.ebooks.oeb.base import XPath, XHTML

    # Materialise once: the sheets are needed both as a set and, in order,
    # once per file. Doing so keeps one-shot iterables working and prevents
    # a repeated sheet name from producing duplicate <link> elements.
    sheets = tuple(dict.fromkeys(sheets))
    snames = set(sheets)
    changed_names = set()
    find_links = XPath('//h:link[@href]')
    find_head = XPath('//h:head')

    def is_stylesheet_link(link):
        # A missing or empty type attribute counts as mtype.
        return (link.get('type', mtype) or mtype) == mtype

    for name in names:
        root = container.parsed(name)
        if remove:
            for link in find_links(root):
                if is_stylesheet_link(link):
                    container.remove_from_xml(link)
                    changed_names.add(name)
                    container.dirty(name)

        existing = {
            container.href_to_name(link.get('href'), name)
            for link in find_links(root)
            if is_stylesheet_link(link)
        }
        extra = snames - existing
        if not extra:
            continue

        changed_names.add(name)
        try:
            parent = find_head(root)[0]
        except (TypeError, IndexError):
            # No <head> yet: create one as the first child of the root.
            parent = root.makeelement(XHTML('head'))
            container.insert_into_xml(root, parent, index=0)
        for sheet in sheets:
            if sheet in extra:
                container.insert_into_xml(
                    parent,
                    parent.makeelement(
                        XHTML('link'), rel='stylesheet', type=mtype,
                        href=container.name_to_href(sheet, name)))
        container.dirty(name)
    return changed_names
def epubify_markup(self, root, log):
    """Adjust the XHTML tree in place so it renders well in EPUB readers.

    Four fix-ups are applied:

    * empty ``<title>`` elements get a single space as text;
    * a ``<p>`` that directly contains a ``<div>`` is retagged as ``<div>``;
    * a styled ``<img>`` that is the only child of a ``<div>`` gets a
      fixed max-size style and its div gets ``position:static``;
    * an ``<img>`` wrapped alone in two nested divs has auto side margins
      of the inner div translated to ``text-align`` on the outer div.

    :param root: parsed XHTML tree, modified in place.
    :param log: not used by this method.
    """
    from ebook_converter.ebooks.oeb.base import XPath, XHTML

    # Fix empty title tags
    for t in XPath('//h:title')(root):
        if not t.text:
            t.text = ' '

    # Fix <p><div> constructs as the epub checker complains about them
    for div in XPath('//h:p/h:div')(root):
        div.getparent().tag = XHTML('div')

    # Remove the position:relative as it causes problems with some epub
    # renderers. Remove display: block on an image inside a div as it is
    # redundant and prevents text-align:center from working in ADE
    # Also ensure that the img is contained in its containing div
    for img in XPath('//h:div/h:img[@style]')(root):
        div = img.getparent()
        if len(div) != 1:
            continue
        style = div.attrib.get('style', '')
        if style and not style.endswith(';'):
            style = style + ';'
        # Ensures position of containing div is static
        style += 'position:static'
        div.attrib['style'] = style
        # Ensure that the img is always contained in its frame
        img.attrib['style'] = 'max-width: 100%; max-height: 100%'

    # Handle anchored images. The default markup + CSS produced by
    # odf2xhtml works with WebKit but not with ADE. So we convert the
    # common cases of left/right/center aligned block images to work on
    # both webkit and ADE. We detect the case of setting the side margins
    # to auto and map it to an appropriate text-align directive, which
    # works in both WebKit and ADE.
    # https://bugs.launchpad.net/bugs/1063207
    # https://bugs.launchpad.net/calibre/+bug/859343
    for img in XPath('descendant::h:div/h:div/h:img')(root):
        div2 = img.getparent()
        div1 = div2.getparent()
        if (len(div1), len(div2)) != (1, 1):
            continue

        outer_rules = [
            rule for rule in (self.get_css_for_class(x)
                              for x in div1.get('class', '').split())
            if rule
        ]
        has_align = any(
            r.style.getProperty('text-align') is not None
            for r in outer_rules)

        if not has_align:
            ml = mr = None
            inner_rules = [
                rule for rule in (self.get_css_for_class(x)
                                  for x in div2.get('class', '').split())
                if rule
            ]
            for r in inner_rules:
                ml = r.style.getPropertyCSSValue('margin-left') or ml
                mr = r.style.getPropertyCSSValue('margin-right') or mr
            # Extract the raw values only once all rules have been seen,
            # so a margin set by an earlier class is not discarded when a
            # later class does not define it.
            ml = getattr(ml, 'value', None)
            mr = getattr(mr, 'value', None)

            left_auto = ml == 'auto'
            right_auto = mr == 'auto'
            if left_auto and right_auto:
                aval = 'center'
            elif left_auto:
                aval = 'right'
            elif right_auto:
                aval = 'left'
            else:
                aval = None

            if aval is not None:
                style = div1.attrib.get('style', '').strip()
                if style and not style.endswith(';'):
                    style = style + ';'
                style += 'text-align:%s' % aval
                has_align = True
                div1.attrib['style'] = style

        if has_align:
            # This is needed for ADE, without it the text-align has no
            # effect. The inner div may carry no style attribute at all,
            # so fall back to an empty string instead of raising KeyError.
            style = div2.attrib.get('style', '')
            div2.attrib['style'] = 'display:inline;' + style