def extract_svg_into_flows(self):
    """Move SVG content into ``self.flows`` and re-point images at it.

    Manifest items whose media type is ``SVG_MIME`` are serialized as UTF-8
    (with an XML declaration) and appended to ``self.flows``. Then, for
    every spine document:

    * each inline ``svg:svg`` element is serialized (without its tail) into
      a flow of its own and replaced in the tree by an XHTML ``img``
      element that references that flow;
    * each ``img`` whose ``src`` resolves to one of the SVG manifest items
      has its ``src`` rewritten to reference that item's flow.
    """
    flow_url = 'kindle:flow:%s?mime=image/svg+xml'
    # href of an SVG manifest item -> index of its entry in self.flows
    svg_flow_index = {}

    for entry in self.oeb.manifest:
        if entry.media_type != SVG_MIME:
            continue
        tree = self.data(entry)
        svg_flow_index[entry.href] = len(self.flows)
        self.flows.append(etree.tostring(
            tree, encoding='UTF-8', with_tail=True, xml_declaration=True))

    for item in self.oeb.spine:
        root = self.data(item)

        # Inline <svg>: serialize into a new flow, leave an <img> behind.
        for svg in XPath('//svg:svg')(root):
            markup = etree.tostring(svg, encoding=unicode, with_tail=False)
            flow_no = len(self.flows)
            self.flows.append(markup)
            container = svg.getparent()
            position = container.index(svg)
            placeholder = etree.Element(
                XHTML('img'), src=flow_url % to_ref(flow_no))
            # Insert the placeholder first, then remove the <svg> itself.
            container.insert(position, placeholder)
            extract(svg)

        # <img> elements that point at SVG manifest items.
        for img in XPath('//h:img[@src]')(root):
            target = item.abshref(img.get('src'))
            flow_no = svg_flow_index.get(target, None)
            if flow_no is None:
                continue
            img.set('src', flow_url % to_ref(flow_no))
def extract_svg_into_flows(self):
    """Store all SVG markup in ``self.flows`` and reference it from XHTML.

    Two passes are made:

    1. Every ``SVG_MIME`` manifest item is serialized (UTF-8, XML
       declaration included) into ``self.flows``; its position is
       remembered by href.
    2. In every spine document, inline ``svg:svg`` elements are moved into
       new flows and swapped for ``img`` elements, and existing ``img``
       elements whose ``src`` resolves to a remembered href are rewritten
       to the corresponding ``kindle:flow`` reference.
    """
    def svg_flow_url(index):
        # Reference string used as the src of an <img> showing a flow.
        return 'kindle:flow:%s?mime=image/svg+xml' % to_ref(index)

    flow_for_href = {}

    for manifest_item in self.oeb.manifest:
        if manifest_item.media_type == SVG_MIME:
            document = self.data(manifest_item)
            # Record the index before appending: it is the slot the
            # serialized document is about to occupy.
            flow_for_href[manifest_item.href] = len(self.flows)
            serialized = etree.tostring(
                document, encoding='UTF-8', with_tail=True,
                xml_declaration=True)
            self.flows.append(serialized)

    for spine_item in self.oeb.spine:
        root = self.data(spine_item)

        for svg in XPath('//svg:svg')(root):
            text = etree.tostring(svg, encoding=unicode, with_tail=False)
            index = len(self.flows)
            self.flows.append(text)
            parent = svg.getparent()
            where = parent.index(svg)
            parent.insert(
                where, etree.Element(XHTML('img'), src=svg_flow_url(index)))
            extract(svg)

        for img in XPath('//h:img[@src]')(root):
            resolved = spine_item.abshref(img.get('src'))
            index = flow_for_href.get(resolved, None)
            if index is not None:
                img.set('src', svg_flow_url(index))
def remove_namespaces(self, root):
    """Build and return a namespace-free copy of the tree under *root*.

    The new root is a plain ``html`` element that keeps the original
    root's ``lang`` (matched on local name), ``class`` and ``style``
    attributes plus its text. Comments and processing instructions are
    removed from *root* itself (so *root* is modified), then every
    remaining descendant is cloned, with the namespace stripped from its
    tag and attribute names, into the matching position of the new tree.
    """
    def local_name(qualified):
        # '{namespace}name' -> 'name'; names without '}' pass through.
        return qualified.rpartition('}')[-1]

    lang = None
    for attr, val in root.attrib.iteritems():
        if local_name(attr) == 'lang':
            lang = val

    # lxml offers no way to drop namespace declarations in place, so the
    # tree is cloned by hand. The goal is clean, namespace-less HTML 5
    # markup on serialization; the XHTML namespace is re-inserted manually
    # after serialization. Earlier processing should already have removed
    # svg and any other non-html namespaced tags.
    root_attrs = {}
    if lang:
        root_attrs['lang'] = lang
    for name in ('class', 'style'):
        if name in root.attrib:
            root_attrs[name] = root.attrib[name]

    nroot = etree.Element('html', attrib=root_attrs)
    nroot.text = root.text
    nroot.tail = '\n'

    # kindlegen appears to strip comments and processing instructions, so
    # do the same here.
    unwanted = {etree.Comment, etree.ProcessingInstruction}
    for node in root.iterdescendants():
        if node.tag in unwanted:
            extract(node)

    for node in root.iterdescendants():
        if node.tag == etree.Entity:
            clone = etree.Entity(node.name)
        else:
            tag_name = node.tag
            if tag_name is not None:
                tag_name = local_name(tag_name)
            attrs = dict(
                (local_name(k), v) for k, v in node.attrib.iteritems())
            try:
                clone = nroot.makeelement(tag_name, attrib=attrs)
            except ValueError:
                # Retry without attribute names that still contain ':'
                # (presumably those are what makeelement rejected).
                attrs = dict(
                    (k, v) for k, v in attrs.iteritems() if ':' not in k)
                clone = nroot.makeelement(tag_name, attrib=attrs)
            # NOTE(review): text is copied only for non-entity nodes;
            # confirm against the upstream layout.
            clone.text = node.text
        clone.tail = node.tail
        # Descendants arrive in document order, so the clone of the
        # parent already exists in the new tree when its child is added.
        new_parent = node_from_path(nroot, path_to_node(node.getparent()))
        new_parent.append(clone)

    return nroot
def remove_namespaces(self, root):
    """Clone *root* into a new ``html`` tree carrying no namespaces.

    Returns the new root element. The copy keeps the original root's text
    and its ``lang`` (by local name), ``class`` and ``style`` attributes.
    Comments and processing instructions are extracted from *root* before
    cloning, so the input tree is modified as a side effect.
    """
    strip_ns = lambda name: name.rpartition('}')[-1]

    # The last attribute whose local name is 'lang' wins.
    lang = None
    for attr, val in iteritems(root.attrib):
        if strip_ns(attr) == 'lang':
            lang = val

    # Namespace declarations cannot be removed from an lxml tree in
    # place, hence the manual clone below. Serializing the clone yields
    # clean HTML 5 markup without namespaces; the XHTML namespace is put
    # back by hand after serialization. svg and other non-html namespaced
    # tags are expected to have been removed by earlier stages.
    html_attrs = {'lang': lang} if lang else {}
    for key in ('class', 'style'):
        if key in root.attrib:
            html_attrs[key] = root.attrib[key]
    nroot = etree.Element('html', attrib=html_attrs)
    nroot.text, nroot.tail = root.text, '\n'

    # Drop comments and processing instructions, as kindlegen seems to.
    for candidate in root.iterdescendants():
        if candidate.tag in {etree.Comment, etree.ProcessingInstruction}:
            extract(candidate)

    for src in root.iterdescendants():
        if src.tag == etree.Entity:
            copy = etree.Entity(src.name)
        else:
            name = src.tag
            if name is not None:
                name = strip_ns(name)
            plain = {strip_ns(k): v for k, v in iteritems(src.attrib)}
            try:
                copy = nroot.makeelement(name, attrib=plain)
            except ValueError:
                # Second attempt: discard attribute names containing ':'
                # (presumably the cause of the ValueError).
                plain = {k: v for k, v in iteritems(plain) if ':' not in k}
                copy = nroot.makeelement(name, attrib=plain)
            # NOTE(review): text is set on element clones only, not on
            # entities; confirm against the upstream layout.
            copy.text = src.text
        copy.tail = src.tail
        # Locate the already-cloned parent through its path in the source
        # tree and attach the copy there.
        target = node_from_path(nroot, path_to_node(src.getparent()))
        target.append(copy)

    return nroot
def extract_css_into_flows(self):
    """Move all CSS into ``self.flows`` and link spine documents to it.

    The text of every manifest stylesheet (media type in ``OEB_STYLES``)
    is appended to ``self.flows``; the sheet is condensed first unless
    ``self.opts.expand_css`` is set. In each spine document, ``link``
    elements that resolve to such a sheet are re-pointed at its flow, and
    non-blank ``style`` elements are replaced by ``link`` elements that
    reference a flow holding the style text. Identical style texts share
    one flow; blank ``style`` elements are simply removed.
    """
    css_url = 'kindle:flow:%s?mime=text/css'
    # style text -> replacement <link> elements, so that identical
    # <style> blocks end up sharing a single flow
    inlines = defaultdict(list)
    # stylesheet href -> index of its text in self.flows
    sheets = {}

    for entry in self.oeb.manifest:
        if entry.media_type not in OEB_STYLES:
            continue
        if not self.opts.expand_css and hasattr(entry.data, 'cssText'):
            condense_sheet(self.data(entry))
        css_text = self.data(entry).cssText
        sheets[entry.href] = len(self.flows)
        self.flows.append(force_unicode(css_text, 'utf-8'))

    for item in self.oeb.spine:
        root = self.data(item)

        for link in XPath('//h:link[@href]')(root):
            flow_no = sheets.get(item.abshref(link.get('href')), None)
            if flow_no is None:
                continue
            link.set('href', css_url % to_ref(flow_no))

        for style in XPath('//h:style')(root):
            parent = style.getparent()
            position = parent.index(style)
            raw = style.text
            if not (raw and raw.strip()):
                # Nothing worth keeping: drop the element altogether.
                extract(style)
                continue
            stand_in = etree.Element(
                XHTML('link'), type='text/css', rel='stylesheet')
            stand_in.tail = '\n'
            parent.insert(position, stand_in)
            extract(style)
            # The href is filled in below, once the flow number is known.
            inlines[raw].append(stand_in)

    for raw, links in inlines.iteritems():
        ref = to_ref(len(self.flows))
        self.flows.append(raw)
        for link in links:
            link.set('href', css_url % ref)
def extract_css_into_flows(self):
    """Relocate stylesheets and inline styles into ``self.flows``.

    * Manifest items with a media type in ``OEB_STYLES`` contribute their
      ``cssText`` (condensed beforehand unless ``opts.expand_css`` is set)
      as one flow each.
    * ``link`` elements in spine documents that resolve to one of those
      items get a ``kindle:flow`` href.
    * ``style`` elements in spine documents are removed; those with
      non-blank text are replaced by a stylesheet ``link`` whose flow
      holds that text, one flow per distinct text.
    """
    def point_at_flow(link_elem, ref):
        # Set the href of a <link> to the CSS flow identified by ref.
        link_elem.set('href', 'kindle:flow:%s?mime=text/css' % ref)

    pending_links = defaultdict(list)  # identical <style>s share a flow
    flow_of_sheet = {}

    for sheet_item in self.oeb.manifest:
        if sheet_item.media_type in OEB_STYLES:
            may_condense = not self.opts.expand_css and hasattr(
                sheet_item.data, 'cssText')
            if may_condense:
                condense_sheet(self.data(sheet_item))
            text = self.data(sheet_item).cssText
            flow_of_sheet[sheet_item.href] = len(self.flows)
            self.flows.append(force_unicode(text, 'utf-8'))

    for doc in self.oeb.spine:
        root = self.data(doc)

        for link in XPath('//h:link[@href]')(root):
            resolved = doc.abshref(link.get('href'))
            index = flow_of_sheet.get(resolved, None)
            if index is not None:
                point_at_flow(link, to_ref(index))

        for tag in XPath('//h:style')(root):
            holder = tag.getparent()
            slot = holder.index(tag)
            css = tag.text
            if css and css.strip():
                replacement = etree.Element(
                    XHTML('link'), type='text/css', rel='stylesheet')
                replacement.tail = '\n'
                holder.insert(slot, replacement)
                extract(tag)
                pending_links[css].append(replacement)
            else:
                # Blank <style>: remove it without leaving a <link>.
                extract(tag)

    # Each distinct inline style text becomes one flow, referenced by
    # all the <link> elements that replaced a <style> with that text.
    for css, links in pending_links.iteritems():
        ref = to_ref(len(self.flows))
        self.flows.append(css)
        for link in links:
            point_at_flow(link, ref)
def extract_css_into_flows(self):
    """Move all CSS into ``self.flows``, rewriting links and @imports.

    Manifest stylesheets (media type in ``OEB_STYLES``) are appended to
    ``self.flows`` as sheet objects; they are condensed first unless
    ``opts.mobi_passthrough`` or ``opts.expand_css`` is set. Spine
    documents have their stylesheet ``link`` hrefs re-pointed at those
    flows, and their non-blank ``style`` elements replaced by ``link``
    elements referencing a flow with the style text (one flow per
    distinct text). ``@import`` rules that resolve to a manifest
    stylesheet are rewritten to ``kindle:flow`` references, both in
    inline styles and in the manifest sheets. Finally every flow that
    has a ``cssText`` attribute is replaced by its unicode text.
    """
    css_url = 'kindle:flow:%s?mime=text/css'
    inlines = defaultdict(list)  # identical <style>s are stored once
    sheets = {}  # stylesheet href -> index in self.flows
    passthrough = getattr(self.opts, 'mobi_passthrough', False)

    for entry in self.oeb.manifest:
        if entry.media_type not in OEB_STYLES:
            continue
        sheet = self.data(entry)
        if not (passthrough or self.opts.expand_css) and hasattr(
                entry.data, 'cssText'):
            condense_sheet(sheet)
        sheets[entry.href] = len(self.flows)
        # Kept as an object for now; serialized at the very end, after
        # its @import rules have been fixed.
        self.flows.append(sheet)

    def fix_import_rules(sheet, owner):
        # Re-point @import rules of `sheet` at known stylesheet flows,
        # resolving hrefs relative to `owner`. Returns True if any rule
        # was changed.
        changed = False
        for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
            if not rule.href:
                continue
            flow_no = sheets.get(owner.abshref(rule.href), None)
            if flow_no is not None:
                rule.href = css_url % to_ref(flow_no)
                changed = True
        return changed

    for item in self.oeb.spine:
        root = self.data(item)

        for link in XPath('//h:link[@href]')(root):
            flow_no = sheets.get(item.abshref(link.get('href')), None)
            if flow_no is not None:
                link.set('href', css_url % to_ref(flow_no))

        for style in XPath('//h:style')(root):
            parent = style.getparent()
            position = parent.index(style)
            raw = style.text
            if not (raw and raw.strip()):
                extract(style)
                continue
            parsed = cssutils.parseString(raw, validate=False)
            if fix_import_rules(parsed, item):
                # Only re-serialize when an @import was actually changed.
                raw = force_unicode(parsed.cssText, 'utf-8')
            stand_in = etree.Element(
                XHTML('link'), type='text/css', rel='stylesheet')
            stand_in.tail = '\n'
            parent.insert(position, stand_in)
            extract(style)
            inlines[raw].append(stand_in)

    for raw, links in inlines.iteritems():
        ref = to_ref(len(self.flows))
        self.flows.append(raw)
        for link in links:
            link.set('href', css_url % ref)

    for entry in self.oeb.manifest:
        if entry.media_type in OEB_STYLES:
            sheet = self.data(entry)
            if hasattr(sheet, 'cssRules'):
                fix_import_rules(sheet, entry)

    # Iterate over a snapshot while replacing sheet objects in place.
    for pos, flow in enumerate(tuple(self.flows)):
        if hasattr(flow, 'cssText'):
            self.flows[pos] = force_unicode(flow.cssText, 'utf-8')
def extract_css_into_flows(self):
    """Collect stylesheets and inline styles into ``self.flows``.

    Steps, in order:

    1. Each manifest item with a media type in ``OEB_STYLES`` is appended
       to ``self.flows`` as a sheet object (condensed first unless
       ``opts.expand_css`` is set).
    2. In each spine document, ``link`` hrefs resolving to those items
       are rewritten to ``kindle:flow`` references, and ``style``
       elements are removed; non-blank ones leave behind a ``link``.
    3. Each distinct inline style text is appended as a flow and the
       ``link`` elements standing in for it are pointed at that flow.
    4. ``@import`` rules in the manifest sheets are rewritten.
    5. Flows that have a ``cssText`` attribute are replaced by their
       unicode text.
    """
    links_by_text = defaultdict(list)  # one flow per distinct <style> text
    flow_of_sheet = {}

    def css_flow_href(index):
        return 'kindle:flow:%s?mime=text/css' % to_ref(index)

    def fix_import_rules(sheet, owner):
        # Rewrite @import hrefs (resolved against `owner`) that name a
        # known stylesheet; report whether anything was rewritten.
        changed = False
        for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
            if rule.href:
                index = flow_of_sheet.get(owner.abshref(rule.href), None)
                if index is not None:
                    rule.href = css_flow_href(index)
                    changed = True
        return changed

    for sheet_item in self.oeb.manifest:
        if sheet_item.media_type in OEB_STYLES:
            sheet = self.data(sheet_item)
            if not self.opts.expand_css and hasattr(
                    sheet_item.data, 'cssText'):
                condense_sheet(sheet)
            flow_of_sheet[sheet_item.href] = len(self.flows)
            self.flows.append(sheet)

    for doc in self.oeb.spine:
        root = self.data(doc)

        for link in XPath('//h:link[@href]')(root):
            resolved = doc.abshref(link.get('href'))
            index = flow_of_sheet.get(resolved, None)
            if index is not None:
                link.set('href', css_flow_href(index))

        for tag in XPath('//h:style')(root):
            holder = tag.getparent()
            slot = holder.index(tag)
            css = tag.text
            if css and css.strip():
                parsed = cssutils.parseString(css, validate=False)
                if fix_import_rules(parsed, doc):
                    # Use the re-serialized text so the stored flow
                    # carries the rewritten @import rules.
                    css = force_unicode(parsed.cssText, 'utf-8')
                replacement = etree.Element(
                    XHTML('link'), type='text/css', rel='stylesheet')
                replacement.tail = '\n'
                holder.insert(slot, replacement)
                extract(tag)
                links_by_text[css].append(replacement)
            else:
                extract(tag)

    for css, links in links_by_text.iteritems():
        # The flow index is taken before appending, so it names the slot
        # the text is about to fill.
        href = css_flow_href(len(self.flows))
        self.flows.append(css)
        for link in links:
            link.set('href', href)

    for sheet_item in self.oeb.manifest:
        if sheet_item.media_type in OEB_STYLES:
            sheet = self.data(sheet_item)
            if hasattr(sheet, 'cssRules'):
                fix_import_rules(sheet, sheet_item)

    # Serialize the sheet objects stored in step 1; string flows have no
    # cssText attribute and are left alone.
    for pos, flow in enumerate(tuple(self.flows)):
        if hasattr(flow, 'cssText'):
            self.flows[pos] = force_unicode(flow.cssText, 'utf-8')