def latex_math(text):
    """Replace every ``<latex>`` element in *text* with rendered math markup.

    When ``TEXOID_ENABLED`` is falsy, *text* is returned unchanged. Otherwise
    *text* is parsed with ``lxml_tree.fromstring`` and each ``<latex>``
    descendant is replaced in place by one of:

    * a ``<pre>`` with a generic error message followed by the LaTeX source,
      when the renderer returns a falsy result;
    * a ``<div>`` (centered) or, when the element carries an ``inline``
      attribute, a ``<span>`` wrapping an ``<img>`` that points at the SVG
      and switches to the PNG through its ``onerror`` handler;
    * a ``<pre>`` containing ``result['error']`` when the renderer reports
      an error.

    Returns the modified tree (or the original *text* when rendering is
    disabled).
    """
    if not TEXOID_ENABLED:
        return text

    tree = lxml_tree.fromstring(text)
    texoid = TexoidRenderer()

    for latex in tree.xpath('.//latex'):
        result = texoid.get_result(latex.text)
        if not result:
            tag = html.Element('pre')
            # lxml reports the text of an empty element as None; fall back to
            # an empty string so the error message can still be built.
            tag.text = 'LaTeX rendering error\n' + (latex.text or '')
        elif 'error' not in result:
            img = html.Element('img')
            img.set('src', result['svg'])
            # If the SVG fails to load, swap in the PNG once and disarm the
            # handler so a broken PNG cannot loop.
            img.set('onerror', "this.src='%s';this.onerror=null" % result['png'])
            ident = result['meta']
            img.set('width', ident['width'])
            img.set('height', ident['height'])

            style = []
            if 'inline' not in latex.attrib:
                tag = html.Element('div')
                style += ['text-align: center']
            else:
                tag = html.Element('span')
                # The wrapper's width comes from the reported width; it was
                # previously filled from the height value.
                style += ['max-width: 100%',
                          'height: %s' % ident['height'],
                          'max-height: %s' % ident['height'],
                          'width: %s' % ident['width']]
            tag.set('style', ';'.join(style))
            tag.append(img)
        else:
            tag = html.Element('pre')
            tag.text = result['error']
        latex.getparent().replace(latex, tag)
    return tree
def process(self, text):
    """Apply the inline and display math substitutions to every text node.

    *text* is parsed with ``lxml_tree.fromstring``. Each text node is run
    through ``inline_math`` and then ``display_math``; when the outcome
    differs from the node, the outcome is parsed as HTML fragments and
    spliced into the tree where the node used to be. Returns the tree.
    """
    tree = lxml_tree.fromstring(text)
    for node in tree.xpath('//text()'):
        replaced = inline_math.sub(self._sub_inline, node)
        replaced = display_math.sub(self._sub_display, replaced)
        if replaced == node:
            continue

        # `anchor` is the element the next fragment attaches to;
        # `fills_text` says whether we are still filling its .text
        # (as opposed to placing things after it).
        anchor = node.getparent()
        fills_text = node.is_text
        if fills_text:
            anchor.text = ''
        else:
            anchor.tail = ''

        for piece in html.fragments_fromstring(replaced):
            if not isinstance(piece, ElementBase):
                # Plain string fragment.
                if fills_text:
                    anchor.text += piece
                else:
                    anchor.tail = piece
                continue

            if fills_text:
                # First element produced from a .text node becomes the
                # first child of the owning element.
                anchor.insert(0, piece)
            else:
                following = anchor.getnext()
                if following is None:
                    anchor.getparent().append(piece)
                else:
                    following.addprevious(piece)
            # Later fragments are placed relative to the element just added.
            anchor = piece
            fills_text = False
    return tree
def absolute_links(text, url):
    """Resolve the ``href`` of every ``<a>`` in *text* against *url*.

    *text* is parsed with ``lxml_tree.fromstring``. Anchors without an
    ``href`` (or with an empty one) are left alone, as are anchors whose
    ``href`` cannot be parsed as a URL. Returns the modified tree.
    """
    tree = lxml_tree.fromstring(text)
    for anchor in tree.xpath('.//a'):
        href = anchor.get('href')
        if not href:
            continue
        try:
            resolved = urljoin(url, href)
        except ValueError:
            # urljoin() parses the href and raises ValueError on malformed
            # input (e.g. unmatched square brackets in the netloc). One bad
            # link should not abort processing of the whole document, so the
            # href is kept as written.
            continue
        anchor.set('href', resolved)
    return tree
def parse_html(self, string):
    """Rewrite the resource URLs of images and embedded objects.

    *string* is parsed with ``lxml_tree.fromstring``. Every ``<img>`` with a
    non-empty ``src`` and every ``<object>`` with a non-empty ``data`` has
    that attribute replaced by ``self._rewrite_url(<old value>)``. Returns
    the modified tree.
    """
    doc = lxml_tree.fromstring(string)
    # (xpath query, attribute holding the URL) — images first, then objects.
    targets = (
        ('.//img', 'src'),
        ('.//object', 'data'),
    )
    for query, attribute in targets:
        for node in doc.xpath(query):
            current = node.get(attribute)
            if current:
                node.set(attribute, self._rewrite_url(current))
    return doc
def no_follow_external_links(text):
    """Mark links to non-whitelisted hosts with ``rel="nofollow"``.

    *text* is parsed with ``lxml_tree.fromstring``. For every ``<a>`` with a
    non-empty ``href``, the network location is extracted; when it is
    non-empty and not contained in ``whitelist`` (a name resolved from the
    surrounding scope, not defined in this function), ``rel`` is set to
    ``nofollow``. An ``href`` that cannot be parsed at all is also marked
    ``nofollow``. Returns the modified tree.
    """
    tree = lxml_tree.fromstring(text)
    for anchor in tree.xpath('.//a'):
        href = anchor.get('href')
        if not href:
            continue
        try:
            netloc = urlparse(href).netloc
        except ValueError:
            # urlparse() raises ValueError on malformed input (e.g. unmatched
            # square brackets in the netloc). Such a link cannot be shown to
            # point at a whitelisted host, so treat it as external rather
            # than letting one bad href abort the whole document.
            anchor.set('rel', 'nofollow')
            continue
        if netloc and netloc not in whitelist:
            anchor.set('rel', 'nofollow')
    return tree
def parse_html(self, string):
    """Rewrite the resource URLs of images and embedded objects.

    *string* is parsed with ``lxml_tree.fromstring``. Every ``<img>`` with a
    non-empty ``src`` and every ``<object>`` with a non-empty ``data`` has
    that attribute replaced by ``self._rewrite_url(<old value>)``. Returns
    the modified tree.
    """
    doc = lxml_tree.fromstring(string)
    # (xpath query, attribute holding the URL) — images first, then objects.
    targets = (
        (".//img", "src"),
        (".//object", "data"),
    )
    for query, attribute in targets:
        for node in doc.xpath(query):
            current = node.get(attribute)
            if current:
                node.set(attribute, self._rewrite_url(current))
    return doc
def lazy_load(text):
    """Convert the ``<img>`` elements in *text* to a deferred-loading form.

    *text* is parsed with ``lxml_tree.fromstring``. For each ``<img>`` whose
    ``src`` is present and does not start with ``data``:

    * a ``<noscript>`` holding a copy of the original image is inserted
      immediately before it;
    * the original ``src`` is moved to ``data-src`` and ``src`` is set to
      ``blank`` (a name resolved from the surrounding scope, not defined in
      this function);
    * ``unveil`` is appended to the ``class`` attribute, or becomes the
      class when none was set.

    Returns the modified tree.
    """
    tree = lxml_tree.fromstring(text)
    for img in tree.xpath('.//img'):
        src = img.get('src')
        # An <img> without a src attribute yields None here; there is nothing
        # to defer, and calling startswith() on None would raise.
        if src is None or src.startswith('data'):
            continue

        # Keep an untouched copy of the image inside <noscript>.
        noscript = html.Element('noscript')
        noscript.append(deepcopy(img))
        img.addprevious(noscript)

        img.set('data-src', src)
        img.set('src', blank)
        css_class = img.get('class')
        img.set('class', css_class + ' unveil' if css_class else 'unveil')
    return tree
def lazy_load(text):
    """Convert the ``<img>`` elements in *text* to a deferred-loading form.

    *text* is parsed with ``lxml_tree.fromstring``. For each ``<img>`` whose
    ``src`` is present and does not start with ``data``:

    * a ``<noscript>`` holding a copy of the original image is inserted
      immediately before it;
    * the original ``src`` is moved to ``data-src`` and ``src`` is set to
      the value of ``static('blank.gif')``;
    * ``unveil`` is appended to the ``class`` attribute, or becomes the
      class when none was set.

    Returns the modified tree.
    """
    blank = static('blank.gif')
    tree = lxml_tree.fromstring(text)
    for img in tree.xpath('.//img'):
        src = img.get('src')
        # An <img> without a src attribute yields None here; there is nothing
        # to defer, and calling startswith() on None would raise.
        if src is None or src.startswith('data'):
            continue

        # Keep an untouched copy of the image inside <noscript>.
        noscript = html.Element('noscript')
        noscript.append(deepcopy(img))
        img.addprevious(noscript)

        img.set('data-src', src)
        img.set('src', blank)
        css_class = img.get('class')
        img.set('class', css_class + ' unveil' if css_class else 'unveil')
    return tree
def reference(text):
    """Collect references from all element text/tails and update the tree.

    *text* is parsed with ``lxml_tree.fromstring``. Every element's non-empty
    ``text`` and ``tail`` is passed through ``process_reference`` and the
    outcome handed to ``populate_list`` together with a shared ``queries``
    mapping. Each key of ``queries`` is then resolved by calling the second
    entry of ``reference_map[key]`` on the collected values, and
    ``update_tree`` is invoked once for the text entries and once for the
    tail entries. Returns the tree.

    ``process_reference``, ``populate_list``, ``reference_map`` and
    ``update_tree`` are defined outside this function.
    """
    tree = lxml_tree.fromstring(text)
    text_entries = []
    tail_entries = []
    pending = defaultdict(list)

    for node in tree.iter():
        if node.text:
            populate_list(pending, text_entries, node, *process_reference(node.text))
        if node.tail:
            populate_list(pending, tail_entries, node, *process_reference(node.tail))

    # Resolve each kind of reference with the callable stored at index 1 of
    # its reference_map entry.
    resolved = {}
    for kind, values in pending.items():
        resolver = reference_map[kind][1]
        resolved[kind] = resolver(values)

    update_tree(text_entries, resolved, is_tail=False)
    update_tree(tail_entries, resolved, is_tail=True)
    return tree
def latex_math(text):
    """Replace every ``<latex>`` element in *text* with rendered math markup.

    When ``TEXOID_ENABLED`` is falsy, *text* is returned unchanged. Otherwise
    *text* is parsed with ``lxml_tree.fromstring`` and each ``<latex>``
    descendant is replaced in place by one of:

    * a ``<pre>`` with a generic error message followed by the LaTeX source,
      when the renderer returns a falsy result;
    * a ``<div>`` (centered) or, when the element carries an ``inline``
      attribute, a ``<span>`` wrapping an ``<img>`` that points at the SVG
      and switches to the PNG through its ``onerror`` handler;
    * a ``<pre>`` containing ``result['error']`` when the renderer reports
      an error.

    Returns the modified tree (or the original *text* when rendering is
    disabled).
    """
    if not TEXOID_ENABLED:
        return text

    tree = lxml_tree.fromstring(text)
    texoid = TexoidRenderer()

    for latex in tree.xpath('.//latex'):
        result = texoid.get_result(latex.text)
        if not result:
            tag = html.Element('pre')
            # lxml reports the text of an empty element as None; fall back to
            # an empty string so the error message can still be built.
            tag.text = 'LaTeX rendering error\n' + (latex.text or '')
        elif 'error' not in result:
            img = html.Element('img')
            img.set('src', result['svg'])
            # If the SVG fails to load, swap in the PNG once and disarm the
            # handler so a broken PNG cannot loop.
            img.set('onerror', "this.src='%s';this.onerror=null" % result['png'])
            ident = result['meta']
            img.set('width', ident['width'])
            img.set('height', ident['height'])

            style = []
            if 'inline' not in latex.attrib:
                tag = html.Element('div')
                style += ['text-align: center']
            else:
                tag = html.Element('span')
                # The wrapper's width comes from the reported width; it was
                # previously filled from the height value.
                style += [
                    'max-width: 100%',
                    'height: %s' % ident['height'],
                    'max-height: %s' % ident['height'],
                    'width: %s' % ident['width'],
                ]
            tag.set('style', ';'.join(style))
            tag.append(img)
        else:
            tag = html.Element('pre')
            tag.text = result['error']
        latex.getparent().replace(latex, tag)
    return tree