def parse(self, text):
    """Parse ``text`` and render it as HTML according to ``self.format``.

    Supported values of ``self.format`` are ``'markdown'`` and
    ``'restructuredtext'``.

    :param text: source markup to render.
    :returns: the rendered HTML. For reStructuredText, ``<div ...>`` and
        ``</div>`` tags that are followed by a newline are removed and
        ``<hr class="..." />`` is reduced to ``<hr />``.
    :raises RuntimeError: if the module required by the format cannot be
        imported.
    :raises NotImplementedError: if ``self.format`` is not supported.
    """
    if self.format == 'markdown':
        try:
            import markdown
        except ImportError:
            raise RuntimeError(u"Looks like markdown is not installed")
        return markdown.markdown(text)
    elif self.format == 'restructuredtext':
        try:
            from rst import html_body
        except ImportError:
            raise RuntimeError(u"Looks like docutils are not installed")
        html = html_body(text, input_encoding=self.encoding,
                         output_encoding=self.encoding)
        # The flags must be passed by keyword: the fourth positional
        # argument of re.sub() is ``count``, so passing flags there silently
        # capped the number of replacements and applied no flags at all.
        # re.DOTALL is deliberately left off: it was never in effect and
        # enabling it would let the patterns run across line boundaries.
        html = re.sub(r'<div.*?>\n', r'', html, flags=re.UNICODE)
        html = re.sub(r'</div>\n', r'', html, flags=re.UNICODE)
        html = re.sub(r'<hr class=".*?" />\n', r'<hr />\n', html,
                      flags=re.UNICODE)
        return html
    else:
        raise NotImplementedError(
            u"Unsupported format %s, cannot parse" % self.format)
def parse(self, text):
    """Parse ``text`` and render it as HTML according to ``self.format``.

    Supported values of ``self.format`` are ``'markdown'`` (rendered with
    the extensions listed in ``self.extensions``) and
    ``'restructuredtext'``.

    :param text: source markup to render.
    :returns: the rendered HTML. For reStructuredText, the system-message
        paragraph matched by the first pattern below is removed,
        ``<h1 class="title">`` becomes ``<h1>`` and ``<hr class="..." />``
        becomes ``<hr />``.
    :raises RuntimeError: if the module required by the format cannot be
        imported.
    :raises NotImplementedError: if ``self.format`` is not supported.
    """
    if self.format == 'markdown':
        try:
            import markdown
        except ImportError:
            raise RuntimeError(u"Looks like markdown is not installed")
        # Extensions are passed by keyword; recent Python-Markdown releases
        # no longer accept them as a positional argument.
        return markdown.markdown(text, extensions=self.extensions)
    elif self.format == 'restructuredtext':
        try:
            from rst import html_body
        except ImportError:
            raise RuntimeError(u"Looks like docutils are not installed")
        html = html_body(text, input_encoding=self.encoding)
        # The flags must be passed by keyword: the fourth positional
        # argument of re.sub() is ``count``, so passing flags there silently
        # capped the number of replacements and applied no flags at all.
        # re.DOTALL is deliberately left off: it was never in effect, and
        # with it the greedy ``.+`` below would swallow everything up to the
        # last "ion." in the whole document.
        # Pretty hackish
        html = re.sub(r'<p class="sys.+\n.+ion\.', r'', html,
                      flags=re.UNICODE)
        html = re.sub(r'<h1 class="title">', r'<h1>', html, flags=re.UNICODE)
        html = re.sub(r'<hr class=".*?" />\n', r'<hr />\n', html,
                      flags=re.UNICODE)
        return html
    else:
        raise NotImplementedError(
            u"Unsupported format %s, cannot parse" % self.format)
def parse(self, text):
    """Parse ``text`` and render it as HTML according to ``self.format``.

    Supported values of ``self.format`` are ``'markdown'`` (rendered with
    the extensions listed in ``self.md_extensions``),
    ``'restructuredtext'`` and ``'textile'``.

    :param text: source markup to render.
    :returns: the rendered HTML. For reStructuredText, every
        ``(pattern, replacement, mode)`` entry of ``self.RST_REPLACEMENTS``
        is applied in order and the result is stripped of surrounding
        whitespace.
    :raises RuntimeError: if the module required by the format cannot be
        imported.
    :raises NotImplementedError: if ``self.format`` is not supported.
    """
    if self.format == 'markdown':
        try:
            import markdown
        except ImportError:
            raise RuntimeError(u"Looks like markdown is not installed")
        # Extensions are passed by keyword; recent Python-Markdown releases
        # no longer accept them as a positional argument.
        return markdown.markdown(text, extensions=self.md_extensions)
    elif self.format == 'restructuredtext':
        try:
            from rst import html_body
        except ImportError:
            raise RuntimeError(u"Looks like docutils are not installed")
        html = html_body(text, input_encoding=self.encoding)
        # RST generates pretty much markup to be removed in our case
        for (pattern, replacement, mode) in self.RST_REPLACEMENTS:
            # ``mode`` holds regex flags. It has to go in by keyword: the
            # fourth positional argument of re.sub() is ``count``, which
            # would limit the replacements and leave the flags unused.
            html = re.sub(pattern, replacement, html, flags=mode)
        return html.strip()
    elif self.format == 'textile':
        try:
            import textile
        except ImportError:
            raise RuntimeError(u"Looks like textile is not installed")
        return textile.textile(text, encoding=self.encoding)
    else:
        raise NotImplementedError(
            u"Unsupported format %s, cannot parse" % self.format)
def parse(self, text):
    """Parse ``text`` and render it as HTML according to ``self.format``.

    Supported values of ``self.format`` are ``'markdown'`` (rendered with
    the extensions listed in ``self.md_extensions``),
    ``'restructuredtext'`` and ``'textile'``.

    :param text: source markup to render; a leading unicode BOM is
        dropped before markdown rendering.
    :returns: the rendered HTML. For reStructuredText, every
        ``(pattern, replacement, mode)`` entry of ``self.RST_REPLACEMENTS``
        is applied in order and the result is stripped of surrounding
        whitespace.
    :raises RuntimeError: if the module required by the format cannot be
        imported.
    :raises NotImplementedError: if ``self.format`` is not supported.
    """
    if self.format == 'markdown':
        try:
            import markdown
        except ImportError:
            raise RuntimeError(u"Looks like markdown is not installed")
        if text.startswith(u'\ufeff'):  # check for unicode BOM
            text = text[1:]
        # Extensions are passed by keyword; recent Python-Markdown releases
        # no longer accept them as a positional argument.
        return markdown.markdown(text, extensions=self.md_extensions)
    elif self.format == 'restructuredtext':
        try:
            from rst import html_body
        except ImportError:
            raise RuntimeError(u"Looks like docutils are not installed")
        html = html_body(text, input_encoding=self.encoding)
        # RST generates pretty much markup to be removed in our case
        for (pattern, replacement, mode) in self.RST_REPLACEMENTS:
            # Compile with the entry's flags, then replace every match.
            html = re.compile(pattern, mode).sub(replacement, html)
        return html.strip()
    elif self.format == 'textile':
        try:
            import textile
        except ImportError:
            raise RuntimeError(u"Looks like textile is not installed")
        return textile.textile(text, encoding=self.encoding)
    else:
        raise NotImplementedError(
            u"Unsupported format %s, cannot parse" % self.format)
def parse(self, text):
    """Parse ``text`` and render it as HTML according to ``self.format``.

    Supported values of ``self.format`` are ``'markdown'`` and
    ``'restructuredtext'``.

    :param text: source markup to render.
    :returns: the rendered HTML. For reStructuredText the ``<div>`` tags,
        the system-message paragraph and the "may not begin with a
        transition" notice are removed, attributes are dropped from
        ``<hN ...>`` and ``<hr ...>`` tags, and the result is stripped of
        surrounding whitespace.
    :raises RuntimeError: if the module required by the format cannot be
        imported.
    :raises NotImplementedError: if ``self.format`` is not supported.
    """
    if self.format == 'markdown':
        try:
            import markdown
        except ImportError:
            raise RuntimeError(u"Looks like markdown is not installed")
        return markdown.markdown(text)
    elif self.format == 'restructuredtext':
        try:
            from rst import html_body
        except ImportError:
            raise RuntimeError(u"Looks like docutils are not installed")
        html = html_body(text, input_encoding=self.encoding)
        # The flags must be passed by keyword: the fourth positional
        # argument of re.sub() is ``count``, so passing flags there silently
        # capped the number of replacements and applied no flags at all.
        # re.DOTALL is deliberately left off: it was never in effect and
        # enabling it would let the patterns run across line boundaries.
        html = re.sub(r'<div.*?>', r'', html, flags=re.UNICODE)
        html = re.sub(r'</div>', r'', html, flags=re.UNICODE)
        html = re.sub(r'<p class="system-message-\w+">.*?</p>', r'', html,
                      flags=re.UNICODE)
        html = re.sub(
            r'Document or section may not begin with a transition\.',
            r'', html, flags=re.UNICODE)
        # Keep only the heading level, e.g. <h1 class="title"> -> <h1>.
        html = re.sub(r'<h(\d+?).*?>', r'<h\1>', html, flags=re.UNICODE)
        html = re.sub(r'<hr.*?>\n', r'<hr />\n', html, flags=re.UNICODE)
        return html.strip()
    else:
        raise NotImplementedError(
            u"Unsupported format %s, cannot parse" % self.format)
def parse(self, text):
    """Parse ``text`` and render it as HTML according to ``self.format``.

    Supported values of ``self.format`` are ``'markdown'`` (rendered with
    the extensions listed in ``self.md_extensions``),
    ``'restructuredtext'`` and ``'textile'``.

    For reStructuredText the generated HTML is post-processed with the
    ``(pattern, replacement, mode)`` entries of ``self.RST_REPLACEMENTS``
    followed by either ``self.RST_REPLACEMENTS_WITH_HR`` (when the HTML
    already contains an ``<hr>`` tag) or
    ``self.RST_REPLACEMENTS_WITHOUT_HR`` (otherwise, after which the first
    ``<hr />`` in the result is removed).

    :param text: source markup to render; a leading unicode BOM is
        dropped before markdown rendering.
    :returns: the rendered HTML (stripped of surrounding whitespace for
        reStructuredText).
    :raises RuntimeError: if the module required by the format cannot be
        imported.
    :raises NotImplementedError: if ``self.format`` is not supported.
    """
    if self.format == 'markdown':
        try:
            import markdown
        except ImportError:
            raise RuntimeError(u"Looks like markdown is not installed")
        if text.startswith(u'\ufeff'):  # check for unicode BOM
            text = text[1:]
        # Extensions are passed by keyword; recent Python-Markdown releases
        # no longer accept them as a positional argument.
        return markdown.markdown(text, extensions=self.md_extensions)
    elif self.format == 'restructuredtext':
        try:
            from rst import html_body
        except ImportError:
            raise RuntimeError(u"Looks like docutils are not installed")
        html = html_body(text, input_encoding=self.encoding)
        # RST generates pretty much markup to be removed in our case
        # The <hr> check is made on the raw HTML, before any replacement.
        has_manual_hr = re.search('<hr.*?>', html) is not None
        if has_manual_hr:
            # writing with manual hr
            extra = self.RST_REPLACEMENTS_WITH_HR
        else:
            # we add hr before every <hx>
            extra = self.RST_REPLACEMENTS_WITHOUT_HR
        for (pattern, replacement, mode) in self.RST_REPLACEMENTS + extra:
            html = re.compile(pattern, mode).sub(replacement, html)
        if not has_manual_hr:
            # replace the first one
            html = re.sub('<hr />', '', html, count=1)
        return html.strip()
    elif self.format == 'textile':
        try:
            import textile
        except ImportError:
            raise RuntimeError(u"Looks like textile is not installed")
        return textile.textile(text, encoding=self.encoding)
    else:
        raise NotImplementedError(
            u"Unsupported format %s, cannot parse" % self.format)
def parse(self, text):
    """Parse ``text`` and render it as HTML according to ``self.format``.

    Supported values of ``self.format`` are ``'markdown'`` and
    ``'restructuredtext'``.

    :param text: source markup to render.
    :returns: the rendered HTML. For reStructuredText every ``class``
        attribute is removed and the resulting bare ``<div>`` / ``</div>``
        tags are dropped.
    :raises RuntimeError: if the module required by the format cannot be
        imported.
    :raises NotImplementedError: if ``self.format`` is not supported.
    """
    if self.format == 'markdown':
        try:
            import markdown
        except ImportError:
            raise RuntimeError(u"Looks like markdown is not installed")
        return markdown.markdown(text)
    elif self.format == 'restructuredtext':
        try:
            from rst import html_body
        except ImportError:
            raise RuntimeError(u"Looks like docutils are not installed")
        html = html_body(text, input_encoding=self.encoding,
                         output_encoding=self.encoding)
        # Match one attribute value at a time. A greedy ``.+`` would run
        # from the first class attribute on a line to the last double quote
        # on it, deleting any tags and text in between.
        classless_html = re.sub(r' class="[^"]*"', '', html)
        return classless_html.replace('<div>', '').replace('</div>', '')
    else:
        raise NotImplementedError(u"Unsupported format, cannot parse")
# -*- coding: utf-8 -*-
# Render a reStructuredText file to an HTML body and print it to stdout.
#
# USAGE:
#   python parse.py doc/index.rst
#
# Encoding experiments kept for reference:
# print "Latin-1:", "unicode über alles!".decode('utf-8').encode('latin-1')
# print "Utf-8:", "unicode über alles!".decode('utf-8').encode('utf-8')
# print "Windows:", "unicode über alles!".decode('utf-8').encode('cp1252')
import codecs
import os
import sys

# XXX: output to stdout will raise an error if the default encoding is not
# set (Python 2 only hack):
# http://stackoverflow.com/questions/3828723/why-we-need-sys-setdefaultencodingutf-8-in-a-py-script
reload(sys)
sys.setdefaultencoding("utf-8")

from rst import html_body

if len(sys.argv) < 2:
    # Stop here: without an argument there is no path to check, and falling
    # through used to fail with a NameError on the unbound name ``f``.
    print('Error:no file')
    sys.exit(1)

f = sys.argv[1]
if os.path.isfile(f):
    with codecs.open(f, 'r', 'utf-8') as rst:
        d = rst.read()
    # codecs.open() already returns decoded text, so it is handed to
    # html_body() as is; the former extra .decode('utf8') was a no-op
    # round trip through the default encoding.
    print(html_body(d))
else:
    print('Error:not a valid file')