def to_html(cls, kb_entry):
    """Convert the wiki markup of *kb_entry* into serialized XHTML.

    The entry's ``body`` is parsed under the title ``kb_entry.subject``
    with a ``cls.NOCDB`` instance as wiki database, preprocessed, and
    written with ``MWXHTMLWriter``.

    Returns the result of ``ET.tostring`` on the writer's ``xmlbody``.
    """
    # Carriage returns are stripped before the text reaches the parser.
    wikitext = kb_entry.body.replace("\r", "")
    tree = parseString(
        title=kb_entry.subject,
        raw=wikitext,
        wikidb=cls.NOCDB(kb_entry),
    )
    preprocess(tree)

    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    return ET.tostring(writer.xmlbody)
def getXHTML(wikitext):
    """Parse *wikitext* and return what ``MWXHTMLWriter.asstring()`` yields.

    The text is parsed under the fixed title ``"test"`` against a
    ``DummyDB``.  After preprocessing, the tree is handed to ``show``
    together with ``sys.stdout`` (presumably a debug dump of the parse
    tree -- confirm against ``show``).
    """
    tree = parseString(title="test", raw=wikitext, wikidb=DummyDB())
    preprocess(tree)
    show(sys.stdout, tree)

    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    return writer.asstring()
def getXHTML(wikitext):
    """Parse *wikitext* and return what ``MWXHTMLWriter.asstring()`` yields.

    The text is parsed with an empty title against a ``DummyDB`` and
    preprocessed.  Only the ``writeBook`` call runs inside the
    ``SuppressOutput`` context manager (presumably to silence the
    writer's console output -- confirm against ``SuppressOutput``).
    """
    tree = parseString(title="", raw=wikitext, wikidb=DummyDB())
    preprocess(tree)

    writer = MWXHTMLWriter()
    with SuppressOutput():
        writer.writeBook(tree)
    return writer.asstring()
def getXHTML(wikitext, title, language):
    """Parse *wikitext* and return what ``MWXHTMLWriter.asstring()`` yields.

    wikitext -- raw markup handed to ``parseString`` as ``raw``
    title    -- title handed to ``parseString``
    language -- handed to ``parseString`` as ``lang``

    Returns ``None`` when ``parseString`` gives back a falsy result.
    """
    wikidb = DummyDB()
    # Page lookups on the dummy database are replaced by ``noop``.
    wikidb.normalize_and_get_page = noop

    tree = parseString(title=title, raw=wikitext, wikidb=wikidb, lang=language)
    if not tree:
        return None

    preprocess(tree)
    removeLangLinks(tree)

    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    return writer.asstring()
def _servXML(self, args, query, dialect="mwxml"):
    """Fetch an article, convert it to the requested XML dialect and send it.

    args    -- list of path components; the last one is popped and used as
               the article title, the remaining ones are joined with "/" to
               form the host part of the base URL (``default_baseurl`` when
               nothing remains).
    query   -- mapping of option names to lists of values.  Only "debug"
               and "imageresolver" are accepted; a missing "debug" entry is
               inserted into *query* with ``[default_debug]``.
    dialect -- "mwxhtml", "mwxml" or "dbxml".

    On success the document is written to ``self.wfile`` as a 200 response
    with content type ``text/xml``.  Missing article name or unknown
    options are reported through ``self._doc(error=...)``.

    Raises ``Exception`` when *dialect* is not one of the supported values
    (only after the article has already been fetched and parsed).
    """
    if not args:
        self._doc(error="require articlename")
        return
    unknown = [k for k in query if k not in ("debug", "imageresolver")]
    if unknown:
        return self._doc(error="unknown option %r" % unknown)

    title = args.pop()
    base_url = "http://%s/" % ("/".join(args) or default_baseurl)
    # NOTE(review): bool() of the first value means any non-empty string
    # (including "0") switches debug on -- confirm this is intended.
    debug = bool(query.setdefault("debug", [default_debug])[0])
    # FIXME: language ("en") and namespace ("en.wikipedia.org") used to be
    # hard-coded here as unused locals; they are not passed on to anything.

    # Single pre-formatted argument so the trace line is the same whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print("_servXML %s %s %s" % (title, base_url, debug))

    db = mwapidb.WikiDB(base_url)
    db.print_template = None  # deactivate print template lookups
    tree = db.getParsedArticle(title, revision=None)

    # NOTE(review): the "imageresolver" query option is accepted above but
    # not read here; the writers get the module-level ``imagesrcresolver``.
    if dialect == "mwxhtml":
        xhtmlwriter.preprocess(tree)
        dbw = xhtmlwriter.MWXHTMLWriter(imagesrcresolver=imagesrcresolver,
                                        debug=False)
    elif dialect == "mwxml":
        advtree.buildAdvancedTree(tree)  # this should be optional
        dbw = xhtmlwriter.MWXMLWriter()  # 1:1 XML from parse tree
    elif dialect == "dbxml":
        from mwlib import docbookwriter
        docbookwriter.preprocess(tree)
        dbw = docbookwriter.DocBookWriter(imagesrcresolver=imagesrcresolver,
                                          debug=debug)
    else:
        raise Exception("unknown export")

    dbw.writeBook(tree)
    if debug:
        dbw.writeparsetree(tree)
    response = dbw.asstring()

    self.send_response(200)
    self.send_header("Content-type", "text/xml")
    self.send_header("Content-length", str(len(response)))
    self.end_headers()
    self.wfile.write(response)
    # push the buffered response out to the client
    self.wfile.flush()
def to_html(cls, kb_entry):
    """Convert the wiki markup of *kb_entry* into serialized XHTML.

    The entry's ``body`` is parsed under the title ``kb_entry.subject``
    with a ``cls.NOCDB`` instance as wiki database, preprocessed, and
    written with ``MWXHTMLWriter``.

    Returns the result of ``ET.tostring`` on the writer's ``xmlbody``.
    Raises ``ImportError`` when mwlib or an ElementTree implementation is
    not installed.
    """
    # Dependencies are imported at call time, inside the function.
    from mwlib.uparser import parseString
    from mwlib.xhtmlwriter import MWXHTMLWriter, preprocess
    try:
        import xml.etree.ElementTree as ET
    except ImportError:
        # Only a missing stdlib module should trigger the fallback to the
        # standalone ``elementtree`` package; other errors must surface.
        from elementtree import ElementTree as ET

    # Carriage returns are stripped before the text reaches the parser.
    r = kb_entry.body.replace("\r", "")
    parsed = parseString(title=kb_entry.subject, raw=r,
                         wikidb=cls.NOCDB(kb_entry))
    preprocess(parsed)
    xhtml = MWXHTMLWriter()
    xhtml.writeBook(parsed)
    return ET.tostring(xhtml.xmlbody)
def get_xhtml(wikitext):
    """Render *wikitext* with ``MyWriter`` and tidy the resulting markup.

    The text is parsed with an empty title, preprocessed and written by
    ``MyWriter``; the string from ``asstring()`` is then passed through a
    fixed sequence of ``re.sub`` substitutions and returned.
    """
    tree = parseString(title="", raw=wikitext)
    preprocess(tree)
    writer = MyWriter()
    writer.writeBook(tree)

    # (pattern, replacement) pairs.  They are regular expressions (so the
    # "." in "mwx.article" matches any character) and must be applied in
    # exactly this order, because later patterns see earlier results.
    cleanups = (
        ('<p />', ''),
        ('<p> ', '<p>'),
        (' </p>', '</p>'),
        ('</p><p>', '</p>\n<p>'),
        (' <br />       ', '</p>\n<p>'),
        ('      ', '<p>'),
        ('</dd><dd>', '</dd>\n<dd>'),
        ('<body><div class="mwx.article"><h1 />', ''),
        ('</div></body>', ''),
    )

    text = writer.asstring()
    for pattern, replacement in cleanups:
        text = re.sub(pattern, replacement, text)
    return text
def run(self):
    """Render ``self.content`` (lines of wiki markup) as a raw HTML node.

    The lines are joined with newlines, parsed under the title ``'Export'``
    against an empty ``DummyDB``, preprocessed and written with
    ``MWXHTMLWriter``.  The first child of the article element (the H1
    title heading) is removed before serialization.

    Returns a one-element list holding a ``nodes.raw`` node with
    ``format='html'``.
    """
    raw = u'\n'.join(self.content)
    # empty wikidb
    db = DummyDB()
    # run parser and pre-processors
    parsed = parseString(title='Export', raw=raw, wikidb=db)
    preprocess(parsed)
    # write XHTML
    xhtml = MWXHTMLWriter()
    xhtml.writeBook(parsed)
    # remove the H1 heading (title) from the document.  Elements are
    # indexed directly: Element.getchildren() no longer exists on
    # Python 3.9+, while indexing works on every ElementTree version.
    article = xhtml.xmlbody[0]
    article.remove(article[0])
    # render to string
    block = ET.tostring(xhtml.xmlbody)
    return [nodes.raw('', block, format='html')]