def simpleparse(raw):
    """Parse ``raw`` and dump the resulting tree to stdout.

    !!! USE FOR DEBUGGING ONLY !!!
    No post processors are applied to the parsed tree.

    ``raw`` is tokenized with ``scanner.tokenize`` and parsed under the
    placeholder title "unknown".  The tree is printed with ``parser.show``
    and then returned.
    """
    import sys

    tokens = scanner.tokenize(raw)
    tree = parser.Parser(tokens, "unknown").parse()
    # Debug aid: print the tree starting at indentation level 0.
    parser.show(sys.stdout, tree, 0)
    return tree
def write_wiki_html(self, htmlout, title, article_text):
    """Parse ``article_text`` and run the tree through a ``WPHTMLWriter``.

    The text is tokenized and parsed under ``title``, which is also stored
    as the tree's ``caption``.  The writer is created from
    ``self.wikidb.dataretriever``, ``htmlout``, an image database built
    from ``self.base_path + '/images/'`` and ``self.lang``.  Once the tree
    has been written, the writer's ``links_list`` is stored in
    ``self.links_cache`` under ``title``.

    Returns the writer's ``math_processed`` attribute.
    """
    token_stream = scanner.tokenize(article_text, title)
    article = parser.Parser(token_stream, title).parse()
    article.caption = title

    image_store = WPImageDB(self.base_path + '/images/')
    html_writer = WPHTMLWriter(self.wikidb.dataretriever, htmlout,
                               images=image_store, lang=self.lang)
    html_writer.write(article)

    # Keep the links found while writing so they can be looked up by title.
    self.links_cache[title] = html_writer.links_list
    return html_writer.math_processed
def write_wiki_html(self, htmlout, title, article_text):
    """Parse ``article_text`` and run the tree through a ``WPHTMLWriter``.

    The text is tokenized and parsed under ``title``, which is also stored
    as the tree's ``caption``.  The writer is created from ``self.index``,
    ``htmlout``, an image database built from
    ``self.base_path + '/images/'`` and ``self.lang``.

    Returns the writer's ``math_processed`` attribute.
    """
    token_stream = scanner.tokenize(article_text, title)
    article = parser.Parser(token_stream, title).parse()
    article.caption = title

    image_store = WPImageDB(self.base_path + '/images/')
    html_writer = WPHTMLWriter(self.index, htmlout,
                               images=image_store, lang=self.lang)
    html_writer.write(article)
    return html_writer.math_processed
def parseString(title=None, raw=None, wikidb=None, revision=None):
    """Parse the article ``title`` from raw MediaWiki text.

    title    -- article title; required.
    raw      -- raw MediaWiki text.  When None it is fetched with
                ``wikidb.getRawArticle(title, revision=revision)``.
    wikidb   -- optional database object.  When truthy, templates in the
                text are expanded through ``expander.Expander`` before
                parsing.
    revision -- passed through to ``wikidb.getRawArticle``.

    Returns the parsed tree with ``caption`` set to ``title``, after every
    callable in ``postprocessors`` has been applied to it.

    Raises AssertionError when ``title`` is None or when no article text
    could be obtained.
    """
    # Explicit raises instead of ``assert`` so that the checks are not
    # stripped under ``python -O``.  AssertionError is kept on purpose so
    # existing callers observe the same exception type and message.
    if title is None:
        raise AssertionError()
    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
    if raw is None:
        raise AssertionError("cannot get article %r" % (title, ))

    if wikidb:
        template_expander = expander.Expander(raw, pagename=title, wikidb=wikidb)
        text = template_expander.expandTemplates()
    else:
        # Without a database there is nothing to expand templates against.
        text = raw

    tokens = scanner.tokenize(text, title)
    article = parser.Parser(tokens, title).parse()
    article.caption = title
    for postprocess in postprocessors:
        postprocess(article)
    return article
def _parse(txt):
    """Parse ``txt`` and try to hand back a 'better' (inner) node.

    The parse result is a ``parser.Article``.  Unless it has exactly one
    child, the article itself is returned with its class switched to
    ``parser.Node``.  Otherwise that single child is inspected: a
    ``parser.Paragraph`` is switched to ``parser.Node``, and if the child
    in turn has exactly one child, that grandchild is returned instead.
    """
    from mwlib import scanner, parser

    tokens = scanner.tokenize(txt)
    article = parser.Parser(tokens, "unknown").parse()

    # No single top-level child to descend into: return the whole tree
    # as a plain Node.
    if len(article.children) != 1:
        article.__class__ = parser.Node
        return article

    inner = article.children[0]
    if inner.__class__ == parser.Paragraph:
        inner.__class__ = parser.Node

    # Unwrap one more level when the inner node is just a single-child wrapper.
    if len(inner.children) == 1:
        return inner.children[0]
    return inner
def parseString(
    title=None,
    raw=None,
    wikidb=None,
    revision=None,
    lang=None,
    interwikimap=None,
):
    """Parse the article ``title`` from raw MediaWiki text.

    title        -- article title; required.
    raw          -- raw MediaWiki text.  When None it is fetched with
                    ``wikidb.getRawArticle(title, revision=revision)``.
    wikidb       -- optional database object.  When truthy, templates are
                    expanded through ``expander.Expander`` and missing
                    ``lang`` / ``interwikimap`` values are looked up on it.
    revision     -- passed through to the ``wikidb`` lookups and to the
                    post processors.
    lang         -- language passed to the parser.  When None and
                    ``wikidb`` offers ``getSource``, the 'language' entry
                    of the returned source is used.
    interwikimap -- interwiki map passed to the parser.  When None and
                    ``wikidb`` offers ``getInterwikiMap``, its result is
                    used.

    Returns the parsed tree with ``caption`` set to ``title``, after every
    callable in ``postprocessors`` has been applied to it.

    Raises AssertionError when ``title`` is None or when no article text
    could be obtained.
    """
    # Explicit raises instead of ``assert`` so that the checks are not
    # stripped under ``python -O``.  AssertionError is kept on purpose so
    # existing callers observe the same exception type and message.
    if title is None:
        raise AssertionError('no title given')
    if raw is None:
        raw = wikidb.getRawArticle(title, revision=revision)
    if raw is None:
        raise AssertionError("cannot get article %r" % (title, ))

    if wikidb:
        template_expander = expander.Expander(raw, pagename=title, wikidb=wikidb)
        text = template_expander.expandTemplates()

        # Fill in values the caller did not supply, but only from
        # databases that actually provide the corresponding lookup.
        if lang is None and hasattr(wikidb, 'getSource'):
            source = wikidb.getSource(title, revision=revision)
            if source:
                lang = source.get('language')
        if interwikimap is None and hasattr(wikidb, 'getInterwikiMap'):
            interwikimap = wikidb.getInterwikiMap(title, revision=revision)
    else:
        # Without a database there is nothing to expand templates against.
        text = raw

    tokens = scanner.tokenize(text, title)
    article = parser.Parser(tokens, title, lang=lang, interwikimap=interwikimap).parse()
    article.caption = title
    for postprocess in postprocessors:
        postprocess(article, title=title, revision=revision, wikidb=wikidb, lang=lang)
    return article