def test_colspan():
    """Cells with invalid (non-numeric or negative) colspan fall back to 1."""
    raw = '''<table><tr><td colspan="bogus">no colspan </td></tr></table>'''
    r = parseString(title='t', raw=raw)
    buildAdvancedTree(r)
    # BUG FIX: compare ints with ==, not `is` — identity only happens to work
    # via CPython's small-int cache and is not a guaranteed behavior.
    assert r.getChildNodesByClass(Cell)[0].colspan == 1
    raw = '''<table><tr><td colspan="-1">no colspan </td></tr></table>'''
    r = parseString(title='t', raw=raw)
    buildAdvancedTree(r)
    assert r.getChildNodesByClass(Cell)[0].colspan == 1
    raw = '''<table><tr><td colspan="2">colspan1</td></tr></table>'''
    r = parseString(title='t', raw=raw)
    buildAdvancedTree(r)
    assert r.getChildNodesByClass(Cell)[0].colspan == 2
def convert_pagecontent(title, content):
    """Convert Mediawiki-formatted *content* into Dokuwiki format.

    mwlib discards the content of <nowiki> tags and substitutes plaintext
    parsed-HTML versions (pragmatic, but not what we want).  So each
    <nowiki> block is stashed verbatim and replaced with a magic
    <__yamdwe_nowiki>INDEX</__yamdwe_nowiki> placeholder; the stashed
    blocks reach the converter through the context dict.
    """
    saved_blocks = []

    def stash_nowiki(match):
        saved_blocks.append(match.group(0))
        return "<__yamdwe_nowiki>%d</__yamdwe_nowiki>" % (len(saved_blocks) - 1,)

    content = re.sub(r"<nowiki>.+?</nowiki>", stash_nowiki, content)
    root = uparser.parseString(title, content)  # create parse tree
    context = {
        "list_stack": [],
        # hacky way of attaching the saved blocks to child nodes
        "nowiki_plaintext": saved_blocks,
    }
    result = convert(root, context, False)
    # mwlib doesn't parse NOTOC, so check for it manually
    if re.match(r"^\s*__NOTOC__\s*$", content, re.MULTILINE):
        prefix = "~~NOTOC~~" if result.startswith("\n") else "~~NOTOC~~\n"
        result = prefix + result
    return result
def test_image_link():
    """A Japanese-prefixed image link resolves to the image namespace (6)."""
    tree = uparser.parseString('', u'[[画像:Tajima mihonoura03s3200.jpg]]', lang='ja')
    link = tree.find(parser.ImageLink)[0]
    assert link.target == u'画像:Tajima mihonoura03s3200.jpg'
    assert link.namespace == 6, "wrong namespace"
def convert_pagecontent(title, content):
    """Convert a Mediawiki content string into a Dokuwiki content string.

    Works around mwlib discarding <nowiki> content (it substitutes a
    plaintext-parsed HTML rendering instead): every <nowiki> block is
    saved verbatim and swapped for a magic
    <__yamdwe_nowiki>INDEX</__yamdwe_nowiki> placeholder tag; the saved
    blocks travel to the converter inside the context dict.
    """
    nowiki_plaintext = []

    def add_nowiki_block(match):
        index = len(nowiki_plaintext)
        nowiki_plaintext.append(match.group(0))
        return "<__yamdwe_nowiki>%d</__yamdwe_nowiki>" % (index,)

    content = re.sub(r"<nowiki>.+?</nowiki>", add_nowiki_block, content)
    # create parse tree
    root = uparser.parseString(title, content)
    context = {}
    context["list_stack"] = []
    # hacky way of attaching to child nodes
    context["nowiki_plaintext"] = nowiki_plaintext
    result = convert(root, context, False)
    # mwlib doesn't parse NOTOC, so check for it manually
    if re.match(r"^\s*__NOTOC__\s*$", content, re.MULTILINE):
        if result.startswith("\n"):
            result = "~~NOTOC~~" + result
        else:
            result = "~~NOTOC~~\n" + result
    return result
def test_identity():
    """Sibling bookkeeping and ==/is semantics of BreakingReturn nodes.

    After buildAdvancedTree, every <br/> node must appear in its own
    ``siblings`` list, index consistently into its parent's children, and
    compare equal (==) yet not identical (is) to the other <br/> nodes.
    """
    # NOTE(review): this markup was flattened onto one line in this view; the
    # separators between the <br/> tags may originally have been newlines.
    raw = """ <br/> <br/> <br/> <br/> <br/> <br/> <br/> <br/> """.decode("utf8")
    db = DummyDB()
    r = parseString(title="X33", raw=raw, wikidb=db)
    buildAdvancedTree(r)
    _treesanity(r)
    brs = r.getChildNodesByClass(BreakingReturn)
    for i, br in enumerate(brs):
        # each node must know itself as one of its siblings
        assert br in br.siblings
        # _idIndex locates by identity, so position must match enumeration
        assert i == _idIndex(br.parent.children, br)
        # all other children of the parent are the remaining <br/> nodes
        assert len([x for x in br.parent.children if x is not br]) == len(brs) - 1
    # `br` is still bound to the last node from the loop above
    for bbr in brs:
        if br is bbr:
            continue
        # equal by value, but distinct objects
        assert br == bbr
        assert br is not bbr
def test_copy():
    """A deep copy of an advanced tree mirrors captions, classes and arity."""
    raw = """ ===[[Leuchtturm|Leuchttürme]] auf Fehmarn=== *[[Leuchtturm Flügge]] super da *[[Leuchtturm Marienleuchte]] da auch *[[Leuchtturm Strukkamphuk]] spitze *[[Leuchtturm Staberhuk]] supi *[[Leuchtturm Westermarkelsdorf]] """.decode("utf8")
    wikidb = DummyDB()
    tree = parseString(title="X33", raw=raw, wikidb=wikidb)
    buildAdvancedTree(tree)
    duplicate = tree.copy()
    _treesanity(duplicate)

    def _assert_equal_trees(lhs, rhs):
        # same caption, same node class, same number of children — recursively
        assert lhs.caption == rhs.caption
        assert lhs.__class__ == rhs.__class__
        assert len(lhs.children) == len(rhs.children)
        for idx, child in enumerate(lhs):
            _assert_equal_trees(child, rhs.children[idx])

    _assert_equal_trees(tree, duplicate)
def getUserLinks(raw):
    """Return the sorted, de-duplicated targets of user-page links in *raw*.

    NOTE(review): ``title`` and ``wikidb`` are free variables here — this
    function appears to be defined inside a scope that provides them.
    """
    def isUserLink(node):
        # namespace 2 == NS_USER
        return isinstance(node, parser.NamespaceLink) and node.namespace == 2

    # IDIOM FIX: sorted() replaces the list()+.sort() two-step and matches
    # the sibling implementation of this helper elsewhere in the codebase.
    return sorted(set(
        u.target
        for u in uparser.parseString(title, raw=raw, wikidb=wikidb).filter(isUserLink)
    ))
def parse_tree(file_name):
    """Parse every recipe page in the dump at *file_name*.

    Returns a list of (pageid, title, parse_tree) tuples.
    """
    # BUG FIX: the file_name parameter was ignored and the hard-coded path
    # "data/wiki-data.xml" was parsed instead.
    dp = dumpparser.DumpParser(file_name)
    data = []
    for recipe in dp:
        # NOTE(review): recipe.text is passed as both title and raw text —
        # confirm whether recipe.title was intended as the first argument.
        data.append(
            (recipe.pageid, recipe.title, parser.parseString(recipe.text, recipe.text))
        )
    return data
def test_tag_expand_vs_uniq():
    """{{#tag:pre|...}} must expand into a single PreFormatted node."""
    wikidb = DictDB(Foo="""{{#tag:pre|inside pre}}""")
    tree = uparser.parseString(title="Foo", wikidb=wikidb)
    core.show(tree)
    matches = tree.find(parser.PreFormatted)
    assert len(matches) == 1, "expected a preformatted node"
def to_html(cls, kb_entry):
    """Render a knowledge-base entry's Mediawiki body to an XHTML string."""
    body = kb_entry.body.replace("\r", "")  # drop carriage returns
    tree = parseString(title=kb_entry.subject, raw=body, wikidb=cls.NOCDB(kb_entry))
    preprocess(tree)
    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    return ET.tostring(writer.xmlbody)
def test_tag_expand_vs_uniq():
    """The #tag parser function must yield exactly one PreFormatted node."""
    db = DictDB(Foo="""{{#tag:pre|inside pre}}""")
    root = uparser.parseString(title="Foo", wikidb=db)
    core.show(root)
    found = root.find(parser.PreFormatted)
    assert 1 == len(found), "expected a preformatted node"
def getXHTML(wikitext):
    """Render *wikitext* to an XHTML string, silencing writer output."""
    tree = parseString(title="", raw=wikitext, wikidb=DummyDB())
    preprocess(tree)
    writer = MWXHTMLWriter()
    # the writer is chatty; suppress its stdout while rendering
    with SuppressOutput():
        writer.writeBook(tree)
    return writer.asstring()
def getAdvTree(fn):
    """Parse the UTF-8 wiki file *fn* and return its advanced parse tree."""
    from mwlib.dummydb import DummyDB
    from mwlib.uparser import parseString
    db = DummyDB()
    # BUG FIX: the file handle was never closed; a context manager closes it
    # even if reading or decoding raises.
    with open(fn) as src:
        raw = unicode(src.read(), 'utf8')
    r = parseString(title=fn, raw=raw, wikidb=db)
    buildAdvancedTree(r)
    return r
def getXHTML(wikitext):
    """Render *wikitext* to XHTML, dumping the parse tree to stdout first."""
    tree = parseString(title="test", raw=wikitext, wikidb=DummyDB())
    preprocess(tree)
    show(sys.stdout, tree)  # debugging aid: print the preprocessed tree
    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    return writer.asstring()
def parse():
    """CLI entry point: parse the given articles (or all of them with -a/--all).

    For each article prints "G <elapsed> <title>" on success or
    "F <title> <error>" on failure; --tb additionally prints a traceback.
    """
    parser = optparse.OptionParser(
        usage="%prog [-a|--all] --config CONFIG [ARTICLE1 ...]")
    parser.add_option("-a", "--all", action="store_true",
                      help="parse all articles")
    parser.add_option("--tb", action="store_true", help="show traceback on error")
    parser.add_option("-c", "--config", help="configuration file/URL/shortcut")
    options, args = parser.parse_args()
    if not args and not options.all:
        parser.error("missing option.")
    if not options.config:
        parser.error("missing --config argument")
    articles = [unicode(x, 'utf-8') for x in args]
    conf = options.config
    import traceback
    from mwlib import wiki, uparser
    w = wiki.makewiki(conf)
    db = w.wiki
    if options.all:
        # iterating all articles requires backend support
        if not hasattr(db, "articles"):
            raise RuntimeError(
                "%s does not support iterating over all articles" % (db, ))
        articles = db.articles()
    import time
    for x in articles:
        try:
            page = db.normalize_and_get_page(x, 0)
            if page:
                raw = page.rawtext
            else:
                raw = None
            # yes, raw can be None, when we have a redirect to a non-existing article.
            if raw is None:
                continue
            stime = time.time()
            a = uparser.parseString(x, raw=raw, wikidb=db)
        except Exception as err:
            print "F", repr(x), err
            if options.tb:
                traceback.print_exc()
        else:
            # success: report the parse time
            print "G", time.time() - stime, repr(x)
def parse_wiki(name, wiki, make_math_png=False):
    """Render the wikitext *wiki* (titled *name*) to an HTML string."""
    wikidb = cdbwiki.WikiDB(default_wiki_dir)
    article = uparser.parseString(name, raw=wiki, wikidb=wikidb)
    out_buf = StringIO.StringIO()
    # math rendering is lazy unless PNG output was explicitly requested
    math_renderer = rendermath.Renderer(basedir=default_math_dir,
                                        lazy=(not make_math_png))
    writer = htmlwriter.HTMLWriter(out_buf, images=None,
                                   math_renderer=math_renderer)
    writer.write(article)
    return out_buf.getvalue()
def simpleparse(raw):  # !!! USE FOR DEBUGGING ONLY !!!
    """Parse raw UTF-8 wikitext, print the advanced tree, and return it."""
    import sys
    from mwlib import dummydb, parser
    from mwlib.uparser import parseString
    decoded = raw.decode('utf8')
    tree = parseString(title="title", raw=decoded, wikidb=dummydb.DummyDB())
    buildAdvancedTree(tree)
    parser.show(sys.stdout, tree, 0)
    return tree
def getParsedArticle(self, title, revision=None):
    """Fetch and parse an article; returns None when it does not exist."""
    raw = self.getRawArticle(title, revision=revision)
    if raw is None:
        return None
    article = self._getArticle(title, revision=revision)
    # pick up the article's language from its source metadata, if any
    source = self.getSource(title, revision=revision)
    lang = source.get('language') if source is not None else None
    return uparser.parseString(title=title, raw=raw, wikidb=self, lang=lang)
def test_ulist():
    """http://code.pediapress.com/wiki/ticket/222"""
    raw = u""" * A item *: B Previous item continues. """
    tree = parseString(title='t', raw=raw)
    buildAdvancedTree(tree)
    # a continuation line must not create a second list item
    assert len(tree.getChildNodesByClass(Item)) == 1
def getXHTML(wikitext, title, language):
    """Render *wikitext* to XHTML with language links stripped.

    Returns None when the parse yields no tree.
    """
    db = DummyDB()
    db.normalize_and_get_page = noop  # stub out page lookups on the dummy db
    tree = parseString(title=title, raw=wikitext, wikidb=db, lang=language)
    if not tree:
        return None
    preprocess(tree)
    removeLangLinks(tree)
    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    return writer.asstring()
def getXML(wikitext): db = DummyDB() r = parseString(title="test", raw=wikitext, wikidb=db) print "before preprocess" show(sys.stdout, r) preprocess(r) print "after preprocess" show(sys.stdout, r) dbw = DocBookWriter() dbw.dbwriteArticle(r) return dbw.asstring()
def parse(self, title, text): out = StringIO.StringIO() metadata = defaultdict(list) parsed = parseString(title, raw=text, wikidb = self.db) w = htmlwriter.HTMLWriter(out, metadata , self.options) print metadata w.write(parsed) # meta_data = [ ('key-word', ['first']), ("category", ['pierwsza', 'druga', 'trzecia']) ] return (metadata, out.getvalue())
def main(titulo, archin, archout):
    """Parse the UTF-8 wiki file *archin* and write it as HTML to *archout*."""
    # BUG FIX: the file handles were left open if parsing or writing raised;
    # context managers guarantee both are closed.
    with codecs.open(archin, "r", "utf8") as inp:
        article = inp.read()
    with codecs.open(archout, "w", "utf8") as out:
        tree = uparser.parseString(titulo, raw=article, wikidb=dummydb.DummyDB())
        writer = htmlwriter.HTMLWriter(out)
        writer.write(tree)
def getXML(wikitext):
    """Render *wikitext* to validated ODF XML and return it as a string."""
    root = parseString(title="test", raw=wikitext, wikidb=DummyDB())
    advtree.buildAdvancedTree(root)
    preprocess(root)
    mwlib.parser.show(sys.stdout, root)  # debugging aid
    writer = ODFWriter()
    writer.writeTest(root)
    validate(writer)
    # the returned XML is useful to inspect when debugging
    return writer.asstring()
def getUserLinks(raw):
    """Return the sorted unique targets of all user-namespace links in *raw*.

    NOTE: ``title`` and ``wikidb`` are free variables provided by the
    enclosing scope.
    """
    def is_user_link(node):
        # namespace 2 == NS_USER
        return isinstance(node, parser.NamespaceLink) and node.namespace == 2

    targets = set(
        u.target
        for u in uparser.parseString(title, raw=raw, wikidb=wikidb,
                                     ).filter(is_user_link)
    )
    return sorted(targets)
def convert_pagecontent(title, content):
    """ Convert a string in Mediawiki content format to a string in Dokuwiki content format. """
    # wrap the "magic" marker tag <__mw_nowiki> around <nowiki>, as
    # mwlib just discards it otherwise and we can't detect it within the parser.
    # We keep the inner <nowiki> so the mwlib parser still skips that content.
    #
    # BUG FIX: the pattern used greedy ".+", which merged everything between
    # the first <nowiki> and the last </nowiki> on the page into one marker
    # when a page contained several nowiki blocks; non-greedy ".+?" wraps
    # each block separately.
    content = re.sub(r"<nowiki>.+?</nowiki>",
                     lambda e: "<__mw_nowiki>" + e.group(0) + "</__mw_nowiki>",
                     content)
    root = uparser.parseString(title, content)  # create parse tree
    return convert(root, False)
def parse():
    """CLI entry point: parse the given articles (or all of them with -a/--all).

    Prints "G <elapsed> <title>" per successfully parsed article, or
    "F <title> <error>" on failure (plus a traceback when --tb is set).
    """
    parser = optparse.OptionParser(usage="%prog [-a|--all] --config CONFIG [ARTICLE1 ...]")
    parser.add_option("-a", "--all", action="store_true", help="parse all articles")
    parser.add_option("--tb", action="store_true", help="show traceback on error")
    parser.add_option("-c", "--config", help="configuration file/URL/shortcut")
    options, args = parser.parse_args()
    if not args and not options.all:
        parser.error("missing option.")
    if not options.config:
        parser.error("missing --config argument")
    articles = [unicode(x, 'utf-8') for x in args]
    conf = options.config
    import traceback
    from mwlib import wiki, uparser
    w = wiki.makewiki(conf)
    db = w.wiki
    if options.all:
        # iterating all articles requires backend support
        if not hasattr(db, "articles"):
            raise RuntimeError("%s does not support iterating over all articles" % (db, ))
        articles = db.articles()
    import time
    for x in articles:
        try:
            page = db.normalize_and_get_page(x, 0)
            if page:
                raw = page.rawtext
            else:
                raw = None
            # yes, raw can be None, when we have a redirect to a non-existing article.
            if raw is None:
                continue
            stime = time.time()
            a = uparser.parseString(x, raw=raw, wikidb=db)
        except Exception, err:
            print "F", repr(x), err
            if options.tb:
                traceback.print_exc()
        else:
            # success: report the parse time
            print "G", time.time() - stime, repr(x)
def main():
    """Segment every page in the content wiki, expanding templates from a separate template wiki."""
    segmenter = MediaWikiWikiSegmenter()
    # templates are expanded against a separate (Japanese) template database
    templdb = nuwiki.adapt(WikiDB(templdbPath, lang="ja"))
    contentdb = WikiDB(contentdbPath, lang="ja")
    for title, text in contentdb.reader.iteritems():
        tree = parseString(title=title, raw=text, wikidb=templdb)
        # progress indicator on stderr, so stdout stays clean for the output
        print >>sys.stderr, title.encode("utf-8")
        output = segmenter.traverse(tree, [], 0)
        output = segmenter.cleanOutput(output)
        segmenter.printOutput(output, False)
def test_attributes():
    """Row attributes parse into plain dicts, with style keys lowercased."""
    # NOTE(review): this table markup was flattened onto one line in this
    # view; the row/cell separators are normally on separate lines.
    t1 = ''' {| |- STYLE="BACKGROUND:#FFDEAD;" |stuff |} '''
    r = parseString(title='t', raw=t1)
    buildAdvancedTree(r)
    n = r.getChildNodesByClass(Row)[0]
    print n.attributes, n.style
    # both attribute views are plain dicts
    assert isinstance(n.style, dict)
    assert isinstance(n.attributes, dict)
    # the STYLE key is normalized to lowercase; the value keeps its case
    assert n.style["background"] == "#FFDEAD"
def test_defintion_list():
    """http://code.pediapress.com/wiki/ticket/221"""
    raw = u''';termA :descr1 '''
    for _ in range(2):
        tree = parseString(title='t', raw=raw)
        buildAdvancedTree(tree)
        lists = tree.getChildNodesByClass(DefinitionList)
        # exactly one definition list with both a term and a description
        assert len(lists) == 1
        assert lists[0].getChildNodesByClass(DefinitionTerm)
        assert lists[0].getChildNodesByClass(DefinitionDescription)
        # second pass re-parses the same markup with newlines stripped
        raw = raw.replace('\n', '')
def convert_pagecontent(title, content):
    """ Convert a string in Mediawiki content format to a string in Dokuwiki content format. """
    # wrap the "magic" marker tag <__mw_nowiki> around <nowiki>, as
    # mwlib just discards it otherwise and we can't detect it within the parser.
    # We keep the inner <nowiki> so the mwlib parser still skips that content.
    #
    # BUG FIX: greedy ".+" swallowed everything from the first <nowiki> to
    # the final </nowiki> when a page had multiple nowiki blocks; non-greedy
    # ".+?" marks each block separately.
    content = re.sub(r"<nowiki>.+?</nowiki>",
                     lambda e: "<__mw_nowiki>" + e.group(0) + "</__mw_nowiki>",
                     content)
    root = uparser.parseString(title, content)  # create parse tree
    return convert(root, False)
def main():
    """Convert each wiki file given on the command line to <name>.html."""
    for fn in sys.argv[1:]:
        from mwlib.dummydb import DummyDB
        from mwlib.uparser import parseString
        db = DummyDB()
        # BUG FIX: neither the input nor the output file handle was closed;
        # context managers close them even when parsing or writing raises.
        with open(fn) as src:
            raw = unicode(src.read(), 'utf8')
        r = parseString(title=fn, raw=raw, wikidb=db)
        parser.show(sys.stdout, r)
        preprocess(r)
        parser.show(sys.stdout, r)
        dbw = MWXHTMLWriter()
        dbw.writeBook(r)
        with open("%s.html" % fn, "w") as nf:
            nf.write(dbw.asstring())
def html():
    """CLI entry point: render each given article to HTML and open it in a browser."""
    parser = optparse.OptionParser(usage="%prog --conf CONF ARTICLE [...]")
    parser.add_option("-c", "--conf", help="config file")
    options, args = parser.parse_args()
    if not args:
        parser.error("missing ARTICLE argument")
    articles = [unicode(x, 'utf-8') for x in args]
    conf = options.conf
    if not options.conf:
        parser.error("missing --conf argument")
    import StringIO
    import tempfile
    import os
    import webbrowser
    from mwlib import wiki, uparser, htmlwriter
    res = wiki.makewiki(conf)
    db = res['wiki']
    images = res['images']
    for title in articles:
        raw = db.getRawArticle(title)
        if not raw:
            continue
        out = StringIO.StringIO()
        out.write("""<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <meta http-equiv="content-type" content="text/html; charset="utf-8"></meta> <link rel="stylesheet" href="pedia.css" /> </head> <body> """)
        # BUG FIX: the original called uparser.parseString(x, ...) where `x`
        # was undefined in this scope (NameError, or a stale module-level
        # leak); parse the current article title instead.
        a = uparser.parseString(title, raw=raw, wikidb=db)
        w = htmlwriter.HTMLWriter(out, images)
        w.write(a)
        fd, htmlfile = tempfile.mkstemp(".html")
        os.close(fd)
        open(htmlfile, "wb").write(out.getvalue().encode('utf-8'))
        webbrowser.open("file://" + htmlfile)
def parse(self):
    """ Create a parse tree and then extract data for article from it. """
    # nothing to do for pages that are absent from the wiki
    if self.missing():
        return
    self.parsetree = uparser.parseString(title=self.title, raw=self.wikitext)
    text, links = get_text_and_links(self.parsetree, self.ignoreSections)
    # join the fragments and strip leading spaces/newlines
    self.plaintext = u''.join(text).lstrip(' \n')
    self.links = links
def html():
    """CLI entry point: render each requested article to HTML and open it in a browser."""
    parser = optparse.OptionParser(usage="%prog --conf CONF ARTICLE [...]")
    parser.add_option("-c", "--conf", help="config file")
    options, args = parser.parse_args()
    if not args:
        parser.error("missing ARTICLE argument")
    articles = [unicode(x, 'utf-8') for x in args]
    conf = options.conf
    if not options.conf:
        parser.error("missing --conf argument")
    import StringIO
    import tempfile
    import os
    import webbrowser
    from mwlib import wiki, uparser, htmlwriter
    res = wiki.makewiki(conf)
    db = res['wiki']
    images = res['images']
    for article_title in articles:
        raw = db.getRawArticle(article_title)
        if not raw:
            continue
        out = StringIO.StringIO()
        out.write("""<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> <head> <meta http-equiv="content-type" content="text/html; charset="utf-8"></meta> <link rel="stylesheet" href="pedia.css" /> </head> <body> """)
        # BUG FIX: the parse call referenced `x`, which is not defined in this
        # function (NameError at runtime); the current article must be parsed.
        tree = uparser.parseString(article_title, raw=raw, wikidb=db)
        w = htmlwriter.HTMLWriter(out, images)
        w.write(tree)
        fd, htmlfile = tempfile.mkstemp(".html")
        os.close(fd)
        open(htmlfile, "wb").write(out.getvalue().encode('utf-8'))
        webbrowser.open("file://" + htmlfile)
def renderMW(txt, filesuffix=None):
    """Parse *txt*, clean the tree, and render it through the RL writer."""
    tree = uparser.parseString(title='Test', raw=txt)
    advtree.buildAdvancedTree(tree)
    TreeCleaner(tree).cleanAll()
    # scratch directory shared by the writer and the dummy image db
    # (not removed within this function)
    workdir = tempfile.mkdtemp()
    writer = RlWriter(test_mode=True)
    writer.wikiTitle = 'testwiki'
    writer.tmpdir = workdir
    writer.imgDB = dummyImageDB(basedir=workdir)
    elements = writer.write(tree)
    renderElements(elements, filesuffix, workdir)
def _parse_wiki(input):
    """Parse UTF-8 wikitext from *input*.

    NOTE(review): this function looks broken as written — see inline notes.
    """
    db = DummyDB()
    out = StringIO.StringIO()
    # normalize CRLF / lone-CR line endings to LF; note that despite the
    # endswith() guards, replace() rewrites the whole string, not just the tail
    if input.endswith(chr(13) + chr(10)):
        input = input.replace(chr(13) + chr(10), chr(10))
    if input.endswith(chr(13)):
        input = input.replace(chr(13), chr(10))
    try:
        p = parseString("title", input.decode("utf8"))
    except Exception, ex:
        # NOTE(review): re-raising here makes the return below the *success*
        # path — a clean parse returns the error string while the parse tree
        # (p) and buffer (out, db) are discarded.  Presumably the return was
        # meant to live in this except block instead of the raise.
        raise ex
    return u'Unable to parse input!'
def run(self):
    """Render the directive content as Mediawiki and emit a raw HTML node."""
    source = u'\n'.join(self.content)
    # empty wikidb
    tree = parseString(title='Export', raw=source, wikidb=DummyDB())
    preprocess(tree)
    writer = MWXHTMLWriter()
    writer.writeBook(tree)
    # drop the generated H1 heading (the document title) so only the body remains
    article = writer.xmlbody.getchildren()[0]
    article.remove(article.getchildren()[0])
    html = ET.tostring(writer.xmlbody)
    return [nodes.raw('', html, format='html')]
def get_xhtml(wikitext):
    """Render *wikitext* to XHTML and clean up the writer's markup quirks."""
    r = parseString(title="", raw=wikitext)
    preprocess(r)
    dbw = MyWriter()
    dbw.writeBook(r)
    text = dbw.asstring()
    # IDIOM FIX: every pattern below is a literal (no regex metacharacters),
    # so str.replace is clearer and cheaper than re.sub; the substitutions
    # are byte-for-byte identical to the originals.
    text = text.replace('<p />', '')
    text = text.replace('<p> ', '<p>')
    text = text.replace(' </p>', '</p>')
    text = text.replace('</p><p>', '</p>\n<p>')
    text = text.replace(' <br />       ', '</p>\n<p>')
    text = text.replace('      ', '<p>')
    text = text.replace('</dd><dd>', '</dd>\n<dd>')
    text = text.replace('<body><div class="mwx.article"><h1 />', '')
    text = text.replace('</div></body>', '')
    return text
def to_html(cls, kb_entry):
    """Render a knowledge-base entry's Mediawiki body to an XHTML string."""
    from mwlib.uparser import parseString
    from mwlib.xhtmlwriter import MWXHTMLWriter, preprocess
    try:
        import xml.etree.ElementTree as ET
    except ImportError:
        # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; only a missing module should trigger the fallback.
        from elementtree import ElementTree as ET
    r = kb_entry.body.replace("\r", "")  # drop carriage returns
    parsed = parseString(title=kb_entry.subject, raw=r, wikidb=cls.NOCDB(kb_entry))
    preprocess(parsed)
    xhtml = MWXHTMLWriter()
    xhtml.writeBook(parsed)
    block = ET.tostring(xhtml.xmlbody)
    return block
def parseArticle(
    self,
    title,
    revision=None,
    raw=None,
    wikidb=None,
    imagedb=None,
):
    """Parse an article, recording its referenced templates and images —
    the article itself is not added.

    @param title: title of article
    @type title: unicode
    @param revision: revision of article (optional)
    @type revision: int
    @param raw: wikitext of article
    @type raw: unicode
    @param wikidb: WikiDB to use
    @param imagedb: ImageDB to use (optional)
    """
    # RecordDB captures template/source lookups made during parsing
    recorder = RecordDB(wikidb, self.articles, self.templates, self.sources)
    tree = uparser.parseString(title, revision=revision, raw=raw, wikidb=recorder)
    if imagedb is None:
        return
    for node in tree.allchildren():
        if isinstance(node, parser.ImageLink):
            self.addImage(node.target, imagedb=imagedb, wikidb=wikidb)
            continue
        if isinstance(node, parser.TagNode) and node.caption == 'imagemap':
            # imagemap tags may carry a nested image link
            imagemap = getattr(node, 'imagemap', None)
            imagelink = getattr(imagemap, 'imagelink', None) if imagemap is not None else None
            if imagelink is not None:
                self.addImage(imagelink.target, imagedb=imagedb, wikidb=wikidb)
def parseArticle(self, title,
                 revision=None,
                 raw=None,
                 wikidb=None,
                 imagedb=None,
                 ):
    """Parse an article, recording referenced templates, image links and
    per-node weight statistics; the article itself is not added.

    @param title: title of article
    @type title: unicode
    @param revision: revision of article (optional)
    @type revision: int
    @param raw: wikitext of article
    @type raw: unicode
    @param wikidb: WikiDB to use
    @param imagedb: ImageDB to use (optional)
    """
    recorder = RecordDB(wikidb, self.articles, self.templates, self.sources)
    tree = uparser.parseString(title,
                               revision=revision,
                               raw=raw,
                               wikidb=recorder,
                               )
    if imagedb is None:
        return
    stats = self.node_stats
    for node in tree.allchildren():
        if isinstance(node, parser.ImageLink):
            self.image_infos.add((node.target, imagedb, wikidb))
        elif isinstance(node, parser.TagNode) and node.caption == 'imagemap':
            # imagemap tags may carry a nested image link
            imagemap = getattr(node, 'imagemap', None)
            if imagemap is not None:
                imagelink = getattr(imagemap, 'imagelink', None)
                if imagelink is not None:
                    self.image_infos.add((imagelink.target, imagedb, wikidb))
        # accumulate node-weight statistics for every visited node
        key, weight = utils.get_nodeweight(node)
        stats[key] = stats.get(key, 0) + weight
def get_authors_from_template_args(template):
    """Extract author names from a template's arguments.

    Prefers an explicit 'Author' argument (rendered to display text);
    otherwise scans the positional arguments for user-page links.

    NOTE(review): ``expander``, ``wikidb`` and ``getUserLinks`` are free
    variables supplied by the enclosing scope.
    """
    args = get_template_args(template, expander)
    author_arg = args.get('Author', None)
    if author_arg:
        # FIX: reuse author_arg instead of a redundant second args['Author']
        # lookup; also removed long-dead commented-out code.
        node = uparser.parseString('', raw=author_arg, wikidb=wikidb)
        advtree.extendClasses(node)
        txt = node.getAllDisplayText().strip()
        if txt:
            return [txt]
    if args.args:
        return getUserLinks('\n'.join([args.get(i, u'') for i in range(len(args.args))]))
    return []
def getParsedArticle(self, title, revision=None):
    """Return the parse tree for an article (or revision); None if missing."""
    if revision:
        page = self.nuwiki.get_page(None, revision)
    else:
        page = self.normalize_and_get_page(title, 0)
    raw = page.rawtext if page else None
    if raw is None:
        return None
    from mwlib import uparser
    return uparser.parseString(title=title, raw=raw, wikidb=self,
                               lang=self.siteinfo["general"]["lang"])
def print_tree(wikitext):
    """Print all of the nodes in the parse tree created from the wikitext."""
    cleaned = wiki_parser.remove_templates(wiki_parser.unescape(wikitext))
    root = uparser.parseString(title='', raw=cleaned)
    lines = []
    stack = deque([(root, 0)])
    while stack:
        node, depth = stack.popleft()
        label = str(node)
        if hasattr(node, 'type'):
            label = str(node.type) + ' | ' + label
        lines.append((depth * '\t') + label + '\n')
        # push children so they pop in document order (depth-first traversal)
        pending = deque()
        for child in node.children:
            pending.appendleft((child, depth + 1))
        stack.extendleft(pending)
    return ''.join(lines)
def main():
    """Render each wiki file given on the command line to ODF, saved in place."""
    for fn in sys.argv[1:]:
        from mwlib.dummydb import DummyDB
        from mwlib.uparser import parseString
        db = DummyDB()
        # BUG FIX: the input file handle was never closed; the context
        # manager closes it even when decoding or parsing raises.
        # (Long-dead commented-out debug code was also removed.)
        with open(fn) as src:
            raw = unicode(src.read(), 'utf8')
        r = parseString(title=fn, raw=raw, wikidb=db)
        preprocess(r)
        parser.show(sys.stdout, r)
        odf = ODFWriter()
        odf.writeTest(r)
        doc = odf.getDoc()
        doc.save(fn, True)
def test_definitiondescription():
    """Nested ':' markup yields 4 description nodes, each with indentlevel 1."""
    # NOTE(review): this markup was flattened onto one line in this view; the
    # ':' nesting levels are normally on separate lines.
    raw = u""" == test == :One ::Two :::Three ::::Four """
    db = DummyDB()
    r = parseString(title="t", raw=raw, wikidb=db)
    parser.show(sys.stdout, r)
    buildAdvancedTree(r)
    dd = r.getChildNodesByClass(DefinitionDescription)
    print "DD:", dd
    for c in dd:
        # regardless of nesting depth, each description reports indentlevel 1
        assert c.indentlevel == 1
    assert len(dd) == 4