def simpleparse(
        raw):  # !!! USE FOR DEBUGGING ONLY !!! does not use post processors
    """Parse raw wiki markup, show the resulting tree and return it.

    Debugging helper only: the text is tokenized and parsed under the
    placeholder title "unknown", and no post processors are applied.

    :param raw: text handed to ``scanner.tokenize``.
    :returns: whatever ``parser.Parser(tokens, "unknown").parse()`` returns,
        after it has been passed to ``parser.show`` with ``sys.stdout``.
    """
    import sys

    tokens = scanner.tokenize(raw)
    r = parser.Parser(tokens, "unknown").parse()
    parser.show(sys.stdout, r, 0)
    return r
    def write_wiki_html(self, htmlout, title, article_text):
        """Parse ``article_text`` and write it out through a WPHTMLWriter.

        The text is tokenized and parsed under ``title`` and the parsed tree
        gets ``title`` as its caption.  The writer is built from
        ``self.wikidb.dataretriever``, ``htmlout``, a ``WPImageDB`` created
        from ``self.base_path + '/images/'`` and ``self.lang``.  After
        writing, the writer's ``links_list`` is stored in
        ``self.links_cache`` under ``title``.

        Returns the writer's ``math_processed`` attribute.
        """
        token_stream = scanner.tokenize(article_text, title)
        article = parser.Parser(token_stream, title).parse()
        article.caption = title

        image_store = WPImageDB(self.base_path + '/images/')
        html_writer = WPHTMLWriter(
            self.wikidb.dataretriever,
            htmlout,
            images=image_store,
            lang=self.lang,
        )
        html_writer.write(article)

        # Remember the links the writer collected for this title.
        self.links_cache[title] = html_writer.links_list
        return html_writer.math_processed
Exemple #3
0
    def write_wiki_html(self, htmlout, title, article_text):
        """Parse ``article_text`` and write it out through a WPHTMLWriter.

        The text is tokenized and parsed under ``title`` and the parsed tree
        gets ``title`` as its caption.  The writer is built from
        ``self.index``, ``htmlout``, a ``WPImageDB`` created from
        ``self.base_path + '/images/'`` and ``self.lang``.

        Returns the writer's ``math_processed`` attribute.
        """
        token_stream = scanner.tokenize(article_text, title)
        article = parser.Parser(token_stream, title).parse()
        article.caption = title

        image_store = WPImageDB(self.base_path + '/images/')
        html_writer = WPHTMLWriter(
            self.index, htmlout, images=image_store, lang=self.lang)
        html_writer.write(article)

        return html_writer.math_processed
def parseString(title=None, raw=None, wikidb=None, revision=None):
    """Parse article with title from raw mediawiki text.

    :param title: article title; required (asserted to be not None).
    :param raw: wiki markup.  When None it is fetched with
        ``wikidb.getRawArticle(title, revision=revision)``.
    :param wikidb: optional database object.  When truthy, the text is run
        through ``expander.Expander(...).expandTemplates()`` before it is
        tokenized; otherwise the raw text is tokenized as is.
    :param revision: forwarded to ``wikidb.getRawArticle``.
    :returns: the result of ``parser.Parser(tokens, title).parse()`` with
        ``caption`` set to ``title`` and every callable in
        ``postprocessors`` applied to it.
    :raises AssertionError: if ``title`` is None, if ``raw`` is None and no
        ``wikidb`` was given, or if the database returns no text.
    """
    assert title is not None, 'no title given'

    if raw is None:
        # Without raw text the article has to come from the database; fail
        # with a clear message instead of an AttributeError on None.
        assert wikidb is not None, (
            "cannot get article %r: neither raw text nor wikidb given"
            % (title, ))
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    if wikidb:
        te = expander.Expander(raw, pagename=title, wikidb=wikidb)
        text = te.expandTemplates()
    else:
        text = raw

    tokens = scanner.tokenize(text, title)

    a = parser.Parser(tokens, title).parse()
    a.caption = title
    for x in postprocessors:
        x(a)
    return a
def _parse(txt):
    """Parse text and try to hand back a 'better' (some inner) node.

    The text is parsed under the placeholder title "unknown".  If the
    parsed article does not have exactly one child, the article itself is
    returned, re-classed as a plain ``parser.Node``.  Otherwise its single
    child is taken (re-classed to ``parser.Node`` when it is exactly a
    ``parser.Paragraph``); if that child in turn has exactly one child, the
    grandchild is returned, else the child.
    """

    from mwlib import scanner, parser

    article = parser.Parser(scanner.tokenize(txt), "unknown").parse()

    # article is a parser.Article.
    if len(article.children) == 1:
        inner = article.children[0]
        if inner.__class__ == parser.Paragraph:
            inner.__class__ = parser.Node
        return inner.children[0] if len(inner.children) == 1 else inner

    article.__class__ = parser.Node
    return article
Exemple #6
0
def parseString(
    title=None,
    raw=None,
    wikidb=None,
    revision=None,
    lang=None,
    interwikimap=None,
):
    """Parse article with title from raw mediawiki text.

    :param title: article title; required (asserted to be not None).
    :param raw: wiki markup.  When None it is fetched with
        ``wikidb.getRawArticle(title, revision=revision)``.
    :param wikidb: optional database object.  When truthy, the text is run
        through ``expander.Expander(...).expandTemplates()`` before it is
        tokenized, and it is also used to fill in ``lang`` and
        ``interwikimap`` when those are None.
    :param revision: forwarded to the ``wikidb`` lookups and to the
        post processors.
    :param lang: passed to the parser and the post processors.  When None
        and ``wikidb`` has ``getSource``, it is taken from the
        ``'language'`` entry of the returned source, if any.
    :param interwikimap: passed to the parser.  When None and ``wikidb``
        has ``getInterwikiMap``, it is fetched from there.
    :returns: the parsed article with ``caption`` set to ``title`` and
        every callable in ``postprocessors`` applied to it.
    :raises AssertionError: if ``title`` is None, if ``raw`` is None and no
        ``wikidb`` was given, or if the database returns no text.
    """

    assert title is not None, 'no title given'

    if raw is None:
        # Without raw text the article has to come from the database; fail
        # with a clear message instead of an AttributeError on None.
        assert wikidb is not None, (
            "cannot get article %r: neither raw text nor wikidb given"
            % (title, ))
        raw = wikidb.getRawArticle(title, revision=revision)
        assert raw is not None, "cannot get article %r" % (title, )

    if wikidb:
        te = expander.Expander(raw, pagename=title, wikidb=wikidb)
        text = te.expandTemplates()
        if lang is None and hasattr(wikidb, 'getSource'):
            src = wikidb.getSource(title, revision=revision)
            if src:
                lang = src.get('language')
        if interwikimap is None and hasattr(wikidb, 'getInterwikiMap'):
            interwikimap = wikidb.getInterwikiMap(title, revision=revision)
    else:
        text = raw

    tokens = scanner.tokenize(text, title)

    a = parser.Parser(tokens, title, lang=lang,
                      interwikimap=interwikimap).parse()
    a.caption = title
    for x in postprocessors:
        x(a, title=title, revision=revision, wikidb=wikidb, lang=lang)

    return a