def handle_templates(self, raw, title="Unnamed"):
    """Return a copy of ``raw`` where templates are either expanded, removed or kept.

    The expander is first updated for ``title``. ``raw`` is then parsed and
    every top-level ``Template`` node is replaced by whatever its
    ``flatten`` call appends to the output list; all other nodes are passed
    through untouched. The resulting node list is handed to
    ``_handle_marks`` and the marked result is expanded and returned.
    """
    self._update_expander(title)
    tree = parser.parse(
        raw,
        included=False,
        replace_tags=self.exp.uniquifier.replace_tags,
    )

    flattened = []
    for node in tree:
        logger.debug(repr(node))
        if not isinstance(node, nodes.Template):
            flattened.append(node)
            continue
        # Collect the template's flattened pieces and splice them in place.
        pieces = []
        node.flatten(self.exp, expander.get_template_args(node, self.exp), pieces)
        flattened.extend(pieces)

    marked = []
    self._handle_marks(flattened, marked)
    logger.debug('cache size: ' + str(len(self.exp.parsedTemplateCache)))
    return self._expand(marked)
def mark_infobox(self, name, raw):
    """Parse ``raw`` and wrap it in a marker ``<div>`` when it is an infobox.

    ``raw`` is always parsed first. If ``name`` starts with "infobox"
    (compared case-insensitively) a message is printed and a 3-tuple of
    (opening div markup carrying the quoted template name, parsed tree,
    closing div markup) is returned. Otherwise the parsed tree is returned
    as-is.
    """
    tree = parser.parse(raw, replace_tags=self.replace_tags)
    if name.lower().startswith("infobox"):
        # Single parenthesised argument: prints identically to the
        # Python 2 print statement.
        print("marking infobox %r" % name)
        opening = u"<div templatename=%s>\n" % quoteattr(name)
        return (opening, tree, u"</div>")
    return tree
def write_templates_into_f(raw, f):
    """Extract the templates from the raw text and write them to the file @p f.

    For every template node whose name is a plain string, a
    ``Template<TAB>name`` header line is written, followed by one line per
    template argument and a terminating blank line. All output is UTF-8
    encoded.

    Returns True if every argument could be written, False if writing any
    argument raised TypeError or AttributeError (the argument is skipped).
    """
    from mwlib.templ.misc import DictDB
    from mwlib.expander import get_template_args
    from mwlib.templ.evaluate import Expander
    from mwlib.templ.parser import parse
    from mwlib.templ.nodes import Template

    expander = Expander("", wikidb=DictDB())
    pending = [parse(raw, replace_tags=expander.replace_tags)]
    all_written = True

    while pending:
        node = pending.pop()
        if isinstance(node, basestring):
            continue

        if isinstance(node, Template) and isinstance(node[0], basestring):
            args = get_template_args(node, expander)
            template_name = unicode(node[0]).strip().replace("\n", "")
            f.write(u"Template\t{0}\n".format(template_name).encode("utf-8"))

            # Index-based access on purpose: only len() and [] are known to
            # be supported by the argument list.
            for idx in range(len(args)):
                try:
                    f.write(unicode(args[idx]).encode("utf-8") + "\n")
                except (TypeError, AttributeError):
                    # AttributeError: handling some mwlib bug. it raises
                    # this exception somehow
                    all_written = False
            f.write("\n")

        # Descend into the node's children (templates may be nested).
        pending.extend(node)

    return all_written
def write_templates_into_f(raw, f):
    """Extract the templates from the raw text and write them to the file @p f.

    Each template node with a string name produces a ``Template<TAB>name``
    header line, one line per argument and a trailing empty line, all
    encoded as UTF-8.

    Returns False if writing at least one argument raised TypeError or
    AttributeError, True otherwise.
    """
    from mwlib.templ.misc import DictDB
    from mwlib.expander import get_template_args
    from mwlib.templ.evaluate import Expander
    from mwlib.templ.parser import parse
    from mwlib.templ.nodes import Template

    exp = Expander('', wikidb=DictDB())
    stack = [parse(raw, replace_tags=exp.replace_tags)]
    success = True

    while stack:
        current = stack.pop()
        if isinstance(current, basestring):
            continue

        named_template = isinstance(current, Template) and isinstance(current[0], basestring)
        if named_template:
            arguments = get_template_args(current, exp)
            header = u"Template\t{0}\n".format(unicode(current[0]).strip().replace("\n", ""))
            f.write(header.encode("utf-8"))

            position = 0
            count = len(arguments)
            while position < count:
                try:
                    f.write(unicode(arguments[position]).encode("utf-8") + "\n")
                except (TypeError, AttributeError):
                    # AttributeError: handling some mwlib bug. it raises
                    # this exception somehow
                    success = False
                position += 1
            f.write("\n")

        # Children are visited too, so nested templates are written as well.
        stack.extend(current)

    return success
class Expander(object):
    """Template expander bound to a page name and a wiki database.

    NOTE(review): ``__init__`` reads ``self.replace_tags``, which is not
    defined in the portion of the class shown here; it is presumably
    provided elsewhere on the class. Confirm before relying on this block
    in isolation.
    """

    # set via {{DISPLAYTITLE:...}}
    magic_displaytitle = None

    def __init__(self, txt, pagename="", wikidb=None, recursion_limit=100):
        """Set up site info, namespace handling and the magic resolver, then parse ``txt``.

        ``wikidb`` is mandatory despite its default (asserted below). If the
        database cannot supply site info, the bundled "de" site info is used
        and a warning is printed.
        """
        assert wikidb is not None, "must supply wikidb argument in Expander.__init__"

        self.pagename = pagename
        self.db = wikidb
        self.uniquifier = Uniquifier()

        # Best effort: any failure while fetching site info is reported and
        # treated like "no site info available".
        try:
            si = self.db.get_siteinfo()
        except Exception as err:
            print('Caught: %s' % err)
            si = None

        if si is None:
            print("WARNING: failed to get siteinfo from %r" % (self.db, ))
            si = siteinfo.get_siteinfo("de")

        self.nshandler = nshandler = nshandling.nshandler(si)
        self.siteinfo = si

        if self.db and hasattr(self.db, "getSource"):
            source = self.db.getSource(pagename) or metabook.source()
            local_values = mwlocals.parse_locals(source.locals or u"")
        else:
            source = {}
            local_values = None

        # XXX we really should call Expander with a nuwiki.page object.
        revisionid = 0
        if self.db and hasattr(self.db, "nuwiki") and pagename:
            page = self.db.nuwiki.get_page(self.pagename)
            if page is not None:
                revisionid = getattr(page, 'revid', 0) or 0

        self.resolver = resolver = magics.MagicResolver(pagename=pagename, revisionid=revisionid)
        resolver.siteinfo = si
        resolver.nshandler = nshandler
        resolver.wikidb = wikidb
        resolver.local_values = local_values
        resolver.source = source

        self.recursion_limit = recursion_limit
        self.recursion_count = 0

        self.aliasmap = parser.aliasmap(self.siteinfo)
        self.parsed = parser.parse(
            txt,
            included=False,
            replace_tags=self.replace_tags,
            siteinfo=self.siteinfo,
        )
        # show(self.parsed)
        self.parsedTemplateCache = {}
def getContributorsFromInformationTemplate(raw, title, wikidb):
    """Return a list of contributor names found in the page text ``raw``.

    Lookup order:
    1. the authors given by an ``Information`` template, if it yields any;
    2. the authors collected from every template used in ``raw``;
    3. the targets of all user-namespace links in ``raw``.
    """
    from mwlib.expander import find_template, get_templates, get_template_args, Expander
    from mwlib import uparser, parser, advtree
    from mwlib.templ.parser import parse

    expander = Expander(u'', title, wikidb)

    def getUserLinks(raw):
        """Return the sorted, de-duplicated targets of user links in ``raw``."""
        def isUserLink(node):
            if not isinstance(node, parser.NamespaceLink):
                return False
            return node.namespace == 2  # NS_USER

        tree = uparser.parseString(title, raw=raw, wikidb=wikidb)
        return sorted(set([link.target for link in tree.filter(isUserLink)]))

    def get_authors_from_template_args(template):
        """Return authors from the ``Author`` argument, else from user links
        in the positional arguments, else an empty list."""
        args = get_template_args(template, expander)

        if args.get('Author', None):
            node = uparser.parseString('', raw=args['Author'], wikidb=wikidb)
            advtree.extendClasses(node)
            txt = node.getAllDisplayText().strip()
            if txt:
                return [txt]

        if not args.args:
            return []
        positional = [args.get(i, u'') for i in range(len(args.args))]
        return getUserLinks('\n'.join(positional))

    parsed_raw = [parse(raw, replace_tags=expander.replace_tags)]

    # find_template receives a copy each time, so the parsed tree can be
    # reused for every lookup.
    info_template = find_template(None, 'Information', parsed_raw[:])
    if info_template is not None:
        authors = get_authors_from_template_args(info_template)
        if authors:
            return authors

    authors = []
    for template_name in get_templates(raw):
        found = find_template(None, template_name, parsed_raw[:])
        if found is not None:
            authors.extend(get_authors_from_template_args(found))

    if not authors:
        return getUserLinks(raw)
    return authors
def find_template(raw, name):
    """Return Template node with given name or None if there is no such template.

    ``raw`` is parsed and the resulting tree is searched with an explicit
    stack; only template nodes whose name is a plain string are compared
    against ``name``.
    """
    expander = Expander('', wikidb=DictDB())
    stack = [parse(raw, replace_tags=expander.replace_tags)]

    while stack:
        node = stack.pop()
        if isinstance(node, basestring):
            continue
        has_plain_name = isinstance(node, Template) and isinstance(node[0], basestring)
        if has_plain_name and node[0] == name:
            return node
        # Not a match: keep searching among the node's children.
        stack.extend(node)

    return None
def find_template(raw, name, parsed_raw=None):
    """Return Template node with given name or None if there is no such template.

    raw        -- wiki text to parse; only used when ``parsed_raw`` is empty
                  or not given.
    name       -- template name to look for (compared with ``==`` against
                  template nodes whose name is a plain string).
    parsed_raw -- optional list of already parsed nodes to search, which
                  avoids re-parsing ``raw`` on repeated lookups. The list is
                  NOT modified: the search runs on a copy, so callers no
                  longer need to pass ``parsed_raw[:]`` (doing so still
                  works).
    """
    if not parsed_raw:
        e = Expander('', wikidb=DictDB())
        todo = [parse(raw, replace_tags=e.replace_tags)]
    else:
        # Copy the list: the traversal below pops from and extends it, which
        # previously emptied/corrupted the caller's list.
        todo = list(parsed_raw)

    while todo:
        n = todo.pop()
        if isinstance(n, basestring):
            continue
        if isinstance(n, Template) and isinstance(n[0], basestring):
            if n[0] == name:
                return n
        # Descend into the node's children.
        todo.extend(n)

    return None
def get_templates(raw, title=u""):
    """Return the set of template names used in ``raw``.

    Only templates whose name is a plain string are collected. Names that
    start with "/" are prefixed with ``title``.
    """
    expander = Expander('', wikidb=DictDB())
    pending = [parse(raw, replace_tags=expander.replace_tags)]
    names = set()

    while pending:
        node = pending.pop()
        if isinstance(node, basestring):
            continue
        if isinstance(node, Template) and isinstance(node[0], basestring):
            template_name = node[0]
            if template_name.startswith("/"):
                template_name = title + template_name
            names.add(template_name)
        # Children are searched too, so nested templates are found.
        pending.extend(node)

    return names
def get_templates(raw, title=u""):
    """Collect the names of all templates referenced in ``raw``.

    Returns a set of names. A name beginning with "/" gets ``title``
    prepended. Template nodes whose name is not a plain string are not
    recorded, but their children are still searched.
    """
    exp = Expander('', wikidb=DictDB())
    stack = [parse(raw, replace_tags=exp.replace_tags)]
    found = set()

    while stack:
        current = stack.pop()
        if isinstance(current, basestring):
            continue
        is_named = isinstance(current, Template) and isinstance(current[0], basestring)
        if is_named:
            if current[0].startswith("/"):
                found.add(title + current[0])
            else:
                found.add(current[0])
        stack.extend(current)

    return found
def getImageTemplatesAndArgs(self, name, wikidb=None):
    """Return the template names used on an image description page, plus
    selected template arguments.

    name   -- image name passed to ``get_image_description_page``.
    wikidb -- unused; kept for interface compatibility.

    Returns an empty list if there is no description page. Otherwise
    returns the set of template names from the page text, extended by every
    string entry of a template's second element (``tmpl[1]``) that is longer
    than 3 characters and contains no space.
    """
    from mwlib.expander import get_templates

    page = self.get_image_description_page(name)
    if page is None:
        return []

    from mwlib.expander import find_template
    from mwlib.templ.evaluate import Expander
    from mwlib.templ.parser import parse
    from mwlib.templ.misc import DictDB

    templates = get_templates(page.rawtext)
    args = set()
    e = Expander('', wikidb=DictDB())
    # avoid parsing with every call to find_template
    parsed_raw = [parse(page.rawtext, replace_tags=e.replace_tags)]
    for t in templates:
        tmpl = find_template(None, t, parsed_raw[:])
        if tmpl is None:
            # find_template reports "not found" as None; skip instead of
            # failing with a TypeError on tmpl[1].
            continue
        for arg in tmpl[1]:
            if isinstance(arg, basestring) and len(arg) > 3 and ' ' not in arg:
                args.add(arg)
    templates.update(args)
    return templates
def parseAndExpand(self, txt, keep_uniq=False):
    """Parse ``txt`` and return the result of expanding the parsed tree.

    ``keep_uniq`` is forwarded unchanged to ``_expand``.
    """
    tree = parser.parse(
        txt,
        included=False,
        replace_tags=self.replace_tags,
    )
    return self._expand(tree, keep_uniq=keep_uniq)
def _parse_raw_template(self, name, raw):
    """Parse the raw template text ``raw`` and return the parsed result.

    ``name`` is accepted but not used by this implementation.
    """
    parsed_template = parser.parse(raw, replace_tags=self.replace_tags)
    return parsed_template