コード例 #1
0
ファイル: template.py プロジェクト: larsjsol/wcb
    def handle_templates(self, raw, title="Unnamed"):
        """
        Return a copy of ``raw`` where templates are either expanded, removed
        or kept.

        ``title`` is handed to ``self._update_expander`` before ``raw`` is
        parsed. The final value is whatever ``self._expand`` returns for the
        node list produced by ``self._handle_marks``.
        """
        self._update_expander(title)

        tree = parser.parse(raw, included=False,
                            replace_tags=self.exp.uniquifier.replace_tags)
        collected = []
        for node in tree:
            logger.debug(repr(node))
            if not isinstance(node, nodes.Template):
                # Anything that is not a template is carried over unchanged.
                collected.append(node)
                continue

            # Flatten the template into a scratch list and splice the
            # resulting pieces into the output sequence.
            flattened = []
            template_args = expander.get_template_args(node, self.exp)
            node.flatten(self.exp, template_args, flattened)
            collected.extend(flattened)

        handled = []
        self._handle_marks(collected, handled)
        logger.debug('cache size: ' + str(len(self.exp.parsedTemplateCache)))

        return self._expand(handled)
コード例 #2
0
ファイル: infobox_magic.py プロジェクト: ASaifM/mwlib
def mark_infobox(self, name, raw):
    """Parse ``raw`` and wrap the result in a marker div for infobox templates.

    ``raw`` is always parsed. When the lower-cased ``name`` starts with
    "infobox", a 3-tuple ``(opening div, parsed result, closing div)`` is
    returned; otherwise the parsed result is returned on its own.
    """
    parsed = parser.parse(raw, replace_tags=self.replace_tags)
    is_infobox = name.lower().startswith("infobox")
    if is_infobox:
        print("marking infobox %r" % name)
        opening = u"<div templatename=%s>\n" % quoteattr(name)
        return (opening, parsed, u"</div>")
    return parsed
コード例 #3
0
ファイル: wikitext_to_conll.py プロジェクト: juditacs/hunmisc
    def write_templates_into_f(raw, f):
        """Extracts the templates from the raw text and writes them to the file @p f.

        For every template node whose name is a plain string, a
        "Template<TAB>name" line is written, followed by one line per
        argument and a blank line. Returns False if writing any argument
        raised TypeError or AttributeError, True otherwise.
        """
        from mwlib.templ.misc import DictDB
        from mwlib.expander import get_template_args
        from mwlib.templ.evaluate import Expander
        from mwlib.templ.parser import parse
        from mwlib.templ.nodes import Template

        exp = Expander("", wikidb=DictDB())
        pending = [parse(raw, replace_tags=exp.replace_tags)]
        all_written = True
        while pending:
            node = pending.pop()
            if isinstance(node, basestring):
                continue

            if isinstance(node, Template) and isinstance(node[0], basestring):
                template_args = get_template_args(node, exp)
                template_name = unicode(node[0]).strip().replace("\n", "")
                header = u"Template\t{0}\n".format(template_name)
                f.write(header.encode("utf-8"))
                for idx in range(len(template_args)):
                    try:
                        f.write(unicode(template_args[idx]).encode("utf-8") + "\n")
                    except (TypeError, AttributeError):
                        # AttributeError: handling some mwlib bug. it raises
                        # this exception somehow
                        all_written = False
                f.write("\n")
            # Descend into the children of every non-string node.
            pending.extend(node)
        return all_written
コード例 #4
0
ファイル: infobox_magic.py プロジェクト: vprusa/mwlib
def mark_infobox(self, name, raw):
    """Return the parsed form of ``raw``, marked up if it is an infobox.

    The text is parsed unconditionally. If ``name`` (compared in lower case)
    begins with "infobox", the parsed tree is returned sandwiched between an
    opening ``<div templatename=...>`` string and a closing ``</div>`` string
    as a tuple; any other template yields just the parsed tree.
    """
    tree = parser.parse(raw, replace_tags=self.replace_tags)
    lowered = name.lower()
    if lowered.startswith("infobox"):
        print("marking infobox %r" % name)
        div_open = u"<div templatename=%s>\n" % quoteattr(name)
        div_close = u"</div>"
        return (div_open, tree, div_close)
    return tree
コード例 #5
0
    def write_templates_into_f(raw, f):
        """Extracts the templates from the raw text and writes them to the file @p f.

        The parsed tree is walked with an explicit stack. Each template whose
        name is a string produces a "Template<TAB>name" line, one line per
        argument, then an empty line. The return value is True unless
        writing an argument raised TypeError or AttributeError.
        """
        from mwlib.templ.misc import DictDB
        from mwlib.expander import get_template_args
        from mwlib.templ.evaluate import Expander
        from mwlib.templ.parser import parse
        from mwlib.templ.nodes import Template

        template_expander = Expander('', wikidb=DictDB())
        stack = [parse(raw, replace_tags=template_expander.replace_tags)]
        ok = True
        while stack:
            current = stack.pop()
            if not isinstance(current, basestring):
                if isinstance(current, Template) and isinstance(current[0], basestring):
                    arguments = get_template_args(current, template_expander)
                    cleaned_name = unicode(current[0]).strip().replace("\n", "")
                    f.write(u"Template\t{0}\n".format(cleaned_name).encode("utf-8"))
                    for position in range(len(arguments)):
                        try:
                            line = unicode(arguments[position]).encode("utf-8") + "\n"
                            f.write(line)
                        except (TypeError, AttributeError):
                            # AttributeError: handling some mwlib bug. it
                            # raises this exception somehow
                            ok = False
                    f.write("\n")
                # Children of this node still have to be visited.
                stack.extend(current)
        return ok
コード例 #6
0
ファイル: evaluate.py プロジェクト: vprusa/mwlib
class Expander(object):
    """Parses a piece of wikitext and holds the state used to expand it.

    The constructor gathers site information, a namespace handler, a magic
    word resolver and an alias map, then parses ``txt`` into ``self.parsed``.

    NOTE(review): ``self.replace_tags`` is used in ``__init__`` but is not
    defined in the part of the class visible here -- presumably it is
    provided further down in the class; confirm.
    """
    magic_displaytitle = None  # set via {{DISPLAYTITLE:...}}

    def __init__(self, txt, pagename="", wikidb=None, recursion_limit=100):
        """Set up the expander for ``txt``.

        txt             -- text handed to ``parser.parse``
        pagename        -- page name stored on the instance and passed to the
                           magic resolver
        wikidb          -- database object; must not be None (asserted)
        recursion_limit -- stored as ``self.recursion_limit``
        """
        assert wikidb is not None, "must supply wikidb argument in Expander.__init__"
        self.pagename = pagename
        self.db = wikidb
        self.uniquifier = Uniquifier()

        # Ask the database for its siteinfo; any exception is printed and
        # treated the same as "no siteinfo available".
        si = None
        try:
            si = self.db.get_siteinfo()
        except Exception, err:
            print 'Caught: %s' % err

        # Fall back to the siteinfo registered under "de" when the database
        # could not provide one.
        if si is None:
            print "WARNING: failed to get siteinfo from %r" % (self.db, )
            si = siteinfo.get_siteinfo("de")

        self.nshandler = nshandler = nshandling.nshandler(si)
        self.siteinfo = si

        # Databases exposing getSource() supply a source object whose
        # ``locals`` text is parsed into local values; otherwise there are no
        # local values and the source is an empty dict.
        if self.db and hasattr(self.db, "getSource"):
            source = self.db.getSource(pagename) or metabook.source()
            local_values = source.locals or u""
            local_values = mwlocals.parse_locals(local_values)
        else:
            local_values = None
            source = {}

        # XXX we really should call Expander with a nuwiki.page object.
        # The revision id stays 0 unless the database has a ``nuwiki``
        # attribute, a page name was given, and the page carries a truthy
        # ``revid``.
        revisionid = 0
        if self.db and hasattr(self.db, "nuwiki") and pagename:
            page = self.db.nuwiki.get_page(self.pagename)
            if page is not None:
                revisionid = getattr(page, 'revid', 0) or 0

        # The resolver receives the same siteinfo, namespace handler,
        # database, local values and source that were determined above.
        self.resolver = magics.MagicResolver(pagename=pagename,
                                             revisionid=revisionid)
        self.resolver.siteinfo = si
        self.resolver.nshandler = nshandler

        self.resolver.wikidb = wikidb
        self.resolver.local_values = local_values
        self.resolver.source = source

        self.recursion_limit = recursion_limit
        self.recursion_count = 0
        self.aliasmap = parser.aliasmap(self.siteinfo)

        # Parse the text last: the call needs self.siteinfo and
        # self.replace_tags.
        self.parsed = parser.parse(txt,
                                   included=False,
                                   replace_tags=self.replace_tags,
                                   siteinfo=self.siteinfo)
        # show(self.parsed)
        self.parsedTemplateCache = {}
コード例 #7
0
def getContributorsFromInformationTemplate(raw, title, wikidb):
    """Return a list of contributors found in the page text ``raw``.

    Lookup order:
      1. authors taken from the 'Information' template, if that yields any;
      2. authors collected from every template used in ``raw``;
      3. the sorted, de-duplicated targets of user-namespace links in ``raw``.
    """
    from mwlib.expander import find_template, get_templates, get_template_args, Expander
    from mwlib import uparser, parser, advtree
    from mwlib.templ.parser import parse

    NS_USER = 2

    def getUserLinks(raw):
        """Return the sorted unique targets of user-namespace links in ``raw``."""
        def isUserLink(node):
            return isinstance(node, parser.NamespaceLink) and node.namespace == NS_USER

        tree = uparser.parseString(title, raw=raw, wikidb=wikidb)
        targets = set(link.target for link in tree.filter(isUserLink))
        return sorted(targets)

    def get_authors_from_template_args(template):
        """Return the authors named by the arguments of ``template``."""
        args = get_template_args(template, expander)

        if args.get('Author', None):
            # Prefer the display text of the 'Author' argument when it is
            # not blank.
            author_tree = uparser.parseString('', raw=args['Author'], wikidb=wikidb)
            advtree.extendClasses(author_tree)
            display_text = author_tree.getAllDisplayText().strip()
            if display_text:
                return [display_text]

        if not args.args:
            return []

        # Otherwise look for user links in the positional arguments.
        positional = [args.get(pos, u'') for pos in range(len(args.args))]
        return getUserLinks('\n'.join(positional))

    expander = Expander(u'', title, wikidb)
    # Parse once; every find_template call below gets its own copy.
    parsed_raw = [parse(raw, replace_tags=expander.replace_tags)]

    information = find_template(None, 'Information', parsed_raw[:])
    if information is not None:
        information_authors = get_authors_from_template_args(information)
        if information_authors:
            return information_authors

    collected = []
    for template_name in get_templates(raw):
        node = find_template(None, template_name, parsed_raw[:])
        if node is not None:
            collected.extend(get_authors_from_template_args(node))

    return collected or getUserLinks(raw)
コード例 #8
0
ファイル: expander.py プロジェクト: aarddict/mwlib
def find_template(raw, name):
    """Return Template node with given name or None if there is no such template"""

    exp = Expander('', wikidb=DictDB())
    stack = [parse(raw, replace_tags=exp.replace_tags)]
    while stack:
        node = stack.pop()
        if isinstance(node, basestring):
            continue
        has_string_name = isinstance(node, Template) and isinstance(node[0], basestring)
        if has_string_name and node[0] == name:
            return node
        # Not a match: keep searching among this node's children.
        stack.extend(node)
    return None
コード例 #9
0
ファイル: expander.py プロジェクト: uncletall/mwlib
def find_template(raw, name, parsed_raw=None):
    """Return Template node with given name or None if there is no such template

    raw        -- wikitext to parse; only used when ``parsed_raw`` is empty
                  or not given
    name       -- template name, compared with ``==`` against the first
                  element of each Template node
    parsed_raw -- optional list of already parsed nodes to search instead of
                  parsing ``raw``; the list itself is left untouched
    """

    if not parsed_raw:
        e = Expander('', wikidb=DictDB())
        todo = [parse(raw, replace_tags=e.replace_tags)]
    else:
        # Search a shallow copy: the loop below pops from and extends the
        # work list, which would otherwise consume the caller's list.
        todo = list(parsed_raw)
    while todo:
        n = todo.pop()
        if isinstance(n, basestring):
            continue
        if isinstance(n, Template) and isinstance(n[0], basestring):
            if n[0] == name:
                return n
        # Descend into the children of every non-matching node.
        todo.extend(n)
    return None
コード例 #10
0
ファイル: expander.py プロジェクト: ASaifM/mwlib
def get_templates(raw, title=u""):
    """Return the set of template names used in ``raw``.

    Only templates whose name is a plain string are recorded. A name that
    starts with "/" is prefixed with ``title``.
    """
    names = set()
    exp = Expander('', wikidb=DictDB())
    pending = [parse(raw, replace_tags=exp.replace_tags)]
    while pending:
        node = pending.pop()
        if isinstance(node, basestring):
            continue

        if isinstance(node, Template) and isinstance(node[0], basestring):
            head = node[0]
            names.add(title + head if head.startswith("/") else head)

        # Every non-string node contributes its children to the walk.
        pending.extend(node)

    return names
コード例 #11
0
ファイル: expander.py プロジェクト: uncletall/mwlib
def get_templates(raw, title=u""):
    """Collect the names of all templates referenced in ``raw``.

    The parsed tree is walked with an explicit stack. For each Template node
    with a string name, that name is added to the result set; names beginning
    with "/" get ``title`` prepended first.
    """
    found = set()
    template_expander = Expander('', wikidb=DictDB())
    stack = [parse(raw, replace_tags=template_expander.replace_tags)]
    while stack:
        current = stack.pop()
        if not isinstance(current, basestring):
            if isinstance(current, Template) and isinstance(current[0], basestring):
                template_name = current[0]
                if template_name.startswith("/"):
                    template_name = title + template_name
                found.add(template_name)

            # Strings are leaves; everything else is expanded further.
            stack.extend(current)

    return found
コード例 #12
0
 def getImageTemplatesAndArgs(self, name, wikidb=None):
     """Return the template names used on an image description page, plus
     selected entries of their second element.

     name   -- passed to ``self.get_image_description_page``
     wikidb -- accepted but not used by this method

     Returns an empty list when there is no description page. Otherwise
     returns the set produced by ``get_templates`` for the page text,
     extended with every string entry of ``tmpl[1]`` (presumably the
     template's argument list -- confirm) that is longer than 3 characters
     and contains no space.
     """
     from mwlib.expander import find_template, get_templates
     page = self.get_image_description_page(name)
     if page is None:
         return []

     from mwlib.templ.evaluate import Expander
     from mwlib.templ.parser import parse
     from mwlib.templ.misc import DictDB

     templates = get_templates(page.rawtext)
     args = set()
     e = Expander('', wikidb=DictDB())
     # avoid parsing with every call to find_template
     parsed_raw = [parse(page.rawtext, replace_tags=e.replace_tags)]
     for t in templates:
         tmpl = find_template(None, t, parsed_raw[:])
         if tmpl is None:
             # find_template signals "no such template" with None; skip it
             # instead of failing on the subscript below.
             continue
         for arg in tmpl[1]:
             if isinstance(arg, basestring) and len(arg) > 3 and ' ' not in arg:
                 args.add(arg)
     templates.update(args)
     return templates
コード例 #13
0
 def parseAndExpand(self, txt, keep_uniq=False):
     """Parse ``txt`` and return the result of expanding the parsed tree.

     ``keep_uniq`` is forwarded unchanged to ``self._expand``.
     """
     parse_options = dict(included=False, replace_tags=self.replace_tags)
     tree = parser.parse(txt, **parse_options)
     return self._expand(tree, keep_uniq=keep_uniq)
コード例 #14
0
 def _parse_raw_template(self, name, raw):
     """Return the parsed form of ``raw``; ``name`` is not used here."""
     tree = parser.parse(raw, replace_tags=self.replace_tags)
     return tree
コード例 #15
0
ファイル: evaluate.py プロジェクト: ASaifM/mwlib
 def parseAndExpand(self, txt, keep_uniq=False):
     """Expand ``txt``: parse it, then hand the tree to ``self._expand``.

     The text is parsed with ``included=False`` and this object's
     ``replace_tags``; ``keep_uniq`` is passed through to ``self._expand``.
     """
     parsed_tree = parser.parse(
         txt,
         included=False,
         replace_tags=self.replace_tags,
     )
     expanded = self._expand(parsed_tree, keep_uniq=keep_uniq)
     return expanded
コード例 #16
0
ファイル: evaluate.py プロジェクト: ASaifM/mwlib
 def _parse_raw_template(self, name, raw):
     """Parse the raw template text with this object's ``replace_tags``.

     ``name`` is accepted but plays no part in the parsing done here.
     """
     parsed_template = parser.parse(raw, replace_tags=self.replace_tags)
     return parsed_template