Example #1
    def _word_repl(self, word, groups):
        if self.in_dd:
            name = groups.get('word_name')
            current_page = self.formatter.page.page_name
            abs_name = AbsPageName(current_page, name)
            if abs_name == current_page:
                return self.__real_val(abs_name)
            else:
                # handle anchors
                try:
                    abs_name, anchor = abs_name.rsplit("#", 1)
                except ValueError:
                    anchor = ""
                if self.cat_re.match(abs_name):
                    return self.__real_val(abs_name)
                else:
                    if not anchor:
                        wholename = abs_name
                    else:
                        wholename = "%s#%s" % (abs_name, anchor)

                    return self.__real_val(wholename)

        # apply() is deprecated; call the parent parser's handler directly
        return wikiParser._word_repl(self, word, groups)
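
Every example on this page hinges on MoinMoin's AbsPageName(context, pagename) helper, which resolves a possibly relative page name against the page it occurs on. A minimal sketch of its behaviour under the usual '../' (parent) and '/' (subpage) prefix conventions, simplified from MoinMoin's wikiutil rather than copied verbatim:

def abs_page_name(context, pagename):
    # Climb one level per leading '../', then re-anchor what is left
    if pagename.startswith('../'):
        while context and pagename.startswith('../'):
            context = '/'.join(context.split('/')[:-1])
            pagename = pagename[len('../'):]
        pagename = '/'.join(part for part in (context, pagename) if part)
    # A leading '/' denotes a subpage of the context page
    elif pagename.startswith('/'):
        if context:
            pagename = context + pagename
    return pagename

# abs_page_name('Parent/Child', '/Sub')      -> 'Parent/Child/Sub'
# abs_page_name('Parent/Child', '../Other')  -> 'Parent/Other'
# abs_page_name('Parent/Child', 'TopLevel')  -> 'TopLevel'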
Example #2
def metas_to_abs_links(request, page, values):
    new_values = list()
    stripped = False
    for value in values:
        if is_meta_link(value) != 'link':
            new_values.append(value)
            continue
        if ((value.startswith('[[') and value.endswith(']]')) or
            (value.startswith('{{') and value.endswith('}}'))):
            stripped = True
            value = value.lstrip('[')
            value = value.lstrip('{')
        attachment = ''
        for scheme in ('attachment:', 'inline:', 'drawing:'):
            if value.startswith(scheme):
                if len(value.split('/')) == 1:
                    value = ':'.join(value.split(':')[1:])
                    if '|' not in value:
                        # If the link has no descriptive text, reuse the
                        # attachment name itself as the description.
                        value = "%s|%s" % (value.rstrip(']').rstrip('}'), value)
                    value = "%s%s/%s" % (scheme, page, value)
                else:
                    att_page = value.split(':')[1]
                    if (att_page.startswith('./') or
                        att_page.startswith('/') or
                        att_page.startswith('../')):
                        attachment = scheme
                        value = ':'.join(value.split(':')[1:])
        if (value.startswith('./') or
            value.startswith('/') or
            value.startswith('../')):
            value = AbsPageName(page, value)
        if value.startswith('#'):
            value = page + value

        value = attachment + value
        if stripped:
            if value.endswith(']'):
                value = '[[' + value
            elif value.endswith('}'):
                value = '{{' + value
        new_values.append(value)

    return new_values
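
metas_to_abs_links rewrites relative link metas into absolute ones while preserving the surrounding bracket markup. A hypothetical before/after, assuming the current page is named Parent/Child and that is_meta_link classifies bracketed values as links (values are illustrative, not taken from the source):

# Input values for page = u'Parent/Child':
#   [u'[[/Sub]]', u'[[../Sibling]]', u'[[#anchor]]', u'plain text']
# Expected result:
#   [u'[[Parent/Child/Sub]]',
#    u'[[Parent/Sibling]]',
#    u'[[Parent/Child#anchor]]',
#    u'plain text']    # non-link metas pass through unchanged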
Example #3
def _metatable_parseargs(request, args, cat_re, temp_re):
    # Arg placeholders
    argset = set()
    keyspec = list()
    excluded_keys = list()
    orderspec = list()
    limitregexps = dict()
    limitops = dict()

    # Capacity for storing indirection keys in metadata comparisons
    # and regexps, eg. k->c=/.+/
    indirection_keys = list()

    # list styles
    styles = dict()

    # Flag: were there page arguments?
    pageargs = False

    # Regex preprocessing
    for arg in (x.strip() for x in args.split(',') if x.strip()):
        # metadata key spec, move on
        if arg.startswith('||') and arg.endswith('||'):
            # take order, strip empty ones, look at styles
            for key in arg.split('||'):
                if not key:
                    continue
                # Grab styles
                if key.startswith('<') and '>' in key:
                    style = parseAttributes(request, key[1:], '>')
                    key = key[key.index('>') + 1:].strip()

                    if style:
                        styles[key] = style[0]

                # Grab key exclusions
                if key.startswith('!'):
                    excluded_keys.append(key.lstrip('!'))
                    continue

                keyspec.append(key.strip())

            continue

        op_match = False
        # Check for Python operator comparisons
        for op in OPERATORS:
            if op in arg:
                data = arg.rsplit(op)

                # If this is not a comparison but indirection,
                # continue. Good: k->s>3, bad: k->s=/.+/
                if op == '>' and data[0].endswith('-'):
                    continue

                # Must have real comparison
                if len(data) != 2:
                    if op == '==':
                        data.append('')
                    else:
                        continue

                key, comp = [x.strip() for x in data]

                # Add indirection key
                if '->' in key:
                    indirection_keys.append(key)

                limitops.setdefault(key, list()).append((comp, op))
                op_match = True

            # One of the operators matched, no need to go forward
            if op_match:
                break

        # One of the operators matched, process next arg
        if op_match:
            continue

        # Metadata regexp, move on
        if '=' in arg:
            data = arg.split("=")
            key = data[0]

            # Add indirection key
            if '->' in key:
                indirection_keys.append(key)

            val = '='.join(data[1:])

            # Assume that value limits are regexps, if
            # not, escape them into exact regexp matches
            if not REGEX_RE.match(val):
                from MoinMoin.parser.text_moin_wiki import Parser

                # If the value is a page, make it a non-matching
                # regexp so that all link variations will generate a
                # match. An alternative would be to match from links
                # also, but in this case old-style metalinks, which
                # cannot be edited, would appear in metatables, which
                # is not wanted (old-style eg. [[Page| key: Page]])

                # Only allow non-matching regexp for values if they
                # are WikiWords. Eg. 'WikiWord some text' would match
                # 'WikiWord', emulating ye olde matching behaviour,
                # but 'nonwikiword some text' would not match
                # 'nonwikiword'
                if re.match(Parser.word_rule_js, val):
                    re_val = "(%s|" % (re.escape(val))
                else:
                    re_val = "(^%s$|" % (re.escape(val))
                # or as bracketed link
                re_val += "(?P<sta>\[\[)%s(?(sta)\]\])|" % (re.escape(val))

                # or as commented bracketed link
                re_val += "(?P<stb>\[\[)%s(?(stb)\|[^\]]*\]\]))" % \
                    (re.escape(val))

                limitregexps.setdefault(key, set()).add(
                    re.compile(re_val, re.UNICODE))

            # else strip the //:s
            else:
                if len(val) > 1:
                    val = val[1:-1]

                limitregexps.setdefault(key, set()).add(
                    re.compile(val, re.IGNORECASE | re.UNICODE))
            continue

        # order spec
        if arg.startswith('>>') or arg.startswith('<<'):
            # eg. [('<<', 'koo'), ('>>', 'kk')]
            orderspec = re.findall('(?:(<<|>>)([^<>]+))', arg)
            continue

        # Ok, we have a page arg, i.e. a page or page regexp in args
        pageargs = True

        # Normal pages, check perms, encode and move on
        if not REGEX_RE.match(arg):
            # Fix relative links
            if (arg.startswith('/') or arg.startswith('./')
                    or arg.startswith('../')):
                arg = AbsPageName(request.page.page_name, arg)

            argset.add(arg)
            continue

        # Ok, it's a page regexp

        # if there's something wrong with the regexp, ignore it and move on
        try:
            arg = arg[1:-1]
            # Fix relative links
            if (arg.startswith('/') or arg.startswith('./')
                    or arg.startswith('../')):
                arg = AbsPageName(request.page.page_name, arg)

            page_re = re.compile("%s" % arg)
        except re.error:
            continue

        # Get all pages, check which of them match to the supplied regexp
        for page in request.graphdata:
            if page_re.match(page):
                argset.add(page)

    return (argset, pageargs, keyspec, excluded_keys, orderspec, limitregexps,
            limitops, indirection_keys, styles)
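
_metatable_parseargs accepts a comma-separated argument mini-language mixing page names, key specs and filters. A hypothetical argument string, annotated with the branch each token would take (token values are illustrative; this assumes '>' is among OPERATORS and that /.../-delimited values match REGEX_RE):

# args = u"CategoryExample, ||!hidden||status||, status=/open|new/,
#          priority>2, <<priority"
#
#   'CategoryExample'      -> plain page argument           (argset)
#   '||!hidden||status||'  -> key spec: '!hidden' goes to excluded_keys,
#                             'status' to keyspec
#   'status=/open|new/'    -> value regexp                  (limitregexps)
#   'priority>2'           -> operator comparison           (limitops)
#   '<<priority'           -> ordering directive            (orderspec)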
Example #4
def add_matching_redirs(request,
                        loadedPage,
                        loadedOuts,
                        loadedMeta,
                        metakeys,
                        key,
                        curpage,
                        curkey,
                        prev='',
                        formatLinks=False,
                        linkdata=None):
    if not linkdata:
        linkdata = dict()
    args = curkey.split('->')

    inlink = False
    if args[0] == 'gwikiinlinks':
        inlink = True
        args = args[1:]

    newkey = '->'.join(args[2:])

    last = False

    if not args:
        return
    if len(args) in [1, 2]:
        last = True

    if len(args) == 1:
        linked, target_key = prev, args[0]
    else:
        linked, target_key = args[:2]

    if inlink:
        pages = request.graphdata.get_in(curpage).get(linked, set())
    else:
        pages = request.graphdata.get_out(curpage).get(linked, set())

    for indir_page in set(pages):
        # Relative pages etc
        indir_page = AbsPageName(request.page.page_name, indir_page)

        if request.user.may.read(indir_page):
            pagedata = request.graphdata.getpage(indir_page)

            outs = pagedata.get('out', dict())
            metas = pagedata.get('meta', dict())

            # Add matches at first round
            if last:
                if target_key in metas:
                    loadedMeta.setdefault(key, list())
                    linkdata.setdefault(key, dict())
                    if formatLinks:
                        values = metas_to_abs_links(request, indir_page,
                                                    metas[target_key])
                    else:
                        values = metas[target_key]
                    loadedMeta[key].extend(values)
                    linkdata[key].setdefault(indir_page, list()).extend(values)
                else:
                    linkdata.setdefault(key, dict())
                    linkdata[key].setdefault(indir_page, list())
                continue

            elif target_key not in outs:
                continue

            # Handle inlinks separately
            if 'gwikiinlinks' in metakeys:
                inLinks = inlinks_key(request, loadedPage)

                loadedOuts[key] = inLinks
                continue

            linkdata = add_matching_redirs(request, loadedPage, loadedOuts,
                                           loadedMeta, metakeys, key,
                                           indir_page, newkey, target_key,
                                           formatLinks, linkdata)

    return linkdata
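
add_matching_redirs walks a chain of '->'-separated hops: each hop follows links out of (or, for gwikiinlinks, into) the current page, and the final element names the meta key to collect. The same chain-walking idea as a self-contained sketch over plain dicts (toy data structures, not graphingwiki's request.graphdata API):

def follow_chain(graph, metas, page, chain):
    # graph: {page: {link_key: set(target_pages)}}
    # metas: {page: {meta_key: [values]}}
    hops = chain.split('->')
    if len(hops) == 1:
        # Last hop: collect the meta values themselves
        return list(metas.get(page, {}).get(hops[0], []))
    values = []
    for target in graph.get(page, {}).get(hops[0], set()):
        values.extend(follow_chain(graph, metas, target,
                                   '->'.join(hops[1:])))
    return values

# follow_chain({'Doc': {'author': set(['Alice'])}},
#              {'Alice': {'homepage': ['http://example.org']}},
#              'Doc', 'author->homepage')
# -> ['http://example.org']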
Example #5
def parse_text(request, page, text):
    pagename = page.page_name
    
    newreq = request
    newreq.page = lcpage = LinkCollectingPage(newreq, pagename, text)
    parserclass = importPlugin(request.cfg, "parser",
                               'link_collect', "Parser")
    myformatter = importPlugin(request.cfg, "formatter",
                               'nullformatter', "Formatter")
    lcpage.formatter = myformatter(newreq)
    lcpage.formatter.page = lcpage
    p = parserclass(lcpage.get_raw_body(), newreq, formatter=lcpage.formatter)
    lcpage.parser = p
    lcpage.format(p)
    
    # These are the match types that really should be noted
    linktypes = ["wikiname_bracket", "word",                  
                 "interwiki", "url", "url_bracket"]
    
    new_data = dict_with_getpage()

    # Add the page categories as links too
    categories, _, _ = parse_categories(request, text)

    # Process ACL:s
    pi, _ = get_processing_instructions(text)
    for verb, args in pi:
        if verb == u'acl':
            # Collapse ACL definitions given on multiple lines into one
            acls = new_data.get(pagename, dict()).get('acl', '')
            acls = acls.strip() + args
            new_data.setdefault(pagename, dict())['acl'] = acls

    for metakey, value in p.definitions.iteritems():
        for ltype, item in value:
            dnode = None

            if ltype in ['url', 'wikilink', 'interwiki', 'email']:
                dnode = item[1]
                if '#' in dnode:
                    # Normalize anchor links to point at the anchor's page,
                    # but leave real URLs untouched
                    url = False
                    for schema in config.url_schemas:
                        if dnode.startswith(schema):
                            url = True
                    if not url:
                        if dnode.startswith('#'):
                            # Anchor on the current page
                            dnode = pagename
                        else:
                            # Drop the anchor, keep the target page
                            dnode = dnode.split('#')[0]
                if (dnode.startswith('/') or
                    dnode.startswith('./') or
                    dnode.startswith('../')):
                    # Fix relative links
                    dnode = AbsPageName(pagename, dnode)

                hit = item[0]
            elif ltype == 'category':
                dnode = item
                hit = item
                if item in categories:
                    add_link(new_data, pagename, dnode,
                             u"gwikicategory")
            elif ltype == 'meta':
                add_meta(new_data, pagename, (metakey, item))
            elif ltype == 'include':
                # No support for regexp includes, for now!
                if not item[0].startswith("^"):
                    included = AbsPageName(pagename, item[0])
                    add_link(new_data, pagename, included, u"gwikiinclude")

            if dnode:
                add_link(new_data, pagename, dnode, metakey)

    return new_data
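
The '#' handling above normalizes anchored wiki links so the link graph points at pages rather than fragments. The same rule as a standalone helper (a sketch; the url_schemas default merely stands in for MoinMoin's config.url_schemas):

def normalize_anchor_link(target, pagename,
                          url_schemas=('http:', 'https:', 'ftp:', 'mailto:')):
    # Real URLs keep their fragments; only wiki links are rewritten
    for schema in url_schemas:
        if target.startswith(schema):
            return target
    if target.startswith('#'):
        # A bare anchor refers to the current page
        return pagename
    # 'Page#anchor' collapses to 'Page'
    return target.split('#')[0]

# normalize_anchor_link('#intro', 'FrontPage')        -> 'FrontPage'
# normalize_anchor_link('OtherPage#x', 'FrontPage')   -> 'OtherPage'
# normalize_anchor_link('http://a/b#c', 'FrontPage')  -> 'http://a/b#c'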