Beispiel #1
0
def findTags(text):
    """Split template ``text`` into literal-text and tag tokens.

    A character-driven state machine scans ``text`` for ``<: ... :>`` tags
    and ``<:* ... *:>`` comments.  Inside a tag, single-, double- and
    back-quoted areas are skipped over (a backslash protects the character
    that follows it), so a ``:>`` inside quotes does not close the tag.

    Returns:
        A list of ``[start, end, kind, contents]`` tokens in source order.
        ``kind`` is 0 for literal text and 1 for a tag.  A text token
        (possibly empty) is emitted in front of every tag or comment
        opening and once more for whatever follows the last one.  For a
        tag token, ``start`` is the offset of ``<``, ``end`` is one past
        ``>`` and ``contents`` excludes the ``<:`` / ``:>`` delimiters.
        Comments produce no token of their own.

    Raises:
        DTLexicalError: on a character that is not allowed in unquoted tag
            text, or when the input ends inside an unterminated tag,
            quoted area or comment.
    """
    ret = []

    PTTSTATESWITCH = {":": GOTCCOL, "'": SQUOT, '"': DQUOT, "`": BQUOT}
    # ``string.letters`` only exists on Python 2; fall back to
    # ``ascii_letters`` so the lexer still works where it was removed.
    letters = getattr(string, 'letters', string.ascii_letters)
    VALID_PTTEXT = (letters + string.digits + string.whitespace +
                    '[]_=/\\#.*')
    QUOTE_CLOSE = {SQUOT: "'", DQUOT: '"', BQUOT: '`'}

    offset = 0
    lentext = len(text)
    state = INIT

    # State to return to once the character after a backslash is consumed.
    backslash_save_state = 0

    textoffset = 0  # where the pending literal-text run starts
    tagoffset = 0   # offset of the '<' that opened the current tag
    while offset < lentext:
        c = text[offset]
        if state == INIT:
            if c == '<':
                tagoffset = offset
                state = GOTLT
        elif state == GOTLT:
            if c == ':':
                state = BEGINCOMMENTCHECK
                # Flush the literal text that precedes the '<'.  Emitted as
                # a list so every token has the same type.
                ret.append(
                    [textoffset, offset - 1, 0, text[textoffset:offset - 1]])
            elif c == '<':
                # "<<:" -- the second '<' may be the real tag opener, so
                # keep waiting for ':' instead of dropping back to INIT.
                tagoffset = offset
            else:
                state = INIT
        elif state == BEGINCOMMENTCHECK:
            if c != '*':
                # Not a comment: re-examine this same character as the
                # first character of the tag text (offset not advanced).
                state = PLAINTAGTEXT
                continue
            state = INCOMMENT
        elif state == INCOMMENT:
            if c == '*':
                state = INCOMMENTSTAR
        elif state == INCOMMENTSTAR:
            if c == ':':
                state = INCOMMENTSTARCOL
            else:
                state = INCOMMENT
        elif state == INCOMMENTSTARCOL:
            if c == '>':
                state = INIT
                textoffset = offset + 1
            else:
                state = INCOMMENT
        elif state == PLAINTAGTEXT:
            nstate = PTTSTATESWITCH.get(c)
            # Compare against None so a state constant numbered 0 would
            # still be honoured.
            if nstate is not None:
                state = nstate
            elif c == '\\':
                backslash_save_state = state
                state = BACKSLASH
            elif c not in VALID_PTTEXT:
                if c != '<':  # not beginning of next tag?
                    raise DTLexicalError(lineno=getLineno(text, offset),
                                         msg="invalid character |%s| in tag" %
                                         c)
                else:
                    raise DTLexicalError(lineno=getLineno(text, offset),
                                         msg="found < in tag")
        elif state == GOTCCOL:
            if c != ">":
                # A ':' that is not followed by '>' is tolerated as part
                # of the tag text rather than reported as an error.
                # NOTE(review): the character after the ':' is consumed
                # here without being re-examined as tag text (e.g. an
                # opening quote or an invalid character) -- confirm
                # whether it should be reprocessed.
                state = PLAINTAGTEXT
            else:
                textoffset = offset + 1
                # Emit the tag token; contents skip "<:" and ":>".
                ret.append(
                    [tagoffset, offset + 1, 1, text[tagoffset + 2:offset - 1]])
                state = INIT

        elif state in (SQUOT, DQUOT, BQUOT):
            if c == '\\':
                backslash_save_state = state
                state = BACKSLASH

            elif c == QUOTE_CLOSE[state]:
                state = PLAINTAGTEXT

        elif state == BACKSLASH:
            # The escaped character is swallowed; resume the saved state.
            state = backslash_save_state

        offset = offset + 1

    if state == GOTLT:
        # A trailing '<' never turned into a tag; it is ordinary text.
        state = INIT

    if state != INIT:
        # Every state the scan can end in has a description, so an
        # unterminated construct is always reported as DTLexicalError
        # (never as a KeyError from this lookup).
        md = {
            BEGINCOMMENTCHECK: "text",
            PLAINTAGTEXT: "text",
            SQUOT: "in single quoted area",
            DQUOT: "in double quoted area",
            BQUOT: "in expression area",
            GOTCCOL: "after closing colon",
            BACKSLASH: "after backslash",
            INCOMMENT: "in comment",
            INCOMMENTSTAR: "in comment",
            INCOMMENTSTARCOL: "in comment",
        }
        raise DTLexicalError(
            msg="hit end of file %s in tag, beginning of "
            "tag %s at" % (md[state], text[tagoffset:tagoffset + 10] + "..."),
            lineno=getLineno(text, tagoffset))

    # Whatever follows the last tag/comment is the final text token.
    ret.append([textoffset, offset, 0, text[textoffset:offset]])
    return ret
Beispiel #2
0
def processTag(tagString, text, st, end):
    """Break the inside of a tag into its name and its arguments.

    Leading whitespace is skipped with ``wsre``, the tag name is matched
    with ``plainre`` and the remainder is consumed piece by piece with
    ``tagContentsRe``.  Match results are indexed by group and yield
    ``(start, end)`` pairs, with ``(-1, -1)`` marking a group that did not
    take part in the match.

    Args:
        tagString: the tag text to parse.
        text: passed to ``getLineno`` together with ``st`` when an error
            is reported.
        st: offset handed to ``getLineno`` for error reporting.
        end: not used by this function.

    Returns:
        A ``(tagname, tupargs, dictargs)`` triple: the tag name, the list
        of positional arguments and the dict of ``name=value`` arguments.
        Quoted values are passed through ``dequote``.

    Raises:
        DTLexicalError: if no tag name is found or part of the tag cannot
            be matched.
    """
    unmatched = (-1, -1)
    positional = []
    keyword = {}
    total = len(tagString)
    pos = 0

    # Step over any leading whitespace, then require a plain tag name.
    while True:
        blank = wsre.match(tagString, pos, total, pcre.ANCHORED)
        if blank:
            pos = blank[0][1]
            continue
        name_match = plainre.match(tagString, pos, total, pcre.ANCHORED)
        if not name_match:
            raise DTLexicalError(msg='invalid tag name',
                                 tagtext=tagString,
                                 lineno=getLineno(text, st))
        break

    name_span = name_match[0]
    tagname = tagString[name_span[0]:name_span[1]]
    pos = name_span[1]

    # Consume the arguments; each branch advances ``pos`` past its piece.
    while pos < total:
        piece = tagContentsRe.match(tagString, pos, total, pcre.ANCHORED)
        if piece is None:
            raise DTLexicalError(msg='invalid tag text',
                                 tagtext=tagString,
                                 lineno=getLineno(text, st))

        if piece[WHITESPACE] != unmatched:
            _, pos = piece[WHITESPACE]
            continue

        if piece[PLAIN] != unmatched:
            lo, hi = piece[PLAIN]
            positional.append(tagString[lo:hi])
            pos = hi
            continue

        if piece[PLAINEQ] != unmatched:
            # name=value with an unquoted value
            key_lo, key_hi = piece[PLAINEQ]
            val_lo, val_hi = piece[PLAINEQVAL]
            keyword[tagString[key_lo:key_hi]] = tagString[val_lo:val_hi]
            pos = val_hi
            continue

        if piece[PLAINEQQ] != unmatched:
            # name=value with a quoted value
            key_lo, key_hi = piece[PLAINEQQ]
            val_lo, val_hi = piece[PLAINEQQVAL]
            keyword[tagString[key_lo:key_hi]] = dequote(
                tagString[val_lo:val_hi])
            pos = val_hi
            continue

        if piece[QUOTONLY] != unmatched:
            # bare quoted positional argument
            lo, hi = piece[QUOTONLY]
            positional.append(dequote(tagString[lo:hi]))
            pos = hi
            continue

        raise DTLexicalError(msg='unknown text in tag',
                             tagtext=tagString[pos:pos + 10],
                             lineno=getLineno(text, st))

    return tagname, positional, keyword
def processTag(tagString, text, st, end):
    """Parse the inside of a tag into a name plus its arguments.

    Leading whitespace is skipped with ``wsre``, the tag name is taken from
    a ``plainre`` match, and the rest of ``tagString`` is consumed one
    ``tagContentsRe`` match at a time, looking at the named groups to
    decide what kind of argument was found.

    Args:
        tagString: the tag text to parse.
        text: passed to ``getLineno`` together with ``st`` when an error
            is reported.
        st: offset handed to ``getLineno`` for error reporting.
        end: not used by this function.

    Returns:
        A ``(tagname, tupargs, dictargs)`` triple: the tag name, the list
        of positional arguments and the dict of ``name=value`` arguments.
        Quoted values are passed through ``dequote``.

    Raises:
        DTLexicalError: if no tag name is found or part of the tag cannot
            be matched.
    """
    positional = []
    keyword = {}
    length = len(tagString)
    pos = 0

    # Step over any leading whitespace, then require a plain tag name.
    while True:
        blank = wsre.match(tagString, pos, length)
        if blank:
            pos = blank.end()
            continue
        name_match = plainre.match(tagString, pos, length)
        if not name_match:
            raise DTLexicalError(msg='invalid tag name',
                                 tagtext=tagString,
                                 lineno=getLineno(text, st))
        break

    tagname = name_match.group()
    pos = name_match.end()

    # Consume the arguments; each branch advances ``pos`` past its piece.
    while pos < length:
        found = tagContentsRe.match(tagString, pos, length)
        if found is None:
            raise DTLexicalError(msg='invalid tag text',
                                 tagtext=tagString,
                                 lineno=getLineno(text, st))
        groups = found.groupdict()

        if groups[WHITESPACE] is not None:
            pos = found.end(WHITESPACE)
            continue

        bare = groups[PLAIN]
        if bare is not None:
            positional.append(bare)
            pos = found.end(PLAIN)
            continue

        # name=value with an unquoted value
        key, value = groups[PLAINEQ], groups[PLAINEQVAL]
        if key is not None:
            assert value is not None
            keyword[key] = value
            pos = found.end(PLAINEQVAL)
            continue

        # name=value with a quoted value
        key, value = groups[PLAINEQQ], groups[PLAINEQQVAL]
        if key is not None:
            assert value is not None
            keyword[key] = dequote(value)
            pos = found.end(PLAINEQQVAL)
            continue

        # bare quoted positional argument
        quoted = groups[QUOTONLY]
        if quoted is not None:
            positional.append(dequote(quoted))
            pos = found.end(QUOTONLY)
            continue

        raise DTLexicalError(msg='unknown text in tag',
                             tagtext=tagString[pos:pos + 10],
                             lineno=getLineno(text, st))

    return tagname, positional, keyword