コード例 #1
0
ファイル: reTools.py プロジェクト: tml/osteele.com
def reMatchStatePairs(re, str):
    """Trace the match of str against re as a list of HTML snapshots.

    The regular expression is compiled with source positions recorded and
    the string is consumed one character at a time. For every character
    that leads to at least one next state, a triple (rem, strm, comment)
    is appended to the result list:

    - rem: fsa.label, HTML-quoted character by character, where the
      character at 0-based index j is wrapped in
      <SPAN CLASS="rematchnew"> if j+1 is in the positions reported for
      this step, in <SPAN CLASS="rematch"> if j+1 was reported by an
      earlier step, and left bare otherwise.
    - strm: the HTML-quoted input, with the prefix consumed so far
      wrapped in <SPAN CLASS="strmatch">, followed by an (always empty)
      <SPAN CLASS="strmatchnew"> and the unconsumed remainder.
    - comment: a debugging summary of the state and position sets
      before and after the step, plus the character index.

    Returns a pair (pairs, result). If some character leads to no next
    state, the function returns early with the snapshots collected so far
    and result set to the string 'expected ...' built from the labels of
    the transitions leaving the current states. Otherwise result is the
    list of current states that are in fsa.finalStates (empty when the
    string was consumed without ending in a final state).
    """
    # Built once: the helper does not depend on any per-character state.
    entities = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}

    def htmlQuote(text):
        # Replace the characters that are significant in HTML markup.
        return ''.join([entities.get(c, c) for c in text])

    pairs = []
    fsa = compileRE(re, recordSourcePositions=1)
    states = fsa.epsilonClosure(fsa.initialState)
    positions = []  # todo: everything that starts here?
    for i, char in enumerate(str):
        # todo: factor the following block with fsa.nextStateSet
        # updatePositions is handed an empty list and is expected to fill
        # it with the source positions matched by this step (its
        # definition is outside this block -- confirm there).
        newPositions = []
        newStates = updatePositions(fsa, states, char, newPositions)
        if not newStates:
            # We ran out of matches: combine the labels of every
            # transition out of the current states to report what could
            # have been consumed here.
            # todo: show in red where the match stopped, as in the textual version
            expected = None
            for state in states:
                for t in fsa.transitionsFrom(state):
                    label = t[2]
                    if expected:
                        expected = expected + label
                    else:
                        expected = label
            return pairs, 'expected %s' % expected
        srcLabel = fsa.label
        # todo: could color newly matched states in a different color
        pieces = []
        for j, srcChar in enumerate(srcLabel):
            c = htmlQuote(srcChar)
            # Positions are compared against j+1, i.e. they are 1-based.
            if j+1 in newPositions:
                pieces.append('<SPAN CLASS="rematchnew">%s</SPAN>' % c)
            elif j+1 in positions:
                pieces.append('<SPAN CLASS="rematch">%s</SPAN>' % c)
            else:
                pieces.append(c)
        rem = ''.join(pieces)
        s0, s1, s2 = htmlQuote(str[:i+1]), '', htmlQuote(str[i+1:])
        strm = '<SPAN CLASS="strmatch">%s</SPAN><SPAN CLASS="strmatchnew">%s</SPAN>%s' % (s0, s1, s2)
        # The comment is formatted before positions is extended below, so
        # it shows the positions as they were before this step.
        comment = "states: %s -> %s; positions: %s -> %s; index = %d" % (states,newStates,positions,newPositions,i)
        pairs.append((rem, strm, comment))
        states = newStates
        positions += newPositions
    return pairs, [s for s in states if s in fsa.finalStates]
コード例 #2
0
ファイル: reTools.py プロジェクト: tml/osteele.com
def traceREStates(re, str, trace=1):
    """Print how far str can be matched against the regular expression re.

    The expression is compiled with source positions recorded and the
    string is fed to the resulting FSA one character at a time.

    - While a character leads to a non-empty next state set, and trace is
      true, a line '<label with cursor> matches <str with a "." after the
      consumed prefix>' is printed.
    - At the first character that leads to no next state, a line
      '<label with cursor> stops matching at <str with a "." before that
      character> ; expected <set>' is printed and the trace ends. <set>
      is the union of the non-empty labels on transitions leaving the
      current states. This line is printed regardless of trace.

    Nothing is returned; the function only prints.
    """
    fsa = compileRE(re, recordSourcePositions=1)
    states = fsa.epsilonClosure(fsa.initialState)
    for i, char in enumerate(str):
        newStates = fsa.nextStateSet(states, char)
        if newStates:
            if trace:
                # A single pre-formatted string keeps the output identical
                # under the Python 2 print statement and the Python 3
                # print function.
                print('%s matches %s' % (
                    fsaLabelWithCursor(fsa, newStates),
                    str[:i+1] + '.' + str[i+1:]))
            states = newStates
            continue
        # No transition consumes this character: gather everything that
        # could have been consumed from the current states.
        c = CharacterSet([])
        for s0 in states:
            for _, _, label in fsa.transitionsFrom(s0):
                if label:
                    c = c.union(label)
        print('%s stops matching at %s ; expected %s' % (
            fsaLabelWithCursor(fsa, states),
            str[:i] + '.' + str[i:],
            c))
        break
コード例 #3
0
ファイル: reTools.py プロジェクト: tml/osteele.com
def simplify(str):
    """Round-trip the regular expression str through a minimized FSA.

    The pattern is compiled with compileRE, the minimized() form of the
    result is taken, and decompileFSA turns that back into text. Every
    '?*' in that text is then rewritten as '*' before it is returned.
    """
    minimalFSA = compileRE(str).minimized()
    decompiled = decompileFSA(minimalFSA)
    # The '?*' -> '*' rewrite works around a bug in the simplification
    # (as noted by the original author).
    return decompiled.replace('?*', '*')
コード例 #4
0
def simplify(str):
    """Round-trip the regular expression str through a minimized FSA.

    The pattern is compiled with compileRE, the minimized() form of the
    result is taken, and whatever decompileFSA produces for it is
    returned unchanged.
    """
    minimalFSA = compileRE(str).minimized()
    return decompileFSA(minimalFSA)