def test_distinctArcs():
    '''Check distinctArcs on empty, single-arc, disjoint and overlapping inputs.'''
    from nfa import NFAState

    state1 = NFAState()
    state2 = NFAState()
    state3 = NFAState()

    # no arcs at all
    actual = distinctArcs({})
    assert actual == {}

    # a single arc is handed back as given (state list stays a list)
    actual = distinctArcs({CharacterSet.excluding(''): [state1]})
    expected = {CharacterSet.excluding(''): [state1]}
    assert actual == expected

    # two disjoint charsets keep their own states, now as sets
    actual = distinctArcs({
        CharacterSet.including('abc'): [state1],
        CharacterSet.including('def'): [state2],
    })
    expected = {
        CharacterSet.including('abc'): set([state1]),
        CharacterSet.including('def'): set([state2]),
    }
    assert actual == expected

    # overlapping charsets: the shared part 'abc' leads to both states,
    # everything else leads only to state1
    actual = distinctArcs({
        CharacterSet.excluding(''): [state1],
        CharacterSet.including('abc'): [state2],
    })
    expected = {
        CharacterSet.excluding('abc'): set([state1]),
        CharacterSet.including('abc'): set([state1, state2]),
    }
    assert actual == expected
def fnmatch(klass, s):
    '''create an NFA state machine representing the fnmatch pattern @s

    The pattern is consumed one element at a time; each element adds one
    new NFAState to the machine, reached from the previous state:

      ?        any single character
      *        any character, followed by a self-loop on any character
      \\c       the character c, literally
      [...]    any listed character; a-z ranges are supported
      [!...]   any character not listed
      c        the character c

    Returns the NFA that was built.

    Raises ValueError when the pattern ends in a lone backslash or
    contains a bracket expression with no closing ']'.
    '''
    nfa = NFA(NFAState())
    state = nfa.initial
    chars = list(s)

    while chars:
        c = chars.pop(0)
        new_state = NFAState()
        nfa.states.append(new_state)

        if c == '?':
            # single-character wildcard
            state.add(CharacterSet.excluding(''), new_state)
        elif c == '*':
            # multi-character wildcard
            # NOTE(review): as encoded, reaching new_state consumes at least
            # one character, so this looks like "one or more" rather than
            # fnmatch's "zero or more" - confirm against the NFA semantics.
            state.add(CharacterSet.excluding(''), new_state)
            new_state.add(CharacterSet.excluding(''), new_state)
        elif c == '\\':
            # treat the next character literally
            if not chars:
                raise ValueError('escape at end of string')
            c = chars.pop(0)
            state.add(CharacterSet.including(c), new_state)
        elif c == '[':
            # bracket expression; running out of characters anywhere inside
            # it means the closing ']' is missing
            try:
                c = chars.pop(0)
                if c == '!':
                    # inverted
                    inverted = True
                    c = chars.pop(0)
                else:
                    inverted = False
                charset = CharacterSet.including('')  # empty set
                last_char = None
                while c != ']':
                    if c == '-' and last_char:
                        # range: last_char '-' c
                        c = chars.pop(0)
                        charset = charset.union(CharacterSet.range(last_char, c))
                        last_char = None
                        c = chars.pop(0)
                        continue
                    charset = charset.union(CharacterSet.including(c))
                    last_char = c  # save last character
                    c = chars.pop(0)
            except IndexError:
                raise ValueError('unterminated bracket expression')
            if inverted:
                state.add(CharacterSet.excluding('') - charset, new_state)
            else:
                state.add(charset, new_state)
        else:
            # ordinary character
            state.add(CharacterSet.including(c), new_state)

        state = new_state

    # NOTE(review): no state is marked as accepting here; if NFA needs the
    # last state flagged as final, that is not visible in this function.
    return nfa
def distinctArcs(arcs):
    '''for a dict of arcs { charset->(state,state) } produce a new dict
    { charset->(state, state) } that represents an equivalent mapping but
    new charsets form a partition of the union of the original charsets,
    where each of the new sets is a subset of one or more of the old sets

    Charsets that end up leading to the same set of states are merged into
    a single charset.  Values of the result are sets of states.

    When @arcs has fewer than two entries it is returned as-is (the same
    object, with its values left untouched).
    '''
    # nothing to do with one or zero children
    if len(arcs) < 2:
        return arcs

    partition = distinctCharacterSets(list(arcs))

    # now we have to stick the charsets back with the appropriate states:
    # each piece of the partition gets the states of every original charset
    # that contains it
    charsets = {}
    for charset in partition:
        reachable = set()
        for original_charset, states in arcs.items():
            if (charset - original_charset).empty():
                # charset is a subset of original_charset
                reachable.update(states)
        charsets[charset] = reachable

    # now, if we have multiple charsets going to the same set of states we
    # should collapse the charsets (ie: union)
    charset_by_states_key = {}
    states_by_states_key = {}
    for charset, states in charsets.items():
        # sort the ids: two equal sets are not guaranteed to iterate in the
        # same order, and the key must be identical for them to be merged
        states_key = tuple(sorted([str(state.id) for state in states]))
        states_by_states_key[states_key] = states
        if states_key in charset_by_states_key:
            charset_by_states_key[states_key] = \
                charset_by_states_key[states_key].union(charset)
        else:
            charset_by_states_key[states_key] = charset

    # connect those mappings together
    result = {}
    for states_key, charset in charset_by_states_key.items():
        result[charset] = states_by_states_key[states_key]

    # check that our result matches our contract
    # make sure that none of our character sets intersect
    union_out = CharacterSet.including('')  # empty set
    for cs in result:
        assert cs.disjoint(union_out)
        union_out = union_out.union(cs)

    # make sure that the union of result character sets == the union of the
    # input character sets (built the same way as union_out)
    union_in = CharacterSet.including('')  # empty set
    for cs in arcs:
        union_in = union_in.union(cs)
    assert union_in == union_out

    return result