コード例 #1
0
ファイル: dfa.py プロジェクト: ianloic/llvm-fnmatch
def test_distinctArcs():
  '''Check distinctArcs on empty, single-arc, disjoint and overlapping input.'''
  from nfa import NFAState
  including = CharacterSet.including
  excluding = CharacterSet.excluding
  # three states are created, although only the first two appear in arcs
  first, second, _spare = NFAState(), NFAState(), NFAState()

  # no arcs at all
  assert distinctArcs({}) == {}

  # a single arc: the expected value still carries the states as a list
  lone_arc = {excluding(''): [first]}
  outcome = distinctArcs(lone_arc)
  assert outcome == {excluding(''): [first]}

  # two disjoint charsets: each keeps its own state, now held in a set
  disjoint_arcs = {including('abc'): [first], including('def'): [second]}
  outcome = distinctArcs(disjoint_arcs)
  assert outcome == {
      including('abc'): set([first]),
      including('def'): set([second]),
  }

  # overlapping charsets: 'abc' is reachable through both original arcs,
  # everything else only through the first one
  overlapping_arcs = {excluding(''): [first], including('abc'): [second]}
  outcome = distinctArcs(overlapping_arcs)
  assert outcome == {
      excluding('abc'): set([first]),
      including('abc'): set([first, second]),
  }
コード例 #2
0
ファイル: nfa.py プロジェクト: ianloic/llvm-fnmatch
 def fnmatch(klass, s):
   '''Create an NFA state machine representing the fnmatch pattern @s.

   The pattern is consumed left to right.  Every pattern element gets a
   fresh NFAState that is appended to nfa.states and linked from the
   previous state:

     ?      arc on CharacterSet.excluding('')
     *      arc on CharacterSet.excluding('') plus a self-loop on the
            new state over the same set
     \\x     arc on the literal character x
     [...]  arc on the listed characters and a-b ranges; a leading '!'
            inverts the set
     other  arc on that literal character

   Raises ValueError when the pattern ends right after a backslash or
   inside a bracket expression.

   NOTE(review): no return statement is visible in this part of the
   file, so as shown the constructed NFA is not handed back -- confirm
   against the complete method.
   NOTE(review): as written '*' always consumes one character before
   looping; there is no arc for the zero-length case -- confirm this is
   intended.
   '''
   nfa = NFA(NFAState())
   state = nfa.initial
   new_state = None
   chars = list(s)
   while chars:
     c = chars.pop(0)
     new_state = NFAState()
     nfa.states.append(new_state)
     if c == '?':
       # single-character wildcard
       state.add(CharacterSet.excluding(''), new_state)
     elif c == '*':
       # multi-character wildcard
       state.add(CharacterSet.excluding(''), new_state)
       new_state.add(CharacterSet.excluding(''), new_state)
     elif c == '\\':
       # treat the next character literally
       if not chars:
         raise ValueError('escape at end of string')
       c = chars.pop(0)
       state.add(CharacterSet.including(c), new_state)
     elif c == '[':
       # bracket expression; running out of characters before the
       # closing ']' surfaces as IndexError from chars.pop(0)
       try:
         c = chars.pop(0)
         if c == '!': # inverted
           inverted = True
           c = chars.pop(0)
         else:
           inverted = False
         charset = CharacterSet.including('')
         last_char = None
         while c != ']':
           if c == '-' and last_char is not None:
             # range: last_char was already added as a literal, now
             # extend the set up to the character after the '-'
             c = chars.pop(0)
             charset = charset.union(CharacterSet.range(last_char, c))
             last_char = None
             c = chars.pop(0)
             continue
           charset = charset.union(CharacterSet.including(c))
           last_char = c # save last character
           # read from the same buffer as every other branch
           c = chars.pop(0)
       except IndexError:
         raise ValueError('unterminated bracket expression')
       if inverted:
         state.add(CharacterSet.excluding('') - charset, new_state)
       else:
         state.add(charset, new_state)
     else:
       state.add(CharacterSet.including(c), new_state)
     state = new_state
コード例 #3
0
ファイル: dfa.py プロジェクト: ianloic/llvm-fnmatch
def distinctArcs(arcs):
  '''Rewrite a dict of arcs so that its character sets no longer overlap.

  arcs maps charset -> collection of states.  The result maps
  charset -> set of states and represents an equivalent mapping, but the
  new charsets form a partition of the union of the original charsets,
  where each new charset is a subset of one or more of the old ones.
  Pieces of the partition that lead to the same set of states are merged
  into a single charset.

  When arcs has fewer than two entries it is returned unchanged (same
  object, values not converted to sets).

  Raises AssertionError if the computed result violates the contract
  (overlapping charsets, or a union that differs from the input union).
  '''

  # nothing to do with one or zero children
  if len(arcs) < 2:
    return arcs

  partition = distinctCharacterSets(list(arcs))

  # now we have to stick the charsets back with the appropriate states
  charsets = {}
  for charset in partition:
    reachable = set()
    for original_charset, states in arcs.items():
      if (charset - original_charset).empty():
        # charset is a subset of original_charset
        reachable.update(states)
    charsets[charset] = reachable

  # now, if we have multiple charsets going to the same set of states we
  # should collapse the charsets (ie: union)
  charset_by_states_key = {}
  states_by_states_key = {}
  for charset, states in charsets.items():
    # sort the ids: a set has no defined iteration order, so two equal
    # sets could otherwise yield different keys and fail to collapse
    states_key = tuple(sorted(str(state.id) for state in states))
    states_by_states_key[states_key] = states
    if states_key in charset_by_states_key:
      charset_by_states_key[states_key] = charset_by_states_key[states_key].union(charset)
    else:
      charset_by_states_key[states_key] = charset

  # connect those mappings together
  result = {}
  for states_key, charset in charset_by_states_key.items():
    result[charset] = states_by_states_key[states_key]

  # check that our result matches our contract
  # make sure that none of our character sets intersect
  union_out = CharacterSet.including('') # empty set
  for cs in result:
    assert cs.disjoint(union_out)
    union_out = union_out.union(cs)
  # make sure that the union of result character sets == the union of the
  # input character sets
  union_in = CharacterSet.including('') # empty set
  for cs in arcs:
    union_in = union_in.union(cs)
  assert union_in == union_out

  return result