def addDelta(self, node, input, dest):
    """Add transitions leaving ``node`` on symbol ``input``.

    ``dest`` may be a single Node or a set of Nodes. The destinations are
    merged with whatever is already registered for ``(node, input)``.
    An empty set is accepted and leaves the transition table untouched.

    A set passed as ``dest`` is copied before it is stored, so the table
    never shares a set object with the caller.

    Raises:
        AutomataError: if ``input`` is not in the charset, if ``node`` is
            not a Node, or if ``dest`` is neither a Node nor a set of Nodes.
    """
    if input not in self._charset:
        raise AutomataError('%s not in charset.' % input)
    if type(node) is not Node:
        raise AutomataError('Delta source must be Node, not %s.' %
                            type(node).__name__)

    if type(dest) is set and all(type(i) is Node for i in dest):
        if not dest:
            # Nothing to add; do not create empty table entries.
            return
        transitions = self._deltas.setdefault(node, {})
        if input in transitions:
            transitions[input] = transitions[input].union(dest)
        else:
            # Copy: storing the caller's set directly would let a later
            # in-place add on the table modify the caller's object (and
            # let the caller's own changes leak into the table).
            transitions[input] = set(dest)
    elif type(dest) is Node:
        self._deltas.setdefault(node, {}).setdefault(input, set()).add(dest)
    else:
        raise AutomataError(
            'Delta destination must be a Node or a set of nodes, not %s.' %
            type(dest).__name__)
def toGraphViz(self, size=None):
    """Return Graphviz ``dot`` source describing this machine.

    Nodes are numbered by their position in ``list(self._nodes)`` and
    labelled with their ``label`` attribute; terminal nodes are drawn as
    double circles. An invisible ``""`` node points at the start node.
    Edges that leave a non-start terminal, or that enter the start node,
    are written with ``constraint = false``.

    Args:
        size: optional ``(x, y)`` tuple written as the graph ``size``.

    Raises:
        AutomataError: if the machine is not valid, or if ``size`` is
            truthy but not a tuple.
    """
    if not self.isValid():
        raise AutomataError('Machine is not in a valid state.')
    if size and type(size) is not tuple:
        raise AutomataError('Size must be a tuple of x,y.')

    ordered = list(self._nodes)
    startIndex = ordered.index(self._start)
    buf = StringIO()
    emit = buf.write

    # Graph header and default node/edge attributes.
    emit('digraph NFA {\n')
    emit(' rankdir = LR;\n')
    emit(' root = %s;\n' % startIndex)
    if size:
        emit(' size = "%s,%s";\n' % size)
    emit('\n')
    emit(' node [shape = circle fontname = "Lucida Console"];\n')
    emit(' edge [fontname = "Lucida Console" arrowhead = vee];\n')
    emit('\n')

    # One declaration per node.
    for index, node in enumerate(ordered):
        if node in self._terminals:
            template = ' %d [shape = doublecircle label ="%s"];\n'
        else:
            template = ' %d [label = "%s"];\n'
        emit(template % (index, node.label))
    emit('\n')

    # Invisible source node whose only edge marks the start node.
    emit(' "" [style = invis width = 0 height = 0];\n')
    emit('\n')
    emit(' "" -> %d [arrowsize = 1.5 penwidth = 0];\n' % startIndex)
    emit('\n')

    # One edge per (source, symbol, destination) triple.
    for origin, bySymbol in self._deltas.items():
        for symbol, targets in bySymbol.items():
            for target in targets:
                src = ordered.index(origin)
                dest = ordered.index(target)
                # A falsy symbol is drawn as a lambda.
                label = symbol or u'\u03bb'.encode('utf8')
                leavesTerminal = (origin in self._terminals and
                                  origin != self._start)
                if leavesTerminal or target == self._start:
                    emit(' %d -> %d [label = "%s" constraint = false];\n'
                         % (src, dest, label))
                else:
                    emit(' %d -> %d [label = "%s"];\n' % (src, dest, label))
    emit('\n')
    emit('}\n')

    return buf.getvalue()
def addDelta(self, node, input, dest):
    """Set the single transition from ``node`` on ``input`` to ``dest``.

    Any transition already stored for ``(node, input)`` is overwritten.

    Raises:
        AutomataError: if ``input`` is not in the charset, or if ``node``
            or ``dest`` is not a Node.
    """
    if input not in self._charset:
        raise AutomataError('%s not in charset.' % input)
    if type(node) is not Node or type(dest) is not Node:
        raise AutomataError('Delta source and destination must be Nodes.')

    # Create the per-node table on first use, then record the target.
    self._deltas.setdefault(node, {})[input] = dest
def remDelta(self, node, input):
    """Remove the transition leaving ``node`` on symbol ``input``, if any.

    When this was the node's last transition, the node's entry in the
    transition table is dropped as well. A missing transition is ignored.

    Raises:
        AutomataError: if ``input`` is not in the charset or ``node`` is
            not a Node.
    """
    if input not in self._charset:
        raise AutomataError('%s not in charset.' % input)
    if type(node) is not Node:
        raise AutomataError('Delta source must be a Node, not %s' %
                            type(node).__name__)

    if node not in self._deltas:
        return
    outgoing = self._deltas[node]
    if input not in outgoing:
        return

    del outgoing[input]
    if not outgoing:
        # Do not keep empty per-node tables around.
        del self._deltas[node]
def remDelta(self, node, input):
    """Remove the transition leaving ``node`` that is labelled ``input``.

    ``input`` is validated character by character: every character must
    be in the charset or be one of ``( ) + *``. A missing transition is
    ignored; a node left without transitions loses its table entry.

    Raises:
        AutomataError: if ``input`` contains a disallowed character or
            ``node`` is not a Node.
    """
    permitted = self._charset.union('()+*')
    if not set(input).issubset(permitted):
        raise AutomataError('%s contains symbols not in charset.' % input)
    if type(node) is not Node:
        raise AutomataError('Delta source must be a Node, not %s' %
                            type(node).__name__)

    if node not in self._deltas:
        return
    outgoing = self._deltas[node]
    if input not in outgoing:
        return

    del outgoing[input]
    if not outgoing:
        # Do not keep empty per-node tables around.
        del self._deltas[node]
def apply(self, input, start):
    """Run the machine over ``input`` from ``start``; return the final state.

    Args:
        input: iterable of symbols (a string, list, tuple or any other
            iterable); each symbol must be in the charset.
        start: state to begin from.

    Returns:
        The state reached after consuming every symbol; ``start`` itself
        when ``input`` is empty.

    Raises:
        AutomataError: if the machine is not valid or a symbol is not in
            the charset.
    """
    if not self.isValid():
        raise AutomataError('Machine is not in a valid state.')

    state = start
    # Iterate directly instead of repeatedly slicing off the first
    # symbol, which copied the remaining input on every step.
    for symbol in input:
        if symbol not in self._charset:
            raise AutomataError('Invalid symbol in input: %s.' % symbol)
        state = self._deltas[state][symbol]
    return state
def getDelta(self, node):
    """Return the transition table of ``node``.

    The stored mapping itself is returned (not a copy) when the node has
    transitions; otherwise a fresh empty dict is returned.

    Raises:
        AutomataError: if ``node`` is not a Node.
    """
    if type(node) is not Node:
        raise AutomataError('Delta source must be a Node, not %s' %
                            type(node).__name__)
    return self._deltas.get(node, {})
def apply(self, input, start):
    """Run the NFA over ``input`` from the state set ``start``.

    Lambda (``''``) transitions are followed before every symbol and once
    more after the last one.

    Args:
        input: iterable of symbols. It is only read, never modified.
        start: set of Nodes to begin from.

    Returns:
        The set of states the machine may be in after consuming ``input``;
        an empty set when ``start`` is empty.

    Raises:
        AutomataError: if the machine is not valid, if ``start`` is not a
            set of Nodes, or if a symbol is not in the charset.
    """
    if not self.isValid():
        raise AutomataError('Machine is not in a valid state.')
    if not (type(start) is set and all(type(i) is Node for i in start)):
        raise AutomataError(
            'NFA execution must start from a set of states.')
    if len(start) == 0:
        return set()

    def lambdaClosure(states):
        # Every state reachable from `states` through lambda transitions.
        closed = set(states)
        frontier = list(closed)
        while frontier:
            state = frontier.pop()
            for reached in self._deltas.get(state, {}).get('', ()):
                if reached not in closed:
                    closed.add(reached)
                    frontier.append(reached)
        return closed

    curStates = start
    # Iterating directly (rather than appending a sentinel and slicing)
    # keeps a list argument untouched and avoids copying the remaining
    # input on every step.
    for curSymbol in input:
        curStates = lambdaClosure(curStates)

        if curSymbol == '\x00':
            # Earlier versions used NUL as an internal end marker, so a
            # NUL in the input was skipped rather than rejected; keep that.
            continue
        if curSymbol not in self._charset:
            raise AutomataError('Invalid symbol in input: %s.' % curSymbol)

        nextStates = set()
        for state in curStates:
            if state in self._deltas and curSymbol in self._deltas[state]:
                nextStates = nextStates.union(self._deltas[state][curSymbol])
        curStates = nextStates

    # Final lambda step after the last symbol.
    return lambdaClosure(curStates)
def __init__(self, nfa):
    """Build a NetworkNFA from the state of an existing NFA.

    Nodes, transitions and terminals are copied from ``nfa``; the start
    node is shared. If every symbol of ``nfa.charset`` has length 1 the
    charset is copied as is; otherwise each symbol is wrapped in braces
    (``'{symbol}'``).

    Raises:
        AutomataError: if ``nfa`` is not exactly an NFA instance.
    """
    if type(nfa) is not NFA:
        raise AutomataError('Can create a NetworkNFA only from an NFA.')

    multiCharSymbols = [symbol for symbol in nfa.charset
                        if len(symbol) != 1]
    if multiCharSymbols:
        self._charset = set(['{%s}' % symbol for symbol in nfa._charset])
    else:
        self._charset = copy(nfa._charset)

    self._start = nfa._start
    self._nodes = copy(nfa._nodes)
    self._terminals = copy(nfa._terminals)
    self._deltas = copyDeltas(nfa._deltas)
def minimize(self):
    """Merge equivalent states in place and return the pair table.

    The method fills a lower-triangular table over the nodes (sorted by
    label). ``table[i][j]`` (with ``j < i``) ends up ``True`` when nodes
    ``i`` and ``j`` were found distinguishable and ``False`` when they
    were treated as equivalent. For every equivalent pair one node is
    removed from the machine and transitions that pointed at it are
    redirected to the other node.

    Returns:
        The table (list of lists) described above.

    Raises:
        AutomataError: if the machine is not in a valid state.
    """
    # NOTE: Algorithm not optimal (and messy). Should be refactored later.
    if not self.isValid():
        raise AutomataError('Machine is not in a valid state.')

    nodes = sorted(list(self._nodes), key=Node._getLabel)
    count = len(nodes)
    table = [[False for j in range(count)] for i in range(count)]
    unresolved = 0

    # Initial table: a pair with exactly one terminal is distinguishable
    # right away. Every other pair gets a list of the successor pairs
    # (larger index first) that its answer depends on.
    for i in range(1, count):
        for j in range(i):
            if len(
                    set([nodes[i], nodes[j]]).intersection(self._terminals)) == 1:
                table[i][j] = True
            else:
                unresolved += 1
                table[i][j] = []
                for char in self._charset:
                    m = nodes.index(self.apply([char], nodes[i]))
                    n = nodes.index(self.apply([char], nodes[j]))
                    # Normalise so that m >= n (lower triangle).
                    if n > m:
                        t = n
                        n = m
                        m = t
                    # Both nodes reaching the same successor says nothing.
                    if m != n:
                        table[i][j].append((m, n))

    # Resolve all unresolved table cells
    while unresolved > 0:
        for i in range(1, count):
            for j in range(0, i):
                if table[i][j] == []:
                    # No dependencies left: the pair is equivalent.
                    table[i][j] = False
                    unresolved -= 1
                elif type(table[i][j]) is list:
                    for c in range(len(table[i][j])):
                        # The list shrinks through pop() below while the
                        # range was computed up front, so guard the index.
                        if c > len(table[i][j]) - 1:
                            break
                        m = table[i][j][c][0]
                        n = table[i][j][c][1]
                        if table[m][n] is True:
                            # A distinguishable successor pair makes this
                            # pair distinguishable too.
                            table[i][j] = True
                            unresolved -= 1
                            break
                        elif table[m][n] is False or table[m][n] == []:
                            # Dependency is equivalent: drop it.
                            table[i][j].pop(c)
                        elif type(table[m][n]) is list:
                            # Dependency is itself unresolved: replace it
                            # with its own dependencies.
                            table[i][j].pop(c)
                            table[i][j] += table[m][n]

    # Remove equivalent states
    for i in range(1, count):
        for j in range(0, i):
            if table[i][j] == False:
                # Keep the start node: if it is the one about to be
                # removed, swap the roles of the two nodes.
                # NOTE(review): this rebinds the outer loop variable `i`
                # for the remaining iterations of the inner loop - confirm
                # that this is intended.
                if self._start == nodes[i]:
                    t = j
                    j = i
                    i = t
                if nodes[i] in self._nodes:
                    self._nodes.remove(nodes[i])
                    del self._deltas[nodes[i]]
                    if nodes[i] in self._terminals:
                        self._terminals.remove(nodes[i])
                    # Redirect transitions that targeted the removed node.
                    for src in self._deltas:
                        for char in self._deltas[src]:
                            if self._deltas[src][char] == nodes[i]:
                                self._deltas[src][char] = nodes[j]
    return table
def nfa2regex(nfa):
    """Convert ``nfa`` into a regular-expression string.

    The NFA is copied into a NetworkNFA, a dummy start node ``qs`` and a
    dummy end node ``qf`` are added, parallel edges are collapsed into
    single regex-labelled edges, and then every remaining intermediate
    node is removed one at a time while the edges through it are replaced
    by edges carrying the concatenated labels. The labels left on the
    start node are finally joined, factoring out a common prefix/suffix.

    In the produced expression ``+`` separates alternatives, ``*`` marks
    repetition, ``LAMBDA`` stands for an empty branch and ``PHI`` is
    returned when there is no branch at all.

    Raises:
        AutomataError: if ``nfa`` is not in a valid state.
    """
    if not nfa.isValid():
        raise AutomataError(
            'NFA must be in a valid state to be converted to a regex.')

    network = NetworkNFA(nfa)

    if DEBUG:
        print('START', network)

    # An earlier variant only added the dummy end node when there was more
    # than one terminal; the unconditional version below supersedes it.

    # Add a dummy start and end nodes
    start = Node('qs')
    network.addNode(start)
    network.addDelta(start, '', network.start)
    network.start = start

    end = Node('qf')
    network.addNode(end)
    # Every former terminal gets a lambda edge to the single new terminal.
    # NOTE(review): terminals are removed while iterating
    # network.terminals; presumably that property returns a copy - confirm.
    for i in network.terminals:
        network.addDelta(i, '', end)
        network.remTerminal(i)
    network.addTerminal(end)
    if DEBUG:
        print('Dummies added: ', network)

    # Collapse connections: for each (src, dest) pair, merge all non-lambda
    # symbols leading from src to dest into one alternation-labelled edge.
    for src in network.nodes:
        delta_temp = network.getDelta(src)
        for dest in network.nodes:
            chars = []
            for input in delta_temp:
                if input and dest in delta_temp[input]:
                    chars.append(input)

            if len(chars):
                # Drop the individual edges before adding the merged one.
                for c in chars:
                    delta_temp[c].remove(dest)
                    if len(delta_temp[c]) == 0:
                        del delta_temp[c]

                if len(chars) > 1:
                    chars = '(' + '+'.join(chars) + ')'
                else:
                    chars = '+'.join(chars)
                network.addDelta(src, chars, dest)
    if DEBUG:
        print('Collapsed: ', network)

    # Collect pliable nodes: everything except the start and the terminals.
    pliableNodes = list(network.nodes)
    pliableNodes.remove(network.start)
    for n in network.terminals:
        pliableNodes.remove(n)

    # Build a distance-from-terminal table
    nodeFinalDist = {}
    # Lazy stand-in for "infinity": marks a node with no known distance.
    maxDist = len(network.nodes)**len(network.nodes)
    for n in network.nodes:
        nodeFinalDist[n] = maxDist

    nodeFinalDist[network.terminals[0]] = 0
    toProcess = list(network.nodes)
    toProcess.remove(network.terminals[0])

    # Repeated passes: a node gets (smallest known destination distance
    # + 1) and leaves the work list as soon as one of its destinations
    # has a known distance, or when it has no destinations at all.
    # NOTE(review): elements are removed from toProcess while it is being
    # iterated, so a pass can skip nodes (later passes pick them up).
    # NOTE(review): if some remaining node only ever has destinations
    # without a known distance, no pass removes anything and this loop
    # would not terminate - confirm such inputs cannot occur.
    # NOTE(review): reduce is used as a builtin (Python 2).
    while len(toProcess):
        for node in toProcess:
            dests = network.getDelta(node).values()
            if len(dests) == 0:
                dests = set([])
            else:
                dests = reduce(set.union, network.getDelta(node).values())

            if len(dests) == 0:
                toProcess.remove(node)
            else:
                minDist = min([nodeFinalDist[i] for i in dests])
                if minDist != maxDist:
                    nodeFinalDist[node] = minDist + 1
                    toProcess.remove(node)

    # Sort pliable nodes by distance from terminal (farthest first)
    pliableNodes.sort(key=lambda x: nodeFinalDist[x], reverse=True)
    if DEBUG:
        print('Pliables: ', pliableNodes)

    for node in pliableNodes:
        # Remove Node
        network.remNode(node)

        # Save delta
        # NOTE(review): the node's transitions are read after remNode();
        # presumably remNode() leaves network._deltas alone - confirm.
        delta = copy(network.getDelta(node))

        # Convert loops to regex: non-empty labels of self-loops are
        # joined with '+' and starred.
        loops = []
        for input in delta:
            if node in delta[input]:
                if len(input):
                    loops.append(input)
        loopRegex = '+'.join(loops)
        if len(loopRegex) > 1 and not (loopRegex[0] == '(' and loopRegex[-1] == ')'):
            loopRegex = '(' + loopRegex + ')*'
        elif len(loopRegex) >= 1:
            loopRegex = loopRegex + '*'

        # Remove loops
        for input in copy(delta):
            if delta[input] == set([node]):
                del delta[input]
            elif node in delta[input]:
                delta[input].remove(node)

        # Search lambda-closure equivalence: if a lambda destination of
        # this node has a lambda edge back to it, replaceNode() is used
        # instead of the edge rewiring below.
        if '' in delta and (len(delta) != 1 or len(delta['']) != 1):
            eligible = []
            for dest in delta['']:
                delta_temp = network.getDelta(dest)
                if '' in delta_temp and node in delta_temp['']:
                    eligible.append(dest)

            if len(eligible):
                replaceNode(network, node, eligible[0])
                continue

        # Remove delta
        try:
            del network._deltas[node]
        except KeyError:  # No deltas remaining, had only loops
            continue

        if DEBUG:
            print('Working on connections: ', node, delta)
        # Check all possible connections through this node: every edge
        # src -> node is replaced by edges src -> dest labelled
        # (incoming label)(loop regex)(outgoing label).
        # Iterate over a snapshot because the live table is modified.
        deltas_temp = copyDeltas(network._deltas)
        for src in deltas_temp:
            for input in deltas_temp[src]:
                tempDeltaDest = network.getDelta(src)[input]
                if node in tempDeltaDest:
                    tempDeltaDest.remove(node)
                    if len(tempDeltaDest) == 0:
                        network.remDelta(src, input)

                    for input2 in delta:
                        for dest in delta[input2]:
                            # Skip a pure lambda self-loop.
                            if not (src == dest and (input + loopRegex + input2) == ''):
                                network.addDelta(
                                    src, input + loopRegex + input2, dest)
                                if DEBUG:
                                    print('New Delta:', src, input,
                                          loopRegex, input2, dest, network)

    # Extract common prefix/suffix
    # NOTE(review): branches[0] needs keys() to return a list (Python 2);
    # a Python 3 dict view cannot be indexed.
    branches = network.getDelta(network.start).keys()
    if len(branches) == 1:
        regex = branches[0]
    else:
        prefix = commonprefix(branches)
        suffix = commonsuffix(branches)
        # Strip the shared prefix and suffix from every branch.
        branches = [
            i[len(prefix):-len(suffix)] if len(suffix) else
            i[len(prefix):] for i in branches
        ]
        branches.sort(key=len)
        if len(prefix) or len(suffix):
            regex = prefix + \
                '(' + '+'.join([i or LAMBDA for i in branches]) + ')' + suffix
        else:
            # No branches at all yields PHI.
            regex = '+'.join([i or LAMBDA for i in branches]) or PHI

    return regex
def apply(self, input, start):
    """Refuse to run: this machine cannot be applied to input directly.

    Raises:
        AutomataError: always, whatever ``input`` and ``start`` are.
    """
    raise AutomataError(
        'NetworkNFA does not allow direct application.')