コード例 #1
0
    def addDelta(self, node, input, dest):
        """Add one or more transitions leaving `node` on symbol `input`.

        `dest` may be a single Node or a set of Nodes; the destinations are
        merged into whatever is already registered for (node, input). An
        empty set is accepted and adds nothing.

        The destination set is always stored as a copy, so the caller's set
        is never aliased by the transition table (previously a later
        single-Node addDelta would mutate the set the caller passed in).

        Raises:
            AutomataError: if `input` is not in the charset, if `node` is
                not a Node, or if `dest` is neither a Node nor a set of
                Nodes.
        """
        if input not in self._charset:
            raise AutomataError('%s not in charset.' % input)

        if type(node) is not Node:
            raise AutomataError('Delta source must be Node, not %s.' %
                                type(node).__name__)

        if type(dest) is set and all(type(i) is Node for i in dest):
            if not dest:
                # Nothing to add; do not create empty table entries.
                return
            transitions = self._deltas.setdefault(node, {})
            # union() builds a fresh set, so `dest` itself is never stored.
            transitions[input] = transitions.get(input, set()).union(dest)
        elif type(dest) is Node:
            transitions = self._deltas.setdefault(node, {})
            transitions.setdefault(input, set()).add(dest)
        else:
            raise AutomataError(
                'Delta destination must be a Node or a set of nodes, not %s.'
                % type(dest).__name__)
コード例 #2
0
    def toGraphViz(self, size=None):
        """Return GraphViz ``digraph`` source describing this machine.

        Nodes are numbered by their position in ``list(self._nodes)`` and
        labelled with their ``label`` attribute; terminals are drawn as
        double circles. An invisible ``""`` node provides the incoming
        arrow of the start state. Transitions on the empty symbol are
        labelled with a Greek lambda.

        Args:
            size: optional ``(x, y)`` tuple written as the graph ``size``.

        Raises:
            AutomataError: if the machine is not valid, or `size` is given
                but is not a tuple.
        """
        if not self.isValid():
            raise AutomataError('Machine is not in a valid state.')
        if size and type(size) is not tuple:
            raise AutomataError('Size must be a tuple of x,y.')

        # Label for empty-symbol transitions. It must be a *native* str:
        # encoding unconditionally yields bytes on Python 3, which "%s"
        # would render as b'\xce\xbb'. Only encode where str is bytes.
        lambdaLabel = u'\u03bb'
        if str is bytes:
            lambdaLabel = lambdaLabel.encode('utf8')

        nodes = list(self._nodes)
        start = nodes.index(self._start)

        out = StringIO()

        out.write('digraph NFA {\n')
        out.write('   rankdir = LR;\n')
        out.write('   root = %s;\n' % start)
        if size:
            out.write('   size = "%s,%s";\n' % size)
        out.write('\n')

        out.write('   node [shape = circle fontname = "Lucida Console"];\n')
        out.write('   edge [fontname = "Lucida Console" arrowhead = vee];\n')
        out.write('\n')

        for i, node in enumerate(nodes):
            if node in self._terminals:
                out.write('   %d [shape = doublecircle label ="%s"];\n' %
                          (i, node.label))
            else:
                out.write('   %d [label = "%s"];\n' % (i, node.label))
        out.write('\n')

        out.write('   "" [style = invis width = 0 height = 0];\n')
        out.write('\n')

        out.write('   "" -> %d [arrowsize = 1.5 penwidth = 0];\n' % start)
        out.write('\n')

        for nodeFrom in self._deltas:
            for char in self._deltas[nodeFrom]:
                label = char or lambdaLabel
                for nodeTo in self._deltas[nodeFrom][char]:
                    src = nodes.index(nodeFrom)
                    dest = nodes.index(nodeTo)
                    # Edges leaving a non-start terminal, or returning to
                    # the start state, are excluded from rank constraints.
                    if (nodeFrom in self._terminals and
                            nodeFrom != self._start) or nodeTo == self._start:
                        out.write(
                            '   %d -> %d [label = "%s" constraint = false];\n'
                            % (src, dest, label))
                    else:
                        out.write('   %d -> %d [label = "%s"];\n' %
                                  (src, dest, label))
        out.write('\n')

        out.write('}\n')

        return out.getvalue()
コード例 #3
0
    def addDelta(self, node, input, dest):
        """Set the single transition from `node` on `input` to `dest`.

        Any transition already registered for (node, input) is replaced.

        Raises:
            AutomataError: if `input` is not in the charset, or if either
                `node` or `dest` is not a Node.
        """
        if input not in self._charset:
            raise AutomataError('%s not in charset.' % input)

        bothAreNodes = type(node) is Node and type(dest) is Node
        if not bothAreNodes:
            raise AutomataError('Delta source and destination must be Nodes.')

        # Create the per-node table on first use, then (over)write the entry.
        self._deltas.setdefault(node, {})[input] = dest
コード例 #4
0
    def remDelta(self, node, input):
        """Remove the transition(s) leaving `node` on symbol `input`.

        Does nothing when no such transition exists. When the last
        transition of `node` is removed, the node's entry is dropped from
        the transition table.

        Raises:
            AutomataError: if `input` is not in the charset or `node` is
                not a Node.
        """
        if input not in self._charset:
            raise AutomataError('%s not in charset.' % input)

        if type(node) is not Node:
            raise AutomataError('Delta source must be a Node, not %s' %
                                type(node).__name__)

        if node not in self._deltas:
            return
        outgoing = self._deltas[node]
        if input not in outgoing:
            return

        outgoing.pop(input)
        if not outgoing:
            del self._deltas[node]
コード例 #5
0
    def remDelta(self, node, input):
        """Remove the transition(s) leaving `node` under label `input`.

        `input` is validated character by character: every character must
        belong to the charset or be one of ``( ) + *``. Does nothing when
        no transition with that label exists. When the last transition of
        `node` is removed, the node's entry is dropped from the table.

        Raises:
            AutomataError: if `input` contains a disallowed character or
                `node` is not a Node.
        """
        allowedSymbols = self._charset.union(set('()+*'))
        if set(input) - allowedSymbols:
            raise AutomataError('%s contains symbols not in charset.' % input)

        if type(node) is not Node:
            raise AutomataError('Delta source must be a Node, not %s' %
                                type(node).__name__)

        if node not in self._deltas:
            return
        outgoing = self._deltas[node]
        if input not in outgoing:
            return

        outgoing.pop(input)
        if not outgoing:
            del self._deltas[node]
コード例 #6
0
    def apply(self, input, start):
        """Run the machine over `input` from `start` and return the end state.

        Args:
            input: iterable of symbols (a string, list, or any iterable).
            start: state to begin from.

        Returns:
            The state reached after consuming every symbol; `start` itself
            when `input` is empty.

        Raises:
            AutomataError: if the machine is not valid or a symbol is not
                in the charset.
            KeyError: if the current state has no transition for a symbol.
        """
        if not self.isValid():
            raise AutomataError('Machine is not in a valid state.')

        curState = start
        # Iterate directly instead of re-slicing the remaining input on
        # every step (which copied the tail each time: O(n^2)).
        for curSymbol in input:
            if curSymbol not in self._charset:
                raise AutomataError('Invalid symbol in input: %s.' % curSymbol)

            curState = self._deltas[curState][curSymbol]

        return curState
コード例 #7
0
 def getDelta(self, node):
     """Return the transition table of `node`, keyed by input symbol.

     For a node with registered transitions this is the internal dict
     itself (not a copy); for a node without any, a new empty dict.

     Raises:
         AutomataError: if `node` is not a Node.
     """
     if type(node) is not Node:
         raise AutomataError('Delta source must be a Node, not %s' %
                             type(node).__name__)
     return self._deltas.get(node, {})
コード例 #8
0
    def apply(self, input, start):
        """Run the NFA over `input` from the state set `start`.

        Lambda (empty-symbol) transitions are followed before the first
        symbol, after every symbol, and after the last one.

        Args:
            input: iterable of symbols (string, list, tuple, ...). It is
                never modified.
            start: set of Nodes to begin from.

        Returns:
            The set of states the machine may be in after consuming all of
            `input`; an empty set when `start` is empty.

        Raises:
            AutomataError: if the machine is not valid, `start` is not a
                set of Nodes, or a symbol is not in the charset.
        """
        if not self.isValid():
            raise AutomataError('Machine is not in a valid state.')
        if not (type(start) is set and all(type(i) is Node for i in start)):
            raise AutomataError(
                'NFA execution must start from a set of states.')

        if len(start) == 0:
            return set()

        def lambdaClosure(states):
            # All states reachable from `states` via '' transitions only.
            closed = set(states)
            pending = list(closed)
            while pending:
                state = pending.pop()
                for reached in self._deltas.get(state, {}).get('', ()):
                    if reached not in closed:
                        closed.add(reached)
                        pending.append(reached)
            return closed

        # The previous implementation appended a '\x00' sentinel to `input`
        # to force a final closure pass. That mutated list arguments in
        # place, failed on tuples, and silently skipped any real '\x00' in
        # the input. An explicit closure after each step needs no sentinel.
        curStates = lambdaClosure(start)
        for curSymbol in input:
            if curSymbol not in self._charset:
                raise AutomataError('Invalid symbol in input: %s.' %
                                    curSymbol)

            nextStates = set()
            for state in curStates:
                nextStates.update(
                    self._deltas.get(state, {}).get(curSymbol, ()))

            curStates = lambdaClosure(nextStates)

        return curStates
コード例 #9
0
    def __init__(self, nfa):
        """Build a NetworkNFA as a copy of `nfa`.

        Nodes, terminals and transitions are copied from `nfa`; the start
        node is shared. The charset is copied as-is when every symbol has
        length 1; otherwise each symbol is wrapped in braces (``{sym}``).

        Raises:
            AutomataError: if `nfa` is not exactly an NFA instance.
        """
        if type(nfa) is not NFA:
            raise AutomataError('Can create a NetworkNFA only from an NFA.')

        onlySingleCharSymbols = all(
            len(symbol) == 1 for symbol in nfa.charset)
        if onlySingleCharSymbols:
            self._charset = copy(nfa._charset)
        else:
            self._charset = {'{%s}' % symbol for symbol in nfa._charset}

        self._start = nfa._start
        self._nodes = copy(nfa._nodes)
        self._terminals = copy(nfa._terminals)
        self._deltas = copyDeltas(nfa._deltas)
コード例 #10
0
    def minimize(self):
        """Merge states judged equivalent, in place, and return the pair table.

        A lower-triangular table is built over the nodes sorted by
        ``Node._getLabel``: for ``j < i``, ``table[i][j]`` ends up True when
        the pair (nodes[i], nodes[j]) was marked as distinguishable and
        False when it was not. For every False pair one of the two nodes is
        removed from ``_nodes``, ``_deltas`` and ``_terminals``, and
        transitions that pointed at it are redirected to the other node.

        Returns:
            The table (list of lists); cells with ``j >= i`` keep their
            initial False value.

        Raises:
            AutomataError: if the machine is not in a valid state.
        """
        # NOTE: Algorithm not optimal (and messy). Should be refactored later.

        if not self.isValid():
            raise AutomataError('Machine is not in a valid state.')

        nodes = sorted(list(self._nodes), key=Node._getLabel)
        count = len(nodes)
        table = [[False for j in range(count)] for i in range(count)]
        # Number of cells still holding a dependency list (not yet True/False).
        unresolved = 0

        # Initial Table
        for i in range(1, count):
            for j in range(i):
                # Exactly one of the two nodes is a terminal: mark the pair.
                if len(
                        set([nodes[i],
                             nodes[j]]).intersection(self._terminals)) == 1:
                    table[i][j] = True
                else:
                    unresolved += 1
                    # Otherwise record, per symbol, the pair of successor
                    # indices this cell depends on.
                    table[i][j] = []
                    for char in self._charset:
                        m = nodes.index(self.apply([char], nodes[i]))
                        n = nodes.index(self.apply([char], nodes[j]))

                        # Order the pair as (larger, smaller) so it indexes
                        # the lower triangle of the table.
                        if n > m:
                            t = n
                            n = m
                            m = t

                        # Identical successors carry no information.
                        if m != n:
                            table[i][j].append((m, n))

        # Resolve all unresolved table cells
        while unresolved > 0:
            for i in range(1, count):
                for j in range(0, i):
                    # No dependencies left: the cell becomes False.
                    if table[i][j] == []:
                        table[i][j] = False
                        unresolved -= 1
                    elif type(table[i][j]) is list:
                        for c in range(len(table[i][j])):
                            # The list shrinks via pop() below while the
                            # range was computed up front, hence this guard.
                            if c > len(table[i][j]) - 1:  # ugly patch
                                break

                            m = table[i][j][c][0]
                            n = table[i][j][c][1]
                            # A dependency that is True makes this cell True.
                            if table[m][n] is True:
                                table[i][j] = True
                                unresolved -= 1
                                break
                            # A False (or empty) dependency is simply dropped.
                            elif table[m][n] is False or table[m][n] == []:
                                table[i][j].pop(c)
                            # A still-pending dependency is replaced by its
                            # own dependency list.
                            # NOTE(review): after pop(c) the next entry moves
                            # into index c and is not examined until a later
                            # pass of the while loop - confirm this is fine.
                            elif type(table[m][n]) is list:
                                table[i][j].pop(c)
                                table[i][j] += table[m][n]

        # Remove equivalent states
        for i in range(1, count):
            for j in range(0, i):
                if table[i][j] == False:
                    # Swap so that the start node is the one being kept.
                    # NOTE(review): rebinding `i` here persists for the
                    # remaining iterations of this inner loop - confirm
                    # that is intended.
                    if self._start == nodes[i]:
                        t = j
                        j = i
                        i = t

                    # Drop nodes[i] unless an earlier pair already removed it.
                    if nodes[i] in self._nodes:
                        self._nodes.remove(nodes[i])
                        del self._deltas[nodes[i]]

                        if nodes[i] in self._terminals:
                            self._terminals.remove(nodes[i])

                    # Redirect every transition that targeted nodes[i].
                    for src in self._deltas:
                        for char in self._deltas[src]:
                            if self._deltas[src][char] == nodes[i]:
                                self._deltas[src][char] = nodes[j]

        return table
コード例 #11
0
def nfa2regex(nfa):
    """Convert `nfa` into a regular-expression string by eliminating states.

    The NFA is copied into a NetworkNFA whose transition labels are regex
    fragments. A dummy start node ('qs') and a single dummy terminal ('qf')
    are added, parallel edges are collapsed into ``(a+b)`` alternations,
    and every other node is removed one at a time (farthest from the
    terminal first) while the edges through it are rewritten as
    ``incoming + loop* + outgoing``. The labels left on the start node are
    finally joined, factoring out a common prefix/suffix.

    Args:
        nfa: the automaton to convert; it is not modified.

    Returns:
        The regex string; ``PHI`` when no labelled branch remains and
        ``LAMBDA`` stands in for an empty alternative.

    Raises:
        AutomataError: if `nfa` is not in a valid state.
    """
    if not nfa.isValid():
        raise AutomataError(
            'NFA must be in a valid state to be converted to a regex.')

    network = NetworkNFA(nfa)

    if DEBUG:
        print('START', network)

    # Add a dummy start and end nodes
    start = Node('qs')
    network.addNode(start)
    network.addDelta(start, '', network.start)
    network.start = start

    end = Node('qf')
    network.addNode(end)
    # Walk a snapshot: remTerminal() changes the terminals while we loop.
    for i in list(network.terminals):
        network.addDelta(i, '', end)
        network.remTerminal(i)
    network.addTerminal(end)
    if DEBUG:
        print('Dummies added: ', network)

    # Collapse connections
    for src in network.nodes:
        delta_temp = network.getDelta(src)
        for dest in network.nodes:
            # Non-empty labels that lead from src to dest.
            chars = []
            for input in delta_temp:
                if input and dest in delta_temp[input]:
                    chars.append(input)

            if len(chars):
                for c in chars:
                    delta_temp[c].remove(dest)
                    if len(delta_temp[c]) == 0:
                        del delta_temp[c]

                # Several labels become one parenthesised alternation.
                if len(chars) > 1:
                    chars = '(' + '+'.join(chars) + ')'
                else:
                    chars = '+'.join(chars)
                network.addDelta(src, chars, dest)
    if DEBUG:
        print('Collapsed: ', network)

    # Collect pliable nodes
    pliableNodes = list(network.nodes)
    pliableNodes.remove(network.start)
    for n in network.terminals:
        pliableNodes.remove(n)

    # Build a distance-from-terminal table
    nodeFinalDist = {}
    maxDist = len(network.nodes)**len(network.nodes)  # Lazy
    for n in network.nodes:
        nodeFinalDist[n] = maxDist

    nodeFinalDist[network.terminals[0]] = 0
    toProcess = list(network.nodes)
    toProcess.remove(network.terminals[0])

    while len(toProcess):
        pendingBefore = len(toProcess)
        # NOTE: the list is deliberately walked while it shrinks; skipped
        # entries are picked up on the next pass of the while loop, and
        # that visiting order determines the distances assigned.
        for node in toProcess:
            # Union of all destination sets (empty when there are none);
            # avoids depending on a builtin `reduce`.
            dests = set().union(*network.getDelta(node).values())

            if len(dests) == 0:
                toProcess.remove(node)
            else:
                minDist = min([nodeFinalDist[i] for i in dests])
                if minDist != maxDist:
                    nodeFinalDist[node] = minDist + 1
                    toProcess.remove(node)

        # A pass that resolved nothing can never make progress later
        # (nothing changed), e.g. nodes that only reach dead ends. Stop
        # instead of spinning forever; those nodes keep maxDist.
        if len(toProcess) == pendingBefore:
            break

    # Sort pliable nodes by distance from terminal
    pliableNodes.sort(key=lambda x: nodeFinalDist[x], reverse=True)
    if DEBUG:
        print('Pliables: ', pliableNodes)

    for node in pliableNodes:
        # Remove Node
        network.remNode(node)

        # Save delta
        delta = copy(network.getDelta(node))

        # Convert loops to regex
        loops = []
        for input in delta:
            if node in delta[input]:
                if len(input):
                    loops.append(input)
        loopRegex = '+'.join(loops)
        if len(loopRegex) > 1 and not (loopRegex[0] == '('
                                       and loopRegex[-1] == ')'):
            loopRegex = '(' + loopRegex + ')*'
        elif len(loopRegex) >= 1:
            loopRegex = loopRegex + '*'

        # Remove loops
        for input in copy(delta):
            if delta[input] == set([node]):
                del delta[input]
            elif node in delta[input]:
                delta[input].remove(node)

        # Search lambda-closure equivalence
        if '' in delta and (len(delta) != 1 or len(delta['']) != 1):
            eligible = []
            for dest in delta['']:
                delta_temp = network.getDelta(dest)
                if '' in delta_temp and node in delta_temp['']:
                    eligible.append(dest)

            if len(eligible):
                replaceNode(network, node, eligible[0])
                continue

        # Remove delta
        try:
            del network._deltas[node]
        except KeyError:  # No deltas remaining, had only loops
            continue

        if DEBUG:
            print('Working on connections: ', node, delta)
        # Check all possible connections through this node
        deltas_temp = copyDeltas(network._deltas)
        for src in deltas_temp:
            for input in deltas_temp[src]:
                tempDeltaDest = network.getDelta(src)[input]
                if node in tempDeltaDest:
                    tempDeltaDest.remove(node)
                    if len(tempDeltaDest) == 0:
                        network.remDelta(src, input)

                    # Bridge src to every former successor of `node`.
                    for input2 in delta:
                        for dest in delta[input2]:
                            # Skip a pure lambda self-loop: it adds nothing.
                            if not (src == dest and
                                    (input + loopRegex + input2) == ''):
                                network.addDelta(src,
                                                 input + loopRegex + input2,
                                                 dest)
                                if DEBUG:
                                    print('New Delta:', src, input, loopRegex,
                                          input2, dest, network)

    # Extract common prefix/suffix
    # Materialise the keys: a dict view cannot be indexed or sorted.
    branches = list(network.getDelta(network.start).keys())
    if len(branches) == 1:
        regex = branches[0]
    else:
        prefix = commonprefix(branches)
        suffix = commonsuffix(branches)
        branches = [
            i[len(prefix):-len(suffix)] if len(suffix) else i[len(prefix):]
            for i in branches
        ]
        branches.sort(key=len)
        if len(prefix) or len(suffix):
            regex = prefix + \
                '(' + '+'.join([i or LAMBDA for i in branches]) + ')' + suffix
        else:
            regex = '+'.join([i or LAMBDA for i in branches]) or PHI

    return regex
コード例 #12
0
 def apply(self, input, start):
     """Always fail: a NetworkNFA cannot be run against input directly.

     Raises:
         AutomataError: unconditionally; `input` and `start` are ignored.
     """
     message = 'NetworkNFA does not allow direct application.'
     raise AutomataError(message)