コード例 #1
0
    def doToposort(self):
        """Order the graph's nodes, then its data types, topologically.

        Works on a deep copy of ``self._nodeDict``: a node with no remaining
        'in' connections is taken off the ready list, removed from the copy
        and appended to ``self._toposort``; the matching 'in' connection is
        then deleted from every node listed in its 'out' connections, and any
        of those left without 'in' connections joins the ready list.

        Afterwards ``self._toposortData`` is rebuilt by walking the sorted
        nodes and recording each data type the first time it is seen, a
        node's input types before its output types.

        Raises:
            trfExceptions.TransformGraphException: if no node is free of 'in'
                connections to begin with, or if nodes are still left in the
                copy once the ready list is exhausted.
        """
        # Connections get deleted as nodes are consumed, so never touch the real graph
        workGraph = copy.deepcopy(self._nodeDict)

        # Seed the ready list with every node that has no inbound connection
        readyNames = [
            name for name, candidate in iteritems(workGraph)
            if not candidate.connections['in']
        ]
        if not readyNames:
            raise trfExceptions.TransformGraphException(
                trfExit.nameToCode('TRF_GRAPH_ERROR'),
                'There are no starting nodes in this graph - non-DAG graphs are not supported'
            )

        msg.debug('Found this list of start nodes for toposort: {0}'.format(readyNames))

        # readyNames always holds the nodes whose inbound connections are all gone
        while readyNames:
            currentName = readyNames.pop()
            currentNode = workGraph.pop(currentName)
            self._toposort.append(currentName)

            # Drop the connections for which this node was the source
            msg.debug('Considering connections from node {0}'.format(currentName))
            for downstreamName in currentNode.connections['out']:
                downstream = workGraph[downstreamName]
                downstream.delConnection(toExe=currentName, direction='in')
                # A node with nothing left inbound is ready to be sorted
                if not downstream.connections['in']:
                    readyNames.append(downstreamName)

        # Anything still in the working copy could never be freed - the graph has cycles
        if workGraph:
            raise trfExceptions.TransformGraphException(
                trfExit.nameToCode('TRF_GRAPH_ERROR'),
                'Graph topological sort had no more start nodes, but nodes were left {0} - non-DAG graphs are not supported'
                .format(list(workGraph)))

        msg.debug('Topologically sorted node order: {0}'.format(self._toposort))

        # Derive the data ordering from the node ordering: inputs first, then outputs
        self._toposortData = []
        for nodeName in self._toposort:
            sortedNode = self._nodeDict[nodeName]
            for dataTypes in (sortedNode.inputDataTypes, sortedNode.outputDataTypes):
                for dataType in dataTypes:
                    if dataType not in self._toposortData:
                        self._toposortData.append(dataType)

        msg.debug('Topologically sorted data order: {0}'.format(self._toposortData))
コード例 #2
0
    def findExecutionPath(self):
        """Work out which nodes must run, and with which data, to produce the outputs.

        Rebuilds ``self._execution``, a dict keyed by node name whose values
        have the form ``{'enabled': bool, 'input': set, 'output': set}``.
        Starting from copies of ``self._outputData`` (still to produce) and
        ``self._inputData`` (available), data types are handled in
        ``self._toposortData`` order. For each one ``self._bestPath`` supplies
        a path; every node on it is enabled and the path's per-node data are
        added to the nodes' input/output sets. Data the path refers to that
        are not yet available are queued for production. Finally the
        placeholder types 'inNULL' and 'outNULL' are removed from every
        node's input and output sets.

        Raises:
            trfExceptions.TransformGraphException: if data are still to be
                produced but none of them appears in ``self._toposortData``.
        """
        # Switch off all nodes, except if we have a single node which is not data
        # driven, i.e. it declares neither input nor output data types.
        singleNode = len(self._nodeDict) == 1
        self._execution = {}
        for nodeName, node in self._nodeDict.items():
            enabled = (singleNode
                       and node.inputDataTypes == set()
                       and node.outputDataTypes == set())
            self._execution[nodeName] = {'enabled' : enabled, 'input' : set(), 'output' : set()}

        dataToProduce = copy.deepcopy(self._outputData)
        dataAvailable = copy.deepcopy(self._inputData)

        # Consider the next data type in topo order
        while len(dataToProduce) > 0:
            nextDataType = next((dataType for dataType in self._toposortData
                                 if dataType in dataToProduce), None)

            # Compare against None explicitly so a falsy data type is not mistaken for "not found"
            if nextDataType is None:
                msg.error('Still have to produce data type(s) {0}, but did not find anything in the toposorted data list ({1}).' 
                          ' Transform parameters/graph are broken so aborting.'.format(dataToProduce, self._toposortData))
                raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                            'Data type graph error')

            # The type is treated as available from here on, before its path is searched for
            dataToProduce.remove(nextDataType)
            dataAvailable.add(nextDataType)

            msg.debug('Next data type to try is {0}'.format(nextDataType))
            bestPath = self._bestPath(nextDataType, dataAvailable)

            msg.debug('Found best path for {0}: {1}'.format(nextDataType, bestPath))

            ## @note Iterate over the path in pairs of (currentNode, nextNode); the last node is
            #  paired with None. Pairing by position stays correct even if a name occurs twice.
            pathNodes = bestPath.path
            for nodeName, nextNodeName in zip(pathNodes, pathNodes[1:] + [None]):
                self._execution[nodeName]['enabled'] = True

                # Add the necessary data types to the output of the first node and the input of the next
                hasNewData = nodeName in bestPath.newData
                if hasNewData:
                    nodeNewData = bestPath.newData[nodeName]
                    self._execution[nodeName]['output'].update(nodeNewData)
                    dataToProduce.update(newData for newData in nodeNewData
                                         if newData not in dataAvailable)

                if nextNodeName is not None:
                    nextInput = self._execution[nextNodeName]['input']
                    if hasNewData:
                        nextInput.update(nodeNewData)
                    # NOTE(review): the guard tests nextNodeName but the entry read belongs to
                    # nodeName. Both are required here so a missing entry cannot raise KeyError
                    # while previous results are kept - confirm which key graphPath intends.
                    if nextNodeName in bestPath.extraData and nodeName in bestPath.extraData:
                        nextInput.update(bestPath.extraData[nodeName])

            # Add any extra data we need (from multi-exit nodes) to the data to produce list.
            # This does not depend on the node pair, so it is done once per path.
            for extraNodeData in bestPath.extraData.values():
                for extra in extraNodeData:
                    if extra not in dataAvailable:
                        dataToProduce.add(extra)

        # Now remove the fake data objects from activated nodes
        fakeData = set(['inNULL', 'outNULL'])
        for node, props in self._execution.items():
            msg.debug('Removing fake data from node {0}'.format(node))
            props['input'] -= fakeData
            props['output'] -= fakeData

        msg.debug('Execution dictionary: {0}'.format(self._execution))
コード例 #3
0
    def _bestPath(self, data, dataAvailable, startNodeName = '_start', endNodeName = None):
        """Find the lowest-cost path from startNodeName to the end node for data.

        The search is seeded with a single path holding only the end node and
        works backwards: on every iteration each unfinished path is extended,
        via ``self._extendPath``, along the 'in' connections of its first
        node. A path whose first node has no 'in' connections is dropped; one
        with several is cloned once per additional connection. Among the
        paths that have reached startNodeName only the one with the lowest
        ``cost`` is kept (on a tie the one found first stays). The loop ends
        when a single path is left and it begins at startNodeName.

        Args:
            data: data type being searched for; also used to build the
                default end node name.
            dataAvailable: not read by this method.
            startNodeName: name of the node every complete path must begin at.
            endNodeName: name of the node the path ends at; defaults to
                ``'_end_<data>'``.

        Returns:
            The one remaining path object (as created by ``graphPath``).

        Raises:
            trfExceptions.TransformGraphException: if endNodeName is not in
                ``self._nodeDict``, or if every candidate path was dropped.
        """
        if endNodeName is None:
            endNodeName = '_end_{0}'.format(data)

        if endNodeName not in self._nodeDict:
            raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                'Node {0} was not found - the transform data connection definition is broken'.format(endNodeName))

        # Set of all considered paths
        # Initialise this with our endNode name - algorithm works back to the start
        pathSet = [graphPath(endNodeName, data),]

        msg.debug('Started path finding with seed path {0}'.format(pathSet[0]))

        # Halting condition - only one path and its first element is startNodeName.
        # Node names are compared by value: identity of equal strings is not guaranteed.
        while len(pathSet) > 1 or pathSet[0].path[0] != startNodeName:
            msg.debug('Starting best path iteration with {0} paths in {1}'.format(len(pathSet), pathSet))
            # Copy the pathSet to do this, as we will update it
            for path in pathSet[:]:
                msg.debug('Continuing path finding with path {0}'.format(path))
                currentNodeName = path.path[0]
                if currentNodeName == startNodeName:
                    msg.debug('Path {0} has reached the start node - finished'.format(path))
                    continue

                # The search runs backwards, so the "exits" of a path are the 'in' connections
                # of its first node. Take a list snapshot so indexing works on any dict flavour.
                exitNames = list(self._nodeDict[currentNodeName].connections['in'])

                # If there are no paths out of this node then it's a dead end - kill it
                if len(exitNames) == 0:
                    msg.debug('Path {0} is a dead end - removing'.format(path))
                    pathSet.remove(path)
                    continue

                firstExitName = exitNames[0]

                # If there is only one path out of this node, we extend it
                if len(exitNames) == 1:
                    msg.debug('Single exit from path {0} - adding connection to {1}'.format(path, firstExitName))
                    self._extendPath(path, currentNodeName, firstExitName)
                    continue

                # Else we need to clone the path for each possible exit
                msg.debug('Multiple exits from path {0} - will clone for each extra exit'.format([path]))
                for nextNodeName in exitNames[1:]:
                    newPath = copy.deepcopy(path)
                    msg.debug('Cloned exit from path {0} to {1}'.format(newPath, nextNodeName))             
                    self._extendPath(newPath, currentNodeName, nextNodeName)
                    pathSet.append(newPath)
                # Finally, use the original path to extend along the first node exit
                msg.debug('Adding exit from original path {0} to {1}'.format(path, firstExitName))             
                self._extendPath(path, currentNodeName, firstExitName)

            # Now compare paths which made it to the end - only keep the shortest
            lowestCostPath = None
            for path in pathSet[:]:
                if path.path[0] != startNodeName:
                    continue
                if lowestCostPath is None:
                    lowestCostPath = path
                    continue
                if path.cost >= lowestCostPath.cost:
                    msg.debug('Path {0} is no cheaper than best path {1} - removing'.format(path, lowestCostPath))
                    pathSet.remove(path)
                else:
                    msg.debug('Path {0} is cheaper than previous best path {1} - removing previous'.format(path, lowestCostPath))
                    pathSet.remove(lowestCostPath)
                    lowestCostPath = path

            # Emergency break - must come before the loop condition reads pathSet[0] again
            if len(pathSet) == 0:
                raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'), 
                                                            'No path found between {0} and {1} for {2}'.format(startNodeName, endNodeName, data))
        return pathSet[0]