def doToposort(self):
    """Topologically sort the graph nodes and the data types they handle.

    The sort works on a deep copy of ``self._nodeDict``: a node with no
    inbound connections is taken, appended to ``self._toposort`` and removed
    from the copy, and its outbound connections are deleted from the nodes
    it fed. Any node left without inbound connections becomes a candidate
    in turn. ``self._nodeDict`` itself is not modified.

    Afterwards ``self._toposortData`` is rebuilt by walking the sorted
    nodes and recording each data type the first time it is seen (a node's
    input data types before its output data types).

    Raises:
        trfExceptions.TransformGraphException: if no node is free of
            inbound connections, or if nodes remain once the candidates
            are exhausted (the graph is not a DAG).
    """
    # Start from an empty ordering so that calling this method again does
    # not append a second copy of the node names (the data ordering below
    # is rebuilt from scratch in the same way)
    self._toposort = []

    # We will manipulate the graph, so deepcopy it
    graphCopy = copy.deepcopy(self._nodeDict)

    # Find all valid start nodes in this graph - ones with no data dependencies themselves
    startNodeNames = [nodeName for nodeName, node in graphCopy.items()
                      if len(node.connections['in']) == 0]

    if not startNodeNames:
        raise trfExceptions.TransformGraphException(
            trfExit.nameToCode('TRF_GRAPH_ERROR'),
            'There are no starting nodes in this graph - non-DAG graphs are not supported')

    msg.debug('Found this list of start nodes for toposort: {0}'.format(startNodeNames))

    # startNodeNames holds the nodes whose dependencies are now satisfied
    # (no input edges anymore)
    while startNodeNames:
        # Take the next start node and zap it from the graph
        theNodeName = startNodeNames.pop()
        theNode = graphCopy[theNodeName]
        self._toposort.append(theNodeName)
        del graphCopy[theNodeName]

        # Now delete the edges this node was a source for
        msg.debug('Considering connections from node {0}'.format(theNodeName))
        for connectedNodeName in theNode.connections['out']:
            connectedNode = graphCopy[connectedNodeName]
            connectedNode.delConnection(toExe=theNodeName, direction='in')
            # Look for nodes which now have their dependencies satisfied
            if len(connectedNode.connections['in']) == 0:
                startNodeNames.append(connectedNodeName)

    # If there are nodes left then the graph has cycles, which means it's not a DAG
    if len(graphCopy) > 0:
        raise trfExceptions.TransformGraphException(
            trfExit.nameToCode('TRF_GRAPH_ERROR'),
            'Graph topological sort had no more start nodes, but nodes were left {0} - non-DAG graphs are not supported'
            .format(list(graphCopy)))

    msg.debug('Topologically sorted node order: {0}'.format(self._toposort))

    # Now toposort the data types: for each node in order, first its
    # input data and then its output data, keeping first occurrences only
    self._toposortData = []
    for nodeName in self._toposort:
        node = self._nodeDict[nodeName]
        for dataTypes in (node.inputDataTypes, node.outputDataTypes):
            for dataType in dataTypes:
                if dataType not in self._toposortData:
                    self._toposortData.append(dataType)

    msg.debug('Topologically sorted data order: {0}'.format(self._toposortData))
def findExecutionPath(self):
    """Decide which nodes to enable, and with which data, to make the outputs.

    ``self._execution`` is rebuilt as a dictionary keyed by node name, each
    value holding an ``'enabled'`` flag plus ``'input'`` and ``'output'``
    sets of data types. Starting from a copy of ``self._outputData``, the
    next data type to produce is picked in ``self._toposortData`` order,
    the best path for it is obtained from ``self._bestPath`` and every node
    on that path is enabled. Data types that the path needs but which are
    not yet available are queued to be produced in turn.

    Finally the placeholder data types ``inNULL`` and ``outNULL`` are
    stripped from every node's input and output sets.

    Raises:
        trfExceptions.TransformGraphException: if a data type still has to
            be produced but does not appear in ``self._toposortData``.
    """
    # Switch off all nodes, except if we have a single node which is not
    # data driven, i.e. one with neither input nor output data types
    self._execution = {}
    for nodeName, node in self._nodeDict.items():
        if (len(self._nodeDict) == 1 and node.inputDataTypes == set()
                and node.outputDataTypes == set()):
            enabled = True
        else:
            enabled = False
        self._execution[nodeName] = {'enabled' : enabled, 'input' : set(), 'output' : set()}

    dataToProduce = copy.deepcopy(self._outputData)
    dataAvailable = copy.deepcopy(self._inputData)

    # Consider the next data type in topo order
    while len(dataToProduce) > 0:
        nextDataType = None
        for dataType in self._toposortData:
            if dataType in dataToProduce:
                nextDataType = dataType
                dataToProduce.remove(nextDataType)
                dataAvailable.add(nextDataType)
                break

        if not nextDataType:
            msg.error('Still have to produce data type(s) {0}, but did not find anything in the toposorted data list ({1}).'
                      ' Transform parameters/graph are broken so aborting.'.format(dataToProduce, self._toposortData))
            raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
                                                        'Data type graph error')

        msg.debug('Next data type to try is {0}'.format(nextDataType))
        bestPath = self._bestPath(nextDataType, dataAvailable)
        msg.debug('Found best path for {0}: {1}'.format(nextDataType, bestPath))

        ## @note Walk the path in pairs of (currentNode, nextNode); the last node is paired with @c None
        nodePath = bestPath.path
        for nodeName, nextNodeName in zip(nodePath, nodePath[1:] + [None]):
            self._execution[nodeName]['enabled'] = True
            # Add the necessary data types to the output of the first node and the input of the next
            if nodeName in bestPath.newData:
                pathData = bestPath.newData[nodeName]
                self._execution[nodeName]['output'].update(pathData)
                dataToProduce.update(item for item in pathData if item not in dataAvailable)
                if nextNodeName:
                    self._execution[nextNodeName]['input'].update(pathData)
                    # NOTE(review): the guard tests nextNodeName but the lookup is
                    # keyed by nodeName - kept as found, confirm the intended key
                    if nextNodeName in bestPath.extraData:
                        self._execution[nextNodeName]['input'].update(bestPath.extraData[nodeName])

        # Add any extra data we need (from multi-exit nodes) to the data to produce list.
        # This does not depend on the node being visited, so it is done once per path.
        for extraNodeData in bestPath.extraData.values():
            dataToProduce.update(extra for extra in extraNodeData if extra not in dataAvailable)

    # Now remove the fake data objects from activated nodes
    fakeData = set(['inNULL', 'outNULL'])
    for node, props in self._execution.items():
        msg.debug('Removing fake data from node {0}'.format(node))
        props['input'] -= fakeData
        props['output'] -= fakeData

    msg.debug('Execution dictionary: {0}'.format(self._execution))
def _bestPath(self, data, dataAvailable, startNodeName = '_start', endNodeName = None):
    """Find the lowest cost path from the start node to the end node for a data type.

    The search runs backwards: it is seeded with a ``graphPath`` holding
    only the end node and repeatedly extends every unfinished path along
    the inbound connections of its first node. A node with several inbound
    connections makes the path fork, one clone per extra connection; a
    node with none is a dead end and its path is dropped. After each round
    only the cheapest (by ``cost``) of the paths that have reached the
    start node is kept.

    Args:
        data: data type to be produced; passed to the seed ``graphPath``
            and used to build the default end node name.
        dataAvailable: accepted for the caller's benefit but not used by
            this method.
        startNodeName: name of the node at which a path is complete.
        endNodeName: name of the node the search is seeded from; defaults
            to ``'_end_<data>'``.

    Returns:
        The single surviving path, whose first node is ``startNodeName``.

    Raises:
        trfExceptions.TransformGraphException: if the end node is not in
            ``self._nodeDict`` or if every candidate path is a dead end.
    """
    if endNodeName is None:
        endNodeName = '_end_{0}'.format(data)

    if endNodeName not in self._nodeDict:
        raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
            'Node {0} was not found - the transform data connection definition is broken'.format(endNodeName))

    # Set of all considered paths
    # Initialise this with our endNode name - algorithm works back to the start
    pathSet = [graphPath(endNodeName, data),]
    msg.debug('Started path finding with seed path {0}'.format(pathSet[0]))

    # Halting condition - only one path and its first element is startNodeName.
    # Node names are compared by value: two equal strings need not be the same object.
    while len(pathSet) > 1 or pathSet[0].path[0] != startNodeName:
        msg.debug('Starting best path iteration with {0} paths in {1}'.format(len(pathSet), pathSet))
        # Copy the pathSet to do this, as we will update it
        for path in pathSet[:]:
            msg.debug('Continuing path finding with path {0}'.format(path))
            currentNodeName = path.path[0]
            if currentNodeName == startNodeName:
                msg.debug('Path {0} has reached the start node - finished'.format(path))
                continue

            # Snapshot the names of the nodes feeding this one; a list is
            # needed because the exits are indexed and sliced below
            exitNodeNames = list(self._nodeDict[currentNodeName].connections['in'])

            # If there are no paths out of this node then it's a dead end - kill it
            if len(exitNodeNames) == 0:
                msg.debug('Path {0} is a dead end - removing'.format(path))
                pathSet.remove(path)
                continue

            # If there is only one path out of this node, we extend it
            if len(exitNodeNames) == 1:
                msg.debug('Single exit from path {0} - adding connection to {1}'.format(path, exitNodeNames[0]))
                self._extendPath(path, currentNodeName, exitNodeNames[0])
                continue

            # Else we need to clone the path for each possible exit
            msg.debug('Multiple exits from path {0} - will clone for each extra exit'.format([path]))
            for nextNodeName in exitNodeNames[1:]:
                newPath = copy.deepcopy(path)
                msg.debug('Cloned exit from path {0} to {1}'.format(newPath, nextNodeName))
                self._extendPath(newPath, currentNodeName, nextNodeName)
                pathSet.append(newPath)
            # Finally, use the original path to extend along the first node exit
            msg.debug('Adding exit from original path {0} to {1}'.format(path, exitNodeNames[0]))
            self._extendPath(path, currentNodeName, exitNodeNames[0])

        # Now compare paths which made it to the end - only keep the shortest
        lowestCostPath = None
        for path in pathSet[:]:
            if path.path[0] != startNodeName:
                # Still under construction - nothing to compare yet
                continue
            if lowestCostPath is None:
                lowestCostPath = path
                continue
            if path.cost >= lowestCostPath.cost:
                msg.debug('Path {0} is no cheaper than best path {1} - removing'.format(path, lowestCostPath))
                pathSet.remove(path)
            else:
                msg.debug('Path {0} is cheaper than previous best path {1} - removing previous'.format(path, lowestCostPath))
                pathSet.remove(lowestCostPath)
                lowestCostPath = path

        # Emergency break - must happen before the loop condition reads pathSet[0] again
        if len(pathSet) == 0:
            raise trfExceptions.TransformGraphException(trfExit.nameToCode('TRF_GRAPH_ERROR'),
                'No path found between {0} and {1} for {2}'.format(startNodeName, endNodeName, data))

    return pathSet[0]