Example #1
0
def checkNode(graph, superpipeline, source, target, nodeType, expectedDataType, values, isInput):
 
  # Define error handling,
  errors = er.consistencyErrors()

  # Get pipeline configuration data.
  data = superpipeline.pipelineConfigurationData[superpipeline.pipeline]

  # Get the required attributes.
  longFormArgument = graph.CM_getArgumentAttribute(graph.graph, source, target, 'longFormArgument')
  dataType         = graph.CM_getArgumentAttribute(graph.graph, source, target, 'dataType')

  # Check if this edge hgas been marked as a link only. This occurs when a nodes values are constructed using values from
  # another node. An edge is included to ensure that the workflow and dependencies are correct, but there will be no
  # argument associated with the edge and the following checks are not required.
  isLinkOnly = graph.CM_getArgumentAttribute(graph.graph, source, target, 'isLinkOnly')
  if not isLinkOnly:

    # If this is the first argument parsed, populate the expectedDataType variable with the data type for this argument.
    if not expectedDataType: expectedDataType = dataType
  
    # If expectedDataType is populated and this data type is different to the expectedDataType, this implies that different
    # arguments using the same values expect different data types. This is clearly impossible, so terminate.
    #TODO ERROR
    elif expectedDataType != dataType: print('dataConsistency.checkNode - 1', dataType, expectedDataType); exit(0)
  
    # Loop over each of the values for this node.
    for value in values:
  
      # Check that the data type is correct.
      #TODO ERROR
      if not isCorrectDataType(value, expectedDataType): print('dataConsistency.checkNode - 2', longFormArgument, value, dataType, type(value)); exit(0)
  
      # If this is a file, check that the extension is valid. Do not perform this check for stubs.
      if nodeType == 'file':
        if not graph.CM_getArgumentAttribute(graph.graph, source, target, 'isStub'):
          extensions = graph.CM_getArgumentAttribute(graph.graph, source, target, 'extensions')
  
          # Not all files have specified extensions. If no extensions are supplied, this check should not be performed.
          if extensions:
            task       = target if isInput else source
            fileNodeId = source if isInput else target
  
            # Fail if there was an error.
            if not checkExtensions(value, extensions):
  
              # Check if a top level pipeline argument exists.
              if longFormArgument in data.longFormArguments.keys():
                shortFormArgument = data.longFormArguments[longFormArgument].shortFormArgument
                errors.invalidExtensionPipeline(longFormArgument, shortFormArgument, value, extensions)
  
              # If no pipeline argument exists for this argument, list the task and argument.
              else:
                shortFormArgument = graph.CM_getArgumentAttribute(graph.graph, source, target, 'shortFormArgument')
                errors.invalidExtension(task, longFormArgument, shortFormArgument, value, extensions)

    # Return the expected data type
    return expectedDataType
Example #2
0
def checkRequiredArguments(graph, superpipeline, args, isTerminate):

  # Define error handling,
  errors    = er.consistencyErrors()
  isSuccess = True

  # Keep track of nodes that can have their values constructed.
  constructableNodes = []

  # Loop over the defined pipeline arguments and check that all arguments listed as required have
  # been set.
  for argument in args.arguments:

    # If the pipeline specifies if this argument is required or not, update the properties of the nodes.
    # If not specified (i.e. isRequired == None), it is left to the tool configuration to determine if
    # the argument is required. This allows the pipeline configuration file to not only specify that an
    # argument is required even if the underlying tool doesn't require the value, the pipeline can also
    # override the tools claim that the argument is required.
    if args.arguments[argument].isRequired == False:
      for nodeId in args.arguments[argument].graphNodeIds: graph.setGraphNodeAttribute(nodeId, 'isRequired', False)
    if args.arguments[argument].isRequired:

      # Loop over the associated graph nodes and see if the values are set.
      for nodeId in args.arguments[argument].graphNodeIds:
        graph.setGraphNodeAttribute(nodeId, 'isRequired', False)

        # Check if this argument was imported from a task in the pipeline. If so, determine if there are
        # any instructions for constructing the filename (if not an option). Only terminate if the argument
        # is for an option, or there are no instructions.
        hasInstructions = False
        if args.arguments[argument].isImported:
          task            = args.arguments[argument].importedFromTask
          tool            = graph.getGraphNodeAttribute(task, 'tool')
          toolData        = superpipeline.getToolData(tool)
          hasInstructions = True if toolData.getArgumentAttribute(argument, 'constructionInstructions') else False

        # If the values haven't been set, terminate. This is a pipeline argument listed as required
        # and so must be set by the user (and not constructed).
        if not graph.getGraphNodeAttribute(nodeId, 'values') and not hasInstructions:
          isSuccess         = False
          shortFormArgument = args.arguments[argument].shortFormArgument
          description       = args.arguments[argument].description
          if isTerminate: errors.unsetRequiredArgument(args.arguments[argument].longFormArgument, shortFormArgument, description)

  # Loop over all tasks in the workflow
  for task in graph.workflow:

    # Get the tool for the task.
    tool     = superpipeline.tasks[task]
    toolData = superpipeline.getToolData(tool)

    # Loop over all of the arguments for the tool and check that all required arguments have a node
    # and that the node has values.
    for argument in toolData.arguments:

      # Check if the argument is required.
      if toolData.getArgumentAttribute(argument, 'isRequired'):

        # Record if a node for this node is seen.
        foundNode = False

        # Determine if the argument is for an input file, output file or an option.
        isInput  = toolData.getArgumentAttribute(argument, 'isInput')
        isOutput = toolData.getArgumentAttribute(argument, 'isOutput')

        # If this is an output file with construction instructions, the filenames will be constructed
        # later, so this does not need to be checked. Keep track of the nodes which will be constructed
        # as these could be inputs to other tasks and so the check for existence is also not required
        # for these input files.

        # Start with input files and options.
        if not isOutput:

          # Loop over all input nodes looking for edges that use this argument.
          for nodeId in graph.CM_getInputNodes(graph.graph, task):
            edgeArgument = graph.getArgumentAttribute(nodeId, task, 'longFormArgument')

            # If this node uses the required argument.
            if edgeArgument == argument:
              foundNode = True

              # If this node has already been marked as not required (i.e. the tools requirement has been superceded
              # by instructions in the pipeline configuration file).
              if graph.getGraphNodeAttribute(nodeId, 'isRequired'):
                hasInstructions = False if graph.getArgumentAttribute(nodeId, task, 'constructionInstructions') == None else True
                hasValues       = True if len(graph.getGraphNodeAttribute(nodeId, 'values')) != 0 else False
                if not hasValues and not hasInstructions and nodeId not in constructableNodes:
                  isSuccess = False
  
                  # Check to see if this node can have it's values set with a top level pipeline argument (e.g. can
                  # be set without defining the task on the command line).
                  longFormArgument = args.arguments[graph.getGraphNodeAttribute(nodeId, 'longFormArgument')].longFormArgument
                  if longFormArgument and '.' not in longFormArgument:
  
                    # Get the short form of the pipeline argument and the argument description.
                    #shortFormArgument = args.arguments[longFormArgument].shortFormArgument
                    shortFormArgument = args.arguments[graph.getGraphNodeAttribute(nodeId, 'longFormArgument')].shortFormArgument
                    description       = graph.getGraphNodeAttribute(nodeId, 'description')
                    if isTerminate: errors.unsetRequiredArgument(longFormArgument, shortFormArgument, description)
  
                  # If this is not a top level argument, provide a different error.
                  # TODO CHECK THIS
                  else: 
  
                    # Get the short form version of the argument as well as the argument description. This is as defined
                    # for the tool, so if this argument can be set using a pipeline argument, these values are incorrect.
                    shortFormArgument = graph.getArgumentAttribute(nodeId, task, 'shortFormArgument')
                    description       = graph.getArgumentAttribute(nodeId, task, 'description')
                    if isTerminate: errors.unsetRequiredNestedArgument(task, argument, shortFormArgument, description, superpipeline.pipeline)

          # If there is no node for this argument, this means that the pipeline configuration file does not contain
          # a unique or shared node for this argument. In addition, the value has not been provided on the command
          # line. This means that no values will get assigned to this argument, so terminate.
          if not foundNode:
            instructions = toolData.getArgumentAttribute(argument, 'constructionInstructions')
            if not instructions: 
              isSuccess = False

              # Check if arguments were imported for this task. If so, check to see if this argument is therefore
              # available on the command line.
              if task == superpipeline.pipelineConfigurationData[superpipeline.pipeline].importArgumentsFromTool:
                if isTerminate: 
                  errors.unsetRequiredArgument(argument, args.arguments[argument].shortFormArgument, args.arguments[argument].description)
              else:
                if isTerminate: errors.noInputNode(task, tool, argument)
 
            # If there are instructions, but no node, construct the node.
            else:
              if instructions['method'] == 'from tool argument':
                argumentToUse = instructions['use argument']

                # Find all nodes for this task using this argument.
                for predecessorNodeId in graph.graph.predecessors(task):
                  if graph.getArgumentAttribute(predecessorNodeId, task, 'longFormArgument') == argumentToUse:
                    nodeAddress = str(predecessorNodeId + '.' + argument)

                    # Add the node and edge.
                    argumentAttributes = toolData.getArgumentData(argument)
                    graph.addFileNode(nodeAddress, nodeAddress)
                    graph.addEdge(nodeAddress, task, argumentAttributes)

                    # Attach the name of the node from which this filename is constructed to the node.
                    graph.setGraphNodeAttribute(nodeAddress, 'constructUsingNode', predecessorNodeId)

              # If there are instructions, but the construction method does not use another argument, create a node.
              else:
                nodeAddress = str(task + '.' + argument)

                # Add the node and edge.
                argumentAttributes = toolData.getArgumentData(argument)
                graph.addFileNode(nodeAddress, nodeAddress)
                graph.addEdge(nodeAddress, task, argumentAttributes)

        # Now consider output files.
        else:
          instructions = toolData.getArgumentAttribute(argument, 'constructionInstructions')

          # Loop over all output nodes looking for edges that use this argument.
          for nodeId in graph.CM_getOutputNodes(graph.graph, task):
            edgeArgument = graph.getArgumentAttribute(task, nodeId, 'longFormArgument')

            # If this node uses the required argument.
            if edgeArgument == argument:
              foundNode = True

              # If construction instructions are provided.
              if instructions:

                # If the construction is to proceed by using an argument from this task, ensure that that
                # argument is either set, or is itelf a successor to another task and so has the chance
                # of being set.
                if instructions['method'] == 'from tool argument':
                  longFormArgument = toolData.getLongFormArgument(instructions['use argument'])
                  foundNode        = False
                  for predecessorNodeId in graph.graph.predecessors(task):
                    edgeArgument = graph.getArgumentAttribute(predecessorNodeId, task, 'longFormArgument')
                    if edgeArgument == longFormArgument:
                      foundNode           = True
                      constructionNodeId = predecessorNodeId

                  # If the node being used to construct the file does not exist, then it cannot be used to 
                  # construct the filename and so some data must be missing.
                  if not foundNode:
                    isSuccess = False
                    if isTerminate:  errors.noNodeForConstruction(task, tool, argument, longFormArgument)

                  # If the node used to construct this filename exists, but it has no values or predecessors,
                  # it also will not be able to be used to construct the argument.
                  #elif not graph.getGraphNodeAttribute(constructionNodeId, 'values'):
                    #if not graph.graph.predecessors(constructionNodeId):
                      # TODO ERROR
                      #print('dataConsistency - checkRequiredArguments - cannot construct output', task, argument); exit(1)

                # Add the node to the list of nodes that have the potential to be constructed.
                if nodeId not in constructableNodes: constructableNodes.append(nodeId)

              # If no instructions are provided check that there are values supplied.
              if not instructions and not graph.getGraphNodeAttribute(nodeId, 'values'):
                isSuccess = False
                if isTerminate: errors.noConstructionMethod(task, tool, argument)

          # If no node exists for this argument, determine the course of action.
          if not foundNode:

            # If there are no instructions for constructing the filename, terminate.
            if not instructions: print('dataConsistency.checkRequiredArguments - no output node', task, argument); exit(1)

            # If there are instructions, but no node, construct the node.
            nodeAddress        = str(task + '.' + argument)
            argumentAttributes = toolData.getArgumentData(argument)

            # Determine if this node is a stub. If so, this is an output that is not shared with any other tasks, so
            # construct as many nodes as required.
            if argumentAttributes.isStub: #graph.constructOutputStubs()
              for i, stubExtension in enumerate(argumentAttributes.stubExtensions):
                modifiedNodeAddress              = str(nodeAddress + '.' + stubExtension)
                stubAttributes                   = deepcopy(argumentAttributes)
                stubAttributes.stubExtension     = stubExtension
                stubAttributes.isPrimaryStubNode = True if i == 0 else False
                graph.addFileNode(modifiedNodeAddress, modifiedNodeAddress)
                graph.addEdge(task, modifiedNodeAddress, stubAttributes)

            # If this is not a stub, add the node and edge.
            else:
              graph.addFileNode(nodeAddress, nodeAddress)
              graph.addEdge(task, nodeAddress, argumentAttributes)

  # Return if the operation was a success.
  return isSuccess