Ejemplo n.º 1
0
def addDivisionToValue(graph, superpipeline, task, nodeId, instructions, baseValues, optionArgument, optionValue, randomString):
  """Build one output filename per base value, tagged with a division's option and value.

  Each base value has any leading path removed and its recognised input extension
  stripped. The text '_<optionArgument without dashes><optionValue>' is appended, then
  the node's optional 'addTextToFilename' text, then (for intermediate files) the
  random string held on the superpipeline. furnishExtension supplies the final name.

  An empty list is returned when there are no base values. The process exits with
  status 1 if the argument used for construction is a stub, or if getExtension
  reports False for a value.

  The randomString parameter is not read here; the random text comes from
  superpipeline.randomString.
  """

  # Without base values there is nothing to construct from. The empty list is
  # returned and is dealt with when values are checked.
  if not baseValues: return []

  # Look up the configuration of the tool run by this task.
  toolName   = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, task, 'tool')
  toolConfig = superpipeline.getToolData(toolName)

  # The long form of the argument that the filename construction is based on.
  sourceArgument = toolConfig.getLongFormArgument(instructions['use argument'])

  # Extensions permitted on the source argument, and whether that argument is a stub.
  allowedInputExtensions = toolConfig.getArgumentAttribute(sourceArgument, 'extensions')
  sourceIsStub           = toolConfig.getArgumentAttribute(sourceArgument, 'isStub')

  # Extensions available for the file being constructed.
  targetExtensions = gr.pipelineGraph.CM_getArgumentAttribute(graph, task, nodeId, 'extensions')

  # Whether the constructed file is an intermediate file.
  intermediate = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, nodeId, 'isIntermediate')

  # FIXME HANDLE STUBS
  if sourceIsStub:
    print('NOT HANDLED STUBS FOR DIVISIONS: constructFilenames.constructDivisions')
    exit(1)

  # Build a new filename from each of the base values in turn.
  constructed = []
  for baseValue in baseValues:

    # Keep only the text following the final '/', i.e. drop any path.
    stem = baseValue.rpartition('/')[2]

    # Identify the extension on the value and strip it from the working name.
    extension = getExtension(stem, allowedInputExtensions)
    if extension == False:
      print('ERROR WITH EXTENSION - constructFilenames')
      exit(1)
    if extension:
      stem = stem.replace('.' + str(extension), '')

    # Tag the name with the division's argument (dashes removed) and value.
    stem = str(stem + '_' + optionArgument.strip('-') + optionValue)

    # The pipeline configuration can request an extra text field in the filename.
    extraText = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, nodeId, 'addTextToFilename')
    if extraText:
      stem += str('_' + extraText)

    # Intermediate files are additionally tagged with the (already generated) random text.
    if intermediate:
      stem += str('_' + superpipeline.randomString)

    # Attach the extension and store the finished value.
    constructed.append(furnishExtension(instructions, stem, extension, targetExtensions))

  # Hand back the constructed values.
  return constructed
Ejemplo n.º 2
0
def constructKnownFilename(graph, superpipeline, instructions, task, nodeId, argument, isTerminate):
  """Construct values for a node whose filename is given explicitly in the instructions.

  The filename comes from instructions['filename'] and is produced once per subphase
  of the task, passing through modifyText when the instructions contain 'modify text'.
  If the instructions contain 'path argument', each filename is prefixed with
  '<path>/' using the values of the node feeding that argument:

    * one path value                  - the same path is applied to every filename;
    * several paths, one filename     - one value is produced per path;
    * equal numbers of both           - paths and filenames are paired by position.

  Any other combination prints a message and exits with status 1.

  The resulting list is stored as the 'values' attribute of nodeId and returned.
  """
  tool     = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, task, 'tool')
  toolData = superpipeline.getToolData(tool)

  # Get the filename to use.
  value = instructions['filename']

  # Determine the number of subphases for this graph. If there are multiple subphases, it is possible
  # that any arguments from the tool whose values are used in the construction can have as many values
  # as there are subphases.
  subphases = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, task, 'subphases')

  # Generate one value per subphase, modifying the text if instructed.
  if 'modify text' in instructions:
    values = [modifyText(graph, task, argument, toolData, instructions, counter, value, isTerminate) for counter in range(0, subphases)]
  else: values = [value] * subphases

  # Check if the 'path argument' field is set. This determines whether the filename should be
  # prepended with a path defined by a tool argument.
  if 'path argument' in instructions:
    pathArgument   = instructions['path argument']
    pathNodeId     = gr.pipelineGraph.CM_getNodeForInputArgument(graph, task, pathArgument)
    pathNodeValues = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, pathNodeId, 'values')

    # If there is a single value for the path node, use this path for all values.
    if len(pathNodeValues) == 1:
      updatedValues = [str(pathNodeValues[0] + '/' + filename) for filename in values]

    # If there are multiple path node values, but only a single filename, the result is a new value for
    # each path. The path comes first, matching the other branches. (This branch previously iterated
    # over an undefined name and joined the two parts in the opposite order.)
    elif len(values) == 1:
      updatedValues = [str(pathValue + '/' + values[0]) for pathValue in pathNodeValues]

    # If there are the same number of values as path node values, each path node value applies to a
    # specific subphase.
    elif len(values) == len(pathNodeValues):
      updatedValues = [str(pathValue + '/' + filename) for pathValue, filename in zip(pathNodeValues, values)]

    # Any other combination of values is invalid.
    #TODO ERROR
    else: print('constructFilenames.constructKnownFilename - PATH VALUES.'); exit(1)

  # With no path argument, the filenames are used as they are.
  else: updatedValues = values

  # Set the values.
  gr.pipelineGraph.CM_setGraphNodeAttribute(graph, nodeId, 'values', updatedValues)

  # Return the constructed values.
  return updatedValues
Ejemplo n.º 3
0
def addIndexToSingleBaseValue(graph, superpipeline, task, argument, values, subphases):
  """Expand a single base value into one indexed filename per subphase.

  Only values[0] is used. Its extension (as reported by getExtension for the
  extensions allowed on the argument) is removed, and for each i from 1 to subphases
  inclusive the name '<value without extension>_<i>.<extension>' is generated.

  Returns the list of generated names in index order.
  """

  # Fetch the configuration data of the tool associated with this task.
  toolConfig = superpipeline.getToolData(superpipeline.tasks[task])

  # The extensions that the argument is permitted to take.
  allowedExtensions = toolConfig.getArgumentAttribute(argument, 'extensions')

  # This method handles a list holding a single value; find the extension on that value.
  baseValue = values[0]
  suffix    = getExtension(baseValue, allowedExtensions)

  # Remove the extension to leave the stem of the name.
  stem = baseValue.replace('.' + suffix, '')

  # Produce one value for each subphase, numbered from 1.
  return [stem + '_' + str(index) + '.' + suffix for index in range(1, subphases + 1)]
Ejemplo n.º 4
0
def checkRequiredArguments(graph, superpipeline, args, isTerminate):
  """Check that every required argument either has values or can have them constructed.

  Two passes are made:

    1. Over the pipeline arguments in args.arguments. Where the pipeline states whether
       an argument is required, the 'isRequired' attribute of the associated graph nodes
       is updated, and a required argument with no values and no construction
       instructions is recorded as a failure.
    2. Over every task in graph.workflow and every argument of its tool that the tool
       marks as required. Inputs/options and outputs are handled separately. Nodes (and
       their edges) are added to the graph for arguments that have construction
       instructions but no node yet.

  The methods of the er.consistencyErrors object are only called when isTerminate is
  set; otherwise failures are only reflected in the return value. A required output with
  neither a node nor construction instructions prints a message and exits with status 1
  regardless of isTerminate.

  Returns True if no problems were recorded, False otherwise.
  """

  # Define error handling.
  errors    = er.consistencyErrors()
  isSuccess = True

  # Keep track of nodes that can have their values constructed.
  constructableNodes = []

  # Loop over the defined pipeline arguments and check that all arguments listed as required have
  # been set.
  for argument in args.arguments:

    # If the pipeline specifies if this argument is required or not, update the properties of the nodes.
    # If not specified (i.e. isRequired == None), it is left to the tool configuration to determine if
    # the argument is required. This allows the pipeline configuration file to not only specify that an
    # argument is required even if the underlying tool doesn't require the value, the pipeline can also
    # override the tool's claim that the argument is required.
    if args.arguments[argument].isRequired == False:
      for nodeId in args.arguments[argument].graphNodeIds: graph.setGraphNodeAttribute(nodeId, 'isRequired', False)
    if args.arguments[argument].isRequired:

      # Loop over the associated graph nodes and see if the values are set.
      # NOTE(review): the node attribute is set to False here even though the pipeline marks the argument
      # as required. Presumably this stops the per-task pass below (which skips nodes whose 'isRequired'
      # attribute is not set) from reporting the same argument again - confirm this is intended.
      for nodeId in args.arguments[argument].graphNodeIds:
        graph.setGraphNodeAttribute(nodeId, 'isRequired', False)

        # Check if this argument was imported from a task in the pipeline. If so, determine if there are
        # any instructions for constructing the filename (if not an option). Only terminate if the argument
        # is for an option, or there are no instructions.
        hasInstructions = False
        if args.arguments[argument].isImported:
          task            = args.arguments[argument].importedFromTask
          tool            = graph.getGraphNodeAttribute(task, 'tool')
          toolData        = superpipeline.getToolData(tool)
          hasInstructions = True if toolData.getArgumentAttribute(argument, 'constructionInstructions') else False

        # If the values haven't been set, terminate. This is a pipeline argument listed as required
        # and so must be set by the user (and not constructed).
        if not graph.getGraphNodeAttribute(nodeId, 'values') and not hasInstructions:
          isSuccess         = False
          shortFormArgument = args.arguments[argument].shortFormArgument
          description       = args.arguments[argument].description
          if isTerminate: errors.unsetRequiredArgument(args.arguments[argument].longFormArgument, shortFormArgument, description)

  # Loop over all tasks in the workflow.
  for task in graph.workflow:

    # Get the tool for the task.
    tool     = superpipeline.tasks[task]
    toolData = superpipeline.getToolData(tool)

    # Loop over all of the arguments for the tool and check that all required arguments have a node
    # and that the node has values.
    for argument in toolData.arguments:

      # Check if the argument is required.
      if toolData.getArgumentAttribute(argument, 'isRequired'):

        # Record if a node for this argument is seen.
        foundNode = False

        # Determine if the argument is for an input file, output file or an option.
        # (isInput is looked up but only isOutput is used in the checks below.)
        isInput  = toolData.getArgumentAttribute(argument, 'isInput')
        isOutput = toolData.getArgumentAttribute(argument, 'isOutput')

        # If this is an output file with construction instructions, the filenames will be constructed
        # later, so this does not need to be checked. Keep track of the nodes which will be constructed
        # as these could be inputs to other tasks and so the check for existence is also not required
        # for these input files.

        # Start with input files and options.
        if not isOutput:

          # Loop over all input nodes looking for edges that use this argument.
          for nodeId in graph.CM_getInputNodes(graph.graph, task):
            edgeArgument = graph.getArgumentAttribute(nodeId, task, 'longFormArgument')

            # If this node uses the required argument.
            if edgeArgument == argument:
              foundNode = True

              # Only check nodes still marked as required. Nodes already marked as not required (i.e. the
              # tool's requirement has been superseded by instructions in the pipeline configuration file)
              # are skipped.
              if graph.getGraphNodeAttribute(nodeId, 'isRequired'):
                hasInstructions = False if graph.getArgumentAttribute(nodeId, task, 'constructionInstructions') == None else True
                hasValues       = True if len(graph.getGraphNodeAttribute(nodeId, 'values')) != 0 else False
                if not hasValues and not hasInstructions and nodeId not in constructableNodes:
                  isSuccess = False

                  # Check to see if this node can have its values set with a top level pipeline argument (e.g. can
                  # be set without defining the task on the command line). A long form argument containing a '.'
                  # is treated as not being a top level argument.
                  longFormArgument = args.arguments[graph.getGraphNodeAttribute(nodeId, 'longFormArgument')].longFormArgument
                  if longFormArgument and '.' not in longFormArgument:

                    # Get the short form of the pipeline argument and the argument description.
                    #shortFormArgument = args.arguments[longFormArgument].shortFormArgument
                    shortFormArgument = args.arguments[graph.getGraphNodeAttribute(nodeId, 'longFormArgument')].shortFormArgument
                    description       = graph.getGraphNodeAttribute(nodeId, 'description')
                    if isTerminate: errors.unsetRequiredArgument(longFormArgument, shortFormArgument, description)

                  # If this is not a top level argument, provide a different error.
                  # TODO CHECK THIS
                  else:

                    # Get the short form version of the argument as well as the argument description. This is as defined
                    # for the tool, so if this argument can be set using a pipeline argument, these values are incorrect.
                    shortFormArgument = graph.getArgumentAttribute(nodeId, task, 'shortFormArgument')
                    description       = graph.getArgumentAttribute(nodeId, task, 'description')
                    if isTerminate: errors.unsetRequiredNestedArgument(task, argument, shortFormArgument, description, superpipeline.pipeline)

          # If there is no node for this argument, this means that the pipeline configuration file does not contain
          # a unique or shared node for this argument. In addition, the value has not been provided on the command
          # line. This means that no values will get assigned to this argument, so terminate.
          if not foundNode:
            instructions = toolData.getArgumentAttribute(argument, 'constructionInstructions')
            if not instructions:
              isSuccess = False

              # Check if arguments were imported for this task. If so, check to see if this argument is therefore
              # available on the command line.
              if task == superpipeline.pipelineConfigurationData[superpipeline.pipeline].importArgumentsFromTool:
                if isTerminate:
                  errors.unsetRequiredArgument(argument, args.arguments[argument].shortFormArgument, args.arguments[argument].description)
              else:
                if isTerminate: errors.noInputNode(task, tool, argument)

            # If there are instructions, but no node, construct the node.
            else:
              if instructions['method'] == 'from tool argument':
                argumentToUse = instructions['use argument']

                # Find all nodes for this task using this argument. A new node is added for each match,
                # named '<predecessor node id>.<argument>'.
                for predecessorNodeId in graph.graph.predecessors(task):
                  if graph.getArgumentAttribute(predecessorNodeId, task, 'longFormArgument') == argumentToUse:
                    nodeAddress = str(predecessorNodeId + '.' + argument)

                    # Add the node and edge.
                    argumentAttributes = toolData.getArgumentData(argument)
                    graph.addFileNode(nodeAddress, nodeAddress)
                    graph.addEdge(nodeAddress, task, argumentAttributes)

                    # Attach the name of the node from which this filename is constructed to the node.
                    graph.setGraphNodeAttribute(nodeAddress, 'constructUsingNode', predecessorNodeId)

              # If there are instructions, but the construction method does not use another argument, create a node.
              else:
                nodeAddress = str(task + '.' + argument)

                # Add the node and edge.
                argumentAttributes = toolData.getArgumentData(argument)
                graph.addFileNode(nodeAddress, nodeAddress)
                graph.addEdge(nodeAddress, task, argumentAttributes)

        # Now consider output files.
        else:
          instructions = toolData.getArgumentAttribute(argument, 'constructionInstructions')

          # Loop over all output nodes looking for edges that use this argument.
          for nodeId in graph.CM_getOutputNodes(graph.graph, task):
            edgeArgument = graph.getArgumentAttribute(task, nodeId, 'longFormArgument')

            # If this node uses the required argument.
            if edgeArgument == argument:
              foundNode = True

              # If construction instructions are provided.
              if instructions:

                # If the construction is to proceed by using an argument from this task, ensure that that
                # argument is either set, or is itself a successor to another task and so has the chance
                # of being set.
                if instructions['method'] == 'from tool argument':
                  longFormArgument = toolData.getLongFormArgument(instructions['use argument'])

                  # NOTE(review): foundNode is reused here to record whether the node used for construction
                  # exists. If it is left False, the "no node exists" handling after this loop will also
                  # run for this argument - confirm that this is intended.
                  foundNode        = False
                  for predecessorNodeId in graph.graph.predecessors(task):
                    edgeArgument = graph.getArgumentAttribute(predecessorNodeId, task, 'longFormArgument')
                    if edgeArgument == longFormArgument:
                      foundNode           = True
                      constructionNodeId = predecessorNodeId

                  # If the node being used to construct the file does not exist, then it cannot be used to
                  # construct the filename and so some data must be missing.
                  if not foundNode:
                    isSuccess = False
                    if isTerminate:  errors.noNodeForConstruction(task, tool, argument, longFormArgument)

                  # If the node used to construct this filename exists, but it has no values or predecessors,
                  # it also will not be able to be used to construct the argument.
                  #elif not graph.getGraphNodeAttribute(constructionNodeId, 'values'):
                    #if not graph.graph.predecessors(constructionNodeId):
                      # TODO ERROR
                      #print('dataConsistency - checkRequiredArguments - cannot construct output', task, argument); exit(1)

                # Add the node to the list of nodes that have the potential to be constructed.
                if nodeId not in constructableNodes: constructableNodes.append(nodeId)

              # If no instructions are provided check that there are values supplied.
              if not instructions and not graph.getGraphNodeAttribute(nodeId, 'values'):
                isSuccess = False
                if isTerminate: errors.noConstructionMethod(task, tool, argument)

          # If no node exists for this argument, determine the course of action.
          if not foundNode:

            # If there are no instructions for constructing the filename, terminate.
            if not instructions: print('dataConsistency.checkRequiredArguments - no output node', task, argument); exit(1)

            # If there are instructions, but no node, construct the node.
            nodeAddress        = str(task + '.' + argument)
            argumentAttributes = toolData.getArgumentData(argument)

            # Determine if this node is a stub. If so, this is an output that is not shared with any other tasks, so
            # construct as many nodes as required: one per stub extension, each with its own copy of the argument
            # attributes. The node for the first extension is flagged as the primary stub node.
            if argumentAttributes.isStub: #graph.constructOutputStubs()
              for i, stubExtension in enumerate(argumentAttributes.stubExtensions):
                modifiedNodeAddress              = str(nodeAddress + '.' + stubExtension)
                stubAttributes                   = deepcopy(argumentAttributes)
                stubAttributes.stubExtension     = stubExtension
                stubAttributes.isPrimaryStubNode = True if i == 0 else False
                graph.addFileNode(modifiedNodeAddress, modifiedNodeAddress)
                graph.addEdge(task, modifiedNodeAddress, stubAttributes)

            # If this is not a stub, add the node and edge.
            else:
              graph.addFileNode(nodeAddress, nodeAddress)
              graph.addEdge(task, nodeAddress, argumentAttributes)

  # Return if the operation was a success.
  return isSuccess
Ejemplo n.º 5
0
def constructFromFilename(graph, superpipeline, instructions, task, nodeId, argument, baseValues):
  """Construct output filenames from the values of another argument of the same task.

  For each base value, any leading path is removed and the extension recognised by
  getExtension (from the extensions allowed on the 'use argument' argument) is stripped.
  The text is then passed through modifyText if the instructions contain 'modify text',
  the node's optional 'addTextToFilename' text is appended, and furnishExtension
  supplies the final name. If the output argument is a stub, the stub extension
  attached to this node is the only extension offered to furnishExtension; otherwise
  the extensions of the output argument are offered.

  An empty list is returned when there are no base values. The process exits with
  status 1 when the instructions do not provide a 'use argument' field.

  Returns the list of constructed values, in the same order as baseValues.
  """

  # If there are no base values, no values can be constructed. Return an empty list and when values are checked,
  # gkno will terminate.
  if not baseValues: return []

  # Get the input file from which the filenames should be built. Only a failed lookup is treated as a
  # missing field (KeyError for an absent key, TypeError when the instructions cannot be indexed), so
  # that unrelated exceptions such as KeyboardInterrupt are no longer swallowed.
  try: inputArgument = instructions['use argument']
  except (KeyError, TypeError): print('constructFilenames.constructFromFilename - no \'use argument\' field'); exit(1)

  # Get the configuration data for this tool and get the long form version of the argument to
  # use for building the filenames.
  tool             = superpipeline.tasks[task]
  toolData         = superpipeline.getToolData(tool)
  longFormArgument = toolData.getLongFormArgument(inputArgument)

  # Determine the allowed extensions for the input argument as well as whether it is a stub.
  # NOTE(review): isInputAStub is not used below. The lookup is retained as it is not visible here
  # whether the call has side effects - confirm before removing.
  extensions   = toolData.getArgumentAttribute(longFormArgument, 'extensions')
  isInputAStub = toolData.getArgumentAttribute(longFormArgument, 'isStub')

  # Determine if this output file is a stub.
  isOutputAStub = toolData.getArgumentAttribute(argument, 'isStub')

  # Get the node corresponding to the input argument.
  # NOTE(review): inputNodeId is not used below either; retained for the same reason as isInputAStub.
  inputNodeId = gr.pipelineGraph.CM_getNodeForInputArgument(graph, task, inputArgument)

  # Get the associated stub extension, if one exists.
  stubExtension = gr.pipelineGraph.CM_getArgumentAttribute(graph, task, nodeId, 'stubExtension')

  # Determine if this is an intermediate file.
  isIntermediate = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, nodeId, 'isIntermediate')

  # The remaining lookups depend only on the node and task, not on the individual values, so they are
  # performed once rather than for every value.

  # Check if there are instructions from the pipeline configuration file to add an extra text field
  # to the filename.
  addText = gr.pipelineGraph.CM_getGraphNodeAttribute(graph, nodeId, 'addTextToFilename')

  # Determine the extension to place on the filename. If the file is a stub, attach the stub extension associated with this node.
  if isOutputAStub: newExtensions = [stubExtension]
  else: newExtensions = gr.pipelineGraph.CM_getArgumentAttribute(graph, task, nodeId, 'extensions')

  # Now loop over each of the values and modify them according to the provided instructions.
  updatedValues = []
  for counter, value in enumerate(baseValues):

    # If the file being used to construct the output filename already has a path, strip this off.
    updatedValue = value.rsplit('/')[-1]

    # Determine the extension on the input, then create a working version of the new name with the
    # extension removed. This is only necessary if the input file is not a stub. If it is, it will
    # have no extension, so this becomes unnecessary.
    extension = getExtension(updatedValue, extensions)
    if extension: updatedValue = updatedValue.replace('.' + str(extension), '')

    # If there are instructions on text to add, add it.
    if 'modify text' in instructions: updatedValue = modifyText(graph, task, argument, toolData, instructions, counter, updatedValue, isTerminate = True)

    # Add the extra text field requested by the pipeline configuration file, if any.
    if addText: updatedValue += str('_' + addText)

    # Attach the extension and add the updated value to the list of updated values.
    updatedValues.append(furnishExtension(instructions, updatedValue, extension, newExtensions))

  # Return the values.
  return updatedValues