Example #1
    def __init__(self, namespace, wms):
        logger.debug ("wms:pegasus:create-workflowmodel: %s" % namespace)
        self.wms = wms
        logger.debug ("outputdir: %s", wms.getOutputDir ())
        self.adag = ADAG (namespace)
        self.namespace = namespace
        self.files = {}
        self.exes = {}
        self.propertyMap = {}
        self.nodes = {}
        self.jobTransformations = {}

        self.variableMap = {
            'literalToVariable' : {},
            'literalToNodeId'   : {}
            }
Example #2
    def write(self, filename, name='dax'):
        """Generate Pegasus abstract workflow (DAX).

        Parameters
        ----------
        filename : `str`
            File to write the DAX to.
        name : `str`, optional
            Name of the DAX.

        Returns
        -------
        `Pegasus.ADAG`
            Abstract workflow used by Pegasus' planner.
        """
        dax = ADAG(name)

        # Add files to DAX-level replica catalog.
        catalog = {}
        for file_id in self.files:
            attrs = self.graph.node[file_id]
            f = File(attrs['lfn'])

            # Add physical file names, if any.
            urls = attrs.get('urls')
            if urls is not None:
                sites = attrs.get('sites')
                if sites is None:
                    sites = ','.join(len(urls.split(',')) * ['local'])
                for url, site in zip(urls.split(','), sites.split(',')):
                    f.addPFN(PFN(url, site))

            catalog[attrs['lfn']] = f
            dax.addFile(f)

        # Add jobs to the DAX.
        for task_id in self.tasks:
            attrs = self.graph.node[task_id]
            job = Job(name=attrs['name'], id=task_id)

            # Add job command line arguments replacing any file name with
            # respective Pegasus file object.
            args = attrs.get('args')
            if args is not None and args:
                args = args.split()
                lfns = list(set(catalog) & set(args))
                if lfns:
                    indices = [args.index(lfn) for lfn in lfns]
                    for idx, lfn in zip(indices, lfns):
                        args[idx] = catalog[lfn]
                job.addArguments(*args)

            # Specify job's inputs.
            inputs = [file_id for file_id in self.graph.predecessors(task_id)]
            for file_id in inputs:
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.INPUT)

            # Specify job's outputs
            outputs = [file_id for file_id in self.graph.successors(task_id)]
            for file_id in outputs:
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.OUTPUT)

                streams = attrs.get('streams')
                if streams is not None:
                    if streams & 1 != 0:
                        job.setStdout(f)
                    if streams & 2 != 0:
                        job.setStderr(f)

            dax.addJob(job)

        # Add job dependencies to the DAX.
        for task_id in self.tasks:
            parents = set()
            for file_id in self.graph.predecessors(task_id):
                parents.update(self.graph.predecessors(file_id))
            for parent_id in parents:
                dax.depends(parent=dax.getJob(parent_id),
                            child=dax.getJob(task_id))

        # Finally, write down the workflow in DAX format and return it.
        with open(filename, 'w') as f:
            dax.writeXML(f)

        return dax
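A minimal usage sketch for the method above, assuming the surrounding class (not shown in the example) has already been instantiated and its graph, files, and tasks populated; the class name GenericWorkflow is a placeholder, not part of the original code:

# Hypothetical driver for the write() method above; GenericWorkflow is a
# placeholder for the class that actually defines it.
workflow = GenericWorkflow()
# ... populate workflow.graph, workflow.files, and workflow.tasks here ...
workflow.write('pipeline.dax', name='pipeline')
# The generated file can then be handed to the Pegasus planner, e.g.
#   pegasus-plan --dax pipeline.dax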
Example #3
    def generate_dax(self, daxfile):
        from Pegasus.DAX3 import ADAG, Job, File, Link

        # The DAX generator
        dax = ADAG("pipeline")

        # Some bits of metadata.  Should put plenty more here.
        dax.metadata("owner", self.pipeline.owner)
        dax.metadata("basename", self.pipeline.basename)
        dax.metadata("version", self.pipeline.version)

        # string tag -> pegasus File object mapping of all the
        # inputs and outputs used by any pipeline stage.
        files = {}

        # First generate the overall inputs to the pipeline,
        # i.e. ones that are not generated by any other stage
        # but must be specified at the start
        for tag in self.pipeline.input_tags():
            path = self.info['inputs'].get(tag)
            files[tag] = File(path)

        # Now go through the pipeline in sequence.
        for stage_name, stage_class in self.pipeline.sequence():
            # The stage in the pipeline.  Which image it corresponds to
            # is described in the transformation catalog generation.
            job = Job(stage_name, id=stage_name)

            # Configuration files for this job.
            # These will not be built during the pipeline and must be
            # provided by the user
            for config_tag, config_filename in stage_class.config.items():
                filename = self.pipeline.cfg[stage_name]['config'][config_tag]
                config_path = os.path.join(self.config_dir(), filename)
                config = File(config_path)
                job.uses(config, link=Link.INPUT)

            # Input files for the job, either created by the user or by previous
            # stages.  In either case they should be in the "files" dictionary, because
            # precursor jobs will have been added before this one.
            for input_tag in stage_class.inputs.keys():
                job.uses(files[input_tag], link=Link.INPUT)

            # Output files from the job. These will be created by the job
            # and used by future jobs
            for output_tag, output_type in stage_class.outputs.items():
                output_filename = "{}.{}".format(output_tag, output_type)
                output = File(output_filename)
                job.uses(output,
                         link=Link.OUTPUT,
                         transfer=True,
                         register=True)
                files[output_tag] = output

            # Add this job to the pipeline
            dax.addJob(job)

            # Tell pegasus which jobs this one depends on.
            # The pipeline already knows this information.
            # The pipeline.sequence command runs through the jobs in an order
            # that guarantees a job's predecessors are always processed before
            # it, so they will already exist in the dax by this point.
            for predecessor_name in self.pipeline.dependencies(stage_name):
                dax.depends(stage_name, predecessor_name)

        # Generate the final DAX XML file.
        with open(daxfile, "w") as f:
            dax.writeXML(f)
Example #4
from tvb.recon.dax.resampling import Resampling
from tvb.recon.dax.seeg_computation import SEEGComputation
from tvb.recon.dax.seeg_gain_computation import SeegGainComputation
from tvb.recon.dax.sensor_model import SensorModel
from tvb.recon.dax.source_model import SourceModel
from tvb.recon.dax.t1_processing import T1Processing
from tvb.recon.dax.tracts_generation import TractsGeneration

if __name__ == "__main__":
    if len(sys.argv) != 3:
        sys.stderr.write("Usage: %s DAXFILE PATIENT_FILE\n" % (sys.argv[0]))
        sys.exit(1)
    daxfile = sys.argv[1]
    patient_file = sys.argv[2]

    dax = ADAG("TVB-PIPELINE")
    dax.metadata("created", time.ctime())

    config = Configuration(patient_file)

    subject = config.props[ConfigKey.SUBJECT]
    trg_subject = config.props[ConfigKey.TRGSUBJECT]

    atlas_suffix = AtlasSuffix.DEFAULT

    if config.props[ConfigKey.ATLAS] == Atlas.A2009S:
        atlas_suffix = AtlasSuffix.A2009S

    t1_processing = T1Processing(subject, config.props[ConfigKey.T1_FRMT], config.props[ConfigKey.T2_FLAG],
                                 config.props[ConfigKey.T2_FRMT], config.props[ConfigKey.FLAIR_FLAG],
                                 config.props[ConfigKey.FLAIR_FRMT], config.props[ConfigKey.OPENMP_THRDS],
Example #5
#!/usr/bin/env python

import time
import argparse
from Pegasus.DAX3 import ADAG
from bnm.recon.pegasus.config import Configuration
from bnm.recon.pegasus.flirt import step_coregister_t1_dwi
from bnm.recon.pegasus.t1 import steps_recon_all
from bnm.recon.pegasus.diffusion import steps_dwi_preproc
from bnm.recon.pegasus.utils import write_dax

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate a BNM flow")
    parser.add_argument("patient_file")
    args = parser.parse_args()

    dax = ADAG("BNM")
    dax.metadata("name", "Brain Network Model Reconstruction WorkFlow")
    dax.metadata("created-at", time.ctime())
    dax.metadata("flow-configuration", args.patient_file)
    config = Configuration(args.patient_file)

    relevant_t1_job = steps_recon_all(dax, config)
    relevant_dwi_job = steps_dwi_preproc(dax, config.diffusion)
    step_coregister_t1_dwi(dax, config, relevant_t1_job, relevant_dwi_job)

    write_dax(dax, config.main_dax_path)
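The write_dax helper imported above is not shown in the example; a plausible minimal implementation based only on the standard Pegasus.DAX3 API (an assumption, not the project's actual code) would be:

def write_dax(dax, path):
    # Assumed helper: serialize the ADAG to the given path as DAX XML.
    with open(path, "w") as f:
        dax.writeXML(f)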
Example #6
class PegasusWorkflowModel (WorkflowModel):
    
    def __init__(self, namespace, wms):
        logger.debug ("wms:pegasus:create-workflowmodel: %s" % namespace)
        self.wms = wms
        logger.debug ("outputdir: %s", wms.getOutputDir ())
        self.adag = ADAG (namespace)
        self.namespace = namespace
        self.files = {}
        self.exes = {}
        self.propertyMap = {}
        self.nodes = {}
        self.jobTransformations = {}
        
        self.variableMap = {
            'literalToVariable' : {},
            'literalToNodeId'   : {}
            }
        
    def addToVariableMap (self, literal, variable, id):
        self.variableMap ['literalToVariable'][literal] = variable
        self.variableMap ['literalToNodeId'][literal] = id
        logger.debug ("variablematch: recorded lit=%s var=%s id=%s", literal, variable, id)

    def setProperties (self, nodeId, properties):
        logger.debug ("wms:pegasus:dax:setprops: (%s)->(%s)" % (nodeId, properties))
        self.propertyMap [nodeId] = properties
        
    def getProperties (self, nodeId):
        return self.propertyMap [nodeId]
    
    def addNode (self, id, node):
        logger.debug ("wms:pegasus:dax:add-node: (%s)->(%s)" % (node.getId(), properties))
        self.nodes [id] = node

    def getNode (self, id):
        return self.nodes.get (id)
        
    def createFile (self, fileName, fileURL=None, site=None):
        #traceback.print_stack ()
        file = self.getFile (fileName)
        if not file:
            file = File (fileName)
            if not fileURL:
                fileURL = "file://%s/%s" % (self.wms.getOutputDir (), fileName)
                logger.debug ("fileurl: %s", fileURL)
            if not site:
                site = "local"
            if not isinstance(fileURL, basestring) and len (fileURL) > 0:
                fileURL = fileURL [0]
            logger.debug ("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
            pfn = PFN (fileURL, site)
            file.addPFN (pfn)
            self.files [fileName] = file
        return file
    def addFileToDAG (self, file):
        if not self.adag.hasFile (file):
            self.adag.addFile (file)
    def removeFileFromDAG (self, file):
        if file in self.adag.files:
            self.adag.files.remove (file)
        
    def addFile (self, fileName, fileURL=None, site=None):
        #traceback.print_stack ()
        file = self.getFile (fileName)
        if not file:
            file = File (fileName)
            if not fileURL:
                fileURL = "file://%s/%s" % (self.wms.getOutputDir (), fileName)
                logger.debug ("fileurl: %s", fileURL)
            if not site:
                site = "local"
            if not isinstance(fileURL, basestring) and len (fileURL) > 0:
                fileURL = fileURL [0]
            logger.debug ("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
            pfn = PFN (fileURL, site)
            file.addPFN (pfn)
            self.adag.addFile (file)
            self.files [fileName] = file
        return file
	
    def getFile (self, fileName, prefix=""):
        value = None
        key = "%s%s" % (prefix, fileName)
        if key in self.files:
            logger.debug ("wms:pegasus:dax:get-file: (%s)" % key)
            value = self.files [key]
        else:
            value = None
	return value

    def addExecutable (self, jobId, name, path, version="1.0", exe_os="linux", exe_arch="x86_64", site="local", installed="true"):
        e_exe = self.getExecutable (name)
        
        if not version:
            version = "1.0"
        if not exe_arch:
            exe_arch="x86_64"

        if not e_exe:
            e_exe = Executable (
                namespace=self.namespace, 
                name=name, 
                version=version, 
                os=exe_os, 
                arch=exe_arch, 
                installed=installed)
            if not site:
                site = "local"
            if not installed:
                installed = False
            if logging.getLogger().isEnabledFor (logging.DEBUG):
                logger.debug ("wms:pegasus:dax:add-exe: (name=[%s], path=[%s], version=[%s], os=[%s], arch=[%s], site=[%s], installed=[%s])" % 
                               (name,
                                path,
                                version,
                                exe_os,
                                exe_arch,
                                site,
                                installed))
            if not "://" in path:
                path = "file://%s" % path
            if not path:
                raise ValueError ("empty path for executable: %s at site %s" % (name, site))

            e_exe.addPFN (PFN (path, site))
            if not installed:
                e_exe.installed = installed
            self.adag.addExecutable (e_exe)
            self.exes [name] = e_exe

            transformation = Transformation (name, self.namespace, version)
            self.jobTransformations [jobId] = transformation
            
        return e_exe
    
    def getExecutable (self, name):
        key = name
        if key in self.exes:
            return self.exes [key]
        else:
            return None

    def addSubWorkflow (self, name, transformation=None):
        #self.adag.addTransformation (transformation)
        abstractJob = DAX (name)
        self.adag.addDAX (abstractJob)
        return abstractJob
    
    def addJob (self, id):
        #self.adag.addTransformation (transformation)
        transformation = self.jobTransformations [id]
        logger.debug ("wms:pegasus:add-job: transformation(%s) jobid(%s)", transformation.name, id)
        abstractJob = Job (name=transformation, id=id)
        self.adag.addJob (abstractJob)
        return abstractJob
    
    def addProfiles (self, abstractJob, profiles):
        if profiles:
            for astProfile in profiles:
                logger.debug ("wms:pegasus:add-profile: (namespace=%s,key=%s,value=%s) to job (%s)",
                               astProfile.namespace,
                               astProfile.key,
                               astProfile.value,
                               abstractJob.name)
                profile = Profile (astProfile.namespace,
                                   astProfile.key,
                                   astProfile.value)
                abstractJob.addProfile (profile)

    def addFiles (self, abstractJob, files, link):
        if files:
            for fileKey in files:
                tuple = files [fileKey]
                fileElement = tuple [0]
                file = fileElement.getDaxNode ()
                try:

                    isLeaf = tuple [2]
                    if isLeaf:
                        abstractJob.uses (file, link=link, transfer=True)
                    else:
                        abstractJob.uses (file, link=link)
                    arg = tuple [1]
                    if arg:
                        abstractJob.addArguments (arg, file)
                except DuplicateError:
                    pass

    def addInputFiles (self, abstractJob, files):
        self.addFiles (abstractJob, files, Link.INPUT)

    def addOutputFiles (self, abstractJob, files):
        self.addFiles (abstractJob, files, Link.OUTPUT)

    def addArguments (self, abstractJob, arguments):
        if arguments:
            abstractJob.addArguments (arguments)

    def addDependency (self, parent, child):
        self.adag.addDependency (Dependency (parent, child))
                        
    def writeExecutable (self, stream):
        self.adag.writeXML (stream)
        filename = "%s.%s" % (self.namespace, 'obj')
        filepath = os.path.join (self.wms.getOutputDir (), filename)
        try:
            with open (filepath, 'w') as output:
                output.write (json.dumps (self.variableMap, indent=3, sort_keys=True))
        except IOError:
            traceback.print_stack ()

    def getADAG (self):
        return self.adag
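A condensed usage sketch of the model above. The wms argument is assumed to be an object exposing getOutputDir(); the job id, executable name, and paths are illustrative only:

model = PegasusWorkflowModel ("demo", wms)          # wms: assumed WMS helper object
infile = model.addFile ("input.txt")                # registers a File plus a local PFN
model.addExecutable ("job-0", "wordcount", "/usr/bin/wc")
job = model.addJob ("job-0")                        # uses the transformation recorded above
job.addArguments ("-l", infile)
job.uses (infile, link=Link.INPUT)
with open ("demo.dax", "w") as stream:
    model.writeExecutable (stream)                  # writes the DAX plus the variable map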
Example #7
import os
import sys
import ConfigParser

from Pegasus.DAX3 import ADAG, File, Link, Job, Executable, PFN, Profile

if len(sys.argv) != 3:
    print "Usage: %s PEGASUS_HOME CONFIG_DIR" % (sys.argv[0])
    sys.exit(1)

config = ConfigParser.ConfigParser({
    'input_file': '',
    'workflow_name': 'horizontal-clustering-test',
    'executable_installed': "False",
    'clusters_size': "3",
    'clusters_maxruntime': "7"
})
config.read(sys.argv[2] + '/test.config')

# Create an abstract dag
cluster = ADAG(config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if (input_file == ''):
    input_file = os.getcwd()
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(
    PFN(
        config.get('all', 'file_url') + input_file + "/f.a",
        config.get('all', 'file_site')))
cluster.addFile(a)
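An illustrative continuation of the script above, showing how the replica-catalog entry is typically consumed by a job; the job name, arguments, and output file are assumptions, not part of the original test:

# Hypothetical job that reads f.a and produces f.b; names are illustrative.
b = File("f.b")
job = Job(name="process", id="ID0000001")
job.addArguments("-i", a, "-o", b)
job.uses(a, link=Link.INPUT)
job.uses(b, link=Link.OUTPUT, transfer=True)
cluster.addJob(job)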
Example #8
    def __init__(self, name, count=None, index=None):
        ADAG.__init__(self, name, count, index)
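For context, a minimal sketch of how such an ADAG subclass can be used; it behaves exactly like Pegasus.DAX3.ADAG, so jobs are added and the DAX is written in the usual way (the subclass and job names below are illustrative):

from Pegasus.DAX3 import ADAG, Job

class Workflow(ADAG):
    # Thin subclass mirroring the example above.
    def __init__(self, name, count=None, index=None):
        ADAG.__init__(self, name, count, index)

wf = Workflow("demo")
job = Job(name="echo", id="ID0000001")
job.addArguments("hello")
wf.addJob(job)
with open("demo.dax", "w") as f:
    wf.writeXML(f)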
Example #9
    def write_dax(self, filename='workflow.dax', name='workflow'):
        """Generate Pegasus abstract workflow (DAX).

        Parameters
        ----------
        filename : `str`
            File to write the DAX to.
        name : `str`, optional
            Name of the DAX.

        Returns
        -------
        `Pegasus.ADAG`
            Abstract workflow used by Pegasus' planner.

        Raises
        ------
        `ValueError`
            If either task or file node is missing mandatory attribute.
        """
        dax = ADAG(name)

        # Process file nodes.
        for file_id in self.files:
            attrs = self.graph.node[file_id]
            try:
                name = attrs['lfn']
            except KeyError:
                msg = 'Mandatory attribute "{}" is missing.'
                raise ValueError(msg.format('lfn'))
            file_ = File(name)

            # Add physical file names, if any.
            urls = attrs.get('pfn')
            if urls is not None:
                urls = urls.split(',')
                sites = attrs.get('sites')
                if sites is None:
                    sites = len(urls) * ['condorpool']
                for url, site in zip(urls, sites):
                    file_.addPFN(PFN(url, site))

            self.catalog[attrs['lfn']] = file_

        # Add jobs to the DAX.
        for task_id in self.tasks:
            attrs = self.graph.node[task_id]
            try:
                name = attrs['exec_name']
            except KeyError:
                msg = 'Mandatory attribute "{}" is missing.'
                raise ValueError(msg.format('exec_name'))
            label = '{name}_{id}'.format(name=name, id=task_id)
            job = Job(name, id=task_id, node_label=label)

            # Add job command line arguments replacing any file name with
            # respective Pegasus file object.
            args = attrs.get('exec_args', [])
            if args:
                args = args.split()
                lfns = list(set(self.catalog) & set(args))
                if lfns:
                    indices = [args.index(lfn) for lfn in lfns]
                    for idx, lfn in zip(indices, lfns):
                        args[idx] = self.catalog[lfn]
                job.addArguments(*args)

            # Specify job's inputs.
            inputs = [file_id for file_id in self.graph.predecessors(task_id)]
            for file_id in inputs:
                attrs = self.graph.node[file_id]
                is_ignored = attrs.get('ignore', False)
                if not is_ignored:
                    file_ = self.catalog[attrs['lfn']]
                    job.uses(file_, link=Link.INPUT)

            # Specify job's outputs
            outputs = [file_id for file_id in self.graph.successors(task_id)]
            for file_id in outputs:
                attrs = self.graph.node[file_id]
                is_ignored = attrs.get('ignore', False)
                if not is_ignored:
                    file_ = self.catalog[attrs['lfn']]
                    job.uses(file_, link=Link.OUTPUT)

                    streams = attrs.get('streams')
                    if streams is not None:
                        if streams & 1 != 0:
                            job.setStdout(file_)
                        if streams & 2 != 0:
                            job.setStderr(file_)

            # Provide default files to store stderr and stdout, if not
            # specified explicitly.
            if job.stderr is None:
                file_ = File('{name}.err'.format(name=label))
                job.uses(file_, link=Link.OUTPUT)
                job.setStderr(file_)
            if job.stdout is None:
                file_ = File('{name}.out'.format(name=label))
                job.uses(file_, link=Link.OUTPUT)
                job.setStdout(file_)

            dax.addJob(job)

        # Add job dependencies to the DAX.
        for task_id in self.tasks:
            parents = set()
            for file_id in self.graph.predecessors(task_id):
                parents.update(self.graph.predecessors(file_id))
            for parent_id in parents:
                dax.depends(parent=dax.getJob(parent_id),
                            child=dax.getJob(task_id))

        # Finally, write down the workflow in DAX format and return it.
        with open(filename, 'w') as f:
            dax.writeXML(f)

        return dax
Example #10
#!/usr/bin/env python

from Pegasus.DAX3 import ADAG, File, Link, Job, Executable, PFN, Profile
import sys
import os
import ConfigParser

if len(sys.argv) != 3:
    print "Usage: %s PEGASUS_HOME CONFIG_DIR" % (sys.argv[0])
    sys.exit(1)

config = ConfigParser.ConfigParser({
    'input_file': '',
    'workflow_name': 'horizontal-clustering-test',
    'executable_installed': "False",
    'clusters_size': "3",
    'clusters_maxruntime': "7"
})
config.read(sys.argv[2] + '/test.config')

# Create an abstract dag
cluster = ADAG (config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if (input_file == ''):
    input_file = os.getcwd()
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a", config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(namespace="cluster", name="level" + str(i), version="1.0",
                       os="linux", arch="x86",
                       installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN(PFN(config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg",
                     config.get('all', 'executable_site')))
Example #11
from tvb.recon.dax.resampling import Resampling
from tvb.recon.dax.seeg_computation import SEEGComputation
from tvb.recon.dax.seeg_gain_computation import SeegGainComputation
from tvb.recon.dax.sensor_model import SensorModel
from tvb.recon.dax.source_model import SourceModel
from tvb.recon.dax.t1_processing import T1Processing
from tvb.recon.dax.tracts_generation import TractsGeneration

if __name__ == "__main__":
    if len(sys.argv) != 3:
        sys.stderr.write("Usage: %s DAXFILE PATIENT_FILE\n" % (sys.argv[0]))
        sys.exit(1)
    daxfile = sys.argv[1]
    patient_file = sys.argv[2]

    dax = ADAG("TVB-PIPELINE")
    dax.metadata("created", time.ctime())

    config = Configuration(patient_file)

    subject = config.props[ConfigKey.SUBJECT]
    trg_subject = config.props[ConfigKey.TRGSUBJECT]

    atlas_suffix = AtlasSuffix.DEFAULT

    if config.props[ConfigKey.ATLAS] == Atlas.A2009S:
        atlas_suffix = AtlasSuffix.A2009S

    t1_processing = T1Processing(
        subject, config.props[ConfigKey.T1_FRMT],
        config.props[ConfigKey.T2_FLAG], config.props[ConfigKey.T2_FRMT],
Example #12
    def _init_job_graph(self) -> ADAG:
        ret = ADAG(self.name)
        ret.metadata("name", self.name)
        ret.metadata("createdby", self.created_by)
        return ret
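The same pattern as a standalone minimal sketch: workflow-level metadata is attached with ADAG.metadata and serialized when the DAX is written (the workflow name and creator value below are illustrative):

from Pegasus.DAX3 import ADAG

dag = ADAG("example")
dag.metadata("name", "example")
dag.metadata("createdby", "someone@example.org")
with open("example.dax", "w") as f:
    dag.writeXML(f)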
Example #13
	def generate_workflow(self):
		"Generate a workflow (DAX, config files, and replica catalog)"
		ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
		dax = ADAG("mgrast-prod-%s" % ts)
		
		# These are all the global input files for the workflow
		metagenome = File(self.mgfile)
		self.add_replica(self.mgfile, os.path.abspath(self.mgfile))

		# QC job
		qcJob = Job("wrapper-qc", node_label="wrapper-qc")

		qcJob.addArguments("-input", self.mgfile)
		qcJob.addArguments("-format", self.file_format)
		qcJob.addArguments("-out_prefix", "075")
		qcJob.addArguments("-assembled", self.assembled)
		qcJob.addArguments("-filter_options", self.filter_options)
		qcJob.addArguments("-proc", "8")

		qcJob.uses(metagenome, link=Link.INPUT)
		qcJob.uses("075.assembly.coverage", link=Link.OUTPUT, transfer=False)
		qcJob.uses("075.qc.stats", link=Link.OUTPUT, transfer=False)
		qcJob.uses("075.upload.stats", link=Link.OUTPUT, transfer=False)

		qcJob.profile("globus", "maxwalltime", "60")
		qcJob.profile("globus", "hostcount", "8")
		qcJob.profile("globus", "count", "8")
		dax.addJob(qcJob)

		# Preprocess Job
		preprocessJob = Job("wrapper-preprocess", node_label="wrapper-preprocess")
		preprocessJob.addArguments("-input", self.mgfile)
		preprocessJob.addArguments("-format", self.file_format)
		preprocessJob.addArguments("-out_prefix", "100.preprocess")
		preprocessJob.addArguments("-filter_options", self.filter_options)
		
		preprocessJob.uses(metagenome, link=Link.INPUT)
		preprocessJob.uses("100.preprocess.passed.fna", link=Link.OUTPUT, transfer=False)
		preprocessJob.uses("100.preprocess.removed.fna", link=Link.OUTPUT, transfer=False)

		preprocessJob.profile("globus", "maxwalltime", "20")
		dax.addJob(preprocessJob)

		# Dereplicate Job
		dereplicateJob = Job("wrapper-dereplicate", node_label="wrapper-dereplicate")
		dereplicateJob.addArguments("-input=100.preprocess.passed.fna")
		dereplicateJob.addArguments("-out_prefix=150.dereplication")
		dereplicateJob.addArguments("-prefix_length=%s" % self.prefix_length)
		dereplicateJob.addArguments("-dereplicate=%s" % self.dereplicate)
		dereplicateJob.addArguments("-memory=10")

		dereplicateJob.uses("100.preprocess.passed.fna", link=Link.INPUT)
		dereplicateJob.uses("150.dereplication.passed.fna", link=Link.OUTPUT, transfer=False)
		dereplicateJob.uses("150.dereplication.removed.fna", link=Link.OUTPUT, transfer=False)

		dereplicateJob.profile("globus", "maxwalltime", "10")
		dax.addJob(dereplicateJob)
		dax.depends(dereplicateJob, preprocessJob)

		# Bowtie Screen Job
		bowtieJob = Job("wrapper-bowtie-screen", node_label="wrapper-bowtie-screen")
		bowtieJob.addArguments("-input=150.dereplication.passed.fna")
		bowtieJob.addArguments("-output=299.screen.passed.fna")
		bowtieJob.addArguments("-index=%s" % self.screen_indexes)
		bowtieJob.addArguments("-bowtie=%s" % self.bowtie)
		bowtieJob.addArguments("-proc=8")

		bowtieJob.uses("150.dereplication.passed.fna", link=Link.INPUT)
		bowtieJob.uses("299.screen.passed.fna", link=Link.OUTPUT, transfer=False)

		bowtieJob.profile("globus", "maxwalltime", "30")
		bowtieJob.profile("globus", "hostcount", "8")
		bowtieJob.profile("globus", "count", "8")
		dax.addJob(bowtieJob)
		dax.depends(bowtieJob, dereplicateJob)

		# Genecalling Job
		geneJob = Job("wrapper-genecalling", node_label="wrapper-genecalling")
		geneJob.addArguments("-input=299.screen.passed.fna")
		geneJob.addArguments("-out_prefix=350.genecalling.coding")
		geneJob.addArguments("-type=%s" % self.fgs_type)
		geneJob.addArguments("-size=100")
		geneJob.addArguments("-proc=8")

		geneJob.uses("299.screen.passed.fna", link=Link.INPUT)
		geneJob.uses("350.genecalling.coding.faa", link=Link.OUTPUT, transfer=False)
		geneJob.uses("350.genecalling.coding.fna", link=Link.OUTPUT, transfer=False)

		geneJob.profile("globus", "maxwalltime", "30")
		geneJob.profile("globus", "hostcount", "8")
		geneJob.profile("globus", "count", "8")
		dax.addJob(geneJob)
		dax.depends(geneJob, bowtieJob)

		# Cluster (Genecalling) Job
		cluster1Job = Job("wrapper-cluster", node_label="wrapper-cluster")
		cluster1Job.addArguments("-input=350.genecalling.coding.faa")
		cluster1Job.addArguments("-out_prefix=550.cluster")
		cluster1Job.addArguments("-aa")
		cluster1Job.addArguments("-pid=%s" % self.aa_pid)
		cluster1Job.addArguments("-memory=20")

		cluster1Job.uses("350.genecalling.coding.faa", link=Link.INPUT)
		cluster1Job.uses("550.cluster.aa%s.faa" % self.aa_pid, link=Link.OUTPUT, transfer=False)
		cluster1Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.OUTPUT, transfer=False)
		
		cluster1Job.profile("globus", "maxwalltime", "10")
		dax.addJob(cluster1Job)
		dax.depends(cluster1Job, geneJob)

		# Blat_prot Job
		blatprotJob = Job("wrapper-blat-prot", node_label="wrapper-blat-prot")
		blatprotJob.addArguments("--input=550.cluster.aa%s.faa" % self.aa_pid)
		blatprotJob.addArguments("--output=650.superblat.sims")

		blatprotJob.uses("550.cluster.aa%s.faa" % self.aa_pid, link=Link.INPUT)
		blatprotJob.uses("650.superblat.sims", link=Link.OUTPUT, transfer=False)
		
		blatprotJob.profile("globus", "maxwalltime", "2880")
                blatprotJob.profile("globus", "hostcount", "24")
                blatprotJob.profile("globus", "count", "24")
		dax.addJob(blatprotJob)
		dax.depends(blatprotJob, cluster1Job)

		# Annotate Sims (Blat Prod) Job
		annotatesims1Job = Job("wrapper-annotate-sims", node_label="wrapper-annotate-sims")
		annotatesims1Job.addArguments("-input=650.superblat.sims")
		annotatesims1Job.addArguments("-out_prefix=650")
		annotatesims1Job.addArguments("-aa")
		annotatesims1Job.addArguments("-ach_ver=%s" % self.ach_annotation_ver)
		annotatesims1Job.addArguments("-ann_file=m5nr_v1.bdb")

		annotatesims1Job.uses("650.superblat.sims", link=Link.INPUT)
		annotatesims1Job.uses("650.aa.sims.filter", link=Link.OUTPUT, transfer=False)
		annotatesims1Job.uses("650.aa.expand.protein", link=Link.OUTPUT, transfer=False)
		annotatesims1Job.uses("650.aa.expand.lca", link=Link.OUTPUT, transfer=False)
		annotatesims1Job.uses("650.aa.expand.ontology", link=Link.OUTPUT, transfer=False)
		
		annotatesims1Job.profile("globus", "maxwalltime", "720")
		dax.addJob(annotatesims1Job)
		dax.depends(annotatesims1Job, blatprotJob)

		# Search RNA Job
		searchJob = Job("wrapper-search-rna", node_label="wrapper-search-rna")
		searchJob.addArguments("-input=100.preprocess.passed.fna")
		searchJob.addArguments("-output=425.search.rna.fna")
		searchJob.addArguments("-rna_nr=%s" % self.m5rna_clust)
		searchJob.addArguments("-size=100")
		searchJob.addArguments("-proc=8")

		searchJob.uses("100.preprocess.passed.fna", link=Link.INPUT)
		searchJob.uses("425.search.rna.fna", link=Link.OUTPUT, transfer=False)

                searchJob.profile("globus", "maxwalltime", "120")
                searchJob.profile("globus", "hostcount", "8")
                searchJob.profile("globus", "count", "8")
                dax.addJob(searchJob)
		dax.depends(searchJob, preprocessJob)

		# Cluster (Search RNA) Job
		cluster2Job = Job("wrapper-cluster", node_label="wrapper-cluster")
                cluster2Job.addArguments("-input=425.search.rna.fna")
                cluster2Job.addArguments("-out_prefix=440.cluster")
                cluster2Job.addArguments("-rna")
                cluster2Job.addArguments("-pid=%s" % self.rna_pid)
                cluster2Job.addArguments("-memory=20")

                cluster2Job.uses("425.search.rna.fna", link=Link.INPUT)
                cluster2Job.uses("440.cluster.rna%s.fna" % self.rna_pid, link=Link.OUTPUT, transfer=False)
                cluster2Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.OUTPUT, transfer=False)

                cluster2Job.profile("globus", "maxwalltime", "30")
                dax.addJob(cluster2Job)
		dax.depends(cluster2Job, searchJob)

		# Blat_rna Job
		blatrnaJob = Job("wrapper-blat-rna", node_label="wrapper-blat-rna")
		blatrnaJob.addArguments("--input=440.cluster.rna%s.fna" % self.rna_pid)
		blatrnaJob.addArguments("-rna_nr=m5rna")
		blatrnaJob.addArguments("--output=450.rna.sims")
		blatrnaJob.addArguments("-assembled=%s" % self.assembled)

		blatrnaJob.uses("440.cluster.rna%s.fna" % self.rna_pid, link=Link.INPUT)
		blatrnaJob.uses("450.rna.sims", link=Link.OUTPUT, transfer=False)
		
		blatrnaJob.profile("globus", "maxwalltime", "20")
		dax.addJob(blatrnaJob)
		dax.depends(blatrnaJob, cluster2Job)

		# Annotate Sims (Blat RNA) Job
		annotatesims2Job = Job("wrapper-annotate-sims", node_label="wrapper-annotate-sims")
		annotatesims2Job.addArguments("-input=450.rna.sims")
		annotatesims2Job.addArguments("-out_prefix=450")
		annotatesims2Job.addArguments("-rna")
		annotatesims2Job.addArguments("-ach_ver=%s" % self.ach_annotation_ver)
		annotatesims2Job.addArguments("-ann_file=m5nr_v1.bdb")

		annotatesims2Job.uses("450.rna.sims", link=Link.INPUT)
		annotatesims2Job.uses("450.rna.sims.filter", link=Link.OUTPUT, transfer=False)
		annotatesims2Job.uses("450.rna.expand.rna", link=Link.OUTPUT, transfer=False)
		annotatesims2Job.uses("450.rna.expand.lca", link=Link.OUTPUT, transfer=False)

		annotatesims2Job.profile("globus", "maxwalltime", "30")
		dax.addJob(annotatesims2Job)
		dax.depends(annotatesims2Job, blatrnaJob)

		# Index Sim Seq Job
		indexJob = Job("wrapper-index", node_label="wrapper-index")
		indexJob.addArguments("-in_seqs=350.genecalling.coding.fna")
		indexJob.addArguments("-in_seqs=425.search.rna.fna")
		indexJob.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
		indexJob.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
		indexJob.addArguments("-in_sims=650.aa.sims.filter")
		indexJob.addArguments("-in_sims=450.rna.sims.filter")
		indexJob.addArguments("-output=700.annotation.sims.filter.seq")
		indexJob.addArguments("-ach_ver=%s" % self.ach_annotation_ver)
		indexJob.addArguments("-memory=10")
		indexJob.addArguments("-ann_file=m5nr_v1.bdb")

		indexJob.uses("350.genecalling.coding.fna", link=Link.INPUT)
		indexJob.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
		indexJob.uses("650.aa.sims.filter", link=Link.INPUT)
		indexJob.uses("425.search.rna.fna", link=Link.INPUT)
		indexJob.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
		indexJob.uses("450.rna.sims.filter", link=Link.INPUT)
		indexJob.uses("700.annotation.sims.filter.seq", link=Link.OUTPUT, transfer=False)
		indexJob.uses("700.annotation.sims.filter.seq.index", link=Link.OUTPUT, transfer=False)

		indexJob.profile("globus", "maxwalltime", "120")
		dax.addJob(indexJob)
		dax.depends(indexJob, geneJob)
		dax.depends(indexJob, cluster1Job)
		dax.depends(indexJob, cluster2Job)
		dax.depends(indexJob, searchJob)
		dax.depends(indexJob, annotatesims1Job)

		# Annotate Summary Job (13)
		summary13Job = Job("wrapper-summary", node_label="wrapper-summary")
		summary13Job.addArguments("-job=1")
		summary13Job.addArguments("-in_expand=650.aa.expand.protein")
		summary13Job.addArguments("-in_expand=450.rna.expand.rna")
		summary13Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
		summary13Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
		summary13Job.addArguments("-in_assemb=075.assembly.coverage")
		summary13Job.addArguments("-in_index=700.annotation.sims.filter.seq.index")
		summary13Job.addArguments("-output=700.annotation.md5.summary")
		summary13Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
		summary13Job.addArguments("-type=md5")

		summary13Job.uses("075.assembly.coverage", link=Link.INPUT)
		summary13Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
		summary13Job.uses("650.aa.expand.protein", link=Link.INPUT)
		summary13Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
		summary13Job.uses("450.rna.expand.rna", link=Link.INPUT)
		summary13Job.uses("700.annotation.sims.filter.seq.index", link=Link.INPUT)
		summary13Job.uses("700.annotation.md5.summary", link=Link.OUTPUT, transfer=True)

		summary13Job.profile("globus", "maxwalltime", "30")
		dax.addJob(summary13Job)
		dax.depends(summary13Job, qcJob)
		dax.depends(summary13Job, cluster1Job)
		dax.depends(summary13Job, cluster2Job)
		dax.depends(summary13Job, indexJob)
		dax.depends(summary13Job, annotatesims1Job)
		dax.depends(summary13Job, annotatesims2Job)

		# Annotate Summary Job (14)
		summary14Job = Job("wrapper-summary", node_label="wrapper-summary")
		summary14Job.addArguments("-job=1")
		summary14Job.addArguments("-in_expand=650.aa.expand.protein")
		summary14Job.addArguments("-in_expand=450.rna.expand.rna")
		summary14Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
		summary14Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
		summary14Job.addArguments("-in_assemb=075.assembly.coverage")
		summary14Job.addArguments("-output=700.annotation.function.summary")
		summary14Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
		summary14Job.addArguments("-type=function")

		summary14Job.uses("075.assembly.coverage", link=Link.INPUT)
		summary14Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
		summary14Job.uses("650.aa.expand.protein", link=Link.INPUT)
		summary14Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
		summary14Job.uses("450.rna.expand.rna", link=Link.INPUT)
		summary14Job.uses("700.annotation.function.summary", link=Link.OUTPUT, transfer=True)

		summary14Job.profile("globus", "maxwalltime", "30")
		dax.addJob(summary14Job)
		dax.depends(summary14Job, qcJob)
		dax.depends(summary14Job, cluster1Job)
		dax.depends(summary14Job, cluster2Job)
		dax.depends(summary14Job, annotatesims1Job)
		dax.depends(summary14Job, annotatesims2Job)

		# Annotate Summary Job (15)
		summary15Job = Job("wrapper-summary", node_label="wrapper-summary")
		summary15Job.addArguments("-job=1")
		summary15Job.addArguments("-in_expand=650.aa.expand.protein")
		summary15Job.addArguments("-in_expand=450.rna.expand.rna")
		summary15Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
		summary15Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
		summary15Job.addArguments("-in_assemb=075.assembly.coverage")
		summary15Job.addArguments("-output=700.annotation.organism.summary")
		summary15Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
		summary15Job.addArguments("-type=organism")

		summary15Job.uses("075.assembly.coverage", link=Link.INPUT)
		summary15Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
		summary15Job.uses("650.aa.expand.protein", link=Link.INPUT)
		summary15Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
		summary15Job.uses("450.rna.expand.rna", link=Link.INPUT)
		summary15Job.uses("700.annotation.organism.summary", link=Link.OUTPUT, transfer=True)

		summary15Job.profile("globus", "maxwalltime", "30")
		dax.addJob(summary15Job)
		dax.depends(summary15Job, qcJob)
		dax.depends(summary15Job, cluster1Job)
		dax.depends(summary15Job, cluster2Job)
		dax.depends(summary15Job, annotatesims1Job)
		dax.depends(summary15Job, annotatesims2Job)

		# Annotate Summary Job (16)
		summary16Job = Job("wrapper-summary", node_label="wrapper-summary")
		summary16Job.addArguments("-job=1")
		summary16Job.addArguments("-in_expand=650.aa.expand.lca")
		summary16Job.addArguments("-in_expand=450.rna.expand.lca")
		summary16Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
		summary16Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
		summary16Job.addArguments("-in_assemb=075.assembly.coverage")
		summary16Job.addArguments("-output=700.annotation.lca.summary")
		summary16Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
		summary16Job.addArguments("-type=lca")

		summary16Job.uses("075.assembly.coverage", link=Link.INPUT)
		summary16Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
		summary16Job.uses("650.aa.expand.lca", link=Link.INPUT)
		summary16Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
		summary16Job.uses("450.rna.expand.lca", link=Link.INPUT)
		summary16Job.uses("700.annotation.lca.summary", link=Link.OUTPUT, transfer=True)

		summary16Job.profile("globus", "maxwalltime", "30")
		dax.addJob(summary16Job)
		dax.depends(summary16Job, qcJob)
		dax.depends(summary16Job, cluster1Job)
		dax.depends(summary16Job, cluster2Job)
		dax.depends(summary16Job, annotatesims1Job)
		dax.depends(summary16Job, annotatesims2Job)

		# Annotate Summary Job (17)
		summary17Job = Job("wrapper-summary", node_label="wrapper-summary")
		summary17Job.addArguments("-job=1")
		summary17Job.addArguments("-in_expand=650.aa.expand.ontology")
		summary17Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
		summary17Job.addArguments("-in_assemb=075.assembly.coverage")
		summary17Job.addArguments("-output=700.annotation.ontology.summary")
		summary17Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
		summary17Job.addArguments("-type=ontology")

		summary17Job.uses("075.assembly.coverage", link=Link.INPUT)
		summary17Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
		summary17Job.uses("650.aa.expand.ontology", link=Link.INPUT)
		summary17Job.uses("700.annotation.ontology.summary", link=Link.OUTPUT, transfer=True)

		summary17Job.profile("globus", "maxwalltime", "30")
		dax.addJob(summary17Job)
		dax.depends(summary17Job, qcJob)
		dax.depends(summary17Job, cluster1Job)
		dax.depends(summary17Job, annotatesims1Job)

		# Annotate Summary Job (18)
		summary18Job = Job("wrapper-summary", node_label="wrapper-summary")
		summary18Job.addArguments("-job=1")
		summary18Job.addArguments("-in_expand=650.aa.expand.protein")
		summary18Job.addArguments("-in_expand=450.rna.expand.rna")
		summary18Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
		summary18Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
		summary18Job.addArguments("-in_assemb=075.assembly.coverage")
		summary18Job.addArguments("-output=700.annotation.source.stats")
		summary18Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
		summary18Job.addArguments("-type=source")

		summary18Job.uses("075.assembly.coverage", link=Link.INPUT)
		summary18Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
		summary18Job.uses("650.aa.expand.protein", link=Link.INPUT)
		summary18Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
		summary18Job.uses("450.rna.expand.rna", link=Link.INPUT)
		summary18Job.uses("700.annotation.source.stats", link=Link.OUTPUT, transfer=True)

		summary18Job.profile("globus", "maxwalltime", "30")
		dax.addJob(summary18Job)
		dax.depends(summary18Job, qcJob)
		dax.depends(summary18Job, cluster1Job)
		dax.depends(summary18Job, cluster2Job)
		dax.depends(summary18Job, annotatesims1Job)
		dax.depends(summary18Job, annotatesims2Job)

	
		# Write the DAX file
		dax.writeXMLFile(self.daxfile)

		# Generate the replica catalog
		self.generate_replica_catalog()
Example #14
    def generate_dax(self):
        "Generate a workflow (DAX, config files, and replica catalog)"
        ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        dax = ADAG("refinement-%s" % ts)

        # These are all the global input files for the workflow
        coordinates = File(self.coordinates)
        parameters = File(self.parameters)
        extended_system = File(self.extended_system)
        topfile = File(self.topfile)
        sassena_db = File(self.sassena_db)
        incoherent_db = File(self.incoherent_db)
        coherent_db = File(self.coherent_db)

        # This job untars the sassena db and makes it available to the other
        # jobs in the workflow
        untarjob = Job("tar", node_label="untar")

        if self.is_synthetic_workflow:
            untarjob.addArguments("-p", "-xzvf", sassena_db.name)
            untarjob.addArguments("-a", "tar")

            for output_file in [ "incoherent_db", "coherent_db" ]:
                untarjob.addArguments(self.keg_params.output_file("tar", output_file, eval(output_file).name))

            self.keg_params.add_keg_params(untarjob)
        else:
            untarjob.addArguments("-xzvf", sassena_db)

        untarjob.uses(sassena_db, link=Link.INPUT)
        untarjob.uses(incoherent_db, link=Link.OUTPUT, transfer=False)
        untarjob.uses(coherent_db, link=Link.OUTPUT, transfer=False)

        untarjob.profile("globus", "jobtype", "single")
        untarjob.profile("globus", "maxwalltime", "1")
        untarjob.profile("globus", "count", "1")

        dax.addJob(untarjob)

        # For each charge that was listed in the config file
        for charge in self.charges:

            structure = "Q%s.psf" % charge

            # Equilibrate files
            eq_conf = File("equilibrate_%s.conf" % charge)
            eq_coord = File("equilibrate_%s.restart.coord" % charge)
            eq_xsc = File("equilibrate_%s.restart.xsc" % charge)
            eq_vel = File("equilibrate_%s.restart.vel" % charge)

            # Production files
            prod_conf = File("production_%s.conf" % charge)
            prod_dcd = File("production_%s.dcd" % charge)

            # Ptraj files
            ptraj_conf = File("ptraj_%s.conf" % charge)
            ptraj_fit = File("ptraj_%s.fit" % charge)
            ptraj_dcd = File("ptraj_%s.dcd" % charge)

            # Sassena incoherent files
            incoherent_conf = File("sassenaInc_%s.xml" % charge)
            fqt_incoherent = File("fqt_inc_%s.hd5" % charge)

            # Sassena coherent files
            coherent_conf = File("sassenaCoh_%s.xml" % charge)
            fqt_coherent = File("fqt_coh_%s.hd5" % charge)

            # Generate psf and configuration files for this charge pipeline
            self.generate_psf(charge)
            self.generate_eq_conf(charge, structure)
            self.generate_prod_conf(charge, structure)
            self.generate_ptraj_conf(charge)
            self.generate_incoherent_conf(charge)
            self.generate_coherent_conf(charge)

            # Equilibrate job
            eqjob = Job("namd", node_label="namd_eq_%s" % charge)
            if self.is_synthetic_workflow:
                eqjob.addArguments("-p", eq_conf)
                eqjob.addArguments("-a", "namd_eq_%s" % charge)
                eqjob.addArguments("-i", eq_conf.name, structure, coordinates.name,
                    parameters.name, extended_system.name)

                task_label = "namd-eq"

                for output_file in [ "eq_coord", "eq_xsc", "eq_vel" ]:
                    eqjob.addArguments(self.keg_params.output_file(task_label, output_file, eval(output_file).name))

                self.keg_params.add_keg_params(eqjob, task_label)
            else:
                eqjob.addArguments(eq_conf)

            eqjob.uses(eq_conf, link=Link.INPUT)
            eqjob.uses(structure, link=Link.INPUT)
            eqjob.uses(coordinates, link=Link.INPUT)
            eqjob.uses(parameters, link=Link.INPUT)
            eqjob.uses(extended_system, link=Link.INPUT)
            eqjob.uses(eq_coord, link=Link.OUTPUT, transfer=False)
            eqjob.uses(eq_xsc, link=Link.OUTPUT, transfer=False)
            eqjob.uses(eq_vel, link=Link.OUTPUT, transfer=False)
            if self.is_synthetic_workflow:
                eqjob.profile("globus", "jobtype", "mpi")
                eqjob.profile("globus", "maxwalltime", "1")
                eqjob.profile("globus", "count", "8")
            else:
                eqjob.profile("globus", "jobtype", "mpi")
                eqjob.profile("globus", "maxwalltime", self.getconf("equilibrate_maxwalltime"))
                eqjob.profile("globus", "count", self.getconf("equilibrate_cores"))
            dax.addJob(eqjob)

            # Production job
            prodjob = Job("namd", node_label="namd_prod_%s" % charge)

            if self.is_synthetic_workflow:
                prodjob.addArguments("-p", prod_conf)
                prodjob.addArguments("-a", "namd_prod_%s" % charge)
                prodjob.addArguments("-i", prod_conf.name, structure, coordinates.name,
                    parameters.name, eq_coord.name, eq_xsc.name, eq_vel.name)

                task_label = "namd-prod"
                prodjob.addArguments(self.keg_params.output_file(task_label, "prod_dcd", prod_dcd.name))
                self.keg_params.add_keg_params(prodjob, task_label)
            else:
                prodjob.addArguments(prod_conf)

            prodjob.uses(prod_conf, link=Link.INPUT)
            prodjob.uses(structure, link=Link.INPUT)
            prodjob.uses(coordinates, link=Link.INPUT)
            prodjob.uses(parameters, link=Link.INPUT)
            prodjob.uses(eq_coord, link=Link.INPUT)
            prodjob.uses(eq_xsc, link=Link.INPUT)
            prodjob.uses(eq_vel, link=Link.INPUT)
            prodjob.uses(prod_dcd, link=Link.OUTPUT, transfer=True)

            if self.is_synthetic_workflow:
                prodjob.profile("globus", "jobtype", "mpi")
                prodjob.profile("globus", "maxwalltime", "6")
                prodjob.profile("globus", "count", "8")
            else:
                prodjob.profile("globus", "jobtype", "mpi")
                prodjob.profile("globus", "maxwalltime", self.getconf("production_maxwalltime"))
                prodjob.profile("globus", "count", self.getconf("production_cores"))

            dax.addJob(prodjob)
            dax.depends(prodjob, eqjob)

            # ptraj job
            ptrajjob = Job(namespace="amber", name="ptraj", node_label="amber_ptraj_%s" % charge)

            if self.is_synthetic_workflow:
                ptrajjob.addArguments("-p", topfile)
                ptrajjob.addArguments("-a", "amber_ptraj_%s" % charge)
                ptrajjob.addArguments("-i", topfile.name, ptraj_conf.name, prod_dcd.name)

                task_label = "amber-ptraj"

                for output_file in [ "ptraj_fit", "ptraj_dcd" ]:
                    ptrajjob.addArguments(self.keg_params.output_file(task_label, output_file, eval(output_file).name))

                self.keg_params.add_keg_params(ptrajjob, task_label)

            else:
                ptrajjob.addArguments(topfile)
                ptrajjob.setStdin(ptraj_conf)

            ptrajjob.uses(topfile, link=Link.INPUT)
            ptrajjob.uses(ptraj_conf, link=Link.INPUT)
            ptrajjob.uses(prod_dcd, link=Link.INPUT)
            ptrajjob.uses(ptraj_fit, link=Link.OUTPUT, transfer=True)
            ptrajjob.uses(ptraj_dcd, link=Link.OUTPUT, transfer=True)
            ptrajjob.profile("globus", "jobtype", "single")
            ptrajjob.profile("globus", "maxwalltime", self.getconf("ptraj_maxwalltime"))
            ptrajjob.profile("globus", "count", self.getconf("ptraj_cores"))
            dax.addJob(ptrajjob)
            dax.depends(ptrajjob, prodjob)

            # sassena incoherent job
            incojob = Job("sassena", node_label="sassena_inc_%s" % charge)
            if self.is_synthetic_workflow:
                incojob.addArguments("-p", "--config", incoherent_conf)
                incojob.addArguments("-a", "sassena_inc_%s" % charge)
                incojob.addArguments("-i", incoherent_conf.name, ptraj_dcd.name, incoherent_db.name, coordinates.name)

                task_label = "sassena-inc"

                incojob.addArguments(self.keg_params.output_file(task_label, "fqt_incoherent", fqt_incoherent.name))

                self.keg_params.add_keg_params(incojob, task_label)
            else:
                incojob.addArguments("--config", incoherent_conf)

            incojob.uses(incoherent_conf, link=Link.INPUT)
            incojob.uses(ptraj_dcd, link=Link.INPUT)
            incojob.uses(incoherent_db, link=Link.INPUT)
            incojob.uses(coordinates, link=Link.INPUT)
            incojob.uses(fqt_incoherent, link=Link.OUTPUT, transfer=True)

            if self.is_synthetic_workflow:
                incojob.profile("globus", "jobtype", "mpi")
                incojob.profile("globus", "maxwalltime", "6")
                incojob.profile("globus", "count", "8")
            else:
                incojob.profile("globus", "jobtype", "mpi")
                incojob.profile("globus", "maxwalltime", self.getconf("sassena_maxwalltime"))
                incojob.profile("globus", "count", self.getconf("sassena_cores"))

            dax.addJob(incojob)
            dax.depends(incojob, ptrajjob)
            dax.depends(incojob, untarjob)

            # sassena coherent job
            cojob = Job("sassena", node_label="sassena_coh_%s" % charge)
            if self.is_synthetic_workflow:
                cojob.addArguments("-p", "--config", coherent_conf)
                cojob.addArguments("-a", "sassena_coh_%s" % charge)
                cojob.addArguments("-i", coherent_conf.name, ptraj_dcd.name, coherent_db.name, coordinates.name)

                task_label = "sassena-coh"

                cojob.addArguments(self.keg_params.output_file(task_label, "fqt_coherent", fqt_coherent.name))

                self.keg_params.add_keg_params(cojob, task_label)

            else:
                cojob.addArguments("--config", coherent_conf)

            cojob.uses(coherent_conf, link=Link.INPUT)
            cojob.uses(ptraj_dcd, link=Link.INPUT)
            cojob.uses(coherent_db, link=Link.INPUT)
            cojob.uses(coordinates, link=Link.INPUT)
            cojob.uses(fqt_coherent, link=Link.OUTPUT, transfer=True)

            if self.is_synthetic_workflow:
                cojob.profile("globus", "jobtype", "mpi")
                cojob.profile("globus", "maxwalltime", "6")
                cojob.profile("globus", "count", "8")
            else:
                cojob.profile("globus", "jobtype", "mpi")
                cojob.profile("globus", "maxwalltime", self.getconf("sassena_maxwalltime"))
                cojob.profile("globus", "count", self.getconf("sassena_cores"))

            dax.addJob(cojob)
            dax.depends(cojob, prodjob)
            dax.depends(cojob, untarjob)

        # Write the DAX file
        dax.writeXMLFile(self.daxfile)