def write(self, filename, name='dax'):
    """Generate Pegasus abstract workflow (DAX) and write it to a file.

    Parameters
    ----------
    filename : `str`
        File to write the DAX to.
    name : `str`, optional
        Name of the DAX.
    """
    dax = ADAG(name)

    # Add files to DAX-level replica catalog.
    catalog = {}
    for file_id in self.files:
        attrs = self.graph.node[file_id]
        f = File(attrs['lfn'])

        # Add physical file names, if any.  Both 'urls' and 'sites' are
        # comma-separated strings.  (The original took len() of the raw
        # string -- a character count, not a URL count -- and relied on
        # zip() truncation to mask it; split first instead.)
        urls = attrs.get('urls')
        if urls is not None:
            urls = urls.split(',')
            sites = attrs.get('sites')
            if sites is None:
                # Default every physical replica to the 'local' site.
                sites = len(urls) * ['local']
            else:
                sites = sites.split(',')
            for url, site in zip(urls, sites):
                f.addPFN(PFN(url, site))
        catalog[attrs['lfn']] = f
        dax.addFile(f)

    # Add jobs to the DAX.
    for task_id in self.tasks:
        attrs = self.graph.node[task_id]
        job = Job(name=attrs['name'], id=task_id)

        # Add job command line arguments replacing any file name with
        # respective Pegasus file object.
        args = attrs.get('args')
        if args:
            args = args.split()
            lfns = list(set(catalog) & set(args))
            if lfns:
                indices = [args.index(lfn) for lfn in lfns]
                for idx, lfn in zip(indices, lfns):
                    args[idx] = catalog[lfn]
            job.addArguments(*args)

        # Specify job's inputs.
        inputs = [file_id for file_id in self.graph.predecessors(task_id)]
        for file_id in inputs:
            attrs = self.graph.node[file_id]
            f = catalog[attrs['lfn']]
            job.uses(f, link=Link.INPUT)

        # Specify job's outputs.
        outputs = [file_id for file_id in self.graph.successors(task_id)]
        for file_id in outputs:
            attrs = self.graph.node[file_id]
            f = catalog[attrs['lfn']]
            job.uses(f, link=Link.OUTPUT)

            # Bit 0 of 'streams' marks this file as stdout, bit 1 as stderr.
            streams = attrs.get('streams')
            if streams is not None:
                if streams & 1 != 0:
                    job.setStdout(f)
                if streams & 2 != 0:
                    job.setStderr(f)
        dax.addJob(job)

    # Add job dependencies to the DAX: a task depends on the producer of
    # each of its input files.
    for task_id in self.tasks:
        parents = set()
        for file_id in self.graph.predecessors(task_id):
            parents.update(self.graph.predecessors(file_id))
        for parent_id in parents:
            dax.depends(parent=dax.getJob(parent_id),
                        child=dax.getJob(task_id))

    # Finally, write down the workflow in DAX format.
    with open(filename, 'w') as f:
        dax.writeXML(f)
#!/usr/bin/env python import time import argparse from Pegasus.DAX3 import ADAG from bnm.recon.pegasus.config import Configuration from bnm.recon.pegasus.flirt import step_coregister_t1_dwi from bnm.recon.pegasus.t1 import steps_recon_all from bnm.recon.pegasus.diffusion import steps_dwi_preproc from bnm.recon.pegasus.utils import write_dax if __name__ == "__main__": parser = argparse.ArgumentParser(description="Generate a BNM flow") parser.add_argument("patient_file") args = parser.parse_args() dax = ADAG("BNM") dax.metadata("name", "Brain Network Model Reconstruction WorkFlow") dax.metadata("created-at", time.ctime()) dax.metadata("flow-configuration", args.patient_file) config = Configuration(args.patient_file) relevant_t1_job = steps_recon_all(dax, config) relevant_dwi_job = steps_dwi_preproc(dax, config.diffusion) step_coregister_t1_dwi(dax, config, relevant_t1_job, relevant_dwi_job) write_dax(dax, config.main_dax_path)
def generate_dax(self, daxfile):
    """Build the pipeline's abstract workflow (DAX) and write it to *daxfile*.

    Parameters
    ----------
    daxfile : str
        Path of the DAX XML file to create.
    """
    from Pegasus.DAX3 import ADAG, Job, File, Link

    # The DAX generator
    dax = ADAG("pipeline")

    # Some bits of metadata.  Should put plenty more here.
    dax.metadata("owner", self.pipeline.owner)
    dax.metadata("basename", self.pipeline.basename)
    dax.metadata("version", self.pipeline.version)

    # string tag -> pegasus File object mapping of all the
    # inputs and outputs used by any pipeline stage.
    files = {}

    # First generate the overall inputs to the pipeline,
    # i.e. ones that are not generated by any other stage
    # but must be specified at the start.
    for tag in self.pipeline.input_tags():
        path = self.info['inputs'].get(tag)
        files[tag] = File(path)

    # Now go through the pipeline in sequence.
    for stage_name, stage_class in self.pipeline.sequence():
        # The stage in the pipeline.  We describe the meaning of it
        # (which image it corresponds to) in the transformation catalog
        # generation.
        job = Job(stage_name, id=stage_name)

        # Configuration files for this job.  These will not be built
        # during the pipeline and must be provided by the user.
        for config_tag in stage_class.config:
            filename = self.pipeline.cfg[stage_name]['config'][config_tag]
            config_path = os.path.join(self.config_dir(), filename)
            config = File(config_path)
            job.uses(config, link=Link.INPUT)

        # Input files for the job, either created by the user or by
        # previous stages.  In either case they should be in the "files"
        # dictionary, because precursor jobs will have been added before
        # this one.
        for input_tag in stage_class.inputs:
            job.uses(files[input_tag], link=Link.INPUT)

        # Output files from the job.  These will be created by the job
        # and used by future jobs.
        for output_tag, output_type in stage_class.outputs.items():
            output_filename = "{}.{}".format(output_tag, output_type)
            output = File(output_filename)
            job.uses(output, link=Link.OUTPUT, transfer=True, register=True)
            files[output_tag] = output

        # Add this job to the pipeline.
        dax.addJob(job)

        # Tell pegasus which jobs this one depends on.  The pipeline
        # already knows this information; pipeline.sequence runs through
        # the jobs in an order that guarantees that a job's predecessors
        # are always done before it is, so they will always exist in the
        # dax by this point.
        for predecessor_name in self.pipeline.dependencies(stage_name):
            dax.depends(stage_name, predecessor_name)

    # Generate the final DAX XML file.  Use a context manager so the
    # handle is closed even if writeXML raises (the original leaked the
    # handle returned by open()).
    with open(daxfile, "w") as handle:
        dax.writeXML(handle)
# NOTE(review): Python 2 code (print statement, ConfigParser module).
# The argc check requires two arguments, yet the usage line advertises
# only PEGASUS_HOME and sys.argv[1] is never read below -- confirm the
# intended command line.
if len(sys.argv) != 3:
    print "Usage: %s PEGASUS_HOME" % (sys.argv[0])
    sys.exit(1)

# Defaults applied when keys are absent from the config file.
config = ConfigParser.ConfigParser({
    'input_file': '',
    'workflow_name': 'horizontal-clustering-test',
    'executable_installed': "False",
    'clusters_size': "3",
    'clusters_maxruntime': "7"
})
config.read(sys.argv[2] + '/test.config')

# Create an abstract dag.
cluster = ADAG(config.get('all', 'workflow_name'))

# Resolve the input directory: fall back to the current working
# directory when 'input_file' is not configured, otherwise append the
# per-user inputs subdirectory.
input_file = config.get('all', 'input_file')
if (input_file == ''):
    input_file = os.getcwd()
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog.
a = File("f.a")
a.addPFN(
    PFN(
        config.get('all', 'file_url') + input_file + "/f.a",
        config.get('all', 'file_site')))
cluster.addFile(a)
def write_dax(self, filename='workflow.dax', name='workflow'):
    """Generate Pegasus abstract workflow (DAX) and write it to a file.

    Parameters
    ----------
    filename : `str`
        File to write the DAX to.
    name : `str`, optional
        Name of the DAX.

    Raises
    ------
    `AttributeError`
        If either task or file node is missing a mandatory attribute.
    """
    dax = ADAG(name)

    # Process file nodes.
    for file_id in self.files:
        attrs = self.graph.node[file_id]
        try:
            lfn = attrs['lfn']
        except KeyError:
            # Brace-style placeholder: the original used "%s" with
            # str.format(), which never substituted the attribute name.
            msg = 'Mandatory attribute "{}" is missing.'
            raise AttributeError(msg.format('lfn'))
        file_ = File(lfn)

        # Add physical file names, if any.  Both 'pfn' and 'sites' are
        # comma-separated strings; split both before pairing (the
        # original zipped the raw 'sites' string character by character).
        urls = attrs.get('pfn')
        if urls is not None:
            urls = urls.split(',')
            sites = attrs.get('sites')
            if sites is None:
                sites = len(urls) * ['condorpool']
            else:
                sites = sites.split(',')
            for url, site in zip(urls, sites):
                file_.addPFN(PFN(url, site))
        self.catalog[attrs['lfn']] = file_

    # Add jobs to the DAX.
    for task_id in self.tasks:
        attrs = self.graph.node[task_id]
        try:
            task_name = attrs['exec_name']
        except KeyError:
            msg = 'Mandatory attribute "{}" is missing.'
            raise AttributeError(msg.format('exec_name'))
        label = '{name}_{id}'.format(name=task_name, id=task_id)
        job = Job(task_name, id=task_id, node_label=label)

        # Add job command line arguments replacing any file name with
        # respective Pegasus file object.
        args = attrs.get('exec_args', [])
        if args:
            args = args.split()
            lfns = list(set(self.catalog) & set(args))
            if lfns:
                indices = [args.index(lfn) for lfn in lfns]
                for idx, lfn in zip(indices, lfns):
                    args[idx] = self.catalog[lfn]
            job.addArguments(*args)

        # Specify job's inputs.
        inputs = [file_id for file_id in self.graph.predecessors(task_id)]
        for file_id in inputs:
            attrs = self.graph.node[file_id]
            if not attrs.get('ignore', False):
                file_ = self.catalog[attrs['lfn']]
                job.uses(file_, link=Link.INPUT)

        # Specify job's outputs.
        outputs = [file_id for file_id in self.graph.successors(task_id)]
        for file_id in outputs:
            attrs = self.graph.node[file_id]
            if not attrs.get('ignore', False):
                file_ = self.catalog[attrs['lfn']]
                job.uses(file_, link=Link.OUTPUT)

                # Bit 0 of 'streams' marks stdout, bit 1 marks stderr.
                streams = attrs.get('streams')
                if streams is not None:
                    if streams & 1 != 0:
                        job.setStdout(file_)
                    if streams & 2 != 0:
                        job.setStderr(file_)

        # Provide default files to store stderr and stdout, if not
        # specified explicitly.  (The original swapped the extensions:
        # stdout got '<label>.err' and stderr got '<label>.out'.)
        if job.stdout is None:
            file_ = File('{name}.out'.format(name=label))
            job.uses(file_, link=Link.OUTPUT)
            job.setStdout(file_)
        if job.stderr is None:
            file_ = File('{name}.err'.format(name=label))
            job.uses(file_, link=Link.OUTPUT)
            job.setStderr(file_)
        dax.addJob(job)

    # Add job dependencies to the DAX: a task depends on the producer of
    # each of its input files.
    for task_id in self.tasks:
        parents = set()
        for file_id in self.graph.predecessors(task_id):
            parents.update(self.graph.predecessors(file_id))
        for parent_id in parents:
            dax.depends(parent=dax.getJob(parent_id),
                        child=dax.getJob(task_id))

    # Finally, write down the workflow in DAX format.
    with open(filename, 'w') as f:
        dax.writeXML(f)
from tvb.recon.dax.resampling import Resampling from tvb.recon.dax.seeg_computation import SEEGComputation from tvb.recon.dax.seeg_gain_computation import SeegGainComputation from tvb.recon.dax.sensor_model import SensorModel from tvb.recon.dax.source_model import SourceModel from tvb.recon.dax.t1_processing import T1Processing from tvb.recon.dax.tracts_generation import TractsGeneration if __name__ == "__main__": if len(sys.argv) != 3: sys.stderr.write("Usage: %s DAXFILE\n" % (sys.argv[0])) sys.exit(1) daxfile = sys.argv[1] patient_file = sys.argv[2] dax = ADAG("TVB-PIPELINE") dax.metadata("created", time.ctime()) config = Configuration(patient_file) subject = config.props[ConfigKey.SUBJECT] trg_subject = config.props[ConfigKey.TRGSUBJECT] atlas_suffix = AtlasSuffix.DEFAULT if config.props[ConfigKey.ATLAS] == Atlas.A2009S: atlas_suffix = AtlasSuffix.A2009S t1_processing = T1Processing( subject, config.props[ConfigKey.T1_FRMT], config.props[ConfigKey.T2_FLAG], config.props[ConfigKey.T2_FRMT],
def _init_job_graph(self) -> ADAG:
    """Create an empty ADAG annotated with name/creator metadata."""
    graph = ADAG(self.name)
    for key, value in (("name", self.name), ("createdby", self.created_by)):
        graph.metadata(key, value)
    return graph
def generate_workflow(self):
    """Generate a workflow (DAX, config files, and replica catalog).

    Builds the whole MG-RAST production pipeline as one Pegasus DAX:
    QC/preprocess feed a protein branch (dereplicate, bowtie screen,
    genecalling, clustering, blat, annotation) and an RNA branch
    (search, clustering, blat, annotation); both converge in an index
    job followed by a series of summary jobs.  Writes the DAX to
    ``self.daxfile`` and regenerates the replica catalog.
    """
    # Timestamp makes each generated workflow name unique.
    ts = datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
    dax = ADAG("mgrast-prod-%s" % ts)

    # These are all the global input files for the workflow.
    metagenome = File(self.mgfile)
    self.add_replica(self.mgfile, os.path.abspath(self.mgfile))

    # QC job
    qcJob = Job("wrapper-qc", node_label="wrapper-qc")
    qcJob.addArguments("-input", self.mgfile)
    qcJob.addArguments("-format", self.file_format)
    qcJob.addArguments("-out_prefix", "075")
    qcJob.addArguments("-assembled", self.assembled)
    qcJob.addArguments("-filter_options", self.filter_options)
    qcJob.addArguments("-proc", "8")
    qcJob.uses(metagenome, link=Link.INPUT)
    qcJob.uses("075.assembly.coverage", link=Link.OUTPUT, transfer=False)
    qcJob.uses("075.qc.stats", link=Link.OUTPUT, transfer=False)
    qcJob.uses("075.upload.stats", link=Link.OUTPUT, transfer=False)
    qcJob.profile("globus", "maxwalltime", "60")
    qcJob.profile("globus", "hostcount", "8")
    qcJob.profile("globus", "count", "8")
    dax.addJob(qcJob)

    # Preprocess Job
    preprocessJob = Job("wrapper-preprocess", node_label="wrapper-preprocess")
    preprocessJob.addArguments("-input", self.mgfile)
    preprocessJob.addArguments("-format", self.file_format)
    preprocessJob.addArguments("-out_prefix", "100.preprocess")
    preprocessJob.addArguments("-filter_options", self.filter_options)
    preprocessJob.uses(metagenome, link=Link.INPUT)
    preprocessJob.uses("100.preprocess.passed.fna", link=Link.OUTPUT, transfer=False)
    preprocessJob.uses("100.preprocess.removed.fna", link=Link.OUTPUT, transfer=False)
    preprocessJob.profile("globus", "maxwalltime", "20")
    dax.addJob(preprocessJob)

    # Dereplicate Job
    dereplicateJob = Job("wrapper-dereplicate", node_label="wrapper-dereplicate")
    dereplicateJob.addArguments("-input=100.preprocess.passed.fna")
    dereplicateJob.addArguments("-out_prefix=150.dereplication")
    dereplicateJob.addArguments("-prefix_length=%s" % self.prefix_length)
    dereplicateJob.addArguments("-dereplicate=%s" % self.dereplicate)
    dereplicateJob.addArguments("-memory=10")
    dereplicateJob.uses("100.preprocess.passed.fna", link=Link.INPUT)
    dereplicateJob.uses("150.dereplication.passed.fna", link=Link.OUTPUT, transfer=False)
    dereplicateJob.uses("150.dereplication.removed.fna", link=Link.OUTPUT, transfer=False)
    dereplicateJob.profile("globus", "maxwalltime", "10")
    dax.addJob(dereplicateJob)
    # NOTE(review): these positional depends() calls appear to mean
    # "first argument depends on second" -- confirm against the
    # Pegasus DAX3 depends(child, parent) signature.
    dax.depends(dereplicateJob, preprocessJob)

    # Bowtie Screen Job
    bowtieJob = Job("wrapper-bowtie-screen", node_label="wrapper-bowtie-screen")
    bowtieJob.addArguments("-input=150.dereplication.passed.fna")
    bowtieJob.addArguments("-output=299.screen.passed.fna")
    bowtieJob.addArguments("-index=%s" % self.screen_indexes)
    bowtieJob.addArguments("-bowtie=%s" % self.bowtie)
    bowtieJob.addArguments("-proc=8")
    bowtieJob.uses("150.dereplication.passed.fna", link=Link.INPUT)
    bowtieJob.uses("299.screen.passed.fna", link=Link.OUTPUT, transfer=False)
    bowtieJob.profile("globus", "maxwalltime", "30")
    bowtieJob.profile("globus", "hostcount", "8")
    bowtieJob.profile("globus", "count", "8")
    dax.addJob(bowtieJob)
    dax.depends(bowtieJob, dereplicateJob)

    # Genecalling Job
    geneJob = Job("wrapper-genecalling", node_label="wrapper-genecalling")
    geneJob.addArguments("-input=299.screen.passed.fna")
    geneJob.addArguments("-out_prefix=350.genecalling.coding")
    geneJob.addArguments("-type=%s" % self.fgs_type)
    geneJob.addArguments("-size=100")
    geneJob.addArguments("-proc=8")
    geneJob.uses("299.screen.passed.fna", link=Link.INPUT)
    geneJob.uses("350.genecalling.coding.faa", link=Link.OUTPUT, transfer=False)
    geneJob.uses("350.genecalling.coding.fna", link=Link.OUTPUT, transfer=False)
    geneJob.profile("globus", "maxwalltime", "30")
    geneJob.profile("globus", "hostcount", "8")
    geneJob.profile("globus", "count", "8")
    dax.addJob(geneJob)
    dax.depends(geneJob, bowtieJob)

    # Cluster (Genecalling) Job
    cluster1Job = Job("wrapper-cluster", node_label="wrapper-cluster")
    cluster1Job.addArguments("-input=350.genecalling.coding.faa")
    cluster1Job.addArguments("-out_prefix=550.cluster")
    cluster1Job.addArguments("-aa")
    cluster1Job.addArguments("-pid=%s" % self.aa_pid)
    cluster1Job.addArguments("-memory=20")
    cluster1Job.uses("350.genecalling.coding.faa", link=Link.INPUT)
    cluster1Job.uses("550.cluster.aa%s.faa" % self.aa_pid, link=Link.OUTPUT, transfer=False)
    cluster1Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.OUTPUT, transfer=False)
    cluster1Job.profile("globus", "maxwalltime", "10")
    dax.addJob(cluster1Job)
    dax.depends(cluster1Job, geneJob)

    # Blat_prot Job
    blatprotJob = Job("wrapper-blat-prot", node_label="wrapper-blat-prot")
    blatprotJob.addArguments("--input=550.cluster.aa%s.faa" % self.aa_pid)
    blatprotJob.addArguments("--output=650.superblat.sims")
    blatprotJob.uses("550.cluster.aa%s.faa" % self.aa_pid, link=Link.INPUT)
    blatprotJob.uses("650.superblat.sims", link=Link.OUTPUT, transfer=False)
    blatprotJob.profile("globus", "maxwalltime", "2880")
    blatprotJob.profile("globus", "hostcount", "24")
    blatprotJob.profile("globus", "count", "24")
    dax.addJob(blatprotJob)
    dax.depends(blatprotJob, cluster1Job)

    # Annotate Sims (Blat Prod) Job
    annotatesims1Job = Job("wrapper-annotate-sims", node_label="wrapper-annotate-sims")
    annotatesims1Job.addArguments("-input=650.superblat.sims")
    annotatesims1Job.addArguments("-out_prefix=650")
    annotatesims1Job.addArguments("-aa")
    annotatesims1Job.addArguments("-ach_ver=%s" % self.ach_annotation_ver)
    annotatesims1Job.addArguments("-ann_file=m5nr_v1.bdb")
    annotatesims1Job.uses("650.superblat.sims", link=Link.INPUT)
    annotatesims1Job.uses("650.aa.sims.filter", link=Link.OUTPUT, transfer=False)
    annotatesims1Job.uses("650.aa.expand.protein", link=Link.OUTPUT, transfer=False)
    annotatesims1Job.uses("650.aa.expand.lca", link=Link.OUTPUT, transfer=False)
    annotatesims1Job.uses("650.aa.expand.ontology", link=Link.OUTPUT, transfer=False)
    annotatesims1Job.profile("globus", "maxwalltime", "720")
    dax.addJob(annotatesims1Job)
    dax.depends(annotatesims1Job, blatprotJob)

    # Search RNA Job
    searchJob = Job("wrapper-search-rna", node_label="wrapper-search-rna")
    searchJob.addArguments("-input=100.preprocess.passed.fna")
    searchJob.addArguments("-output=425.search.rna.fna")
    searchJob.addArguments("-rna_nr=%s" % self.m5rna_clust)
    searchJob.addArguments("-size=100")
    searchJob.addArguments("-proc=8")
    searchJob.uses("100.preprocess.passed.fna", link=Link.INPUT)
    searchJob.uses("425.search.rna.fna", link=Link.OUTPUT, transfer=False)
    searchJob.profile("globus", "maxwalltime", "120")
    searchJob.profile("globus", "hostcount", "8")
    searchJob.profile("globus", "count", "8")
    dax.addJob(searchJob)
    dax.depends(searchJob, preprocessJob)

    # CLuster (Search RNA) Job
    cluster2Job = Job("wrapper-cluster", node_label="wrapper-cluster")
    cluster2Job.addArguments("-input=425.search.rna.fna")
    cluster2Job.addArguments("-out_prefix=440.cluster")
    cluster2Job.addArguments("-rna")
    cluster2Job.addArguments("-pid=%s" % self.rna_pid)
    cluster2Job.addArguments("-memory=20")
    cluster2Job.uses("425.search.rna.fna", link=Link.INPUT)
    cluster2Job.uses("440.cluster.rna%s.fna" % self.rna_pid, link=Link.OUTPUT, transfer=False)
    cluster2Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.OUTPUT, transfer=False)
    cluster2Job.profile("globus", "maxwalltime", "30")
    dax.addJob(cluster2Job)
    dax.depends(cluster2Job, searchJob)

    # Blat_rna Job
    blatrnaJob = Job("wrapper-blat-rna", node_label="wrapper-blat-rna")
    blatrnaJob.addArguments("--input=440.cluster.rna%s.fna" % self.rna_pid)
    blatrnaJob.addArguments("-rna_nr=m5rna")
    blatrnaJob.addArguments("--output=450.rna.sims")
    blatrnaJob.addArguments("-assembled=%s" % self.assembled)
    blatrnaJob.uses("440.cluster.rna%s.fna" % self.rna_pid, link=Link.INPUT)
    blatrnaJob.uses("450.rna.sims", link=Link.OUTPUT, transfer=False)
    blatrnaJob.profile("globus", "maxwalltime", "20")
    dax.addJob(blatrnaJob)
    dax.depends(blatrnaJob, cluster2Job)

    # Annotate Sims (Blat RNA) Job
    annotatesims2Job = Job("wrapper-annotate-sims", node_label="wrapper-annotate-sims")
    annotatesims2Job.addArguments("-input=450.rna.sims")
    annotatesims2Job.addArguments("-out_prefix=450")
    annotatesims2Job.addArguments("-rna")
    annotatesims2Job.addArguments("-ach_ver=%s" % self.ach_annotation_ver)
    annotatesims2Job.addArguments("-ann_file=m5nr_v1.bdb")
    annotatesims2Job.uses("450.rna.sims", link=Link.INPUT)
    annotatesims2Job.uses("450.rna.sims.filter", link=Link.OUTPUT, transfer=False)
    annotatesims2Job.uses("450.rna.expand.rna", link=Link.OUTPUT, transfer=False)
    annotatesims2Job.uses("450.rna.expand.lca", link=Link.OUTPUT, transfer=False)
    annotatesims2Job.profile("globus", "maxwalltime", "30")
    dax.addJob(annotatesims2Job)
    dax.depends(annotatesims2Job, blatrnaJob)

    # Index Sim Seq Job
    indexJob = Job("wrapper-index", node_label="wrapper-index")
    indexJob.addArguments("-in_seqs=350.genecalling.coding.fna")
    indexJob.addArguments("-in_seqs=425.search.rna.fna")
    indexJob.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
    indexJob.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
    indexJob.addArguments("-in_sims=650.aa.sims.filter")
    indexJob.addArguments("-in_sims=450.rna.sims.filter")
    indexJob.addArguments("-output=700.annotation.sims.filter.seq")
    indexJob.addArguments("-ach_ver=%s" % self.ach_annotation_ver)
    indexJob.addArguments("-memory=10")
    indexJob.addArguments("-ann_file=m5nr_v1.bdb")
    indexJob.uses("350.genecalling.coding.fna", link=Link.INPUT)
    indexJob.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
    indexJob.uses("650.aa.sims.filter", link=Link.INPUT)
    indexJob.uses("425.search.rna.fna", link=Link.INPUT)
    indexJob.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
    indexJob.uses("450.rna.sims.filter", link=Link.INPUT)
    indexJob.uses("700.annotation.sims.filter.seq", link=Link.OUTPUT, transfer=False)
    indexJob.uses("700.annotation.sims.filter.seq.index", link=Link.OUTPUT, transfer=False)
    indexJob.profile("globus", "maxwalltime", "120")
    dax.addJob(indexJob)
    # NOTE(review): indexJob consumes 450.rna.sims.filter, which is
    # produced by annotatesims2Job, but no dependency on
    # annotatesims2Job is declared here -- verify whether one is missing.
    dax.depends(indexJob, geneJob)
    dax.depends(indexJob, cluster1Job)
    dax.depends(indexJob, cluster2Job)
    dax.depends(indexJob, searchJob)
    dax.depends(indexJob, annotatesims1Job)

    # Annotate Summary Job (13)
    summary13Job = Job("wrapper-summary", node_label="wrapper-summary")
    summary13Job.addArguments("-job=1")
    summary13Job.addArguments("-in_expand=650.aa.expand.protein")
    summary13Job.addArguments("-in_expand=450.rna.expand.rna")
    summary13Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
    summary13Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
    summary13Job.addArguments("-in_assemb=075.assembly.coverage")
    summary13Job.addArguments("-in_index=700.annotation.sims.filter.seq.index")
    summary13Job.addArguments("-output=700.annotation.md5.summary")
    summary13Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
    summary13Job.addArguments("-type=md5")
    summary13Job.uses("075.assembly.coverage", link=Link.INPUT)
    summary13Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
    summary13Job.uses("650.aa.expand.protein", link=Link.INPUT)
    summary13Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
    summary13Job.uses("450.rna.expand.rna", link=Link.INPUT)
    summary13Job.uses("700.annotation.sims.filter.seq.index", link=Link.INPUT)
    summary13Job.uses("700.annotation.md5.summary", link=Link.OUTPUT, transfer=True)
    summary13Job.profile("globus", "maxwalltime", "30")
    dax.addJob(summary13Job)
    dax.depends(summary13Job, qcJob)
    dax.depends(summary13Job, cluster1Job)
    dax.depends(summary13Job, cluster2Job)
    dax.depends(summary13Job, indexJob)
    dax.depends(summary13Job, annotatesims1Job)
    dax.depends(summary13Job, annotatesims2Job)

    # Annotate Summary Job (14)
    summary14Job = Job("wrapper-summary", node_label="wrapper-summary")
    summary14Job.addArguments("-job=1")
    summary14Job.addArguments("-in_expand=650.aa.expand.protein")
    summary14Job.addArguments("-in_expand=450.rna.expand.rna")
    summary14Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
    summary14Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
    summary14Job.addArguments("-in_assemb=075.assembly.coverage")
    summary14Job.addArguments("-output=700.annotation.function.summary")
    summary14Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
    summary14Job.addArguments("-type=function")
    summary14Job.uses("075.assembly.coverage", link=Link.INPUT)
    summary14Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
    summary14Job.uses("650.aa.expand.protein", link=Link.INPUT)
    summary14Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
    summary14Job.uses("450.rna.expand.rna", link=Link.INPUT)
    summary14Job.uses("700.annotation.function.summary", link=Link.OUTPUT, transfer=True)
    summary14Job.profile("globus", "maxwalltime", "30")
    dax.addJob(summary14Job)
    dax.depends(summary14Job, qcJob)
    dax.depends(summary14Job, cluster1Job)
    dax.depends(summary14Job, cluster2Job)
    dax.depends(summary14Job, annotatesims1Job)
    dax.depends(summary14Job, annotatesims2Job)

    # Annotate Summary Job (15)
    summary15Job = Job("wrapper-summary", node_label="wrapper-summary")
    summary15Job.addArguments("-job=1")
    summary15Job.addArguments("-in_expand=650.aa.expand.protein")
    summary15Job.addArguments("-in_expand=450.rna.expand.rna")
    summary15Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
    summary15Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
    summary15Job.addArguments("-in_assemb=075.assembly.coverage")
    summary15Job.addArguments("-output=700.annotation.organism.summary")
    summary15Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
    summary15Job.addArguments("-type=organism")
    summary15Job.uses("075.assembly.coverage", link=Link.INPUT)
    summary15Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
    summary15Job.uses("650.aa.expand.protein", link=Link.INPUT)
    summary15Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
    summary15Job.uses("450.rna.expand.rna", link=Link.INPUT)
    summary15Job.uses("700.annotation.organism.summary", link=Link.OUTPUT, transfer=True)
    summary15Job.profile("globus", "maxwalltime", "30")
    dax.addJob(summary15Job)
    dax.depends(summary15Job, qcJob)
    dax.depends(summary15Job, cluster1Job)
    dax.depends(summary15Job, cluster2Job)
    dax.depends(summary15Job, annotatesims1Job)
    dax.depends(summary15Job, annotatesims2Job)

    # Annotate Summary Job (16)
    summary16Job = Job("wrapper-summary", node_label="wrapper-summary")
    summary16Job.addArguments("-job=1")
    summary16Job.addArguments("-in_expand=650.aa.expand.lca")
    summary16Job.addArguments("-in_expand=450.rna.expand.lca")
    summary16Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
    summary16Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
    summary16Job.addArguments("-in_assemb=075.assembly.coverage")
    summary16Job.addArguments("-output=700.annotation.lca.summary")
    summary16Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
    summary16Job.addArguments("-type=lca")
    summary16Job.uses("075.assembly.coverage", link=Link.INPUT)
    summary16Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
    summary16Job.uses("650.aa.expand.lca", link=Link.INPUT)
    summary16Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
    summary16Job.uses("450.rna.expand.lca", link=Link.INPUT)
    summary16Job.uses("700.annotation.lca.summary", link=Link.OUTPUT, transfer=True)
    summary16Job.profile("globus", "maxwalltime", "30")
    dax.addJob(summary16Job)
    dax.depends(summary16Job, qcJob)
    dax.depends(summary16Job, cluster1Job)
    dax.depends(summary16Job, cluster2Job)
    dax.depends(summary16Job, annotatesims1Job)
    dax.depends(summary16Job, annotatesims2Job)

    # Annotate Summary Job (17)
    summary17Job = Job("wrapper-summary", node_label="wrapper-summary")
    summary17Job.addArguments("-job=1")
    summary17Job.addArguments("-in_expand=650.aa.expand.ontology")
    summary17Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
    summary17Job.addArguments("-in_assemb=075.assembly.coverage")
    summary17Job.addArguments("-output=700.annotation.ontology.summary")
    summary17Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
    summary17Job.addArguments("-type=ontology")
    summary17Job.uses("075.assembly.coverage", link=Link.INPUT)
    summary17Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
    summary17Job.uses("650.aa.expand.ontology", link=Link.INPUT)
    summary17Job.uses("700.annotation.ontology.summary", link=Link.OUTPUT, transfer=True)
    summary17Job.profile("globus", "maxwalltime", "30")
    dax.addJob(summary17Job)
    dax.depends(summary17Job, qcJob)
    dax.depends(summary17Job, cluster1Job)
    dax.depends(summary17Job, annotatesims1Job)

    # Annotate Summary Job (18)
    summary18Job = Job("wrapper-summary", node_label="wrapper-summary")
    summary18Job.addArguments("-job=1")
    summary18Job.addArguments("-in_expand=650.aa.expand.protein")
    summary18Job.addArguments("-in_expand=450.rna.expand.rna")
    summary18Job.addArguments("-in_maps=550.cluster.aa%s.mapping" % self.aa_pid)
    summary18Job.addArguments("-in_maps=440.cluster.rna%s.mapping" % self.rna_pid)
    summary18Job.addArguments("-in_assemb=075.assembly.coverage")
    summary18Job.addArguments("-output=700.annotation.source.stats")
    summary18Job.addArguments("-nr_ver=%s" % self.ach_annotation_ver)
    summary18Job.addArguments("-type=source")
    summary18Job.uses("075.assembly.coverage", link=Link.INPUT)
    summary18Job.uses("550.cluster.aa%s.mapping" % self.aa_pid, link=Link.INPUT)
    summary18Job.uses("650.aa.expand.protein", link=Link.INPUT)
    summary18Job.uses("440.cluster.rna%s.mapping" % self.rna_pid, link=Link.INPUT)
    summary18Job.uses("450.rna.expand.rna", link=Link.INPUT)
    summary18Job.uses("700.annotation.source.stats", link=Link.OUTPUT, transfer=True)
    summary18Job.profile("globus", "maxwalltime", "30")
    dax.addJob(summary18Job)
    dax.depends(summary18Job, qcJob)
    dax.depends(summary18Job, cluster1Job)
    dax.depends(summary18Job, cluster2Job)
    dax.depends(summary18Job, annotatesims1Job)
    dax.depends(summary18Job, annotatesims2Job)

    # Write the DAX file
    dax.writeXMLFile(self.daxfile)

    # Generate the replica catalog
    self.generate_replica_catalog()