def write(self, filename, name='dax'):
    """Generate Pegasus abstract workflow (DAX) and write it to a file.

    Parameters
    ----------
    filename : `str`
        File to write the DAX to.
    name : `str`, optional
        Name of the DAX.
    """
    dax = ADAG(name)

    # Add files to DAX-level replica catalog.
    catalog = {}
    for file_id in self.files:
        attrs = self.graph.node[file_id]
        f = File(attrs['lfn'])

        # Add physical file names, if any.
        urls = attrs.get('urls')
        if urls is not None:
            urls = urls.split(',')
            sites = attrs.get('sites')
            if sites is None:
                sites = len(urls) * ['local']
            else:
                sites = sites.split(',')
            for url, site in zip(urls, sites):
                f.addPFN(PFN(url, site))
        catalog[attrs['lfn']] = f
        dax.addFile(f)

    # Add jobs to the DAX.
    for task_id in self.tasks:
        attrs = self.graph.node[task_id]
        job = Job(name=attrs['name'], id=task_id)

        # Add job command line arguments, replacing any file name with
        # the respective Pegasus file object.
        args = attrs.get('args')
        if args:
            args = args.split()
            lfns = list(set(catalog) & set(args))
            for lfn in lfns:
                args[args.index(lfn)] = catalog[lfn]
            job.addArguments(*args)

        # Specify job's inputs.
        for file_id in self.graph.predecessors(task_id):
            attrs = self.graph.node[file_id]
            f = catalog[attrs['lfn']]
            job.uses(f, link=Link.INPUT)

        # Specify job's outputs.
        for file_id in self.graph.successors(task_id):
            attrs = self.graph.node[file_id]
            f = catalog[attrs['lfn']]
            job.uses(f, link=Link.OUTPUT)

            streams = attrs.get('streams')
            if streams is not None:
                if streams & 1 != 0:
                    job.setStdout(f)
                if streams & 2 != 0:
                    job.setStderr(f)
        dax.addJob(job)

    # Add job dependencies to the DAX. A task's parents are the tasks
    # producing the files it consumes.
    for task_id in self.tasks:
        parents = set()
        for file_id in self.graph.predecessors(task_id):
            parents.update(self.graph.predecessors(file_id))
        for parent_id in parents:
            dax.depends(parent=dax.getJob(parent_id), child=dax.getJob(task_id))

    # Finally, write down the workflow in DAX format.
    with open(filename, 'w') as f:
        dax.writeXML(f)
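# A minimal usage sketch (not part of the original module) of the graph shape
# write() expects, assuming self.graph is a networkx DiGraph (the .node
# accessor above is the old networkx 1.x API): file nodes carry 'lfn' plus
# optional comma-separated 'urls'/'sites', task nodes carry 'name' and 'args',
# and edges run file -> task -> file. All ids and values here are hypothetical.
import networkx as nx

graph = nx.DiGraph()
graph.add_node('f0', lfn='raw.fits', urls='file:///data/raw.fits', sites='local')
graph.add_node('t0', name='calibrate', args='raw.fits calib.fits')
graph.add_node('f1', lfn='calib.fits', streams=3)  # bit 0: stdout, bit 1: stderr
graph.add_edge('f0', 't0')
graph.add_edge('t0', 'f1')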
            sensor_model = SensorModel(subject, trg_subject, atlas_suffix)
            job_sensor_model_lh, job_sensor_model_rh = sensor_model.add_sensor_model_steps(
                dax, job_source_model)

            lead_field_model = LeadFieldModel(subject, trg_subject, atlas_suffix)
            lead_field_model.add_lead_field_model_steps(
                dax, job_sensor_model_lh, job_sensor_model_rh)
        else:
            seeg_gain_computation = SeegGainComputation(
                config.props[ConfigKey.SUBJECT], atlas_suffix)
            if config.props[ConfigKey.SEEG_GAIN_USE_DP] == "True":
                seeg_gain_computation.add_seeg_gain_dp_computation_steps(
                    dax, job_seeg_xyz, job_mapping_details)
            if config.props[ConfigKey.SEEG_GAIN_USE_MRS] == "True":
                seeg_gain_computation.add_seeg_mrs_gain_computation_steps(
                    dax, job_seeg_xyz, job_mapping_details)
    else:
        if config.props[ConfigKey.EEG_FLAG] == "True":
            projection_computation = ProjectionComputation(
                config.props[ConfigKey.SUBJECT], SensorsType.EEG.value, atlas_suffix)
            projection_computation.add_projection_computation_steps(
                dax, job_mapping_details)

        if config.props[ConfigKey.MEG_FLAG] == "True":
            projection_computation = ProjectionComputation(
                config.props[ConfigKey.SUBJECT], SensorsType.MEG.value, atlas_suffix)
            projection_computation.add_projection_computation_steps(
                dax, job_mapping_details)

    out_dir = os.path.dirname(daxfile)
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    with open(daxfile, "w") as f:
        dax.writeXML(f)
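# A small sketch of the configuration convention the fragment above relies on:
# config.props stores flags as the strings "True"/"False" rather than booleans,
# hence the string comparisons. The ConfigKey stub and its members below are
# hypothetical stand-ins for the project's real definitions.
from enum import Enum

class ConfigKey(Enum):
    SUBJECT = "subject"
    EEG_FLAG = "eeg.flag"
    MEG_FLAG = "meg.flag"

props = {ConfigKey.SUBJECT: "sub-01",
         ConfigKey.EEG_FLAG: "True",
         ConfigKey.MEG_FLAG: "False"}
if props[ConfigKey.EEG_FLAG] == "True":
    print("EEG projection steps would be added to the DAX")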
def generate_dax(self, daxfile):
    from Pegasus.DAX3 import ADAG, Job, File, Link

    # The DAX generator.
    dax = ADAG("pipeline")

    # Some bits of metadata. Should put plenty more here.
    dax.metadata("owner", self.pipeline.owner)
    dax.metadata("basename", self.pipeline.basename)
    dax.metadata("version", self.pipeline.version)

    # String tag -> Pegasus File object mapping of all the
    # inputs and outputs used by any pipeline stage.
    files = {}

    # First generate the overall inputs to the pipeline,
    # i.e. the ones that are not generated by any other stage
    # but must be specified at the start.
    for tag in self.pipeline.input_tags():
        path = self.info['inputs'].get(tag)
        files[tag] = File(path)

    # Now go through the pipeline in sequence.
    for stage_name, stage_class in self.pipeline.sequence():
        # The stage in the pipeline. We describe its meaning
        # (which image it corresponds to) in the transformation
        # catalog generation.
        job = Job(stage_name, id=stage_name)

        # Configuration files for this job. These will not be built
        # during the pipeline and must be provided by the user.
        for config_tag, config_filename in stage_class.config.items():
            filename = self.pipeline.cfg[stage_name]['config'][config_tag]
            config_path = os.path.join(self.config_dir(), filename)
            config = File(config_path)
            job.uses(config, link=Link.INPUT)

        # Input files for the job, either created by the user or by previous
        # stages. In either case they should be in the "files" dictionary,
        # because precursor jobs will have been added before this one.
        for input_tag in stage_class.inputs.keys():
            job.uses(files[input_tag], link=Link.INPUT)

        # Output files from the job. These will be created by the job
        # and used by future jobs.
        for output_tag, output_type in stage_class.outputs.items():
            output_filename = "{}.{}".format(output_tag, output_type)
            output = File(output_filename)
            job.uses(output, link=Link.OUTPUT, transfer=True, register=True)
            files[output_tag] = output

        # Add this job to the pipeline.
        dax.addJob(job)

        # Tell Pegasus which jobs this one depends on. The pipeline already
        # knows this information. pipeline.sequence() runs through the jobs
        # in an order that guarantees that a job's predecessors are always
        # processed before it, so they will always exist in the DAX by this
        # point.
        for predecessor_name in self.pipeline.dependencies(stage_name):
            dax.depends(stage_name, predecessor_name)

    # Generate the final DAX XML file.
    with open(daxfile, "w") as f:
        dax.writeXML(f)
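# A minimal sketch of the stage interface generate_dax() reads: class-level
# `config`, `inputs`, and `outputs` mappings, consumed via config.items(),
# inputs.keys(), and outputs.items() above. All names below are hypothetical
# placeholders, not part of any real pipeline.
class ExampleStage:
    # tag -> default configuration file name (overridable via pipeline.cfg)
    config = {"options": "options.yaml"}
    # tags of files produced upstream or supplied as overall pipeline inputs
    inputs = {"catalog": None}
    # tag -> output file extension; generate_dax() builds "<tag>.<ext>"
    outputs = {"summary": "txt"}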
import json
import logging
import os
import traceback

from Pegasus.DAX3 import (ADAG, DAX, Dependency, DuplicateError, Executable,
                          File, Job, Link, PFN, Profile, Transformation)

logger = logging.getLogger(__name__)


class PegasusWorkflowModel(WorkflowModel):

    def __init__(self, namespace, wms):
        logger.debug("wms:pegasus:create-workflowmodel: %s", namespace)
        self.wms = wms
        logger.debug("outputdir: %s", wms.getOutputDir())
        self.adag = ADAG(namespace)
        self.namespace = namespace
        self.files = {}
        self.exes = {}
        self.propertyMap = {}
        self.nodes = {}
        self.jobTransformations = {}
        self.variableMap = {
            'literalToVariable': {},
            'literalToNodeId': {}
        }

    def addToVariableMap(self, literal, variable, id):
        self.variableMap['literalToVariable'][literal] = variable
        self.variableMap['literalToNodeId'][literal] = id
        logger.debug("variablematch: recorded lit=%s var=%s id=%s",
                     literal, variable, id)

    def setProperties(self, nodeId, properties):
        logger.debug("wms:pegasus:dax:setprops: (%s)->(%s)", nodeId, properties)
        self.propertyMap[nodeId] = properties

    def getProperties(self, nodeId):
        return self.propertyMap[nodeId]

    def addNode(self, id, node):
        logger.debug("wms:pegasus:dax:add-node: (%s)->(%s)", node.getId(), node)
        self.nodes[id] = node

    def getNode(self, id):
        return self.nodes.get(id)

    def createFile(self, fileName, fileURL=None, site=None):
        file = self.getFile(fileName)
        if not file:
            file = File(fileName)
            if not fileURL:
                fileURL = "file://%s/%s" % (self.wms.getOutputDir(), fileName)
                logger.debug("fileurl: %s", fileURL)
            if not site:
                site = "local"
            if not isinstance(fileURL, str) and len(fileURL) > 0:
                fileURL = fileURL[0]
            logger.debug("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
            file.addPFN(PFN(fileURL, site))
            self.files[fileName] = file
        return file

    def addFileToDAG(self, file):
        if not self.adag.hasFile(file):
            self.adag.addFile(file)

    def removeFileFromDAG(self, file):
        if file in self.adag.files:
            self.adag.files.remove(file)

    def addFile(self, fileName, fileURL=None, site=None):
        # Same as createFile(), but also registers the file with the DAG.
        file = self.createFile(fileName, fileURL, site)
        self.adag.addFile(file)
        return file

    def getFile(self, fileName, prefix=""):
        key = "%s%s" % (prefix, fileName)
        if key in self.files:
            logger.debug("wms:pegasus:dax:get-file: (%s)", key)
            return self.files[key]
        return None

    def addExecutable(self, jobId, name, path, version="1.0", exe_os="linux",
                      exe_arch="x86_64", site="local", installed="true"):
        e_exe = self.getExecutable(name)
        if not version:
            version = "1.0"
        if not exe_arch:
            exe_arch = "x86_64"
        if not e_exe:
            e_exe = Executable(namespace=self.namespace,
                               name=name,
                               version=version,
                               os=exe_os,
                               arch=exe_arch,
                               installed=installed)
        if not site:
            site = "local"
        if not installed:
            installed = False
        if logging.getLogger().isEnabledFor(logging.DEBUG):
            logger.debug("wms:pegasus:dax:add-exe: (name=[%s], path=[%s], "
                         "version=[%s], os=[%s], arch=[%s], site=[%s], "
                         "installed=[%s])",
                         name, path, version, exe_os, exe_arch, site, installed)
        if not path:
            raise ValueError("empty path for executable: %s at site %s" % (name, site))
        if "://" not in path:
            path = "file://%s" % path
        e_exe.addPFN(PFN(path, site))
        if not installed:
            e_exe.installed = installed
        self.adag.addExecutable(e_exe)
        self.exes[name] = e_exe
        transformation = Transformation(name, self.namespace, version)
        self.jobTransformations[jobId] = transformation
        return e_exe

    def getExecutable(self, name):
        return self.exes.get(name)

    def addSubWorkflow(self, name, transformation=None):
        abstractJob = DAX(name)
        self.adag.addDAX(abstractJob)
        return abstractJob

    def addJob(self, id):
        transformation = self.jobTransformations[id]
        logger.debug("wms:pegasus:add-job: transformation(%s) jobid(%s)",
                     transformation.name, id)
        abstractJob = Job(name=transformation, id=id)
        self.adag.addJob(abstractJob)
        return abstractJob

    def addProfiles(self, abstractJob, profiles):
        if profiles:
            for astProfile in profiles:
                logger.debug("wms:pegasus:add-profile: (namespace=%s,key=%s,value=%s) to job (%s)",
                             astProfile.namespace, astProfile.key,
                             astProfile.value, abstractJob.name)
                profile = Profile(astProfile.namespace, astProfile.key, astProfile.value)
                abstractJob.addProfile(profile)

    def addFiles(self, abstractJob, files, link):
        if not files:
            return
        for fileKey in files:
            entry = files[fileKey]
            fileElement = entry[0]
            file = fileElement.getDaxNode()
            try:
                # Leaf files are transferred to the output site; intermediate
                # files only flow between jobs.
                isLeaf = entry[2]
                if isLeaf:
                    abstractJob.uses(file, link=link, transfer=True)
                else:
                    abstractJob.uses(file, link=link)
                arg = entry[1]
                if arg:
                    abstractJob.addArguments(arg, file)
            except DuplicateError:
                pass

    def addInputFiles(self, abstractJob, files):
        self.addFiles(abstractJob, files, Link.INPUT)

    def addOutputFiles(self, abstractJob, files):
        self.addFiles(abstractJob, files, Link.OUTPUT)

    def addArguments(self, abstractJob, arguments):
        if arguments:
            abstractJob.addArguments(arguments)

    def addDependency(self, parent, child):
        self.adag.addDependency(Dependency(parent, child))

    def writeExecutable(self, stream):
        # Write the DAX XML, then dump the variable map alongside it.
        self.adag.writeXML(stream)
        filename = "%s.%s" % (self.namespace, 'obj')
        filepath = os.path.join(self.wms.getOutputDir(), filename)
        try:
            with open(filepath, 'w') as output:
                output.write(json.dumps(self.variableMap, indent=3, sort_keys=True))
        except IOError:
            traceback.print_stack()

    def getADAG(self):
        return self.adag
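# A hypothetical driver for PegasusWorkflowModel; the wms collaborator only
# needs getOutputDir() for this sketch, so a stub suffices. Paths, names, and
# ids below are illustrative, not from the original module.
import os

class StubWMS:
    def getOutputDir(self):
        return "/tmp/dax-output"

os.makedirs("/tmp/dax-output", exist_ok=True)
model = PegasusWorkflowModel("demo", StubWMS())
model.addExecutable(jobId="j1", name="echo", path="/bin/echo")
job = model.addJob("j1")
model.addArguments(job, "hello world")
with open("/tmp/dax-output/demo.dax", "w") as stream:
    model.writeExecutable(stream)  # writes the DAX XML plus the variable map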
Profile(namespace="pegasus", key="clusters.size", value=config.get('all', 'clusters_size'))) sleep.addProfile( Profile(namespace="pegasus", key="clusters.maxruntime", value=config.get('all', 'clusters_maxruntime'))) cluster.addExecutable(sleep) for i in range(4): job = Job(namespace="cluster", name="level1", version="1.0") job.addArguments('-a level1 -T ' + str(i + 1)) job.addArguments('-i', a) job.addProfile( Profile(namespace="pegasus", key="job.runtime", value=str(i + 1))) job.uses(a, link=Link.INPUT) cluster.addJob(job) for j in range(4): child = Job(namespace="cluster", name="level2", version="1.0") child.addArguments('-a level2 -T ' + str((j + 1) * 2)) child.addProfile( Profile(namespace="pegasus", key="runtime", value=str( (j + 1) * 2))) cluster.addJob(child) cluster.depends(parent=job, child=child) # Write the DAX to standard out cluster.writeXML(sys.stdout)
# Add input file to the DAX-level replica catalog.
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a",
             config.get('all', 'file_site')))
cluster.addFile(a)

# Register one clusterable executable per workflow level.
for i in range(1, 3):
    sleep = Executable(namespace="cluster", name="level" + str(i), version="1.0",
                       os="linux", arch="x86",
                       installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN(PFN(config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg",
                     config.get('all', 'executable_site')))
    sleep.addProfile(Profile(namespace="pegasus", key="clusters.size",
                             value=config.get('all', 'clusters_size')))
    sleep.addProfile(Profile(namespace="pegasus", key="clusters.maxruntime",
                             value=config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range(4):
    job = Job(namespace="cluster", name="level1", version="1.0")
    job.addArguments('-a level1 -T ' + str(i + 1))
    job.addArguments('-i', a)
    job.addProfile(Profile(namespace="pegasus", key="job.runtime", value=str(i + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob(job)

    for j in range(4):
        child = Job(namespace="cluster", name="level2", version="1.0")
        child.addArguments('-a level2 -T ' + str((j + 1) * 2))
        child.addProfile(Profile(namespace="pegasus", key="job.runtime",
                                 value=str((j + 1) * 2)))
        cluster.addJob(child)
        cluster.depends(parent=job, child=child)

# Write the DAX to standard out.
cluster.writeXML(sys.stdout)
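# The example above pulls every site- and clustering-specific value from a
# ConfigParser file. A hypothetical [all] section matching the config.get()
# and config.getboolean() calls it makes might look like this; all values
# are placeholders.
import configparser

config = configparser.ConfigParser()
config.read_string("""
[all]
file_url = file:///scratch/inputs/
file_site = local
executable_url = file:///opt/pegasus/
executable_site = local
executable_installed = False
clusters_size = 2
clusters_maxruntime = 60
""")
assert config.getboolean('all', 'executable_installed') is False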
def write_dax(self, filename='workflow.dax', name='workflow'):
    """Generate Pegasus abstract workflow (DAX) and write it to a file.

    Parameters
    ----------
    filename : `str`
        File to write the DAX to.
    name : `str`, optional
        Name of the DAX.

    Raises
    ------
    `ValueError`
        If either a task or a file node is missing a mandatory attribute.
    """
    dax = ADAG(name)

    # Process file nodes.
    for file_id in self.files:
        attrs = self.graph.node[file_id]
        try:
            name = attrs['lfn']
        except KeyError:
            raise ValueError('Mandatory attribute "{}" is missing.'.format('lfn'))
        file_ = File(name)

        # Add physical file names, if any.
        urls = attrs.get('pfn')
        if urls is not None:
            urls = urls.split(',')
            sites = attrs.get('sites')
            if sites is None:
                sites = len(urls) * ['condorpool']
            for url, site in zip(urls, sites):
                file_.addPFN(PFN(url, site))
        self.catalog[attrs['lfn']] = file_

    # Add jobs to the DAX.
    for task_id in self.tasks:
        attrs = self.graph.node[task_id]
        try:
            name = attrs['exec_name']
        except KeyError:
            raise ValueError('Mandatory attribute "{}" is missing.'.format('exec_name'))
        label = '{name}_{id}'.format(name=name, id=task_id)
        job = Job(name, id=task_id, node_label=label)

        # Add job command line arguments, replacing any file name with
        # the respective Pegasus file object.
        args = attrs.get('exec_args', [])
        if args:
            args = args.split()
            lfns = list(set(self.catalog) & set(args))
            for lfn in lfns:
                args[args.index(lfn)] = self.catalog[lfn]
            job.addArguments(*args)

        # Specify job's inputs.
        for file_id in self.graph.predecessors(task_id):
            attrs = self.graph.node[file_id]
            if attrs.get('ignore', False):
                continue
            file_ = self.catalog[attrs['lfn']]
            job.uses(file_, link=Link.INPUT)

        # Specify job's outputs.
        for file_id in self.graph.successors(task_id):
            attrs = self.graph.node[file_id]
            if attrs.get('ignore', False):
                continue
            file_ = self.catalog[attrs['lfn']]
            job.uses(file_, link=Link.OUTPUT)

            streams = attrs.get('streams')
            if streams is not None:
                if streams & 1 != 0:
                    job.setStdout(file_)
                if streams & 2 != 0:
                    job.setStderr(file_)

        # Provide default files to store stdout and stderr, if not
        # specified explicitly.
        if job.stdout is None:
            file_ = File('{name}.out'.format(name=label))
            job.uses(file_, link=Link.OUTPUT)
            job.setStdout(file_)
        if job.stderr is None:
            file_ = File('{name}.err'.format(name=label))
            job.uses(file_, link=Link.OUTPUT)
            job.setStderr(file_)

        dax.addJob(job)

    # Add job dependencies to the DAX. A task's parents are the tasks
    # producing the files it consumes.
    for task_id in self.tasks:
        parents = set()
        for file_id in self.graph.predecessors(task_id):
            parents.update(self.graph.predecessors(file_id))
        for parent_id in parents:
            dax.depends(parent=dax.getJob(parent_id), child=dax.getJob(task_id))

    # Finally, write down the workflow in DAX format.
    with open(filename, 'w') as f:
        dax.writeXML(f)
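# write_dax() reads different node attributes than write() above: 'pfn'
# (comma-separated URLs) instead of 'urls', 'exec_name'/'exec_args' instead
# of 'name'/'args', a list-valued 'sites', and an optional 'ignore' flag that
# keeps a file out of the job's uses() list. A hypothetical sketch of one
# task with its files; ids and values are illustrative.
import networkx as nx

g = nx.DiGraph()
g.add_node('f0', lfn='events.dat', pfn='file:///data/events.dat', sites=['condorpool'])
g.add_node('t0', exec_name='selectEvents', exec_args='events.dat selected.dat')
g.add_node('f1', lfn='selected.dat')
g.add_node('f2', lfn='scratch.tmp', ignore=True)  # used on disk, hidden from Pegasus
g.add_edge('f0', 't0')
g.add_edge('t0', 'f1')
g.add_edge('t0', 'f2')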