Example #1
    def write(self, filename, name='dax'):
        """Generate Pegasus abstract workflow (DAX).

        Parameters
        ----------
        filename : `str`
            File to write the DAX to.
        name : `str`, optional
            Name of the DAX.

        Returns
        -------
        `Pegasus.ADAG`
            Abstract workflow used by Pegasus' planner.
        """
        dax = ADAG(name)

        # Add files to DAX-level replica catalog.
        catalog = {}
        for file_id in self.files:
            attrs = self.graph.node[file_id]
            f = File(attrs['lfn'])

            # Add physical file names, if any.
            urls = attrs.get('urls')
            if urls is not None:
                urls = urls.split(',')
                sites = attrs.get('sites')
                # Default every PFN to the 'local' site; count the URLs
                # after splitting, not the characters of the raw string.
                if sites is None:
                    sites = len(urls) * ['local']
                else:
                    sites = sites.split(',')
                for url, site in zip(urls, sites):
                    f.addPFN(PFN(url, site))

            catalog[attrs['lfn']] = f
            dax.addFile(f)

        # Add jobs to the DAX.
        for task_id in self.tasks:
            attrs = self.graph.node[task_id]
            job = Job(name=attrs['name'], id=task_id)

            # Add job command line arguments, replacing any file name with
            # the respective Pegasus file object.
            args = attrs.get('args')
            if args:
                args = args.split()
                # Substitute every argument matching a cataloged LFN; the
                # comprehension covers repeated occurrences, which
                # list.index() would miss.
                args = [catalog.get(arg, arg) for arg in args]
                job.addArguments(*args)

            # Specify job's inputs.
            inputs = list(self.graph.predecessors(task_id))
            for file_id in inputs:
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.INPUT)

            # Specify job's outputs.
            outputs = list(self.graph.successors(task_id))
            for file_id in outputs:
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.OUTPUT)

                streams = attrs.get('streams')
                if streams is not None:
                    if streams & 1 != 0:
                        job.setStdout(f)
                    if streams & 2 != 0:
                        job.setStderr(f)

            dax.addJob(job)

        # Add job dependencies to the DAX.
        for task_id in self.tasks:
            parents = set()
            for file_id in self.graph.predecessors(task_id):
                parents.update(self.graph.predecessors(file_id))
            for parent_id in parents:
                dax.depends(parent=dax.getJob(parent_id),
                            child=dax.getJob(task_id))

        # Finally, write down the workflow in DAX format and return it,
        # as promised in the docstring.
        with open(filename, 'w') as f:
            dax.writeXML(f)
        return dax
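
For reference, a minimal, self-contained sketch of the Pegasus.DAX3 calls the method above relies on; the file and job names here are made up for illustration:

from Pegasus.DAX3 import ADAG, File, PFN, Job, Link

dax = ADAG('minimal')

# Register an input file with one physical replica on the local site.
raw = File('raw.dat')
raw.addPFN(PFN('file:///tmp/raw.dat', 'local'))
dax.addFile(raw)

# A single job that consumes the input and produces one output.
out = File('out.dat')
job = Job(name='process', id='ID0001')
job.addArguments('-i', raw, '-o', out)
job.uses(raw, link=Link.INPUT)
job.uses(out, link=Link.OUTPUT)
dax.addJob(job)

with open('minimal.dax', 'w') as stream:
    dax.writeXML(stream)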
Example #2
                sensor_model = SensorModel(subject, trg_subject, atlas_suffix)
                job_sensor_model_lh, job_sensor_model_rh = sensor_model.add_sensor_model_steps(dax, job_source_model)

                lead_field_model = LeadFieldModel(subject, trg_subject, atlas_suffix)
                lead_field_model.add_lead_field_model_steps(dax, job_sensor_model_lh, job_sensor_model_rh)

            else:
                seeg_gain_computation = SeegGainComputation(config.props[ConfigKey.SUBJECT], atlas_suffix)
                if config.props[ConfigKey.SEEG_GAIN_USE_DP] == "True":
                    seeg_gain_computation.add_seeg_gain_dp_computation_steps(dax, job_seeg_xyz, job_mapping_details)
                if config.props[ConfigKey.SEEG_GAIN_USE_MRS] == "True":
                    seeg_gain_computation.add_seeg_mrs_gain_computation_steps(dax, job_seeg_xyz, job_mapping_details)
        else:
            if config.props[ConfigKey.EEG_FLAG] == "True":
                projection_computation = ProjectionComputation(config.props[ConfigKey.SUBJECT], SensorsType.EEG.value,
                                                               atlas_suffix)
                projection_computation.add_projection_computation_steps(dax, job_mapping_details)

            if config.props[ConfigKey.MEG_FLAG] == "True":
                projection_computation = ProjectionComputation(config.props[ConfigKey.SUBJECT], SensorsType.MEG.value,
                                                               atlas_suffix)
                projection_computation.add_projection_computation_steps(dax, job_mapping_details)

    out_dir = os.path.dirname(daxfile)
    # Guard against a bare filename (empty dirname) and create any
    # missing intermediate directories.
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    with open(daxfile, "w") as f:
        dax.writeXML(f)
Example #3
    def generate_dax(self, daxfile):
        from Pegasus.DAX3 import ADAG, Job, File, Link

        # The DAX generator
        dax = ADAG("pipeline")

        # Some bits of metadata.  Should put plenty more here.
        dax.metadata("owner", self.pipeline.owner)
        dax.metadata("basename", self.pipeline.basename)
        dax.metadata("version", self.pipeline.version)

        # string tag -> pegasus File object mapping of all the
        # inputs and outputs used by any pipeline stage.
        files = {}

        # First generate the overall inputs to the pipeline,
        # i.e. ones that are not generated by any other stage
        # but must be specified at the start
        for tag in self.pipeline.input_tags():
            path = self.info['inputs'].get(tag)
            if path is None:
                raise ValueError("no input file specified for tag '{}'".format(tag))
            files[tag] = File(path)

        # Now go through the pipeline in sequence.
        for stage_name, stage_class in self.pipeline.sequence():
            # The stage in the pipeline.  We describe the meaning of it
            # (which image it corresponds to)
            # in the transformation catalog generation
            job = Job(stage_name, id=stage_name)

            # Configuration files for this job.
            # These will not be built during the pipeline and must be
            # provided by the user
            for config_tag in stage_class.config:
                filename = self.pipeline.cfg[stage_name]['config'][config_tag]
                config_path = os.path.join(self.config_dir(), filename)
                config = File(config_path)
                job.uses(config, link=Link.INPUT)

            # Input files for the job, either created by the user or by previous
            # stages.  In either case they should be in the "files" dictionary, because
            # precursor jobs will have been added before this one.
            for input_tag in stage_class.inputs.keys():
                job.uses(files[input_tag], link=Link.INPUT)

            # Output files from the job. These will be created by the job
            # and used by future jobs
            for output_tag, output_type in stage_class.outputs.items():
                output_filename = "{}.{}".format(output_tag, output_type)
                output = File(output_filename)
                job.uses(output,
                         link=Link.OUTPUT,
                         transfer=True,
                         register=True)
                files[output_tag] = output

            # Add this job to the pipeline
            dax.addJob(job)

            # Tell pegasus which jobs this one depends on.
            # The pipeline already knows this information.
            # The pipeline.sequence command runs through
            # the jobs in an order that guarantees that a job's predecessors are
            # always done before it is, so they will always exist in the dax by this point.
            for predecessor_name in self.pipeline.dependencies(stage_name):
                dax.depends(child=stage_name, parent=predecessor_name)

        # Generate the final DAX XML file, closing the file handle properly.
        with open(daxfile, "w") as f:
            dax.writeXML(f)
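
The transfer=True, register=True flags in the output handling above control what Pegasus does with a file once the job that creates it finishes: transfer stages the file to the output site, register records it in the replica catalog. A small sketch of the distinction, with hypothetical file names:

from Pegasus.DAX3 import ADAG, Job, File, Link

dax = ADAG('flags-demo')
job = Job('stage', id='stage')

# Intermediate product: consumed by later jobs only, neither staged
# out nor registered.
scratch = File('scratch.dat')
job.uses(scratch, link=Link.OUTPUT, transfer=False, register=False)

# Final product: staged to the output site and registered in the
# replica catalog so it can be located after the run.
result = File('result.dat')
job.uses(result, link=Link.OUTPUT, transfer=True, register=True)

dax.addJob(job)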
Example #4
class PegasusWorkflowModel (WorkflowModel):
    
    def __init__(self, namespace, wms):
        logger.debug ("wms:pegasus:create-workflowmodel: %s" % namespace)
        self.wms = wms
        logger.debug ("outputdir: %s", wms.getOutputDir ())
        self.adag = ADAG (namespace)
        self.namespace = namespace
        self.files = {}
        self.exes = {}
        self.propertyMap = {}
        self.nodes = {}
        self.jobTransformations = {}
        
        self.variableMap = {
            'literalToVariable' : {},
            'literalToNodeId'   : {}
            }
        
    def addToVariableMap (self, literal, variable, id):
        self.variableMap ['literalToVariable'][literal] = variable
        self.variableMap ['literalToNodeId'][literal] = id
        logger.debug ("variablematch: recorded lit=%s var=%s id=%s", literal, variable, id)

    def setProperties (self, nodeId, properties):
        logger.debug ("wms:pegasus:dax:setprops: (%s)->(%s)" % (nodeId, properties))
        self.propertyMap [nodeId] = properties
        
    def getProperties (self, nodeId):
        return self.propertyMap [nodeId]
    
    def addNode (self, id, node):
        logger.debug ("wms:pegasus:dax:add-node: (%s)->(%s)" % (node.getId(), properties))
        self.nodes [id] = node

    def getNode (self, id):
        return self.nodes.get (id)
        
    def createFile (self, fileName, fileURL=None, site=None):
        #traceback.print_stack ()
        file = self.getFile (fileName)
        if not file:
            file = File (fileName)
            if not fileURL:
                fileURL = "file://%s/%s" % (self.wms.getOutputDir (), fileName)
                logger.debug ("fileurl: %s", fileURL)
            if not site:
                site = "local"
            if not isinstance(fileURL, basestring) and len (fileURL) > 0:
                fileURL = fileURL [0]
            logger.debug ("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
            pfn = PFN (fileURL, site)
            file.addPFN (pfn)
            self.files [fileName] = file
        return file
    def addFileToDAG (self, file):
        if not self.adag.hasFile (file):
            self.adag.addFile (file)
    def removeFileFromDAG (self, file):
        if file in self.adag.files:
            self.adag.files.remove (file)
        
    def addFile (self, fileName, fileURL=None, site=None):
        #traceback.print_stack ()
        file = self.getFile (fileName)
        if not file:
            file = File (fileName)
            if not fileURL:
                fileURL = "file://%s/%s" % (self.wms.getOutputDir (), fileName)
                logger.debug ("fileurl: %s", fileURL)
            if not site:
                site = "local"
            if not isinstance(fileURL, basestring) and len (fileURL) > 0:
                fileURL = fileURL [0]
            logger.debug ("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
            pfn = PFN (fileURL, site)
            file.addPFN (pfn)
            self.adag.addFile (file)
            self.files [fileName] = file
        return file
	
    def getFile (self, fileName, prefix=""):
        value = None
        key = "%s%s" % (prefix, fileName)
        if key in self.files:
            logger.debug ("wms:pegasus:dax:get-file: (%s)" % key)
            value = self.files [key]
        else:
            value = None
	return value

    def addExecutable (self, jobId, name, path, version="1.0", exe_os="linux", exe_arch="x86_64", site="local", installed="true"):
        e_exe = self.getExecutable (name)
        
        if not version:
            version = "1.0"
        if not exe_arch:
            exe_arch = "x86_64"
        # Normalize site and installed before the Executable is built, so
        # the constructor sees the defaults rather than being patched up
        # afterwards.
        if not site:
            site = "local"
        if not installed:
            installed = False

        if not e_exe:
            e_exe = Executable (
                namespace=self.namespace,
                name=name,
                version=version,
                os=exe_os,
                arch=exe_arch,
                installed=installed)
            if logging.getLogger().isEnabledFor (logging.DEBUG):
                logger.debug ("wms:pegasus:dax:add-exe: (name=[%s], path=[%s], version=[%s], os=[%s], arch=[%s], site=[%s], installed=[%s])" % 
                               (name,
                                path,
                                version,
                                exe_os,
                                exe_arch,
                                site,
                                installed))
            if not "://" in path:
                path = "file://%s" % path
            if not path:
                raise ValueError ("empty path for executable: %s at site %s" % (name, site))

            e_exe.addPFN (PFN (path, site))
            self.adag.addExecutable (e_exe)
            self.exes [name] = e_exe

            transformation = Transformation (name, self.namespace, version)
            self.jobTransformations [jobId] = transformation
            
        return e_exe
    
    def getExecutable (self, name):
        key = name
        if key in self.exes:
            return self.exes [key]
        else:
            return None

    def addSubWorkflow (self, name, transformation=None):
        #self.adag.addTransformation (transformation)
        abstractJob = DAX (name)
        self.adag.addDAX (abstractJob)
        return abstractJob
    
    def addJob (self, id):
        #self.adag.addTransformation (transformation)
        transformation = self.jobTransformations [id]
        logger.debug ("wms:pegasus:add-job: transformation(%s) jobid(%s)", transformation.name, id)
        abstractJob = Job (name=transformation, id=id)
        self.adag.addJob (abstractJob)
        return abstractJob
    
    def addProfiles (self, abstractJob, profiles):
        if profiles:
            for astProfile in profiles:
                logger.debug ("wms:pegasus:add-profile: (namespace=%s,key=%s,value=%s) to job (%s)",
                               astProfile.namespace,
                               astProfile.key,
                               astProfile.value,
                               abstractJob.name)
                profile = Profile (astProfile.namespace,
                                   astProfile.key,
                                   astProfile.value)
                abstractJob.addProfile (profile)

    def addFiles (self, abstractJob, files, link):
        if files:
            for fileKey in files:
                entry = files [fileKey]
                fileElement = entry [0]
                file = fileElement.getDaxNode ()
                try:
                    isLeaf = entry [2]
                    if isLeaf:
                        # Leaf files are transferred out of the workflow.
                        abstractJob.uses (file, link=link, transfer=True)
                    else:
                        abstractJob.uses (file, link=link)
                    arg = entry [1]
                    if arg:
                        abstractJob.addArguments (arg, file)
                except DuplicateError:
                    pass

    def addInputFiles (self, abstractJob, files):
        self.addFiles (abstractJob, files, Link.INPUT)

    def addOutputFiles (self, abstractJob, files):
        self.addFiles (abstractJob, files, Link.OUTPUT)

    def addArguments (self, abstractJob, arguments):
        if arguments:
            abstractJob.addArguments (arguments)

    def addDependency (self, parent, child):
        self.adag.addDependency (Dependency (parent, child))
                        
    def writeExecutable (self, stream):
        self.adag.writeXML (stream)
        filename = "%s.%s" % (self.namespace, 'obj')
        filepath = os.path.join (self.wms.getOutputDir (), filename)
        try:
            with open (filepath, 'w') as output:
                output.write (json.dumps (self.variableMap, indent=3, sort_keys=True))
        except IOError:
            traceback.print_stack ()

    def getADAG (self):
        return self.adag
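
A minimal usage sketch of the model above; StubWMS is a stand-in of my own for whatever object the surrounding system normally supplies (only getOutputDir() is exercised by the code shown):

from Pegasus.DAX3 import Link

class StubWMS:
    # Hypothetical stub; the real wms object comes from the caller.
    def getOutputDir (self):
        return "/tmp"

model = PegasusWorkflowModel ("demo", StubWMS ())
infile = model.addFile ("input.txt")

# addExecutable records a Transformation under the job id, which
# addJob later looks up in jobTransformations.
model.addExecutable ("job-1", "analyze", "/usr/bin/analyze")
job = model.addJob ("job-1")
job.uses (infile, link=Link.INPUT)

with open ("demo.dax", "w") as stream:
    model.writeExecutable (stream)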
Example #5
        Profile(namespace="pegasus",
                key="clusters.size",
                value=config.get('all', 'clusters_size')))
    sleep.addProfile(
        Profile(namespace="pegasus",
                key="clusters.maxruntime",
                value=config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range(4):
    job = Job(namespace="cluster", name="level1", version="1.0")
    job.addArguments('-a level1 -T ' + str(i + 1))
    job.addArguments('-i', a)
    job.addProfile(
        Profile(namespace="pegasus", key="job.runtime", value=str(i + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob(job)

    for j in range(4):
        child = Job(namespace="cluster", name="level2", version="1.0")
        child.addArguments('-a level2 -T ' + str((j + 1) * 2))
        child.addProfile(
            Profile(namespace="pegasus", key="runtime", value=str(
                (j + 1) * 2)))
        cluster.addJob(child)

        cluster.depends(parent=job, child=child)

# Write the DAX to standard out
cluster.writeXML(sys.stdout)
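
Writing to sys.stdout keeps the generator composable with shell redirection; an equivalent file-based ending, with a hypothetical output path, would be:

with open('cluster.dax', 'w') as out:
    cluster.writeXML(out)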
Example #6
# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a", config.get('all', 'file_site')))
cluster.addFile(a)

for i in range (1, 3):
    sleep = Executable (namespace = "cluster", name = "level" + str (i), version = "1.0", os = "linux", arch = "x86", installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN (PFN (config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg", config.get('all', 'executable_site')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.size", value = config.get('all', 'clusters_size')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.maxruntime", value = config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range (4):
    job = Job (namespace = "cluster", name = "level1", version = "1.0")
    job.addArguments('-a level1 -T ' + str (i + 1))
    job.addArguments('-i', a)
    job.addProfile (Profile (namespace = "pegasus", key = "job.runtime", value = str (i + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob (job)

    for j in range (4):
        child = Job (namespace = "cluster", name = "level2", version = "1.0")
        child.addArguments('-a level2 -T ' + str ((j + 1) * 2))
        child.addProfile (Profile (namespace = "pegasus", key = "runtime", value = str ((j + 1) * 2)))
        cluster.addJob (child)

        cluster.depends (parent = job, child = child)

# Write the DAX to standard out
cluster.writeXML (sys.stdout)
Example #7
    def write_dax(self, filename='workflow.dax', name='workflow'):
        """Generate Pegasus abstract workflow (DAX).

        Parameters
        ----------
        filename : `str`
            File to write the DAX to.
        name : `str`, optional
            Name of the DAX.

        Returns
        -------
        `Pegasus.ADAG`
            Abstract workflow used by Pegasus' planner.

        Raises
        ------
        `ValueError`
            If a task or file node is missing a mandatory attribute.
        """
        dax = ADAG(name)

        # Process file nodes.
        for file_id in self.files:
            attrs = self.graph.node[file_id]
            try:
                name = attrs['lfn']
            except KeyError:
                msg = 'Mandatory attribute "{}" is missing.'
                raise ValueError(msg.format('lfn'))
            file_ = File(name)

            # Add physical file names, if any.
            urls = attrs.get('pfn')
            if urls is not None:
                urls = urls.split(',')
                sites = attrs.get('sites')
                if sites is None:
                    sites = len(urls) * ['condorpool']
                else:
                    sites = sites.split(',')
                for url, site in zip(urls, sites):
                    file_.addPFN(PFN(url, site))

            self.catalog[attrs['lfn']] = file_

        # Add jobs to the DAX.
        for task_id in self.tasks:
            attrs = self.graph.node[task_id]
            try:
                name = attrs['exec_name']
            except KeyError:
                msg = 'Mandatory attribute "{}" is missing.'
                raise ValueError(msg.format('exec_name'))
            label = '{name}_{id}'.format(name=name, id=task_id)
            job = Job(name, id=task_id, node_label=label)

            # Add job command line arguments, replacing any file name with
            # the respective Pegasus file object.
            args = attrs.get('exec_args', '')
            if args:
                args = args.split()
                # Substitute every argument matching a cataloged LFN; the
                # comprehension covers repeated occurrences, which
                # list.index() would miss.
                args = [self.catalog.get(arg, arg) for arg in args]
                job.addArguments(*args)

            # Specify job's inputs.
            inputs = list(self.graph.predecessors(task_id))
            for file_id in inputs:
                attrs = self.graph.node[file_id]
                is_ignored = attrs.get('ignore', False)
                if not is_ignored:
                    file_ = self.catalog[attrs['lfn']]
                    job.uses(file_, link=Link.INPUT)

            # Specify job's outputs.
            outputs = list(self.graph.successors(task_id))
            for file_id in outputs:
                attrs = self.graph.node[file_id]
                is_ignored = attrs.get('ignore', False)
                if not is_ignored:
                    file_ = self.catalog[attrs['lfn']]
                    job.uses(file_, link=Link.OUTPUT)

                    streams = attrs.get('streams')
                    if streams is not None:
                        if streams & 1 != 0:
                            job.setStdout(file_)
                        if streams & 2 != 0:
                            job.setStderr(file_)

            # Provide default files to store stderr and stdout, if not
            # specified explicitly (note: .err for stderr, .out for stdout).
            if job.stderr is None:
                file_ = File('{name}.err'.format(name=label))
                job.uses(file_, link=Link.OUTPUT)
                job.setStderr(file_)
            if job.stdout is None:
                file_ = File('{name}.out'.format(name=label))
                job.uses(file_, link=Link.OUTPUT)
                job.setStdout(file_)

            dax.addJob(job)

        # Add job dependencies to the DAX.
        for task_id in self.tasks:
            parents = set()
            for file_id in self.graph.predecessors(task_id):
                parents.update(self.graph.predecessors(file_id))
            for parent_id in parents:
                dax.depends(parent=dax.getJob(parent_id),
                            child=dax.getJob(task_id))

        # Finally, write down the workflow in DAX format and return it,
        # as promised in the docstring.
        with open(filename, 'w') as f:
            dax.writeXML(f)
        return dax
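
The streams attribute consulted above is a two-bit mask; a quick illustration of how the bit tests decode it (the constant names are mine):

STDOUT_BIT, STDERR_BIT = 1, 2

for streams in range(4):
    to_stdout = streams & STDOUT_BIT != 0
    to_stderr = streams & STDERR_BIT != 0
    print(streams, to_stdout, to_stderr)
# 0 -> neither, 1 -> stdout only, 2 -> stderr only, 3 -> both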
Example #8
            else:
                seeg_gain_computation = SeegGainComputation(
                    config.props[ConfigKey.SUBJECT], atlas_suffix)
                if config.props[ConfigKey.SEEG_GAIN_USE_DP] == "True":
                    seeg_gain_computation.add_seeg_gain_dp_computation_steps(
                        dax, job_seeg_xyz, job_mapping_details)
                if config.props[ConfigKey.SEEG_GAIN_USE_MRS] == "True":
                    seeg_gain_computation.add_seeg_mrs_gain_computation_steps(
                        dax, job_seeg_xyz, job_mapping_details)
        else:
            if config.props[ConfigKey.EEG_FLAG] == "True":
                projection_computation = ProjectionComputation(
                    config.props[ConfigKey.SUBJECT], SensorsType.EEG.value,
                    atlas_suffix)
                projection_computation.add_projection_computation_steps(
                    dax, job_mapping_details)

            if config.props[ConfigKey.MEG_FLAG] == "True":
                projection_computation = ProjectionComputation(
                    config.props[ConfigKey.SUBJECT], SensorsType.MEG.value,
                    atlas_suffix)
                projection_computation.add_projection_computation_steps(
                    dax, job_mapping_details)

    out_dir = os.path.dirname(daxfile)
    # Guard against a bare filename (empty dirname) and create any
    # missing intermediate directories.
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    with open(daxfile, "w") as f:
        dax.writeXML(f)