Example #1
    def write(self, filename, name='dax'):
        """Generate Pegasus abstract workflow (DAX).

        Parameters
        ----------
        filename : `str`
            File to write the DAX to.
        name : `str`, optional
            Name of the DAX.

        Returns
        -------
        `Pegasus.ADAG`
            Abstract workflow used by Pegasus' planner.
        """
        dax = ADAG(name)

        # Add files to DAX-level replica catalog.
        catalog = {}
        for file_id in self.files:
            attrs = self.graph.node[file_id]
            f = File(attrs['lfn'])

            # Add physical file names, if any.
            urls = attrs.get('urls')
            if urls is not None:
                sites = attrs.get('sites')
                if sites is None:
                    sites = ','.join(['local'] * len(urls.split(',')))
                for url, site in zip(urls.split(','), sites.split(',')):
                    f.addPFN(PFN(url, site))

            catalog[attrs['lfn']] = f
            dax.addFile(f)

        # Add jobs to the DAX.
        for task_id in self.tasks:
            attrs = self.graph.node[task_id]
            job = Job(name=attrs['name'], id=task_id)

            # Add the job's command line arguments, replacing every argument
            # that names a catalogued file with the respective Pegasus file
            # object.
            args = attrs.get('args')
            if args:
                args = [catalog.get(arg, arg) for arg in args.split()]
                job.addArguments(*args)

            # Specify the job's inputs.
            for file_id in self.graph.predecessors(task_id):
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.INPUT)

            # Specify the job's outputs.
            for file_id in self.graph.successors(task_id):
                attrs = self.graph.node[file_id]
                f = catalog[attrs['lfn']]
                job.uses(f, link=Link.OUTPUT)

                streams = attrs.get('streams')
                if streams is not None:
                    if streams & 1 != 0:
                        job.setStdout(f)
                    if streams & 2 != 0:
                        job.setStderr(f)

            dax.addJob(job)

        # Add job dependencies to the DAX.
        for task_id in self.tasks:
            parents = set()
            for file_id in self.graph.predecessors(task_id):
                parents.update(self.graph.predecessors(file_id))
            for parent_id in parents:
                dax.depends(parent=dax.getJob(parent_id),
                            child=dax.getJob(task_id))

        # Finally, write down the workflow in DAX format.
        with open(filename, 'w') as f:
            dax.writeXML(f)

        return dax
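
# A minimal, self-contained sketch of the Pegasus DAX3 calls that write()
# strings together: register a logical file with a physical replica, attach it
# to a job, and serialize the DAX.  The file names, site, and job id below are
# illustrative assumptions, not values taken from the method above.
from Pegasus.DAX3 import ADAG, File, PFN, Job, Link

dax = ADAG('example')

# Register a logical file and one physical file name (replica) for it.
raw = File('raw.fits')
raw.addPFN(PFN('file:///tmp/raw.fits', 'local'))
dax.addFile(raw)

# A single job that consumes the file; arguments mix plain strings and File
# objects, exactly as write() builds them from the task's 'args' attribute.
job = Job(name='calibrate', id='ID0000001')
job.addArguments('--input', raw)
job.uses(raw, link=Link.INPUT)
dax.addJob(job)

# Serialize the abstract workflow so it can be handed to the Pegasus planner.
with open('example.dax', 'w') as out:
    dax.writeXML(out)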
Example #2
# Create an abstract dag
cluster = ADAG(config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if (input_file == ''):
    input_file = os.getcwd()
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(
    PFN(
        config.get('all', 'file_url') + input_file + "/f.a",
        config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(namespace="cluster",
                       name="level" + str(i),
                       version="1.0",
                       os="linux",
                       arch="x86_64",
                       installed=config.getboolean('all',
                                                   'executable_installed'))
    sleep.addPFN(
        PFN(
            config.get('all', 'executable_url') + sys.argv[1] +
            "/bin/pegasus-keg", config.get('all', 'executable_site')))
    sleep.addProfile(
        Profile(namespace="pegasus",
                key="clusters.size",
                value=config.get('all', 'clusters_size')))
    sleep.addProfile(
        Profile(namespace="pegasus",
                key="clusters.maxruntime",
                value=config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)
Example #3
class PegasusWorkflowModel (WorkflowModel):
    
    def __init__(self, namespace, wms):
        logger.debug ("wms:pegasus:create-workflowmodel: %s" % namespace)
        self.wms = wms
        logger.debug ("outputdir: %s", wms.getOutputDir ())
        self.adag = ADAG (namespace)
        self.namespace = namespace
        self.files = {}
        self.exes = {}
        self.propertyMap = {}
        self.nodes = {}
        self.jobTransformations = {}
        
        self.variableMap = {
            'literalToVariable' : {},
            'literalToNodeId'   : {}
            }
        
    def addToVariableMap (self, literal, variable, id):
        self.variableMap ['literalToVariable'][literal] = variable
        self.variableMap ['literalToNodeId'][literal] = id
        logger.debug ("variablematch: recorded lit=%s var=%s id=%s", literal, variable, id)

    def setProperties (self, nodeId, properties):
        logger.debug ("wms:pegasus:dax:setprops: (%s)->(%s)" % (nodeId, properties))
        self.propertyMap [nodeId] = properties
        
    def getProperties (self, nodeId):
        return self.propertyMap [nodeId]
    
    def addNode (self, id, node):
        logger.debug ("wms:pegasus:dax:add-node: (%s)->(%s)" % (node.getId(), properties))
        self.nodes [id] = node

    def getNode (self, id):
        node = None
        if id in self.nodes:
            node = self.nodes [id]
        return node
        
    def createFile (self, fileName, fileURL=None, site=None):
        #traceback.print_stack ()
        file = self.getFile (fileName)
        if not file:
            file = File (fileName)
            if not fileURL:
                fileURL = "file://%s/%s" % (self.wms.getOutputDir (), fileName)
                logger.debug ("fileurl: %s", fileURL)
            if not site:
                site = "local"
            if not isinstance(fileURL, basestring) and len (fileURL) > 0:
                fileURL = fileURL [0]
            logger.debug ("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
            pfn = PFN (fileURL, site)
            file.addPFN (pfn)
            self.files [fileName] = file
        return file
    def addFileToDAG (self, file):
        if not self.adag.hasFile (file):
            self.adag.addFile (file)
    def removeFileFromDAG (self, file):
        if file in self.adag.files:
            self.adag.files.remove (file)
        
    def addFile (self, fileName, fileURL=None, site=None):
        #traceback.print_stack ()
        file = self.getFile (fileName)
        if not file:
            file = File (fileName)
            if not fileURL:
                fileURL = "file://%s/%s" % (self.wms.getOutputDir (), fileName)
                logger.debug ("fileurl: %s", fileURL)
            if not site:
                site = "local"
            if not isinstance(fileURL, basestring) and len (fileURL) > 0:
                fileURL = fileURL [0]
            logger.debug ("--add-pfn: (%s)(%s)(%s)", fileName, fileURL, site)
            pfn = PFN (fileURL, site)
            file.addPFN (pfn)
            self.adag.addFile (file)
            self.files [fileName] = file
        return file
	
    def getFile (self, fileName, prefix=""):
        value = None
        key = "%s%s" % (prefix, fileName)
        if key in self.files:
            logger.debug ("wms:pegasus:dax:get-file: (%s)" % key)
            value = self.files [key]
        return value

    def addExecutable (self, jobId, name, path, version="1.0", exe_os="linux", exe_arch="x86_64", site="local", installed="true"):
        e_exe = self.getExecutable (name)
        
        if not version:
            version = "1.0"
        if not exe_arch:
            exe_arch="x86_64"

        if not e_exe:
            e_exe = Executable (
                namespace=self.namespace, 
                name=name, 
                version=version, 
                os=exe_os, 
                arch=exe_arch, 
                installed=installed)
            if not site:
                site = "local"
            if not installed:
                installed = False
            if logging.getLogger().isEnabledFor (logging.DEBUG):
                logger.debug ("wms:pegasus:dax:add-exe: (name=[%s], path=[%s], version=[%s], os=[%s], arch=[%s], site=[%s], installed=[%s])" % 
                               (name,
                                path,
                                version,
                                exe_os,
                                exe_arch,
                                site,
                                installed))
            if not "://" in path:
                path = "file://%s" % path
            if not path:
                raise ValueError ("empty path for executable: %s at site %s" % (name, site))

            e_exe.addPFN (PFN (path, site))
            if not installed:
                e_exe.installed = installed
            self.adag.addExecutable (e_exe)
            self.exes [name] = e_exe

            transformation = Transformation (name, self.namespace, version)
            self.jobTransformations [jobId] = transformation
            
        return e_exe
    
    def getExecutable (self, name):
        key = name
        if key in self.exes:
            return self.exes [key]
        else:
            return None

    def addSubWorkflow (self, name, transformation=None):
        #self.adag.addTransformation (transformation)
        abstractJob = DAX (name)
        self.adag.addDAX (abstractJob)
        return abstractJob
    
    def addJob (self, id):
        #self.adag.addTransformation (transformation)
        transformation = self.jobTransformations [id]
        logger.debug ("wms:pegasus:add-job: transformation(%s) jobid(%s)", transformation.name, id)
        abstractJob = Job (name=transformation, id=id)
        self.adag.addJob (abstractJob)
        return abstractJob
    
    def addProfiles (self, abstractJob, profiles):
        if profiles:
            for astProfile in profiles:
                logger.debug ("wms:pegasus:add-profile: (namespace=%s,key=%s,value=%s) to job (%s)",
                               astProfile.namespace,
                               astProfile.key,
                               astProfile.value,
                               abstractJob.name)
                profile = Profile (astProfile.namespace,
                                   astProfile.key,
                                   astProfile.value)
                abstractJob.addProfile (profile)

    def addFiles (self, abstractJob, files, link):
        if files:
            for fileKey in files:
                tuple = files [fileKey]
                fileElement = tuple [0]
                file = fileElement.getDaxNode ()
                try:

                    isLeaf = tuple [2]
                    if isLeaf:
                        abstractJob.uses (file, link=link, transfer=True)
                    else:
                        abstractJob.uses (file, link=link)
                    arg = tuple [1]
                    if arg:
                        abstractJob.addArguments (arg, file)
                except DuplicateError:
                    pass

    def addInputFiles (self, abstractJob, files):
        self.addFiles (abstractJob, files, Link.INPUT)

    def addOutputFiles (self, abstractJob, files):
        self.addFiles (abstractJob, files, Link.OUTPUT)

    def addArguments (self, abstractJob, arguments):
        if arguments:
            abstractJob.addArguments (arguments)

    def addDependency (self, parent, child):
        self.adag.addDependency (Dependency (parent, child))
                        
    def writeExecutable (self, stream):
        self.adag.writeXML (stream)
        filename = "%s.%s" % (self.namespace, 'obj')
        filepath = os.path.join (self.wms.getOutputDir (), filename)
        try:
            output = None
            try:
                output = open (filepath, 'w')
                output.write (json.dumps (self.variableMap, indent=3, sort_keys=True))                
                output.flush ()                
            finally:
                if output:
                    output.close ()
        except IOError:
            traceback.print_stack ()

    def getADAG (self):
        return self.adag
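
# A minimal, hedged sketch of driving the model above; the stub WMS, output
# directory, job ids, and executable paths are assumptions made for
# illustration and are not part of the original code.
import os

class StubWMS (object):
    def getOutputDir (self):
        return "/tmp/pegasus-out"

outdir = "/tmp/pegasus-out"
if not os.path.isdir (outdir):
    os.makedirs (outdir)

model = PegasusWorkflowModel ("demo", StubWMS ())

# addJob() looks up the Transformation recorded by addExecutable(), so the
# executable must be registered under the same job id first.
model.addExecutable ("job-1", "step_one", "/usr/bin/step_one")
model.addExecutable ("job-2", "step_two", "/usr/bin/step_two")

first = model.addJob ("job-1")
second = model.addJob ("job-2")
model.addArguments (first, "--verbose")
model.addDependency (first, second)

# Writes the DAX XML to the stream and a <namespace>.obj variable map
# alongside it in the WMS output directory.
with open (os.path.join (outdir, "demo.dax"), "w") as stream:
    model.writeExecutable (stream)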
Example #4
config = ConfigParser.ConfigParser({'input_file':'', 'workflow_name':'horizontal-clustering-test', 'executable_installed':"False", 'clusters_size':"3", 'clusters_maxruntime':"7"})
config.read(sys.argv[2] + '/test.config')

# Create an abstract dag
cluster = ADAG (config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if (input_file == ''):
        input_file = os.getcwd ()
else:
        input_file += '/' + os.getenv ('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a", config.get('all', 'file_site')))
cluster.addFile(a)

for i in range (1, 3):
    sleep = Executable (namespace = "cluster", name = "level" + str (i), version = "1.0", os = "linux", arch = "x86", installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN (PFN (config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg", config.get('all', 'executable_site')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.size", value = config.get('all', 'clusters_size')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.maxruntime", value = config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range (4):
    job = Job (namespace = "cluster", name = "level1", version = "1.0")
    job.addArguments('-a level1 -T ' + str (i + 1))
    job.addArguments('-i', a)
    job.addProfile (Profile (namespace = "pegasus", key = "job.runtime", value = str (i + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob (job)
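
# The excerpt ends before the workflow is serialized.  A DAX built this way is
# typically written out with ADAG.writeXML(), as in Example #1; the file name
# below is an assumption for illustration, not part of the original script.
with open('horizontal-clustering-test.dax', 'w') as out:
    cluster.writeXML(out)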