def registerCustomExecutables(self, workflow=None):
		"""
		2011-11-28
		"""
		FilterVCFPipeline.registerCustomExecutables(self, workflow=workflow)
		PlinkOnVCFWorkflow.registerCustomExecutables(self, workflow=workflow)
		
		if workflow is None:
			workflow = self
		namespace = workflow.namespace
		version = workflow.version
		operatingSystem = workflow.operatingSystem
		architecture = workflow.architecture
		clusters_size = workflow.clusters_size
		site_handler = workflow.site_handler
		vervetSrcPath = self.vervetSrcPath
		
		executableClusterSizeMultiplierList = []	#2012.8.7 each cell is a tuple of (executable, clusterSizeMultiplier); multiplier 0 means no clustering for that executable
		
		
		OutputSitesBelowMaxMendelError = Executable(namespace=namespace, name="OutputSitesBelowMaxMendelError", version=version, \
							os=operatingSystem, arch=architecture, installed=True)
		OutputSitesBelowMaxMendelError.addPFN(PFN("file://" + os.path.join(self.pymodulePath, "pegasus/mapper/OutputSitesBelowMaxMendelError.py"), \
							site_handler))
		executableClusterSizeMultiplierList.append((OutputSitesBelowMaxMendelError, 0))
		
		self.addExecutableAndAssignProperClusterSize(executableClusterSizeMultiplierList, defaultClustersSize=self.clusters_size)
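
The addExecutableAndAssignProperClusterSize helper is defined elsewhere in the workflow base class; a minimal sketch of the idea, assuming it attaches a pegasus clusters.size profile scaled by each multiplier (the same profile the later ADAG examples set explicitly) and exposes each executable as a workflow attribute:

	def addExecutableAndAssignProperClusterSize(self, executableClusterSizeMultiplierList=None, defaultClustersSize=None):
		"""
		Sketch (assumption): register each executable on the workflow and, when its
		multiplier is non-zero, attach a pegasus clusters.size profile of
		defaultClustersSize * multiplier so Pegasus clusters its jobs.
		"""
		for executable, clusterSizeMultiplier in executableClusterSizeMultiplierList:
			if defaultClustersSize and clusterSizeMultiplier > 0:
				clusters_size = max(1, int(defaultClustersSize * clusterSizeMultiplier))
				executable.addProfile(Profile(namespace="pegasus", key="clusters.size", value="%s" % clusters_size))
			self.addExecutable(executable)	#self is assumed to be an ADAG subclass
			setattr(self, executable.name, executable)	#so later code can refer to workflow.<name>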
Example #2
    def addExecutable (self, jobId, name, path, version="1.0", exe_os="linux", exe_arch="x86_64", site="local", installed="true"):
        e_exe = self.getExecutable (name)
        
        if not version:
            version = "1.0"
        if not exe_arch:
            exe_arch="x86_64"

        if not e_exe:
            e_exe = Executable (
                namespace=self.namespace, 
                name=name, 
                version=version, 
                os=exe_os, 
                arch=exe_arch, 
                installed=installed)
            if not site:
                site = "local"
            if not installed:
                installed = False
            if logging.getLogger().isEnabledFor (logging.DEBUG):
                logger.debug ("wms:pegasus:dax:add-exe: (name=[%s], path=[%s], version=[%s], os=[%s], arch=[%s], site=[%s], installed=[%s])" % 
                               (name,
                                path,
                                version,
                                exe_os,
                                exe_arch,
                                site,
                                installed))
            if not "://" in path:
                path = "file://%s" % path
            if not path:
                raise ValueError ("empty path for executable: %s at site %s" % (name, site))

            e_exe.addPFN (PFN (path, site))
            if not installed:
                e_exe.installed = installed
            self.adag.addExecutable (e_exe)
            self.exes [name] = e_exe

            transformation = Transformation (name, self.namespace, version)
            self.jobTransformations [jobId] = transformation
            
        return e_exe
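
The enclosing class is not shown in this example; a hypothetical call, assuming the surrounding object exposes this addExecutable method and a configured namespace:

    # Hypothetical usage; job id, name, and path are placeholders.
    exe = dax_builder.addExecutable (
        jobId="job-0001",
        name="samtools",
        path="/usr/local/bin/samtools",    # bare paths are prefixed with file:// above
        version="1.9",
        exe_os="linux",
        exe_arch="x86_64",
        site="local",
        installed="true")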
Example #3
def script_to_pegasus_executable(
        path: Path,
        name: Optional[str] = None,
        *,
        site: str = "local",
        namespace: Optional[str] = None,
        version: Optional[str] = None,
        arch: Optional[Arch] = None,
        os: Optional[OS] = None,
        osrelease: Optional[str] = None,
        osversion: Optional[str] = None,
        glibc: Optional[str] = None,
        installed: Optional[bool] = None,
        container: Optional[str] = None) -> Executable:
    """
    Turns a script path into a pegasus Executable

    Arguments:
        *path*: Path to the script on the submit host
        *name*: Logical name of executable (defaults to the script's file name)
        *site*: Site handle used when registering the script's PFN
        *namespace*: Executable namespace
        *version*: Executable version
        *arch*: Architecture that this exe was compiled for
        *os*: Name of os that this exe was compiled for
        *osrelease*: Release of os that this exe was compiled for
        *osversion*: Version of os that this exe was compiled for
        *glibc*: Version of glibc this exe was compiled against
        *installed*: Is the executable installed (true), or stageable (false)
        *container*: Optional attribute to specify the container to use
    """

    rtrnr = Executable(
        path.stem + path.suffix if name is None else name,
        namespace=namespace,
        version=version,
        arch=arch,
        os=os,
        osrelease=osrelease,
        osversion=osversion,
        glibc=glibc,
        installed=installed,
        container=container,
    )
    rtrnr.addPFN(path_to_pfn(path, site=site))
    return rtrnr
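
A minimal usage sketch for script_to_pegasus_executable; the script path and the dax ADAG object are placeholders, and Arch/OS are the Pegasus DAX3 constants imported alongside Executable:

    from pathlib import Path

    # Hypothetical script path; marked stageable so Pegasus transfers it to the compute site.
    preprocess_exe = script_to_pegasus_executable(
        Path("/home/user/scripts/preprocess.sh"),
        site="local",
        arch=Arch.X86_64,
        os=OS.LINUX,
        installed=False,
    )
    dax.addExecutable(preprocess_exe)    # dax is an existing ADAG instance (assumption)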
Example #4
	def registerCustomExecutables(self, workflow=None):
		"""
		2011-11-28
		"""
		if workflow is None:
			workflow = self
		
		parentClass.registerCustomExecutables(self, workflow=workflow)
		
		namespace = workflow.namespace
		version = workflow.version
		operatingSystem = workflow.operatingSystem
		architecture = workflow.architecture
		clusters_size = workflow.clusters_size
		site_handler = workflow.site_handler
		vervetSrcPath = self.vervetSrcPath
		
		executableClusterSizeMultiplierList = []	#2012.8.7 each cell is a tuple of (executable, clusterSizeMultiplier); multiplier 0 means no clustering for that executable
		#mergeSameHeaderTablesIntoOne is used here on a per-chromosome basis, so allow clustering
		executableClusterSizeMultiplierList.append((workflow.mergeSameHeaderTablesIntoOne, 1))
		
		
		ReplaceIndividualIDInMatrixFileWithReadGroup = Executable(namespace=namespace, name="ReplaceIndividualIDInMatrixFileWithReadGroup", version=version, \
											os=operatingSystem, arch=architecture, installed=True)
		ReplaceIndividualIDInMatrixFileWithReadGroup.addPFN(PFN("file://" + os.path.join(vervetSrcPath, "db/ReplaceIndividualIDInMatrixFileWithReadGroup.py"), site_handler))
		executableClusterSizeMultiplierList.append((ReplaceIndividualIDInMatrixFileWithReadGroup, 0.5))
		
		SelectRowsWithinCoverageRange = Executable(namespace=namespace, name="SelectRowsWithinCoverageRange", version=version, \
											os=operatingSystem, arch=architecture, installed=True)
		SelectRowsWithinCoverageRange.addPFN(PFN("file://" + os.path.join(vervetSrcPath, "db/SelectRowsWithinCoverageRange.py"), site_handler))
		executableClusterSizeMultiplierList.append((SelectRowsWithinCoverageRange, 0.5))
		
		self.addExecutableAndAssignProperClusterSize(executableClusterSizeMultiplierList, defaultClustersSize=self.clusters_size)
		
		#2013.06.13
		self.addOneExecutableFromPathAndAssignProperClusterSize(path=os.path.join(vervetSrcPath, "db/output/OutputVCFAlignmentDepthRange.py"), \
											name='OutputVCFAlignmentDepthRange', \
											clusterSizeMultipler=1)
		self.addOneExecutableFromPathAndAssignProperClusterSize(path=os.path.join(vervetSrcPath, "db/output/OutputVRCPedigreeInTFAMGivenOrderFromFile.py"), \
											name='OutputVRCPedigreeInTFAMGivenOrderFromFile', \
											clusterSizeMultipler=0.8)
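
The addOneExecutableFromPathAndAssignProperClusterSize helper is also defined outside this listing; a plausible sketch, following the explicit registrations earlier in this method (the exact signature is an assumption, and the clusterSizeMultipler spelling matches the calls above):

	def addOneExecutableFromPathAndAssignProperClusterSize(self, path=None, name=None, clusterSizeMultipler=1):
		"""
		Sketch (assumption): wrap one script path in an Executable, register its PFN,
		and hand it to addExecutableAndAssignProperClusterSize with the multiplier.
		"""
		executable = Executable(namespace=self.namespace, name=name, version=self.version, \
							os=self.operatingSystem, arch=self.architecture, installed=True)
		executable.addPFN(PFN("file://" + os.path.abspath(path), self.site_handler))
		self.addExecutableAndAssignProperClusterSize([(executable, clusterSizeMultipler)], \
							defaultClustersSize=self.clusters_size)
		return executable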
Example #5
a.addPFN(
    PFN(
        config.get('all', 'file_url') + input_file + "/f.a",
        config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(namespace="cluster",
                       name="level" + str(i),
                       version="1.0",
                       os="linux",
                       arch="x86_64",
                       installed=config.getboolean('all',
                                                   'executable_installed'))
    sleep.addPFN(
        PFN(
            config.get('all', 'executable_url') + sys.argv[1] +
            "/bin/pegasus-keg", config.get('all', 'executable_site')))
    sleep.addProfile(
        Profile(namespace="pegasus",
                key="clusters.size",
                value=config.get('all', 'clusters_size')))
    sleep.addProfile(
        Profile(namespace="pegasus",
                key="clusters.maxruntime",
                value=config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range(4):
    job = Job(namespace="cluster", name="level1", version="1.0")
    job.addArguments('-a level1 -T ' + str(i + 1))
    job.addArguments('-i', a)
Example #6
cluster = ADAG (config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if (input_file == ''):
        input_file = os.getcwd ()
else:
        input_file += '/' + os.getenv ('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a", config.get('all', 'file_site')))
cluster.addFile(a)

for i in range (1, 3):
    sleep = Executable (namespace = "cluster", name = "level" + str (i), version = "1.0", os = "linux", arch = "x86", installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN (PFN (config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg", config.get('all', 'executable_site')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.size", value = config.get('all', 'clusters_size')))
    sleep.addProfile (Profile (namespace = "pegasus", key = "clusters.maxruntime", value = config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range (4):
    job = Job (namespace = "cluster", name = "level1", version = "1.0")
    job.addArguments('-a level1 -T ' + str (i + 1))
    job.addArguments('-i', a)
    job.addProfile (Profile (namespace = "pegasus", key = "job.runtime", value = str (i + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob (job)

    for j in range (4):
        child = Job (namespace = "cluster", name = "level2", version = "1.0")
        child.addArguments('-a level2 -T ' + str ((j + 1) * 2))
Example #7
    def run_python_on_parameters(
        self,
        job_name: Locator,
        python_module: Any,
        parameters: Union[Parameters, Dict[str, Any]],
        *,
        depends_on,
        resource_request: Optional[ResourceRequest] = None,
        override_conda_config: Optional[CondaConfiguration] = None,
        category: Optional[str] = None,
    ) -> DependencyNode:
        """
        Schedule a job to run the given *python_module* on the given *parameters*.

        If this job requires other jobs to be executed first,
        include them in *depends_on*.

        This method returns a `DependencyNode` which can be used in *depends_on*
        for future jobs.
        """
        job_dir = self.directory_for(job_name)
        ckpt_name = job_name / "___ckpt"
        checkpoint_path = job_dir / "___ckpt"

        depends_on = _canonicalize_depends_on(depends_on)
        if isinstance(python_module, str):
            fully_qualified_module_name = python_module
        else:
            fully_qualified_module_name = fully_qualified_name(python_module)

        # allow users to specify the parameters as a dict for convenience
        if not isinstance(parameters, Parameters):
            parameters = Parameters.from_mapping(parameters)

        # If we've already scheduled this identical job,
        # then don't schedule it again.
        params_sink = CharSink.to_string()
        YAMLParametersWriter().write(parameters, params_sink)
        signature = (fully_qualified_module_name,
                     params_sink.last_string_written)
        if signature in self._signature_to_job:
            logging.info("Job %s recognized as a duplicate", job_name)
            return self._signature_to_job[signature]

        script_path = job_dir / "___run.sh"
        stdout_path = parameters.string(
            "logfile", default=str((job_dir / "___stdout.log").absolute()))
        self._conda_script_generator.write_shell_script_to(
            entry_point_name=fully_qualified_module_name,
            parameters=parameters,
            working_directory=job_dir,
            script_path=script_path,
            params_path=job_dir / "____params.params",
            stdout_file=stdout_path,
            ckpt_path=checkpoint_path,
            override_conda_config=override_conda_config,
        )
        script_executable = Executable(
            namespace=self._namespace,
            name=str(job_name).replace("/", "_"),
            version="4.0",
            os="linux",
            arch="x86_64",
        )
        script_executable.addPFN(
            path_to_pfn(script_path, site=self._default_site))
        if not self._job_graph.hasExecutable(script_executable):
            self._job_graph.addExecutable(script_executable)
        job = Job(script_executable)
        self._job_graph.addJob(job)
        for parent_dependency in depends_on:
            if parent_dependency.job:
                self._job_graph.depends(job, parent_dependency.job)
            for out_file in parent_dependency.output_files:
                job.uses(out_file, link=Link.INPUT)

        if resource_request is not None:
            resource_request = self.default_resource_request.unify(
                resource_request)
        else:
            resource_request = self.default_resource_request

        if category:
            job.profile(Namespace.DAGMAN, "category", category)
        resource_request.apply_to_job(job,
                                      job_name=self._job_name_for(job_name))

        # Handle Output Files
        # This is currently only handled as the checkpoint file
        # See: https://github.com/isi-vista/vista-pegasus-wrapper/issues/25
        checkpoint_pegasus_file = path_to_pegasus_file(checkpoint_path,
                                                       site=self._default_site,
                                                       name=f"{ckpt_name}")

        if checkpoint_pegasus_file not in self._added_files:
            self._job_graph.addFile(checkpoint_pegasus_file)
            self._added_files.add(checkpoint_pegasus_file)

        # If the checkpoint file already exists, we want to add it to the replica catalog
        # so that we don't run the job corresponding to the checkpoint file again
        if checkpoint_path.exists():
            with self._replica_catalog.open("a+") as handle:
                handle.write(
                    f"{ckpt_name} file://{checkpoint_path} site={self._default_site}\n"
                )

        job.uses(checkpoint_pegasus_file, link=Link.OUTPUT, transfer=True)

        dependency_node = DependencyNode.from_job(
            job, output_files=[checkpoint_pegasus_file])
        self._signature_to_job[signature] = dependency_node

        logging.info("Scheduled Python job %s", job_name)
        return dependency_node
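
A hypothetical call against run_python_on_parameters; workflow_builder, the Locator objects, the module names, and the parameter dicts below are placeholders, not part of the wrapper's shown API:

    # Hypothetical usage; names are placeholders.
    preprocess_node = workflow_builder.run_python_on_parameters(
        preprocess_locator,                      # a Locator identifying this job
        "my_project.preprocess",                 # fully qualified module name
        {"input_dir": "/data/raw", "output_dir": "/data/clean"},
        depends_on=[],
    )
    train_node = workflow_builder.run_python_on_parameters(
        train_locator,
        "my_project.train",
        {"model_dir": "/data/models"},
        depends_on=[preprocess_node],            # chain jobs via the returned DependencyNode
    )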