def registerCustomExecutables(self, workflow=None): """ 2011-11-28 """ FilterVCFPipeline.registerCustomExecutables(self, workflow=workflow) PlinkOnVCFWorkflow.registerCustomExecutables(self, workflow=workflow) if workflow is None: workflow = self namespace = workflow.namespace version = workflow.version operatingSystem = workflow.operatingSystem architecture = workflow.architecture clusters_size = workflow.clusters_size site_handler = workflow.site_handler vervetSrcPath = self.vervetSrcPath executableClusterSizeMultiplierList = [] #2012.8.7 each cell is a tuple of (executable, clusterSizeMultipler (0 if u do not need clustering) OutputSitesBelowMaxMendelError = Executable(namespace=namespace, name="OutputSitesBelowMaxMendelError", version=version, \ os=operatingSystem, arch=architecture, installed=True) OutputSitesBelowMaxMendelError.addPFN(PFN("file://" + os.path.join(self.pymodulePath, "pegasus/mapper/OutputSitesBelowMaxMendelError.py"), \ site_handler)) executableClusterSizeMultiplierList.append((OutputSitesBelowMaxMendelError, 0)) self.addExecutableAndAssignProperClusterSize(executableClusterSizeMultiplierList, defaultClustersSize=self.clusters_size)
def addExecutable(self, jobId, name, path, version="1.0", exe_os="linux", exe_arch="x86_64", site="local", installed="true"):
    e_exe = self.getExecutable(name)
    if not version:
        version = "1.0"
    if not exe_arch:
        exe_arch = "x86_64"
    if not e_exe:
        e_exe = Executable(namespace=self.namespace, name=name, version=version,
                           os=exe_os, arch=exe_arch, installed=installed)
    if not site:
        site = "local"
    if not installed:
        installed = False
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logger.debug("wms:pegasus:dax:add-exe: (name=[%s], path=[%s], version=[%s], os=[%s], arch=[%s], site=[%s], installed=[%s])" %
                     (name, path, version, exe_os, exe_arch, site, installed))
    # Reject empty paths before normalising bare paths to a file:// URL
    # (checking afterwards would never trigger, since "file://" is truthy).
    if not path:
        raise ValueError("empty path for executable: %s at site %s" % (name, site))
    if "://" not in path:
        path = "file://%s" % path
    e_exe.addPFN(PFN(path, site))
    if not installed:
        e_exe.installed = installed
    self.adag.addExecutable(e_exe)
    self.exes[name] = e_exe
    transformation = Transformation(name, self.namespace, version)
    self.jobTransformations[jobId] = transformation
    return e_exe
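# Hypothetical usage sketch for addExecutable. `dax_wrapper` is assumed to be an
# instance of the class defining the method above; the executable path and site
# values are illustrative, not taken from the original code.
keg_exe = dax_wrapper.addExecutable(
    jobId="job-1",
    name="pegasus-keg",
    path="/usr/bin/pegasus-keg",   # a bare path is normalised to a file:// URL
    site="local",
    installed=True,
)
# The returned Executable is registered on the wrapper's ADAG, and a
# Transformation for "job-1" is recorded in jobTransformations.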
def script_to_pegasus_executable(
    path: Path,
    name: Optional[str] = None,
    *,
    site: str = "local",
    namespace: Optional[str] = None,
    version: Optional[str] = None,
    arch: Optional[Arch] = None,
    os: Optional[OS] = None,
    osrelease: Optional[str] = None,
    osversion: Optional[str] = None,
    glibc: Optional[str] = None,
    installed: Optional[bool] = None,
    container: Optional[str] = None,
) -> Executable:
    """
    Turns a script path into a Pegasus Executable.

    Arguments:
        *name*: Logical name of executable
        *namespace*: Executable namespace
        *version*: Executable version
        *arch*: Architecture that this exe was compiled for
        *os*: Name of os that this exe was compiled for
        *osrelease*: Release of os that this exe was compiled for
        *osversion*: Version of os that this exe was compiled for
        *glibc*: Version of glibc this exe was compiled against
        *installed*: Is the executable installed (true), or stageable (false)
        *container*: Optional attribute to specify the container to use
    """
    rtrnr = Executable(
        path.stem + path.suffix if name is None else name,
        namespace=namespace,
        version=version,
        arch=arch,
        os=os,
        osrelease=osrelease,
        osversion=osversion,
        glibc=glibc,
        installed=installed,
        container=container,
    )
    rtrnr.addPFN(path_to_pfn(path, site=site))
    return rtrnr
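# Usage sketch for script_to_pegasus_executable (assumes the function above and
# its path_to_pfn helper are importable; the script path and site are illustrative).
from pathlib import Path

exe = script_to_pegasus_executable(
    Path("/home/user/project/scripts/run_step.sh"),  # illustrative script path
    site="local",
    installed=True,
)
# The logical name defaults to the file name ("run_step.sh") when *name* is omitted;
# the resulting Executable can then be registered on a DAX, e.g. dax.addExecutable(exe).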
def registerCustomExecutables(self, workflow=None): """ 2011-11-28 """ if workflow==None: workflow=self parentClass.registerCustomExecutables(self, workflow=workflow) namespace = workflow.namespace version = workflow.version operatingSystem = workflow.operatingSystem architecture = workflow.architecture clusters_size = workflow.clusters_size site_handler = workflow.site_handler vervetSrcPath = self.vervetSrcPath executableClusterSizeMultiplierList = [] #2012.8.7 each cell is a tuple of (executable, clusterSizeMultipler (0 if u do not need clustering) #mergeSameHeaderTablesIntoOne is used here on per chromosome basis, so allow clustering executableClusterSizeMultiplierList.append((workflow.mergeSameHeaderTablesIntoOne, 1)) ReplaceIndividualIDInMatrixFileWithReadGroup = Executable(namespace=namespace, name="ReplaceIndividualIDInMatrixFileWithReadGroup", version=version, \ os=operatingSystem, arch=architecture, installed=True) ReplaceIndividualIDInMatrixFileWithReadGroup.addPFN(PFN("file://" + os.path.join(vervetSrcPath, "db/ReplaceIndividualIDInMatrixFileWithReadGroup.py"), site_handler)) executableClusterSizeMultiplierList.append((ReplaceIndividualIDInMatrixFileWithReadGroup, 0.5)) SelectRowsWithinCoverageRange = Executable(namespace=namespace, name="SelectRowsWithinCoverageRange", version=version, \ os=operatingSystem, arch=architecture, installed=True) SelectRowsWithinCoverageRange.addPFN(PFN("file://" + os.path.join(vervetSrcPath, "db/SelectRowsWithinCoverageRange.py"), site_handler)) executableClusterSizeMultiplierList.append((SelectRowsWithinCoverageRange, 0.5)) self.addExecutableAndAssignProperClusterSize(executableClusterSizeMultiplierList, defaultClustersSize=self.clusters_size) #2013.06.13 self.addOneExecutableFromPathAndAssignProperClusterSize(path=os.path.join(vervetSrcPath, "db/output/OutputVCFAlignmentDepthRange.py"), \ name='OutputVCFAlignmentDepthRange', \ clusterSizeMultipler=1) self.addOneExecutableFromPathAndAssignProperClusterSize(path=os.path.join(vervetSrcPath, "db/output/OutputVRCPedigreeInTFAMGivenOrderFromFile.py"), \ name='OutputVRCPedigreeInTFAMGivenOrderFromFile', \ clusterSizeMultipler=0.8)
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a",
             config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(namespace="cluster",
                       name="level" + str(i),
                       version="1.0",
                       os="linux",
                       arch="x86_64",
                       installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN(PFN(config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg",
                     config.get('all', 'executable_site')))
    sleep.addProfile(Profile(namespace="pegasus",
                             key="clusters.size",
                             value=config.get('all', 'clusters_size')))
    sleep.addProfile(Profile(namespace="pegasus",
                             key="clusters.maxruntime",
                             value=config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range(4):
    job = Job(namespace="cluster", name="level1", version="1.0")
    job.addArguments('-a level1 -T ' + str(i + 1))
    job.addArguments('-i', a)
cluster = ADAG(config.get('all', 'workflow_name'))

input_file = config.get('all', 'input_file')
if input_file == '':
    input_file = os.getcwd()
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(PFN(config.get('all', 'file_url') + input_file + "/f.a",
             config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(namespace="cluster", name="level" + str(i), version="1.0",
                       os="linux", arch="x86",
                       installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN(PFN(config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg",
                     config.get('all', 'executable_site')))
    sleep.addProfile(Profile(namespace="pegasus", key="clusters.size",
                             value=config.get('all', 'clusters_size')))
    sleep.addProfile(Profile(namespace="pegasus", key="clusters.maxruntime",
                             value=config.get('all', 'clusters_maxruntime')))
    cluster.addExecutable(sleep)

for i in range(4):
    job = Job(namespace="cluster", name="level1", version="1.0")
    job.addArguments('-a level1 -T ' + str(i + 1))
    job.addArguments('-i', a)
    job.addProfile(Profile(namespace="pegasus", key="job.runtime", value=str(i + 1)))
    job.uses(a, link=Link.INPUT)
    cluster.addJob(job)

    for j in range(4):
        child = Job(namespace="cluster", name="level2", version="1.0")
        child.addArguments('-a level2 -T ' + str((j + 1) * 2))
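# A minimal, self-contained sketch of the same DAX3 pattern used above: declare an
# executable with a PFN, add a job that consumes an input file, and serialise the
# abstract workflow. Paths, names, and the "local" site are illustrative only.
import sys
from Pegasus.DAX3 import ADAG, Executable, File, Job, Link, PFN, Profile

dax = ADAG("sketch-wf")

# Register the executable for the workflow and ask for horizontal clustering.
keg = Executable(namespace="cluster", name="sleep", version="1.0",
                 os="linux", arch="x86_64", installed=True)
keg.addPFN(PFN("file:///usr/bin/pegasus-keg", "local"))  # illustrative path/site
keg.addProfile(Profile(namespace="pegasus", key="clusters.size", value="4"))
dax.addExecutable(keg)

# Add the input file to the DAX-level replica catalog.
fa = File("f.a")
fa.addPFN(PFN("file:///tmp/inputs/f.a", "local"))  # illustrative path/site
dax.addFile(fa)

# One job that reads the input file.
job = Job(namespace="cluster", name="sleep", version="1.0")
job.addArguments("-a", "sleep", "-T", "5")
job.uses(fa, link=Link.INPUT)
dax.addJob(job)

# Write the abstract workflow as XML to stdout.
dax.writeXML(sys.stdout)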
def run_python_on_parameters(
    self,
    job_name: Locator,
    python_module: Any,
    parameters: Union[Parameters, Dict[str, Any]],
    *,
    depends_on,
    resource_request: Optional[ResourceRequest] = None,
    override_conda_config: Optional[CondaConfiguration] = None,
    category: Optional[str] = None,
) -> DependencyNode:
    """
    Schedule a job to run the given *python_module* on the given *parameters*.

    If this job requires other jobs to be executed first, include them in *depends_on*.

    This method returns a `DependencyNode` which can be used in *depends_on* for future jobs.
    """
    job_dir = self.directory_for(job_name)
    ckpt_name = job_name / "___ckpt"
    checkpoint_path = job_dir / "___ckpt"
    depends_on = _canonicalize_depends_on(depends_on)

    if isinstance(python_module, str):
        fully_qualified_module_name = python_module
    else:
        fully_qualified_module_name = fully_qualified_name(python_module)

    # allow users to specify the parameters as a dict for convenience
    if not isinstance(parameters, Parameters):
        parameters = Parameters.from_mapping(parameters)

    # If we've already scheduled this identical job, then don't schedule it again.
    params_sink = CharSink.to_string()
    YAMLParametersWriter().write(parameters, params_sink)
    signature = (fully_qualified_module_name, params_sink.last_string_written)
    if signature in self._signature_to_job:
        logging.info("Job %s recognized as a duplicate", job_name)
        return self._signature_to_job[signature]

    script_path = job_dir / "___run.sh"
    stdout_path = parameters.string(
        "logfile", default=str((job_dir / "___stdout.log").absolute())
    )
    self._conda_script_generator.write_shell_script_to(
        entry_point_name=fully_qualified_module_name,
        parameters=parameters,
        working_directory=job_dir,
        script_path=script_path,
        params_path=job_dir / "____params.params",
        stdout_file=stdout_path,
        ckpt_path=checkpoint_path,
        override_conda_config=override_conda_config,
    )
    script_executable = Executable(
        namespace=self._namespace,
        name=str(job_name).replace("/", "_"),
        version="4.0",
        os="linux",
        arch="x86_64",
    )
    script_executable.addPFN(path_to_pfn(script_path, site=self._default_site))
    if not self._job_graph.hasExecutable(script_executable):
        self._job_graph.addExecutable(script_executable)

    job = Job(script_executable)
    self._job_graph.addJob(job)
    for parent_dependency in depends_on:
        if parent_dependency.job:
            self._job_graph.depends(job, parent_dependency.job)
        for out_file in parent_dependency.output_files:
            job.uses(out_file, link=Link.INPUT)

    if resource_request is not None:
        resource_request = self.default_resource_request.unify(resource_request)
    else:
        resource_request = self.default_resource_request

    if category:
        job.profile(Namespace.DAGMAN, "category", category)
    resource_request.apply_to_job(job, job_name=self._job_name_for(job_name))

    # Handle Output Files
    # This is currently only handled as the checkpoint file
    # See: https://github.com/isi-vista/vista-pegasus-wrapper/issues/25
    checkpoint_pegasus_file = path_to_pegasus_file(
        checkpoint_path, site=self._default_site, name=f"{ckpt_name}"
    )
    if checkpoint_pegasus_file not in self._added_files:
        self._job_graph.addFile(checkpoint_pegasus_file)
        self._added_files.add(checkpoint_pegasus_file)

    # If the checkpoint file already exists, we want to add it to the replica catalog
    # so that we don't run the job corresponding to the checkpoint file again
    if checkpoint_path.exists():
        with self._replica_catalog.open("a+") as handle:
            handle.write(
                f"{ckpt_name} file://{checkpoint_path} site={self._default_site}\n"
            )

    job.uses(checkpoint_pegasus_file, link=Link.OUTPUT, transfer=True)

    dependency_node = DependencyNode.from_job(
        job, output_files=[checkpoint_pegasus_file]
    )
    self._signature_to_job[signature] = dependency_node

    logging.info("Scheduled Python job %s", job_name)
    return dependency_node
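# Hypothetical usage sketch for run_python_on_parameters. `workflow_builder` is
# assumed to be an instance of the class defining the method above, and
# `train_locator` / `preprocess_node` are assumed to be a Locator and a
# DependencyNode created earlier; the module name and parameters are illustrative.
train_node = workflow_builder.run_python_on_parameters(
    train_locator,                            # Locator naming the job
    "my_project.train",                       # fully qualified module to execute
    {"learning_rate": 0.001, "epochs": 10},   # plain dicts are converted to Parameters
    depends_on=[preprocess_node],             # run only after the preprocessing job
    category="gpu_jobs",                      # optional DAGMan throttling category
)
# train_node can in turn be passed in depends_on for downstream jobs.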