def script_to_pegasus_executable(
    path: Path,
    name: Optional[str] = None,
    *,
    site: str = "local",
    namespace: Optional[str] = None,
    version: Optional[str] = None,
    arch: Optional[Arch] = None,
    os: Optional[OS] = None,
    osrelease: Optional[str] = None,
    osversion: Optional[str] = None,
    glibc: Optional[str] = None,
    installed: Optional[bool] = None,
    container: Optional[str] = None,
) -> Executable:
    """
    Turns a script path into a Pegasus Executable

    Arguments:
        *name*: Logical name of executable
        *namespace*: Executable namespace
        *version*: Executable version
        *arch*: Architecture that this exe was compiled for
        *os*: Name of os that this exe was compiled for
        *osrelease*: Release of os that this exe was compiled for
        *osversion*: Version of os that this exe was compiled for
        *glibc*: Version of glibc this exe was compiled against
        *installed*: Is the executable installed (true), or stageable (false)
        *container*: Optional attribute to specify the container to use
    """
    rtrnr = Executable(
        path.stem + path.suffix if name is None else name,
        namespace=namespace,
        version=version,
        arch=arch,
        os=os,
        osrelease=osrelease,
        osversion=osversion,
        glibc=glibc,
        installed=installed,
        container=container,
    )
    rtrnr.addPFN(path_to_pfn(path, site=site))
    return rtrnr
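# Usage sketch (illustrative only; the script path, logical name, and the `dax`
# ADAG variable below are hypothetical stand-ins, not part of this module):
#
#     preprocess_exe = script_to_pegasus_executable(
#         Path("/home/user/workflows/preprocess.sh"),
#         name="preprocess",
#         site="local",
#         os="linux",
#         installed=False,  # stage the script to the compute site
#     )
#     dax.addExecutable(preprocess_exe)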
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(
    PFN(
        config.get('all', 'file_url') + input_file + "/f.a",
        config.get('all', 'file_site')))
cluster.addFile(a)

for i in range(1, 3):
    sleep = Executable(
        namespace="cluster",
        name="level" + str(i),
        version="1.0",
        os="linux",
        arch="x86_64",
        installed=config.getboolean('all', 'executable_installed'))
    sleep.addPFN(
        PFN(
            config.get('all', 'executable_url') + sys.argv[1] + "/bin/pegasus-keg",
            config.get('all', 'executable_site')))
    sleep.addProfile(
        Profile(namespace="pegasus", key="clusters.size",
                value=config.get('all', 'clusters_size')))
    sleep.addProfile(
        Profile(namespace="pegasus", key="clusters.maxruntime",
                value=config.get('all', 'clusters_maxruntime')))
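# The fragment above pulls its settings from an INI-style file read with
# ConfigParser. A hypothetical [all] section supplying the keys it uses might
# look like the following (all values are made up for illustration):
#
#     [all]
#     file_url = file:///scratch/inputs
#     file_site = local
#     executable_url = file:///opt/pegasus
#     executable_site = condorpool
#     executable_installed = False
#     clusters_size = 3
#     clusters_maxruntime = 7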
def run_python_on_parameters(
    self,
    job_name: Locator,
    python_module: Any,
    parameters: Union[Parameters, Dict[str, Any]],
    *,
    depends_on,
    resource_request: Optional[ResourceRequest] = None,
    override_conda_config: Optional[CondaConfiguration] = None,
    category: Optional[str] = None,
) -> DependencyNode:
    """
    Schedule a job to run the given *python_module* on the given *parameters*.

    If this job requires other jobs to be executed first,
    include them in *depends_on*.

    This method returns a `DependencyNode` which can be used in *depends_on*
    for future jobs.
    """
    job_dir = self.directory_for(job_name)
    ckpt_name = job_name / "___ckpt"
    checkpoint_path = job_dir / "___ckpt"
    depends_on = _canonicalize_depends_on(depends_on)

    if isinstance(python_module, str):
        fully_qualified_module_name = python_module
    else:
        fully_qualified_module_name = fully_qualified_name(python_module)

    # allow users to specify the parameters as a dict for convenience
    if not isinstance(parameters, Parameters):
        parameters = Parameters.from_mapping(parameters)

    # If we've already scheduled this identical job,
    # then don't schedule it again.
    params_sink = CharSink.to_string()
    YAMLParametersWriter().write(parameters, params_sink)
    signature = (fully_qualified_module_name, params_sink.last_string_written)
    if signature in self._signature_to_job:
        logging.info("Job %s recognized as a duplicate", job_name)
        return self._signature_to_job[signature]

    script_path = job_dir / "___run.sh"
    stdout_path = parameters.string(
        "logfile", default=str((job_dir / "___stdout.log").absolute()))
    self._conda_script_generator.write_shell_script_to(
        entry_point_name=fully_qualified_module_name,
        parameters=parameters,
        working_directory=job_dir,
        script_path=script_path,
        params_path=job_dir / "____params.params",
        stdout_file=stdout_path,
        ckpt_path=checkpoint_path,
        override_conda_config=override_conda_config,
    )
    script_executable = Executable(
        namespace=self._namespace,
        name=str(job_name).replace("/", "_"),
        version="4.0",
        os="linux",
        arch="x86_64",
    )
    script_executable.addPFN(
        path_to_pfn(script_path, site=self._default_site))
    if not self._job_graph.hasExecutable(script_executable):
        self._job_graph.addExecutable(script_executable)
    job = Job(script_executable)
    self._job_graph.addJob(job)

    for parent_dependency in depends_on:
        if parent_dependency.job:
            self._job_graph.depends(job, parent_dependency.job)
        for out_file in parent_dependency.output_files:
            job.uses(out_file, link=Link.INPUT)

    if resource_request is not None:
        resource_request = self.default_resource_request.unify(
            resource_request)
    else:
        resource_request = self.default_resource_request

    if category:
        job.profile(Namespace.DAGMAN, "category", category)
    resource_request.apply_to_job(job, job_name=self._job_name_for(job_name))

    # Handle Output Files
    # This is currently only handled as the checkpoint file
    # See: https://github.com/isi-vista/vista-pegasus-wrapper/issues/25
    checkpoint_pegasus_file = path_to_pegasus_file(
        checkpoint_path, site=self._default_site, name=f"{ckpt_name}")
    if checkpoint_pegasus_file not in self._added_files:
        self._job_graph.addFile(checkpoint_pegasus_file)
        self._added_files.add(checkpoint_pegasus_file)

    # If the checkpoint file already exists, we want to add it to the replica catalog
    # so that we don't run the job corresponding to the checkpoint file again
    if checkpoint_path.exists():
        with self._replica_catalog.open("a+") as handle:
            handle.write(
                f"{ckpt_name} file://{checkpoint_path} site={self._default_site}\n"
            )

    job.uses(checkpoint_pegasus_file, link=Link.OUTPUT, transfer=True)

    dependency_node = DependencyNode.from_job(
        job, output_files=[checkpoint_pegasus_file])
    self._signature_to_job[signature] = dependency_node
    logging.info("Scheduled Python job %s", job_name)
    return dependency_node
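# Usage sketch (illustrative only; the builder instance, Locator construction,
# module names, and parameter values below are hypothetical): chain two Python
# jobs by passing the returned DependencyNode through *depends_on*.
#
#     preprocess_node = workflow_builder.run_python_on_parameters(
#         Locator(("preprocess",)),
#         "my_project.preprocess",
#         {"input_dir": "/data/raw", "output_dir": "/data/clean"},
#         depends_on=[],
#     )
#     workflow_builder.run_python_on_parameters(
#         Locator(("train",)),
#         "my_project.train",
#         {"data_dir": "/data/clean"},
#         depends_on=[preprocess_node],  # train waits for preprocess; identical jobs are deduplicated by signature
#     )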