Example #1
def script_to_pegasus_executable(
        path: Path,
        name: Optional[str] = None,
        *,
        site: str = "local",
        namespace: Optional[str] = None,
        version: Optional[str] = None,
        arch: Optional[Arch] = None,
        os: Optional[OS] = None,
        osrelease: Optional[str] = None,
        osversion: Optional[str] = None,
        glibc: Optional[str] = None,
        installed: Optional[bool] = None,
        container: Optional[str] = None) -> Executable:
    """
    Turns a script path into a pegasus Executable

    Arguments:
        *name*: Logical name of executable
        *namespace*: Executable namespace
        *version*: Executable version
        *arch*: Architecture that this exe was compiled for
        *os*: Name of os that this exe was compiled for
        *osrelease*: Release of os that this exe was compiled for
        *osversion*: Version of os that this exe was compiled for
        *glibc*: Version of glibc this exe was compiled against
        *installed*: Is the executable installed (true), or stageable (false)
        *container*: Optional attribute to specify the container to use
    """

    # Default the logical name to the script's file name.
    executable = Executable(
        path.name if name is None else name,
        namespace=namespace,
        version=version,
        arch=arch,
        os=os,
        osrelease=osrelease,
        osversion=osversion,
        glibc=glibc,
        installed=installed,
        container=container,
    )
    # Attach a physical file name (PFN) for the requested site.
    executable.addPFN(path_to_pfn(path, site=site))
    return executable
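A minimal usage sketch for the helper above; the script path and the keyword values are illustrative only, and the returned object is a regular DAX3 Executable:

from pathlib import Path

# Hypothetical script path and settings; adjust to your workflow layout.
preprocess = script_to_pegasus_executable(
    Path("/home/user/scripts/preprocess.sh"),
    site="local",
    version="1.0",
    installed=False,
)
# The returned Executable already carries a PFN for the given site and can
# be registered with a DAX, e.g. dax.addExecutable(preprocess).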
Example #2
else:
    input_file += '/' + os.getenv('USER') + '/inputs'

# Add input file to the DAX-level replica catalog
a = File("f.a")
a.addPFN(
    PFN(
        config.get('all', 'file_url') + input_file + "/f.a",
        config.get('all', 'file_site')))
cluster.addFile(a)

# One clustered "level" executable per workflow stage, both mapped to pegasus-keg.
for i in range(1, 3):
    sleep = Executable(namespace="cluster",
                       name="level" + str(i),
                       version="1.0",
                       os="linux",
                       arch="x86_64",
                       installed=config.getboolean('all',
                                                   'executable_installed'))
    sleep.addPFN(
        PFN(
            config.get('all', 'executable_url') + sys.argv[1] +
            "/bin/pegasus-keg", config.get('all', 'executable_site')))
    sleep.addProfile(
        Profile(namespace="pegasus",
                key="clusters.size",
                value=config.get('all', 'clusters_size')))
    sleep.addProfile(
        Profile(namespace="pegasus",
                key="clusters.maxruntime",
                value=config.get('all', 'clusters_maxruntime')))
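The snippet above pulls every site- and executable-specific setting from a config object (apparently configparser-style, given the get(section, option) calls). A hypothetical configuration covering the keys it reads might look like this; only the key names come from the snippet, the values are made up:

from configparser import ConfigParser

# Illustrative values for the [all] section the example reads.
config = ConfigParser()
config.read_dict({
    "all": {
        "file_url": "gsiftp://storage.example.org",
        "file_site": "storage",
        "executable_url": "gsiftp://storage.example.org",
        "executable_site": "storage",
        "executable_installed": "false",
        "clusters_size": "3",
        "clusters_maxruntime": "7",
    }
})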
Example #3
    def run_python_on_parameters(
        self,
        job_name: Locator,
        python_module: Any,
        parameters: Union[Parameters, Dict[str, Any]],
        *,
        depends_on,
        resource_request: Optional[ResourceRequest] = None,
        override_conda_config: Optional[CondaConfiguration] = None,
        category: Optional[str] = None,
    ) -> DependencyNode:
        """
        Schedule a job to run the given *python_module* on the given *parameters*.

        If this job requires other jobs to be executed first,
        include them in *depends_on*.

        This method returns a `DependencyNode` which can be used in *depends_on*
        for future jobs.
        """
        job_dir = self.directory_for(job_name)
        ckpt_name = job_name / "___ckpt"
        checkpoint_path = job_dir / "___ckpt"

        depends_on = _canonicalize_depends_on(depends_on)
        if isinstance(python_module, str):
            fully_qualified_module_name = python_module
        else:
            fully_qualified_module_name = fully_qualified_name(python_module)

        # allow users to specify the parameters as a dict for convenience
        if not isinstance(parameters, Parameters):
            parameters = Parameters.from_mapping(parameters)

        # If we've already scheduled this identical job,
        # then don't schedule it again.
        params_sink = CharSink.to_string()
        YAMLParametersWriter().write(parameters, params_sink)
        signature = (fully_qualified_module_name,
                     params_sink.last_string_written)
        if signature in self._signature_to_job:
            logging.info("Job %s recognized as a duplicate", job_name)
            return self._signature_to_job[signature]

        script_path = job_dir / "___run.sh"
        stdout_path = parameters.string(
            "logfile", default=str((job_dir / "___stdout.log").absolute()))
        self._conda_script_generator.write_shell_script_to(
            entry_point_name=fully_qualified_module_name,
            parameters=parameters,
            working_directory=job_dir,
            script_path=script_path,
            params_path=job_dir / "____params.params",
            stdout_file=stdout_path,
            ckpt_path=checkpoint_path,
            override_conda_config=override_conda_config,
        )
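        # Register the generated shell script as a site-specific Pegasus executable.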
        script_executable = Executable(
            namespace=self._namespace,
            name=str(job_name).replace("/", "_"),
            version="4.0",
            os="linux",
            arch="x86_64",
        )
        script_executable.addPFN(
            path_to_pfn(script_path, site=self._default_site))
        if not self._job_graph.hasExecutable(script_executable):
            self._job_graph.addExecutable(script_executable)
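        # Create the job itself and wire in dependency edges and input files
        # from the jobs listed in depends_on.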
        job = Job(script_executable)
        self._job_graph.addJob(job)
        for parent_dependency in depends_on:
            if parent_dependency.job:
                self._job_graph.depends(job, parent_dependency.job)
            for out_file in parent_dependency.output_files:
                job.uses(out_file, link=Link.INPUT)

        if resource_request is not None:
            resource_request = self.default_resource_request.unify(
                resource_request)
        else:
            resource_request = self.default_resource_request

        if category:
            job.profile(Namespace.DAGMAN, "category", category)
        resource_request.apply_to_job(job,
                                      job_name=self._job_name_for(job_name))

        # Handle Output Files
        # This is currently only handled as the checkpoint file
        # See: https://github.com/isi-vista/vista-pegasus-wrapper/issues/25
        checkpoint_pegasus_file = path_to_pegasus_file(checkpoint_path,
                                                       site=self._default_site,
                                                       name=f"{ckpt_name}")

        if checkpoint_pegasus_file not in self._added_files:
            self._job_graph.addFile(checkpoint_pegasus_file)
            self._added_files.add(checkpoint_pegasus_file)

        # If the checkpoint file already exists, we want to add it to the replica catalog
        # so that we don't run the job corresponding to the checkpoint file again
        if checkpoint_path.exists():
            with self._replica_catalog.open("a+") as handle:
                handle.write(
                    f"{ckpt_name} file://{checkpoint_path} site={self._default_site}\n"
                )

        job.uses(checkpoint_pegasus_file, link=Link.OUTPUT, transfer=True)

        dependency_node = DependencyNode.from_job(
            job, output_files=[checkpoint_pegasus_file])
        self._signature_to_job[signature] = dependency_node

        logging.info("Scheduled Python job %s", job_name)
        return dependency_node
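A hypothetical call site for run_python_on_parameters: here "wrapper" stands for whatever object exposes the method, and base_locator and preprocess_job are assumed to come from earlier workflow-building calls; the module and parameter names are placeholders.

# Hypothetical usage sketch; none of these names are defined by the snippet above.
train_job = wrapper.run_python_on_parameters(
    job_name=base_locator / "train_model",
    python_module="my_project.train",             # dotted module name or module object
    parameters={"learning_rate": 0.001, "epochs": 10},
    depends_on=[preprocess_job],                  # DependencyNodes from earlier jobs
    category="training",
)
# train_job is itself a DependencyNode and can appear in later depends_on lists.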