Example #1
 def execute(self):
     """Execute / submit a job with Slurm."""
     self.cmd = self._encode_cmd(" ".join(self.cmd))
     initialize_krb5_token(workflow_uuid=self.workflow_uuid)
     self.slurm_connection = SSHClient(
         hostname=SlurmJobManagerCERN.SLURM_HEADNODE_HOSTNAME,
         port=SlurmJobManagerCERN.SLURM_HEADNODE_PORT,
     )
     self._transfer_inputs()
     self._dump_job_file()
     self._dump_job_submission_file()
     stdout = self.slurm_connection.exec_command(
         "cd {} && sbatch --parsable {}".format(
             SlurmJobManagerCERN.SLURM_WORKSAPCE_PATH,
             self.job_description_file))
     backend_job_id = stdout.rstrip()
     return backend_job_id
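The snippet above relies on a private helper _encode_cmd that is not shown. A minimal sketch of what such a helper could look like, assuming its job is simply to base64-encode the command so it survives quoting when passed through the remote shell (the helper name comes from the call site above; the implementation below is an assumption, not the verified original):

    import base64  # module-level import, shown here for completeness

    @staticmethod
    def _encode_cmd(cmd):
        # Hypothetical sketch: base64-encode the command, then have the remote
        # shell decode and execute it, avoiding quoting/escaping issues over SSH.
        encoded_cmd = base64.b64encode(cmd.encode("utf-8")).decode("utf-8")
        return "echo {} | base64 -d | bash".format(encoded_cmd)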
Example #2
 def execute(self):
     """Execute / submit a job with HTCondor."""
     os.chdir(self.workflow_workspace)
     initialize_krb5_token(workflow_uuid=self.workflow_uuid)
     job_ad = classad.ClassAd()
     job_ad["JobDescription"] = (
         self.workflow.get_full_workflow_name() + "_" + self.job_name
     )
     job_ad["JobMaxRetries"] = 3
     job_ad["LeaveJobInQueue"] = classad.ExprTree(
         "(JobStatus == 4) && ((StageOutFinish =?= UNDEFINED) || "
         "(StageOutFinish == 0))"
     )
     job_ad["Cmd"] = (
         "./job_wrapper.sh"
         if not self.unpacked_img
         else "./job_singularity_wrapper.sh"
     )
     if not self.unpacked_img:
         job_ad["Arguments"] = self._format_arguments()
         job_ad["DockerImage"] = self.docker_img
         job_ad["WantDocker"] = True
     job_ad["Environment"] = self._format_env_vars()
     job_ad["Out"] = classad.ExprTree(
         'strcat("reana_job.", ClusterId, ".", ProcId, ".out")'
     )
     job_ad["Err"] = classad.ExprTree(
         'strcat("reana_job.", ClusterId, ".", ProcId, ".err")'
     )
     job_ad["log"] = classad.ExprTree('strcat("reana_job.", ClusterId, ".err")')
     job_ad["ShouldTransferFiles"] = "YES"
     job_ad["WhenToTransferOutput"] = "ON_EXIT"
     job_ad["TransferInput"] = self._get_input_files()
     job_ad["TransferOutput"] = "."
     job_ad["PeriodicRelease"] = classad.ExprTree("(HoldReasonCode == 35)")
     job_ad["MaxRunTime"] = 3600
     future = current_app.htcondor_executor.submit(self._submit, job_ad)
     clusterid = future.result()
     return clusterid
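The execute() snippet above hands the ClassAd to a private helper _submit via the application's HTCondor executor; that helper is not shown. A minimal sketch of what it could look like, assuming the classic htcondor Python bindings in which Schedd.submit() accepts a raw ClassAd and the input files are spooled to the schedd (the name and the exact submit signature are assumptions, not the verified original):

    def _submit(self, job_ad):
        # Hypothetical sketch: submit the ClassAd to the local schedd and spool
        # the input files. Older htcondor bindings return the cluster id directly;
        # newer bindings expect an htcondor.Submit object instead of a ClassAd.
        ads = []
        schedd = htcondor.Schedd()
        clusterid = schedd.submit(job_ad, 1, True, ads)  # count=1, spool=True
        schedd.spool(ads)
        return clusterid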
    def __init__(
        self,
        docker_img=None,
        cmd=None,
        prettified_cmd=None,
        env_vars=None,
        workflow_uuid=None,
        workflow_workspace=None,
        cvmfs_mounts="false",
        shared_file_system=False,
        job_name=None,
        kerberos=False,
        kubernetes_uid=None,
        unpacked_img=False,
        htcondor_max_runtime="",
        htcondor_accounting_group=None,
    ):
        """Instanciate HTCondor job manager.

        :param docker_img: Docker image.
        :type docker_img: str
        :param cmd: Command to execute.
        :type cmd: list
        :param prettified_cmd: Prettified version of the command to execute.
        :type prettified_cmd: str
        :param env_vars: Environment variables.
        :type env_vars: dict
        :param workflow_uuid: Unique workflow id.
        :type workflow_uuid: str
        :param workflow_workspace: Workflow workspace path.
        :type workflow_workspace: str
        :param cvmfs_mounts: List of CVMFS mounts as a string.
        :type cvmfs_mounts: str
        :param shared_file_system: Whether a shared file system is available.
        :type shared_file_system: bool
        :param job_name: Name of the job.
        :type job_name: str
        :param kerberos: Whether Kerberos authentication is required.
        :type kerberos: bool
        :param kubernetes_uid: UID of the user to run the job as.
        :type kubernetes_uid: int
        :param unpacked_img: Whether an unpacked image should be used.
        :type unpacked_img: bool
        :param htcondor_max_runtime: Maximum runtime of an HTCondor job.
        :type htcondor_max_runtime: str
        :param htcondor_accounting_group: Accounting group of an HTCondor job.
        :type htcondor_accounting_group: str
        """
        super(HTCondorJobManagerCERN, self).__init__(
            docker_img=docker_img,
            cmd=cmd,
            prettified_cmd=prettified_cmd,
            env_vars=env_vars,
            workflow_uuid=workflow_uuid,
            workflow_workspace=workflow_workspace,
            job_name=job_name,
        )
        self.compute_backend = "HTCondor"
        self.cvmfs_mounts = cvmfs_mounts
        self.shared_file_system = shared_file_system
        self.workflow = self._get_workflow()
        self.unpacked_img = unpacked_img
        self.htcondor_max_runtime = htcondor_max_runtime
        self.htcondor_accounting_group = htcondor_accounting_group

        # We need to import the htcondor package at runtime, after the Kerberos
        # environment has been fully initialised. Without a valid Kerberos ticket,
        # the import fails with: ERROR: Unauthorized 401 - do you have
        # authentication tokens? Error "/usr/bin/myschedd.sh |"
        initialize_krb5_token(workflow_uuid=self.workflow_uuid)
        globals()["htcondor"] = __import__("htcondor")