Example #1
    def start_job(self):
        command = ["bsub", "-env all"]

        queue = get_setting("queue", task=self.task, default=False)
        if queue is not False:
            command += ["-q", queue]

        job_name = get_setting("job_name", task=self.task, default=False)
        if job_name is not False:
            command += ["-J", job_name]

        log_file_dir = get_log_file_dir(self.task)
        os.makedirs(log_file_dir, exist_ok=True)

        stdout_log_file = os.path.join(log_file_dir, "stdout")
        stderr_log_file = os.path.join(log_file_dir, "stderr")

        command += ["-eo", stderr_log_file, "-oo", stdout_log_file]

        executable_file = create_executable_wrapper(self.task)
        command.append(executable_file)

        output = subprocess.check_output(command)
        output = output.decode()

        # Output of the form Job <72065926> is submitted to default queue <s>.
        match = re.search(r"<[0-9]+>", output)
        if not match:
            raise RuntimeError("Batch submission failed with output " + output)

        self._batch_job_id = match.group(0)[1:-1]
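For context: once ``_batch_job_id`` is stored, the batch process typically polls LSF for the state of the submitted job. The following is a minimal sketch of such a poll, assuming an LSF version recent enough that ``bjobs`` supports the ``-noheader`` and ``-o`` format flags; the method name and the plain-string return value are illustrative assumptions, not part of the example above.

    def get_job_status(self):
        # "bjobs -noheader -o stat <id>" prints only the status column,
        # e.g. PEND, RUN, DONE or EXIT.
        output = subprocess.check_output(
            ["bjobs", "-noheader", "-o", "stat", self._batch_job_id])
        return output.decode().strip()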
Example #2
    def start_job(self):
        log_file_dir = get_log_file_dir(self.task)
        os.makedirs(log_file_dir, exist_ok=True)
        stdout_log_file = os.path.join(log_file_dir, "stdout")
        stderr_log_file = os.path.join(log_file_dir, "stderr")

        executable_file = create_executable_wrapper(self.task)

        with open(stdout_log_file, "w") as stdout_file, \
                open(stderr_log_file, "w") as stderr_file:
            self._process = subprocess.Popen([executable_file],
                                             stdout=stdout_file,
                                             stderr=stderr_file)
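Because the local process handle is kept in ``self._process``, its state can later be checked without blocking. A minimal sketch, with the method name and status strings as illustrative assumptions:

    def get_job_status(self):
        # poll() returns None while the subprocess is still running,
        # otherwise its exit code.
        return_code = self._process.poll()
        if return_code is None:
            return "running"
        return "successful" if return_code == 0 else "failed"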
Example #3
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        #: gbasf2 project name; must be set as a property/attribute of the task, e.g. a luigi parameter.
        # Setting it via a settings.json file is not supported, to ensure that users choose unique project names.
        self.gbasf2_project_name = get_unique_project_name(self.task)

        #: Output file directory of the task to wrap with gbasf2, where we will
        # store the pickled basf2 path and the steering file created to execute
        # that path.
        task_file_dir = get_task_file_dir(self.task)
        os.makedirs(task_file_dir, exist_ok=True)
        #: file name in which the pickled basf2 path from ``self.task.create_path()`` will be stored
        self.pickle_file_path = os.path.join(task_file_dir,
                                             "serialized_basf2_path.pkl")
        #: file name for the steering file that executes the pickled path, which will be sent to the grid
        self.wrapper_file_path = os.path.join(task_file_dir,
                                              "steering_file_wrapper.py")

        self.log_file_dir = get_log_file_dir(self.task)
        os.makedirs(self.log_file_dir, exist_ok=True)

        self.dirac_user = get_dirac_user()
        #: Maximum number of times that each job in the project can be rescheduled until the project is declared as failed.
        self.max_retries = get_setting("gbasf2_max_retries",
                                       default=0,
                                       task=self.task)

        #: Store the number of times each job has been rescheduled
        self.n_retries_by_job = Counter()

        #: Local storage for ``n_retries_by_job`` counter
        # so that it persists even if luigi process is killed and restarted.
        self.retries_file_path = os.path.join(self.log_file_dir,
                                              "n_retries_by_grid_job.json")
        if os.path.isfile(self.retries_file_path):
            with open(self.retries_file_path, "r") as retries_file:
                retries_from_file = json.load(retries_file)
                self.n_retries_by_job.update(retries_from_file)

        # Cache the last-seen dictionary of job counts by status so that the
        # job status is only printed when it changes.
        self._n_jobs_by_status = ""

        # Store whether the job has already been successful in a variable because
        # there are actions we want to perform only the first time that
        # ``get_job_status`` returns a success.
        self._project_had_been_successful = False
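The load of ``retries_file_path`` at the end of ``__init__`` implies a matching write whenever the counter changes. A minimal sketch of that counterpart, with the method name as an assumption; since ``Counter`` is a ``dict`` subclass, it serializes to JSON directly:

    def _write_retries_file(self):
        # Persist the per-job retry counter so that a restarted luigi
        # process resumes with the counts loaded in ``__init__``.
        with open(self.retries_file_path, "w") as retries_file:
            json.dump(self.n_retries_by_job, retries_file)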
Example #4
def run_task_remote(task):
    """
    Run a given task "remotely", which means
    create an exectable script and call it via a subprocess 
    call. 
    """
    log_file_dir = get_log_file_dir(task)
    os.makedirs(log_file_dir, exist_ok=True)
    stdout_log_file = os.path.join(log_file_dir, "stdout")
    stderr_log_file = os.path.join(log_file_dir, "stderr")

    executable_file = create_executable_wrapper(task)

    add_on_failure_function(task)

    with open(stdout_log_file, "w") as stdout_file, \
            open(stderr_log_file, "w") as stderr_file:
        return_code = subprocess.call([executable_file],
                                      stdout=stdout_file, stderr=stderr_file)

    if return_code:
        raise RuntimeError(f"Execution failed with return code {return_code}")
Example #5
    def _create_htcondor_submit_file(self):
        submit_file_content = []

        # Specify where to write the log to
        log_file_dir = get_log_file_dir(self.task)
        os.makedirs(log_file_dir, exist_ok=True)

        stdout_log_file = os.path.abspath(os.path.join(log_file_dir, "stdout"))
        submit_file_content.append(f"output = {stdout_log_file}")

        stderr_log_file = os.path.abspath(os.path.join(log_file_dir, "stderr"))
        submit_file_content.append(f"error = {stderr_log_file}")

        job_log_file = os.path.abspath(os.path.join(log_file_dir, "job.log"))
        submit_file_content.append(f"log = {job_log_file}")

        # Specify the executable
        executable_file = create_executable_wrapper(self.task)
        submit_file_content.append(
            f"executable = {os.path.basename(executable_file)}")

        # Specify additional settings
        general_settings = get_setting("htcondor_settings", dict())
        try:
            general_settings.update(self.task.htcondor_settings)
        except AttributeError:
            pass

        transfer_files = get_setting("transfer_files",
                                     task=self.task,
                                     default=[])
        if transfer_files:
            working_dir = get_setting("working_dir",
                                      task=self.task,
                                      default="")
            if working_dir != ".":
                raise ValueError(
                    "If using transfer_files, the working_dir must be explicitly set to '.'"
                )

            general_settings.setdefault("should_transfer_files", "YES")
            general_settings.setdefault("when_to_transfer_output", "ON_EXIT")

            transfer_files = set(transfer_files)

            for transfer_file in transfer_files:
                if os.path.abspath(transfer_file) != transfer_file:
                    raise ValueError(
                        "You should only give absolute file names in transfer_files! "
                        f"{os.path.abspath(transfer_file)} != {transfer_file}")

            env_setup_script = get_setting("env_script",
                                           task=self.task,
                                           default="")
            if env_setup_script:
                # TODO: make sure to call it relatively
                transfer_files.add(os.path.abspath(env_setup_script))

            general_settings.setdefault("transfer_input_files",
                                        ",".join(transfer_files))

        job_name = get_setting("job_name", task=self.task, default=False)
        if job_name is not False:
            general_settings.setdefault("JobBatchName", job_name)

        for key, item in general_settings.items():
            submit_file_content.append(f"{key} = {item}")

        # Finally also start the process
        submit_file_content.append("queue 1")

        # Now we can write the submit file
        output_path = get_task_file_dir(self.task)
        submit_file_path = os.path.join(output_path, "job.submit")

        os.makedirs(output_path, exist_ok=True)

        with open(submit_file_path, "w") as submit_file:
            submit_file.write("\n".join(submit_file_content))

        return submit_file_path
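The returned submit file path would typically be handed to ``condor_submit``. The sketch below mirrors the job-id parsing from Example #1; the method name and the use of ``cwd`` (so that the relative ``executable`` entry in the submit file resolves) are assumptions, while the quoted output line is the standard ``condor_submit`` message.

    def start_job(self):
        submit_file_path = self._create_htcondor_submit_file()
        # condor_submit prints e.g. "1 job(s) submitted to cluster 123."
        output = subprocess.check_output(
            ["condor_submit", os.path.basename(submit_file_path)],
            cwd=os.path.dirname(submit_file_path)).decode()
        match = re.search(r"submitted to cluster (\d+)", output)
        if not match:
            raise RuntimeError("HTCondor submission failed with output " + output)
        self._batch_job_id = match.group(1)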