Example #1
    def _run_job(self, job: Job, params_it: Iterable[ParamSet]) -> Iterator[Tuple[ParamSet, Result]]:
        # TODO: introduce public API of Process to get working_directory
        if job.process._working_directory is not None:
            raise Exception('HTCondor does not support setting the working_directory on Process')

        with ExitStack() as stack:
            cluster_generator = _JobClusterGenerator(self, job, params_it)
            stack.enter_context(cluster_generator)

            submit = Submit()

            with self._schedd.transaction() as txn:
                submit_result = submit.queue_with_itemdata(txn, itemdata=iter(cluster_generator))

            stack.callback(
                self._schedd.act, JobAction.Remove, f'ClusterId == {submit_result.cluster()}',
            )

            job_states: Dict[int, _JobState] = {}

            # poll the schedd until every proc in the cluster reports as completed
            for sleep_time in _get_poll_sleep_times():
                sleep(sleep_time)

                query_result = self._schedd.xquery(
                    requirements=f'ClusterId == {submit_result.cluster()}',
                    projection=_JobState.projection(),
                )

                job_states.clear()
                for job_state_ad in query_result:
                    job_state = _JobState.from_class_ad(job_state_ad)
                    job_states[job_state.proc_id] = job_state

                counts = _StatusCounts()
                counts.add_jobs(job_states.values())

                print(counts)

                if counts.completed == counts.total:
                    break

            results: List[Tuple[ParamSet, Result]] = []
            for proc_id, process in enumerate(cluster_generator.processes):
                job_state = job_states[proc_id]
                if job_state.exit_by_signal:
                    raise _ProcessFailedError(
                        f'Process exited due to receiving signal {job_state.exit_signal}',
                    )
                if job_state.exit_code is None:
                    raise Exception('Exit code received from HTCondor is None')

                result = process.result(job_state.exit_code)
                self._check_for_failure(job, result, process.params)
                results.append((process.params, result))

            self._cleanup_handlers += cluster_generator.cleanup_handlers

        return iter(results)
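The polling interval comes from the _get_poll_sleep_times helper, which is not shown in the snippet. A minimal sketch of what such a generator might look like (the back-off schedule is an assumption, not the original implementation):

def _get_poll_sleep_times() -> Iterator[float]:
    # hypothetical schedule: poll quickly at first, then settle at a fixed cap
    delay = 1.0
    while True:
        yield delay
        delay = min(delay * 2, 30.0)
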
Example #2
def submit_dag(dag_file):
    """
    Function to submit a HTCondor DAG (see
    https://htcondor.readthedocs.io/en/latest/apis/python-bindings/tutorials/DAG-Creation-And-Submission.html#Submit-the-DAG-via-the-Python-bindings)

    Parameters
    ----------
    dag_file: str, Path
        Path to the DAG file.
    """

    # create submit description for the DAG
    dag_submit = Submit.from_dag(str(dag_file), {"force": 1})

    # directory containing the DAG file (fall back to "." if there is no directory part)
    dagdir = os.path.dirname(str(dag_file)) or "."

    # get current directory
    cwd = os.getcwd()

    # move into the DAG directory
    os.chdir(dagdir)

    try:
        # connect to the scheduler (schedd) and queue the DAG submit description
        schedd = Schedd()
        with schedd.transaction() as txn:
            _ = dag_submit.queue(txn)
    finally:
        # switch back to the original directory even if submission fails
        os.chdir(cwd)
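A hedged usage sketch; the DAG path below is purely illustrative:

submit_dag("analysis/my_workflow.dag")
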
Example #3
    def generate_submit_job(self, submitoptions=None):
        """
        Generate a submit object.

        Parameters
        ----------
        submitoptions: dict, optional
            A dictionary containing any additional options for the submit file.
        """

        # dictionary to contain specific submit options
        submit = {}

        submit.update(copy.deepcopy(self.submit_options))
        submit.update(copy.deepcopy(submitoptions or {}))

        # add arguments
        submit["arguments"] = "$(ARGS)"

        # add requirements
        if isinstance(self.requirements, list):
            if len(self.requirements) > 0:
                submit["requirements"] = " && ".join(self.requirements)
        else:
            submit["requirements"] = self.requirements

        return Submit(submit)
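A hedged sketch of how the returned Submit object might be queued, assuming the surrounding class exposes this method and that per-job arguments are bound through the $(ARGS) macro (the schedd handle and itemdata values are illustrative):

submit = job.generate_submit_job({"request_memory": "2GB"})
schedd = Schedd()
with schedd.transaction() as txn:
    submit.queue_with_itemdata(txn, itemdata=iter([{"ARGS": "--config run.ini"}]))
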
Example #4
def execute_submit(submit_object: htcondor.Submit, itemdata: List[Dict[str, str]]) -> int:
    """
    Execute a map via the scheduler defined by the settings.
    Return the HTCondor cluster ID of the map's jobs.
    """
    schedd = get_schedd()
    with schedd.transaction() as txn:
        # queue one job per itemdata entry (count=1)
        submit_result = submit_object.queue_with_itemdata(
            txn,
            1,
            iter(itemdata),
        )

        return submit_result.cluster()
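A hedged usage sketch; the submit description and itemdata below are illustrative, not part of the original module:

sub = htcondor.Submit({
    "executable": "/bin/echo",
    "arguments": "$(word)",
    "output": "echo-$(ProcId).out",
})
cluster_id = execute_submit(sub, [{"word": "hello"}, {"word": "world"}])
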
Example #5
    def ppplots(self):
        """
        Set up job to create PP plots.
        """

        from htcondor import Submit

        # get executable
        jobexec = shutil.which("cwinpy_pe_generate_pp_plots")

        # set log directory
        logdir = os.path.join(os.path.abspath(self.basedir), "log")
        self.makedirs(logdir)

        subdict = {
            "universe": "local",
            "executable": jobexec,
            "getenv": self.getenv,
            "arguments": "$(ARGS)",
            "log": os.path.join(logdir, "cwinpy_pe_pp_plots.log"),
            "error": os.path.join(logdir, "cwinpy_pe_pp_plots.err"),
            "output": os.path.join(logdir, "cwinpy_pe_pp_plots.out"),
        }

        if self.accountgroup is not None:
            subdict["accounting_group"] = self.accountgroup

        if self.accountuser is not None:
            subdict["accounting_group_user"] = self.accountuser

        submit = Submit(subdict)

        jobargs = "--path '{}' ".format(os.path.join(self.basedir, "results", "*", "*"))
        jobargs += "--output {} ".format(os.path.join(self.basedir, "ppplot.png"))
        if self.outputsnr:
            jobargs += "--snrs "

        # macro values substituted into the submit description's $(ARGS)
        jobvars = [{"ARGS": jobargs}]

        # add child layer to dag
        nodes = self.runner.dag.select(lambda x: x.name.startswith("cwinpy_pe"))
        nodes.child_layer(
            name="cwinpy_pe_pp_plots",
            submit_description=submit,
            vars=jobvars,
        )
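Nothing runs until the assembled DAG is written out and submitted; a hedged sketch of that final step with htcondor.dags, assuming dag is the object referenced above as self.runner.dag and basedir is the run directory:

from htcondor import dags

dag_file = dags.write_dag(dag, basedir)
# hand dag_file to condor_submit_dag, or to a helper such as submit_dag in Example #2
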
Example #6
def condor_submit(txn, priority: int) -> int:
    """Submit a respdiff/resperf/distrotest job from the current working directory and return its cluster ID."""
    directory = os.getcwd()
    input_files = get_all_files(directory)

    if 'run_respdiff.sh' in input_files:
        executable = 'run_respdiff.sh'
        output_files = [
            'j$(Cluster).$(Process)_docker.txt',
            'j$(Cluster).$(Process)_report.json',
            'j$(Cluster).$(Process)_report.diffrepro.json',
            'j$(Cluster).$(Process)_report.txt',
            'j$(Cluster).$(Process)_report.diffrepro.txt',
            'j$(Cluster).$(Process)_histogram.tar.gz',
            'j$(Cluster).$(Process)_logs.tar.gz'
        ]
        if 'stats.json' in input_files:
            output_files.extend([
                'j$(Cluster).$(Process)_report.noref.json',
                'j$(Cluster).$(Process)_report.noref.txt',
                'j$(Cluster).$(Process)_report.diffrepro.noref.json',
                'j$(Cluster).$(Process)_report.diffrepro.noref.txt',
                # 'j$(Cluster).$(Process)_dnsviz.json.gz',
                # 'j$(Cluster).$(Process)_report.noref.dnsviz.json',
                # 'j$(Cluster).$(Process)_report.noref.dnsviz.txt',
            ])
    elif 'run_resperf.sh' in input_files:
        executable = 'run_resperf.sh'
        output_files = [
            'j$(Cluster).$(Process)_exitcode',
            'j$(Cluster).$(Process)_docker.txt',
            'j$(Cluster).$(Process)_resperf.txt',
            'j$(Cluster).$(Process)_logs.tar.gz'
        ]
    elif 'run_distrotest.sh' in input_files:
        executable = 'run_distrotest.sh'
        output_files = [
            'j$(Cluster).$(Process)_exitcode',
            'j$(Cluster).$(Process)_vagrant.log.txt'
        ]
    else:
        raise RuntimeError(
            "The provided directory doesn't look like a respdiff/resperf job. "
            "{}/run_*.sh is missing!".format(directory))

    # create batch name from dir structure
    commit_dir_path, test_case = os.path.split(directory)
    _, commit_dir = os.path.split(commit_dir_path)
    batch_name = commit_dir + '_' + test_case

    submit = Submit({
        'priority': str(priority),
        'executable': executable,
        'arguments': '$(Cluster) $(Process)',
        'error': 'j$(Cluster).$(Process)_stderr.txt',
        'output': 'j$(Cluster).$(Process)_stdout.txt',
        'log': 'j$(Cluster).$(Process)_log.txt',
        'jobbatchname': batch_name,
        'should_transfer_files': 'YES',
        'when_to_transfer_output': 'ON_EXIT',
        'transfer_input_files': ', '.join(input_files),
        'transfer_output_files': ', '.join(output_files),
    })
    return submit.queue(txn)
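A hedged usage sketch, run from inside a prepared test-case directory (the schedd handle and priority value are illustrative):

schedd = Schedd()
with schedd.transaction() as txn:
    job_id = condor_submit(txn, priority=0)
print(f"submitted cluster {job_id}")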