Beispiel #1
0
def test_custom_interval(config_file):
    """Check that an explicit monitor type and interval end up in the config.

    Runs the command under test with ``-R aggregation -r 3`` and verifies
    that the file passed via ``-c`` exists and records an interval of 3
    together with the aggregation resource monitor type.
    """
    check_run_command(f"{CMD} -R aggregation -r 3 -c {config_file}")
    assert config_file.exists()

    settings = load_data(config_file)
    assert settings["resource_monitor_interval"] == 3
    monitor_type = settings["resource_monitor_type"]
    assert monitor_type == ResourceMonitorType.AGGREGATION.value
Beispiel #2
0
def test_singularity_params(config_file):
    """Check that the ``-S -C <container>`` options are stored in the config.

    After running the command under test, the file passed via ``-c`` must
    exist and contain a ``singularity_params`` entry that is enabled and
    names the container that was given on the command line.
    """
    # This is not a real container; it only has to be a path that exists.
    container = "jade"
    check_run_command(f"{CMD} -S -C {container} -c {config_file}")
    assert config_file.exists()

    singularity = load_data(config_file)["singularity_params"]
    assert singularity is not None
    assert singularity["enabled"]
    assert singularity["container"] == "jade"
def test_no_distributed_submitter(cleanup):
    """Check result processing when jobs are submitted with ``-N``.

    The test submits jobs with ``-N --no-reports``, polls (up to ten times,
    one second apart) until the first batch results file holds one line per
    command plus a header, and asserts that the processed results file still
    has a single line at that point. It then runs the try-submit and wait
    commands and asserts that the processed results file has grown to one
    line per command plus one, and that the batch results file is gone.

    ``cleanup`` is not used in the body; presumably a pytest fixture that
    prepares/removes the output directory -- confirm against conftest.
    """
    check_run_command(
        f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT} -N --no-reports"
    )

    batch_results = Path(OUTPUT) / RESULTS_DIR / "results_batch_1.csv"
    processed_results = Path(OUTPUT) / "processed_results.csv"
    # One line per command plus one extra line for the header.
    expected_line_count = NUM_COMMANDS + 1

    finished = False
    for _ in range(10):
        finished = (
            batch_results.exists()
            and len(batch_results.read_text().splitlines()) == expected_line_count
        )
        if finished:
            break
        time.sleep(1)

    assert finished
    assert len(processed_results.read_text().splitlines()) == 1

    check_run_command(f"{TRY_SUBMIT_JOBS} {OUTPUT}")
    check_run_command(f"{WAIT} --output={OUTPUT}")
    processed_lines = processed_results.read_text().splitlines()
    assert len(processed_lines) == expected_line_count
    assert not batch_results.exists()
Beispiel #4
0
    def submit_jobs(self, cluster, force_local=False):
        """Submit simulations. Auto-detect whether the current system is an HPC
        and submit to its queue. Otherwise, run locally.

        On a new submission this also logs version and logger information,
        creates the results aggregator, removes any existing events summary
        file, logs a structured submission event, exports
        ``JADE_RUNTIME_OUTPUT`` and runs the configured setup command, if any.
        On a continued submission it only handles the submission groups.

        Parameters
        ----------
        cluster : Cluster
        force_local : bool
            If on HPC, run jobs through subprocess as if local.

        Returns
        -------
        Status
            ``Status.IN_PROGRESS`` when the HPC submission path reports that
            it is not complete; otherwise the value returned by
            ``_handle_completion``.

        """
        if not self._is_new:
            self._handle_submission_groups()
        else:
            logger.info(
                "Submit %s jobs for execution.", self._config.get_num_jobs()
            )
            logger.info("JADE version %s", jade.version.__version__)
            module_registry = Registry()
            logger.info(
                "Registered modules for logging: %s",
                ", ".join(module_registry.list_loggers()),
            )
            self._save_repository_info(module_registry)

            ResultsAggregator.create(self._output)

            # An events summary file left in the output directory is invalid
            # for a new submission, so drop it.
            stale_events_file = os.path.join(self._output, EVENTS_FILENAME)
            if os.path.exists(stale_events_file):
                os.remove(stale_events_file)

            # NOTE(review): the event name says "completed" while the message
            # says "started" -- confirm which one is intended.
            log_event(
                StructuredLogEvent(
                    source="submitter",
                    category=EVENT_CATEGORY_RESOURCE_UTIL,
                    name=EVENT_NAME_SUBMIT_COMPLETED,
                    message="job submission started",
                    num_jobs=self.get_num_jobs(),
                )
            )

            os.environ["JADE_RUNTIME_OUTPUT"] = self._output
            if self._config.setup_command is not None:
                # The prefixed form is built for the log line only; the command
                # itself is run without it (the variable is set in os.environ
                # just above).
                logged_cmd = f"JADE_RUNTIME_OUTPUT={self._output} {self._config.setup_command}"
                logger.info("Running setup command: %s", logged_cmd)
                check_run_command(self._config.setup_command)

        status = Status.IN_PROGRESS
        default_group = self._config.get_default_submission_group()
        group_lookup = make_submission_group_lookup(
            cluster.config.submission_groups
        )
        self._hpc = HpcManager(group_lookup, self._output)

        run_locally = self._hpc.hpc_type == HpcType.LOCAL or force_local
        if run_locally:
            job_runner = JobRunner(self._config_file, output=self._output)
            submitter_params = default_group.submitter_params
            status = job_runner.run_jobs(
                verbose=submitter_params.verbose,
                num_processes=submitter_params.num_processes,
            )
            ResultsAggregator.load(self._output).process_results()
            # A local run is always treated as complete.
            finished = True
        else:
            finished = self._submit_to_hpc(cluster)

        if finished:
            # The completion handler's status replaces whatever was set above.
            status = self._handle_completion(cluster)
        return status
Beispiel #5
0
def test_resubmit_successful(cleanup):
    """Check that successful jobs can be resubmitted with new submission groups.

    Steps:
      1. Submit and wait; all ``NUM_COMMANDS`` results must be successful.
      2. Save the submission groups to ``SG_FILE``, check that the first
         group's ``per_node_batch_size`` exceeds ``NUM_COMMANDS``, lower it
         to ``NUM_COMMANDS`` and write the file back.
      3. Resubmit with ``--successful`` using the edited file and wait; the
         result counts must be unchanged.
      4. Save the submission groups again (``--force``) and check that the
         lowered batch size is what is now stored.

    ``cleanup`` is not used in the body; presumably a pytest fixture --
    confirm against conftest.
    """
    check_run_command(f"{SUBMIT_JOBS} {CONFIG_FILE} --output={OUTPUT}")
    check_run_command(f"{WAIT} --output={OUTPUT} -p 0.01 -t2")
    initial_summary = ResultsSummary(OUTPUT)
    assert len(initial_summary.get_failed_results()) == 0
    assert len(initial_summary.get_successful_results()) == NUM_COMMANDS

    save_groups_cmd = f"jade config save-submission-groups {OUTPUT} -c {SG_FILE}"
    check_run_command(save_groups_cmd)
    submission_groups = load_data(SG_FILE)
    # Refers to the nested dict inside submission_groups, so the assignment
    # below is what gets written back by dump_data.
    first_group_params = submission_groups[0]["submitter_params"]
    assert first_group_params["per_node_batch_size"] > NUM_COMMANDS
    first_group_params["per_node_batch_size"] = NUM_COMMANDS
    dump_data(submission_groups, SG_FILE)

    check_run_command(f"{RESUBMIT_JOBS} {OUTPUT} -s {SG_FILE} --successful")
    check_run_command(f"{WAIT} --output={OUTPUT} -p 0.01")
    resubmit_summary = ResultsSummary(OUTPUT)
    assert len(resubmit_summary.get_failed_results()) == 0
    assert len(resubmit_summary.get_successful_results()) == NUM_COMMANDS

    force_save_cmd = (
        f"jade config save-submission-groups {OUTPUT} --force -c {SG_FILE}"
    )
    check_run_command(force_save_cmd)
    saved_params = load_data(SG_FILE)[0]["submitter_params"]
    assert saved_params["per_node_batch_size"] == NUM_COMMANDS
Beispiel #6
0
def test_none(config_file):
    """Check that ``-R none`` stores the "none" resource monitor type.

    The file passed via ``-c`` must exist after the command runs.
    """
    check_run_command(f"{CMD} -R none -c {config_file}")
    assert config_file.exists()

    monitor_type = load_data(config_file)["resource_monitor_type"]
    assert monitor_type == ResourceMonitorType.NONE.value
Beispiel #7
0
def test_legacy_stats_enabled(config_file):
    """Check the config produced when only an interval (``-r 5``) is given.

    The file passed via ``-c`` must exist, record an interval of 5 and use
    the periodic resource monitor type.
    """
    check_run_command(f"{CMD} -r 5 -c {config_file}")
    assert config_file.exists()

    settings = load_data(config_file)
    assert settings["resource_monitor_interval"] == 5
    monitor_type = settings["resource_monitor_type"]
    assert monitor_type == ResourceMonitorType.PERIODIC.value
Beispiel #8
0
def test_defaults(config_file):
    """Check that, with no monitor options, the aggregation type is stored.

    The file passed via ``-c`` must exist after the command runs.
    """
    check_run_command(f"{CMD} -c {config_file}")
    assert config_file.exists()

    settings = load_data(config_file)
    monitor_type = settings["resource_monitor_type"]
    assert monitor_type == ResourceMonitorType.AGGREGATION.value