예제 #1
0
파일: pipeline.py 프로젝트: jgu2/jade
def create(
    auto_config_cmds,
    per_node_batch_size,
    config_file,
    hpc_config,
    local,
    max_nodes,
    poll_interval,
    num_processes,
    reports,
    enable_singularity,
    container,
    verbose,
):
    """Create a pipeline with multiple Jade configurations."""
    # Resolve the HPC configuration: local mode synthesizes one in memory,
    # otherwise it must be loaded from an existing config file on disk.
    if local:
        hpc_config = HpcConfig(hpc_type=HpcType.LOCAL, hpc=LocalHpcConfig())
    elif os.path.exists(hpc_config):
        hpc_config = HpcConfig(**load_data(hpc_config))
    else:
        print(
            f"{hpc_config} does not exist. Generate it with 'jade config hpc' "
            "or run in local mode with '-l'",
            file=sys.stderr,
        )
        sys.exit(1)

    # Singularity support is optional; None disables it downstream.
    singularity_params = (
        SingularityParams(enabled=True, container=container)
        if enable_singularity
        else None
    )

    submit_params = SubmitterParams(
        generate_reports=reports,
        hpc_config=hpc_config,
        max_nodes=max_nodes,
        num_processes=num_processes,
        per_node_batch_size=per_node_batch_size,
        poll_interval=poll_interval,
        singularity_params=singularity_params,
        verbose=verbose,
    )
    PipelineManager.create_config(auto_config_cmds, config_file, submit_params)
예제 #2
0
파일: test_cluster.py 프로젝트: jgu2/jade
def cluster():
    """Pytest fixture: write a two-command generic config under OUTPUT, yield a
    Cluster created from it, and remove OUTPUT when the test finishes."""
    os.makedirs(OUTPUT, exist_ok=True)

    # Write a small command file that auto_config() can ingest.
    commands = ["echo 'hello'"] * 2
    cmd_file = os.path.join(OUTPUT, "commands.txt")
    with open(cmd_file, "w") as f_out:
        for cmd in commands:
            f_out.write(cmd + "\n")

    jade_config = GenericCommandConfiguration.auto_config(cmd_file)
    config_file = os.path.join(OUTPUT, CONFIG_FILE)
    jade_config.dump(config_file)
    # NOTE(review): the original also constructed an HpcConfig here that was
    # never used; the dead local has been removed.
    cluster = Cluster.create(OUTPUT, jade_config)

    yield cluster

    # Cleanup after the test body has run.
    if os.path.exists(OUTPUT):
        shutil.rmtree(OUTPUT)
예제 #3
0
파일: config.py 프로젝트: jgu2/jade
def hpc(account, config_file, mem, partition, qos, hpc_type, tmp, walltime):
    """Create an HPC config file.

    Builds the hpc-type-specific config ("slurm", "fake", or "local"),
    serializes it through JSON, and writes it to config_file.

    Raises:
        ValueError: if hpc_type is not one of the supported values.
    """
    if hpc_type == "slurm":
        hpc = SlurmConfig(
            account=account,
            mem=mem,
            partition=partition,
            qos=qos,
            tmp=tmp,
            walltime=walltime,
        )
    elif hpc_type == "fake":
        hpc = FakeHpcConfig(walltime=walltime)
    elif hpc_type == "local":
        hpc = LocalHpcConfig()
    else:
        # The original used `assert`, which is stripped under `python -O`;
        # raise explicitly so an unknown type always fails loudly.
        raise ValueError(f"unsupported hpc_type: {hpc_type}")

    # Round-trip through JSON so enum members serialize as their values.
    data = json.loads(HpcConfig(hpc_type=hpc_type, hpc=hpc).json())
    dump_data(data, config_file)
    print(f"Created HPC config file {config_file}")
예제 #4
0
def make_submitter_params(
    per_node_batch_size=None,
    dry_run=None,
    hpc_config=None,
    local=None,
    max_nodes=None,
    poll_interval=None,
    resource_monitor_interval=None,
    resource_monitor_type=None,
    num_processes=None,
    verbose=None,
    reports=None,
    enable_singularity=None,
    container=None,
    try_add_blocked_jobs=None,
    time_based_batching=None,
    node_setup_script=None,
    node_shutdown_script=None,
    no_distributed_submitter=None,
):
    """Returns an instance of SubmitterParams for use in a job submission.

    Exits the process (status 1) on invalid option combinations or when the
    HPC config file is missing.
    """
    if node_setup_script is not None or node_shutdown_script is not None:
        print(
            "Warning: node_setup_script and node_shutdown_script are deprecated and will "
            "be removed in release v0.9.0.")
    if local:
        hpc_config = HpcConfig(hpc_type="local", hpc=LocalHpcConfig())
    else:
        # TODO: If the config file contains submission groups then this should not be required.
        # Also guard against hpc_config=None (the default): os.path.exists(None)
        # would raise TypeError instead of producing the clean error below.
        if hpc_config is None or not os.path.exists(hpc_config):
            print(
                f"{hpc_config} does not exist. Generate it with 'jade config hpc' "
                "or run in local mode with '-l'",
                file=sys.stderr,
            )
            sys.exit(1)
        hpc_config = HpcConfig(**load_data(hpc_config))

    if local and dry_run:
        print("Dry run is not allowed in local mode.", file=sys.stderr)
        sys.exit(1)

    if (time_based_batching and per_node_batch_size !=
            SUBMITTER_PARAMS_DEFAULTS["per_node_batch_size"]):
        # This doesn't catch the case where the user passes --per-node-batch-size=default, but
        # I don't see that click provides a way to detect that condition.
        print(
            "Error: --per-node-batch-size and --time-based-batching are mutually exclusive",
            file=sys.stderr,
        )
        sys.exit(1)

    if time_based_batching and num_processes is None:
        print("Error: num_processes must be set with time-based batching",
              file=sys.stderr)
        sys.exit(1)

    # We added resource_monitor_type after resource_monitor_interval. The following
    # logic maintains backwards compatibility with user settings:
    #   - neither given: aggregation monitoring at the default interval
    #   - only interval given: treat as the legacy periodic monitor
    #   - only type given: use the default interval
    # (The original spelled out all four combinations plus an unreachable
    # `assert False` branch; this is equivalent.)
    if resource_monitor_type is None:
        resource_monitor_type = (
            ResourceMonitorType.PERIODIC
            if resource_monitor_interval is not None
            else ResourceMonitorType.AGGREGATION
        )
    if resource_monitor_interval is None:
        resource_monitor_interval = SUBMITTER_PARAMS_DEFAULTS[
            "resource_monitor_interval"]

    # Singularity support is optional; None disables it downstream.
    singularity_params = (
        SingularityParams(enabled=True, container=container)
        if enable_singularity
        else None
    )
    return SubmitterParams(
        generate_reports=reports,
        hpc_config=hpc_config,
        max_nodes=max_nodes,
        num_processes=num_processes,
        per_node_batch_size=per_node_batch_size,
        distributed_submitter=not no_distributed_submitter,
        dry_run=dry_run,
        node_setup_script=node_setup_script,
        node_shutdown_script=node_shutdown_script,
        poll_interval=poll_interval,
        resource_monitor_interval=resource_monitor_interval,
        resource_monitor_type=resource_monitor_type,
        singularity_params=singularity_params,
        time_based_batching=time_based_batching,
        try_add_blocked_jobs=try_add_blocked_jobs,
        verbose=verbose,
    )