def create(
    auto_config_cmds,
    per_node_batch_size,
    config_file,
    hpc_config,
    local,
    max_nodes,
    poll_interval,
    num_processes,
    reports,
    enable_singularity,
    container,
    verbose,
):
    """Create a pipeline with multiple Jade configurations."""
    if local:
        hpc_config = HpcConfig(hpc_type=HpcType.LOCAL, hpc=LocalHpcConfig())
    else:
        if not os.path.exists(hpc_config):
            print(
                f"{hpc_config} does not exist. Generate it with 'jade config hpc' "
                "or run in local mode with '-l'",
                file=sys.stderr,
            )
            sys.exit(1)
        hpc_config = HpcConfig(**load_data(hpc_config))

    if enable_singularity:
        singularity_params = SingularityParams(enabled=True, container=container)
    else:
        singularity_params = None

    submit_params = SubmitterParams(
        generate_reports=reports,
        hpc_config=hpc_config,
        max_nodes=max_nodes,
        num_processes=num_processes,
        per_node_batch_size=per_node_batch_size,
        poll_interval=poll_interval,
        singularity_params=singularity_params,
        verbose=verbose,
    )
    PipelineManager.create_config(auto_config_cmds, config_file, submit_params)
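
# Hypothetical usage sketch (not part of the original module): create a pipeline config in
# local mode. In the packaged CLI this function is wired through click options; calling it
# directly as shown assumes the plain-function form above. The auto-config command string,
# file names, and numeric values are illustrative assumptions, not values from this module.
def _example_create_local_pipeline():
    create(
        auto_config_cmds=["jade config create commands.txt"],
        per_node_batch_size=500,
        config_file="pipeline.json",
        hpc_config="hpc_config.toml",  # ignored when local=True
        local=True,
        max_nodes=None,
        poll_interval=60,
        num_processes=None,
        reports=True,
        enable_singularity=False,
        container=None,
        verbose=False,
    )

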
def cluster():
    """Create a Cluster from a config of two generic commands; clean up the output afterward."""
    os.makedirs(OUTPUT, exist_ok=True)
    commands = ["echo 'hello'"] * 2
    cmd_file = os.path.join(OUTPUT, "commands.txt")
    with open(cmd_file, "w") as f_out:
        for cmd in commands:
            f_out.write(cmd + "\n")
    jade_config = GenericCommandConfiguration.auto_config(cmd_file)
    config_file = os.path.join(OUTPUT, CONFIG_FILE)
    jade_config.dump(config_file)
    hpc_config = HpcConfig(hpc_type="slurm", hpc=SlurmConfig(account="abc"))
    cluster = Cluster.create(OUTPUT, jade_config)
    yield cluster
    if os.path.exists(OUTPUT):
        shutil.rmtree(OUTPUT)
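
# Hypothetical usage sketch (not from the original source): a test that consumes the
# cluster generator above, assuming it is registered as a pytest fixture elsewhere
# (e.g. with @pytest.fixture). The test name and assertion are illustrative.
def test_cluster_fixture_yields_cluster(cluster):
    # The fixture yields whatever Cluster.create() returned for the two-command config.
    assert cluster is not None

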
def hpc(account, config_file, mem, partition, qos, hpc_type, tmp, walltime):
    """Create an HPC config file."""
    if hpc_type == "slurm":
        hpc = SlurmConfig(
            account=account,
            mem=mem,
            partition=partition,
            qos=qos,
            tmp=tmp,
            walltime=walltime,
        )
    elif hpc_type == "fake":
        hpc = FakeHpcConfig(walltime=walltime)
    else:
        assert hpc_type == "local"
        hpc = LocalHpcConfig()

    # This converts enums to values.
    data = json.loads(HpcConfig(hpc_type=hpc_type, hpc=hpc).json())
    dump_data(data, config_file)
    print(f"Created HPC config file {config_file}")
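
# Hypothetical usage sketch (not from the original source): write a Slurm HPC config file.
# The file name and walltime are illustrative, and this assumes the optional Slurm fields
# (mem, partition, qos, tmp) accept None; in the packaged CLI these come from click options.
def _example_write_slurm_hpc_config():
    hpc(
        account="abc",
        config_file="hpc_config.toml",
        mem=None,
        partition=None,
        qos=None,
        hpc_type="slurm",
        tmp=None,
        walltime="04:00:00",
    )

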
def make_submitter_params(
    per_node_batch_size=None,
    dry_run=None,
    hpc_config=None,
    local=None,
    max_nodes=None,
    poll_interval=None,
    resource_monitor_interval=None,
    resource_monitor_type=None,
    num_processes=None,
    verbose=None,
    reports=None,
    enable_singularity=None,
    container=None,
    try_add_blocked_jobs=None,
    time_based_batching=None,
    node_setup_script=None,
    node_shutdown_script=None,
    no_distributed_submitter=None,
):
    """Returns an instance of SubmitterParams for use in a job submission."""
    if node_setup_script is not None or node_shutdown_script is not None:
        print(
            "Warning: node_setup_script and node_shutdown_script are deprecated and will "
            "be removed in release v0.9.0."
        )

    if local:
        hpc_config = HpcConfig(hpc_type="local", hpc=LocalHpcConfig())
    else:
        # TODO: If the config file contains submission groups then this should not be required.
        if not os.path.exists(hpc_config):
            print(
                f"{hpc_config} does not exist. Generate it with 'jade config hpc' "
                "or run in local mode with '-l'",
                file=sys.stderr,
            )
            sys.exit(1)
        hpc_config = HpcConfig(**load_data(hpc_config))

    if local and dry_run:
        print("Dry run is not allowed in local mode.", file=sys.stderr)
        sys.exit(1)

    if (
        time_based_batching
        and per_node_batch_size != SUBMITTER_PARAMS_DEFAULTS["per_node_batch_size"]
    ):
        # This doesn't catch the case where the user passes --per-node-batch-size=default, but
        # I don't see that click provides a way to detect that condition.
        print(
            "Error: --per-node-batch-size and --time-based-batching are mutually exclusive",
            file=sys.stderr,
        )
        sys.exit(1)

    if time_based_batching and num_processes is None:
        print("Error: num_processes must be set with time-based batching", file=sys.stderr)
        sys.exit(1)

    # We added resource_monitor_type after resource_monitor_interval. The following logic
    # maintains backwards compatibility with user settings.
    default_monitor_interval = SUBMITTER_PARAMS_DEFAULTS["resource_monitor_interval"]
    if resource_monitor_interval is not None and resource_monitor_type is not None:
        pass
    elif resource_monitor_interval is None and resource_monitor_type is None:
        resource_monitor_type = ResourceMonitorType.AGGREGATION
        resource_monitor_interval = default_monitor_interval
    elif resource_monitor_interval is not None and resource_monitor_type is None:
        resource_monitor_type = ResourceMonitorType.PERIODIC
    elif resource_monitor_interval is None and resource_monitor_type is not None:
        resource_monitor_interval = default_monitor_interval
    else:
        assert False, f"interval={resource_monitor_interval} type={resource_monitor_type}"

    if enable_singularity:
        singularity_params = SingularityParams(enabled=True, container=container)
    else:
        singularity_params = None

    return SubmitterParams(
        generate_reports=reports,
        hpc_config=hpc_config,
        max_nodes=max_nodes,
        num_processes=num_processes,
        per_node_batch_size=per_node_batch_size,
        distributed_submitter=not no_distributed_submitter,
        dry_run=dry_run,
        node_setup_script=node_setup_script,
        node_shutdown_script=node_shutdown_script,
        poll_interval=poll_interval,
        resource_monitor_interval=resource_monitor_interval,
        resource_monitor_type=resource_monitor_type,
        singularity_params=singularity_params,
        time_based_batching=time_based_batching,
        try_add_blocked_jobs=try_add_blocked_jobs,
        verbose=verbose,
    )
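
# Hypothetical usage sketch (not from the original source): build SubmitterParams for a
# local run. The explicit values below are illustrative assumptions standing in for the
# click defaults that the real CLI would pass; local=True keeps the hpc_config file lookup
# from being required.
def _example_local_submitter_params():
    return make_submitter_params(
        per_node_batch_size=500,
        dry_run=False,
        local=True,
        max_nodes=None,
        poll_interval=60,
        num_processes=None,
        verbose=False,
        reports=True,
        enable_singularity=False,
        container=None,
        try_add_blocked_jobs=True,
        time_based_batching=False,
        no_distributed_submitter=False,
    )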