def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label="htex_Local",
                working_dir=working_dir,
                storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
                worker_debug=True,
                cores_per_worker=1,
                heartbeat_period=2,
                heartbeat_threshold=5,
                poll_period=100,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=0,
                    min_blocks=0,
                    max_blocks=5,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy='simple',
        app_cache=True,
        checkpoint_mode='task_exit',
        retries=2,
        monitoring=MonitoringHub(
            hub_address="localhost",
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=1,
        )
    )
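# Hedged usage sketch (assumed, not from the original module; it presumes the imports and
# the module-level working_dir that fresh_config references are defined). With
# HTTPInTaskStaging in storage_access, an app can take a remote File as input and Parsl
# stages it onto the worker before the app body runs.
import parsl
from parsl import python_app
from parsl.data_provider.files import File

parsl.load(fresh_config())

@python_app
def line_count(inputs=[]):
    with open(inputs[0].filepath) as f:
        return sum(1 for _ in f)

# Example URL only; any HTTP-accessible text file would do.
remote = File('https://raw.githubusercontent.com/Parsl/parsl/master/README.rst')
print(line_count(inputs=[remote]).result())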
def configure_parsl(n_threads, monitoring, **kwargs):
    import logging
    from parsl.config import Config
    from parsl.executors.threads import ThreadPoolExecutor
    from parsl.addresses import address_by_hostname

    if monitoring:
        from parsl.monitoring import MonitoringHub
        monitoring = MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            logging_level=logging.INFO,
            resource_monitoring_interval=10,
        )
    else:
        monitoring = None

    local_threads = ThreadPoolExecutor(max_threads=n_threads, label='local_threads')

    config = Config(
        executors=[local_threads],
        monitoring=monitoring,
        strategy=None,
        app_cache=True,
    )
    return config
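# Hedged usage sketch (not part of the original helper): build a 4-thread configuration
# with monitoring enabled and hand it to parsl.load before defining any apps.
import parsl

config = configure_parsl(n_threads=4, monitoring=True)
parsl.load(config)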
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            ThreadPoolExecutor(label='qc', max_threads=nwc_workers),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=nodes_per_nwchem,  # Minimum increment in blocks
                    init_blocks=0,
                    max_blocks=total_nodes // nodes_per_nwchem,  # Limits the number of manager processes
                    launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
import logging

from parsl import ThreadPoolExecutor
from parsl.config import Config
from parsl.monitoring import MonitoringHub

config = Config(
    executors=[ThreadPoolExecutor(label='threads', max_threads=4)],
    monitoring=MonitoringHub(
        hub_address="localhost",
        hub_port=55055,
        logging_level=logging.INFO,
        resource_monitoring_interval=3,
    )
)
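# Hypothetical follow-on (assumed, not in the original snippet): load the config and run
# a trivial app so the MonitoringHub has task and resource records to write.
import parsl
from parsl import python_app

parsl.load(config)

@python_app
def add(a, b):
    return a + b

print(add(1, 2).result())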
def theta_xtb_config(log_dir: str,
                     xtb_per_node: int = 1,
                     ml_tasks_per_node: int = 1,
                     total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))):
    """Theta configuration where QC tasks and ML tasks run on single nodes.

    There are no MPI tasks in this configuration.

    Args:
        xtb_per_node: Number of XTB calculations to place on each node
        ml_tasks_per_node: Number of ML tasks to place on each node
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=xtb_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=ml_tasks_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
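# Hedged usage sketch (assumed worker counts and log path): place 8 XTB workers and one
# ML worker per node; COBALT_JOBSIZE supplies total_nodes when it is set.
import parsl

config = theta_xtb_config(log_dir='runs/logs', xtb_per_node=8, ml_tasks_per_node=1)
parsl.load(config)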
    obs_lsst_configs="/opt/lsst/software/stack/obs_lsst/config/",

    # this is the butler repo to use
    repo_dir="/global/cscratch1/sd/bxc/lsst-dm-repo-1",

    root_softs="/global/homes/b/bxc/dm/",
    # what is ROOT_SOFTS in general? this has come from the SRS workflow,
    # probably the path to this workflow's repo, up one level.

    # This specifies a function (str -> str) which rewrites a bash command into
    # one appropriately wrapped for whichever container/environment is being used
    # with this configuration (for example, wrap_shifter_container writes the
    # command to a temporary file and then invokes that file inside shifter)
    wrap=partial(wrap_shifter_container,
                 image_id="lsstdesc/desc-drp-stack:v19-dc2-run2.2-v4"),
    wrap_sql=wrap_no_op,

    parsl_config=Config(
        executors=[
            cori_queue_executor_1,
            cori_queue_executor_2,
            local_executor
        ],
        strategy='htex',
        app_cache=True,
        checkpoint_mode='task_exit',
        checkpoint_files=get_all_checkpoints(),
        retries=2,
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=True,
            resource_monitoring_interval=10)))

configuration = cori_shifter_debug_config
from parsl import ThreadPoolExecutor
from parsl.config import Config
from parsl.monitoring import MonitoringHub

config = Config(
    executors=[ThreadPoolExecutor(label='threads', max_threads=4)],
    monitoring=MonitoringHub(
        hub_address="localhost",
        hub_port=55055,
        resource_monitoring_interval=3,
    )
)
def theta_nwchem_config(ml_workers: int,
                        log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        ml_workers: Number of nodes dedicated to ML tasks
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    nwc_nodes = total_nodes - ml_workers
    assert nwc_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = nwc_nodes // nodes_per_nwchem

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=nwc_workers,
                cores_per_worker=1e-6,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                provider=LocalProvider(
                    nodes_per_block=ml_workers,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy=None,
    )
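# Hedged usage sketch (assumed node counts and log path): reserve one node for ML workers
# and split the remaining four into two-node NWChem tasks, then load the configuration.
import parsl

config = theta_nwchem_config(ml_workers=1, log_dir='runs/logs',
                             nodes_per_nwchem=2, total_nodes=5)
parsl.load(config)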
                working_dir=working_dir,
                storage_access=[
                    FTPInTaskStaging(),
                    HTTPInTaskStaging(),
                    NoOpFileStaging()
                ],
                worker_debug=True,
                cores_per_worker=1,
                heartbeat_period=2,
                heartbeat_threshold=5,
                poll_period=100,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=0,
                    min_blocks=0,
                    max_blocks=5,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy='simple',
        app_cache=True,
        checkpoint_mode='task_exit',
        retries=2,
        monitoring=MonitoringHub(
            hub_address="localhost",
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=1,
        ))
    ## To discover the hash, run the command "$ shifterimg lookup <image name>"
    ## Image name = lsstdesc/desc-drp-stack:v19-dc2-run2.2-v5
    ## corresponds to
    ## Hash = 2d1db8fd83d62956ca0fbbe544c7f194f7aee72c106afd58ad2f1094d4c77435
    ##
    ## --image=id:$(shifterimg lookup <image name>)

    ## OLD WAY
    ## wrap=partial(wrap_shifter_container,
    ##              image_id="lsstdesc/desc-drp-stack:v19-dc2-run2.2-v5"),
    wrap=partial(
        wrap_shifter_container,
        image_id="id:2d1db8fd83d62956ca0fbbe544c7f194f7aee72c106afd58ad2f1094d4c77435"),
    wrap_sql=wrap_no_op,

    parsl_config=Config(
        executors=[
            local_executor,
            cori_knl_1,
            cori_knl_2,
            cori_knl_3,
            cori_knl_4,
            cori_knl_5
        ],
        app_cache=True,
        checkpoint_mode='task_exit',
        checkpoint_files=get_all_checkpoints(),
        retries=2,  # plus the original attempt
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_enabled=True,
            resource_monitoring_interval=100,  # seconds
            workflow_name="DRPtest")))

configuration = cori_shifter_debug_config
import logging

import parsl
from parsl import bash_app, ThreadPoolExecutor
from parsl.config import Config
from parsl.monitoring import MonitoringHub
from parsl.addresses import address_by_hostname

# Define a configuration for using local threads and pilot jobs
# parsl.set_stream_logger()
FILENAME = 'log_monitor.txt'
parsl.set_file_logger(FILENAME, level=logging.DEBUG)

config = Config(
    executors=[
        ThreadPoolExecutor(
            max_threads=8,
            label='local_threads'
        )
    ],
    monitoring=MonitoringHub(
        hub_address=address_by_hostname(),
        hub_port=55055,
        logging_level=logging.INFO,
        resource_monitoring_interval=10,
    ),
    strategy=None
)

parsl.clear()
parsl.load(config)
print('Modules Imported!')


# App that generates
@bash_app
def echo1(inputs=[], outputs=[], stdout=parsl.AUTO_LOGNAME, stderr=parsl.AUTO_LOGNAME):
    command = '{exe} {output}'.format(
        exe=inputs[0],
        output=outputs[0])  # assumed completion; the original snippet is truncated here
    return command