def fresh_config():
    """Build a new Parsl configuration with one multi-node HTEX executor.

    The Cobalt queue, the account and the worker initialization command are
    read from ``user_opts['theta']``.

    Returns:
        A newly constructed ``Config`` containing a single
        ``HighThroughputExecutor`` whose blocks are provisioned by a
        ``CobaltProvider`` and launched with ``aprun``.
    """
    host_address = address_by_hostname()
    theta_opts = user_opts['theta']

    cobalt_provider = CobaltProvider(
        queue=theta_opts['queue'],
        account=theta_opts['account'],
        launcher=AprunLauncher(overrides="-d 64"),
        walltime='00:10:00',
        nodes_per_block=2,
        init_blocks=1,
        max_blocks=1,
        # Extra '#COBALT' directives to prepend to the submit script,
        # e.g. '#COBALT -t 50'. None are needed here.
        scheduler_options='',
        # Shell command executed before a worker starts, for example
        # 'module load Anaconda; source activate parsl_env'.
        worker_init=theta_opts['worker_init'],
        cmd_timeout=120,
    )

    htex = HighThroughputExecutor(
        label='theta_local_htex_multinode',
        max_workers=1,
        address=host_address,
        provider=cobalt_provider,
    )

    return Config(executors=[htex])
def parsl_config(name: str) -> Tuple[Config, int]:
    """Create the compute resource configuration for a named setup.

    Args:
        name: Name of the desired configuration; ``'local'`` and
            ``'theta-debug'`` are defined
    Returns:
        - Parsl compute configuration
        - Number of compute slots: includes execution slots and
          pre-fetch buffers
    Raises:
        ValueError: If ``name`` does not match a defined configuration
    """
    if name == 'local':
        local_executor = HighThroughputExecutor(max_workers=16, prefetch_capacity=1)
        return Config(executors=[local_executor]), 64

    if name == 'theta-debug':
        host_address = address_by_hostname()

        # One block of 8 nodes in the debug queue; workers are started with aprun
        debug_provider = CobaltProvider(
            account='redox_adsp',
            queue='debug-flat-quad',
            nodes_per_block=8,
            scheduler_options='#COBALT --attrs enable_ssh=1',
            walltime='00:60:00',
            init_blocks=0,
            max_blocks=1,
            cmd_timeout=360,
            launcher=AprunLauncher(overrides='-d 64 --cc depth -j 1'),
            worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env''',
        )

        debug_executor = HighThroughputExecutor(
            address=host_address,
            label="debug",
            max_workers=64,
            prefetch_capacity=64,
            cpu_affinity='block',
            provider=debug_provider,
        )

        slot_count = 64 * 8 * 4
        return Config(retries=16, executors=[debug_executor]), slot_count

    raise ValueError(f'Configuration not defined: {name}')
max_workers=8, # One task per node provider=CobaltProvider( cmd_timeout=120, nodes_per_block=8, account='CSC249ADCD08', queue='debug-cache-quad', walltime="1:00:00", init_blocks=1, max_blocks=1, launcher=SimpleLauncher(), # Places worker on the launch node scheduler_options='#COBALT --attrs enable_ssh=1', worker_init=''' module load miniconda-3 export PATH=~/software/psi4/bin:$PATH conda activate /lus/theta-fs0/projects/CSC249ADCD08/colmena/env # NWChem settings export PATH="/home/lward/software/nwchem-6.8.1/bin/LINUX64:$PATH" module load atp export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384 export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000 export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072 export MPICH_GNI_NUM_BUFS=300 export MPICH_GNI_NDREG_MAXSIZE=16777216 export MPICH_GNI_MBOX_PLACEMENT=nic export MPICH_GNI_LMT_PATH=disabled export COMEX_MAX_NB_OUTSTANDING=6 export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.0.128/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH ''', ), ),
executors=[ HighThroughputExecutor( label='theta-htex', max_workers=WORKERS_PER_NODE * MY_COMPUTE_NODES * MY_COMPUTE_BLOCKS, worker_debug=True, address=address_by_hostname(), provider=CobaltProvider( queue=MY_QUEUE, account=MY_ALLOCATION, launcher=AprunLauncher(overrides="-d 64"), walltime=MY_TIME, nodes_per_block=MY_COMPUTE_NODES, init_blocks=1, min_blocks=1, max_blocks=MY_COMPUTE_BLOCKS, # string to prepend to #COBALT blocks in the submit # script to the scheduler eg: '#COBALT -t 50' scheduler_options='', # Command to be run before starting a worker, such as: worker_init='module load miniconda-3; export PATH=$PATH:{}'. format(MY_USER_PATH), cmd_timeout=120, ), ), ThreadPoolExecutor(label='login-node', max_threads=8), ], monitoring=MonitoringHub( hub_address=address_by_hostname(), hub_port=55055, monitoring_debug=False,
HighThroughputExecutor( address=address_by_hostname(), label="htex", max_workers=4, prefetch_capacity=1, provider=CobaltProvider( queue='CVD_Research', account='CVD_Research', launcher=AprunLauncher(overrides="-d 256 --cc depth -j 4"), walltime='3:00:00', nodes_per_block=4, init_blocks=1, min_blocks=1, max_blocks=4, scheduler_options='#COBALT --attrs enable_ssh=1', worker_init=f''' module load miniconda-3 module load java source activate /home/lward/exalearn/covid/toxicity-prediction/opera/env export KMP_AFFINITY=disabled which python # Set the environment variables {envs} ''', cmd_timeout=120, ), ), ], strategy=None, )
def theta_persistent(log_dir: str, nodes_per_nwchem: int = 1, qc_nodes: int = 8,
                     ml_nodes: int = 8, ml_prefetch: int = 0) -> Config:
    """Configuration where the application is persistent and sits on the Theta login node.

    Nodes will be requested from Cobalt using separate jobs for ML and QC tasks.

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation. Must be at least 1
        qc_nodes: Number of nodes dedicated to QC tasks
        ml_nodes: Number of nodes requested per block for the ML executor
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    Raises:
        ValueError: If ``nodes_per_nwchem`` is less than 1
    """
    # The QC worker count is `qc_nodes // nodes_per_nwchem`; reject divisors
    # that would raise ZeroDivisionError or yield a negative worker count.
    if nodes_per_nwchem < 1:
        raise ValueError(f'nodes_per_nwchem must be at least 1, got {nodes_per_nwchem}')

    # NOTE(review): MPICH_GNI_MAX_EAGER_MSG_SIZE is exported twice below;
    # the later value (131072) is the one in effect for the workers.
    qc_worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
export OMP_NUM_THREADS=64
export KMP_INIT_AT_FORK=FALSE
export PYTHONPATH=$PYTHONPATH:$(pwd)
export PATH="/lus/theta-fs0/projects/CSC249ADCD08/software/nwchem-6.8.1/bin/LINUX64:$PATH"
mkdir -p scratch # For the NWChem tasks
pwd
which nwchem
hostname
module load atp
export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64_lin/:/opt/intel/compilers_and_libraries_2020.0.166/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
'''

    ml_worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env'''

    return Config(
        retries=8,
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                # One worker per group of `nodes_per_nwchem` nodes
                max_workers=qc_nodes // nodes_per_nwchem,
                # NOTE(review): the ML prefetch setting is applied to the QC
                # executor as well -- confirm this is intended.
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    # Debug queue is only selected for requests of 8 nodes or fewer
                    queue='debug-cache-quad' if qc_nodes <= 8 else None,
                    walltime='00:60:00',
                    nodes_per_block=qc_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                    cmd_timeout=360,
                    worker_init=qc_worker_init,
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    queue='debug-flat-quad',
                    nodes_per_block=ml_nodes,
                    scheduler_options='#COBALT --attrs enable_ssh=1',
                    walltime='00:60:00',
                    init_blocks=0,
                    max_blocks=1,
                    cmd_timeout=360,
                    launcher=AprunLauncher(
                        overrides='-d 256 --cc depth -j 4'
                    ),  # Places worker on the compute node
                    worker_init=ml_worker_init,
                ),
            ),
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.,
    )