Exemple #1
0
def fresh_config():
    """Build a new Parsl ``Config`` with one multi-node HighThroughputExecutor.

    The executor is labelled ``'theta_local_htex_multinode'``, allows a single
    worker, and obtains its nodes through a ``CobaltProvider`` that requests
    one block of two nodes with a ten minute walltime.  The queue, account and
    worker initialisation command are read from ``user_opts['theta']``.

    Returns:
        A ``Config`` object containing exactly that one executor.
    """
    # Resolve the address first, before anything else is constructed.
    host_address = address_by_hostname()
    theta_opts = user_opts['theta']

    cobalt = CobaltProvider(
        queue=theta_opts['queue'],
        account=theta_opts['account'],
        launcher=AprunLauncher(overrides="-d 64"),
        walltime='00:10:00',
        nodes_per_block=2,
        init_blocks=1,
        max_blocks=1,
        # string to prepend to #COBALT blocks in the submit
        # script to the scheduler eg: '#COBALT -t 50'
        scheduler_options='',
        # Command to be run before starting a worker, such as:
        # 'module load Anaconda; source activate parsl_env'.
        worker_init=theta_opts['worker_init'],
        cmd_timeout=120,
    )

    htex = HighThroughputExecutor(
        label='theta_local_htex_multinode',
        max_workers=1,
        address=host_address,
        provider=cobalt,
    )
    return Config(executors=[htex])
Exemple #2
0
def parsl_config(name: str) -> Tuple[Config, int]:
    """Build the Parsl configuration for a named compute resource.

    Args:
        name: Name of the desired configuration. ``'local'`` and
            ``'theta-debug'`` are the configurations defined here.
    Returns:
        - Parsl compute configuration
        - Number of compute slots: Includes execution slots and pre-fetch buffers
    Raises:
        ValueError: If ``name`` does not match a defined configuration
    """
    if name == 'local':
        # No provider is given for the local executor.
        local_htex = HighThroughputExecutor(max_workers=16, prefetch_capacity=1)
        return Config(executors=[local_htex]), 64

    if name == 'theta-debug':
        debug_htex = HighThroughputExecutor(
            address=address_by_hostname(),
            label="debug",
            max_workers=64,
            prefetch_capacity=64,
            cpu_affinity='block',
            provider=CobaltProvider(
                account='redox_adsp',
                queue='debug-flat-quad',
                nodes_per_block=8,
                scheduler_options='#COBALT --attrs enable_ssh=1',
                walltime='00:60:00',
                # Blocks are not requested up front; at most one is allowed.
                init_blocks=0,
                max_blocks=1,
                cmd_timeout=360,
                launcher=AprunLauncher(overrides='-d 64 --cc depth -j 1'),
                worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env''',
            ),
        )
        return Config(retries=16, executors=[debug_htex]), 64 * 8 * 4

    # Anything else is an unknown configuration name.
    raise ValueError(f'Configuration not defined: {name}')
Exemple #3
0
            max_workers=8,  # One task per node
            provider=CobaltProvider(
                cmd_timeout=120,
                nodes_per_block=8,
                account='CSC249ADCD08',
                queue='debug-cache-quad',
                walltime="1:00:00",
                init_blocks=1,
                max_blocks=1,
                launcher=SimpleLauncher(),  # Places worker on the launch node
                scheduler_options='#COBALT --attrs enable_ssh=1',
                worker_init='''
module load miniconda-3
export PATH=~/software/psi4/bin:$PATH
conda activate /lus/theta-fs0/projects/CSC249ADCD08/colmena/env

# NWChem settings
export PATH="/home/lward/software/nwchem-6.8.1/bin/LINUX64:$PATH"
module load atp
export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.0.128/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
''',
            ),
        ),
Exemple #4
0
 executors=[
     HighThroughputExecutor(
         label='theta-htex',
         max_workers=WORKERS_PER_NODE * MY_COMPUTE_NODES *
         MY_COMPUTE_BLOCKS,
         worker_debug=True,
         address=address_by_hostname(),
         provider=CobaltProvider(
             queue=MY_QUEUE,
             account=MY_ALLOCATION,
             launcher=AprunLauncher(overrides="-d 64"),
             walltime=MY_TIME,
             nodes_per_block=MY_COMPUTE_NODES,
             init_blocks=1,
             min_blocks=1,
             max_blocks=MY_COMPUTE_BLOCKS,
             # string to prepend to #COBALT blocks in the submit
             # script to the scheduler eg: '#COBALT -t 50'
             scheduler_options='',
             # Command to be run before starting a worker, such as:
             worker_init='module load miniconda-3; export PATH=$PATH:{}'.
             format(MY_USER_PATH),
             cmd_timeout=120,
         ),
     ),
     ThreadPoolExecutor(label='login-node', max_threads=8),
 ],
 monitoring=MonitoringHub(
     hub_address=address_by_hostname(),
     hub_port=55055,
     monitoring_debug=False,
        HighThroughputExecutor(
            address=address_by_hostname(),
            label="htex",
            max_workers=4,
            prefetch_capacity=1,
            provider=CobaltProvider(
                queue='CVD_Research',
                account='CVD_Research',
                launcher=AprunLauncher(overrides="-d 256 --cc depth -j 4"),
                walltime='3:00:00',
                nodes_per_block=4,
                init_blocks=1,
                min_blocks=1,
                max_blocks=4,
                scheduler_options='#COBALT --attrs enable_ssh=1',
                worker_init=f'''
module load miniconda-3
module load java
source activate /home/lward/exalearn/covid/toxicity-prediction/opera/env
export KMP_AFFINITY=disabled
which python

# Set the environment variables
{envs}
''',
                cmd_timeout=120,
            ),
        ),
    ],
    strategy=None,
)
def theta_persistent(log_dir: str,
                     nodes_per_nwchem: int = 1,
                     qc_nodes: int = 8,
                     ml_nodes: int = 8,
                     ml_prefetch: int = 0) -> Config:
    """Configuration for a persistent application running on the Theta login node.

    Two executors are defined, "qc" and "ml", each with its own Cobalt
    provider so that nodes for QC and ML tasks are requested as separate jobs.

    Args:
        log_dir: Path to store monitoring DB and parsl logs (used as ``run_dir``)
        nodes_per_nwchem: Number of nodes per NWChem computation; the "qc"
            executor gets ``qc_nodes // nodes_per_nwchem`` workers
        qc_nodes: Number of nodes dedicated to QC tasks
        ml_nodes: Number of nodes per block requested for the "ml" executor
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    allocation = 'CSC249ADCD08'

    # Shell script run before starting workers of the "qc" executor.
    qc_worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env


export OMP_NUM_THREADS=64
export KMP_INIT_AT_FORK=FALSE
export PYTHONPATH=$PYTHONPATH:$(pwd)

export PATH="/lus/theta-fs0/projects/CSC249ADCD08/software/nwchem-6.8.1/bin/LINUX64:$PATH"
mkdir -p scratch  # For the NWChem tasks
pwd
which nwchem
hostname
module load atp
export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64_lin/:/opt/intel/compilers_and_libraries_2020.0.166/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
'''

    # Shell script run before starting workers of the "ml" executor.
    ml_worker_init = '''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env'''

    qc_executor = HighThroughputExecutor(
        address=address_by_hostname(),
        label="qc",
        max_workers=qc_nodes // nodes_per_nwchem,
        # NOTE(review): the QC executor also takes its prefetch capacity from
        # `ml_prefetch` -- confirm this is intended.
        prefetch_capacity=ml_prefetch,
        provider=CobaltProvider(
            account=allocation,
            # The debug queue is only named for requests of at most 8 nodes;
            # otherwise no queue is passed.
            queue='debug-cache-quad' if qc_nodes <= 8 else None,
            walltime='00:60:00',
            nodes_per_block=qc_nodes,
            init_blocks=0,
            max_blocks=1,
            launcher=SimpleLauncher(),
            cmd_timeout=360,
            worker_init=qc_worker_init,
        ),
    )

    ml_executor = HighThroughputExecutor(
        address=address_by_hostname(),
        label="ml",
        max_workers=1,
        prefetch_capacity=ml_prefetch,
        provider=CobaltProvider(
            account=allocation,
            queue='debug-flat-quad',
            nodes_per_block=ml_nodes,
            scheduler_options='#COBALT --attrs enable_ssh=1',
            walltime='00:60:00',
            init_blocks=0,
            max_blocks=1,
            cmd_timeout=360,
            # Places worker on the compute node
            launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),
            worker_init=ml_worker_init,
        ),
    )

    return Config(
        retries=8,
        executors=[qc_executor, ml_executor],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.,
    )