Example #1
def local_interleaved_config(qc_workers: int, ml_workers: int, log_dir: str) -> Config:
    """All workers on the local machine, split between QC and ML tasks

    Args:
        qc_workers: Number of quantum chemistry workers
        ml_workers: Number of machine learning workers
        log_dir: Path to store monitoring DB and parsl logs
    Returns:
        (Config): Desired configuration
    """
    return Config(
        executors=[
            HighThroughputExecutor(
                address="localhost",
                label="qc",
                max_workers=qc_workers,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1
                ),
            ),
            HighThroughputExecutor(
                address="localhost",
                label="ml",
                max_workers=ml_workers,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1
                ),
            )
        ],
        run_dir=log_dir,
        strategy=None
    )
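A layout like this pays off when tasks are pinned to the matching executor by label. A minimal usage sketch, assuming Parsl's python_app decorator (the task bodies are placeholders, not part of the source):

import parsl
from parsl import python_app

parsl.load(local_interleaved_config(qc_workers=4, ml_workers=2, log_dir='runinfo'))

@python_app(executors=['qc'])
def run_simulation(x):
    return x ** 2  # stand-in for a quantum chemistry task

@python_app(executors=['ml'])
def rank_results(values):
    return max(values)  # stand-in for a machine learning task

results = [run_simulation(i) for i in range(4)]
print(rank_results([r.result() for r in results]).result())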
Example #2
def parsl_config(name: str) -> Tuple[Config, int]:
    """Make the compute resource configuration

    Args:
        name: Name of the desired configuration
    Returns:
        - Parsl compute configuration
        - Number of compute slots, including execution slots and prefetch buffers
    """

    if name == 'local':
        return Config(
            executors=[
                HighThroughputExecutor(max_workers=16, prefetch_capacity=1)
            ]
        ), 64
    elif name == 'theta-debug':
        return Config(
            retries=16,
            executors=[HighThroughputExecutor(
                    address=address_by_hostname(),
                    label="debug",
                    max_workers=64,
                    prefetch_capacity=64,
                    cpu_affinity='block',
                    provider=CobaltProvider(
                        account='redox_adsp',
                        queue='debug-flat-quad',
                        nodes_per_block=8,
                        scheduler_options='#COBALT --attrs enable_ssh=1',
                        walltime='1:00:00',
                        init_blocks=0,
                        max_blocks=1,
                        cmd_timeout=360,
                        launcher=AprunLauncher(overrides='-d 64 --cc depth -j 1'),
                        worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env''',
                    ),
                )]
            ), 64 * 8 * 4
    else:
        raise ValueError(f'Configuration not defined: {name}')
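Because the function returns the configuration together with a slot count, callers unpack both and can use the count to bound how many tasks they keep in flight. A short sketch (the sizing logic here is illustrative, not part of the source):

import parsl

config, n_slots = parsl_config('local')
parsl.load(config)
# Keep roughly n_slots tasks outstanding so every worker and
# prefetch buffer stays busy without overloading the interchange
print(f'Target number of in-flight tasks: {n_slots}')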
Example #3
def local_config(log_dir: str, max_workers: int, prefetch: int = 0) -> Config:
    """Single node with a single task per worker

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        max_workers: Maximum number of concurrent tasks
        prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc-worker",
                max_workers=max_workers,
                prefetch_capacity=prefetch,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                )
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
Example #4
def simple_config(n_workers: int, log_dir: str) -> Config:
    """Single type of worker. All running on local system

    Args:
        n_workers (int): Number of parallel workers
        log_dir: Path to store monitoring DB and parsl logs
    Returns:
        (Config): Colmena-ready configuration
    """
    return Config(
        executors=[
            HighThroughputExecutor(
                address="localhost",
                label="htex",
                max_workers=n_workers,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1
                ),
            )
        ],
        run_dir=log_dir,
        strategy=None
    )
Example #5
def theta_xtb_config(log_dir: str,
                     xtb_per_node: int = 1,
                     ml_tasks_per_node: int = 1,
                     total_nodes: int = int(os.environ.get(
                         "COBALT_JOBSIZE", 1))):
    """Theta configuration where QC tasks and ML tasks run on single nodes.

    There are no MPI tasks in this configuration.

    Args:
        xtb_per_node: Number of XTB calculations to run on each node
        ml_tasks_per_node: Number of ML tasks to place on each node
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=xtb_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=AprunLauncher(
                        overrides='-d 64 --cc depth'
                    ),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=ml_tasks_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(
                        overrides='-d 64 --cc depth'
                    ),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
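The MonitoringHub writes its records to the SQLite file named in logging_endpoint, so run progress can be inspected offline. A sketch, assuming Parsl's standard monitoring schema (which includes a task table) and a hypothetical log directory:

import os
import sqlite3

log_dir = 'runinfo'  # hypothetical; match the log_dir passed above
conn = sqlite3.connect(os.path.join(log_dir, 'monitoring.db'))
for func, count in conn.execute(
        'SELECT task_func_name, COUNT(*) FROM task GROUP BY task_func_name'):
    print(func, count)
conn.close()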
Example #6
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(
                            os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=nwc_workers,
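                # A near-zero cores_per_worker lets all nwc_workers share the
                # single launch node: each worker only issues aprun commands,
                # so it needs essentially no CPU of its own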
                cores_per_worker=1e-6,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(
                        overrides='-d 64 --cc depth'
                    ),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
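To make the node arithmetic concrete, here is a hypothetical 8-node allocation with 2 nodes per NWChem task (values are illustrative):

import parsl

# 8 // 2 = 4 "qc" workers sit on the launch node, each free to aprun an
# NWChem job across 2 compute nodes, while the "ml" executor places one
# worker on each of the 8 compute nodes
config = theta_nwchem_config(log_dir='runinfo', nodes_per_nwchem=2, total_nodes=8)
parsl.load(config)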
Example #7
from parsl import HighThroughputExecutor
from parsl.addresses import address_by_hostname
from parsl.config import Config
from parsl.launchers import AprunLauncher
from parsl.providers import LocalProvider, CobaltProvider

local_config = Config(
    executors=[
        HighThroughputExecutor(
            address="localhost",
            label="htex",
            max_workers=1,
            prefetch_capacity=1,
            provider=LocalProvider(init_blocks=1, max_blocks=1),
        ),
    ],
    strategy=None,
)

config = Config(
    executors=[
        HighThroughputExecutor(
            address=address_by_hostname(),
            label="htex",
            max_workers=1,
            prefetch_capacity=2,
            provider=CobaltProvider(
                queue='CVD_Research',
                account='CVD_Research',
                launcher=AprunLauncher(overrides="-d 64 --cc depth"),
                walltime='3:00:00',
                # (remaining provider options truncated in the original;
                # closed here so the example parses)
            ),
        ),
    ],
)
Example #8
def local_config(log_dir: str, max_workers: int, prefetch: int = 0) -> Config:
    """Single node with a single task per worker

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        max_workers: Maximum number of concurrent tasks
        prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc-worker",
                max_workers=max_workers,
                prefetch_capacity=prefetch,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker-tensorflow",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init='sleep 30',  # Give enough time for other workers to exit (memory!)
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                # Training and then running a model in the same worker seems to
                # cause issues, so inference gets its own executor
                label="ml-worker-tensorflow-infer",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init='sleep 30',  # Give enough time for other workers to exit (memory!)
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker-torch",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init='sleep 30',  # Give enough time for other workers to exit (memory!)
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
Example #9
def multisite_nwchem_config() -> Config:
    """Experimental multi-site configuration"""
    return Config(
        retries=1,
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=8,  # One task per node
                provider=CobaltProvider(
                    cmd_timeout=120,
                    nodes_per_block=8,
                    account='CSC249ADCD08',
                    queue='debug-cache-quad',
                    walltime="1:00:00",
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    scheduler_options='#COBALT --attrs enable_ssh=1',
                    worker_init='''
module load miniconda-3
export PATH=~/software/psi4/bin:$PATH
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env

# NWChem settings
export PATH="/home/lward/software/nwchem-6.8.1/bin/LINUX64:$PATH"
module load atp
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.0.128/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
''',
                ),
            ),
            HighThroughputExecutor(
                address='localhost',  # Using an SSH tunnel
                worker_ports=(54382, 54008),
                label="ml",
                max_workers=1,
                working_dir='/homes/lward/parsl',
                worker_logdir_root='/homes/lward/parsl',
                provider=LocalProvider(
                    channel=SSHChannel('lambda5.cels.anl.gov', script_dir='/home/lward/parsl'),
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                    worker_init='''
source /homes/lward/miniconda3/etc/profile.d/conda.sh
conda activate colmena_full
export CUDA_VISIBLE_DEVICES=17  # Pins to a GPU worker
''',
                ),
            )
        ],
        strategy=None,
    )
Example #10
import os

from parsl import HighThroughputExecutor, ThreadPoolExecutor
from parsl.addresses import address_by_hostname
from parsl.config import Config
from parsl.launchers import AprunLauncher, SimpleLauncher
from parsl.providers import LocalProvider, CobaltProvider
from parsl.channels import SSHChannel

config = Config(
    executors=[
        HighThroughputExecutor(
            address="localhost",
            label="htex",
            max_workers=2,
            provider=LocalProvider(
                init_blocks=1,
                max_blocks=1
            ),
        ),
        ThreadPoolExecutor(label="local_threads", max_threads=4)
    ],
    strategy=None
)

local_interleaved_config = Config(
    executors=[
        HighThroughputExecutor(
            address="localhost",
            label="qc",
            max_workers=2,