Example 1
    def __init__(self,
                 channels=[],
                 worker_init='',
                 cmd_timeout=30,
                 parallelism=1,
                 move_files=None):

        self.channels = channels
        self._label = 'ad-hoc'
        self.worker_init = worker_init
        self.cmd_timeout = cmd_timeout
        self.parallelism = 1  # Note: the parallelism argument is accepted but unused; parallelism stays fixed at 1
        self.move_files = move_files
        self.launcher = SimpleLauncher()
        self.init_blocks = self.min_blocks = self.max_blocks = len(channels)

        # This will be overridden by the DFK to the rundirs.
        self.script_dir = "."

        # In ad-hoc mode, nodes_per_block should be 1
        self.nodes_per_block = 1

        # Dictionary that keeps track of jobs, keyed on job_id
        self.resources = {}

        self.least_loaded = self._least_loaded()
        logger.debug("AdHoc provider initialized")
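The snippet above is the constructor of Parsl's AdHocProvider, which starts one block per channel. A minimal sketch of how such a provider is typically wired into a Config follows; the hostnames and worker_init value are placeholders, not values taken from the example.

from parsl.config import Config
from parsl.channels import SSHChannel
from parsl.executors import HighThroughputExecutor
from parsl.providers import AdHocProvider

# One SSHChannel per remote machine; the provider uses one block per channel.
config = Config(
    executors=[
        HighThroughputExecutor(
            label="remote_htex",
            max_workers=2,
            provider=AdHocProvider(
                worker_init="source ~/setup_parsl_env.sh",  # placeholder environment setup
                channels=[SSHChannel(host) for host in ("host1.example.org", "host2.example.org")],
            ),
        )
    ],
)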
Example 2
def test_one_block():

    oneshot_provider = OneShotLocalProvider(
        channel=LocalChannel(),
        init_blocks=0,
        min_blocks=0,
        max_blocks=10,
        launcher=SimpleLauncher(),
    )

    config = Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                worker_debug=True,
                cores_per_worker=1,
                provider=oneshot_provider,
            )
        ],
        strategy='simple',
    )

    parsl.load(config)

    f = app()
    f.result()
    parsl.clear()

    assert oneshot_provider.recorded_submits == 1
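For this test to run, app and OneShotLocalProvider must already be defined in the test module. A hypothetical sketch of what they could look like (the names appear in the test above, but the bodies below are assumptions):

import parsl
from parsl.providers import LocalProvider

@parsl.python_app
def app():
    return 7  # any trivial task will do; only the submit count matters


class OneShotLocalProvider(LocalProvider):
    """LocalProvider variant that records how many times submit() is called."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.recorded_submits = 0

    def submit(self, *args, **kwargs):
        self.recorded_submits += 1
        return super().submit(*args, **kwargs)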
Example 3
def local_config(log_dir: str, max_workers: int, prefetch: int = 0) -> Config:
    """Single node with a single task per worker

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        max_workers: Maximum number of concurrent tasks
        prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc-worker",
                max_workers=max_workers,
                prefetch_capacity=prefetch,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                )
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
Example 4
def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                worker_debug=True,
                cores_per_worker=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                ),
            )
        ],
        strategy=None,
    )
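A config factory like fresh_config() is normally loaded before any apps are defined or invoked. A minimal usage sketch (the double app is illustrative only, not part of the original code):

import parsl

parsl.load(fresh_config())

@parsl.python_app
def double(x):
    return 2 * x

assert double(21).result() == 42
parsl.clear()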
Example 5
def theta_nwchem_config(
    choice: str,
    log_dir: str,
    nodes_per_nwchem: int = 2,
    total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))
) -> Config:
    """Theta configuration to run NWChem

    Args:
        choice: Choice of the runtime configuration
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    if choice == "htex":
        qc_exec = HighThroughputExecutor(
            address=address_by_hostname(),
            label="qc",
            max_workers=nwc_workers,
            cores_per_worker=1e-6,
            provider=LocalProvider(
                nodes_per_block=1,
                init_blocks=0,
                max_blocks=1,
                launcher=SimpleLauncher(),  # Places worker on the launch node
                worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
            ),
        )
    elif choice == 'thread':
        qc_exec = ThreadPoolExecutor(label='qc', max_threads=nwc_workers)
    else:
        raise ValueError(f'Choice "{choice}" not recognized')

    return Config(executors=[qc_exec],
                  run_dir=log_dir,
                  strategy='simple',
                  max_idletime=15.)
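Hypothetical call sites for the factory above; the argument values are illustrative, and total_nodes defaults to $COBALT_JOBSIZE, which must be a multiple of nodes_per_nwchem:

# Pilot-job style: HighThroughputExecutor workers on the aprun-capable launch node
qc_config = theta_nwchem_config('htex', log_dir='runinfo', nodes_per_nwchem=2)

# In-process thread pool, e.g. for debugging outside a Cobalt job
debug_config = theta_nwchem_config('thread', log_dir='runinfo', nodes_per_nwchem=1)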
Example 6
from parsl.providers import LocalProvider
from parsl.channels import LocalChannel
from parsl.launchers import SimpleLauncher

from parsl.config import Config
from parsl.executors import ExtremeScaleExecutor

config = Config(
    executors=[
        ExtremeScaleExecutor(label="Extreme_Local",
                             worker_debug=True,
                             ranks_per_node=4,
                             provider=LocalProvider(
                                 channel=LocalChannel(),
                                 init_blocks=1,
                                 max_blocks=1,
                                 launcher=SimpleLauncher(),
                             ))
    ],
    strategy=None,
)
Example 7
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(
                            os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=nwc_workers,
                cores_per_worker=1e-6,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
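With two executors in one Config, apps are pinned to the "qc" or "ml" executor via the executors= argument of the Parsl app decorators. The app bodies below are placeholders, not code from this project:

from parsl import bash_app, python_app

@bash_app(executors=['qc'])
def run_nwchem(input_file):
    return f'aprun -n 128 nwchem {input_file}'  # placeholder command line

@python_app(executors=['ml'])
def score_molecules(model, molecules):
    return model.predict(molecules)  # placeholder inference call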
Example 8
def local_config(log_dir: str, max_workers: int, prefetch: int = 0) -> Config:
    """Single node with a single task per worker

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        max_workers: Maximum number of concurrent tasks
        prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc-worker",
                max_workers=max_workers,
                prefetch_capacity=prefetch,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker-tensorflow",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init='sleep 30',  # Give enough time for other workers to exit (memory!)
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                )),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker-tensorflow-infer",  # Something about training and then running a model causes issues?
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init='sleep 30',  # Give enough time for other workers to exit (memory!)
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                )),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker-torch",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    worker_init='sleep 30',  # Give enough time for other workers to exit (memory!)
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ))
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
Example 9
def multisite_nwchem_config() -> Config:
    """Experimental multi-site configuration"""
    return Config(
        retries=1,
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=8,  # One task per node
                provider=CobaltProvider(
                    cmd_timeout=120,
                    nodes_per_block=8,
                    account='CSC249ADCD08',
                    queue='debug-cache-quad',
                    walltime="1:00:00",
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    scheduler_options='#COBALT --attrs enable_ssh=1',
                    worker_init='''
module load miniconda-3
export PATH=~/software/psi4/bin:$PATH
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env

# NWChem settings
export PATH="/home/lward/software/nwchem-6.8.1/bin/LINUX64:$PATH"
module load atp
export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/compilers_and_libraries_2018.0.128/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
''',
                ),
            ),
            HighThroughputExecutor(
                address='localhost',  # Using an SSH tunnel
                worker_ports=(54382, 54008),
                label="ml",
                max_workers=1,
                working_dir='/homes/lward/parsl',
                worker_logdir_root='/homes/lward/parsl',
                provider=LocalProvider(
                    channel=SSHChannel('lambda5.cels.anl.gov', script_dir='/home/lward/parsl'),
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                    worker_init='''
source /homes/lward/miniconda3/etc/profile.d/conda.sh
conda activate colmena_full
export CUDA_VISIBLE_DEVICES=17  # Pins to a GPU worker
''',
                ),
            )
        ],
        strategy=None,
    )
Example 10
            ThreadPoolExecutor(label="local_threads", max_threads=4)
        ],
        strategy=None,
    )

theta_nwchem_config = Config(
    executors=[
        HighThroughputExecutor(
            address=address_by_hostname(),
            label="htex",
            max_workers=int(os.environ.get("COBALT_JOBSIZE", 1)),
            provider=LocalProvider(
                nodes_per_block=1,
                init_blocks=1,
                max_blocks=1,
                launcher=SimpleLauncher(),  # Places worker on the launch node
                worker_init='''
module load miniconda-3
export PATH=~/software/psi4/bin:$PATH
conda activate /lus/theta-fs0/projects/CSC249ADCD08/colmena/env
''',
            ),
        ),
        ThreadPoolExecutor(label="local_threads", max_threads=4)
    ],
    strategy=None,
)

theta_interleaved_config = Config(
    executors=[
        HighThroughputExecutor(
Example 11
def test():
    import parsl
    from pyscf import lib, gto, scf
    import numpy as np
    import pandas as pd
    import logging

    from parsl.config import Config
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.launchers import SimpleLauncher
    from parsl.executors import ExtremeScaleExecutor
    ncore = 4
    config = Config(
        executors=[
            ExtremeScaleExecutor(label="Extreme_Local",
                                 worker_debug=True,
                                 ranks_per_node=ncore,
                                 provider=LocalProvider(
                                     channel=LocalChannel(),
                                     init_blocks=1,
                                     max_blocks=1,
                                     launcher=SimpleLauncher()))
        ],
        strategy=None,
    )

    parsl.load(config)

    mol = gto.M(atom='H 0. 0. 0.; H 0. 0. 2.0',
                unit='bohr',
                ecp='bfd',
                basis='bfd_vtz')
    mf = scf.RHF(mol).run()
    mol.output = None
    mol.stdout = None
    mf.output = None
    mf.stdout = None
    mf.chkfile = None
    from pyqmc import ExpCuspFunction, GaussianFunction, MultiplyWF, PySCFSlaterRHF, JastrowSpin, initial_guess, EnergyAccumulator
    from pyqmc.accumulators import PGradTransform, LinearTransform

    nconf = 1600
    basis = [
        ExpCuspFunction(2.0, 1.5),
        GaussianFunction(0.5),
        GaussianFunction(2.0),
        GaussianFunction(.25),
        GaussianFunction(1.0),
        GaussianFunction(4.0),
        GaussianFunction(8.0)
    ]
    wf = MultiplyWF(PySCFSlaterRHF(mol, mf), JastrowSpin(mol, basis, basis))
    coords = initial_guess(mol, nconf)
    energy_acc = EnergyAccumulator(mol)
    pgrad_acc = PGradTransform(
        energy_acc, LinearTransform(wf.parameters, ['wf2acoeff', 'wf2bcoeff']))

    from pyqmc.optsr import gradient_descent
    gradient_descent(wf,
                     coords,
                     pgrad_acc,
                     vmc=distvmc,  # distvmc is assumed to be a distributed VMC driver imported/defined elsewhere in this module
                     vmcoptions={
                         'npartitions': ncore,
                         'nsteps': 100,
                         'nsteps_per': 100
                     })
Example 12
def theta_persistent(log_dir: str,
                     nodes_per_nwchem: int = 1,
                     qc_nodes: int = 8,
                     ml_nodes: int = 8,
                     ml_prefetch: int = 0) -> Config:
    """Configuration where the application is persistent and sits on the Theta login node.

    Nodes will be requested from Cobalt using separate jobs for ML and QC tasks.

    Args:
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        qc_nodes: Number of nodes dedicated to QC tasks
        ml_nodes: Number of nodes dedicated to ML tasks
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    return Config(
        retries=8,
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=qc_nodes // nodes_per_nwchem,
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    queue='debug-cache-quad' if qc_nodes <= 8 else None,
                    walltime='00:60:00',
                    nodes_per_block=qc_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                    cmd_timeout=360,
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env


export OMP_NUM_THREADS=64
export KMP_INIT_AT_FORK=FALSE
export PYTHONPATH=$PYTHONPATH:$(pwd)

export PATH="/lus/theta-fs0/projects/CSC249ADCD08/software/nwchem-6.8.1/bin/LINUX64:$PATH"
mkdir -p scratch  # For the NWChem tasks
pwd
which nwchem
hostname
module load atp
export MPICH_GNI_MAX_EAGER_MSG_SIZE=16384
export MPICH_GNI_MAX_VSHORT_MSG_SIZE=10000
export MPICH_GNI_MAX_EAGER_MSG_SIZE=131072
export MPICH_GNI_NUM_BUFS=300
export MPICH_GNI_NDREG_MAXSIZE=16777216
export MPICH_GNI_MBOX_PLACEMENT=nic
export MPICH_GNI_LMT_PATH=disabled
export COMEX_MAX_NB_OUTSTANDING=6
export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64_lin/:/opt/intel/compilers_and_libraries_2020.0.166/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=CobaltProvider(
                    account='CSC249ADCD08',
                    queue='debug-flat-quad',
                    nodes_per_block=ml_nodes,
                    scheduler_options='#COBALT --attrs enable_ssh=1',
                    walltime='00:60:00',
                    init_blocks=0,
                    max_blocks=1,
                    cmd_timeout=360,
                    launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env''',
                ),
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)