Example #1
import os

from parsl.channels import LocalChannel
from parsl.config import Config
from parsl.data_provider.file_noop import NoOpFileStaging
from parsl.data_provider.ftp import FTPInTaskStaging
from parsl.data_provider.http import HTTPInTaskStaging
from parsl.executors import HighThroughputExecutor
from parsl.launchers import SingleNodeLauncher
from parsl.monitoring import MonitoringHub
from parsl.providers import LocalProvider

# Assumption: working_dir is defined elsewhere in the original module;
# a scratch directory under the current directory stands in here.
working_dir = os.path.join(os.getcwd(), "parsl_test_run")

def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label="htex_Local",
                working_dir=working_dir,
                storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
                worker_debug=True,
                cores_per_worker=1,
                heartbeat_period=2,
                heartbeat_threshold=5,
                poll_period=100,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=0,
                    min_blocks=0,
                    max_blocks=5,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy='simple',
        app_cache=True,
        checkpoint_mode='task_exit',
        retries=2,
        monitoring=MonitoringHub(
            hub_address="localhost",
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=1,
        )
    )
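A minimal way to exercise this configuration (a sketch, assuming a standard Parsl install; the double app is illustrative, not part of the original):

import parsl
from parsl import python_app

parsl.load(fresh_config())

@python_app
def double(x):
    return x * 2

# With app_cache=True and checkpoint_mode='task_exit', rerunning the same
# call in a later run can be satisfied from the checkpoint database.
print(double(21).result())  # 42
parsl.dfk().cleanup()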
Example #2
def configure_parsl(n_threads, monitoring, **kwargs):
    import logging

    from parsl.config import Config
    from parsl.executors.threads import ThreadPoolExecutor
    from parsl.addresses import address_by_hostname

    if monitoring:
        from parsl.monitoring import MonitoringHub
        monitoring = MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            logging_level=logging.INFO,
            resource_monitoring_interval=10,
        )
    else:
        monitoring = None

    local_threads = ThreadPoolExecutor(max_threads=n_threads,
                                       label='local_threads')
    config = Config(
        executors=[local_threads],
        monitoring=monitoring,
        strategy=None,
        app_cache=True,
    )
    return config
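For illustration, a hypothetical call that enables monitoring and loads the resulting configuration:

import parsl

config = configure_parsl(n_threads=4, monitoring=True)
parsl.load(config)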
Example #3
import os

from parsl.addresses import address_by_hostname
from parsl.config import Config
from parsl.executors import HighThroughputExecutor, ThreadPoolExecutor
from parsl.launchers import AprunLauncher
from parsl.monitoring import MonitoringHub
from parsl.providers import LocalProvider


def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "Total node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            ThreadPoolExecutor(label='qc', max_threads=nwc_workers),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=nodes_per_nwchem,  # Minimum increment in blocks
                    init_blocks=0,
                    max_blocks=total_nodes // nodes_per_nwchem,  # Limits the number of manager processes
                    launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),  # Places workers on the compute nodes
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
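A hypothetical invocation to make the node accounting concrete (all numbers are illustrative): on an 8-node allocation with the default nodes_per_nwchem=2, the assert passes and the 'qc' pool gets 8 // 2 = 4 worker threads:

config = theta_nwchem_config(log_dir='./runinfo', nodes_per_nwchem=2, total_nodes=8)
# nwc_workers = 8 // 2 = 4 -> ThreadPoolExecutor('qc', max_threads=4)
# 'ml' executor scales up to 8 // 2 = 4 blocks of 2 nodes each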
Example #4
import logging

from parsl import ThreadPoolExecutor
from parsl.config import Config
from parsl.monitoring import MonitoringHub

config = Config(executors=[ThreadPoolExecutor(label='threads', max_threads=4)],
                monitoring=MonitoringHub(
                    hub_address="localhost",
                    hub_port=55055,
                    logging_level=logging.INFO,
                    resource_monitoring_interval=3,
                ))
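A short usage sketch (the inc app is illustrative, not part of the original): loading this config and running one app is enough to generate monitoring records:

import parsl
from parsl import python_app

parsl.load(config)

@python_app
def inc(x):
    return x + 1

print(inc(41).result())  # 42; task and resource info is recorded by the MonitoringHub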
Example #5
import os

from parsl.addresses import address_by_hostname
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.launchers import AprunLauncher
from parsl.monitoring import MonitoringHub
from parsl.providers import LocalProvider


def theta_xtb_config(log_dir: str,
                     xtb_per_node: int = 1,
                     ml_tasks_per_node: int = 1,
                     total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration where QC tasks and ML tasks run on single nodes.

    There are no MPI tasks in this configuration.

    Args:
        xtb_per_node: Number of XTB calculations to place on each node
        ml_tasks_per_node: Number of ML tasks to place on each node
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=xtb_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=0,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places workers on the compute nodes
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=ml_tasks_per_node,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=total_nodes,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places workers on the compute nodes
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
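Illustrative arithmetic for this layout (values are assumptions): with total_nodes=4, each executor launches one 4-node block, placing xtb_per_node QC workers and ml_tasks_per_node ML workers on every node:

config = theta_xtb_config(log_dir='./runinfo', xtb_per_node=1, ml_tasks_per_node=1, total_nodes=4)
# 'qc': 1 block x 4 nodes x 1 worker per node, started on demand (init_blocks=0)
# 'ml': 1 block x 4 nodes x 1 worker per node, started eagerly (init_blocks=1)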
Example #6
    obs_lsst_configs="/opt/lsst/software/stack/obs_lsst/config/",

    # this is the butler repo to use
    repo_dir="/global/cscratch1/sd/bxc/lsst-dm-repo-1",
    root_softs="/global/homes/b/bxc/dm/",
    # What is ROOT_SOFTS in general? It has come from the SRS workflow;
    # probably the path to this workflow's repo, one level up.

    # This specifies a function (str -> str) which rewrites a bash command into
    # one appropriately wrapped for whichever container/environment is being used
    # with this configuration (for example, wrap_shifter_container writes the
    # command to a temporary file and then invokes that file inside shifter)
    wrap=partial(wrap_shifter_container,
                 image_id="lsstdesc/desc-drp-stack:v19-dc2-run2.2-v4"),
    wrap_sql=wrap_no_op,
    parsl_config=Config(
        executors=[cori_queue_executor_1, cori_queue_executor_2, local_executor],
        strategy='htex',
        app_cache=True,
        checkpoint_mode='task_exit',
        checkpoint_files=get_all_checkpoints(),
        retries=2,
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=True,
            resource_monitoring_interval=10)))

configuration = cori_shifter_debug_config
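The comments above describe wrap as a plain str -> str rewriter. Below is a minimal sketch of such a wrapper, assuming a shifter-style container runtime; the function name and temp-file layout are illustrative, not the original wrap_shifter_container:

import os
import tempfile


def wrap_in_container(command: str, image_id: str) -> str:
    # Persist the command to a throwaway script...
    fd, path = tempfile.mkstemp(suffix='.sh', text=True)
    with os.fdopen(fd, 'w') as f:
        f.write('#!/bin/bash\n' + command + '\n')
    os.chmod(path, 0o755)
    # ...and return a command line that runs that script inside the container.
    return f'shifter --image={image_id} {path}'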
Example #7
from parsl import ThreadPoolExecutor
from parsl.config import Config
from parsl.monitoring import MonitoringHub

config = Config(executors=[ThreadPoolExecutor(label='threads', max_threads=4)],
                monitoring=MonitoringHub(
                    hub_address="localhost",
                    hub_port=55055,
                    resource_monitoring_interval=3,
                ))
Example #8
import os

from parsl.addresses import address_by_hostname
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.launchers import AprunLauncher, SimpleLauncher
from parsl.monitoring import MonitoringHub
from parsl.providers import LocalProvider


def theta_nwchem_config(ml_workers: int, log_dir: str, nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        ml_workers: Number of nodes dedicated to ML tasks
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    nwc_nodes = total_nodes - ml_workers
    assert nwc_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = nwc_nodes // nodes_per_nwchem

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc",
                max_workers=nwc_workers,
                cores_per_worker=1e-6,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                provider=LocalProvider(
                    nodes_per_block=ml_workers,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=AprunLauncher(overrides='-d 64 --cc depth'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy=None,
    )
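To make the accounting above concrete, a hypothetical 10-node allocation (values illustrative): reserving 2 nodes for ML leaves 8 for NWChem, so the assert passes and 4 QC workers are created:

config = theta_nwchem_config(ml_workers=2, log_dir='./runinfo', nodes_per_nwchem=2, total_nodes=10)
# nwc_nodes = 10 - 2 = 8; 8 % 2 == 0; nwc_workers = 8 // 2 = 4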
Example #9
        working_dir=working_dir,
        storage_access=[
            FTPInTaskStaging(),
            HTTPInTaskStaging(),
            NoOpFileStaging()
        ],
        worker_debug=True,
        cores_per_worker=1,
        heartbeat_period=2,
        heartbeat_threshold=5,
        poll_period=100,
        provider=LocalProvider(
            channel=LocalChannel(),
            init_blocks=0,
            min_blocks=0,
            max_blocks=5,
            launcher=SingleNodeLauncher(),
        ),
    )
],
                strategy='simple',
                app_cache=True,
                checkpoint_mode='task_exit',
                retries=2,
                monitoring=MonitoringHub(
                    hub_address="localhost",
                    hub_port=55055,
                    monitoring_debug=False,
                    resource_monitoring_interval=1,
                ))
Example #10
    ##       To discover the hash, run the command "$ shifterimg lookup <image name>"
    ## Image name = lsstdesc/desc-drp-stack:v19-dc2-run2.2-v5
    ##     corresponds to
    ## Hash = 2d1db8fd83d62956ca0fbbe544c7f194f7aee72c106afd58ad2f1094d4c77435
    ##
    ## --image=id:$(shifterimg lookup <image name>)
    ## OLD WAY  wrap=partial(wrap_shifter_container, image_id="lsstdesc/desc-drp-stack:v19-dc2-run2.2-v5"),
    wrap=partial(
        wrap_shifter_container,
        image_id="id:2d1db8fd83d62956ca0fbbe544c7f194f7aee72c106afd58ad2f1094d4c77435"),
    wrap_sql=wrap_no_op,
    parsl_config=Config(
        executors=[
            local_executor, cori_knl_1, cori_knl_2, cori_knl_3, cori_knl_4,
            cori_knl_5
        ],
        app_cache=True,
        checkpoint_mode='task_exit',
        checkpoint_files=get_all_checkpoints(),
        retries=2,  # plus the original attempt
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_enabled=True,
            resource_monitoring_interval=100,  # seconds
            workflow_name="DRPtest")))

configuration = cori_shifter_debug_config
Example #11
import logging

import parsl
from parsl import bash_app
from parsl.addresses import address_by_hostname
from parsl.config import Config
from parsl.executors.threads import ThreadPoolExecutor
from parsl.monitoring import MonitoringHub

# Define a configuration for using local threads and pilot jobs
#parsl.set_stream_logger()
FILENAME = 'log_monitor.txt'
parsl.set_file_logger(FILENAME, level=logging.DEBUG)
config = Config(
    executors=[
        ThreadPoolExecutor(
            max_threads=8, 
            label='local_threads'
        )
    ],
    monitoring=MonitoringHub(
        hub_address=address_by_hostname(),
        hub_port=55055,
        logging_level=logging.INFO,
        resource_monitoring_interval=10,
    ),
    strategy=None
)

parsl.clear()
parsl.load(config)
print('Modules Imported!')


# App that generates an output file by running the given executable
@bash_app
def echo1(inputs=[], outputs=[], stdout=parsl.AUTO_LOGNAME, stderr=parsl.AUTO_LOGNAME):
    command = '{exe} {output}'.format(
        exe    = inputs[0],