Example #1
def local_interleaved_config(qc_workers: int, ml_workers: int, log_dir: str) -> Config:
    """All workers on the local machine, split between QC and ML tasks

    Args:
        qc_workers: Number of quantum chemistry workers
        ml_workers: Number of machine learning workers
        log_dir: Path to store monitoring DB and parsl logs
    Returns:
        (Config): Desired configuration
    """
    return Config(
        executors=[
            HighThroughputExecutor(
                address="localhost",
                label="qc",
                max_workers=qc_workers,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1
                ),
            ),
            HighThroughputExecutor(
                address="localhost",
                label="ml",
                max_workers=ml_workers,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1
                ),
            )
        ],
        run_dir=log_dir,
        strategy=None
    )
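A minimal usage sketch for this config (assuming standard parsl imports; the app name is illustrative): apps are routed to an executor by its label.

import parsl
from parsl import python_app

parsl.load(local_interleaved_config(qc_workers=4, ml_workers=2, log_dir='runinfo'))

# Route this app to the 'qc' executor; executors=['ml'] works the same way.
@python_app(executors=['qc'])
def simulate(x):
    return x * 2

print(simulate(21).result())  # 42
parsl.clear()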
Example #2
def test_local_channel():
    with tempfile.TemporaryDirectory() as script_dir:
        p = LocalProvider(channel=LocalChannel(),
                          launcher=SingleNodeLauncher(debug=False))
        p.script_dir = script_dir
        _run_tests(p)
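_run_tests is defined elsewhere in the test module. A hypothetical sketch of such a harness, built on parsl's ExecutionProvider interface (submit(command, tasks_per_node), status(job_ids), cancel(job_ids)); the body here is assumed, not the original:

def _run_tests(p):
    # Submit a trivial shell job through the provider, poll it, then cancel it.
    job_id = p.submit('echo hello; sleep 1', tasks_per_node=1)
    print('Job status:', p.status([job_id]))
    p.cancel([job_id])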
Example #3
def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label="htex_Local",
                working_dir=working_dir,
                storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()],
                worker_debug=True,
                cores_per_worker=1,
                heartbeat_period=2,
                heartbeat_threshold=5,
                poll_period=100,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=0,
                    min_blocks=0,
                    max_blocks=5,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy='simple',
        app_cache=True,
        checkpoint_mode='task_exit',
        retries=2,
        monitoring=MonitoringHub(
            hub_address="localhost",
            hub_port=55055,
            monitoring_debug=False,
            resource_monitoring_interval=1,
        )
    )
Example #4
def load_ir2_dc_config():
    """
    Load the parsl config for ad-hoc providers.
    """
    try:
        parsl.DataFlowKernelLoader.dfk()
        print("parsl config is already loaded.")
        return
    except RuntimeError:
        pass

    executors = []

    for host in WORKER_NODE_ADDRESSES:
        channel = SSHChannel(hostname=host, script_dir=script_dir(host))
        provider = LocalProvider(channel=channel,
                                 init_blocks=1,
                                 worker_init='source %s' % SETUP_SCRIPT)
        executors.append(
            HighThroughputExecutor(label=host,
                                   address=MOTHER_NODE_ADDRESS,
                                   worker_debug=False,
                                   provider=provider,
                                   heartbeat_period=2,
                                   heartbeat_threshold=10))

    config = Config(executors=executors, strategy=None, retries=3)

    parsl.load(config)
Example #5
def test_parsl_htex_executor():
    parsl = pytest.importorskip("parsl", minversion="0.7.2")

    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.addresses import address_by_hostname
    from parsl.config import Config
    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address=address_by_hostname(),
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(channel=LocalChannel(),
                                       init_blocks=1,
                                       max_blocks=1,
                                       nodes_per_block=1),
            )
        ],
        strategy=None,
    )

    do_parsl_job(parsl_config)
Example #6
    def __init__(self,
                 provider=LocalProvider(),
                 label='ipp',
                 working_dir=None,
                 controller=Controller(),
                 container_image=None,
                 engine_dir=None,
                 storage_access=None,
                 engine_debug_level=None,
                 workers_per_node=1,
                 managed=True):
        self.provider = provider
        self.label = label
        self.working_dir = working_dir
        self.controller = controller
        self.engine_debug_level = engine_debug_level
        self.container_image = container_image
        self.engine_dir = engine_dir
        self.workers_per_node = workers_per_node
        self.storage_access = storage_access if storage_access is not None else []
        if len(self.storage_access) > 1:
            raise ConfigurationError(
                'Multiple storage access schemes are not yet supported')
        self.managed = managed

        self.debug_option = ""
        if self.engine_debug_level:
            self.debug_option = "--log-level={}".format(
                self.engine_debug_level)
Example #7
    def __init__(self,
                 provider=LocalProvider(),
                 label='ipp',
                 working_dir=None,
                 controller=Controller(),
                 container_image=None,
                 engine_dir=None,
                 storage_access=None,
                 engine_debug_level=None,
                 workers_per_node=1,
                 managed=True):
        self.provider = provider
        self.label = label
        self.working_dir = working_dir
        self.controller = controller
        self.engine_debug_level = engine_debug_level
        self.container_image = container_image
        self.engine_dir = engine_dir
        self.workers_per_node = workers_per_node
        self.storage_access = storage_access
        self.managed = managed

        self.debug_option = ""
        if self.engine_debug_level:
            self.debug_option = "--log-level={}".format(
                self.engine_debug_level)
Example #8
def test_dynamic_executor():
    dfk = parsl.load()
    tasks = [sleeper() for i in range(5)]
    results = [i.result() for i in tasks]
    print("Done with initial test. The results are", results)

    # Here we add a new executor to an active DFK
    thread_executors = [ThreadPoolExecutor(label='threads2', max_threads=4)]
    dfk.add_executors(executors=thread_executors)
    tasks = [cpu_stress() for i in range(8)]
    results = [i.result() for i in tasks]
    print(
        "Successfully added thread executor and ran with it. The results are",
        results)

    # We add a htex executor to an active DFK
    executors = [
        HighThroughputExecutor(
            label='htex_local',
            cores_per_worker=1,
            max_workers=5,
            provider=LocalProvider(
                init_blocks=1,
                max_blocks=1,
            ),
        )
    ]
    dfk.add_executors(executors=executors)
    tasks = [add() for i in range(10)]
    results = [i.result() for i in tasks]
    print("Successfully added htex executor and ran with it. The results are",
          results)

    print("Done testing")
    parsl.clear()
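The sleeper, cpu_stress, and add apps are defined elsewhere in the test module; plausible stand-in definitions (assumed, not verbatim) would pin each batch to the executor added for it:

@python_app
def sleeper(dur=0.2):
    import time
    time.sleep(dur)
    return dur

@python_app(executors=['threads2'])
def cpu_stress(n=10**5):
    return sum(range(n))

@python_app(executors=['htex_local'])
def add(x=1, y=2):
    return x + y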
Example #9
def test_1():

    x = HTEX(
        label='htex',
        provider=LocalProvider(channel=LocalChannel()),  # channel must be an instance, not the class
        address="127.0.0.1",
    )
    task_p, result_p, command_p = x.start()
    print(task_p, result_p, command_p)
    print("Executor initialized : ", x)

    args = [2]
    kwargs = {}
    f1 = x.submit(double, *args, **kwargs)
    print("Sent task with :", f1)
    args = [2]
    kwargs = {}
    f2 = x.submit(fail, *args, **kwargs)

    print("hi")
    while True:
        stop = input("Stop ? (y/n)")
        if stop == "y":
            break

    print("F1: {}, f2: {}".format(f1.done(), f2.done()))
    x.shutdown()
Example #10
def _get_parsl_config():
    """Get the Parsl config.

    Returns
    -------
    parsl.config.Config
        Parsl config to execute tasks.
    """

    config = Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                worker_debug=False,
                poll_period=1,
                cores_per_worker=1,
                max_workers=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    min_blocks=1,
                ),
            )
        ],
        strategy=None,
    )
    return config
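A sketch of how the returned config might be consumed (the inc app is illustrative, not from the source):

import parsl
from parsl import python_app

parsl.load(_get_parsl_config())

@python_app
def inc(x):
    return x + 1

assert inc(1).result() == 2
parsl.clear()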
Example #11
def fresh_config():

    return Config(executors=[
        IPyParallelExecutor(label="local_ipp",
                            engine_dir='engines',
                            provider=LocalProvider(channel=LocalChannel(),
                                                   init_blocks=2,
                                                   max_blocks=2))
    ])
Example #12
def test_parsl_executor():
    parsl = pytest.importorskip("parsl", minversion="0.7.2")

    from coffea.processor import run_parsl_job

    from coffea.processor.parsl.detail import (_parsl_initialize, _parsl_stop)

    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.addresses import address_by_hostname
    from parsl.config import Config
    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address=address_by_hostname(),
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(channel=LocalChannel(),
                                       init_blocks=1,
                                       max_blocks=1,
                                       nodes_per_block=1),
            )
        ],
        strategy=None,
    )

    import os
    import os.path as osp

    filelist = {
        'ZJets': [osp.join(os.getcwd(), 'tests/samples/nano_dy.root')],
        'Data': [osp.join(os.getcwd(), 'tests/samples/nano_dimuon.root')]
    }
    treename = 'Events'

    from coffea.processor.test_items import NanoTestProcessor
    from coffea.processor.parsl.parsl_executor import parsl_executor

    dfk = _parsl_initialize(parsl_config)

    proc = NanoTestProcessor()

    hists = run_parsl_job(filelist,
                          treename,
                          processor_instance=proc,
                          executor=parsl_executor,
                          data_flow=dfk)

    _parsl_stop(dfk)

    assert (hists['cutflow']['ZJets_pt'] == 4)
    assert (hists['cutflow']['ZJets_mass'] == 1)
    assert (hists['cutflow']['Data_pt'] == 15)
    assert (hists['cutflow']['Data_mass'] == 5)
Example #13
    def __init__(
        self,
        provider: Optional[ExecutionProvider] = None,
        managed: bool = True,
        working_dir: Optional[str] = None,
        label: str = "FluxExecutor",
        flux_executor_kwargs: Mapping = {},
        flux_path: Optional[str] = None,
        launch_cmd: Optional[str] = None,
    ):
        super().__init__()
        if provider is None:
            provider = LocalProvider()
        self._provider = provider
        self.label = label
        if working_dir is None:
            working_dir = self.label + "_" + str(uuid.uuid4())
        self.working_dir = os.path.abspath(working_dir)
        self.managed = managed
        # check that flux_path is an executable, or look for flux in PATH
        if flux_path is None:
            flux_path = shutil.which("flux")
            if flux_path is None:
                raise EnvironmentError("Cannot find Flux installation in PATH")
        self.flux_path = os.path.abspath(flux_path)
        self._task_id_counter = itertools.count()
        self._socket = zmq.Context().socket(zmq.REP)
        if launch_cmd is None:
            self.launch_cmd = self.DEFAULT_LAUNCH_CMD
        self._submission_queue: queue.Queue = queue.Queue()
        self._stop_event = threading.Event()
        # lock to protect self._task_id_counter and also submission/shutdown race
        self._submission_lock = threading.Lock()
        self.flux_executor_kwargs = flux_executor_kwargs
        self._submission_thread = threading.Thread(
            target=_submit_wrapper,
            args=(
                self._submission_queue,
                self._stop_event,
                self._socket,
                self.working_dir,
                self.flux_executor_kwargs,
                self.provider,
                self,
                self.flux_path,
                self.launch_cmd,
            ),
            daemon=True,
        )
        # add a ``weakref.finalize()`` function for joining the executor thread
        weakref.finalize(
            self,
            lambda x, y: x.set() or y.join(),
            self._stop_event,
            self._submission_thread,
        )
Example #14
def local_config(log_dir: str, max_workers: int, prefetch: int = 0) -> Config:
    """Single node with a single task per worker

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        max_workers: Maximum number of concurrent tasks
        prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """

    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc-worker",
                max_workers=max_workers,
                prefetch_capacity=prefetch,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
Example #15
def test_2():

    from funcx_endpoint.executors.high_throughput.executor import executor_starter

    htex = HTEX(label='htex',
                provider=LocalProvider(channel=LocalChannel()),  # channel must be an instance, not the class
                address="127.0.0.1")
    print("Foo")
    executor_starter(htex, "forwarder", "ep_01")
    print("Here")
Example #16
    def __init__(
            self,
            # Scaling mechanics
            provider=LocalProvider(),
            scaling_enabled=True,
            # Connection info
            funcx_service_address='https://api.funcx.org/v1',
            worker_ports=None,
            worker_port_range=(54000, 55000),
            # Scaling info
            strategy=SimpleStrategy(),
            max_workers_per_node=float('inf'),
            cores_per_worker=1.0,
            mem_per_worker=None,
            launch_cmd=None,
            # Tuning info
            worker_mode='no_container',
            scheduler_mode='hard',
            container_type=None,
            prefetch_capacity=10,
            heartbeat_period=2,
            heartbeat_threshold=10,
            poll_period=10,
            # Logging info
            working_dir=None,
            worker_debug=False):

        # Scaling mechanics
        self.provider = provider
        self.scaling_enabled = scaling_enabled

        # Connection info
        self.funcx_service_address = funcx_service_address
        self.worker_ports = worker_ports
        self.worker_port_range = worker_port_range

        # Scaling info
        self.strategy = strategy
        self.max_workers_per_node = max_workers_per_node
        self.cores_per_worker = cores_per_worker
        self.mem_per_worker = mem_per_worker
        self.launch_cmd = launch_cmd

        # Tuning info
        self.worker_mode = worker_mode
        self.scheduler_mode = scheduler_mode
        self.container_type = container_type
        self.prefetch_capacity = prefetch_capacity
        self.heartbeat_period = heartbeat_period
        self.heartbeat_threshold = heartbeat_threshold
        self.poll_period = poll_period

        # Logging info
        self.working_dir = working_dir
        self.worker_debug = worker_debug
Example #17
def test_ssh_channel():
    with tempfile.TemporaryDirectory() as config_dir:
        sshd_thread, priv_key, server_port = _start_sshd(config_dir)
        try:
            with tempfile.TemporaryDirectory() as remote_script_dir:
                # The SSH library fails to add the new host key to the file if the file does not
                # already exist, so create it here.
                pathlib.Path(
                    '{}/known.hosts'.format(config_dir)).touch(mode=0o600)
                script_dir = tempfile.mkdtemp()
                p = LocalProvider(channel=SSHChannel(
                    '127.0.0.1',
                    port=server_port,
                    script_dir=remote_script_dir,
                    host_keys_filename='{}/known.hosts'.format(config_dir),
                    key_filename=priv_key),
                                  launcher=SingleNodeLauncher(debug=False))
                p.script_dir = script_dir
                _run_tests(p)
        finally:
            _stop_sshd(sshd_thread)
Example #18
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(
                            os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            ThreadPoolExecutor(label='qc', max_threads=nwc_workers),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=nodes_per_nwchem,  # Minimum increment in blocks
                    init_blocks=0,
                    max_blocks=total_nodes // nodes_per_nwchem,  # Limits the number of manager processes
                    launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
    ''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
Example #19
    def __init__(self,
                 label='HighThroughputExecutor',
                 provider=LocalProvider(),
                 launch_cmd=None,
                 address="127.0.0.1",
                 worker_ports=None,
                 worker_port_range=(54000, 55000),
                 interchange_port_range=(55000, 56000),
                 storage_access=None,
                 working_dir=None,
                 worker_debug=False,
                 cores_per_worker=1.0,
                 max_workers=float('inf'),
                 heartbeat_threshold=120,
                 heartbeat_period=30,
                 suppress_failure=False,
                 managed=True):

        logger.debug("Initializing HighThroughputExecutor")

        self.label = label
        self.launch_cmd = launch_cmd
        self.provider = provider
        self.worker_debug = worker_debug
        self.storage_access = storage_access if storage_access is not None else []
        if len(self.storage_access) > 1:
            raise ConfigurationError(
                'Multiple storage access schemes are not supported')
        self.working_dir = working_dir
        self.managed = managed
        self.blocks = []
        self.tasks = {}
        self.cores_per_worker = cores_per_worker
        self.max_workers = max_workers

        self._task_counter = 0
        self.address = address
        self.worker_ports = worker_ports
        self.worker_port_range = worker_port_range
        self.interchange_port_range = interchange_port_range
        self.heartbeat_threshold = heartbeat_threshold
        self.heartbeat_period = heartbeat_period
        self.suppress_failure = suppress_failure
        self.run_dir = '.'

        if not launch_cmd:
            self.launch_cmd = ("process_worker_pool.py {debug} {max_workers} "
                               "-c {cores_per_worker} "
                               "--task_url={task_url} "
                               "--result_url={result_url} "
                               "--logdir={logdir} "
                               "--hb_period={heartbeat_period} "
                               "--hb_threshold={heartbeat_threshold} ")
Example #20
    def __init__(self,
                 label='ExtremeScaleExecutor',
                 provider=LocalProvider(),
                 launch_cmd=None,
                 address="127.0.0.1",
                 worker_ports=None,
                 worker_port_range=(54000, 55000),
                 interchange_port_range=(55000, 56000),
                 storage_access=None,
                 working_dir=None,
                 worker_debug=False,
                 ranks_per_node=1,
                 heartbeat_threshold=120,
                 heartbeat_period=30,
                 managed=True):

        super().__init__(label=label,
                         provider=provider,
                         launch_cmd=launch_cmd,
                         address=address,
                         worker_ports=worker_ports,
                         worker_port_range=worker_port_range,
                         interchange_port_range=interchange_port_range,
                         storage_access=storage_access,
                         working_dir=working_dir,
                         worker_debug=worker_debug,
                         heartbeat_threshold=heartbeat_threshold,
                         heartbeat_period=heartbeat_period,
                         managed=managed)

        if not _mpi_enabled:
            raise OptionalModuleMissing(
                "mpi4py",
                "Cannot initialize ExtremeScaleExecutor without mpi4py")
        else:
            # This is only to stop flake8 from complaining
            logger.debug("MPI version :{}".format(mpi4py.__version__))

        self.ranks_per_node = ranks_per_node

        logger.debug("Initializing ExtremeScaleExecutor")

        if not launch_cmd:
            self.launch_cmd = (
                "mpiexec -np {ranks_per_node} mpi_worker_pool.py "
                "{debug} "
                "--task_url={task_url} "
                "--result_url={result_url} "
                "--logdir={logdir} "
                "--hb_period={heartbeat_period} "
                "--hb_threshold={heartbeat_threshold} ")
        self.worker_debug = worker_debug
Example #21
def test_parsl_htex_executor():
    pytest.importorskip("parsl", minversion="0.7.2")
    import os
    import os.path as osp

    import parsl
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.config import Config

    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address="127.0.0.1",
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    nodes_per_block=1,
                ),
            )
        ],
        strategy=None,
    )
    parsl.load(parsl_config)

    filelist = {
        "ZJets": [osp.join(os.getcwd(), "tests/samples/nano_dy.root")],
        "Data": [osp.join(os.getcwd(), "tests/samples/nano_dimuon.root")],
    }

    do_parsl_job(filelist)
    do_parsl_job(filelist, compression=1)

    filelist = {
        "ZJets": {
            "treename": "Events",
            "files": [osp.join(os.getcwd(), "tests/samples/nano_dy.root")],
        },
        "Data": {
            "treename": "Events",
            "files": [osp.join(os.getcwd(), "tests/samples/nano_dimuon.root")],
        },
    }

    do_parsl_job(filelist)
Example #22
def spawn_forwarder(address,
                    executor=None,
                    task_q=None,
                    result_q=None,
                    endpoint_id=uuid.uuid4(),
                    logging_level=logging.INFO):
    """ Spawns a forwarder and returns the forwarder process for tracking.

    Parameters
    ----------

    address : str
       IP Address to which the endpoint must connect

    executor : Executor object. Optional
       Executor object to be instantiated.

    task_q : Queue object
       Queue object matching funcx.queues.base.FuncxQueue interface

    result_q : Queue object
       Queue object matching funcx.queues.base.FuncxQueue interface

    endpoint_id : uuid string
       Endpoint id for which the forwarder is being spawned.

    logging_level : int
       Logging level as defined in the logging module. Default: logging.INFO (20)

    Returns:
         A Forwarder object
    """
    from funcx.queues.redis import RedisQueue
    from funcx.executors import HighThroughputExecutor as HTEX
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel

    # Respect caller-supplied queues; fall back to a local Redis instance.
    if task_q is None:
        task_q = RedisQueue('task', '127.0.0.1')
    if result_q is None:
        result_q = RedisQueue('result', '127.0.0.1')

    if not executor:
        executor = HTEX(label='htex',
                        provider=LocalProvider(channel=LocalChannel()),
                        address=address)

    fw = Forwarder(task_q,
                   result_q,
                   executor,
                   "Endpoint_{}".format(endpoint_id),
                   logging_level=logging_level)
    fw.start()
    return fw
Example #23
def test_parsl_htex_executor():
    parsl = pytest.importorskip("parsl", minversion="0.7.2")
    import os
    import os.path as osp

    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.addresses import address_by_hostname
    from parsl.config import Config
    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address=address_by_hostname(),
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(channel=LocalChannel(),
                                       init_blocks=1,
                                       max_blocks=1,
                                       nodes_per_block=1),
            )
        ],
        strategy=None,
    )
    parsl.load(parsl_config)

    filelist = {
        'ZJets': [osp.join(os.getcwd(), 'tests/samples/nano_dy.root')],
        'Data': [osp.join(os.getcwd(), 'tests/samples/nano_dimuon.root')]
    }

    do_parsl_job(filelist)
    do_parsl_job(filelist, compression=1)

    filelist = {
        'ZJets': {
            'treename': 'Events',
            'files': [osp.join(os.getcwd(), 'tests/samples/nano_dy.root')]
        },
        'Data': {
            'treename': 'Events',
            'files': [osp.join(os.getcwd(), 'tests/samples/nano_dimuon.root')]
        }
    }

    do_parsl_job(filelist)
    do_parsl_job(filelist, flatten=True)
Example #24
def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                worker_debug=True,
                cores_per_worker=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                ),
            )
        ],
        strategy=None,
    )
Example #25
    def __init__(self,
                 # Scaling mechanics
                 provider=LocalProvider(),
                 scaling_enabled=True,
                 # Connection info
                 worker_ports=None,
                 worker_port_range=(54000, 55000),
                 # Scaling info
                 max_workers_per_node=float('inf'),
                 cores_per_worker=1.0,
                 mem_per_worker=None,
                 launch_cmd=None,
                 # Tuning info
                 worker_mode='no_container',
                 prefetch_capacity=10,
                 heartbeat_period=2,
                 heartbeat_threshold=10,
                 poll_period=10,
                 # Logging info
                 working_dir=None,
                 worker_debug=True):

        # Scaling mechanics
        self.provider = provider
        self.scaling_enabled = scaling_enabled

        # Connection info
        self.worker_ports = worker_ports
        self.worker_port_range = worker_port_range

        # Scaling info
        self.max_workers_per_node = max_workers_per_node
        self.cores_per_worker = cores_per_worker
        self.mem_per_worker = mem_per_worker
        self.launch_cmd = launch_cmd

        # Tuning info
        self.worker_mode = worker_mode
        self.prefetch_capacity = prefetch_capacity
        self.heartbeat_period = heartbeat_period
        self.heartbeat_threshold = heartbeat_threshold
        self.poll_period = poll_period

        # Logging info
        self.working_dir = working_dir
        self.worker_debug = worker_debug
Example #26
def theta_nwchem_config(
    choice: str,
    log_dir: str,
    nodes_per_nwchem: int = 2,
    total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))
) -> Config:
    """Theta configuration to run NWChem

    Args:
        choice: Choice of the runtime configuration
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    if choice == "htex":
        qc_exec = HighThroughputExecutor(
            address=address_by_hostname(),
            label="qc",
            max_workers=nwc_workers,
            cores_per_worker=1e-6,
            provider=LocalProvider(
                nodes_per_block=1,
                init_blocks=0,
                max_blocks=1,
                launcher=SimpleLauncher(),  # Places worker on the launch node
                worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
            ),
        )
    elif choice == 'thread':
        qc_exec = ThreadPoolExecutor(label='qc', max_threads=nwc_workers)
    else:
        raise ValueError(f'Choice "{choice}" not recognized')

    return Config(executors=[qc_exec],
                  run_dir=log_dir,
                  strategy='simple',
                  max_idletime=15.)
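A usage sketch (assumed call, not from the source): the 'thread' variant runs the QC work from local threads and avoids pilot-job scheduling entirely, while 'htex' scales workers through the LocalProvider.

import parsl

config = theta_nwchem_config('thread', log_dir='runinfo', nodes_per_nwchem=2)
parsl.load(config)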
Example #27
    def __init__(self,
                 label='ExtremeScaleExecutor',
                 provider=LocalProvider(),
                 launch_cmd=None,
                 address="127.0.0.1",
                 worker_ports=None,
                 worker_port_range=(54000, 55000),
                 interchange_port_range=(55000, 56000),
                 storage_access=None,
                 working_dir=None,
                 worker_debug=False,
                 ranks_per_node=1,
                 heartbeat_threshold=120,
                 heartbeat_period=30,
                 managed=True):

        super().__init__(label=label,
                         provider=provider,
                         launch_cmd=launch_cmd,
                         address=address,
                         worker_ports=worker_ports,
                         worker_port_range=worker_port_range,
                         interchange_port_range=interchange_port_range,
                         storage_access=storage_access,
                         working_dir=working_dir,
                         worker_debug=worker_debug,
                         heartbeat_threshold=heartbeat_threshold,
                         heartbeat_period=heartbeat_period,
                         managed=managed)

        self.ranks_per_node = ranks_per_node

        logger.debug("Initializing ExtremeScaleExecutor")

        if not launch_cmd:
            self.launch_cmd = (
                "mpiexec -np {ranks_per_node} mpi_worker_pool.py "
                "{debug} "
                "--task_url={task_url} "
                "--result_url={result_url} "
                "--logdir={logdir} "
                "--hb_period={heartbeat_period} "
                "--hb_threshold={heartbeat_threshold} ")
        self.worker_debug = worker_debug
Example #28
def test_simple(mem_per_worker):

    config = Config(
        executors=[
            HighThroughputExecutor(
                poll_period=1,
                label="htex_local",
                worker_debug=True,
                mem_per_worker=mem_per_worker,
                cores_per_worker=0.1,
                suppress_failure=True,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy=None,
    )
    parsl.load(config)

    print("Configuration requests:")
    print("cores_per_worker: ", config.executors[0].cores_per_worker)
    print("mem_per_worker: ", config.executors[0].mem_per_worker)

    available_mem_on_node = round(psutil.virtual_memory().available / (2**30), 1)
    expected_workers = multiprocessing.cpu_count() / config.executors[0].cores_per_worker
    if mem_per_worker:
        expected_workers = int(available_mem_on_node / config.executors[0].mem_per_worker)

    print("Available memory: ", available_mem_on_node)
    print("Expected workers: ", expected_workers)
    # Prime a worker
    double(5).result()
    dfk = parsl.dfk()
    connected = dfk.executors['htex_local'].connected_workers
    print("Connected : ", connected)
    assert expected_workers == connected, "Expected {} workers, instead got {} workers".format(expected_workers,
                                                                                               connected)
    parsl.clear()
    return True
Example #29
    def __init__(
            self,
            label='LowLatencyExecutor',
            provider=LocalProvider(),
            launch_cmd=None,
            address="127.0.0.1",
            worker_port=None,
            worker_port_range=(54000, 55000),
            interchange_port_range=(55000, 56000),
            #  storage_access=None,
            working_dir=None,
            worker_debug=False,
            workers_per_node=1,
            #  cores_per_worker=1.0,
            managed=True):
        logger.debug("Initializing LowLatencyExecutor")

        StatusHandlingExecutor.__init__(self, provider)
        self.label = label
        self.launch_cmd = launch_cmd
        self.provider = provider
        self.worker_debug = worker_debug
        # self.storage_access = storage_access if storage_access is not None else []
        # if len(self.storage_access) > 1:
        # raise ConfigurationError('Multiple storage access schemes are not supported')
        self.working_dir = working_dir
        self.managed = managed
        self.blocks = []
        self.workers_per_node = workers_per_node

        self._task_counter = 0
        self.address = address
        self.worker_port = worker_port
        self.worker_port_range = worker_port_range
        self.interchange_port_range = interchange_port_range
        self.run_dir = '.'

        # TODO: add debugging, logdir, other functionality to workers
        if not launch_cmd:
            self.launch_cmd = """lowlatency_worker.py -n {workers_per_node} --task_url={task_url} --logdir={logdir}"""
Example #30
def parsl_local_config(workers=1):
    log_dir = 'parsl_logs'

    htex = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                cores_per_worker=1,
                max_workers=workers,
                worker_logdir_root=log_dir,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                ),
            )
        ],
        strategy=None,
        run_dir=os.path.join(log_dir, 'runinfo'),
        #retries = 2,
    )
    return htex
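Loading it is a one-liner (a sketch; the parsl import is assumed):

import parsl

dfk = parsl.load(parsl_local_config(workers=2))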