def local_interleaved_config(qc_workers: int, ml_workers: int, log_dir: str) -> Config:
    """All workers on the local machine, split between QC and ML tasks

    Args:
        qc_workers: Number of quantum chemistry workers
        ml_workers: Number of machine learning workers
        log_dir: Path to store monitoring DB and parsl logs
    Returns:
        (Config): Desired configuration
    """
    return Config(
        executors=[
            HighThroughputExecutor(
                address="localhost",
                label="qc",
                max_workers=qc_workers,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1
                ),
            ),
            HighThroughputExecutor(
                address="localhost",
                label="ml",
                max_workers=ml_workers,
                provider=LocalProvider(
                    init_blocks=1,
                    max_blocks=1
                ),
            )
        ],
        run_dir=log_dir,
        strategy=None
    )
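# Usage sketch (an assumption, not from the original source): after loading the
# config returned above, Parsl apps can be pinned to the "qc" or "ml" executor
# by label via the ``executors`` argument of ``python_app``.  The ``run_qc`` and
# ``run_ml`` bodies below are placeholders, not real QC/ML workloads.
import parsl
from parsl import python_app

parsl.load(local_interleaved_config(qc_workers=4, ml_workers=1, log_dir='runinfo'))

@python_app(executors=['qc'])
def run_qc(x):
    return x ** 2  # placeholder for a quantum chemistry task

@python_app(executors=['ml'])
def run_ml(x):
    return x + 1  # placeholder for a machine learning task

print(run_qc(3).result(), run_ml(3).result())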
def test_local_channel():
    with tempfile.TemporaryDirectory() as script_dir:
        script_dir = tempfile.mkdtemp()
        p = LocalProvider(channel=LocalChannel(),
                          launcher=SingleNodeLauncher(debug=False))
        p.script_dir = script_dir
        _run_tests(p)
def fresh_config(): return Config( executors=[ HighThroughputExecutor( label="htex_Local", working_dir=working_dir, storage_access=[FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()], worker_debug=True, cores_per_worker=1, heartbeat_period=2, heartbeat_threshold=5, poll_period=100, provider=LocalProvider( channel=LocalChannel(), init_blocks=0, min_blocks=0, max_blocks=5, launcher=SingleNodeLauncher(), ), ) ], strategy='simple', app_cache=True, checkpoint_mode='task_exit', retries=2, monitoring=MonitoringHub( hub_address="localhost", hub_port=55055, monitoring_debug=False, resource_monitoring_interval=1, ) )
def load_ir2_dc_config():
    """ Load the parsl config for ad-hoc providers. """
    try:
        parsl.DataFlowKernelLoader.dfk()
        print("parsl config is already loaded.")
        return
    except RuntimeError:
        pass

    executors = []
    for host in WORKER_NODE_ADDRESSES:
        channel = SSHChannel(hostname=host, script_dir=script_dir(host))
        provider = LocalProvider(channel=channel,
                                 init_blocks=1,
                                 worker_init='source %s' % SETUP_SCRIPT)
        executors.append(
            HighThroughputExecutor(label=host,
                                   address=MOTHER_NODE_ADDRESS,
                                   worker_debug=False,
                                   provider=provider,
                                   heartbeat_period=2,
                                   heartbeat_threshold=10))

    config = Config(executors=executors, strategy=None, retries=3)
    parsl.load(config)
def test_parsl_htex_executor():
    parsl = pytest.importorskip("parsl", minversion="0.7.2")
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.addresses import address_by_hostname
    from parsl.config import Config
    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address=address_by_hostname(),
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(channel=LocalChannel(),
                                       init_blocks=1,
                                       max_blocks=1,
                                       nodes_per_block=1),
            )
        ],
        strategy=None,
    )
    do_parsl_job(parsl_config)
def __init__(self,
             provider=LocalProvider(),
             label='ipp',
             working_dir=None,
             controller=Controller(),
             container_image=None,
             engine_dir=None,
             storage_access=None,
             engine_debug_level=None,
             workers_per_node=1,
             managed=True):
    self.provider = provider
    self.label = label
    self.working_dir = working_dir
    self.controller = controller
    self.engine_debug_level = engine_debug_level
    self.container_image = container_image
    self.engine_dir = engine_dir
    self.workers_per_node = workers_per_node
    self.storage_access = storage_access if storage_access is not None else []
    if len(self.storage_access) > 1:
        raise ConfigurationError(
            'Multiple storage access schemes are not yet supported')
    self.managed = managed

    self.debug_option = ""
    if self.engine_debug_level:
        self.debug_option = "--log-level={}".format(self.engine_debug_level)
def __init__(self,
             provider=LocalProvider(),
             label='ipp',
             working_dir=None,
             controller=Controller(),
             container_image=None,
             engine_dir=None,
             storage_access=None,
             engine_debug_level=None,
             workers_per_node=1,
             managed=True):
    self.provider = provider
    self.label = label
    self.working_dir = working_dir
    self.controller = controller
    self.engine_debug_level = engine_debug_level
    self.container_image = container_image
    self.engine_dir = engine_dir
    self.workers_per_node = workers_per_node
    self.storage_access = storage_access
    self.managed = managed

    self.debug_option = ""
    if self.engine_debug_level:
        self.debug_option = "--log-level={}".format(self.engine_debug_level)
def test_dynamic_executor():
    dfk = parsl.load()
    tasks = [sleeper() for i in range(5)]
    results = [i.result() for i in tasks]
    print("Done with initial test. The results are", results)

    # Here we add a new executor to an active DFK
    thread_executors = [ThreadPoolExecutor(label='threads2', max_threads=4)]
    dfk.add_executors(executors=thread_executors)
    tasks = [cpu_stress() for i in range(8)]
    results = [i.result() for i in tasks]
    print("Successfully added thread executor and ran with it. The results are", results)

    # We add a htex executor to an active DFK
    executors = [
        HighThroughputExecutor(
            label='htex_local',
            cores_per_worker=1,
            max_workers=5,
            provider=LocalProvider(
                init_blocks=1,
                max_blocks=1,
            ),
        )
    ]
    dfk.add_executors(executors=executors)
    tasks = [add() for i in range(10)]
    results = [i.result() for i in tasks]
    print("Successfully added htex executor and ran with it. The results are", results)

    print("Done testing")
    parsl.clear()
def test_1():
    # Note: the channel must be an instance; the original passed the
    # LocalChannel class itself, which would fail when the provider tries to
    # use it.
    x = HTEX(
        label='htex',
        provider=LocalProvider(channel=LocalChannel()),
        address="127.0.0.1",
    )
    task_p, result_p, command_p = x.start()
    print(task_p, result_p, command_p)
    print("Executor initialized : ", x)

    args = [2]
    kwargs = {}
    f1 = x.submit(double, *args, **kwargs)
    print("Sent task with :", f1)

    args = [2]
    kwargs = {}
    f2 = x.submit(fail, *args, **kwargs)

    print("hi")
    while True:
        stop = input("Stop ? (y/n)")
        if stop == "y":
            break
        print("F1: {}, f2: {}".format(f1.done(), f2.done()))

    x.shutdown()
def _get_parsl_config():
    """Get the Parsl config.

    Returns
    -------
    parsl.config.Config
        Parsl config to execute tasks.
    """
    config = Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                worker_debug=False,
                poll_period=1,
                cores_per_worker=1,
                max_workers=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    min_blocks=1,
                ),
            )
        ],
        strategy=None)
    return config
def fresh_config():
    return Config(executors=[
        IPyParallelExecutor(label="local_ipp",
                            engine_dir='engines',
                            provider=LocalProvider(channel=LocalChannel(),
                                                   init_blocks=2,
                                                   max_blocks=2))
    ])
def test_parsl_executor():
    parsl = pytest.importorskip("parsl", minversion="0.7.2")
    from coffea.processor import run_parsl_job
    from coffea.processor.parsl.detail import (_parsl_initialize, _parsl_stop)
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.addresses import address_by_hostname
    from parsl.config import Config
    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address=address_by_hostname(),
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(channel=LocalChannel(),
                                       init_blocks=1,
                                       max_blocks=1,
                                       nodes_per_block=1),
            )
        ],
        strategy=None,
    )

    import os
    import os.path as osp

    filelist = {
        'ZJets': [osp.join(os.getcwd(), 'tests/samples/nano_dy.root')],
        'Data': [osp.join(os.getcwd(), 'tests/samples/nano_dimuon.root')]
    }

    treename = 'Events'

    from coffea.processor.test_items import NanoTestProcessor
    from coffea.processor.parsl.parsl_executor import parsl_executor

    dfk = _parsl_initialize(parsl_config)

    proc = NanoTestProcessor()

    hists = run_parsl_job(filelist, treename, processor_instance=proc,
                          executor=parsl_executor, data_flow=dfk)

    _parsl_stop(dfk)

    assert (hists['cutflow']['ZJets_pt'] == 4)
    assert (hists['cutflow']['ZJets_mass'] == 1)
    assert (hists['cutflow']['Data_pt'] == 15)
    assert (hists['cutflow']['Data_mass'] == 5)
def __init__(
    self,
    provider: Optional[ExecutionProvider] = None,
    managed: bool = True,
    working_dir: Optional[str] = None,
    label: str = "FluxExecutor",
    flux_executor_kwargs: Mapping = {},
    flux_path: Optional[str] = None,
    launch_cmd: Optional[str] = None,
):
    super().__init__()
    if provider is None:
        provider = LocalProvider()
    self._provider = provider
    self.label = label
    if working_dir is None:
        working_dir = self.label + "_" + str(uuid.uuid4())
    self.working_dir = os.path.abspath(working_dir)
    self.managed = managed
    # check that flux_path is an executable, or look for flux in PATH
    if flux_path is None:
        flux_path = shutil.which("flux")
        if flux_path is None:
            raise EnvironmentError("Cannot find Flux installation in PATH")
    self.flux_path = os.path.abspath(flux_path)
    self._task_id_counter = itertools.count()
    self._socket = zmq.Context().socket(zmq.REP)
    if launch_cmd is None:
        self.launch_cmd = self.DEFAULT_LAUNCH_CMD
    self._submission_queue: queue.Queue = queue.Queue()
    self._stop_event = threading.Event()
    # lock to protect self._task_id_counter and also submission/shutdown race
    self._submission_lock = threading.Lock()
    self.flux_executor_kwargs = flux_executor_kwargs
    self._submission_thread = threading.Thread(
        target=_submit_wrapper,
        args=(
            self._submission_queue,
            self._stop_event,
            self._socket,
            self.working_dir,
            self.flux_executor_kwargs,
            self.provider,
            self,
            self.flux_path,
            self.launch_cmd,
        ),
        daemon=True,
    )
    # add a ``weakref.finalize()`` function for joining the executor thread
    weakref.finalize(
        self,
        lambda x, y: x.set() or y.join(),
        self._stop_event,
        self._submission_thread,
    )
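# Standalone sketch of the ``weakref.finalize`` cleanup pattern used above: the
# finalizer receives the stop event and thread as arguments instead of closing
# over ``self``, so registering it does not keep the executor object alive.
# ``Worker`` and ``_heartbeat_loop`` are illustrative names, not part of the
# original code.
import threading
import time
import weakref


def _heartbeat_loop(stop_event):
    # Background work that exits promptly once the stop event is set.
    while not stop_event.is_set():
        time.sleep(0.1)


class Worker:
    def __init__(self):
        self._stop_event = threading.Event()
        self._thread = threading.Thread(target=_heartbeat_loop,
                                        args=(self._stop_event,),
                                        daemon=True)
        self._thread.start()
        # When this object is collected (or at interpreter exit), set the event
        # and join the thread.  ``set()`` returns None, so ``or`` falls through
        # to ``join()``, the same trick as in the executor above.
        weakref.finalize(self,
                         lambda stop, thread: stop.set() or thread.join(),
                         self._stop_event,
                         self._thread)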
def local_config(log_dir: str, max_workers: int, prefetch: int = 0) -> Config:
    """Single node with a single task per worker

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        max_workers: Maximum number of concurrent tasks
        prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    return Config(
        executors=[
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="qc-worker",
                max_workers=max_workers,
                prefetch_capacity=prefetch,
                cpu_affinity='block',
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                ),
            ),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml-worker",
                max_workers=1,
                prefetch_capacity=prefetch,
                provider=LocalProvider(
                    nodes_per_block=1,
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),  # Places worker on the launch node
                )
            )
        ],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
def test_2():
    from funcx_endpoint.executors.high_throughput.executor import executor_starter

    # Note: the channel must be an instance; the original passed the
    # LocalChannel class itself.
    htex = HTEX(label='htex',
                provider=LocalProvider(channel=LocalChannel()),
                address="127.0.0.1")
    print("Foo")
    executor_starter(htex, "forwarder", "ep_01")
    print("Here")
def __init__(self,
             # Scaling mechanics
             provider=LocalProvider(),
             scaling_enabled=True,
             # Connection info
             funcx_service_address='https://api.funcx.org/v1',
             worker_ports=None,
             worker_port_range=(54000, 55000),
             # Scaling info
             strategy=SimpleStrategy(),
             max_workers_per_node=float('inf'),
             cores_per_worker=1.0,
             mem_per_worker=None,
             launch_cmd=None,
             # Tuning info
             worker_mode='no_container',
             scheduler_mode='hard',
             container_type=None,
             prefetch_capacity=10,
             heartbeat_period=2,
             heartbeat_threshold=10,
             poll_period=10,
             # Logging info
             working_dir=None,
             worker_debug=False):

    # Scaling mechanics
    self.provider = provider
    self.scaling_enabled = scaling_enabled
    # Connection info
    self.funcx_service_address = funcx_service_address
    self.worker_ports = worker_ports
    self.worker_port_range = worker_port_range
    # Scaling info
    self.strategy = strategy
    self.max_workers_per_node = max_workers_per_node
    self.cores_per_worker = cores_per_worker
    self.mem_per_worker = mem_per_worker
    self.launch_cmd = launch_cmd
    # Tuning info
    self.worker_mode = worker_mode
    self.scheduler_mode = scheduler_mode
    self.container_type = container_type
    self.prefetch_capacity = prefetch_capacity
    self.heartbeat_period = heartbeat_period
    self.heartbeat_threshold = heartbeat_threshold
    self.poll_period = poll_period
    # Logging info
    self.working_dir = working_dir
    self.worker_debug = worker_debug
def test_ssh_channel():
    with tempfile.TemporaryDirectory() as config_dir:
        sshd_thread, priv_key, server_port = _start_sshd(config_dir)
        try:
            with tempfile.TemporaryDirectory() as remote_script_dir:
                # The SSH library fails to add the new host key to the file if the file does not
                # already exist, so create it here.
                pathlib.Path('{}/known.hosts'.format(config_dir)).touch(mode=0o600)
                script_dir = tempfile.mkdtemp()
                p = LocalProvider(channel=SSHChannel('127.0.0.1',
                                                     port=server_port,
                                                     script_dir=remote_script_dir,
                                                     host_keys_filename='{}/known.hosts'.format(config_dir),
                                                     key_filename=priv_key),
                                  launcher=SingleNodeLauncher(debug=False))
                p.script_dir = script_dir
                _run_tests(p)
        finally:
            _stop_sshd(sshd_thread)
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            ThreadPoolExecutor(label='qc', max_threads=nwc_workers),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=nodes_per_nwchem,  # Minimum increment in blocks
                    init_blocks=0,
                    max_blocks=total_nodes // nodes_per_nwchem,  # Limits the number of manager processes
                    launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'
        ),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
def __init__(self,
             label='HighThroughputExecutor',
             provider=LocalProvider(),
             launch_cmd=None,
             address="127.0.0.1",
             worker_ports=None,
             worker_port_range=(54000, 55000),
             interchange_port_range=(55000, 56000),
             storage_access=None,
             working_dir=None,
             worker_debug=False,
             cores_per_worker=1.0,
             max_workers=float('inf'),
             heartbeat_threshold=120,
             heartbeat_period=30,
             suppress_failure=False,
             managed=True):

    logger.debug("Initializing HighThroughputExecutor")

    self.label = label
    self.launch_cmd = launch_cmd
    self.provider = provider
    self.worker_debug = worker_debug
    self.storage_access = storage_access if storage_access is not None else []
    if len(self.storage_access) > 1:
        raise ConfigurationError(
            'Multiple storage access schemes are not supported')
    self.working_dir = working_dir
    self.managed = managed
    self.blocks = []
    self.tasks = {}
    self.cores_per_worker = cores_per_worker
    self.max_workers = max_workers
    self._task_counter = 0
    self.address = address
    self.worker_ports = worker_ports
    self.worker_port_range = worker_port_range
    self.interchange_port_range = interchange_port_range
    self.heartbeat_threshold = heartbeat_threshold
    self.heartbeat_period = heartbeat_period
    self.suppress_failure = suppress_failure
    self.run_dir = '.'

    if not launch_cmd:
        self.launch_cmd = ("process_worker_pool.py {debug} {max_workers} "
                           "-c {cores_per_worker} "
                           "--task_url={task_url} "
                           "--result_url={result_url} "
                           "--logdir={logdir} "
                           "--hb_period={heartbeat_period} "
                           "--hb_threshold={heartbeat_threshold} ")
def __init__(self,
             label='ExtremeScaleExecutor',
             provider=LocalProvider(),
             launch_cmd=None,
             address="127.0.0.1",
             worker_ports=None,
             worker_port_range=(54000, 55000),
             interchange_port_range=(55000, 56000),
             storage_access=None,
             working_dir=None,
             worker_debug=False,
             ranks_per_node=1,
             heartbeat_threshold=120,
             heartbeat_period=30,
             managed=True):

    super().__init__(label=label,
                     provider=provider,
                     launch_cmd=launch_cmd,
                     address=address,
                     worker_ports=worker_ports,
                     worker_port_range=worker_port_range,
                     interchange_port_range=interchange_port_range,
                     storage_access=storage_access,
                     working_dir=working_dir,
                     worker_debug=worker_debug,
                     heartbeat_threshold=heartbeat_threshold,
                     heartbeat_period=heartbeat_period,
                     managed=managed)

    if not _mpi_enabled:
        raise OptionalModuleMissing(
            "mpi4py", "Cannot initialize ExtremeScaleExecutor without mpi4py")
    else:
        # This is only to stop flake8 from complaining
        logger.debug("MPI version :{}".format(mpi4py.__version__))

    self.ranks_per_node = ranks_per_node

    logger.debug("Initializing ExtremeScaleExecutor")

    if not launch_cmd:
        self.launch_cmd = ("mpiexec -np {ranks_per_node} mpi_worker_pool.py "
                           "{debug} "
                           "--task_url={task_url} "
                           "--result_url={result_url} "
                           "--logdir={logdir} "
                           "--hb_period={heartbeat_period} "
                           "--hb_threshold={heartbeat_threshold} ")
    self.worker_debug = worker_debug
def test_parsl_htex_executor():
    pytest.importorskip("parsl", minversion="0.7.2")
    import os
    import os.path as osp

    import parsl
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.config import Config

    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address="127.0.0.1",
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    nodes_per_block=1,
                ),
            )
        ],
        strategy=None,
    )
    parsl.load(parsl_config)

    filelist = {
        "ZJets": [osp.join(os.getcwd(), "tests/samples/nano_dy.root")],
        "Data": [osp.join(os.getcwd(), "tests/samples/nano_dimuon.root")],
    }

    do_parsl_job(filelist)
    do_parsl_job(filelist, compression=1)

    filelist = {
        "ZJets": {
            "treename": "Events",
            "files": [osp.join(os.getcwd(), "tests/samples/nano_dy.root")],
        },
        "Data": {
            "treename": "Events",
            "files": [osp.join(os.getcwd(), "tests/samples/nano_dimuon.root")],
        },
    }

    do_parsl_job(filelist)
def spawn_forwarder(address,
                    executor=None,
                    task_q=None,
                    result_q=None,
                    endpoint_id=uuid.uuid4(),
                    logging_level=logging.INFO):
    """ Spawns a forwarder and returns the forwarder process for tracking.

    Parameters
    ----------
    address : str
        IP Address to which the endpoint must connect
    executor : Executor object. Optional
        Executor object to be instantiated.
    task_q : Queue object
        Queue object matching funcx.queues.base.FuncxQueue interface
    logging_level : int
        Logging level as defined in the logging module. Default: logging.INFO (20)
    endpoint_id : uuid string
        Endpoint id for which the forwarder is being spawned.

    Returns:
        A Forwarder object
    """
    from funcx.queues.redis import RedisQueue
    from funcx.executors import HighThroughputExecutor as HTEX
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel

    task_q = RedisQueue('task', '127.0.0.1')
    result_q = RedisQueue('result', '127.0.0.1')

    if not executor:
        # Note: the channel must be an instance; the original passed the
        # LocalChannel class itself.
        executor = HTEX(label='htex',
                        provider=LocalProvider(channel=LocalChannel()),
                        address=address)

    fw = Forwarder(task_q, result_q, executor,
                   "Endpoint_{}".format(endpoint_id),
                   logging_level=logging_level)
    fw.start()
    return fw
def test_parsl_htex_executor():
    parsl = pytest.importorskip("parsl", minversion="0.7.2")
    import os
    import os.path as osp
    from parsl.providers import LocalProvider
    from parsl.channels import LocalChannel
    from parsl.executors import HighThroughputExecutor
    from parsl.addresses import address_by_hostname
    from parsl.config import Config
    parsl_config = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                address=address_by_hostname(),
                cores_per_worker=max(multiprocessing.cpu_count() // 2, 1),
                max_workers=1,
                provider=LocalProvider(channel=LocalChannel(),
                                       init_blocks=1,
                                       max_blocks=1,
                                       nodes_per_block=1),
            )
        ],
        strategy=None,
    )
    parsl.load(parsl_config)

    filelist = {
        'ZJets': [osp.join(os.getcwd(), 'tests/samples/nano_dy.root')],
        'Data': [osp.join(os.getcwd(), 'tests/samples/nano_dimuon.root')]
    }

    do_parsl_job(filelist)
    do_parsl_job(filelist, compression=1)

    filelist = {
        'ZJets': {
            'treename': 'Events',
            'files': [osp.join(os.getcwd(), 'tests/samples/nano_dy.root')]
        },
        'Data': {
            'treename': 'Events',
            'files': [osp.join(os.getcwd(), 'tests/samples/nano_dimuon.root')]
        }
    }

    do_parsl_job(filelist)
    do_parsl_job(filelist, flatten=True)
def fresh_config():
    return Config(
        executors=[
            HighThroughputExecutor(
                label="htex_local",
                worker_debug=True,
                cores_per_worker=1,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SimpleLauncher(),
                ),
            )
        ],
        strategy=None,
    )
def __init__(self,
             # Scaling mechanics
             provider=LocalProvider(),
             scaling_enabled=True,
             # Connection info
             worker_ports=None,
             worker_port_range=(54000, 55000),
             # Scaling info
             max_workers_per_node=float('inf'),
             cores_per_worker=1.0,
             mem_per_worker=None,
             launch_cmd=None,
             # Tuning info
             worker_mode='no_container',
             prefetch_capacity=10,
             heartbeat_period=2,
             heartbeat_threshold=10,
             poll_period=10,
             # Logging info
             working_dir=None,
             worker_debug=True):

    # Scaling mechanics
    self.provider = provider
    self.scaling_enabled = scaling_enabled
    # Connection info
    self.worker_ports = worker_ports
    self.worker_port_range = worker_port_range
    # Scaling info
    self.max_workers_per_node = max_workers_per_node
    self.cores_per_worker = cores_per_worker
    self.mem_per_worker = mem_per_worker
    self.launch_cmd = launch_cmd
    # Tuning info
    self.worker_mode = worker_mode
    self.prefetch_capacity = prefetch_capacity
    self.heartbeat_period = heartbeat_period
    self.heartbeat_threshold = heartbeat_threshold
    self.poll_period = poll_period
    # Logging info
    self.working_dir = working_dir
    self.worker_debug = worker_debug
def theta_nwchem_config(choice: str,
                        log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration to run NWChem

    Args:
        choice: Choice of the runtime configuration
        nodes_per_nwchem: Number of nodes per NWChem computation
        log_dir: Path to store monitoring DB and parsl logs
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    if choice == "htex":
        qc_exec = HighThroughputExecutor(
            address=address_by_hostname(),
            label="qc",
            max_workers=nwc_workers,
            cores_per_worker=1e-6,
            provider=LocalProvider(
                nodes_per_block=1,
                init_blocks=0,
                max_blocks=1,
                launcher=SimpleLauncher(),  # Places worker on the launch node
                worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
            ),
        )
    elif choice == 'thread':
        qc_exec = ThreadPoolExecutor(label='qc', max_threads=nwc_workers)
    else:
        raise ValueError(f'Choice "{choice}" not recognized ')

    return Config(
        executors=[qc_exec],
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.
    )
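# Usage sketch (an assumption, not from the original source): the 'thread'
# variant keeps QC workers in-process for quick tests, while 'htex' pins them to
# the launch node inside a Cobalt job.  The bash_app below echoes a placeholder
# command rather than the project's actual aprun/NWChem invocation.
import parsl
from parsl import bash_app

config = theta_nwchem_config(choice='thread', log_dir='runinfo',
                             nodes_per_nwchem=2, total_nodes=2)
parsl.load(config)

@bash_app(executors=['qc'])
def run_nwchem(stdout='nwchem.out', stderr='nwchem.err'):
    return 'echo "placeholder for an aprun-launched NWChem command"'

run_nwchem().result()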
def __init__(self,
             label='ExtremeScaleExecutor',
             provider=LocalProvider(),
             launch_cmd=None,
             address="127.0.0.1",
             worker_ports=None,
             worker_port_range=(54000, 55000),
             interchange_port_range=(55000, 56000),
             storage_access=None,
             working_dir=None,
             worker_debug=False,
             ranks_per_node=1,
             heartbeat_threshold=120,
             heartbeat_period=30,
             managed=True):

    super().__init__(label=label,
                     provider=provider,
                     launch_cmd=launch_cmd,
                     address=address,
                     worker_ports=worker_ports,
                     worker_port_range=worker_port_range,
                     interchange_port_range=interchange_port_range,
                     storage_access=storage_access,
                     working_dir=working_dir,
                     worker_debug=worker_debug,
                     heartbeat_threshold=heartbeat_threshold,
                     heartbeat_period=heartbeat_period,
                     managed=managed)

    self.ranks_per_node = ranks_per_node

    logger.debug("Initializing ExtremeScaleExecutor")

    if not launch_cmd:
        self.launch_cmd = ("mpiexec -np {ranks_per_node} mpi_worker_pool.py "
                           "{debug} "
                           "--task_url={task_url} "
                           "--result_url={result_url} "
                           "--logdir={logdir} "
                           "--hb_period={heartbeat_period} "
                           "--hb_threshold={heartbeat_threshold} ")
    self.worker_debug = worker_debug
def test_simple(mem_per_worker):

    config = Config(
        executors=[
            HighThroughputExecutor(
                poll_period=1,
                label="htex_local",
                worker_debug=True,
                mem_per_worker=mem_per_worker,
                cores_per_worker=0.1,
                suppress_failure=True,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy=None,
    )
    parsl.load(config)

    print("Configuration requests:")
    print("cores_per_worker: ", config.executors[0].cores_per_worker)
    print("mem_per_worker: ", config.executors[0].mem_per_worker)

    available_mem_on_node = round(psutil.virtual_memory().available / (2**30), 1)
    expected_workers = multiprocessing.cpu_count() / config.executors[0].cores_per_worker
    if mem_per_worker:
        expected_workers = int(available_mem_on_node / config.executors[0].mem_per_worker)

    print("Available memory: ", available_mem_on_node)
    print("Expected workers: ", expected_workers)
    # Prime a worker
    double(5).result()
    dfk = parsl.dfk()
    connected = dfk.executors['htex_local'].connected_workers
    print("Connected : ", connected)
    assert expected_workers == connected, "Expected {} workers, instead got {} workers".format(
        expected_workers, connected)
    parsl.clear()
    return True
def __init__(self,
             label='LowLatencyExecutor',
             provider=LocalProvider(),
             launch_cmd=None,
             address="127.0.0.1",
             worker_port=None,
             worker_port_range=(54000, 55000),
             interchange_port_range=(55000, 56000),
             # storage_access=None,
             working_dir=None,
             worker_debug=False,
             workers_per_node=1,
             # cores_per_worker=1.0,
             managed=True):
    logger.debug("Initializing LowLatencyExecutor")

    StatusHandlingExecutor.__init__(self, provider)
    self.label = label
    self.launch_cmd = launch_cmd
    self.provider = provider
    self.worker_debug = worker_debug
    # self.storage_access = storage_access if storage_access is not None else []
    # if len(self.storage_access) > 1:
    #     raise ConfigurationError('Multiple storage access schemes are not supported')
    self.working_dir = working_dir
    self.managed = managed
    self.blocks = []
    self.workers_per_node = workers_per_node

    self._task_counter = 0
    self.address = address
    self.worker_port = worker_port
    self.worker_port_range = worker_port_range
    self.interchange_port_range = interchange_port_range
    self.run_dir = '.'

    # TODO: add debugging, logdir, other functionality to workers
    if not launch_cmd:
        self.launch_cmd = """lowlatency_worker.py -n {workers_per_node} --task_url={task_url} --logdir={logdir}"""
def parsl_local_config(workers=1):
    log_dir = 'parsl_logs'
    htex = Config(
        executors=[
            HighThroughputExecutor(
                label="coffea_parsl_default",
                cores_per_worker=1,
                max_workers=workers,
                worker_logdir_root=log_dir,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                ),
            )
        ],
        strategy=None,
        run_dir=os.path.join(log_dir, 'runinfo'),
        # retries=2,
    )
    return htex
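# Usage sketch (an assumption, not from the original source): load the local
# coffea config and run a trivial app through it.  ``analyze`` is a placeholder
# for per-chunk analysis work, not part of coffea's API.
import parsl
from parsl import python_app

parsl.load(parsl_local_config(workers=4))

@python_app
def analyze(chunk):
    return len(chunk)  # placeholder for per-chunk analysis work

futures = [analyze(list(range(n))) for n in (10, 20, 30)]
print(sum(f.result() for f in futures))
parsl.clear()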