def configure_for_parsl(self):
    """Utility function to set Parsl configuration parameters"""
    import os
    from parsl.executors import ThreadPoolExecutor

    # Size the thread pool from the Slurm allocation, defaulting to a single node
    max_threads = int(os.environ.get("SLURM_JOB_NUM_NODES", 1))
    executor = ThreadPoolExecutor(label="local", max_threads=max_threads)
    self.info["executor"] = executor
def configure_for_parsl(self):
    """Utility function to set Parsl configuration parameters"""
    from parsl.executors import ThreadPoolExecutor

    # Size the thread pool from this object's configuration, defaulting to 4 threads
    max_threads = self.config.get("max_threads", 4)
    executor = ThreadPoolExecutor(label="local", max_threads=max_threads)
    self.info["executor"] = executor
import logging

import parsl
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor
from parsl.monitoring.monitoring import MonitoringHub


def local_setup():
    threads_config = Config(
        executors=[ThreadPoolExecutor(label='threads', max_threads=4)],
        monitoring=MonitoringHub(hub_address="127.0.0.1",
                                 hub_port=55055,
                                 logging_level=logging.INFO,
                                 resource_monitoring_interval=10))
    parsl.load(threads_config)
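# Note (not from the original snippet): with monitoring enabled as above,
# Parsl records run data to a monitoring.db SQLite file (by default under the
# working directory), which can be browsed with Parsl's bundled viewer, e.g.:
#   parsl-visualize sqlite:///$PWD/monitoring.db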
def theta_nwchem_config(log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1)),
                        ml_prefetch: int = 0) -> Config:
    """Theta configuration where QC workers sit on the launch node (to be able to aprun)
    and ML workers are placed on compute nodes

    Args:
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
        ml_prefetch: Number of tasks for ML workers to prefetch for inference
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, \
        "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    return Config(
        executors=[
            ThreadPoolExecutor(label='qc', max_threads=nwc_workers),
            HighThroughputExecutor(
                address=address_by_hostname(),
                label="ml",
                max_workers=1,
                prefetch_capacity=ml_prefetch,
                provider=LocalProvider(
                    nodes_per_block=nodes_per_nwchem,  # Minimum increment in blocks
                    init_blocks=0,
                    max_blocks=total_nodes // nodes_per_nwchem,  # Limits the number of manager processes
                    launcher=AprunLauncher(overrides='-d 256 --cc depth -j 4'),  # Places worker on the compute node
                    worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
                ),
            )
        ],
        monitoring=MonitoringHub(
            hub_address=address_by_hostname(),
            monitoring_debug=False,
            resource_monitoring_interval=10,
            logdir=log_dir,
            logging_endpoint=f'sqlite:///{os.path.join(log_dir, "monitoring.db")}'),
        run_dir=log_dir,
        strategy='simple',
        max_idletime=15.)
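# Hedged usage sketch (not part of the snippet above; app names and bodies are
# placeholders): load the config, then pin quantum-chemistry tasks to the 'qc'
# thread pool on the launch node and ML inference to the 'ml' HTEX workers on
# the compute nodes.
import parsl
from parsl import python_app

parsl.load(theta_nwchem_config('runinfo'))


@python_app(executors=['qc'])
def run_nwchem(molecule):
    ...  # hypothetical: the aprun-launched NWChem invocation would go here


@python_app(executors=['ml'])
def run_inference(batch):
    ...  # hypothetical: model inference on a compute node would go here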
def theta_nwchem_config(choice: str,
                        log_dir: str,
                        nodes_per_nwchem: int = 2,
                        total_nodes: int = int(os.environ.get("COBALT_JOBSIZE", 1))) -> Config:
    """Theta configuration to run NWChem

    Args:
        choice: Choice of the runtime configuration
        log_dir: Path to store monitoring DB and parsl logs
        nodes_per_nwchem: Number of nodes per NWChem computation
        total_nodes: Total number of nodes available. Default: COBALT_JOBSIZE
    Returns:
        (Config) Parsl configuration
    """
    assert total_nodes % nodes_per_nwchem == 0, \
        "NWChem node count not a multiple of nodes per task"
    nwc_workers = total_nodes // nodes_per_nwchem

    if choice == "htex":
        qc_exec = HighThroughputExecutor(
            address=address_by_hostname(),
            label="qc",
            max_workers=nwc_workers,
            cores_per_worker=1e-6,
            provider=LocalProvider(
                nodes_per_block=1,
                init_blocks=0,
                max_blocks=1,
                launcher=SimpleLauncher(),  # Places worker on the launch node
                worker_init='''
module load miniconda-3
conda activate /lus/theta-fs0/projects/CSC249ADCD08/edw/env
''',
            ),
        )
    elif choice == 'thread':
        qc_exec = ThreadPoolExecutor(label='qc', max_threads=nwc_workers)
    else:
        raise ValueError(f'Choice "{choice}" not recognized')

    return Config(executors=[qc_exec],
                  run_dir=log_dir,
                  strategy='simple',
                  max_idletime=15.)
args = parser.parse_args()

# Write the configuration
config = Config(
    executors=[
        HighThroughputExecutor(
            label="htex",
            # Max workers limits the concurrency exposed via mom node
            max_workers=2,
            worker_port_range=(10000, 20000),
            provider=LocalProvider(
                init_blocks=1,
                max_blocks=1,
            ),
        ),
        ThreadPoolExecutor(label="local_threads")
    ],
    strategy=None,
)
parsl.load(config)

# Connect to the redis server
input_queue = RedisQueue(args.redishost, port=int(args.redisport), prefix='input')
input_queue.connect()
output_queue = RedisQueue(args.redishost, port=int(args.redisport), prefix='output')
output_queue.connect()
    provider=SlurmProvider(
        cori_queue,
        nodes_per_block=compute_nodes,
        exclusive=True,
        init_blocks=0,
        min_blocks=0,
        max_blocks=1,
        scheduler_options="""#SBATCH --constraint=haswell""",
        launcher=SrunLauncher(),
        cmd_timeout=60,
        walltime=walltime,
        worker_init=worker_init,
        parallelism=1.0),
)

local_executor = ThreadPoolExecutor(max_threads=2, label="submit-node")


def wrap_no_op(s):
    return s


cori_shifter_debug_config = WorkflowConfig(
    trim_ingest_list=600,
    ingest_source="/global/projecta/projectdirs/lsst/production/DC2_ImSim/Run2.1.1i/sim/agn-test",
    rerun_prefix="benc4",
    visit_min=0,
    visit_max=9999999999,
    tract_subset=None,
    patch_subset=None,
# Write the configuration
config = Config(
    executors=[
        HighThroughputExecutor(
            address="localhost",
            label="htex",
            # Max workers limits the concurrency exposed via mom node
            max_workers=2,
            worker_port_range=(10000, 20000),
            provider=LocalProvider(
                init_blocks=1,
                max_blocks=1,
            ),
        ),
        ThreadPoolExecutor(label="local_threads", max_threads=4)
    ],
    strategy=None,
)
parsl.load(config)
parsl.set_stream_logger(level=logging.INFO)

# Connect to the redis server
client_queues, server_queues = make_queue_pairs(args.redishost,
                                                args.redisport,
                                                clean_slate=True,
                                                use_pickle=True)

# Create the method server and task generator
doer = ParslMethodServer([target_fun, generate, score, select],
                         server_queues,
def parslConfigFromCompute(compute):
    """Given a Compute instance, return a setup parsl configuration"""
    if isinstance(compute, EC2Compute):
        # NOTE: Assumes paropt is being run on an EC2 instance with access to the metadata service
        try:
            public_ip = getAWSPublicIP()

            # Get the required environment variables
            required_env_vars = [
                "PAROPT_AWS_REGION", "PAROPT_AWS_KEY_NAME",
                "PAROPT_AWS_STATE_FILE", "PAROPT_AWS_IAM_INSTANCE_PROFILE_ARN"
            ]
            env_vars = {
                varname.replace('PAROPT_AWS_', '').lower(): os.getenv(varname)
                for varname in required_env_vars
            }
            missing_vars = [
                varname for varname, value in env_vars.items() if value is None
            ]
            if missing_vars:
                raise Exception(
                    "Missing required environment variables for running parsl with AWS:\n{}"
                    .format(missing_vars))

            parsl_config = Config(
                executors=[
                    HighThroughputExecutor(
                        label='htex_local',
                        address=public_ip,
                        worker_port_range=(54000, 54050),
                        interchange_port_range=(54051, 54100),
                        cores_per_worker=1,
                        max_workers=1,
                        provider=AWSProvider(
                            image_id=compute.ami,
                            instance_type=compute.instance_model,
                            worker_init='pip3 install git+https://[email protected]/globus-labs/ParaOpt@Chaofeng_modification',
                            nodes_per_block=1,
                            init_blocks=1,
                            max_blocks=1,
                            min_blocks=0,
                            walltime='24:00:00',
                            spot_max_bid=2.0,
                            **env_vars),
                    )
                ],
                strategy=None,
            )
            return parsl_config
        except KeyError as e:
            logger.error('Failed initializing aws config: {}'.format(e))
            raise e
        except (HTTPError, URLError, OSError) as e:
            logger.error('Request to metadata service failed: {}'.format(e))
            raise e
    elif isinstance(compute, LocalCompute):
        return Config(executors=[
            ThreadPoolExecutor(max_threads=8, label='local_threads')
        ])
    elif isinstance(compute, PBSProCompute):
        # NOTE: Assumes paropt is being run on a PBS node with access to the metadata service
        try:
            parsl_config = Config(
                executors=[
                    HighThroughputExecutor(
                        label="htex",
                        heartbeat_period=15,
                        heartbeat_threshold=120,
                        worker_debug=True,
                        max_workers=4,
                        address=address_by_interface('ib0'),
                        provider=PBSProProvider(
                            launcher=MpiRunLauncher(),
                            # PBS directives (header lines): for array jobs pass the '-J' option,
                            # e.g. scheduler_options='#PBS -J 1-10'
                            scheduler_options=compute.scheduler_options,
                            # Command to be run before starting a worker, such as:
                            # 'module load Anaconda; source activate parsl_env'
                            worker_init=compute.worker_init,
                            # Number of compute nodes allocated for each block
                            nodes_per_block=1,
                            min_blocks=1,
                            max_blocks=5,
                            cpus_per_node=compute.cpus_per_node,
                            # The medium queue has a max walltime of 24 hrs
                            walltime=compute.walltime),
                    ),
                ],
                monitoring=MonitoringHub(
                    hub_address=address_by_interface('ib0'),
                    hub_port=55055,
                    resource_monitoring_interval=10,
                ),
                strategy='simple',
                retries=3,
                app_cache=True,
                checkpoint_mode='task_exit')
            return parsl_config
        except KeyError as e:
            logger.error('Failed initializing PBSPro config: {}'.format(e))
            raise e
    else:
        raise Exception('Unknown Compute type')
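# Hedged usage sketch (the Compute constructors are not shown in the snippet,
# so no arguments are asserted here): pick a compute backend, then hand the
# generated configuration to Parsl.
#
#   import parsl
#
#   compute = ...  # an EC2Compute, LocalCompute, or PBSProCompute instance
#   parsl.load(parslConfigFromCompute(compute))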
    ),  # node upon which the top-level parsl script is running
    cores_per_worker=1,
    max_workers=5,  # user tasks/node (up to capacity of machine)
    poll_period=30,
    provider=LocalProvider(  # Dispatch tasks on local machine only
        channel=LocalChannel(),
        init_blocks=1,
        max_blocks=1,
        worker_init=os.environ['PT_ENVSETUP'],  # Initial ENV setup
    ))

## This is based on the *default* executor (*DO NOT USE*: this executor
## is not recommended by Yadu)
coriLogin = ThreadPoolExecutor(label='coriLogin',
                               managed=True,
                               max_threads=2,
                               storage_access=[],
                               thread_name_prefix='',
                               working_dir=None)

###################################################
###################################################
###################################################
##
## Finally, assemble the full Parsl configuration
## [Be sure to specify your needed executor(s)]

config = Config(app_cache=True,
                checkpoint_mode='task_exit',
                executors=[knlMj],
                monitoring=MonitoringHub(
    jobs = []
    start_time = time.time()
    for i in range(num_processes):
        jobs.append(long_task(num_processes))
    for j in jobs:
        j.result()
    end_time = time.time()
    return end_time - start_time


address_str = "condorfe.crc.nd.edu"

local_config = Config(
    executors=[
        ThreadPoolExecutor(
            max_threads=8,
            label='local_threads'
        )
    ]
)

condor_config = Config(
)

if __name__ == "__main__":
    task_batch_sizes = [1, 10, 50, 100, 250]
    configs = [local_config]
    """
    print("Sequential Workload")
    for config in configs:
        parsl.load(config)
        fig, axes = plt.subplots(1, 1)
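# condor_config above is left empty in the snippet; a minimal sketch of what
# it might contain (all values here are assumptions), pairing Parsl's
# HighThroughputExecutor with the CondorProvider so tasks run on the pool
# rather than in local threads.
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
from parsl.providers import CondorProvider

condor_sketch = Config(
    executors=[
        HighThroughputExecutor(
            label='condor_htex',
            max_workers=8,
            provider=CondorProvider(init_blocks=1, max_blocks=4),
        )
    ]
)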
def parslConfigFromCompute(compute):
    """Given a Compute instance, return a setup parsl configuration"""
    if isinstance(compute, EC2Compute):
        # NOTE: Assumes paropt is being run on an EC2 instance with access to the metadata service
        try:
            public_ip = getAWSPublicIP()

            # Get the required environment variables
            required_env_vars = [
                "PAROPT_AWS_REGION", "PAROPT_AWS_KEY_NAME",
                "PAROPT_AWS_STATE_FILE", "PAROPT_AWS_IAM_INSTANCE_PROFILE_ARN"
            ]
            env_vars = {
                varname.replace('PAROPT_AWS_', '').lower(): os.getenv(varname)
                for varname in required_env_vars
            }
            missing_vars = [
                varname for varname, value in env_vars.items() if value is None
            ]
            if missing_vars:
                raise Exception(
                    "Missing required environment variables for running parsl with AWS:\n{}"
                    .format(missing_vars))

            parsl_config = Config(
                executors=[
                    HighThroughputExecutor(
                        label='htex_local',
                        address=public_ip,
                        worker_port_range=(54000, 54050),
                        interchange_port_range=(54051, 54100),
                        cores_per_worker=1,
                        max_workers=1,
                        provider=AWSProvider(
                            image_id=compute.ami,
                            instance_type=compute.instance_model,
                            worker_init='pip3 install git+https://[email protected]/macintoshpie/paropt',
                            nodes_per_block=1,
                            init_blocks=1,
                            max_blocks=1,
                            min_blocks=0,
                            walltime='01:00:00',
                            spot_max_bid=2.0,
                            **env_vars),
                    )
                ],
                strategy=None,
            )
            return parsl_config
        except KeyError as e:
            logger.error('Failed initializing aws config: {}'.format(e))
            raise e
        except (HTTPError, URLError, OSError) as e:
            logger.error('Request to metadata service failed: {}'.format(e))
            raise e
    elif isinstance(compute, LocalCompute):
        return Config(executors=[
            ThreadPoolExecutor(max_threads=8, label='local_threads')
        ])
    else:
        raise Exception('Unknown Compute type')
import logging

import parsl
from parsl import App, DataFlowKernel
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor
from parsl.monitoring.monitoring import MonitoringHub

# parsl.set_stream_logger()

threads_config = Config(
    executors=[ThreadPoolExecutor(label='threads', max_threads=4)],
    monitoring=MonitoringHub(
        hub_address="127.0.0.1",
        hub_port=55055,
        logging_level=logging.INFO,
        resource_monitoring_interval=10,
    ))

dfk = DataFlowKernel(config=threads_config)


@App('python', dfk)
def sleeper(dur=25):
    import time
    time.sleep(dur)


@App('python', dfk)
def cpu_stress(dur=30):
    import time
    s = 0
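# The App('python', dfk) decorator above is Parsl's legacy API; a sketch of
# the equivalent definition under the current decorator API, where parsl.load
# replaces the explicit DataFlowKernel handle:
#
#   import parsl
#   from parsl import python_app
#
#   parsl.load(threads_config)
#
#   @python_app
#   def sleeper(dur=25):
#       import time
#       time.sleep(dur)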
                launcher=AprunLauncher(overrides="-d 64"),
                walltime=MY_TIME,
                nodes_per_block=MY_COMPUTE_NODES,
                init_blocks=1,
                min_blocks=1,
                max_blocks=MY_COMPUTE_BLOCKS,
                # String to prepend to #COBALT blocks in the submit
                # script to the scheduler, e.g.: '#COBALT -t 50'
                scheduler_options='',
                # Command to be run before starting a worker
                worker_init='module load miniconda-3; export PATH=$PATH:{}'.format(MY_USER_PATH),
                cmd_timeout=120,
            ),
        ),
        ThreadPoolExecutor(label='login-node', max_threads=8),
    ],
    monitoring=MonitoringHub(
        hub_address=address_by_hostname(),
        hub_port=55055,
        monitoring_debug=False,
        resource_monitoring_interval=10,
    ))

parsl.load(parsl_config)


@python_app(executors=['theta-htex'])
def pi(num_points):
    from random import random
    inside = 0
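    # A plausible completion of pi() (not in the original snippet): the
    # standard Monte Carlo estimate over num_points uniform draws in the
    # unit square
    for _ in range(num_points):
        x, y = random(), random()
        if x * x + y * y <= 1.0:
            inside += 1
    return 4 * inside / num_points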
import os

import parsl
from parsl.app.app import python_app
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor

parsl_config = Config(
    executors=[ThreadPoolExecutor(max_threads=8, label='login-node')],
    strategy=None,
)
parsl.load(parsl_config)


@python_app(executors=['login-node'])
def estimate_pi(n_points):
    import numpy as np

    # Sample points uniformly in the unit square and count those inside the unit circle
    x = np.random.uniform(0, 1, n_points)
    y = np.random.uniform(0, 1, n_points)
    dist = np.sqrt(x * x + y * y)
    n_circle = np.sum(dist <= 1)
    pi_est = 4 * n_circle / n_points
    return pi_est


if __name__ == '__main__':
    import numpy as np

    n_points = 100000
    n_trials = 100
    trials = []
    for i in range(n_trials):
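        # A plausible completion of the loop (not in the original snippet):
        # each call returns an AppFuture immediately, so launch every trial
        # before blocking on .result()
        trials.append(estimate_pi(n_points))
    results = [t.result() for t in trials]
    print(f'pi ~= {np.mean(results):.5f} +/- {np.std(results):.5f}')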
def cli_run():
    parser = argparse.ArgumentParser()
    parser.add_argument("--redishost",
                        default="127.0.0.1",
                        help="Address at which the redis server can be reached")
    parser.add_argument("--redisport",
                        default="6379",
                        help="Port on which redis is available")
    parser.add_argument("-d", "--debug",
                        action='store_true',
                        help="Enable debug logging")
    parser.add_argument("-m", "--mac",
                        action='store_true',
                        help="Configure for Mac")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()

    if args.mac:
        config = Config(
            executors=[
                ThreadPoolExecutor(label="theta_mpi_launcher"),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    else:
        config = Config(
            executors=[
                HighThroughputExecutor(
                    label="theta_mpi_launcher",
                    # Max workers limits the concurrency exposed via mom node
                    max_workers=2,
                    provider=LocalProvider(
                        init_blocks=1,
                        max_blocks=1,
                    ),
                ),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    parsl.load(config)

    print('''This program creates an "MPI Method Server" that listens on an
input queue and writes to an output queue:

       input_queue --> mpi_method_server --> output_queue

To send it a request, add an entry to the input queue:
     run "pipeline-pump -p N" where N is an integer request
To access a result, remove it from the output queue:
     run "pipeline-pull" (blocking)
     or "pipeline-pull -t T" (T an integer) to time out after T seconds
     TODO: Timeout does not work yet!
''')

    # input_queue --> mpi_method_server --> output_queue
    input_queue = RedisQueue(args.redishost, port=int(args.redisport), prefix='input')
    try:
        input_queue.connect()
    except Exception:
        exit(1)
    output_queue = RedisQueue(args.redishost, port=int(args.redisport), prefix='output')
    try:
        output_queue.connect()
    except Exception:
        exit(1)

    mms = MpiMethodServer(input_queue, output_queue)
    mms.main_loop()

    # Next up, we likely want to add the ability to create a value server and
    # connect it to a method server, e.g.: vs = value_server.ValueServer(output_queue)
    print("All done")
                                   provider=GridEngineProvider(
                                       init_blocks=1,
                                       max_blocks=20),
                                   label="workers"),
               ThreadPoolExecutor(label="login", max_threads=20)
               ],
)
"""

config = Config(
    executors=[IPyParallelExecutor(workers_per_node=10,
                                   provider=GridEngineProvider(
                                       init_blocks=1,
                                       max_blocks=20),
                                   label="workers"),
               ThreadPoolExecutor(label="login", max_threads=20)
               ],
)

parsl.set_stream_logger()
parsl.load(config)

from data_generation import generate_data

proteomefile = sys.argv[1]
directory = f'/home/users/ellenrichards/{sys.argv[2]}/'
threshold = 1000

if not os.path.isdir(directory):
    os.makedirs(directory)
import sys

import parsl
from parsl import python_app
from parsl.config import Config
from parsl.executors import ThreadPoolExecutor

config = Config(executors=[ThreadPoolExecutor()], run_dir=sys.argv[1])
parsl.load(config)


@python_app
def pi(total):
    import random

    width = 10000
    center = width / 2
    c2 = center**2
    count = 0
    for i in range(total):
        # Drop a random point in the box
        x, y = random.randint(1, width), random.randint(1, width)
        # Count points within the circle
        if (x - center)**2 + (y - center)**2 < c2:
            count += 1
    return count * 4 / total


@python_app
def my_sum(a, b, c):
    # Averages the three values (despite the name)
    return (a + b + c) / 3
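# Hedged usage sketch (not in the original snippet): Parsl apps return
# AppFutures, and my_sum accepts futures directly, so Parsl chains the
# dependencies; .result() blocks until the whole graph has run.
if __name__ == '__main__':
    estimate = my_sum(pi(10**6), pi(10**6), pi(10**6))
    print(estimate.result())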
    if args.named_receptors is not None:
        for named_receptor in args.named_receptors:
            receptor, name = named_receptor.split(":")
            receptors.append(receptor)
            names.append(name)
            print(f"Adding receptor {receptors[-1]}, named {names[-1]}")

    args.receptors = receptors
    args.names = names
    return args


if __name__ == '__main__':
    args = get_args()

    config = Config(
        executors=[
            ThreadPoolExecutor(max_threads=args.n_jobs)
        ],
    )
    print("Parsl loaded.")

    from engines.oe import oedock_from_smiles, setup_receptor_from_file, OEOptions

    parsl.load(config)
    sender(args.a, args.p, args.receptors, args.names)
def cli_run():
    parser = argparse.ArgumentParser()
    parser.add_argument("--redishost",
                        default="127.0.0.1",
                        help="Address at which the redis server can be reached")
    parser.add_argument("--redisport",
                        default="6379",
                        help="Port on which redis is available")
    parser.add_argument("-d", "--debug",
                        action='store_true',
                        help="Enable debug logging")
    parser.add_argument("-m", "--mac",
                        action='store_true',
                        help="Configure for Mac")
    args = parser.parse_args()

    if args.debug:
        parsl.set_stream_logger()

    if args.mac:
        config = Config(
            executors=[
                ThreadPoolExecutor(label="htex"),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    else:
        config = Config(
            executors=[
                HighThroughputExecutor(
                    label="htex",
                    # Max workers limits the concurrency exposed via mom node
                    max_workers=2,
                    provider=LocalProvider(
                        init_blocks=1,
                        max_blocks=1,
                    ),
                ),
                ThreadPoolExecutor(label="local_threads")
            ],
            strategy=None,
        )
    parsl.load(config)

    print('''This program creates an "MPI Method Server" that listens on an
input queue and writes to an output queue:

       input_queue --> mpi_method_server --> output_queue

To send it a request, add an entry to the input queue:
     run "pipeline-pump -p N" where N is an integer request
To access a value, remove it from the output queue:
     run "pipeline-pull" (blocking)
     or "pipeline-pull -t T" (T an integer) to time out after T seconds
     TODO: Timeout does not work yet!
''')

    # Get the queues for the method server
    method_queues = MethodServerQueues(args.redishost, port=args.redisport)

    # Start the method server
    mms = ParslMethodServer([target_fun],
                            method_queues,
                            default_executors=['htex'])
    mms.run()