from funcx_endpoint.endpoint.utils.config import Config
from funcx_endpoint.executors import HighThroughputExecutor
from parsl.addresses import address_by_hostname
from parsl.channels import LocalChannel
from parsl.launchers import SrunLauncher
from parsl.providers import SlurmProvider

...

config = Config(
    executors=[
        HighThroughputExecutor(
            label="fe.cs.uchicago",
            address=address_by_hostname(),
            provider=SlurmProvider(
                channel=LocalChannel(),
                nodes_per_block=NODES_PER_JOB,
                init_blocks=1,
                partition="general",
                launcher=SrunLauncher(
                    overrides=(f"hostname; srun --ntasks={TOTAL_WORKERS} "
                               f"--ntasks-per-node={WORKERS_PER_NODE} "
                               f"--gpus-per-task=rtx2080ti:{GPUS_PER_WORKER} "
                               f"--gpu-bind=map_gpu:{GPU_MAP}")),
                walltime="01:00:00",
            ),
        )
    ],
)
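# Sketch (not part of the original file): the job-geometry constants used above
# are elided by the "..." marker and would need to be defined before the Config.
# Purely illustrative, assumed values for a 2-node job with 4 GPU-bound workers
# per node might look like:
NODES_PER_JOB = 2
WORKERS_PER_NODE = 4
GPUS_PER_WORKER = 1
TOTAL_WORKERS = NODES_PER_JOB * WORKERS_PER_NODE
GPU_MAP = ",".join(str(i) for i in range(WORKERS_PER_NODE))  # e.g. "0,1,2,3"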
def cli_run():
    """Parse command-line arguments and start an EndpointInterchange."""

    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--client_address", required=True,
                        help="Client address")
    parser.add_argument(
        "--client_ports",
        required=True,
        help="client ports as a triple of outgoing,incoming,command",
    )
    parser.add_argument("--worker_port_range", help="Worker port range as a tuple")
    parser.add_argument(
        "-l",
        "--logdir",
        default="./parsl_worker_logs",
        help="Parsl worker log directory",
    )
    parser.add_argument(
        "--worker_ports",
        default=None,
        help="OPTIONAL, pair of worker ports to listen on, "
        "e.g. --worker_ports=50001,50005",
    )
    parser.add_argument(
        "--suppress_failure",
        action="store_true",
        help="Enables suppression of failures",
    )
    parser.add_argument(
        "--endpoint_id",
        required=True,
        help="Endpoint ID, used to identify the endpoint to the remote broker",
    )
    parser.add_argument("--hb_threshold", help="Heartbeat threshold in seconds")
    parser.add_argument(
        "--config",
        default=None,
        help="Configuration object that describes provisioning",
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="Enables debug logging"
    )

    print("Starting HTEX Interchange")
    args = parser.parse_args()

    logdir = os.path.abspath(args.logdir)
    os.makedirs(logdir, exist_ok=True)
    setup_logging(logfile=os.path.join(logdir, "endpoint.log"), debug=args.debug)

    optionals = {}
    optionals["suppress_failure"] = args.suppress_failure
    optionals["logdir"] = os.path.abspath(args.logdir)
    optionals["client_address"] = args.client_address
    optionals["client_ports"] = [int(i) for i in args.client_ports.split(",")]
    optionals["endpoint_id"] = args.endpoint_id

    # DEBUG ONLY : TODO: FIX
    if args.config is None:
        from parsl.providers import LocalProvider

        from funcx_endpoint.endpoint.utils.config import Config

        config = Config(
            worker_debug=True,
            scaling_enabled=True,
            provider=LocalProvider(
                init_blocks=1,
                min_blocks=1,
                max_blocks=1,
            ),
            max_workers_per_node=2,
            funcx_service_address="http://127.0.0.1:8080",
        )
        optionals["config"] = config
    else:
        optionals["config"] = args.config

    if args.worker_ports:
        optionals["worker_ports"] = [int(i) for i in args.worker_ports.split(",")]
    if args.worker_port_range:
        optionals["worker_port_range"] = [
            int(i) for i in args.worker_port_range.split(",")
        ]

    ic = EndpointInterchange(**optionals)
    ic.start()
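# Sketch (assumed usage, not part of the original module): one way to exercise
# cli_run() is to populate sys.argv before calling it. The address, ports, and
# endpoint ID below are illustrative placeholders only.
import sys

sys.argv = [
    "interchange",
    "--client_address", "127.0.0.1",
    "--client_ports", "55001,55002,55003",
    "--endpoint_id", "test-endpoint",
    "--debug",
]
cli_run()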
from funcx_endpoint.endpoint.utils.config import Config

config = Config()

if __name__ == '__main__':
    import funcx
    import os
    import logging

    funcx.set_stream_logger()
    logger = logging.getLogger(__file__)

    endpoint_dir = "/home/yadu/.funcx/default"

    if config.working_dir is None:
        working_dir = "{}/{}".format(endpoint_dir, "worker_logs")
    # if self.worker_logdir_root is not None:
    #     worker_logdir = "{}/{}".format(self.worker_logdir_root, self.label)

    print("Loading : ", config)

    # Set script dir
    config.provider.script_dir = working_dir
    config.provider.channel.script_dir = os.path.join(working_dir, 'submit_scripts')
    config.provider.channel.makedirs(config.provider.channel.script_dir,
                                     exist_ok=True)
    os.makedirs(config.provider.script_dir, exist_ok=True)

    debug_opts = "--debug" if config.worker_debug else ""
    max_workers = "" if config.max_workers_per_node == float('inf') \
        else "--max_workers={}".format(config.max_workers_per_node)
config = Config(
    executors=[
        HighThroughputExecutor(
            label="fe.cs.uchicago",
            worker_debug=False,
            address=address_by_hostname(),
            provider=SlurmProvider(
                partition='general',

                # Launch 4 managers per node, each bound to 1 GPU.
                # This is a hack: we use `hostname;` to terminate the srun
                # command that the launcher generates, then start our own.
                #
                # DO NOT MODIFY unless you know what you are doing.
                launcher=SrunLauncher(
                    overrides=(f'hostname; srun --ntasks={TOTAL_WORKERS} '
                               f'--ntasks-per-node={WORKERS_PER_NODE} '
                               f'--gpus-per-task=rtx2080ti:{GPUS_PER_WORKER} '
                               f'--gpu-bind=map_gpu:{GPU_MAP}')),

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=NODES_PER_JOB,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00',
            ),
        )
    ],
)
from funcx_endpoint.endpoint.utils.config import Config
from parsl.providers import LocalProvider

config = Config(
    scaling_enabled=True,
    provider=LocalProvider(
        init_blocks=1,
        min_blocks=1,
        max_blocks=1,
    ),
    max_workers_per_node=2,
    funcx_service_address='https://api.funcx.org/v1',
)

# For now, visible_to must be a list of URNs for Globus Auth users or groups, e.g.:
# urn:globus:auth:identity:{user_uuid}
# urn:globus:groups:id:{group_uuid}
meta = {
    "name": "$name",
    "description": "",
    "organization": "",
    "department": "",
    "public": False,
    "visible_to": [],
}
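# Sketch (illustrative, not part of the original file): a filled-in metadata
# block that restricts visibility to one Globus Auth identity and one group,
# following the URN formats noted above. The UUIDs are placeholders.
meta = {
    "name": "$name",
    "description": "Example development endpoint",
    "organization": "Example University",
    "department": "Research Computing",
    "public": False,
    "visible_to": [
        "urn:globus:auth:identity:11111111-2222-3333-4444-555555555555",
        "urn:globus:groups:id:66666666-7777-8888-9999-aaaaaaaaaaaa",
    ],
}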
config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=2,
            worker_debug=False,
            address=address_by_hostname(),
            provider=SlurmProvider(
                partition=user_opts['frontera']['partition'],
                launcher=SrunLauncher(),

                # Enter scheduler_options if needed
                scheduler_options=user_opts['frontera']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['frontera']['worker_init'],

                # Add extra time for slow scheduler responses
                cmd_timeout=60,

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00',
            ),
        )
    ],
)
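# Sketch (assumed structure, not part of the original file): the config above
# reads site-specific values from a user_opts dict, which would normally be
# defined before the Config. All values below are illustrative placeholders.
user_opts = {
    'frontera': {
        'partition': 'development',
        'scheduler_options': '#SBATCH -A YOUR_ALLOCATION',
        'worker_init': 'module load python3; source activate funcx_env',
    }
}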
}

config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=1,
            worker_debug=False,
            address=address_by_hostname(),
            provider=TorqueProvider(
                queue='normal',
                launcher=AprunLauncher(overrides="-b -- bwpy-environ --"),

                # String to prepend to the #PBS directives in the submit
                # script to the scheduler
                scheduler_options=user_opts['bluewaters']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load bwpy; source activate funcx_env'.
                worker_init=user_opts['bluewaters']['worker_init'],

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00'),
        )
    ],
)
# fmt: on
config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=1,
            worker_debug=False,
            address=address_by_hostname(),
            scheduler_mode='soft',
            worker_mode='singularity_reuse',
            container_type='singularity',
            container_cmd_options="-H /home/$USER",
            provider=CobaltProvider(
                queue='debug-flat-quad',
                account=user_opts['theta']['account'],
                launcher=AprunLauncher(overrides="-d 64"),

                # String to prepend to #COBALT blocks in the submit
                # script to the scheduler eg: '#COBALT -t 50'
                scheduler_options=user_opts['theta']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate funcx_env'.
                worker_init=user_opts['theta']['worker_init'],

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00'
            ),
        )
    ],
)
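# Sketch (assumed structure, not part of the original file): the config above
# reads Theta-specific values from a user_opts dict, normally defined before
# the Config. All values below are illustrative placeholders.
user_opts = {
    'theta': {
        'account': 'YOUR_ALLOCATION',
        'scheduler_options': '',
        'worker_init': 'module load miniconda-3; source activate funcx_env',
    }
}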
import argparse

import funcx
from funcx_endpoint.endpoint.utils.config import Config
from funcx_endpoint.executors.high_throughput.interchange import Interchange

funcx.set_stream_logger()

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--address", required=True, help="Address")
    parser.add_argument("-c", "--client_ports", required=True, help="ports")
    args = parser.parse_args()

    config = Config()

    ic = Interchange(
        config,
        client_address=args.address,
        client_ports=[int(i) for i in args.client_ports.split(',')],
    )
    ic.start()
    print("Interchange started")
}

config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=2,
            worker_debug=False,
            address=address_by_hostname(),
            provider=SlurmProvider(
                partition='broadwl',
                launcher=SrunLauncher(),

                # String to prepend to #SBATCH blocks in the submit
                # script to the scheduler eg: '#SBATCH --constraint=knl,quad,cache'
                scheduler_options=user_opts['midway']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['midway']['worker_init'],

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 30 minutes
                walltime='00:30:00'),
        )
    ],
)
# fmt: on
}

config = Config(
    executors=[
        HighThroughputExecutor(
            label='Kubernetes_funcX',
            max_workers_per_node=1,
            address=address_by_route(),
            scheduler_mode='hard',
            container_type='docker',
            strategy=KubeSimpleStrategy(max_idletime=3600),
            provider=KubernetesProvider(
                init_blocks=0,
                min_blocks=0,
                max_blocks=2,
                init_cpu=1,
                max_cpu=4,
                init_mem="1024Mi",
                max_mem="4096Mi",
                image=user_opts['kube']['image'],
                worker_init=user_opts['kube']['worker_init'],
                namespace=user_opts['kube']['namespace'],
                incluster_config=False,
            ),
        )
    ],
    heartbeat_period=15,
    heartbeat_threshold=200,
    log_dir='.',
)
# fmt: on
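# Sketch (assumed structure, not part of the original file): the config above
# reads deployment details from a user_opts dict, normally defined before the
# Config. The image, namespace, and worker_init values are placeholders only.
user_opts = {
    'kube': {
        'image': 'python:3.8-buster',
        'namespace': 'default',
        'worker_init': 'pip install funcx-endpoint',
    }
}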
from parsl.providers import LocalProvider

from funcx_endpoint.endpoint.utils.config import Config
from funcx_endpoint.executors import HighThroughputExecutor

config = Config(
    executors=[
        HighThroughputExecutor(
            provider=LocalProvider(
                init_blocks=1,
                min_blocks=0,
                max_blocks=1,
            ),
        )
    ],
    funcx_service_address="https://api2.funcx.org/v2",
)

# For now, visible_to must be a list of URNs for Globus Auth users or groups, e.g.:
# urn:globus:auth:identity:{user_uuid}
# urn:globus:groups:id:{group_uuid}
meta = {
    "name": "$name",
    "description": "",
    "organization": "",
    "department": "",
    "public": False,
    "visible_to": [],
}
        # Set ncpus=32, otherwise it defaults to 1 on Polaris
        'scheduler_options': '',
    }
}

config = Config(
    executors=[
        HighThroughputExecutor(
            max_workers_per_node=1,
            strategy=SimpleStrategy(max_idletime=300),

            # IP of Polaris testbed login node
            address='10.230.2.72',
            provider=PBSProProvider(
                launcher=SingleNodeLauncher(),
                queue='workq',
                scheduler_options=user_opts['polaris']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['polaris']['worker_init'],
                cpus_per_node=32,
                walltime='01:00:00',
                nodes_per_block=1,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,
            ),
        )
    ],
)
# fmt: on
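# Sketch (assumed, not part of the original file): only the tail of the
# user_opts dict survives above. A complete definition consistent with the keys
# the config reads might look like this; both values are placeholders.
user_opts = {
    'polaris': {
        'worker_init': 'module load conda; conda activate funcx_env',
        # Set ncpus=32, otherwise it defaults to 1 on Polaris
        'scheduler_options': '',
    }
}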
config = Config(
    executors=[
        HighThroughputExecutor(
            worker_debug=False,
            address=address_by_interface('bond0.144'),
            provider=SlurmProvider(
                partition='GPU',  # Partition / QOS

                # We request all hyperthreads on a node.
                launcher=SrunLauncher(overrides='-c 272'),

                # String to prepend to #SBATCH blocks in the submit
                # script to the scheduler eg: '#SBATCH --constraint=gpu'
                scheduler_options=user_opts['perlmutter']['scheduler_options'],

                # Command to be run before starting a worker, such as:
                # 'module load Anaconda; source activate parsl_env'.
                worker_init=user_opts['perlmutter']['worker_init'],

                # Slurm scheduler on Cori can be slow at times,
                # increase the command timeouts
                cmd_timeout=120,

                # Scale between 0-1 blocks with 2 nodes per block
                nodes_per_block=2,
                init_blocks=0,
                min_blocks=0,
                max_blocks=1,

                # Hold blocks for 10 minutes
                walltime='00:10:00',
            ),
        ),
    ],
)