def __init__(self,
             channel=LocalChannel(),
             account=None,
             queue=None,
             scheduler_options='',
             select_options='',
             worker_init='',
             nodes_per_block=1,
             cpus_per_node=1,
             init_blocks=1,
             min_blocks=0,
             max_blocks=1,
             parallelism=1,
             launcher=SingleNodeLauncher(),
             walltime="00:20:00",
             cmd_timeout=120):
    """Initialise the provider whose label is ``'pbspro'``.

    All arguments except ``select_options`` and ``cpus_per_node`` are handed
    to the parent initialiser; those two are stored on the instance here,
    together with the label and the module-level ``template_string``.
    """
    # Positional order expected by the parent initialiser.
    parent_args = (
        channel,
        account,
        queue,
        scheduler_options,
        worker_init,
        nodes_per_block,
        init_blocks,
        min_blocks,
        max_blocks,
        parallelism,
        launcher,
        walltime,
    )
    super().__init__(*parent_args, cmd_timeout=cmd_timeout)

    # Set after the parent call so these values win over anything it assigned.
    self.select_options = select_options
    self.cpus_per_node = cpus_per_node
    self._label = 'pbspro'
    self.template_string = template_string
def __init__(self,
             channel=LocalChannel(),
             nodes_per_block=1,
             launcher=SingleNodeLauncher(),
             init_blocks=1,
             min_blocks=0,
             max_blocks=1,
             worker_init='',
             cmd_timeout=30,
             parallelism=1,
             move_files=None):
    """Initialise the provider whose label is ``'local'``.

    Every argument is stored on the instance under its own name.
    ``script_dir`` starts as ``None`` and ``resources`` as an empty dict.
    """
    self._label = 'local'

    # How commands are issued and launched.
    self.channel = channel
    self.launcher = launcher
    self.worker_init = worker_init
    self.cmd_timeout = cmd_timeout
    self.move_files = move_files
    self.script_dir = None

    # Block sizing and scaling limits.
    self.nodes_per_block = nodes_per_block
    self.init_blocks, self.min_blocks, self.max_blocks = init_blocks, min_blocks, max_blocks
    self.parallelism = parallelism

    # Job bookkeeping: one entry per job, keyed on job_id.
    self.resources = dict()
def __init__(self,
             project_id,
             key_file,
             region,
             os_project,
             os_family,
             google_version='v1',
             instance_type='n1-standard-1',
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             launcher=SingleNodeLauncher(),
             parallelism=1):
    """Initialise the provider whose label is ``'google_cloud'``.

    Side effects visible in this method: the ``GOOGLE_APPLICATION_CREDENTIALS``
    environment variable is set to ``key_file``, a ``compute`` API client is
    built for ``google_version``, and ``self.bye`` is registered with
    ``atexit``.
    """
    self.project_id = project_id
    # Exported before the zone lookup and client construction below.
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_file
    self.zone = self.get_zone(region)
    self.client = googleapiclient.discovery.build('compute', google_version)

    self.label = 'google_cloud'
    self.os_project, self.os_family = os_project, os_family
    self.instance_type = instance_type
    self.launcher = launcher

    # Scaling limits.
    self.init_blocks, self.min_blocks, self.max_blocks = init_blocks, min_blocks, max_blocks
    self.parallelism = parallelism

    # Counters and job bookkeeping (resources is keyed on job_id).
    self.num_instances = 0
    self.provisioned_blocks = 0
    self.resources = dict()

    atexit.register(self.bye)
def __init__(self,
             channel=LocalChannel(),
             nodes_per_block=1,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             parallelism=1,
             walltime="00:10:00",
             scheduler_options='',
             worker_init='',
             project=None,
             cmd_timeout=120,
             move_files=True,
             launcher=SingleNodeLauncher()):
    """Initialise the provider whose label is ``'LSF'``.

    Channel, block sizing, parallelism, walltime, ``cmd_timeout`` and
    ``launcher`` are passed to the parent initialiser. ``project``,
    ``move_files``, ``scheduler_options`` and ``worker_init`` are stored here.
    """
    super().__init__('LSF',
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     cmd_timeout=cmd_timeout,
                     launcher=launcher)

    self.scheduler_options = scheduler_options
    self.worker_init = worker_init
    self.project = project
    self.move_files = move_files
def fresh_config():
    """Return a newly built ``Config`` with one local HTEX executor and monitoring.

    Each call constructs new provider, executor and monitoring objects.
    """
    staging_providers = [FTPInTaskStaging(), HTTPInTaskStaging(), NoOpFileStaging()]

    local_provider = LocalProvider(
        channel=LocalChannel(),
        init_blocks=0,
        min_blocks=0,
        max_blocks=5,
        launcher=SingleNodeLauncher(),
    )

    htex = HighThroughputExecutor(
        label="htex_Local",
        working_dir=working_dir,
        storage_access=staging_providers,
        worker_debug=True,
        cores_per_worker=1,
        heartbeat_period=2,
        heartbeat_threshold=5,
        poll_period=100,
        provider=local_provider,
    )

    hub = MonitoringHub(
        hub_address="localhost",
        hub_port=55055,
        monitoring_debug=False,
        resource_monitoring_interval=1,
    )

    return Config(
        executors=[htex],
        strategy='simple',
        app_cache=True,
        checkpoint_mode='task_exit',
        retries=2,
        monitoring=hub,
    )
def __init__(self,
             partition,
             channel=LocalChannel(),
             nodes_per_block=1,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             parallelism=1,
             walltime="00:10:00",
             scheduler_options='',
             worker_init='',
             cmd_timeout=10,
             exclusive=True,
             launcher=SingleNodeLauncher()):
    """Initialise the provider whose label is ``'slurm'``.

    When ``exclusive`` is truthy, ``"#SBATCH --exclusive\\n"`` is placed in
    front of ``scheduler_options``; otherwise ``scheduler_options`` is stored
    unchanged.
    """
    super().__init__('slurm',
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     cmd_timeout=cmd_timeout,
                     launcher=launcher)

    self.partition = partition
    self.exclusive = exclusive
    self.scheduler_options = (
        "#SBATCH --exclusive\n" + scheduler_options if exclusive else scheduler_options
    )
    self.worker_init = worker_init
def __init__(self,
             channel=LocalChannel(),
             nodes_per_block=1,
             init_blocks=1,
             min_blocks=0,
             max_blocks=1,
             parallelism=1,
             walltime="00:10:00",
             scheduler_options='',
             worker_init='',
             launcher=SingleNodeLauncher(),
             cmd_timeout: int = 60,
             queue=None):
    """Initialise the provider whose label is ``'grid_engine'``.

    Channel, block sizing, parallelism, walltime, ``launcher`` and
    ``cmd_timeout`` are passed to the parent initialiser.
    ``scheduler_options``, ``worker_init`` and ``queue`` are stored on the
    instance.

    A warning is logged when ``launcher`` compares equal to ``'srun'`` or
    ``'srun_mpi'``.
    """
    label = 'grid_engine'
    super().__init__(label,
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     launcher,
                     cmd_timeout=cmd_timeout)
    self.scheduler_options = scheduler_options
    self.worker_init = worker_init
    # Previously the argument was accepted but dropped; keep it on the
    # instance so the requested queue is not silently lost.
    self.queue = queue

    if launcher in ['srun', 'srun_mpi']:
        logger.warning(
            "Use of {} launcher is usually appropriate for Slurm providers. "
            "Recommended options include 'single_node' or 'aprun'.".format(
                launcher))
def test_local_channel():
    """Run the shared provider checks against a ``LocalProvider`` on a ``LocalChannel``.

    The provider's ``script_dir`` is the context-managed temporary directory,
    so it is removed when the test finishes. The earlier version overwrote
    that name with a second ``tempfile.mkdtemp()`` directory that was never
    cleaned up.
    """
    with tempfile.TemporaryDirectory() as script_dir:
        p = LocalProvider(channel=LocalChannel(), launcher=SingleNodeLauncher(debug=False))
        p.script_dir = script_dir
        _run_tests(p)
def __init__(self,
             channel: Channel = LocalChannel(),
             nodes_per_block: int = 1,
             cores_per_slot: Optional[int] = None,
             mem_per_slot: Optional[float] = None,
             init_blocks: int = 1,
             min_blocks: int = 0,
             max_blocks: int = 1,
             parallelism: float = 1,
             environment: Optional[Dict[str, str]] = None,
             project: str = '',
             scheduler_options: str = '',
             transfer_input_files: List[str] = [],
             walltime: str = "00:10:00",
             worker_init: str = '',
             launcher: Launcher = SingleNodeLauncher(),
             requirements: str = '',
             cmd_timeout: int = 60,
             cmd_chunk_size: int = 100) -> None:
    """Initialise the provider whose label is ``'condor'``.

    Channel, block sizing, parallelism, walltime, ``launcher`` and
    ``cmd_timeout`` are passed to the parent initialiser; the remaining
    arguments are stored on the instance.

    ``environment`` values that support ``str.replace`` are quoted for the
    submit file; other values are stored unchanged. The quoted mapping is a
    new dict, so the caller's ``environment`` is left untouched.
    ``transfer_input_files`` is copied, so neither the caller's list nor the
    shared default list is aliased by the instance. A newline is appended to
    ``scheduler_options`` and ``worker_init``.
    """
    label = 'condor'
    super().__init__(label,
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     launcher,
                     cmd_timeout=cmd_timeout)
    self.cores_per_slot = cores_per_slot
    self.mem_per_slot = mem_per_slot
    self.cmd_chunk_size = cmd_chunk_size

    # To Parsl, Condor slots should be treated equivalently to nodes
    self.cores_per_node = cores_per_slot
    self.mem_per_node = mem_per_slot

    # Quote into a fresh dict: rewriting the caller's mapping in place would
    # re-quote the values if the same dict were passed to another provider.
    source_environment = environment if environment is not None else {}
    self.environment = {}
    for key, value in source_environment.items():
        # To escape literal quote marks, double them
        # See: http://research.cs.wisc.edu/htcondor/manual/v8.6/condor_submit.html
        # NOTE(review): single quotes are first turned into double quotes, so
        # a literal ' in a value is not preserved - confirm this is intended.
        try:
            self.environment[key] = "'{}'".format(
                value.replace("'", '"').replace('"', '""'))
        except AttributeError:
            # Values without str.replace are stored as given.
            self.environment[key] = value

    self.project = project
    self.scheduler_options = scheduler_options + '\n'
    self.worker_init = worker_init + '\n'
    self.requirements = requirements
    # Copy so instances never share (or mutate) the default list object.
    self.transfer_input_files = list(transfer_input_files)
def __init__(self,
             partition: Optional[str],
             account: Optional[str] = None,
             channel: Channel = LocalChannel(),
             nodes_per_block: int = 1,
             cores_per_node: Optional[int] = None,
             mem_per_node: Optional[int] = None,
             init_blocks: int = 1,
             min_blocks: int = 0,
             max_blocks: int = 1,
             parallelism: float = 1,
             walltime: str = "00:10:00",
             scheduler_options: str = '',
             worker_init: str = '',
             cmd_timeout: int = 10,
             exclusive: bool = True,
             move_files: bool = True,
             launcher: Launcher = SingleNodeLauncher()):
    """Initialise the provider whose label is ``'slurm'``.

    The stored ``scheduler_options`` is the caller's text plus a newline,
    followed by one ``#SBATCH`` line for each of ``exclusive``, ``partition``
    and ``account`` that is truthy, in that order. ``worker_init`` is stored
    with a trailing newline.
    """
    super().__init__('slurm',
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     cmd_timeout=cmd_timeout,
                     launcher=launcher)

    self.partition = partition
    self.account = account
    self.cores_per_node = cores_per_node
    self.mem_per_node = mem_per_node
    self.exclusive = exclusive
    self.move_files = move_files

    # Collect the directive lines, then join them once.
    directive_lines = [scheduler_options + '\n']
    if exclusive:
        directive_lines.append("#SBATCH --exclusive\n")
    if partition:
        directive_lines.append("#SBATCH --partition={}\n".format(partition))
    if account:
        directive_lines.append("#SBATCH --account={}\n".format(account))
    self.scheduler_options = ''.join(directive_lines)

    self.worker_init = worker_init + '\n'
def __init__(self,
             channel=LocalChannel(),
             nodes_per_block=1,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             parallelism=1,
             environment=None,
             project='',
             scheduler_options='',
             transfer_input_files=[],
             walltime="00:10:00",
             worker_init='',
             launcher=SingleNodeLauncher(),
             requirements='',
             cmd_timeout=60):
    """Initialise the provider whose label is ``'condor'``.

    Channel, block sizing, parallelism, walltime, ``launcher`` and
    ``cmd_timeout`` are passed to the parent initialiser; the remaining
    arguments are stored on the instance and ``provisioned_blocks`` starts
    at 0.

    ``environment`` values that support ``str.replace`` are quoted for the
    submit file; other values are stored unchanged. The quoted mapping is a
    new dict, so the caller's ``environment`` is left untouched.
    ``transfer_input_files`` is copied, so neither the caller's list nor the
    shared default list is aliased by the instance.
    """
    label = 'condor'
    super().__init__(label,
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     launcher,
                     cmd_timeout=cmd_timeout)
    self.provisioned_blocks = 0

    # Quote into a fresh dict: rewriting the caller's mapping in place would
    # re-quote the values if the same dict were passed to another provider.
    source_environment = environment if environment is not None else {}
    self.environment = {}
    for key, value in source_environment.items():
        # To escape literal quote marks, double them
        # See: http://research.cs.wisc.edu/htcondor/manual/v8.6/condor_submit.html
        # NOTE(review): single quotes are first turned into double quotes, so
        # a literal ' in a value is not preserved - confirm this is intended.
        try:
            self.environment[key] = "'{}'".format(
                value.replace("'", '"').replace('"', '""'))
        except AttributeError:
            # Values without str.replace are stored as given.
            self.environment[key] = value

    self.project = project
    self.scheduler_options = scheduler_options
    self.worker_init = worker_init
    self.requirements = requirements
    # Copy so instances never share (or mutate) the default list object.
    self.transfer_input_files = list(transfer_input_files)
def test_simple(mem_per_worker):
    """Check that the local HTEX executor connects the expected number of workers.

    The expectation is ``cpu_count / cores_per_worker``, or, when
    ``mem_per_worker`` is truthy, the available memory in GiB divided by
    ``mem_per_worker`` and truncated to an int.

    ``parsl.clear()`` runs in a ``finally`` clause so a failed assertion or a
    failed task does not leave this configuration loaded.

    Returns:
        True when the assertion holds.
    """
    config = Config(
        executors=[
            HighThroughputExecutor(
                poll_period=1,
                label="htex_local",
                worker_debug=True,
                mem_per_worker=mem_per_worker,
                cores_per_worker=0.1,
                suppress_failure=True,
                provider=LocalProvider(
                    channel=LocalChannel(),
                    init_blocks=1,
                    max_blocks=1,
                    launcher=SingleNodeLauncher(),
                ),
            )
        ],
        strategy=None,
    )

    parsl.load(config)
    try:
        executor_config = config.executors[0]
        print("Configuration requests:")
        print("cores_per_worker: ", executor_config.cores_per_worker)
        print("mem_per_worker: ", executor_config.mem_per_worker)

        # Available memory in GiB, rounded to one decimal place.
        available_mem_on_node = round(psutil.virtual_memory().available / (2**30), 1)
        expected_workers = multiprocessing.cpu_count() / executor_config.cores_per_worker
        if mem_per_worker:
            expected_workers = int(available_mem_on_node / executor_config.mem_per_worker)

        print("Available memory: ", available_mem_on_node)
        print("Expected workers: ", expected_workers)

        # Prime a worker
        double(5).result()
        dfk = parsl.dfk()
        connected = dfk.executors['htex_local'].connected_workers
        print("Connected : ", connected)
        assert expected_workers == connected, "Expected {} workers, instead got {} workers".format(expected_workers, connected)
    finally:
        # Always unload the config, even when the checks above fail.
        parsl.clear()
    return True
def test_ssh_channel():
    """Run the shared provider checks against a ``LocalProvider`` on an ``SSHChannel``.

    An sshd is started via ``_start_sshd`` and always stopped in ``finally``.
    Both the remote script directory and the provider's local ``script_dir``
    are context-managed temporary directories. The earlier version created
    the local one with ``tempfile.mkdtemp()`` and never removed it.
    """
    with tempfile.TemporaryDirectory() as config_dir:
        sshd_thread, priv_key, server_port = _start_sshd(config_dir)
        try:
            with tempfile.TemporaryDirectory() as remote_script_dir, \
                    tempfile.TemporaryDirectory() as script_dir:
                # The SSH library fails to add the new host key to the file if the file does not
                # already exist, so create it here.
                known_hosts = '{}/known.hosts'.format(config_dir)
                pathlib.Path(known_hosts).touch(mode=0o600)

                p = LocalProvider(channel=SSHChannel('127.0.0.1',
                                                     port=server_port,
                                                     script_dir=remote_script_dir,
                                                     host_keys_filename=known_hosts,
                                                     key_filename=priv_key),
                                  launcher=SingleNodeLauncher(debug=False))
                p.script_dir = script_dir
                _run_tests(p)
        finally:
            _stop_sshd(sshd_thread)
def __init__(self,
             partition,
             channel=LocalChannel(),
             nodes_per_block=1,
             cores_per_node=None,
             mem_per_node=None,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             parallelism=1,
             walltime="00:10:00",
             scheduler_options='',
             worker_init='',
             cmd_timeout=10,
             exclusive=True,
             move_files=True,
             launcher=SingleNodeLauncher()):
    """Initialise the provider whose label is ``'slurm'``.

    The stored ``scheduler_options`` is the caller's text plus a newline,
    followed by ``"#SBATCH --exclusive\\n"`` when ``exclusive`` is truthy.
    ``worker_init`` is stored with a trailing newline.
    """
    super().__init__('slurm',
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     cmd_timeout=cmd_timeout,
                     launcher=launcher)

    self.partition = partition
    self.exclusive = exclusive
    self.move_files = move_files
    self.cores_per_node, self.mem_per_node = cores_per_node, mem_per_node

    exclusive_directive = "#SBATCH --exclusive\n" if exclusive else ''
    self.scheduler_options = scheduler_options + '\n' + exclusive_directive
    self.worker_init = worker_init + '\n'
# This is an example config, make sure to # replace the specific values below with the literal values # (e.g., 'USERNAME' -> 'your_username') config = Config( executors=[ IPyParallelExecutor( label='cooley_ssh_il_local_single_node', provider=CobaltProvider( channel=SSHInteractiveLoginChannel( hostname='cooley.alcf.anl.gov', username='******', # Please replace USERNAME with your username script_dir='/home/USERNAME/parsl_scripts/', # Please replace USERNAME with your username ), nodes_per_block=1, tasks_per_node=1, init_blocks=1, max_blocks=1, walltime="00:05:00", scheduler_options='', # Input your scheduler_options if needed worker_init='', # Input your worker_init if needed queue='pubnet-debug', account='ALCF_ALLOCATION', # Please replace ALCF_ALLOCATION with your ALCF allocation launcher=SingleNodeLauncher(), ), controller=Controller(public_ip='PUBLIC_IP'), # Please replace PUBLIC_IP with your public ip ) ], )
tasks_per_node = target_workers % args.cores_per_node else: nodes_per_block = int(target_workers / args.cores_per_node) tasks_per_node = args.cores_per_node config = Config( executors=[ HighThroughputExecutor( label="funcx_local", # worker_debug=True, worker_mode="singularity_reuse", container_image=os.path.expanduser("~/sing-run.simg"), cores_per_worker=int(args.cores_per_node / tasks_per_node), max_workers=1, address=address_by_interface("eth0"), provider=CobaltProvider(launcher=SingleNodeLauncher(), init_blocks=1, max_blocks=1, queue=args.queue, account='DLHub', worker_init="source activate funcx_5"), ) ], run_dir="/home/tskluzac/FuncX/evaluation/runinfo", strategy=None, ) parsl.clear() dfk = parsl.load(config) executor = list(dfk.executors.values())[0]
def __init__(self,
             vm_reference,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             parallelism=1,
             worker_init='',
             location='westus',
             group_name='parsl.auto',
             key_name=None,
             key_file=None,
             vnet_name="parsl.auto",
             linger=False,
             launcher=SingleNodeLauncher()):
    """Initialise the provider whose label is ``'azure'``.

    Credentials come from the JSON file ``key_file`` when it is given,
    otherwise from the environment. In both cases the four values read are
    ``AZURE_CLIENT_ID``, ``AZURE_CLIENT_SECRET``, ``AZURE_TENANT_ID`` and
    ``AZURE_SUBSCRIPTION_ID``. ``self.get_clients()`` is called last.

    Raises:
        OptionalModuleMissing: if ``_api_enabled`` is falsy.
        ConfigurationError: if ``key_file`` is ``None`` and any of the four
            environment variables is unset.
    """
    if not _api_enabled:
        raise OptionalModuleMissing(
            ['azure', 'msrestazure'], "Azure Provider requires the azure module.")

    self._label = 'azure'
    self.init_blocks = init_blocks
    self.min_blocks = min_blocks
    self.max_blocks = max_blocks
    self.max_nodes = max_blocks
    self.parallelism = parallelism
    self.nodes_per_block = 1

    self.worker_init = worker_init
    self.vm_reference = vm_reference
    self.region = location
    self.vnet_name = vnet_name

    self.key_name = key_name
    self.key_file = key_file
    self.location = location
    self.group_name = group_name

    self.launcher = launcher
    self.linger = linger
    self.resources = {}
    self.instances = []

    credential_names = (
        "AZURE_CLIENT_ID",
        "AZURE_CLIENT_SECRET",
        "AZURE_TENANT_ID",
        "AZURE_SUBSCRIPTION_ID",
    )
    env_specified = all(os.getenv(name) is not None for name in credential_names)

    if key_file is None and not env_specified:
        # The message names all four variables that are actually checked and
        # no longer embeds the whitespace of a backslash line continuation.
        raise ConfigurationError(
            "Must specify either 'key_file', or the "
            "`AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET`, `AZURE_TENANT_ID`, "
            "and `AZURE_SUBSCRIPTION_ID` environment variables.")

    if key_file is None:
        lookup = os.getenv
    else:
        with open(key_file) as fh:
            keys = json.load(fh)
        # Missing keys yield None, exactly like an unset environment variable.
        lookup = keys.get

    self.clientid = lookup("AZURE_CLIENT_ID")
    self.clientsecret = lookup("AZURE_CLIENT_SECRET")
    self.tenantid = lookup("AZURE_TENANT_ID")
    self.subid = lookup("AZURE_SUBSCRIPTION_ID")

    self.get_clients()
def __init__(self,
             image_id,
             key_name,
             init_blocks=1,
             min_blocks=0,
             max_blocks=10,
             nodes_per_block=1,
             parallelism=1,
             worker_init='',
             instance_type='t2.small',
             region='us-east-2',
             spot_max_bid=0,
             key_file=None,
             profile=None,
             iam_instance_profile_arn='',
             state_file=None,
             walltime="01:00:00",
             linger=False,
             launcher=SingleNodeLauncher()):
    """Initialise the provider whose label is ``'ec2'``.

    After storing the arguments, the boto client is initialised and the state
    file is read. If reading fails for any reason, ``create_vpc()`` is called
    and the state is written out with ``write_state_file()``.

    ``state_file`` defaults to ``'.ec2_<label>.json'`` built from
    ``self.label``.

    Raises:
        OptionalModuleMissing: if ``_boto_enabled`` is falsy.
        ConfigurationError: if none of ``profile``, ``key_file`` or the
            ``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` environment
            variables is available.
        Exception: whatever ``initialize_boto_client()`` or ``create_vpc()``
            raise is logged and re-raised.
    """
    if not _boto_enabled:
        raise OptionalModuleMissing(
            ['boto3'], "AWS Provider requires the boto3 module.")

    self.image_id = image_id
    self._label = 'ec2'
    self.init_blocks = init_blocks
    self.min_blocks = min_blocks
    self.max_blocks = max_blocks
    self.nodes_per_block = nodes_per_block
    self.max_nodes = max_blocks * nodes_per_block
    self.parallelism = parallelism

    self.worker_init = worker_init
    self.instance_type = instance_type
    self.region = region
    self.spot_max_bid = spot_max_bid

    self.key_name = key_name
    self.key_file = key_file
    self.profile = profile
    self.iam_instance_profile_arn = iam_instance_profile_arn

    self.walltime = walltime
    self.launcher = launcher
    self.linger = linger
    self.resources = {}
    self.state_file = state_file if state_file is not None else '.ec2_{}.json'.format(
        self.label)

    env_specified = (os.getenv("AWS_ACCESS_KEY_ID") is not None
                     and os.getenv("AWS_SECRET_ACCESS_KEY") is not None)
    if profile is None and key_file is None and not env_specified:
        # The opening quote before 'profile' was missing in the old message.
        raise ConfigurationError(
            "Must specify either 'profile', 'key_file', or "
            "'AWS_ACCESS_KEY_ID' and 'AWS_SECRET_ACCESS_KEY' environment variables."
        )

    try:
        self.initialize_boto_client()
    except Exception:
        logger.error("{} failed to initialize.".format(self))
        # A bare raise re-raises the active exception with its traceback intact.
        raise

    # Any failure to read the state file is treated as "no previous state";
    # this best-effort behaviour is deliberate.
    state_file_exists = False
    try:
        self.read_state_file(self.state_file)
        state_file_exists = True
    except Exception:
        logger.info(
            "No state file found. Cannot load previous options. Creating new infrastructure."
        )

    if not state_file_exists:
        try:
            # The .id access makes a missing or invalid VPC object fail here.
            self.create_vpc().id
        except Exception as e:
            logger.info(
                "Failed to create ec2 infrastructure: {0}".format(e))
            raise
        else:
            self.write_state_file()
def __init__(self,
             channel=LocalChannel(),
             nodes_per_block=1,
             cores_per_block=None,
             cores_per_node=None,
             init_blocks=1,
             min_blocks=0,
             max_blocks=1,
             parallelism=1,
             walltime="00:10:00",
             scheduler_options='',
             worker_init='',
             project=None,
             queue=None,
             cmd_timeout=120,
             move_files=True,
             bsub_redirection=False,
             request_by_nodes=True,
             launcher=SingleNodeLauncher()):
    """Initialise the provider whose label is ``'LSF'``.

    The stored ``scheduler_options`` is the caller's text plus a newline,
    followed by ``#BSUB`` lines for ``project`` and ``queue`` when truthy.
    With ``request_by_nodes`` a ``-nnodes`` line is added. Otherwise ``-n``
    and ``span[ptile=...]`` lines are added and ``nodes_per_block`` is
    recomputed as ``ceil(cores_per_block / cores_per_node)``.

    Raises:
        AssertionError: when requesting by cores and ``cores_per_block`` or
            ``cores_per_node`` is ``None``, or ``cores_per_node`` is 0.
    """
    super().__init__('LSF',
                     channel,
                     nodes_per_block,
                     init_blocks,
                     min_blocks,
                     max_blocks,
                     parallelism,
                     walltime,
                     cmd_timeout=cmd_timeout,
                     launcher=launcher)

    self.project, self.queue = project, queue
    self.cores_per_block, self.cores_per_node = cores_per_block, cores_per_node
    self.move_files = move_files
    self.bsub_redirection = bsub_redirection
    self.request_by_nodes = request_by_nodes

    # Start from the caller's options and extend them one directive at a time.
    options = scheduler_options + "\n"
    if project:
        options += "#BSUB -P {}\n".format(project)
    if queue:
        options += "#BSUB -q {}\n".format(queue)
    self.scheduler_options = options

    if request_by_nodes:
        self.scheduler_options += "#BSUB -nnodes {}\n".format(nodes_per_block)
    else:
        assert cores_per_block is not None and cores_per_node is not None, \
            "Requesting resources by the number of cores. " \
            "Need to specify cores_per_block and cores_per_node in the LSF provider."

        self.scheduler_options += "#BSUB -n {}\n".format(cores_per_block)
        self.scheduler_options += '#BSUB -R "span[ptile={}]"\n'.format(cores_per_node)

        # Override the value given to the parent so the Parsl strategy sees
        # the node count implied by the core request.
        assert cores_per_node != 0, "Need to specify a non-zero cores_per_node."
        self.nodes_per_block = int(math.ceil(cores_per_block / cores_per_node))

    self.worker_init = worker_init