def adapt(self, minimum, maximum, cores=1, memory='1 GB', **kwargs):
    # Check if given memory is greater than maximum allowed
    if parse_bytes(memory) > parse_bytes(self.max_memory):
        raise MemoryError(' '.join([
            'could not allocate {:s} of memory,'.format(memory),
            'maximum allowed is {:s}'.format(self.max_memory)
        ]))
    # Check if given memory is lower than minimum allowed
    if parse_bytes(memory) < parse_bytes(self.min_memory):
        raise MemoryError(' '.join([
            'could not allocate {:s} of memory,'.format(memory),
            'minimum allowed is {:s}'.format(self.min_memory)
        ]))
    # Check if number of cores is greater than maximum allowed
    if cores > self.max_cores:
        raise Exception(' '.join([
            'could not allocate {:d} cores,'.format(cores),
            'maximum allowed is {:d}'.format(self.max_cores)
        ]))
    # Check if number of cores is lower than minimum allowed
    if cores < self.min_cores:
        raise Exception(' '.join([
            'could not allocate {:d} cores,'.format(cores),
            'minimum allowed is {:d}'.format(self.min_cores)
        ]))
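# --- Added usage sketch (not from the original source): exercises the bounds
# checks in ``adapt`` above. ``FakeCluster`` and its min/max attributes are
# hypothetical stand-ins for whatever class actually defines the method.
from dask.utils import parse_bytes

class FakeCluster:
    min_cores, max_cores = 1, 16
    min_memory, max_memory = '1 GB', '64 GB'
    adapt = adapt  # reuse the function defined above as a method

try:
    FakeCluster().adapt(minimum=0, maximum=4, cores=32, memory='2 GB')
except Exception as exc:
    print(exc)  # could not allocate 32 cores, maximum allowed is 16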
def test_parse_bytes():
    assert parse_bytes('100') == 100
    assert parse_bytes('100 MB') == 100000000
    assert parse_bytes('100M') == 100000000
    assert parse_bytes('5kB') == 5000
    assert parse_bytes('5.4 kB') == 5400
    assert parse_bytes('1kiB') == 1024
    assert parse_bytes('1Mi') == 2**20
    assert parse_bytes('1e6') == 1000000
    assert parse_bytes('1e6 kB') == 1000000000
    assert parse_bytes('MB') == 1000000
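# --- Added aside (hedged, not part of the test suite above): the assertions
# encode dask's unit convention, where SI prefixes are decimal ("kB" == 1000)
# and "i"-suffixed prefixes are binary ("kiB" == 1024).
from dask.utils import parse_bytes

assert parse_bytes('1 kB') == 1000
assert parse_bytes('1 kiB') == 1024
assert parse_bytes('1 MiB') - parse_bytes('1 MB') == 2**20 - 10**6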
def __init__(
    self,
    scheduler=None,
    name=None,
    disk=None,
    job_extra=None,
    config_name=None,
    **base_class_kwargs
):
    super().__init__(
        scheduler=scheduler, name=name, config_name=config_name, **base_class_kwargs
    )

    if disk is None:
        disk = dask.config.get("jobqueue.%s.disk" % self.config_name)
    if disk is None:
        raise ValueError(
            "You must specify how much disk to use per job like ``disk='1 GB'``"
        )
    self.worker_disk = parse_bytes(disk)

    if job_extra is None:
        self.job_extra = dask.config.get(
            "jobqueue.%s.job-extra" % self.config_name, {}
        )
    else:
        self.job_extra = job_extra

    env_extra = base_class_kwargs.get("env_extra", None)
    if env_extra is None:
        env_extra = dask.config.get(
            "jobqueue.%s.env-extra" % self.config_name, default=[]
        )
    self.env_dict = self.env_lines_to_dict(env_extra)

    self.job_header_dict = {
        "MY.DaskWorkerName": '"htcondor--$F(MY.JobId)--"',
        "RequestCpus": "MY.DaskWorkerCores",
        "RequestMemory": "floor(MY.DaskWorkerMemory / 1048576)",
        "RequestDisk": "floor(MY.DaskWorkerDisk / 1024)",
        "MY.JobId": '"$(ClusterId).$(ProcId)"',
        "MY.DaskWorkerCores": self.worker_cores,
        "MY.DaskWorkerMemory": self.worker_memory,
        "MY.DaskWorkerDisk": self.worker_disk,
    }
    if self.log_directory:
        self.job_header_dict.update(
            {
                "LogDirectory": self.log_directory,
                # $F(...) strips quotes
                "Output": "$(LogDirectory)/worker-$F(MY.JobId).out",
                "Error": "$(LogDirectory)/worker-$F(MY.JobId).err",
                "Log": "$(LogDirectory)/worker-$(ClusterId).log",
                # We kill all the workers to stop them so we need to stream their
                # output+error if we ever want to see anything
                "Stream_Output": True,
                "Stream_Error": True,
            }
        )
    if self.job_extra:
        self.job_header_dict.update(self.job_extra)
def __init__(self, disk=None, job_extra=None, schedd=None,
             config_name="htcondor", **kwargs):
    if disk is None:
        disk = dask.config.get("jobqueue.%s.disk" % config_name)
    if disk is None:
        raise ValueError(
            "You must specify how much disk to use per job like ``disk='1 GB'``"
        )
    self.worker_disk = parse_bytes(disk)

    if job_extra is None:
        self.job_extra = dask.config.get(
            "jobqueue.%s.job-extra" % config_name, {})
    else:
        self.job_extra = job_extra

    if schedd is None:
        schedd = dask.config.get("jobqueue.%s.schedd" % config_name, None)

    # Instantiate args and parameters from parent abstract class
    super().__init__(config_name=config_name, **kwargs)

    env_extra = kwargs.get("env_extra", None)
    if env_extra is None:
        env_extra = dask.config.get("jobqueue.%s.env-extra" % config_name,
                                    default=[])
    self.env_dict = self.env_lines_to_dict(env_extra)
    self.env_dict["JOB_ID"] = "$F(MY.JobId)"

    self.job_header_dict = {
        "MY.DaskWorkerName": '"htcondor--$F(MY.JobId)--"',
        "RequestCpus": "MY.DaskWorkerCores",
        "RequestMemory": "floor(MY.DaskWorkerMemory / 1048576)",
        "RequestDisk": "floor(MY.DaskWorkerDisk / 1024)",
        "MY.JobId": '"$(ClusterId).$(ProcId)"',
        "MY.DaskWorkerCores": self.worker_cores,
        "MY.DaskWorkerMemory": self.worker_memory,
        "MY.DaskWorkerDisk": self.worker_disk,
    }
    if self.log_directory:
        self.job_header_dict.update({
            "LogDirectory": self.log_directory,
            # $F(...) strips quotes
            "Output": "$(LogDirectory)/worker-$F(MY.JobId).out",
            "Error": "$(LogDirectory)/worker-$F(MY.JobId).err",
            "Log": "$(LogDirectory)/worker-$(ClusterId).log",
            # We kill all the workers to stop them so we need to stream their
            # output+error if we ever want to see anything
            "Stream_Output": True,
            "Stream_Error": True,
        })
    if self.job_extra:
        self.job_header_dict.update(self.job_extra)

    if schedd:
        self.submit_command += " -name " + shlex.quote(schedd)
        self.cancel_command += " -name " + shlex.quote(schedd)
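# --- Added illustration (hypothetical helper, not the real HTCondorJob code
# path): a header dict like ``job_header_dict`` above could be rendered into
# HTCondor submit-description lines roughly as follows.
def render_condor_header(header_dict):
    return "\n".join("{} = {}".format(k, v) for k, v in header_dict.items())

print(render_condor_header({
    "RequestCpus": "MY.DaskWorkerCores",
    "MY.JobId": '"$(ClusterId).$(ProcId)"',
}))
# RequestCpus = MY.DaskWorkerCores
# MY.JobId = "$(ClusterId).$(ProcId)"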
def lsf_unit_detection_helper(expected_unit, conf_text=None):
    temp_dir = tempfile.mkdtemp()
    current_lsf_envdir = os.environ.get("LSF_ENVDIR", None)
    os.environ["LSF_ENVDIR"] = temp_dir
    if conf_text is not None:
        with open(os.path.join(temp_dir, "lsf.conf"), "w") as conf_file:
            conf_file.write(conf_text)
    memory_string = "13GB"
    memory_base = parse_bytes(memory_string)
    correct_memory = lsf.lsf_format_bytes_ceil(memory_base, lsf_units=expected_unit)
    with LSFCluster(memory=memory_string, cores=1) as cluster:
        assert "#BSUB -M %s" % correct_memory in cluster.job_header
    rmtree(temp_dir)
    if current_lsf_envdir is None:
        del os.environ["LSF_ENVDIR"]
    else:
        os.environ["LSF_ENVDIR"] = current_lsf_envdir
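# --- Added usage sketch (assumes an environment where LSFCluster can start a
# local scheduler): LSF_UNIT_FOR_LIMITS is the lsf.conf key the helper above
# relies on; the exact conf text here is illustrative.
lsf_unit_detection_helper("MB", conf_text="LSF_UNIT_FOR_LIMITS=MB")
lsf_unit_detection_helper("G", conf_text="LSF_UNIT_FOR_LIMITS=G")
lsf_unit_detection_helper("KB")  # no lsf.conf present -> fall back to default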
def _update_lsf_settings():
    from dask_jobqueue import LSFCluster  # @UnresolvedImport @UnusedImport

    # 'ncpus' is how many CPUs are RESERVED for the LSF job.
    # By default, set it to the number of CPUs the workers will actually use ('cores')
    ncpus = dask.config.get("jobqueue.lsf.ncpus", -1)
    if not ncpus or ncpus == -1:
        ncpus = dask.config.get("jobqueue.lsf.cores")
        dask.config.set({"jobqueue.lsf.ncpus": ncpus})

    # Similar to above, the difference between 'mem' and 'memory' is that the former
    # specifies the memory to reserve in LSF, whereas the latter is actually used
    # by Dask workers to determine when they've exceeded their limits.
    mem = dask.config.get("jobqueue.lsf.mem", -1)
    if not mem or mem == -1:
        memory = dask.config.get("jobqueue.lsf.memory", None)
        if memory:
            mem = parse_bytes(memory)
            dask.config.set({"jobqueue.lsf.mem": mem})
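# --- Added usage sketch (hypothetical config values): with only 'cores' and
# 'memory' configured, _update_lsf_settings() fills in 'ncpus' and 'mem'
# from them.
import dask
from dask.utils import parse_bytes

dask.config.set({"jobqueue.lsf.cores": 4, "jobqueue.lsf.memory": "8GB",
                 "jobqueue.lsf.ncpus": None, "jobqueue.lsf.mem": None})
_update_lsf_settings()
assert dask.config.get("jobqueue.lsf.ncpus") == 4
assert dask.config.get("jobqueue.lsf.mem") == parse_bytes("8GB")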
def parse_device_memory_limit(device_memory_limit, device_index=0):
    """Parse memory limit to be used by a CUDA device.

    Parameters
    ----------
    device_memory_limit: float, int, str or None
        This can be a float (fraction of total device memory), an integer
        (bytes), a string (like 5GB or 5000M), and "auto", 0 or None for the
        total device size.
    device_index: int
        The index of device from which to obtain the total memory amount.

    Examples
    --------
    >>> # On a 32GB CUDA device
    >>> parse_device_memory_limit(None)
    34089730048
    >>> parse_device_memory_limit(0.8)
    27271784038
    >>> parse_device_memory_limit(1000000000)
    1000000000
    >>> parse_device_memory_limit("1GB")
    1000000000
    """
    if any(device_memory_limit == v for v in [0, "0", None, "auto"]):
        return get_device_total_memory(device_index)

    with suppress(ValueError, TypeError):
        device_memory_limit = float(device_memory_limit)
        if isinstance(device_memory_limit, float) and device_memory_limit <= 1:
            return int(get_device_total_memory(device_index) * device_memory_limit)

    if isinstance(device_memory_limit, str):
        return parse_bytes(device_memory_limit)
    else:
        return int(device_memory_limit)
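# --- Added quick check (assumes a CUDA device is present; concrete byte
# counts depend on the device, so only relative properties are asserted):
total = get_device_total_memory(0)
assert parse_device_memory_limit(None) == total
assert parse_device_memory_limit(0.5) == int(total * 0.5)
assert parse_device_memory_limit("1GB") == 1000000000
assert parse_device_memory_limit(12345678) == 12345678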
def __init__(self, template, options, tmpl_dir, backend="sge", debug=False):
    # pdb.set_trace()
    self._template = template  # for __repr__
    self.setup = compile_template(
        "module", tmpl_dir, debug, package=" ".join(options["module"])
    )
    self.job_cmd = compile_template(template, tmpl_dir, debug, **options)
    jobopts = {
        **options[backend],
        "memory": "{}".format(parse_bytes(options[backend]["memory"])),
        "name": f"{template}-{uuid4()}",
        "nprocs": options.get("nprocs", 1),
    }
    # TODO: check walltime and cputime format
    # TODO: check if queue is valid
    self.job_header = compile_template(backend, tmpl_dir, debug, **jobopts)
    self.script = compile_template(
        "jobscript",
        tmpl_dir,
        debug,
        job_header=self.job_header,
        setup=self.setup,
        job_cmd=self.job_cmd,
    )
def _get_nb_workers_from_memory(self, memory):
    return math.ceil(
        parse_bytes(memory) / parse_bytes(self.jobqueue_worker_spec["memory"]))
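# --- Added worked example (illustrative numbers): requesting 10 GB with
# 4 GB workers needs ceil(10 / 4) == 3 workers, not 2.
import math
from dask.utils import parse_bytes

assert math.ceil(parse_bytes('10GB') / parse_bytes('4GB')) == 3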
def __init__(
    self,
    n_workers=None,
    threads_per_worker=1,
    processes=True,
    memory_limit="auto",
    device_memory_limit=None,
    CUDA_VISIBLE_DEVICES=None,
    data=None,
    local_directory=None,
    protocol=None,
    enable_tcp_over_ucx=False,
    enable_infiniband=False,
    enable_nvlink=False,
    enable_rdmacm=False,
    ucx_net_devices=None,
    rmm_pool_size=None,
    **kwargs,
):
    if CUDA_VISIBLE_DEVICES is None:
        CUDA_VISIBLE_DEVICES = cuda_visible_devices(0)
    if isinstance(CUDA_VISIBLE_DEVICES, str):
        CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES.split(",")
    CUDA_VISIBLE_DEVICES = list(map(int, CUDA_VISIBLE_DEVICES))
    if n_workers is None:
        n_workers = len(CUDA_VISIBLE_DEVICES)
    self.host_memory_limit = parse_memory_limit(
        memory_limit, threads_per_worker, n_workers)
    self.device_memory_limit = device_memory_limit

    self.rmm_pool_size = rmm_pool_size
    if rmm_pool_size is not None:
        try:
            import rmm  # noqa F401
        except ImportError:
            raise ValueError(
                "RMM pool requested but module 'rmm' is not available. "
                "For installation instructions, please see "
                "https://github.com/rapidsai/rmm")  # pragma: no cover
        self.rmm_pool_size = parse_bytes(self.rmm_pool_size)

    if not processes:
        raise ValueError(
            "Processes are necessary in order to use multiple GPUs with Dask"
        )

    if self.device_memory_limit is None:
        self.device_memory_limit = get_device_total_memory(0)
    elif isinstance(self.device_memory_limit, str):
        self.device_memory_limit = parse_bytes(self.device_memory_limit)

    if data is None:
        data = (
            DeviceHostFile,
            {
                "device_memory_limit": self.device_memory_limit,
                "memory_limit": self.host_memory_limit,
                "local_directory": local_directory
                or dask.config.get("temporary-directory")
                or os.getcwd(),
            },
        )

    if enable_tcp_over_ucx or enable_infiniband or enable_nvlink:
        if protocol is None:
            protocol = "ucx"
        elif protocol != "ucx":
            raise TypeError(
                "Enabling InfiniBand or NVLink requires protocol='ucx'")

    if ucx_net_devices == "auto":
        try:
            from ucp._libs.topological_distance import TopologicalDistance  # noqa
        except ImportError:
            raise ValueError(
                "ucx_net_devices set to 'auto' but UCX-Py is not "
                "installed or it's compiled without hwloc support")
    elif ucx_net_devices == "":
        raise ValueError("ucx_net_devices can not be an empty string")
    self.ucx_net_devices = ucx_net_devices
    self.set_ucx_net_devices = enable_infiniband
    self.host = kwargs.get("host", None)

    super().__init__(
        n_workers=0,
        threads_per_worker=threads_per_worker,
        memory_limit=self.host_memory_limit,
        processes=True,
        data=data,
        local_directory=local_directory,
        protocol=protocol,
        config={
            "ucx": get_ucx_config(
                enable_tcp_over_ucx=enable_tcp_over_ucx,
                enable_nvlink=enable_nvlink,
                enable_infiniband=enable_infiniband,
                enable_rdmacm=enable_rdmacm,
            )
        },
        **kwargs,
    )

    self.new_spec["options"]["preload"] = self.new_spec["options"].get(
        "preload", []) + ["dask_cuda.initialize"]
    self.new_spec["options"]["preload_argv"] = self.new_spec["options"].get(
        "preload_argv", []) + ["--create-cuda-context"]

    self.cuda_visible_devices = CUDA_VISIBLE_DEVICES
    self.scale(n_workers)
    self.sync(self._correct_state)
def timeseries(
    fixed_totalsize=False,
    chunk_per_worker=10,
    chunk_size='128 MB',
    num_nodes=1,
    worker_per_node=1,
    chunking_scheme=None,
    io_format=None,
    store_scheme=None,
    # lat=320,
    # lon=384,
    lat=2560,
    lon=3840,
    start='1980-01-01',
    freq='1H',
    nan=False,
    # fs=None,
    # root='.',
):
    """ Create synthetic Xarray dataset filled with random data.

    Parameters
    ----------
    chunk_per_worker : int
        number of chunks placed per worker. See docs.dask.org (best practices)
        for guidance on chunking: the best chunk size is around 100 MB, but
        each worker can hold many chunks, which automates the parallelism in
        dask.
    chunk_size : str
        chunk size in bytes, kilo, mega or any factor of bytes
    num_nodes : int
        number of compute nodes
    worker_per_node : int
        number of dask workers per node
    chunking_scheme : str
        Whether to chunk across the time dimension ('temporal') or the
        horizontal dimensions lat, lon ('spatial'). If None, automatically
        determine chunk sizes along all dimensions.
    lat : int
        number of latitude values
    lon : int
        number of longitude values
    start : datetime (or datetime-like string)
        Start of time series
    freq : string
        String like '2s' or '1H' or '12W' for the time series frequency
    nan : bool
        Whether to include nan in generated data

    Examples
    --------
    >>> from benchmarks.datasets import timeseries
    >>> ds, chunks = timeseries(chunk_size='128MB', chunk_per_worker=5,
    ...                         chunking_scheme='spatial', lat=500, lon=600)
    >>> ds
    <xarray.Dataset>
    Dimensions:  (lat: 500, lon: 600, time: 267)
    Coordinates:
      * time     (time) datetime64[ns] 1980-01-01 1980-01-02 ... 1980-09-23
      * lon      (lon) float64 -180.0 -179.4 -178.8 -178.2 ... 178.8 179.4 180.0
      * lat      (lat) float64 -90.0 -89.64 -89.28 -88.92 ... 88.92 89.28 89.64 90.0
    Data variables:
        sst      (time, lon, lat) float64 dask.array<shape=(267, 600, 500), ...
    Attributes:
        history:  created for compute benchmarking
    """
    dt = np.dtype('f8')
    itemsize = dt.itemsize
    chunk_size = parse_bytes(chunk_size)
    total_bytes = chunk_size * num_nodes * worker_per_node * chunk_per_worker
    size = total_bytes / itemsize
    print(size)
    timesteps = math.ceil(size / (lat * lon))
    print(timesteps)
    shape = (timesteps, lon, lat)
    if chunking_scheme == 'temporal':
        x = math.ceil(chunk_size / (lon * lat * itemsize))
        chunks = (x, lon, lat)
    elif chunking_scheme == 'spatial':
        x = math.ceil(math.sqrt(chunk_size / (timesteps * itemsize)))
        chunks = (timesteps, x, x)
    else:
        chunks = 'auto'

    lats = xr.DataArray(np.linspace(start=-90, stop=90, num=lat), dims=['lat'])
    lons = xr.DataArray(np.linspace(start=-180, stop=180, num=lon), dims=['lon'])
    times = xr.DataArray(
        pd.date_range(start=start, freq=freq, periods=timesteps), dims=['time'])
    if chunks == 'auto':
        with dask.config.set({'array.chunk-size': chunk_size}):
            random_data = randn(shape=shape, chunks=chunks, nan=nan)
    else:
        random_data = randn(shape=shape, chunks=chunks, nan=nan)
    ds = xr.DataArray(
        random_data,
        dims=['time', 'lon', 'lat'],
        coords={'time': times, 'lon': lons, 'lat': lats},
        name='sst',
        # encoding=None,
        attrs={
            'units': 'baz units',
            'description': 'a description',
            'history': 'created for compute benchmarking',
        },
    ).to_dataset()
    return ds, chunks
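# --- Added worked example of the 'temporal' chunking arithmetic above
# (illustrative numbers): one 128 MB chunk of float64 values over the default
# 3840 x 2560 grid spans ceil(128e6 / (3840 * 2560 * 8)) timesteps.
import math
from dask.utils import parse_bytes

chunk_size = parse_bytes('128 MB')
lon, lat, itemsize = 3840, 2560, 8
assert math.ceil(chunk_size / (lon * lat * itemsize)) == 2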
def __init__(
    self,
    scheduler=None,
    name=None,
    cores=None,
    memory=None,
    processes=None,
    nanny=True,
    interface=None,
    death_timeout=None,
    local_directory=None,
    extra=None,
    env_extra=None,
    header_skip=None,
    log_directory=None,
    shebang=None,
    python=sys.executable,
    job_name=None,
    config_name=None,
    **kwargs
):
    self.scheduler = scheduler
    self.job_id = None

    super().__init__()

    if config_name is None:
        config_name = getattr(type(self), "config_name")
    if config_name is None:
        raise ValueError(
            "Looks like you are trying to create a class that inherits from dask_jobqueue.core.Job. "
            "If that is the case, you need to:\n"
            "- set the 'config_name' class variable to a non-None value\n"
            "- create a section in jobqueue.yaml with the value of 'config_name'\n"
            "If that is not the case, please open an issue in https://github.com/dask/dask-jobqueue/issues."
        )

    if job_name is None:
        job_name = dask.config.get("jobqueue.%s.name" % config_name)
    if cores is None:
        cores = dask.config.get("jobqueue.%s.cores" % config_name)
    if memory is None:
        memory = dask.config.get("jobqueue.%s.memory" % config_name)
    if processes is None:
        processes = dask.config.get("jobqueue.%s.processes" % config_name)
    if interface is None:
        interface = dask.config.get("jobqueue.%s.interface" % config_name)
    if death_timeout is None:
        death_timeout = dask.config.get("jobqueue.%s.death-timeout" % config_name)
    if local_directory is None:
        local_directory = dask.config.get(
            "jobqueue.%s.local-directory" % config_name
        )
    if extra is None:
        extra = dask.config.get("jobqueue.%s.extra" % config_name)
    if env_extra is None:
        env_extra = dask.config.get("jobqueue.%s.env-extra" % config_name)
    if header_skip is None:
        header_skip = dask.config.get("jobqueue.%s.header-skip" % config_name, ())
    if log_directory is None:
        log_directory = dask.config.get("jobqueue.%s.log-directory" % config_name)
    if shebang is None:
        shebang = dask.config.get("jobqueue.%s.shebang" % config_name)

    if cores is None or memory is None:
        raise ValueError(
            "You must specify how much cores and memory per job you want to use, for example:\n"
            "cluster = {}(cores={}, memory={!r})".format(
                self.__class__.__name__, cores or 8, memory or "24GB"
            )
        )

    # This attribute should be overridden
    self.job_header = None

    if interface:
        extra = extra + ["--interface", interface]
        kwargs.setdefault("host", get_ip_interface(interface))
    else:
        kwargs.setdefault("host", "")

    # Keep information on process, cores, and memory, for use in subclasses
    self.worker_memory = parse_bytes(memory) if memory is not None else None
    self.worker_processes = processes
    self.worker_cores = cores
    self.name = name
    self.job_name = job_name

    self.shebang = shebang

    self._env_header = "\n".join(filter(None, env_extra))
    self.header_skip = set(header_skip)

    # dask-worker command line build
    dask_worker_command = "%(python)s -m distributed.cli.dask_worker" % dict(
        python=python
    )
    command_args = [dask_worker_command, self.scheduler]
    command_args += ["--nthreads", self.worker_process_threads]
    if processes is not None and processes > 1:
        command_args += ["--nprocs", processes]
    command_args += ["--memory-limit", self.worker_process_memory]
    command_args += ["--name", str(name)]
    command_args += ["--nanny" if nanny else "--no-nanny"]

    if death_timeout is not None:
        command_args += ["--death-timeout", death_timeout]
    if local_directory is not None:
        command_args += ["--local-directory", local_directory]
    if extra is not None:
        command_args += extra

    self._command_template = " ".join(map(str, command_args))

    self.log_directory = log_directory
    if self.log_directory is not None:
        if not os.path.exists(self.log_directory):
            os.makedirs(self.log_directory)
def _get_nb_workers_from_memory(self, memory):
    return math.ceil(
        parse_bytes(memory) / parse_bytes(self.worker_spec['memory']))
def main(
    scheduler,
    host,
    nthreads,
    name,
    memory_limit,
    device_memory_limit,
    rmm_pool_size,
    pid_file,
    resources,
    dashboard,
    dashboard_address,
    local_directory,
    scheduler_file,
    interface,
    death_timeout,
    preload,
    dashboard_prefix,
    tls_ca_file,
    tls_cert,
    tls_key,
    enable_tcp_over_ucx,
    enable_infiniband,
    enable_nvlink,
    enable_rdmacm,
    net_devices,
    **kwargs,
):
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    if tls_ca_file and tls_cert and tls_key:
        sec = Security(
            tls_ca_file=tls_ca_file, tls_worker_cert=tls_cert, tls_worker_key=tls_key
        )
    else:
        sec = None

    try:
        nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
    except KeyError:
        nprocs = get_n_gpus()

    if not nthreads:
        # Use at least one thread per worker process
        nthreads = max(1, multiprocessing.cpu_count() // nprocs)

    memory_limit = parse_memory_limit(memory_limit, nthreads, total_cores=nprocs)

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {}

    if dashboard:
        try:
            from distributed.dashboard import BokehWorker
        except ImportError:
            pass
        else:
            if dashboard_prefix:
                result = (BokehWorker, {"prefix": dashboard_prefix})
            else:
                result = BokehWorker
            services[("dashboard", dashboard_address)] = result

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    preload_argv = kwargs.get("preload_argv", [])
    kwargs = {"worker_port": None, "listen_address": None}
    t = Nanny

    if not scheduler and not scheduler_file and "scheduler-address" not in config:
        raise ValueError(
            "Need to provide scheduler address like\n"
            "dask-worker SCHEDULER_ADDRESS:8786"
        )

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if rmm_pool_size is not None:
        try:
            import rmm  # noqa F401
        except ImportError:
            raise ValueError(
                "RMM pool requested but module 'rmm' is not available. "
                "For installation instructions, please see "
                "https://github.com/rapidsai/rmm"
            )  # pragma: no cover
        rmm_pool_size = parse_bytes(rmm_pool_size)

    nannies = [
        t(
            scheduler,
            scheduler_file=scheduler_file,
            nthreads=nthreads,
            services=services,
            loop=loop,
            resources=resources,
            memory_limit=memory_limit,
            interface=get_ucx_net_devices(
                cuda_device_index=i,
                ucx_net_devices=net_devices,
                get_openfabrics=False,
                get_network=True,
            ),
            preload=(list(preload) or []) + ["dask_cuda.initialize"],
            preload_argv=(list(preload_argv) or []) + ["--create-cuda-context"],
            security=sec,
            env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
            plugins={CPUAffinity(get_cpu_affinity(i)), RMMPool(rmm_pool_size)},
            name=name if nprocs == 1 or not name else name + "-" + str(i),
            local_directory=local_directory,
            config={
                "ucx": get_ucx_config(
                    enable_tcp_over_ucx=enable_tcp_over_ucx,
                    enable_infiniband=enable_infiniband,
                    enable_nvlink=enable_nvlink,
                    enable_rdmacm=enable_rdmacm,
                    net_devices=net_devices,
                    cuda_device_index=i,
                )
            },
            data=(
                DeviceHostFile,
                {
                    "device_memory_limit": get_device_total_memory(index=i)
                    if (device_memory_limit == "auto" or device_memory_limit == int(0))
                    else parse_bytes(device_memory_limit),
                    "memory_limit": memory_limit,
                    "local_directory": local_directory,
                },
            ),
            **kwargs,
        )
        for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield nannies
        yield [n.finished() for n in nannies]

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def __init__(self,
             name=dask.config.get('jobqueue.name'),
             threads=dask.config.get('jobqueue.threads'),
             processes=dask.config.get('jobqueue.processes'),
             memory=dask.config.get('jobqueue.memory'),
             interface=dask.config.get('jobqueue.interface'),
             death_timeout=dask.config.get('jobqueue.death-timeout'),
             local_directory=dask.config.get('jobqueue.local-directory'),
             extra=dask.config.get('jobqueue.extra'),
             env_extra=dask.config.get('jobqueue.env-extra'),
             **kwargs):
    """ """
    # This initializer should be considered as Abstract, and never used
    # directly.
    if not self.cancel_command or not self.submit_command:
        raise NotImplementedError('JobQueueCluster is an abstract class '
                                  'that should not be instantiated.')

    # This attribute should be overridden
    self.job_header = None

    if interface:
        host = get_ip_interface(interface)
        extra += ' --interface %s ' % interface
    else:
        host = socket.gethostname()

    self.cluster = LocalCluster(n_workers=0, ip=host, **kwargs)

    # Keep information on process, threads and memory, for use in
    # subclasses
    self.worker_memory = parse_bytes(memory) if memory is not None else None
    self.worker_processes = processes
    self.worker_threads = threads
    self.name = name

    self.jobs = dict()
    self.n = 0
    self._adaptive = None

    self._env_header = '\n'.join(env_extra)

    # dask-worker command line build
    dask_worker_command = ('%(python)s -m distributed.cli.dask_worker'
                           % dict(python=sys.executable))
    self._command_template = ' '.join(
        [dask_worker_command, self.scheduler.address])
    if threads is not None:
        self._command_template += " --nthreads %d" % threads
    if processes is not None:
        self._command_template += " --nprocs %d" % processes
    if memory is not None:
        self._command_template += " --memory-limit %s" % memory
    if name is not None:
        self._command_template += " --name %s" % name
        self._command_template += "-%(n)d"  # Keep %(n) to be replaced later
    if death_timeout is not None:
        self._command_template += " --death-timeout %s" % death_timeout
    if local_directory is not None:
        self._command_template += " --local-directory %s" % local_directory
    if extra is not None:
        self._command_template += extra
def __init__(self,
             name=None,
             cores=None,
             memory=None,
             processes=None,
             interface=None,
             death_timeout=None,
             local_directory=None,
             extra=None,
             env_extra=None,
             walltime=None,
             threads=None,
             **kwargs):
    """ """
    # This initializer should be considered as Abstract, and never used
    # directly.
    if threads is not None:
        raise ValueError(threads_deprecation_message)

    if not self.scheduler_name:
        raise NotImplementedError('JobQueueCluster is an abstract class '
                                  'that should not be instantiated.')

    if name is None:
        name = dask.config.get('jobqueue.%s.name' % self.scheduler_name)
    if cores is None:
        cores = dask.config.get('jobqueue.%s.cores' % self.scheduler_name)
    if memory is None:
        memory = dask.config.get('jobqueue.%s.memory' % self.scheduler_name)
    if processes is None:
        processes = dask.config.get('jobqueue.%s.processes' % self.scheduler_name)
    if interface is None:
        interface = dask.config.get('jobqueue.%s.interface' % self.scheduler_name)
    if death_timeout is None:
        death_timeout = dask.config.get('jobqueue.%s.death-timeout'
                                        % self.scheduler_name)
    if local_directory is None:
        local_directory = dask.config.get('jobqueue.%s.local-directory'
                                          % self.scheduler_name)
    if extra is None:
        extra = dask.config.get('jobqueue.%s.extra' % self.scheduler_name)
    if env_extra is None:
        env_extra = dask.config.get('jobqueue.%s.env-extra' % self.scheduler_name)

    if dask.config.get('jobqueue.%s.threads' % self.scheduler_name, None):
        warnings.warn(threads_deprecation_message)

    if cores is None:
        raise ValueError("You must specify how many cores to use per job "
                         "like ``cores=8``")

    if memory is None:
        raise ValueError("You must specify how much memory to use per job "
                         "like ``memory='24 GB'``")

    # This attribute should be overridden
    self.job_header = None

    if interface:
        host = get_ip_interface(interface)
        extra += ' --interface %s ' % interface
    else:
        host = socket.gethostname()

    self.local_cluster = LocalCluster(n_workers=0, ip=host, **kwargs)

    # Keep information on process, cores, and memory, for use in subclasses
    self.worker_memory = parse_bytes(memory)
    self.worker_processes = processes
    self.worker_cores = cores
    self.name = name

    self.jobs = dict()
    self.n = 0
    self._adaptive = None

    self._env_header = '\n'.join(env_extra)

    # dask-worker command line build
    dask_worker_command = ('%(python)s -m distributed.cli.dask_worker'
                           % dict(python=sys.executable))
    self._command_template = ' '.join(
        [dask_worker_command, self.scheduler.address])
    self._command_template += " --nthreads %d" % self.worker_threads
    if processes is not None and processes > 1:
        self._command_template += " --nprocs %d" % processes
    mem = format_bytes(self.worker_memory / self.worker_processes)
    mem = mem.replace(' ', '')
    self._command_template += " --memory-limit %s" % mem
    if name is not None:
        self._command_template += " --name %s" % name
        self._command_template += "-%(n)d"  # Keep %(n) to be replaced later
    if death_timeout is not None:
        self._command_template += " --death-timeout %s" % death_timeout
    if local_directory is not None:
        self._command_template += " --local-directory %s" % local_directory
    if extra is not None:
        self._command_template += extra
def __init__(self,
             name='dask-worker',
             threads=2,
             processes=4,
             memory='8GB',
             interface=None,
             death_timeout=60,
             local_directory=None,
             extra='',
             env_extra=[],
             **kwargs):
    """ """
    # This initializer should be considered as Abstract, and never used
    # directly.
    if not self.cancel_command or not self.submit_command:
        raise NotImplementedError('JobQueueCluster is an abstract class '
                                  'that should not be instantiated.')

    # This attribute should be overridden
    self.job_header = None

    if interface:
        host = get_ip_interface(interface)
        extra += ' --interface %s ' % interface
    else:
        host = socket.gethostname()

    self.cluster = LocalCluster(n_workers=0, ip=host, **kwargs)

    # Keep information on process, threads and memory, for use in
    # subclasses
    self.worker_memory = parse_bytes(memory)
    self.worker_processes = processes
    self.worker_threads = threads
    self.name = name

    self.jobs = dict()
    self.n = 0
    self._adaptive = None

    self._env_header = '\n'.join(env_extra)

    # dask-worker command line build
    self._command_template = os.path.join(
        dirname, 'dask-worker %s' % self.scheduler.address)
    if threads is not None:
        self._command_template += " --nthreads %d" % threads
    if processes is not None:
        self._command_template += " --nprocs %d" % processes
    if memory is not None:
        self._command_template += " --memory-limit %s" % memory
    if name is not None:
        self._command_template += " --name %s" % name
        self._command_template += "-%(n)d"  # Keep %(n) to be replaced later
    if death_timeout is not None:
        self._command_template += " --death-timeout %s" % death_timeout
    if local_directory is not None:
        self._command_template += " --local-directory %s" % local_directory
    if extra is not None:
        self._command_template += extra
def __init__(
    self,
    n_workers=None,
    threads_per_worker=1,
    processes=True,
    memory_limit=None,
    device_memory_limit=None,
    CUDA_VISIBLE_DEVICES=None,
    data=None,
    local_dir=None,
    **kwargs,
):
    if n_workers is None:
        n_workers = get_n_gpus()
    if CUDA_VISIBLE_DEVICES is None:
        CUDA_VISIBLE_DEVICES = cuda_visible_devices(0)
    if isinstance(CUDA_VISIBLE_DEVICES, str):
        CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES.split(",")
    CUDA_VISIBLE_DEVICES = list(map(int, CUDA_VISIBLE_DEVICES))
    if memory_limit is None:
        memory_limit = TOTAL_MEMORY / n_workers
    self.host_memory_limit = memory_limit
    self.device_memory_limit = device_memory_limit

    if not processes:
        raise ValueError(
            "Processes are necessary in order to use multiple GPUs with Dask"
        )

    if self.device_memory_limit is None:
        self.device_memory_limit = get_device_total_memory(0)
    elif isinstance(self.device_memory_limit, str):
        self.device_memory_limit = parse_bytes(self.device_memory_limit)

    if data is None:
        data = (
            DeviceHostFile,
            {
                "device_memory_limit": self.device_memory_limit,
                "memory_limit": self.host_memory_limit,
                "local_dir": local_dir
                or dask.config.get("temporary-directory")
                or os.getcwd(),
            },
        )

    super().__init__(
        n_workers=0,
        threads_per_worker=threads_per_worker,
        memory_limit=memory_limit,
        processes=True,
        data=data,
        local_dir=local_dir,
        **kwargs,
    )

    self.new_spec["options"]["preload"] = self.new_spec["options"].get(
        "preload", []) + ["dask_cuda.initialize_context"]

    self.cuda_visible_devices = CUDA_VISIBLE_DEVICES
    self.scale(n_workers)
    self.sync(self._correct_state)
def main(
    scheduler,
    host,
    nthreads,
    name,
    memory_limit,
    device_memory_limit,
    pid_file,
    reconnect,
    resources,
    dashboard,
    dashboard_address,
    local_directory,
    scheduler_file,
    interface,
    death_timeout,
    preload,
    preload_argv,
    bokeh_prefix,
    tls_ca_file,
    tls_cert,
    tls_key,
):
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_worker_cert=tls_cert,
                   tls_worker_key=tls_key)

    try:
        nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
    except KeyError:
        nprocs = get_n_gpus()

    if not nthreads:
        # Use at least one thread per worker process
        nthreads = max(1, multiprocessing.cpu_count() // nprocs)

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {}

    if dashboard:
        try:
            from distributed.dashboard import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {"prefix": bokeh_prefix})
            else:
                result = BokehWorker
            services[("dashboard", dashboard_address)] = result

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    kwargs = {"worker_port": None, "listen_address": None}
    t = Nanny

    if not scheduler and not scheduler_file and "scheduler-address" not in config:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host:
        addr = uri_from_host_port(host, 0, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    if death_timeout is not None:
        death_timeout = parse_timedelta(death_timeout, "s")

    local_dir = kwargs.get("local_dir", "dask-worker-space")
    with warn_on_duration(
        "1s",
        "Creating scratch directories is taking a surprisingly long time. "
        "This is often due to running workers on a network file system. "
        "Consider specifying a local-directory to point workers to write "
        "scratch data to a local disk.",
    ):
        _workspace = WorkSpace(os.path.abspath(local_dir))
        _workdir = _workspace.new_work_dir(prefix="worker-")
        local_dir = _workdir.dir_path

    nannies = [
        t(
            scheduler,
            scheduler_file=scheduler_file,
            nthreads=nthreads,
            services=services,
            loop=loop,
            resources=resources,
            memory_limit=memory_limit,
            reconnect=reconnect,
            local_dir=local_directory,
            death_timeout=death_timeout,
            preload=(preload or []) + ["dask_cuda.initialize_context"],
            preload_argv=preload_argv,
            security=sec,
            contact_address=None,
            env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
            name=name if nprocs == 1 or not name else name + "-" + str(i),
            data=(
                DeviceHostFile,
                {
                    "device_memory_limit": get_device_total_memory(index=i)
                    if (device_memory_limit == "auto" or device_memory_limit == int(0))
                    else parse_bytes(device_memory_limit),
                    "memory_limit": parse_memory_limit(
                        memory_limit, nthreads, total_cores=nprocs),
                    "local_dir": local_dir,
                },
            ),
            **kwargs,
        )
        for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield [n._start(addr) for n in nannies]
        while all(n.status != "closed" for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def main(
    scheduler,
    host,
    nthreads,
    name,
    memory_limit,
    device_memory_limit,
    pid_file,
    resources,
    dashboard,
    dashboard_address,
    local_directory,
    scheduler_file,
    interface,
    death_timeout,
    preload,
    dashboard_prefix,
    tls_ca_file,
    tls_cert,
    tls_key,
    **kwargs,
):
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(
        tls_ca_file=tls_ca_file, tls_worker_cert=tls_cert, tls_worker_key=tls_key
    )

    try:
        nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
    except KeyError:
        nprocs = get_n_gpus()

    if not nthreads:
        # Use at least one thread per worker process
        nthreads = max(1, multiprocessing.cpu_count() // nprocs)

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {}

    if dashboard:
        try:
            from distributed.dashboard import BokehWorker
        except ImportError:
            pass
        else:
            if dashboard_prefix:
                result = (BokehWorker, {"prefix": dashboard_prefix})
            else:
                result = BokehWorker
            services[("dashboard", dashboard_address)] = result

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    kwargs = {"worker_port": None, "listen_address": None}
    t = Nanny

    if not scheduler and not scheduler_file and "scheduler-address" not in config:
        raise ValueError(
            "Need to provide scheduler address like\n"
            "dask-worker SCHEDULER_ADDRESS:8786"
        )

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    nannies = [
        t(
            scheduler,
            scheduler_file=scheduler_file,
            nthreads=nthreads,
            services=services,
            loop=loop,
            resources=resources,
            memory_limit=memory_limit,
            host=host,
            preload=(preload or []) + ["dask_cuda.initialize_context"],
            security=sec,
            env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
            name=name if nprocs == 1 or not name else name + "-" + str(i),
            data=(
                DeviceHostFile,
                {
                    "device_memory_limit": get_device_total_memory(index=i)
                    if (device_memory_limit == "auto" or device_memory_limit == int(0))
                    else parse_bytes(device_memory_limit),
                    "memory_limit": parse_memory_limit(
                        memory_limit, nthreads, total_cores=nprocs
                    ),
                    "local_directory": local_directory,
                },
            ),
            **kwargs,
        )
        for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield nannies
        yield [n.finished() for n in nannies]

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def __init__(
    self,
    scheduler=None,
    name=None,
    cores=None,
    memory=None,
    processes=None,
    nanny=True,
    protocol=None,
    security=None,
    interface=None,
    death_timeout=None,
    local_directory=None,
    extra=None,
    env_extra=None,
    header_skip=None,
    log_directory=None,
    shebang=None,
    python=sys.executable,
    job_name=None,
    config_name=None,
):
    self.scheduler = scheduler
    self.job_id = None

    super().__init__()

    default_config_name = self.default_config_name()
    if config_name is None:
        config_name = default_config_name
    self.config_name = config_name

    if cores is None:
        cores = dask.config.get("jobqueue.%s.cores" % self.config_name)
    if memory is None:
        memory = dask.config.get("jobqueue.%s.memory" % self.config_name)

    if cores is None or memory is None:
        job_class_name = self.__class__.__name__
        cluster_class_name = job_class_name.replace("Job", "Cluster")
        raise ValueError(
            "You must specify how much cores and memory per job you want to use, for example:\n"
            "cluster = {}(cores={}, memory={!r})".format(
                cluster_class_name, cores or 8, memory or "24GB"
            )
        )

    if job_name is None:
        job_name = dask.config.get("jobqueue.%s.name" % self.config_name)
    if processes is None:
        processes = dask.config.get("jobqueue.%s.processes" % self.config_name)
        if processes is None:
            processes, _ = nprocesses_nthreads(cores)
    if interface is None:
        interface = dask.config.get("jobqueue.%s.interface" % self.config_name)
    if death_timeout is None:
        death_timeout = dask.config.get(
            "jobqueue.%s.death-timeout" % self.config_name
        )
    if local_directory is None:
        local_directory = dask.config.get(
            "jobqueue.%s.local-directory" % self.config_name
        )
    if extra is None:
        extra = dask.config.get("jobqueue.%s.extra" % self.config_name)
    if env_extra is None:
        env_extra = dask.config.get("jobqueue.%s.env-extra" % self.config_name)
    if header_skip is None:
        header_skip = dask.config.get(
            "jobqueue.%s.header-skip" % self.config_name, ()
        )
    if log_directory is None:
        log_directory = dask.config.get(
            "jobqueue.%s.log-directory" % self.config_name
        )
    if shebang is None:
        shebang = dask.config.get("jobqueue.%s.shebang" % self.config_name)

    # This attribute should be set in the derived class
    self.job_header = None

    if interface:
        extra = extra + ["--interface", interface]
    if protocol:
        extra = extra + ["--protocol", protocol]
    if security:
        worker_security_dict = security.get_tls_config_for_role("worker")
        security_command_line_list = [
            ["--tls-" + key.replace("_", "-"), value]
            for key, value in worker_security_dict.items()
            # 'ciphers' parameter does not have a command-line equivalent
            if key != "ciphers"
        ]
        security_command_line = sum(security_command_line_list, [])
        extra = extra + security_command_line

    # Keep information on process, cores, and memory, for use in subclasses
    self.worker_memory = parse_bytes(memory) if memory is not None else None
    self.worker_processes = processes
    self.worker_cores = cores
    self.name = name
    self.job_name = job_name

    self.shebang = shebang

    self._env_header = "\n".join(filter(None, env_extra))
    self.header_skip = set(header_skip)

    # dask-worker command line build
    dask_worker_command = "%(python)s -m distributed.cli.dask_worker" % dict(
        python=python
    )
    command_args = [dask_worker_command, self.scheduler]
    command_args += ["--nthreads", self.worker_process_threads]
    if processes is not None and processes > 1:
        command_args += ["--nprocs", processes]
    command_args += ["--memory-limit", self.worker_process_memory]
    command_args += ["--name", str(name)]
    command_args += ["--nanny" if nanny else "--no-nanny"]

    if death_timeout is not None:
        command_args += ["--death-timeout", death_timeout]
    if local_directory is not None:
        command_args += ["--local-directory", local_directory]
    if extra is not None:
        command_args += extra

    self._command_template = " ".join(map(str, command_args))

    self.log_directory = log_directory
    if self.log_directory is not None:
        if not os.path.exists(self.log_directory):
            os.makedirs(self.log_directory)
def __init__(
    self,
    scheduler=None,
    host=None,
    nthreads=0,
    name=None,
    memory_limit="auto",
    device_memory_limit="auto",
    rmm_pool_size=None,
    rmm_managed_memory=False,
    pid_file=None,
    resources=None,
    dashboard=True,
    dashboard_address=":0",
    local_directory=None,
    scheduler_file=None,
    interface=None,
    death_timeout=None,
    preload=[],
    dashboard_prefix=None,
    security=None,
    enable_tcp_over_ucx=False,
    enable_infiniband=False,
    enable_nvlink=False,
    enable_rdmacm=False,
    net_devices=None,
    **kwargs,
):
    # Required by RAPIDS libraries (e.g., cuDF) to ensure no context
    # initialization happens before we can set CUDA_VISIBLE_DEVICES
    os.environ["RAPIDS_NO_INITIALIZE"] = "True"

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    try:
        nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
    except KeyError:
        nprocs = get_n_gpus()

    if not nthreads:
        # Use at least one thread per worker process
        nthreads = max(1, multiprocessing.cpu_count() // nprocs)

    memory_limit = parse_memory_limit(memory_limit, nthreads, total_cores=nprocs)

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {}

    if dashboard:
        try:
            from distributed.dashboard import BokehWorker
        except ImportError:
            pass
        else:
            if dashboard_prefix:
                result = (BokehWorker, {"prefix": dashboard_prefix})
            else:
                result = BokehWorker
            services[("dashboard", dashboard_address)] = result

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    preload_argv = kwargs.get("preload_argv", [])
    kwargs = {"worker_port": None, "listen_address": None}
    t = Nanny

    if (
        not scheduler
        and not scheduler_file
        and dask.config.get("scheduler-address", None) is None
    ):
        raise ValueError(
            "Need to provide scheduler address like\n"
            "dask-worker SCHEDULER_ADDRESS:8786"
        )

    if interface and host:
        raise ValueError("Can not specify both interface and host")

    if rmm_pool_size is not None or rmm_managed_memory:
        try:
            import rmm  # noqa F401
        except ImportError:
            raise ValueError(
                "RMM pool requested but module 'rmm' is not available. "
                "For installation instructions, please see "
                "https://github.com/rapidsai/rmm"
            )  # pragma: no cover
        if rmm_pool_size is not None:
            rmm_pool_size = parse_bytes(rmm_pool_size)
    else:
        if enable_nvlink:
            warnings.warn(
                "When using NVLink we recommend setting a "
                "`rmm_pool_size`. Please see: "
                "https://dask-cuda.readthedocs.io/en/latest/ucx.html"
                "#important-notes for more details"
            )

    if enable_nvlink and rmm_managed_memory:
        raise ValueError(
            "RMM managed memory and NVLink are currently incompatible."
        )

    # Ensure this parent dask-cuda-worker process uses the same UCX
    # configuration as child worker processes created by it.
    initialize(
        create_cuda_context=False,
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_infiniband=enable_infiniband,
        enable_nvlink=enable_nvlink,
        enable_rdmacm=enable_rdmacm,
        net_devices=net_devices,
        cuda_device_index=0,
    )

    self.nannies = [
        t(
            scheduler,
            scheduler_file=scheduler_file,
            nthreads=nthreads,
            services=services,
            loop=loop,
            resources=resources,
            memory_limit=memory_limit,
            interface=_get_interface(interface, host, i, net_devices),
            host=host,
            preload=(list(preload) or []) + ["dask_cuda.initialize"],
            preload_argv=(list(preload_argv) or []) + ["--create-cuda-context"],
            security=security,
            env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
            plugins={
                CPUAffinity(get_cpu_affinity(i)),
                RMMSetup(rmm_pool_size, rmm_managed_memory),
            },
            name=name if nprocs == 1 or not name else name + "-" + str(i),
            local_directory=local_directory,
            config={
                "ucx": get_ucx_config(
                    enable_tcp_over_ucx=enable_tcp_over_ucx,
                    enable_infiniband=enable_infiniband,
                    enable_nvlink=enable_nvlink,
                    enable_rdmacm=enable_rdmacm,
                    net_devices=net_devices,
                    cuda_device_index=i,
                )
            },
            data=(
                DeviceHostFile,
                {
                    "device_memory_limit": parse_device_memory_limit(
                        device_memory_limit, device_index=i
                    ),
                    "memory_limit": memory_limit,
                    "local_directory": local_directory,
                },
            ),
            **kwargs,
        )
        for i in range(nprocs)
    ]
def __init__(self,
             name=None,
             cores=None,
             memory=None,
             processes=None,
             interface=None,
             death_timeout=None,
             local_directory=None,
             extra=None,
             env_extra=None,
             log_directory=None,
             walltime=None,
             threads=None,
             python=sys.executable,
             **kwargs):
    """ """
    # This initializer should be considered as Abstract, and never used directly.
    if threads is not None:
        raise ValueError(threads_deprecation_message)

    if not self.scheduler_name:
        raise NotImplementedError(
            'JobQueueCluster is an abstract class that should not be instantiated.'
        )

    if name is None:
        name = dask.config.get('jobqueue.%s.name' % self.scheduler_name)
    if cores is None:
        cores = dask.config.get('jobqueue.%s.cores' % self.scheduler_name)
    if memory is None:
        memory = dask.config.get('jobqueue.%s.memory' % self.scheduler_name)
    if processes is None:
        processes = dask.config.get('jobqueue.%s.processes' % self.scheduler_name)
    if interface is None:
        interface = dask.config.get('jobqueue.%s.interface' % self.scheduler_name)
    if death_timeout is None:
        death_timeout = dask.config.get('jobqueue.%s.death-timeout'
                                        % self.scheduler_name)
    if local_directory is None:
        local_directory = dask.config.get('jobqueue.%s.local-directory'
                                          % self.scheduler_name)
    if extra is None:
        extra = dask.config.get('jobqueue.%s.extra' % self.scheduler_name)
    if env_extra is None:
        env_extra = dask.config.get('jobqueue.%s.env-extra' % self.scheduler_name)
    if log_directory is None:
        log_directory = dask.config.get('jobqueue.%s.log-directory'
                                        % self.scheduler_name)

    if dask.config.get('jobqueue.%s.threads' % self.scheduler_name, None):
        warnings.warn(threads_deprecation_message)

    if cores is None:
        raise ValueError(
            "You must specify how many cores to use per job like ``cores=8``"
        )

    if memory is None:
        raise ValueError(
            "You must specify how much memory to use per job like ``memory='24 GB'``"
        )

    # This attribute should be overridden
    self.job_header = None

    if interface:
        extra += ['--interface', interface]
        kwargs.setdefault('ip', get_ip_interface(interface))
    else:
        kwargs.setdefault('ip', '')

    # Bokeh diagnostics server should listen on all interfaces
    diagnostics_ip_and_port = ('', 8787)
    self.local_cluster = LocalCluster(
        n_workers=0, diagnostics_port=diagnostics_ip_and_port, **kwargs)

    # Keep information on process, cores, and memory, for use in subclasses
    self.worker_memory = parse_bytes(memory) if memory is not None else None
    self.worker_processes = processes
    self.worker_cores = cores
    self.name = name

    # plugin for tracking job status
    self._scheduler_plugin = JobQueuePlugin()
    self.local_cluster.scheduler.add_plugin(self._scheduler_plugin)

    self._adaptive = None

    self._env_header = '\n'.join(env_extra)

    # dask-worker command line build
    dask_worker_command = '%(python)s -m distributed.cli.dask_worker' % dict(
        python=python)
    command_args = [dask_worker_command, self.scheduler.address]
    command_args += ['--nthreads', self.worker_threads]
    if processes is not None and processes > 1:
        command_args += ['--nprocs', processes]
    mem = format_bytes(self.worker_memory / self.worker_processes)
    command_args += ['--memory-limit', mem.replace(' ', '')]
    command_args += ['--name', '%s--${JOB_ID}--' % name]

    if death_timeout is not None:
        command_args += ['--death-timeout', death_timeout]
    if local_directory is not None:
        command_args += ['--local-directory', local_directory]
    if extra is not None:
        command_args += extra

    self._command_template = ' '.join(map(str, command_args))

    self._target_scale = 0

    self.log_directory = log_directory
    if self.log_directory is not None:
        if not os.path.exists(self.log_directory):
            os.makedirs(self.log_directory)
def __init__(self,
             name=None,
             cores=None,
             memory=None,
             processes=None,
             interface=None,
             death_timeout=None,
             local_directory=None,
             extra=None,
             env_extra=None,
             log_directory=None,
             threads=None,
             shebang=None,
             python=sys.executable,
             config_name=None,
             **kwargs):
    """ """
    # This initializer should be considered as Abstract, and never used directly.
    super(JobQueueCluster, self).__init__()

    if threads is not None:
        raise ValueError(threads_deprecation_message)

    if config_name is None:
        raise NotImplementedError(
            "JobQueueCluster is an abstract class that should not be instantiated."
        )

    if name is None:
        name = dask.config.get("jobqueue.%s.name" % config_name)
    if cores is None:
        cores = dask.config.get("jobqueue.%s.cores" % config_name)
    if memory is None:
        memory = dask.config.get("jobqueue.%s.memory" % config_name)
    if processes is None:
        processes = dask.config.get("jobqueue.%s.processes" % config_name)
    if interface is None:
        interface = dask.config.get("jobqueue.%s.interface" % config_name)
    if death_timeout is None:
        death_timeout = dask.config.get("jobqueue.%s.death-timeout" % config_name)
    if local_directory is None:
        local_directory = dask.config.get("jobqueue.%s.local-directory" % config_name)
    if extra is None:
        extra = dask.config.get("jobqueue.%s.extra" % config_name)
    if env_extra is None:
        env_extra = dask.config.get("jobqueue.%s.env-extra" % config_name)
    if log_directory is None:
        log_directory = dask.config.get("jobqueue.%s.log-directory" % config_name)
    if shebang is None:
        shebang = dask.config.get("jobqueue.%s.shebang" % config_name)

    if dask.config.get("jobqueue.%s.threads" % config_name, None):
        warnings.warn(threads_deprecation_message)

    if cores is None:
        raise ValueError(
            "You must specify how many cores to use per job like ``cores=8``"
        )

    if memory is None:
        raise ValueError(
            "You must specify how much memory to use per job like ``memory='24 GB'``"
        )

    # This attribute should be overridden
    self.job_header = None

    if interface:
        extra += ["--interface", interface]
        kwargs.setdefault("ip", get_ip_interface(interface))
    else:
        kwargs.setdefault("ip", "")

    # Bokeh diagnostics server should listen on all interfaces
    kwargs.setdefault("dashboard_address", ("", 8787))
    self.local_cluster = LocalCluster(n_workers=0, **kwargs)

    # Keep information on process, cores, and memory, for use in subclasses
    self.worker_memory = parse_bytes(memory) if memory is not None else None
    self.worker_processes = processes
    self.worker_cores = cores
    self.name = name

    # plugin for tracking job status
    self._scheduler_plugin = JobQueuePlugin()
    self.local_cluster.scheduler.add_plugin(self._scheduler_plugin)

    self._adaptive = None
    self.shebang = shebang

    self._env_header = "\n".join(env_extra)

    # dask-worker command line build
    dask_worker_command = "%(python)s -m distributed.cli.dask_worker" % dict(
        python=python)
    command_args = [dask_worker_command, self.scheduler.address]
    command_args += ["--nthreads", self.worker_process_threads]
    if processes is not None and processes > 1:
        command_args += ["--nprocs", processes]
    command_args += ["--memory-limit", self.worker_process_memory]
    command_args += ["--name", "%s--${JOB_ID}--" % name]

    if death_timeout is not None:
        command_args += ["--death-timeout", death_timeout]
    if local_directory is not None:
        command_args += ["--local-directory", local_directory]
    if extra is not None:
        command_args += extra

    self._command_template = " ".join(map(str, command_args))

    self.log_directory = log_directory
    if self.log_directory is not None:
        if not os.path.exists(self.log_directory):
            os.makedirs(self.log_directory)
def __init__(
    self,
    n_workers=None,
    threads_per_worker=1,
    processes=True,
    memory_limit="auto",
    device_memory_limit=0.8,
    CUDA_VISIBLE_DEVICES=None,
    data=None,
    local_directory=None,
    protocol=None,
    enable_tcp_over_ucx=False,
    enable_infiniband=False,
    enable_nvlink=False,
    enable_rdmacm=False,
    ucx_net_devices=None,
    rmm_pool_size=None,
    rmm_managed_memory=False,
    **kwargs,
):
    # Required by RAPIDS libraries (e.g., cuDF) to ensure no context
    # initialization happens before we can set CUDA_VISIBLE_DEVICES
    os.environ["RAPIDS_NO_INITIALIZE"] = "True"

    if CUDA_VISIBLE_DEVICES is None:
        CUDA_VISIBLE_DEVICES = cuda_visible_devices(0)
    if isinstance(CUDA_VISIBLE_DEVICES, str):
        CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES.split(",")
    CUDA_VISIBLE_DEVICES = list(
        map(parse_cuda_visible_device, CUDA_VISIBLE_DEVICES))
    if n_workers is None:
        n_workers = len(CUDA_VISIBLE_DEVICES)
    self.host_memory_limit = parse_memory_limit(
        memory_limit, threads_per_worker, n_workers)
    self.device_memory_limit = parse_device_memory_limit(
        device_memory_limit, device_index=0)

    self.rmm_pool_size = rmm_pool_size
    self.rmm_managed_memory = rmm_managed_memory
    if rmm_pool_size is not None or rmm_managed_memory:
        try:
            import rmm  # noqa F401
        except ImportError:
            raise ValueError(
                "RMM pool or managed memory requested but module 'rmm' "
                "is not available. For installation instructions, please "
                "see https://github.com/rapidsai/rmm")  # pragma: no cover
        if self.rmm_pool_size is not None:
            self.rmm_pool_size = parse_bytes(self.rmm_pool_size)
    else:
        if enable_nvlink:
            warnings.warn(
                "When using NVLink we recommend setting a "
                "`rmm_pool_size`. Please see: "
                "https://dask-cuda.readthedocs.io/en/latest/ucx.html"
                "#important-notes for more details")

    if not processes:
        raise ValueError(
            "Processes are necessary in order to use multiple GPUs with Dask"
        )

    if data is None:
        data = (
            DeviceHostFile,
            {
                "device_memory_limit": self.device_memory_limit,
                "memory_limit": self.host_memory_limit,
                "local_directory": local_directory
                or dask.config.get("temporary-directory")
                or os.getcwd(),
            },
        )

    if enable_tcp_over_ucx or enable_infiniband or enable_nvlink:
        if protocol is None:
            protocol = "ucx"
        elif protocol != "ucx":
            raise TypeError(
                "Enabling InfiniBand or NVLink requires protocol='ucx'")

    if ucx_net_devices == "auto":
        try:
            from ucp._libs.topological_distance import TopologicalDistance  # NOQA
        except ImportError:
            raise ValueError(
                "ucx_net_devices set to 'auto' but UCX-Py is not "
                "installed or it's compiled without hwloc support")
    elif ucx_net_devices == "":
        raise ValueError("ucx_net_devices can not be an empty string")
    self.ucx_net_devices = ucx_net_devices
    self.set_ucx_net_devices = enable_infiniband
    self.host = kwargs.get("host", None)

    initialize(
        enable_tcp_over_ucx=enable_tcp_over_ucx,
        enable_nvlink=enable_nvlink,
        enable_infiniband=enable_infiniband,
        enable_rdmacm=enable_rdmacm,
        net_devices=ucx_net_devices,
        cuda_device_index=0,
    )

    super().__init__(
        n_workers=0,
        threads_per_worker=threads_per_worker,
        memory_limit=self.host_memory_limit,
        processes=True,
        data=data,
        local_directory=local_directory,
        protocol=protocol,
        config={
            "ucx": get_ucx_config(
                enable_tcp_over_ucx=enable_tcp_over_ucx,
                enable_nvlink=enable_nvlink,
                enable_infiniband=enable_infiniband,
                enable_rdmacm=enable_rdmacm,
            )
        },
        **kwargs,
    )

    self.new_spec["options"]["preload"] = self.new_spec["options"].get(
        "preload", []) + ["dask_cuda.initialize"]
    self.new_spec["options"]["preload_argv"] = self.new_spec["options"].get(
        "preload_argv", []) + ["--create-cuda-context"]

    self.cuda_visible_devices = CUDA_VISIBLE_DEVICES
    self.scale(n_workers)
    self.sync(self._correct_state)
def set_rmm():
    rmm.reinitialize(pool_allocator=True, managed_memory=False,
                     initial_pool_size=parse_bytes("6GB"))
    cupy.cuda.set_allocator(rmm.rmm_cupy_allocator)
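# --- Added usage sketch: set_rmm is typically run on every dask-cuda worker
# via the standard distributed API; the scheduler address is a placeholder.
from distributed import Client

client = Client("tcp://scheduler:8786")  # placeholder scheduler address
client.run(set_rmm)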
def timeseries(
    chunk_size='128 MB',
    n_workers=1,
    chunk_over_time_dim=True,
    lat=320,
    lon=384,
    start='1980-01-01',
    freq='1D',
    nan=False,
):
    """ Create synthetic Xarray dataset filled with random data.

    Parameters
    ----------
    chunk_size : str
        chunk size in bytes, kilo, mega or any factor of bytes
    n_workers : int
        number of dask workers
    chunk_over_time_dim : bool, default True
        Whether to chunk across time dimension or horizontal dimensions (lat, lon)
    lat : int
        number of latitude values
    lon : int
        number of longitude values
    start : datetime (or datetime-like string)
        Start of time series
    freq : string
        String like '2s' or '1H' or '12W' for the time series frequency
    nan : bool
        Whether to include nan in generated data

    Examples
    --------
    >>> from benchmarks.datasets import timeseries
    >>> ds = timeseries('128MB', 5, chunk_over_time_dim=False, lat=500, lon=600)
    >>> ds
    <xarray.Dataset>
    Dimensions:  (lat: 500, lon: 600, time: 267)
    Coordinates:
      * time     (time) datetime64[ns] 1980-01-01 1980-01-02 ... 1980-09-23
      * lon      (lon) float64 -180.0 -179.4 -178.8 -178.2 ... 178.8 179.4 180.0
      * lat      (lat) float64 -90.0 -89.64 -89.28 -88.92 ... 88.92 89.28 89.64 90.0
    Data variables:
        sst      (time, lon, lat) float64 dask.array<shape=(267, 600, 500), chunksize=(267, 245, 245)>
    Attributes:
        history:  created for compute benchmarking
    """
    dt = np.dtype('f8')
    itemsize = dt.itemsize
    chunk_size = parse_bytes(chunk_size)
    total_bytes = chunk_size * n_workers
    size = total_bytes / itemsize
    timesteps = math.ceil(size / (lat * lon))
    shape = (timesteps, lon, lat)
    if chunk_over_time_dim:
        x = math.ceil(chunk_size / (lon * lat * itemsize))
        chunks = (x, lon, lat)
    else:
        x = math.ceil(math.sqrt(chunk_size / (timesteps * itemsize)))
        chunks = (timesteps, x, x)

    lats = xr.DataArray(np.linspace(start=-90, stop=90, num=lat), dims=['lat'])
    lons = xr.DataArray(np.linspace(start=-180, stop=180, num=lon), dims=['lon'])
    times = xr.DataArray(
        pd.date_range(start=start, freq=freq, periods=timesteps), dims=['time'])
    random_data = randn(shape=shape, chunks=chunks, nan=nan)
    ds = xr.DataArray(
        random_data,
        dims=['time', 'lon', 'lat'],
        coords={'time': times, 'lon': lons, 'lat': lats},
        name='sst',
        encoding=None,
        attrs={'units': 'baz units', 'description': 'a description'},
    ).to_dataset()
    ds.attrs = {'history': 'created for compute benchmarking'}
    return ds
def __init__(
    self,
    n_workers=None,
    threads_per_worker=1,
    processes=True,
    memory_limit=None,
    device_memory_limit=None,
    CUDA_VISIBLE_DEVICES=None,
    data=None,
    local_directory=None,
    protocol=None,
    enable_tcp_over_ucx=False,
    enable_infiniband=False,
    enable_nvlink=False,
    ucx_net_devices=None,
    **kwargs,
):
    if CUDA_VISIBLE_DEVICES is None:
        CUDA_VISIBLE_DEVICES = cuda_visible_devices(0)
    if isinstance(CUDA_VISIBLE_DEVICES, str):
        CUDA_VISIBLE_DEVICES = CUDA_VISIBLE_DEVICES.split(",")
    CUDA_VISIBLE_DEVICES = list(map(int, CUDA_VISIBLE_DEVICES))
    if n_workers is None:
        n_workers = len(CUDA_VISIBLE_DEVICES)
    if memory_limit is None:
        memory_limit = MEMORY_LIMIT / n_workers
    self.host_memory_limit = memory_limit
    self.device_memory_limit = device_memory_limit

    if not processes:
        raise ValueError(
            "Processes are necessary in order to use multiple GPUs with Dask"
        )

    if self.device_memory_limit is None:
        self.device_memory_limit = get_device_total_memory(0)
    elif isinstance(self.device_memory_limit, str):
        self.device_memory_limit = parse_bytes(self.device_memory_limit)

    if data is None:
        data = (
            DeviceHostFile,
            {
                "device_memory_limit": self.device_memory_limit,
                "memory_limit": self.host_memory_limit,
                "local_directory": local_directory
                or dask.config.get("temporary-directory")
                or os.getcwd(),
            },
        )

    if enable_tcp_over_ucx or enable_infiniband or enable_nvlink:
        if protocol is None:
            protocol = "ucx"
        elif protocol != "ucx":
            raise TypeError(
                "Enabling InfiniBand or NVLink requires protocol='ucx'")

        initialize(
            enable_tcp_over_ucx=enable_tcp_over_ucx,
            enable_infiniband=enable_infiniband,
            enable_nvlink=enable_nvlink,
        )

    if ucx_net_devices == "auto":
        try:
            from ucp._libs.topological_distance import TopologicalDistance  # noqa
        except ImportError:
            raise ValueError(
                "ucx_net_devices set to 'auto' but UCX-Py is not "
                "installed or it's compiled without hwloc support")
    elif ucx_net_devices == "":
        raise ValueError("ucx_net_devices can not be an empty string")
    self.ucx_net_devices = ucx_net_devices
    self.set_ucx_net_devices = enable_infiniband

    super().__init__(
        n_workers=0,
        threads_per_worker=threads_per_worker,
        memory_limit=memory_limit,
        processes=True,
        data=data,
        local_directory=local_directory,
        protocol=protocol,
        **kwargs,
    )

    self.new_spec["options"]["preload"] = self.new_spec["options"].get(
        "preload", []) + ["dask_cuda.initialize"]
    self.new_spec["options"]["preload_argv"] = self.new_spec["options"].get(
        "preload_argv", []) + ["--create-cuda-context"]

    self.cuda_visible_devices = CUDA_VISIBLE_DEVICES
    self.scale(n_workers)
    self.sync(self._correct_state)