def _run(args):
    _env_setup(args.logfile, args.verbose)
    if args.distributed:
        try:
            from dask.distributed import Client, LocalCluster
        except ImportError as ie:
            ie.msg += (
                '\n\nIt seems like `dask` is not installed.\n'
                'Please install `dask` and `distributed` using:\n'
                '\n    pip install dask distributed'
            )
            raise

        client = Client(
            LocalCluster(n_workers=args.workers, threads_per_worker=args.threads))
        client.register_worker_callbacks(
            lambda: _env_setup(args.logfile, args.verbose))
        workers = 'dask'
    else:
        workers = args.workers

    synthesizers = sdgym.get_all_synthesizers()
    if args.models:
        synthesizers = {model: synthesizers[model] for model in args.models}

    lb = sdgym.run(
        synthesizers=synthesizers,
        datasets=args.datasets,
        iterations=args.iterations,
        output_path=args.output_path,
        cache_dir=args.cache_dir,
        workers=workers,
        show_progress=args.progress,
    )
    if lb is not None:
        print(lb)
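# A minimal standalone sketch of the pattern above (names here are
# illustrative, not from the source): a callback passed to
# `register_worker_callbacks` runs on every current worker and on any worker
# that joins the cluster later, which is why `_env_setup` is re-registered
# for the dask workers right after the cluster is created.
from dask.distributed import Client, LocalCluster

def _setup_worker():
    import logging
    logging.basicConfig(level=logging.INFO)

if __name__ == '__main__':
    client = Client(LocalCluster(n_workers=2, threads_per_worker=1))
    client.register_worker_callbacks(_setup_worker)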
def _run(args):
    _env_setup(args.logfile, args.verbose)
    if args.distributed:
        try:
            from dask.distributed import Client, LocalCluster
        except ImportError as ie:
            ie.msg += (
                '\n\nIt seems like `dask` is not installed.\n'
                'Please install `dask` and `distributed` using:\n'
                '\n    pip install dask distributed'
            )
            raise

        processes = args.workers > 1
        client = Client(
            LocalCluster(
                processes=processes,
                n_workers=args.workers,
                threads_per_worker=args.threads,
            ),
        )
        client.register_worker_callbacks(
            lambda: _env_setup(args.logfile, args.verbose))
        workers = 'dask'
    else:
        workers = args.workers

    if args.jobs:
        args.jobs = json.loads(args.jobs)

    scores = sdgym.run(
        synthesizers=args.synthesizers,
        datasets=args.datasets,
        datasets_path=args.datasets_path,
        modalities=args.modalities,
        metrics=args.metrics,
        bucket=args.bucket,
        iterations=args.iterations,
        cache_dir=args.cache_dir,
        workers=workers,
        show_progress=args.progress,
        timeout=args.timeout,
        output_path=args.output_path,
        aws_key=args.aws_key,
        aws_secret=args.aws_secret,
        jobs=args.jobs,
        max_rows=args.max_rows,
        max_columns=args.max_columns,
    )

    if args.groupby:
        scores = scores.groupby(args.groupby).mean().reset_index()

    if scores is not None:
        _print_table(scores)
def initialize_cluster(
    hpc: bool = True,
    jobs: int = 10,
    temp: str = tf.gettempdir(),
    scheduler: str = None,
    verbose: bool = True,
    **kwargs
) -> Client:
    """
    Initialize a distributed dask cluster.

    arguments
        hpc:       if true, initialize an HPC cluster running PBS/TORQUE
        jobs:      number of jobs to submit to an HPC cluster
        temp:      temp directory for intermediate/worker output
        scheduler: scheduler address if using a custom cluster
        verbose:   logging verbosity

    returns
        a dask client
    """

    ## Unclear whether this is actually needed, but point dask's temp and
    ## local directories at the given location explicitly
    dask.config.set({'temporary_directory': temp})
    dask.config.set({'local_directory': temp})

    if hpc:
        cluster = _initialize_pbs_cluster(**kwargs)
        cluster.scale(jobs=jobs)
        client = Client(cluster)

    elif kwargs['environment']['local']:
        cluster = _initialize_local_cluster(**kwargs)
        client = Client(cluster)

    elif kwargs['environment']['custom']:
        client = Client(address=scheduler)

    else:
        ## Should never get here
        log._logger.error('Something horrible happened while starting the cluster')
        exit(1)

    ## Run the logging init function on each worker and register the callback so
    ## future workers also run the function
    init_logging_partial = partial(log._initialize_logging, verbose=verbose)
    client.register_worker_callbacks(setup=init_logging_partial)

    return client
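# A hypothetical call to initialize_cluster above; the shape of the
# `environment` mapping is inferred from the branches, and this assumes
# _initialize_local_cluster tolerates the extra kwargs.
client = initialize_cluster(
    hpc=False,
    verbose=True,
    environment={'local': True, 'custom': False},
)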
def _run(args):
    _env_setup(args.logfile, args.verbose)
    if args.distributed:
        try:
            from dask.distributed import Client, LocalCluster
        except ImportError as ie:
            ie.msg += (
                '\n\nIt seems like `dask` is not installed.\n'
                'Please install `dask` and `distributed` using:\n'
                '\n    pip install dask distributed'
            )
            raise

        processes = args.workers > 1
        client = Client(
            LocalCluster(
                processes=processes,
                n_workers=args.workers,
                threads_per_worker=args.threads,
            ),
        )
        client.register_worker_callbacks(lambda: _env_setup(args.logfile, args.verbose))
        workers = 'dask'
    else:
        workers = args.workers

    lb = sdgym.run(
        synthesizers=args.synthesizers,
        datasets=args.datasets,
        datasets_path=args.datasets_path,
        modalities=args.modalities,
        metrics=args.metrics,
        iterations=args.iterations,
        cache_dir=args.cache_dir,
        workers=workers,
        show_progress=args.progress,
        timeout=args.timeout,
        output_path=args.output_path,
    )
    if lb is not None:
        _print_table(lb)
cluster = KubeCluster.from_yaml('worker-cpu-spec.yml')
client = Client(cluster)
# cluster.adapt(minimum=0, maximum=10)
cluster.scale(10)

files_list = ['deap_ga.py', 'fillPool.py', 'mutations.py', 'utils.py']
for fname in files_list:
    with open(fname, 'rb') as f:
        data = f.read()

    def _worker_upload(dask_worker, *, data, fname):
        dask_worker.loop.add_callback(
            callback=dask_worker.upload_file,
            comm=None,  # not used
            filename=fname,
            data=data,
            load=True)

    # Bind data/fname now with functools.partial; a bare closure would see
    # only the loop variables' final values by the time the callback runs.
    client.register_worker_callbacks(setup=functools.partial(
        _worker_upload,
        data=data,
        fname=fname,
    ))

bi, final_cluster = cluster_GA(
    nPool, eleNames, eleNums, eleRadii, generations, calc,
    filename, log_file, CXPB, singleTypeCluster)
# view(final_cluster)
# view(bi[0])
def get_client(
    address=None,
    auto_spawn=True,
    worker_log_level="ERROR",
    **cluster_kwargs,
):
    """
    Args:
        address (str, optional): address of the cluster scheduler, or 'slurm'
            to launch a dask cluster through SLURM
        auto_spawn (bool, optional): automagically spawn cluster if not found
        worker_log_level (str, optional): worker log level
    """
    cluster_klass, client = None, None
    if address == "slurm":
        # create SLURM jobs
        cluster_klass = ManagedSLURMCluster
    elif address is None:
        # nothing specified, use:
        #   - already connected client
        #   - spawn new local cluster
        try:
            # we try to acquire the current session first
            client = Client.current()
            address = client.scheduler_info()["address"]
            logger.info(f"connect to existing cluster (scheduler: {address})")
            yield client
            # NOTE we do NOT close the client when using this method, it is managed by others
        except ValueError:
            # nothing exists, continue to spawn a managed cluster
            if not auto_spawn:
                raise RuntimeError("please spawn a dask cluster first")
            cluster_klass = ManagedLocalCluster
            # local cluster needs address info
            cluster_kwargs.update({"address": address})
    else:
        # directly specify the scheduler to connect to
        client = Client(address)
        yield client
        # NOTE we opened this client, therefore we need to close it ourselves
        client.close()

    if not cluster_klass:
        # no need to spawn a cluster
        return

    with cluster_klass(**cluster_kwargs) as cluster:
        client = cluster.client

        # register loggers
        try:
            import coloredlogs
        except ImportError:
            logger.warning(
                "install `coloredlogs` to configure loggers automatically")
        else:
            def install_logger(dask_worker):
                # we know this is annoying, silence it
                logging.getLogger("tifffile").setLevel(logging.ERROR)

                coloredlogs.install(
                    level=worker_log_level,
                    fmt="%(asctime)s %(levelname)s %(message)s",
                    datefmt="%H:%M:%S",
                )

            logger.debug(f'install logger for workers, level="{worker_log_level}"')
            client.register_worker_callbacks(install_logger)

        yield client
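# A usage sketch for get_client; since the function yields, it is assumed to
# be wrapped with contextlib.contextmanager (either as a decorator in the
# module or at the call site, as below). The scheduler address is a placeholder.
from contextlib import contextmanager

with contextmanager(get_client)(address="tcp://127.0.0.1:8786") as client:
    future = client.submit(sum, [1, 2, 3])
    print(future.result())  # 6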
class Remote(object):
    """
    Remote.

    Args:
        address (str): Remote scheduler address formed by `ip:port`.
        tls_ca_file (str, optional): TLS CA certificate file path. Defaults to None.
        tls_client_cert (str, optional): TLS certificate file path. Defaults to None.
        tls_client_key (str, optional): TLS private key file path. Defaults to None.
        require_encryption (bool, optional): Encrypt data exchange. Defaults to False.

    Note:
        TLS will be enabled only if all three TLS arguments are provided.
        Remember to change the network protocol to `tls://<address>`.
    """

    def __init__(self,
                 address: str,
                 tls_ca_file: str = None,
                 tls_client_cert: str = None,
                 tls_client_key: str = None,
                 require_encryption: bool = False):
        # authentication
        sec = None
        if tls_ca_file and tls_client_cert and tls_client_key:
            sec = Security(tls_ca_file=tls_ca_file,
                           tls_client_cert=tls_client_cert,
                           tls_client_key=tls_client_key,
                           require_encryption=require_encryption)

        # init
        self._client = Client(address=address, security=sec)
        self._client.register_worker_callbacks(Remote._worker_startup)

    @staticmethod
    def _worker_startup(dask_worker: Worker):
        os.chdir(dask_worker.local_dir)

    def add_dependencies(self, files):
        """
        Add a list of dependencies; order matters.

        Args:
            files (list): List of dependent files.
        """
        # TODO: automatically resolve module dependencies
        if isinstance(files, str):
            files = [files]
        for f in files:
            self._client.upload_file(f)

    def scatter(self, *args, **kwargs):
        """
        Scatter data.
        """
        return self._client.scatter(*args, **kwargs)

    def submit(self, func, *args, **kwargs):
        """
        Submit a function and data.

        Args:
            func (callable): User function.
        """
        return self._client.submit(func, *args, **kwargs)

    def fetch(self, futures_, **kwargs):
        """
        Fetch data of future objects.

        Args:
            futures_ (list): Future objects.
        """
        return self._client.gather(futures_, **kwargs)

    def cancel(self, futures_, **kwargs):
        """
        Cancel jobs of future objects.

        Args:
            futures_ (list): Future objects.
        """
        return self._client.cancel(futures_, **kwargs)

    def close(self, *args, **kwargs):
        """
        Close the connection.
        """
        return self._client.close(*args, **kwargs)
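# A minimal usage sketch for the Remote wrapper above; the scheduler address
# is a placeholder, and `helpers.py` and `square` are hypothetical.
remote = Remote('10.0.0.1:8786')
remote.add_dependencies('helpers.py')  # a single string is promoted to a list

def square(x):
    return x * x

futures = [remote.submit(square, i) for i in range(4)]
print(remote.fetch(futures))  # [0, 1, 4, 9]
remote.close()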
def run_dask_function(config):
    """Start a Dask cluster using dask-kubernetes and run a function.

    Talks to kubernetes to create `n` new pods, each running a dask worker,
    which together form a `dask` cluster. Then, the function specified in
    `config` is imported and run with the given arguments, and the tasks it
    creates are executed on the `dask` cluster for distributed computation.

    The config dict must contain the following sections:
        * run
        * dask_cluster
        * output

    Args:
        config (dict):
            Config dictionary.
    """
    output_conf = config.get('output')
    if output_conf:
        path = output_conf.get('path')
        if not path:
            raise ValueError('An output path must be provided when providing `output`.')

    cluster_spec = _generate_cluster_spec(config, master=False)

    # Importing here to avoid an aiohttp error if not used.
    from dask_kubernetes import KubeCluster  # pylint: disable=C0415

    cluster = KubeCluster.from_dict(cluster_spec)

    workers = config['dask_cluster'].get('workers')
    if not workers:
        cluster.adapt()
    elif isinstance(workers, int):
        cluster.scale(workers)
    else:
        cluster.adapt(**workers)

    client = Client(cluster)
    client.get_versions(check=True)
    client.register_worker_callbacks(_logging_setup)

    try:
        run = _import_function(config['run'])
        kwargs = config['run']['args']
        results = run(**kwargs)
    finally:
        client.close()
        cluster.close()

    if output_conf:
        bucket = output_conf.get('bucket')
        try:
            if bucket:
                aws_key = output_conf.get('key')
                aws_secret = output_conf.get('secret_key')
                _upload_to_s3(bucket, path, results, aws_key, aws_secret)
            else:
                dirname = os.path.dirname(path)
                if dirname:
                    os.makedirs(dirname, exist_ok=True)

                results.to_csv(path)
        except Exception:  # pylint: disable=W0703
            print('Error storing results. Falling back to console dump.')
            print(_dataframe_to_csv_str(results))

        return None

    return results
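# A hypothetical `config` for run_dask_function above; the exact keys under
# `run` that _import_function expects are not shown in the source, so
# `function` is a guess, and the module path and output path are placeholders.
config = {
    'run': {
        'function': 'my_package.benchmarks.run',
        'args': {'iterations': 10},
    },
    'dask_cluster': {
        'workers': 4,
    },
    'output': {
        'path': 'results/scores.csv',
    },
}
results = run_dask_function(config)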