Example 1
def _run(args):
    _env_setup(args.logfile, args.verbose)

    if args.distributed:
        try:
            from dask.distributed import Client, LocalCluster
        except ImportError as ie:
            ie.msg += ('\n\nIt seems like `dask` is not installed.\n'
                       'Please install `dask` and `distributed` using:\n'
                       '\n    pip install dask distributed')
            raise

        client = Client(
            LocalCluster(n_workers=args.workers,
                         threads_per_worker=args.threads))
        client.register_worker_callbacks(
            lambda: _env_setup(args.logfile, args.verbose))

        workers = 'dask'
    else:
        workers = args.workers

    synthesizers = sdgym.get_all_synthesizers()
    if args.models:
        synthesizers = {model: synthesizers[model] for model in args.models}

    lb = sdgym.run(synthesizers=synthesizers,
                   datasets=args.datasets,
                   iterations=args.iterations,
                   output_path=args.output_path,
                   cache_dir=args.cache_dir,
                   workers=workers,
                   show_progress=args.progress)
    if lb is not None:
        print(lb)
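Example 1 builds a LocalCluster, wraps it in a Client, and registers a setup callback so environment initialization runs on every current and future worker; the same pattern recurs throughout this listing. A minimal, self-contained sketch of that pattern (the `_setup_worker` body and cluster sizes are illustrative, not taken from sdgym):

import logging

from dask.distributed import Client, LocalCluster


def _setup_worker():
    # Hypothetical per-worker initialization; the sdgym code above uses _env_setup instead.
    logging.basicConfig(level=logging.INFO)


if __name__ == '__main__':
    cluster = LocalCluster(n_workers=2, threads_per_worker=1)
    client = Client(cluster)

    # Runs the setup on all current workers and on any worker that joins later.
    client.register_worker_callbacks(_setup_worker)

    # Workers added afterwards (e.g. by scaling the cluster) also execute the callback.
    cluster.scale(4)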
Example 2
def _run(args):
    _env_setup(args.logfile, args.verbose)

    if args.distributed:
        try:
            from dask.distributed import Client, LocalCluster
        except ImportError as ie:
            ie.msg += ('\n\nIt seems like `dask` is not installed.\n'
                       'Please install `dask` and `distributed` using:\n'
                       '\n    pip install dask distributed')
            raise

        processes = args.workers > 1
        client = Client(
            LocalCluster(
                processes=processes,
                n_workers=args.workers,
                threads_per_worker=args.threads,
            ),
        )
        client.register_worker_callbacks(
            lambda: _env_setup(args.logfile, args.verbose))

        workers = 'dask'
    else:
        workers = args.workers

    if args.jobs:
        args.jobs = json.loads(args.jobs)

    scores = sdgym.run(
        synthesizers=args.synthesizers,
        datasets=args.datasets,
        datasets_path=args.datasets_path,
        modalities=args.modalities,
        metrics=args.metrics,
        bucket=args.bucket,
        iterations=args.iterations,
        cache_dir=args.cache_dir,
        workers=workers,
        show_progress=args.progress,
        timeout=args.timeout,
        output_path=args.output_path,
        aws_key=args.aws_key,
        aws_secret=args.aws_secret,
        jobs=args.jobs,
        max_rows=args.max_rows,
        max_columns=args.max_columns,
    )

    if scores is not None:
        # Only group and print when sdgym.run actually returned a result.
        if args.groupby:
            scores = scores.groupby(args.groupby).mean().reset_index()

        _print_table(scores)
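Compared with Example 1, this variant also derives `processes = args.workers > 1`, which decides whether LocalCluster spawns separate worker processes or runs a purely threaded, in-process cluster. A short sketch of what that flag controls (worker and thread counts are illustrative):

from dask.distributed import Client, LocalCluster

# With a single worker, stay in-process and use threads only.
threaded_cluster = LocalCluster(processes=False, n_workers=1, threads_per_worker=4)

# With several workers, spawn separate processes, as the snippet above does.
process_cluster = LocalCluster(processes=True, n_workers=4, threads_per_worker=1)

client = Client(process_cluster)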
Example 3
def initialize_cluster(
    hpc: bool = True,
    jobs: int = 10,
    temp: str = tf.gettempdir(),
    scheduler: str = None,
    verbose: bool = True,
    **kwargs
) -> Client:
    """
    Initialize a distributed dask cluster.

    arguments
        hpc:       if true, initialize an HPC cluster running PBS/TORQUE
        jobs:      number of jobs to submit to an HPC cluster
        temp:      temp directory for intermediate/worker output
        scheduler: scheduler address if using a custom cluster
        verbose:   logging verbosity

    returns
        a dask client
    """

    ## Unclear whether these config settings are actually needed
    dask.config.set({'temporary_directory': temp})
    dask.config.set({'local_directory': temp})

    if hpc:
        cluster = _initialize_pbs_cluster(**kwargs)
        cluster.scale(jobs=jobs)
        client = Client(cluster)

    elif kwargs['environment']['local']:
        cluster = _initialize_local_cluster(**kwargs)
        client = Client(cluster)

    elif kwargs['environment']['custom']:
        client = Client(address=scheduler)

    else:
        ## Should never get here
        log._logger.error('Something horrible happened while starting the cluster')
        exit(1)

    ## Run the logging init function on each worker and register the callback so
    ## future workers also run the function
    init_logging_partial = partial(log._initialize_logging, verbose=verbose)
    client.register_worker_callbacks(setup=init_logging_partial)

    return client
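A hypothetical call for the local (non-HPC) branch; the shape of the `environment` dict is inferred from the branches above, and `_initialize_local_cluster` is assumed to accept the forwarded keyword arguments:

# Hypothetical usage; `environment` mirrors the keys checked in the branches above.
client = initialize_cluster(
    hpc=False,
    temp='/tmp/dask-work',
    verbose=True,
    environment={'local': True, 'custom': False},
)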
Example 4
def _run(args):
    _env_setup(args.logfile, args.verbose)

    if args.distributed:
        try:
            from dask.distributed import Client, LocalCluster
        except ImportError as ie:
            ie.msg += (
                '\n\nIt seems like `dask` is not installed.\n'
                'Please install `dask` and `distributed` using:\n'
                '\n    pip install dask distributed'
            )
            raise

        processes = args.workers > 1
        client = Client(
            LocalCluster(
                processes=processes,
                n_workers=args.workers,
                threads_per_worker=args.threads,
            ),
        )
        client.register_worker_callbacks(lambda: _env_setup(args.logfile, args.verbose))

        workers = 'dask'
    else:
        workers = args.workers

    lb = sdgym.run(
        synthesizers=args.synthesizers,
        datasets=args.datasets,
        datasets_path=args.datasets_path,
        modalities=args.modalities,
        metrics=args.metrics,
        iterations=args.iterations,
        cache_dir=args.cache_dir,
        workers=workers,
        show_progress=args.progress,
        timeout=args.timeout,
        output_path=args.output_path,
    )
    if lb is not None:
        _print_table(lb)
Example 5
    cluster = KubeCluster.from_yaml('worker-cpu-spec.yml')
    client = Client(cluster)
    #cluster.adapt(minimum=0, maximum=10)
    cluster.scale(10)

    files_list = ['deap_ga.py', 'fillPool.py', 'mutations.py', 'utils.py']
    for fname in files_list:
        with open(fname, 'rb') as f:
            data = f.read()

        def _worker_upload(dask_worker, *, data, fname):
            dask_worker.loop.add_callback(
                callback=dask_worker.upload_file,
                comm=None,  # not used
                filename=fname,
                data=data,
                load=True)

        client.register_worker_callbacks(setup=functools.partial(
            _worker_upload,
            data=data,
            fname=fname,
        ))

    bi, final_cluster = cluster_GA(nPool, eleNames, eleNums, eleRadii,
                                   generations, calc, filename, log_file, CXPB,
                                   singleTypeCluster)
    #view(final_cluster)
    #view(bi[0])
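The callback route above ensures the source files also reach workers that are still starting up when registration happens (the pods requested by `cluster.scale(10)` may not all be connected yet). For workers that are already connected, dask's `Client.upload_file` (also used in Example 7 below) pushes a local module to every worker in one call; a small sketch:

from dask.distributed import Client, LocalCluster

client = Client(LocalCluster(n_workers=2))

# Ship a local module to every currently connected worker and make it importable there.
client.upload_file('utils.py')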
Example 6
def get_client(
    address=None,
    auto_spawn=True,
    worker_log_level="ERROR",
    **cluster_kwargs,
):
    """
    Args:
        address (str, optional): address of the cluster scheduler, or 'slurm' to launch
            a dask cluster through SLURM
        auto_spawn (bool, optional): automagically spawn cluster if not found
        worker_log_level (str, optional): worker log level
    """
    cluster_klass, client = None, None
    if address == "slurm":
        # create SLURM jobs
        cluster_klass = ManagedSLURMCluster
    elif address is None:
        # nothing specified, use:
        #   - already connected client
        #   - spawn new local cluster
        try:
            # we try to acquire current session first
            client = Client.current()

            address = client.scheduler_info()["address"]
            logger.info(f"connect to existing cluster (scheduler: {address})")

            yield client
            # NOTE we do NOT close client when using this method, managed by others
        except ValueError:
            # nothing exists, continue to spawn managed cluster
            if not auto_spawn:
                raise RuntimeError("please spawn a dask cluster first")

            cluster_klass = ManagedLocalCluster

            # local cluster needs address info
            cluster_kwargs.update({"address": address})
    else:
        # directly specify the scheduler to connect to
        client = Client(address)

        yield client
        client.close()
        # NOTE we open this client, therefore we need to close it ourselves

    if not cluster_klass:
        # no need to spawn a cluster
        return

    with cluster_klass(**cluster_kwargs) as cluster:
        client = cluster.client

        # register loggers
        try:
            import coloredlogs
        except ImportError:
            logger.warning(
                "install `coloredlogs` to configure loggers automatically")
        else:

            def install_logger(dask_worker):
                # we know this is annoying, silence it
                logging.getLogger("tifffile").setLevel(logging.ERROR)

                coloredlogs.install(
                    level=worker_log_level,
                    fmt="%(asctime)s %(levelname)s %(message)s",
                    datefmt="%H:%M:%S",
                )

            logger.debug(
                f'install logger for workers, level="{worker_log_level}"')
            client.register_worker_callbacks(install_logger)

        yield client
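Since `get_client` yields the client rather than returning it, it is presumably wrapped as a context manager somewhere in the surrounding module; a hypothetical usage, applying `contextlib.contextmanager` directly (the scheduler address is a placeholder):

import contextlib

# Hypothetical wrapper; the surrounding module presumably does something equivalent.
managed_client = contextlib.contextmanager(get_client)

with managed_client(address="tcp://10.0.0.1:8786") as client:
    print(client.submit(sum, [1, 2, 3]).result())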
Example 7
class Remote(object):
    """
    Remote.
    
    Args:
        address (str): Remote scheduler address formed by `ip:port`.
        tls_ca_file (str, optional): TLS CA certificate file path. Defaults to None.
        tls_client_cert (str, optional): TLS certificate file path. Defaults to None.
        tls_client_key (str, optional): TLS private key file path. Defaults to None.
        require_encryption (bool, optional): Encrypt data exchange. Defaults to False.
        
    Note:
        TLS will be enabled only if all three TLS arguments are provided. 
        Remember to change network protocol to `tls://<address>`.
    """
    def __init__(self, address: str,
                 tls_ca_file: str = None, tls_client_cert: str = None, tls_client_key: str = None,
                 require_encryption: bool = False):
        # authentication
        sec = None
        if tls_ca_file and tls_client_cert and tls_client_key:
            sec = Security(tls_ca_file=tls_ca_file,
                           tls_client_cert=tls_client_cert,
                           tls_client_key=tls_client_key,
                           require_encryption=require_encryption)

        # init
        self._client = Client(address=address, security=sec)
        self._client.register_worker_callbacks(Remote._worker_startup)

    @staticmethod
    def _worker_startup(dask_worker: Worker):
        os.chdir(dask_worker.local_dir)

    def add_dependencies(self, files):
        """
        Add list of dependencies, order matters.
        
        Args:
            files (list): List of dependent files.
        """
        # TODO: automatically resolve module dependencies
        if isinstance(files, str):
            files = [files]
        for f in files:
            self._client.upload_file(f)

    def scatter(self, *args, **kwargs):
        """
        Scatter data.
        """
        return self._client.scatter(*args, **kwargs)

    def submit(self, func, *args, **kwargs):
        """
        Submit function and data.
        
        Args:
            func (callable): User function.
        """
        return self._client.submit(func, *args, **kwargs)

    def fetch(self, futures_, **kwargs):
        """
        Fetch data of future objects.
        
        Args:
            futures_ (list): Future objects.
        """
        return self._client.gather(futures_, **kwargs)

    def cancel(self, futures_, **kwargs):
        """
        Cancel job of future objects.
        
        Args:
            futures_ (list): Future objects.
        """
        return self._client.cancel(futures_, **kwargs)

    def close(self, *args, **kwargs):
        """
        Close connection.
        """
        return self._client.close(*args, **kwargs)
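A hypothetical round trip with the `Remote` wrapper; the scheduler address and dependency file name are placeholders:

# Hypothetical usage; address and dependency name are placeholders.
remote = Remote('tcp://10.0.0.1:8786')
remote.add_dependencies('helpers.py')  # a plain str is wrapped into a list internally

future = remote.submit(lambda x: x * 2, 21)
print(remote.fetch(future))  # 42

remote.close()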
Example 8
def run_dask_function(config):
    """Start a Dask Cluster using dask-kubernetes and run a function.

    Talks to kubernetes to create `n` new `pods`, each running a dask worker, which together
    form a `dask` cluster. A function specified in `config` is then imported and run with the
    given arguments, and the tasks it creates are executed on the `dask` cluster for
    distributed computation.

    The config dict must contain the following sections:
        * run
        * dask_cluster
        * output

    Args:
        config (dict):
            Config dictionary.
    """
    output_conf = config.get('output')
    if output_conf:
        path = output_conf.get('path')
        if not path:
            raise ValueError('An output path must be provided when providing `output`.')

    cluster_spec = _generate_cluster_spec(config, master=False)

    # Importing here to avoid an aiohttp error if not used.
    from dask_kubernetes import KubeCluster   # pylint: disable=C0415

    cluster = KubeCluster.from_dict(cluster_spec)

    workers = config['dask_cluster'].get('workers')

    if not workers:
        cluster.adapt()
    elif isinstance(workers, int):
        cluster.scale(workers)
    else:
        cluster.adapt(**workers)

    client = Client(cluster)
    client.get_versions(check=True)
    client.register_worker_callbacks(_logging_setup)

    try:
        run = _import_function(config['run'])
        kwargs = config['run']['args']
        results = run(**kwargs)

    finally:
        client.close()
        cluster.close()

    if output_conf:
        bucket = output_conf.get('bucket')

        try:
            if bucket:
                aws_key = output_conf.get('key')
                aws_secret = output_conf.get('secret_key')
                _upload_to_s3(bucket, path, results, aws_key, aws_secret)
            else:
                dirname = os.path.dirname(path)
                if dirname:
                    os.makedirs(dirname, exist_ok=True)

                results.to_csv(path)

        except Exception:   # pylint: disable=W0703
            print('Error storing results. Falling back to console dump.')
            print(_dataframe_to_csv_str(results))

        return None

    return results
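The docstring requires `run`, `dask_cluster`, and `output` sections; the exact keys each section supports are defined elsewhere in the module, so the following config is only an illustrative guess based on how the function reads it (in particular, the `'function'` key is an assumption about what `_import_function` expects):

# Illustrative config only; key names inside each section are inferred from how
# run_dask_function accesses them above, except 'function', which is a guess.
config = {
    'run': {
        'function': 'mypackage.benchmark.run',   # hypothetical import target
        'args': {'iterations': 3},
    },
    'dask_cluster': {
        'workers': 4,   # int -> cluster.scale(4); dict -> cluster.adapt(**workers); missing -> cluster.adapt()
        # pod/worker spec details consumed by _generate_cluster_spec would also live here
    },
    'output': {
        'path': 'results/scores.csv',
        'bucket': None,   # set an S3 bucket name to upload instead of writing locally
    },
}

run_dask_function(config)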