Example No. 1
from typing import Union

import xarray as xa
from xarray.core.groupby import DataArrayGroupBy


def persist(self) -> Union[xa.DataArray, DataArrayGroupBy]:
    from dask.distributed import Client
    xrd: xa.DataArray = self.xr
    if isinstance(xrd, DataArrayGroupBy):
        # logger.warn is deprecated; warning() is the supported spelling
        self.logger.warning("EDASArray.persist returning DataArrayGroupBy")
        return xrd
    if self.loaded_data is None:
        try:
            # Client.current() raises ValueError when no client exists;
            # it never returns None, so an `is None` check alone is dead code
            client = Client.current()
        except ValueError:
            client = None
        if client is None:
            # no distributed client: load eagerly; .load() already
            # materializes the data, so a trailing .persist() is a no-op
            self.loaded_data = xrd.load()
        else:
            # Client.persist returns a new persisted collection rather than
            # persisting xrd in place, so keep the return value
            self.loaded_data = client.persist(xrd)
    return self.loaded_data
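
A minimal sketch of the pitfall fixed above: Client.persist returns a new
collection and leaves its argument untouched, so the return value must be
kept. The in-process client and the array are purely illustrative.

import dask.array as da
from dask.distributed import Client

client = Client(processes=False)  # small in-process cluster for the demo
lazy = da.ones((1000, 1000), chunks=(100, 100))

persisted = client.persist(lazy)  # correct: keep the returned collection
client.persist(lazy)              # wrong: result discarded; `lazy` itself is
                                  # still an unevaluated task graph
client.close()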
Example No. 2
import logging

from dask.distributed import Client

logger = logging.getLogger(__name__)


# NOTE: this is a generator (it yields a client); the source presumably wraps
# it with contextlib.contextmanager. ManagedSLURMCluster/ManagedLocalCluster
# are project-specific wrappers defined elsewhere.
def get_client(
    address=None,
    auto_spawn=True,
    worker_log_level="ERROR",
    **cluster_kwargs,
):
    """
    Args:
        address (str, optional): address of the cluster scheduler, or 'slurm'
            to launch a dask cluster through SLURM
        auto_spawn (bool, optional): automagically spawn a cluster if none is found
        worker_log_level (str, optional): worker log level
    """
    cluster_klass, client = None, None
    if address == "slurm":
        # create SLURM jobs
        cluster_klass = ManagedSLURMCluster
    elif address is None:
        # nothing specified, use:
        #   - already connected client
        #   - spawn new local cluster
        try:
            # we try to acquire current session first
            client = Client.current()

            address = client.scheduler_info()["address"]
            logger.info(f"connect to existing cluster (scheduler: {address})")

            yield client
            # NOTE: we do NOT close this client here; its lifecycle is managed elsewhere
        except ValueError:
            # nothing exists, continue to spawn managed cluster
            if not auto_spawn:
                raise RuntimeError("please spawn a dask cluster first")

            cluster_klass = ManagedLocalCluster

            # local cluster needs address info
            cluster_kwargs.update({"address": address})
    else:
        # directly specify the scheduler to connect to
        client = Client(address)

        yield client
        # NOTE: we opened this client ourselves, so we must close it
        client.close()

    if not cluster_klass:
        # no need to spawn a cluster
        return

    with cluster_klass(**cluster_kwargs) as cluster:
        client = cluster.client

        # register loggers
        try:
            import coloredlogs
        except ImportError:
            logger.warning(
                "install `coloredlogs` to configure loggers automatically")
        else:

            def install_logger(dask_worker):
                # register_worker_callbacks passes the worker in when the
                # callback takes an argument named `dask_worker`; unused here
                # we know this is annoying, silence it
                logging.getLogger("tifffile").setLevel(logging.ERROR)

                coloredlogs.install(
                    level=worker_log_level,
                    fmt="%(asctime)s %(levelname)s %(message)s",
                    datefmt="%H:%M:%S",
                )

            logger.debug(
                f'install logger for workers, level="{worker_log_level}"')
            client.register_worker_callbacks(install_logger)

        yield client
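
Because get_client yields exactly once on every path, it is meant to be
driven as a context manager. A minimal usage sketch, assuming the
contextlib.contextmanager wrapper that this excerpt elides (the wrapped
name is illustrative):

from contextlib import contextmanager

managed_client = contextmanager(get_client)

with managed_client() as client:
    # double each element on the cluster, then gather the results locally
    futures = client.map(lambda x: x * 2, range(4))
    print(client.gather(futures))  # [0, 2, 4, 6]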
Example No. 3
    # assumes: import os, dask_jobqueue; from dask.distributed import Client, LocalCluster
    if slurm:
        cluster = dask_jobqueue.SLURMCluster(
            queue=os.environ['SLURM_JOB_PARTITION'],
            cores=1,
            walltime='0',
            memory=opts['memory'],
            local_directory=os.getenv('TMPDIR', '/tmp'))

        client = Client(cluster)
        # start_workers() has been removed from dask_jobqueue;
        # scale() is the current API for requesting workers
        cluster.scale(opts['ntasks'])

    else:
        cluster = LocalCluster(n_workers=opts['ntasks'], threads_per_worker=1)
        client = Client(cluster)

    # current() is a classmethod; called via the instance it simply returns
    # the active client, i.e. `client` itself
    print(client.current().cluster.scheduler)

    # read model configuration
    parameters = configurate()

    # Sterope Main Algorithm
    population = populate()
    # simulate levels
    simulate()
    # evaluate sensitivity
    sensitivity = evaluate()
    # write reports
    report()

    # move and organize results
    backup()
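
For comparison, a self-contained sketch of the same SLURM setup against the
current dask_jobqueue API; the partition name, memory, and walltime are
illustrative, and adapt() lets the scheduler grow and shrink the job pool
with load instead of requesting a fixed worker count:

import dask_jobqueue
from dask.distributed import Client

cluster = dask_jobqueue.SLURMCluster(
    queue='normal',      # illustrative partition name
    cores=1,
    memory='2GB',
    walltime='00:30:00',
)
cluster.adapt(minimum=0, maximum=8)  # add/remove SLURM jobs on demand
client = Client(cluster)
print(client.cluster.scheduler)
client.close()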
Example No. 4
from dagster import Output
from dask.distributed import Client


# the @solid decorator declaring the two outputs is elided from this excerpt
def scheduler_info_solid(context):
    # as_current() makes this client the one Client.current() resolves to
    with context.resources.dask.client.as_current():
        client = Client.current()

        yield Output(client.scheduler_info(), "scheduler_info")
        yield Output(client.nthreads(), "nthreads")
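
A self-contained sketch of the as_current()/Client.current() handoff used
above, with Dagster taken out of the picture (the in-process client is just
for the demo):

from dask.distributed import Client

client = Client(processes=False)  # in-process cluster for the demo

with client.as_current():
    # inside the block, Client.current() resolves to `client`
    assert Client.current() is client
    print(Client.current().nthreads())

client.close()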