Example #1
def main(transcriptome_file, regulator_file, species, out_file, n_workers,
         threads_per_worker):
    print('reading data')

    tf_info = pd.read_csv(regulator_file, sep='\t', index_col=0)
    tf_names = list(tf_info.loc[tf_info['Species'] == species].index)

    df = pd.read_csv(transcriptome_file, sep='\t', index_col=0)

    num_not_expressed = (df.std(axis=1) == 0).sum()

    print(
        f'removing {num_not_expressed} genes that have zero expression in all samples'
    )
    # Filter out genes with zero variance (i.e. not expressed in any sample)
    df = df.loc[df.std(axis=1) > 0]

    print('starting scheduler')
    client = Client(n_workers=n_workers,
                    threads_per_worker=threads_per_worker,
                    memory_limit='48GB')

    try:
        network = grnboost2(expression_data=df.T,
                            tf_names=tf_names,
                            client_or_address=client,
                            verbose=True)

        network.to_csv(out_file, sep='\t', header=False, index=False)
    except Exception as e:
        print('Module inference error')
        print(e)
    finally:
        client.close()
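The snippet above omits its imports and entry point. A minimal sketch of the surrounding module, assuming grnboost2 comes from arboreto and that pandas and the distributed Client are imported at module level (the CLI wrapper itself is hypothetical, with placeholder defaults):

import argparse

import pandas as pd
from distributed import Client
from arboreto.algo import grnboost2  # assumed source of grnboost2

if __name__ == '__main__':
    # Hypothetical CLI wrapper around main(); argument names mirror main()'s parameters.
    parser = argparse.ArgumentParser()
    parser.add_argument('transcriptome_file')
    parser.add_argument('regulator_file')
    parser.add_argument('species')
    parser.add_argument('out_file')
    parser.add_argument('--n-workers', type=int, default=4)
    parser.add_argument('--threads-per-worker', type=int, default=1)
    args = parser.parse_args()
    main(args.transcriptome_file, args.regulator_file, args.species,
         args.out_file, args.n_workers, args.threads_per_worker)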
Example #2
 def shutdown():
     """Shutdown all distributed cluster."""
     mode = UserConfig().data.general.cluster_mode
     gpus = str(UserConfig().data.general.worker.gpus_per_job)
     if mode == ClusterMode.Single and gpus == "-1":
         return
     try:
         logging.info("Try to shutdown cluster.")
         from vega.core.trainer.utils import get_write_ip_master_local
         from distributed import Client
         ip, port = get_write_ip_master_local()
         if ip is None or port is None:
             logging.info(
                 "Stand-alone mode, no need to shut down the cluster.")
             return
         shutdown_client = Client("{}:{}".format(ip, port))
         logging.info("Cluster will be shut down.")
         shutdown_client.shutdown()
         shutdown_client.close()
         del shutdown_client
         logging.info("Cluster is shut down.")
     except Exception as e:
         logging.error("Pipeline's cluster shutdown error: {}".format(
             str(e)))
         logging.error(traceback.format_exc())
Example #3
 def _cancel(self, scheduler_address: Optional[str] = None) -> None:
     if not scheduler_address:
         scheduler_address = self.scheduler_address
     client = Client(scheduler_address)
     f = Future(self.name + " graph", client=client)
     f.cancel(force=True)
     client.close()
Example #4
def dask_client():
    cluster = LocalCluster(n_workers=NUM_WORKERS, threads_per_worker=2)
    client = Client(cluster)
    yield client
    # teardown
    client.close()
    cluster.close()
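This generator is intended to be used as a pytest fixture; the decorator is not shown in the excerpt. A hedged sketch of registering and consuming it (NUM_WORKERS and the test body are illustrative):

import pytest
from distributed import Client, LocalCluster

NUM_WORKERS = 2  # placeholder value for this sketch


@pytest.fixture
def dask_client():
    cluster = LocalCluster(n_workers=NUM_WORKERS, threads_per_worker=2)
    client = Client(cluster)
    yield client
    # teardown
    client.close()
    cluster.close()


def test_square(dask_client):
    # submit a trivial task to the local cluster and block on its result
    future = dask_client.submit(lambda x: x * x, 3)
    assert future.result() == 9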
Example #5
    def test_dask_read_combine_instastack(self):

        from distributed import Client, LocalCluster
        from dask.distributed import wait
        cluster = LocalCluster(n_workers=1, threads_per_worker=1)
        c = Client(cluster)
        anxcor = Anxcor()
        anxcor.set_window_length(120.0)
        times = anxcor.get_starttimes(starttime_stamp, endtime_stamp, 0.5)
        bank = WavebankWrapper(source_dir)
        anxcor.add_dataset(bank, 'nodals')

        anxcor.save_at_task(target_dir, 'combine')
        result = anxcor.process(times, dask_client=c, stack=True)

        anxcor = Anxcor()
        anxcor.set_window_length(120.0)
        bank = WavebankWrapper(source_dir)
        anxcor.add_dataset(bank, 'nodals')
        anxcor.load_at_task(target_dir, 'combine')
        result = anxcor.process(times, dask_client=c, stack=True)

        how_many_nc = _how_many_fmt(target_dir, format='.nc')
        _clean_files_in_dir(target_dir)
        c.close()
        cluster.close()
        assert 48 == how_many_nc
Example #6
class SchedulerComputeDepsInMemory(object):

    def setup(self):
        self.client = Client()

        # Generate 10 independent tasks
        x = [delayed(random.random)() for _ in range(10)]
        # Generate lots of interrelated dependent tasks
        n = 200
        for _ in range(10, n):
            random_subset = [random.choice(x) for _ in range(5)]
            random_max = delayed(max)(random_subset)
            x.append(random_max)

        # Persist tasks into distributed memory and wait to finish
        y = self.client.persist(x)
        wait(y)

        self.x = x

    def teardown(self):
        self.client.close()

    def time_compute_deps_already_in_memory(self):
        """
        Measure compute time when dependent tasks are already in memory.
        xref https://github.com/dask/distributed/pull/3293
        """
        compute(*self.x, scheduler=self.client)
Example #7
class LocalDaskDistributor(DistributorBaseClass):
    """
    Distributor using a local dask cluster and inproc communication.
    """
    def __init__(self, n_workers):
        """
        Initiates a LocalDaskDistributor instance.

        Parameters
        ----------
        n_workers : int
            How many workers should the local dask cluster have?
        """

        super().__init__()
        import tempfile

        from distributed import Client, LocalCluster

        # attribute .local_dir_ is the path where the local dask workers store temporary files
        self.local_dir_ = tempfile.mkdtemp()
        cluster = LocalCluster(n_workers=n_workers,
                               processes=False,
                               local_dir=self.local_dir_)

        self.client = Client(cluster)
        self.n_workers = n_workers

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask workers on a local
        machine

        Parameters
        ----------
        func : Callable
            Function to send to each worker.
        partitioned_chunks : List
            List of data chunks; each chunk is processed by one worker.
        kwargs : Dict
            Parameters for the map function
        Returns
        -------
        List
            The result of the calculation as a list - each item should be the result of the application of func
            to a single element.
        """

        if isinstance(partitioned_chunks, Iterable):
            # since dask 2.0.0 client map no longer accepts iterables
            partitioned_chunks = list(partitioned_chunks)
        result = self.client.gather(
            self.client.map(partial(func, **kwargs), partitioned_chunks))
        return result

    def close(self):
        """
        Closes the connection to the local Dask Scheduler
        """
        self.client.close()
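A hedged usage sketch for the distributor above; the worker function and chunks are invented for illustration, and the result keeps one entry per chunk because this variant does not flatten. It assumes the class and its module-level imports (DistributorBaseClass, partial, Iterable) are available:

def count_items(chunk, offset=0):
    # toy per-chunk computation used only in this sketch
    return len(chunk) + offset


distributor = LocalDaskDistributor(n_workers=2)
try:
    chunks = [[1, 2, 3], [4, 5], [6]]
    results = distributor.distribute(count_items, chunks, {"offset": 0})
    print(results)  # expected: [3, 2, 1]
finally:
    distributor.close()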
Example #8
 def cancel(self, scheduler_address=None):
     if not scheduler_address:
         scheduler_address = self.scheduler_address
     client = Client(scheduler_address)
     f = Future(self.name + "_graph", client=client)
     f.cancel(force=True)
     client.close()
Example #9
def shutdown_cluster():
    """Shutdown all distributed cluster."""
    # detect master is running
    if not General._parallel:
        return
    try:
        logging.info("Try to shutdown cluster.")

        # stop ReportServer
        from zeus.report import ReportServer
        ReportServer.stop()

        # stop Master
        from zeus.trainer.utils import load_master_ip
        from distributed import Client
        ip, port = load_master_ip()
        if ip is None or port is None:
            logging.info("Stand-alone mode, no need to shut down the cluster.")
            return
        shutdown_client = Client("{}:{}".format(ip, port))
        logging.info("Cluster will be shut down.")
        shutdown_client.shutdown()
        shutdown_client.close()
        del shutdown_client
        time.sleep(15)
        logging.info("Cluster is shut down.")
    except Exception as e:
        logging.error("Pipeline's cluster shutdown error: {}".format(str(e)))
        logging.error(traceback.format_exc())
Example #10
    def submit(
        self,
        client: Client = None,
        scheduler_address: str = None,
        priority: int = None,
        resources: Dict[str, Any] = None,
        show_progress=False,
        **kwargs,
    ) -> None:

        if not priority:
            priority = self.priority

        if not resources:
            resources = self.resources

        if not client:
            client = Client(scheduler_address)

        self.scheduler_address = client.scheduler.address

        computation = client.compute(self.graph,
                                     retries=3,
                                     priority=priority,
                                     resources=resources)
        if show_progress:
            progress(computation)
        fire_and_forget(computation)
        if scheduler_address:
            client.close()
        return None
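The method above relies on dask's fire_and_forget so the computation keeps running on the scheduler after the submitting client disconnects. A standalone sketch of that pattern (the delayed task and scheduler address are placeholders):

from dask import delayed
from distributed import Client, fire_and_forget


def slow_double(x):
    return 2 * x


# Connect to an already-running scheduler; the address is a placeholder.
client = Client("tcp://127.0.0.1:8786")
future = client.compute(delayed(slow_double)(21), retries=3, priority=10)
fire_and_forget(future)  # scheduler keeps the task even if no future reference remains
client.close()           # as in submit() above, the client can now be closed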
Example #11
        def wrapper(config: Dict, logconfig: Dict, cluster=None):

            if self.schema is not None:
                _config = self.schema(config)
            else:
                _config = config

            if cluster is not None:
                try:
                    client = Client(cluster.scheduler_address)
                except Exception:
                    traceback_str = traceback.format_exc()
                    raise Exception('Error occurred. Original traceback '
                                    'is\n%s\n' % traceback_str)
            else:
                client = None
            try:
                func.main.config = config  # type: ignore
                return func.main(**_config)  # type: ignore
            except Exception:
                traceback_str = traceback.format_exc()
                raise Exception('Error occurred. Original traceback '
                                'is\n%s\n' % traceback_str)
            finally:
                if client is not None:
                    client.close()
Example #12
def dask_client():
    from distributed import Client
    client = Client(processes=False,
                    threads_per_worker=1,
                    dashboard_address=None)
    yield client
    client.close()
    del client
Example #13
class ClusterDaskDistributor(DistributorBaseClass):
    """
    Distributor using a dask cluster, meaning that the calculation is spread over a cluster
    """
    def __init__(self, address):
        """
        Sets up a distributor that connects to a Dask Scheduler to distribute the calculation of the features

        :param address: the ip address and port number of the Dask Scheduler
        :type address: str
        """

        from distributed import Client

        self.client = Client(address=address)

    def calculate_best_chunk_size(self, data_length):
        """
        Uses the number of dask workers in the cluster (during execution time, meaning when you start the extraction)
        to find the optimal chunk_size.

        :param data_length: A length which defines how many calculations there need to be.
        :type data_length: int
        """
        n_workers = len(self.client.scheduler_info()["workers"])
        chunk_size, extra = divmod(data_length, n_workers * 5)
        if extra:
            chunk_size += 1
        return chunk_size

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask workers on a cluster

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the application of func
            to a single element.
        """
        if isinstance(partitioned_chunks, Iterable):
            # since dask 2.0.0 client map no longer accepts iterables
            partitioned_chunks = list(partitioned_chunks)
        result = self.client.gather(
            self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the Dask Scheduler
        """
        self.client.close()
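To make the chunk-size heuristic above concrete, here is a worked sketch of the same arithmetic, assuming the scheduler reports 4 workers:

# calculate_best_chunk_size targets roughly 5 chunks per worker
data_length = 103
n_workers = 4                # assumed value from scheduler_info()["workers"]
chunk_size, extra = divmod(data_length, n_workers * 5)  # divmod(103, 20) == (5, 3)
if extra:
    chunk_size += 1          # remainder bumps the chunk size from 5 to 6
assert chunk_size == 6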
Example #14
def dask_client_close(daskclient: Client):
    """ Close the Dask Client

    Parameters
    ----------
    daskclient : Dask distributed.Client
    """
    daskclient.close()
    return
Example #15
class ClusterDaskDistributor(DistributorBaseClass):
    """
    Distributor using a dask cluster, meaning that the calculation is spread over a cluster
    """

    def __init__(self, address):
        """
        Sets up a distributor that connects to a Dask Scheduler to distribute the calculation of the features

        :param address: the ip address and port number of the Dask Scheduler
        :type address: str
        """

        from distributed import Client

        self.client = Client(address=address)

    def calculate_best_chunk_size(self, data_length):
        """
        Uses the number of dask workers in the cluster (during execution time, meaning when you start the extraction)
        to find the optimal chunk_size.

        :param data_length: A length which defines how many calculations there need to be.
        :type data_length: int
        """
        n_workers = len(self.client.scheduler_info()["workers"])
        chunk_size, extra = divmod(data_length, n_workers * 5)
        if extra:
            chunk_size += 1
        return chunk_size

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask workers on a cluster

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the application of func
            to a single element.
        """

        result = self.client.gather(self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the Dask Scheduler
        """
        self.client.close()
Example #16
    def submit(
        self,
        client: Client = None,
        scheduler_address: str = None,
        priority: int = 1,
        to_dask: bool = True,
    ) -> Any:

        if self._is_running():
            msg = f"Job {self.name} is already running. Please use Job._cancel() to cancel the job."
            raise JobAlreadyRunningError(msg)

        if to_dask:
            if client is None:
                self.scheduler_address = scheduler_address or os.getenv(
                    "GRIZLY_DASK_SCHEDULER_ADDRESS")
                client = Client(self.scheduler_address)
            else:
                self.scheduler_address = client.scheduler.address

        self.logger.info(f"Submitting job {self.name}...")
        job_run = JobRun(job_name=self.name, logger=self.logger, db=self.db)
        job_run.status = "running"
        log_stream = self.__get_log_stream()

        start = time()
        try:
            result = self.func(*self.args, **self.kwargs)
            job_run.status = "success"
        except Exception:
            result = None
            job_run.status = "fail"
            _, exc_value, _ = sys.exc_info()
            job_run.traceback = traceback.format_exc()
            job_run.error = str(exc_value)

        job_run.logs = log_stream.getvalue()
        job_run.result = result

        if to_dask:
            client.close()

        self.logger.info(
            f"Job {self.name} finished with status {job_run.status}")
        end = time()
        job_run.finished_at = datetime.now(timezone.utc)
        job_run.duration = int(end - start)

        conditions_flags = self.__check_conditions(job_run)

        for condition, flag in conditions_flags.items():
            if flag:
                self.__submit_downstream_jobs(condition=condition)

        return result
Example #17
class LocalDaskDistributor(DistributorBaseClass):
    """
    Distributor using a local dask cluster and inproc communication.
    """
    def __init__(self, n_workers):
        """

        Initiates a LocalDaskDistributor instance.

        :param n_workers: How many workers should the local dask cluster have?
        :type n_workers: int
        """

        from distributed import LocalCluster, Client
        import tempfile

        # attribute .local_dir_ is the path where the local dask workers store temporary files
        self.local_dir_ = tempfile.mkdtemp()
        cluster = LocalCluster(n_workers=n_workers,
                               processes=False,
                               local_dir=self.local_dir_)

        self.client = Client(cluster)
        self.n_workers = n_workers

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask workers on a local
        machine

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the application of func
            to a single element.
        """

        if isinstance(partitioned_chunks, Iterable):
            # since dask 2.0.0 client map no longer accepts iterables
            partitioned_chunks = list(partitioned_chunks)
        result = self.client.gather(
            self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the local Dask Scheduler
        """
        self.client.close()
Example #18
def test_no_undesired_distributed_cache_hit(loop):
    # Dask has a pickle cache for callables that are called many times. Because
    # the dask backends used to wrap both the functions and the arguments
    # under instances of the Batch callable class, this caching mechanism could
    # lead to bugs as described in: https://github.com/joblib/joblib/pull/1055
    # The joblib-dask backend has been refactored to avoid bundling the
    # arguments as an attribute of the Batch instance to avoid this problem.
    # This test serves as a non-regression test.

    # Use a large number of input arguments to give the AutoBatchingMixin
    # enough tasks to kick-in.
    lists = [[] for _ in range(100)]
    np = pytest.importorskip('numpy')
    X = np.arange(int(1e6))

    def isolated_operation(list_, data=None):
        if data is not None:
            np.testing.assert_array_equal(data, X)
        list_.append(uuid4().hex)
        return list_

    cluster = LocalCluster(n_workers=1, threads_per_worker=2)
    client = Client(cluster)
    try:
        with parallel_backend('dask') as (ba, _):
            # dispatches joblib.parallel.BatchedCalls
            res = Parallel()(delayed(isolated_operation)(list_)
                             for list_ in lists)

        # The original arguments should not have been mutated as the mutation
        # happens in the dask worker process.
        assert lists == [[] for _ in range(100)]

        # Here we did not pass any large numpy array as argument to
        # isolated_operation so no scattering event should happen under the
        # hood.
        counts = count_events('receive-from-scatter', client)
        assert sum(counts.values()) == 0
        assert all([len(r) == 1 for r in res])

        with parallel_backend('dask') as (ba, _):
            # Append a large array which will be scattered by dask, and
            # dispatch joblib._dask.Batch
            res = Parallel()(delayed(isolated_operation)(list_, data=X)
                             for list_ in lists)

        # This time, auto-scattering should have kicked in.
        counts = count_events('receive-from-scatter', client)
        assert sum(counts.values()) > 0
        assert all([len(r) == 1 for r in res])
    finally:
        client.close()
        cluster.close()
Example #19
    def test_client(self):
        lc = LocalCluster(diagnostics_port=None)
        passed = Client(lc)

        client, shutdown_callback = _prepare_client(passed)

        self.assertEquals(client, passed)

        shutdown_callback()
        passed.close()
        lc.close()

        self.assertEquals(lc.status.value, 'closed')
Example #20
    def test_dask_execution(self):

        from distributed import Client, LocalCluster
        cluster = LocalCluster(n_workers=1, threads_per_worker=1)
        c = Client(cluster)
        anxcor = Anxcor()
        anxcor.set_window_length(120.0)
        times = anxcor.get_starttimes(starttime_stamp, endtime_stamp, 0.5)
        bank = WavebankWrapper(source_dir)
        anxcor.add_dataset(bank, 'nodals')
        result = anxcor.process(times, dask_client=c)
        pairs = list(result.coords['rec'].values) + list(result.coords['src'].values)
        c.close()
        cluster.close()
        assert 6 == len(pairs)
Example #21
class LocalDaskDistributor(DistributorBaseClass):
    """
    Distributor using a local dask cluster and inproc communication.
    """

    def __init__(self, n_workers):
        """

        Initiates a LocalDaskDistributor instance.

        :param n_workers: How many workers should the local dask cluster have?
        :type n_workers: int
        """

        from distributed import LocalCluster, Client
        import tempfile

        # attribute .local_dir_ is the path where the local dask workers store temporary files
        self.local_dir_ = tempfile.mkdtemp()
        cluster = LocalCluster(n_workers=n_workers, processes=False, local_dir=self.local_dir_)

        self.client = Client(cluster)
        self.n_workers = n_workers

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the map command to the dask workers on a local
        machine

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the application of func
            to a single element.
        """
        result = self.client.gather(self.client.map(partial(func, **kwargs), partitioned_chunks))
        return [item for sublist in result for item in sublist]

    def close(self):
        """
        Closes the connection to the local Dask Scheduler
        """
        self.client.close()
Example #22
 def test_dask_execution_exclude_with_stack_number(self):
     from distributed import Client, LocalCluster
     cluster = LocalCluster(n_workers=1, threads_per_worker=1)
     c = Client(cluster)
     anxcor = Anxcor()
     anxcor.set_window_length(120)
     anxcor.set_task_kwargs('crosscorrelate', dict(max_tau_shift=20.0))
     times = anxcor.get_starttimes(starttime_stamp, endtime_stamp, 0.5)
     bank = WavebankWrapper(source_dir)
     anxcor.set_must_exclude_single_stations('AX.1')
     anxcor.add_dataset(bank, 'nodals')
     result = anxcor.process(times, dask_client=c, stack=10)
     pairs = list(result.coords['rec'].values) + list(result.coords['src'].values)
     c.close()
     cluster.close()
     assert 4 == len(pairs)
Example #23
def test_twpice_case():
    """ Use a test case from TWP-ICE """
    Grid0 = pyart.io.read_grid(pydda.tests.EXAMPLE_RADAR0)
    Grid1 = pyart.io.read_grid(pydda.tests.EXAMPLE_RADAR1)
    sounding = pyart.io.read_arm_sonde(pydda.tests.SOUNDING_PATH)

    u_init, v_init, w_init = pydda.initialization.make_wind_field_from_profile(
        Grid0, sounding[1], vel_field='corrected_velocity')

    Grids = pydda.retrieval.get_dd_wind_field(
        [Grid0, Grid1], u_init, v_init, w_init, Co=100, Cm=1500.0,
        Cz=0, Cmod=0.0, vel_name='corrected_velocity',
        refl_field='reflectivity', frz=5000.0,
        filt_iterations=0, mask_outside_opt=True, upper_bc=1)

    # In this test grid, we expect the mean flow to be to the southeast
    # Maximum updrafts should be at least 10 m/s
    u_mean = np.nanmean(Grids[0].fields['u']['data'])
    v_mean = np.nanmean(Grids[0].fields['v']['data'])
    w_max = np.max(Grids[0].fields['w']['data'])

    assert u_mean > 0
    assert v_mean < 0
    assert w_max > 10

    # Now we will test the nesting. Do the same retrieval, and make sure
    # that we get the same result within a prescribed tolerance
    cluster = LocalCluster(n_workers=2, processes=True)
    client = Client(cluster)
    Grids2 = pydda.retrieval.get_dd_wind_field_nested(
        [Grid0, Grid1], u_init, v_init, w_init, client, Co=100, Cm=1500.0,
        Cz=0, Cmod=0.0, vel_name='corrected_velocity',
        refl_field='reflectivity', frz=5000.0,
        filt_iterations=0, mask_outside_opt=True, upper_bc=1)

    # Make sure features are correlated between both versions. No reason
    # to expect the same answer, but features should be correlated
    # Nesting tends to make the updrafts a bit better resolved, so expect
    # less of an outright correlation (but still strong)
    assert np.corrcoef(Grids2[0].fields["u"]["data"].flatten(),
                       Grids[0].fields["u"]["data"].flatten())[0, 1] > 0.9
    assert np.corrcoef(Grids2[0].fields["v"]["data"].flatten(),
                       Grids[0].fields["v"]["data"].flatten())[0, 1] > 0.9
    assert np.corrcoef(Grids2[0].fields["w"]["data"].flatten(),
                       Grids[0].fields["w"]["data"].flatten())[0, 1] > 0.5
    cluster.close()
    client.close()
Example #24
def reduce_caps(capsules, allcpgs, min_capsule_len):
    cluster = LocalCluster(n_workers=multiprocessing.cpu_count() * 2,
                           threads_per_worker=20)
    client = Client(cluster)
    capsule_names = list(capsules.keys())

    capsules_bag = db.from_sequence(list(capsules.values()))
    capsules_intersect = capsules_bag.map(lambda x: np.intersect1d(x, allcpgs))
    capsules_len = capsules_intersect.map(lambda x: x
                                          if len(x) >= min_capsule_len else [])
    # with get_task_stream(plot='save', filename="task-stream.html") as ts:
    capsules = capsules_len.compute()
    capsules = dict([(capsule_names[i], capsules[i].tolist())
                     for i in range(len(capsule_names)) if len(capsules[i])])
    #print(list(capsules.keys()))
    client.close()
    return capsules
Example #25
def main():
    """
    Use the Dask distributed client to run a function in parallel.
    """
    client = Client(n_workers=8)

    numbers = [3, 4, 5, 8, 12, 18, 25]
    futures = []

    for n in numbers:
        a = client.submit(adder, n)
        futures.append(a)

    results = client.gather(futures)
    print(results)

    client.close()
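The adder function is not shown in this example; a hedged sketch of a stand-in, together with the equivalent client.map shorthand:

from distributed import Client


def adder(n):
    # illustrative stand-in; the real adder's body is not shown in the example
    return n + 1


client = Client(n_workers=8)
numbers = [3, 4, 5, 8, 12, 18, 25]

# client.map submits one task per element, replacing the explicit submit loop
futures = client.map(adder, numbers)
print(client.gather(futures))

client.close()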
Example #26
def test_wait_for_workers_timeout():
    # Start a cluster with 0 worker:
    cluster = LocalCluster(n_workers=0, processes=False, threads_per_worker=2)
    client = Client(cluster)
    try:
        with parallel_backend('dask', wait_for_workers_timeout=0.1):
            # Short timeout: DaskDistributedBackend
            msg = "DaskDistributedBackend has no worker after 0.1 seconds."
            with pytest.raises(TimeoutError, match=msg):
                Parallel()(delayed(inc)(i) for i in range(10))

        with parallel_backend('dask', wait_for_workers_timeout=0):
            # No timeout: fallback to generic joblib failure:
            msg = "DaskDistributedBackend has no active worker"
            with pytest.raises(RuntimeError, match=msg):
                Parallel()(delayed(inc)(i) for i in range(10))
    finally:
        client.close()
        cluster.close()
Example #27
def test_wait_for_workers(cluster_strategy):
    cluster = LocalCluster(n_workers=0, processes=False, threads_per_worker=2)
    client = Client(cluster)
    if cluster_strategy == "adaptive":
        cluster.adapt(minimum=0, maximum=2)
    elif cluster_strategy == "late_scaling":
        # Tell the cluster to start workers but this is a non-blocking call
        # and new workers might take time to connect. In this case the Parallel
        # call should wait for at least one worker to come up before starting
        # to schedule work.
        cluster.scale(2)
    try:
        with parallel_backend('dask'):
            # The following should wait a bit for at least one worker to
            # become available.
            Parallel()(delayed(inc)(i) for i in range(10))
    finally:
        client.close()
        cluster.close()
Example #28
    def test_with_distributed_client(self):
        lc = LocalCluster(diagnostics_port=None)
        client = Client(lc)

        graph = create_graph(net1_ex_matrix,
                             net1_gene_names,
                             net1_tf_names,
                             "GBM",
                             SGBM_KWARGS,
                             target_genes=list(self.test_range),
                             client=client)

        network_df = client.compute(graph, sync=True)

        self.assertEquals(len(self.test_range),
                          len(network_df['target'].unique()))

        client.close()
        lc.close()
Example #31
class ClientSuite(object):
    def setup(self):
        self.client = Client()

    def teardown(self):
        self.client.close()

    def time_trivial_tasks(self):
        """
        Measure scheduler and communication overhead by running
        a bunch of unrelated trivial tasks.
        """
        @delayed(pure=True)
        def inc(x):
            return x + 1

        L = [inc(i) for i in range(500)]
        total = delayed(sum, pure=True)(L)

        total.compute(scheduler=self.client)
Example #32
def test_dont_select_closed_worker():
    # Make sure distributed does not try to reuse a client from a
    # closed cluster (https://github.com/dask/distributed/issues/2840).
    with clean(threads=False):
        cluster = LocalCluster(n_workers=0)
        c = Client(cluster)
        cluster.scale(2)
        assert c == get_client()

        c.close()
        cluster.close()

        cluster2 = LocalCluster(n_workers=0)
        c2 = Client(cluster2)
        cluster2.scale(2)

        current_client = get_client()
        assert c2 == current_client

        cluster2.close()
        c2.close()
Example #33
def dask_compute_grid(ddclient=None, func=None, **kwargs):
    temp_cluster = False
    completed = []
    
    if ddclient is None:
        print('creating local dask distributed cluster...')
        ddclient = Client()
        temp_cluster = True
    
    try:
        print('cluster dashboard available at: ' + dask_get_ddclient_dashboard_address(ddclient))
        from IPython.display import display
        display(ddclient)
        tfunc = make_return_tuple(func)
        kwargs_list = ([(k, i) for i in v] for k, v in kwargs.items())
        
        # tuple of cartesian products of {{(arg_name, arg_val) | arg_val in arg_vals} | arg_name in arg_names}
        cart_prod_tup = product(*kwargs_list)
        cart_prod_dicts = [dict(i) for i in cart_prod_tup]

        print('submitting {} jobs to cluster...'.format(len(cart_prod_dicts)))
        futures = [ddclient.submit(tfunc, **kwargs) for kwargs in cart_prod_dicts]

        print('computing jobs...')
        completed = ddclient.gather(futures)

        print('computation done')
    
    finally:
        if temp_cluster:
            print('shutting down cluster...')
            ddclient.close()
    
    print('done')
    return completed
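A hedged usage sketch for dask_compute_grid: each keyword argument is a list of values, and func is evaluated once per point of their cartesian product. The model function below is made up, and the sketch assumes the module's make_return_tuple and dashboard helpers are available:

def toy_model(alpha, beta):
    # illustrative grid-point evaluation
    return alpha * beta


# 2 x 3 = 6 jobs run on a temporary local cluster that is shut down afterwards;
# the shape of each returned item depends on the make_return_tuple helper.
results = dask_compute_grid(func=toy_model, alpha=[0.1, 1.0], beta=[1, 2, 3])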
Example #34
    def run(self,
            workflows: List[Workflow],
            overwrite_params: Dict = None) -> None:
        """[summary]

        Parameters
        ----------
        workflows : List[Workflow]
            Workflows to run.
        overwrite_params : Dict, optional
            Workflow parameters to overwrite (applies to all passed workflows), by default None

        Returns
        -------
        None
        """

        self.logger.info(f"Checking for pending workflows...")

        if overwrite_params:
            self.logger.debug(
                f"Overwriting workflow parameters: {overwrite_params}")
            for workflow in workflows:
                self.overwrite_params(workflow, params=overwrite_params)

        client = Client(self.client_address)

        submitted = 0
        for workflow in workflows:
            if self.should_run(workflow):
                self.logger.info(
                    f"Workflow {workflow.name} has been enqueued...")
                workflow.env = self.env
                workflow.submit(client=client)
                submitted += 1
        if not submitted:
            self.logger.info("No pending workflows found")
        client.close()
        return None