Example #1
def ctx(request, dask_executor):
    if request.param == 'inline':
        yield Context.make_with('inline')
    elif request.param == "dask_executor":
        yield Context(executor=dask_executor)
    elif request.param == "delayed_default":
        yield Context(executor=DelayedJobExecutor())
    elif request.param == "delayed_dist":
        with distributed.Client(n_workers=2,
                                threads_per_worker=4,
                                processes=True) as _:
            yield Context(executor=DelayedJobExecutor())
    elif request.param == "dask_make_default":
        try:
            ctx = Context.make_with('dask-make-default')
            yield ctx
        finally:
            # cleanup: Close cluster and client
            # This is also tested below, here just to make
            # sure things behave as expected.
            assert isinstance(ctx.executor, DaskJobExecutor)
            ctx.executor.is_local = True
            ctx.close()
    elif request.param == "dask_integration":
        with distributed.Client(n_workers=2,
                                threads_per_worker=4,
                                processes=False) as _:
            yield Context.make_with("dask-integration")
    elif request.param == "concurrent":
        yield Context.make_with("threads")
    elif request.param == "delayed":
        yield Context(executor=DelayedJobExecutor())
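The block above reads like the body of a parametrized pytest fixture covering several executor kinds. A minimal sketch of how such a fixture is typically declared and consumed follows; the decorator, parameter list, and test name are assumptions, not taken from the source.

import pytest

# Assumed parametrization: one parameter per branch handled in the body above.
@pytest.fixture(
    params=['inline', 'dask_executor', 'delayed_default', 'delayed_dist',
            'dask_make_default', 'dask_integration', 'concurrent', 'delayed']
)
def ctx(request, dask_executor):
    ...  # fixture body as shown above


def test_runs_on_each_executor(ctx):
    # pytest instantiates this test once per executor kind yielded by ctx
    assert ctx is not None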
Example #2
File: executor.py  Project: wbnns/stems
def setup_executor(address=None, n_workers=None, threads_per_worker=1, **kwds):
    """ Setup a Dask distributed cluster scheduler client

    Parameters
    ----------
    address : str, optional
        This can be the address of a ``Scheduler`` server, like a string
        ``'127.0.0.1:8786'``. If ``None``, sets up a ``LocalCluster``
    n_workers : int, optional
        Number of workers. Only used if setting up a ``LocalCluster``
    threads_per_worker : int, optional
        Number of threads per worker
    kwds
        Additional options passed to :py:func:`distributed.Client`

    Returns
    -------
    distributed.Client
        Distributed compute client
    """
    import distributed
    try:
        client = distributed.Client(address=address,
                                    n_workers=n_workers,
                                    threads_per_worker=threads_per_worker,
                                    **kwds)
    except Exception as e:
        logger.exception('Could not start `distributed` cluster')
        raise
    else:
        return client
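A minimal usage sketch for setup_executor; the import path is inferred from the file/project labels above and may differ.

# Hypothetical import path based on the executor.py / wbnns/stems labels.
from stems.executor import setup_executor

# Start a LocalCluster with four single-threaded workers...
client = setup_executor(n_workers=4, threads_per_worker=1)
print(len(client.scheduler_info()['workers']))
client.close()

# ...or attach to an already-running scheduler instead.
client = setup_executor(address='127.0.0.1:8786')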
Example #3
def ee_dask_deploy(config, pb_id, image, n_workers=1, buffers=[], secrets=[]):
    """Deploy Dask execution engine.

    :param config: configuration DB handle
    :param pb_id: processing block ID
    :param image: Docker image to deploy
    :param n_workers: number of Dask workers
    :param buffers: list of buffers to mount on Dask workers
    :param secrets: list of secrets to mount on Dask workers
    :return: deployment ID and Dask client handle

    """
    # Make deployment
    deploy_id = "proc-{}-dask".format(pb_id)
    values = {"image": image, "worker.replicas": n_workers}
    for i, b in enumerate(buffers):
        values["buffers[{}]".format(i)] = b
    for i, s in enumerate(secrets):
        values["secrets[{}]".format(i)] = s
    deploy = ska_sdp_config.Deployment(
        deploy_id, "helm", {"chart": "dask", "values": values}
    )
    for txn in config.txn():
        txn.create_deployment(deploy)

    # Wait for scheduler to become available
    scheduler = deploy_id + "-scheduler." + os.environ["SDP_HELM_NAMESPACE"] + ":8786"
    client = None
    while client is None:
        try:
            client = distributed.Client(scheduler, timeout=1)
        except Exception:
            # Scheduler not reachable yet; keep retrying
            pass

    return deploy_id, client
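A call sketch for ee_dask_deploy, assuming a ska_sdp_config handle and SDP_HELM_NAMESPACE set in the environment; the processing-block ID and image name are placeholders.

import ska_sdp_config

# Hypothetical values for illustration only.
config = ska_sdp_config.Config()
deploy_id, client = ee_dask_deploy(
    config, pb_id='pb-20240101-test', image='dask-worker:latest', n_workers=4)
print(deploy_id, client)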
Example #4
def test_retrieve() -> None:
    cluster = distributed.LocalCluster(
        ip='localhost:8786',
        # I want a bokeh interface to check progress
        dashboard_address='localhost:8787',
        # single process, single thread allows ctrl+C backtrace to
        # show where the code is getting stuck. Otherwise, it will say,
        # "I'm stuck waiting for other processes." It also makes
        # time_code more meaningful
        processes=False,
        threads_per_worker=1,
    )
    # TODO: put this in a reusable module

    with distributed.Client(cluster):

        # disable the cache, because I don't want to persist these results
        # in the cloud
        for cached_func in [
                retrieve.get_rfs, retrieve.get_paragraphs,
                retrieve.get_raw_forms, retrieve.get_indexes
        ]:
            assert isinstance(cached_func, Cache)
            cast(Cache, cached_func).disabled = True

        rfs = dask.bag.zip(  # pylint: disable=unused-variable
            retrieve.get_indexes('10-K', 1995, 1),
            retrieve.get_rfs(1995, 1)).take(10, npartitions=1)
Example #5
    def fullReconstruction(self):

        from xicam.Tomography.widgets.volumeviewer import VolumeViewer
        volumeviewer = VolumeViewer()
        self.recontabs.addTab(volumeviewer, '????')

        currentitem = self.headermodel.item(self.rawtabview.currentIndex())
        if not currentitem:
            msg.showMessage('Error: You must open files before reconstructing.')
            return
        try:
            msg.showBusy()
            msg.showMessage('Running slice reconstruction...', level=msg.INFO)
            currentheader = self.headermodel.item(self.rawtabview.currentIndex()).header
            readprocess = self.workflow.processes[0]  # hopefully! TODO: require a readprocess first
            readprocess.path.value = currentheader.startdoc['path']

            numofsinograms = currentheader.meta_array('primary').shape[1]

            executor = DaskExecutor()
            client = distributed.Client()

            def chunkiterator(workflow):
                for i in range(0, int(numofsinograms), int(readprocess.chunksize.value)):
                    readprocess.sinoindex.value = i
                    yield executor.execute(workflow)

            _reconthread = QThreadFutureIterator(chunkiterator, self.workflow,
                                                 callback_slot=partial(self.showReconstruction, mode=self.fullrecon),
                                                 except_slot=self.exceptionCallback)
            _reconthread.start()
        except Exception as ex:
            msg.logError(ex)
            msg.showReady()
            msg.clearMessage()
Example #6
 def get_dask(
     self,
     ca_path="dask_ca.crt",
     client_cert_path="dask_client_cert.pem",
     hostname=None,
     port=8786,
 ):
     with open(ca_path, "w") as fout:
         fout.write(self.config["tls_ca"])
     userproxy_path = os.environ.get("X509_USER_PROXY",
                                     "/tmp/x509up_u%d" % os.getuid())
     with open(userproxy_path, "rb") as fin:
         userproxy = fin.read()
     result = self.api.post("/clientkey", data={"proxycert": userproxy})
     if result.status_code == 401:
         raise RuntimeError(
             "Authorization denied while retrieving dask certificate")
     elif result.status_code != 200:
         raise RuntimeError("Error while retrieving dask certificate")
     with open(client_cert_path, "w") as fout:
         fout.write(result.text)
     sec = distributed.security.Security(
         tls_ca_file=ca_path,
         tls_client_cert=client_cert_path,
         require_encryption=True,
     )
     if hostname is None:
         hostname = self.hostname
     url = f"tls://{hostname}:{port}"
     return distributed.Client(url, security=sec)
Example #7
File: dataset_io.py  Project: visr/xarray
 def setup(self):
     try:
         import distributed
     except ImportError:
         raise NotImplementedError
     self.client = distributed.Client()
     self.write = create_delayed_write()
Example #8
def get_client() -> distributed.Client:
    address = os.environ.get('SCHEDULER_PORT', None)

    for _ in range(10):
        try:
            client = distributed.Client(address=address)
        except OSError:
            continue
        else:
            break
    else:
        # for/else: every connection attempt failed (break was never reached)
        raise ValueError('Could not connect to scheduler')

    n_workers_ = os.environ.get('N_WORKERS', None)
    if n_workers_ is not None:
        n_workers = int(n_workers_)
        logging.debug('Waiting for: %d workers', n_workers)
        client.wait_for_workers(n_workers)

    egg_url = os.environ.get('DEPLOY_EGG', None)
    if egg_url is not None:
        logging.debug('Downloading and uploading egg: %s', egg_url)
        with tempfile.TemporaryDirectory() as tempdir:
            egg_path = Path(tempdir) / 'code.egg'
            src = urllib.request.urlopen(egg_url)
            with egg_path.open('wb') as dst:
                shutil.copyfileobj(src, dst)
            client.upload_file(str(egg_path))

    return client
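A sketch of driving get_client() through the environment variables it reads; the address value below is a placeholder, not taken from the source.

import os

# Hypothetical values; SCHEDULER_PORT is used as the full scheduler address here.
os.environ['SCHEDULER_PORT'] = 'tcp://scheduler.example.com:8786'
os.environ['N_WORKERS'] = '8'

client = get_client()
print(client)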
Example #9
def executor(kind: str,
             max_workers: int,
             daemon=True) -> typing.Iterator[Executor]:
    """General purpose utility to get an executor with its as_completed handler

    This allows us to easily use other executors as needed.
    """
    if kind == "thread":
        with ThreadPoolExecutor(max_workers=max_workers) as pool_t:
            yield pool_t
    elif kind == "process":
        with ProcessPoolExecutor(max_workers=max_workers) as pool_p:
            yield pool_p
    elif kind in ["dask", "dask-process", "dask-thread"]:
        import dask
        import distributed
        from distributed.cfexecutor import ClientExecutor

        processes = kind in ("dask", "dask-process")

        with dask.config.set({"distributed.worker.daemon": daemon}):
            with distributed.LocalCluster(
                    n_workers=max_workers,
                    processes=processes,
            ) as cluster:
                with distributed.Client(cluster) as client:
                    yield ClientExecutor(client)
    else:
        raise NotImplementedError("That kind is not implemented")
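Because the function yields from inside with-blocks, the source presumably uses it through contextlib.contextmanager (or an equivalent wrapper); a usage sketch with the wrapper applied explicitly here as an assumption.

import contextlib

# Assumed wrapper; the original project may already decorate executor itself.
executor_cm = contextlib.contextmanager(executor)

def square(x):
    return x * x

with executor_cm(kind="dask-thread", max_workers=4) as pool:
    futures = [pool.submit(square, i) for i in range(8)]
    print([f.result() for f in futures])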
Example #10
def test_prctl_on_docker(module_scoped_container_getter, tmp_path):
    network_info = module_scoped_container_getter.get("scheduler").network_info[0]
    client = distributed.Client(
        f"tcp://{network_info.hostname}:{network_info.host_port}", set_as_default=False
    )

    core_test(client, tmp_path)
Example #11
def dask_client():
    cluster = distributed.LocalCluster(n_workers=3, threads_per_worker=1)
    client = distributed.Client(cluster)

    yield client

    client.close()
    cluster.close()
Example #12
def test_use_distributed():
    # This Client is pretty cheap to start
    # since it only uses threads
    with distributed.Client(n_workers=1, threads_per_worker=1,
                            processes=False) as c:
        ctx = Context.make_with("dask-integration")
        assert isinstance(ctx.executor, DaskJobExecutor)
        assert ctx.executor.client is c
Example #13
 def client(self, attempts=10, **kwargs):
     '''Wait for scheduler to be initialized and return Client(self)'''
     block(self.instances[0][2])
     for i in reversed(range(attempts)):
         try:
             return distributed.Client(self, **kwargs)
         except (TimeoutError, ConnectionRefusedError, OSError) as e:
             if i == 0:
                 raise e
Example #14
def external_client():
    # Explicitly specify we want only 4 workers so that when running on
    # continuous integration we don't request too many.
    cluster = distributed.LocalCluster(n_workers=4)
    client = distributed.Client(cluster)
    yield client
    client.close()
    cluster.close()
Example #15
 def get_context(self):
     """Returns Dask Client for Scheduler"""
     details = self.get_config_data()
     if details is not None:
         print("Connect to Dask: %s" % details["master_url"])
         client = distributed.Client(details["master_url"])
         return client
     return None
Example #16
def dispatch_computations(job_args, tmp_dir, timestamp):
    client = distributed.Client('localhost:8786')
    webbrowser.open('http://localhost:8787')
    jobs = []
    for job_arg in job_args:
        job = client.submit(compute, *job_arg)
        jobs.append(job)
    check_jobs(jobs, job_args, tmp_dir, timestamp)
Example #17
def pipeline_scan(st,
                  segments=None,
                  cl=None,
                  host=None,
                  cfile=None,
                  vys_timeout=vys_timeout_default,
                  mem_read=0.,
                  mem_search=0.,
                  throttle=False,
                  mockseg=None):
    """ Given rfpipe state and dask distributed client, run search pipline.
    """

    if cl is None:
        if host is None:
            cl = distributed.Client(
                n_workers=1,
                threads_per_worker=16,
                resources={
                    "READER": 1,
                    "MEMORY": 16e9
                },
                local_dir="/lustre/evla/test/realfast/scratch")
        else:
            cl = distributed.Client('{0}:{1}'.format(host, '8786'))

    if not isinstance(segments, list):
        segments = list(range(st.nsegment))

    futures = []
    sleeptime = throttle * 0.8 * st.nints * st.inttime / st.nsegment  # bit shorter than scan
    for segment in segments:
        futures.append(
            pipeline_seg(st,
                         segment,
                         cl=cl,
                         cfile=cfile,
                         vys_timeout=vys_timeout,
                         mem_read=mem_read,
                         mem_search=mem_search,
                         mockseg=mockseg))
        if throttle:
            sleep(sleeptime)

    return futures  # list of tuples of futures (seg, data, cc, acc)
Example #18
 def check_dask(self):
     try:
         import distributed
         client = distributed.Client(self.nodes[0].strip() + ":8786")
         info = client.scheduler_info()
         workers = info.get("workers", {})
         print("Found %d workers: %s" % (len(workers), str(workers)))
         return info
     except Exception:
         pass
     return None
Example #19
 def test_executor_disables_watch_worker_events_with_false(self):
     with distributed.Client(n_workers=1,
                             processes=False,
                             set_as_default=False) as client:
         executor = DaskExecutor(address=client.scheduler.address,
                                 watch_worker_status=False)
         with executor.start():
             assert executor.watch_worker_status is False
             assert executor._watch_dask_events_task is None
Example #20
def test_bias_batch_recommend_dask(ml_folds: MLFolds):
    algo = Bias(damping=5)
    algo = TopN(algo)

    with closing(distributed.Client()), joblib.parallel_backend('dask'):
        recs = ml_folds.eval_all(algo, dask=True)
        assert isinstance(recs, ddf.DataFrame)

        ml_folds.check_positive_ndcg(recs)
Example #21
def dask_client(mock_service_envs: None) -> Iterable[distributed.Client]:
    print(pformat(dask.config.get("distributed")))
    with distributed.LocalCluster(
        worker_class=distributed.Worker,
        **{
            "resources": {"CPU": 10, "GPU": 10, "MPI": 1},
            "preload": "simcore_service_dask_sidecar.tasks",
        },
    ) as cluster:
        with distributed.Client(cluster) as client:
            yield client
Example #22
    def test_connect_to_running_cluster(self):
        with distributed.Client(processes=False, set_as_default=False) as client:
            executor = DaskExecutor(address=client.scheduler.address)
            assert executor.address == client.scheduler.address
            assert executor.cluster_class is None
            assert executor.cluster_kwargs is None
            assert executor.client_kwargs == {"set_as_default": False}

            with executor.start():
                res = executor.wait(executor.submit(lambda x: x + 1, 1))
                assert res == 2
Example #23
    def setup(self):
        try:
            import distributed
        except ImportError:
            raise NotImplementedError()

        # TODO: Lazily skipped in CI as it is very demanding and slow.
        # Improve times and remove errors.
        _skip_slow()

        self.client = distributed.Client()
        self.write = create_delayed_write()
Example #24
 def test_executor_enables_watch_worker_events_with_true(self):
     with distributed.Client(n_workers=1,
                             processes=False,
                             set_as_default=False) as client:
         executor = DaskExecutor(
             address=client.scheduler.address,
             watch_worker_status=True,
             adapt_kwargs={"maximum": 4},
         )
         with executor.start():
             assert executor.watch_worker_status is True
             assert executor._watch_dask_events_task is not None
Example #25
def init_client(processes, max_memory):
    memory_limit = int(max_memory / processes)
    memory_limit = '{0:d}MB'.format(memory_limit)
    logger.info(
        'Initialising client with {0:d} workers and {1:s} per worker'.format(
            processes, memory_limit))
    cluster = distributed.LocalCluster(n_workers=processes,
                                       threads_per_worker=1,
                                       memory_limit=memory_limit,
                                       local_directory='/scratch/u/u300636')
    client = distributed.Client(cluster)
    logger.info('Initialised client: {0}'.format(client))
    return client
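A short call sketch; judging from the division above, max_memory appears to be a total budget in megabytes split evenly across workers. Values are illustrative, and note the hard-coded local_directory inside init_client.

# Hypothetical call: 8 single-threaded workers with roughly 2000MB each.
client = init_client(processes=8, max_memory=16000)
print(client)
client.close()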
Example #26
    def execute(self, wf, client=None):
        # global client, graph

        services = {"machines":
            [
                {
                    "name": "freyja",
                    "address": "freyja.nsls2.bnl.gov",
                    "port": 22,
                    "username": "******",
                    "password": "******",
                    "environment": {"PYTHONUSERBASE": "/tmp"},
                    "config_dir": "/tmp/camera"
                }
            ],
            "graph":
                {
                    "configure":
                        [
                            {
                                "machine": "freyja",
                                "apps":
                                    [
                                        {"name": "dask/dask-scheduler"}
                                    ],
                                "tasks": ["dask-cluster"]
                            }
                        ]
                }
        }

        services = json.dumps(services)
        print(services)

        graph = task_graph.Graph()
        graph.parse_stream(services)
        graph.start_tasks()
        graph.connect()
        graph.execute()

        meta_data = graph.machines[0].tasks[0].request_meta_data()

        local_port = graph.machines[0].node.get_free_local_port()
        remote_port = meta_data[0][0]
        print(local_port, remote_port)
        graph.machines[0].node.forward_tunnel(local_port, "localhost", remote_port)

        client = distributed.Client("tcp://localhost:" + str(local_port))

        return super(CamLinkExecutor, self).execute(wf, client)[0]
Example #27
    def start(self):
        if (self.tls_ca) or (self.tls_key) or (self.tls_cert):
            from distributed.security import Security
            security = Security(
                tls_client_key=self.tls_key,
                tls_client_cert=self.tls_cert,
                tls_ca_file=self.tls_ca,
            )
        else:
            security = None

        self.client = distributed.Client(self.cluster_address,
                                         security=security)
        self.futures = {}
Example #28
def test_setup_executor_distributed(n_workers=1, threads_per_worker=2):
    cluster = distributed.LocalCluster(n_workers=n_workers,
                                       threads_per_worker=threads_per_worker)
    client = distributed.Client(cluster)
    address = cluster.scheduler.address

    test = executor.setup_executor(address)

    assert test.scheduler.address == cluster.scheduler.address
    assert client.scheduler_info() == test.scheduler_info()

    test.close()
    cluster.close()
    client.close()
Example #29
def start_jupyterlab():
    scheduler_address = get_scheduler_address()
    client = distributed.Client(f'{scheduler_address}')
    host = client.run_on_scheduler(socket.gethostname)
    userid = getpass.getuser()

    with open(LOG_DIRECTORY + 'jupyterlab.log', 'wb') as fp:
        subprocess.run(f'jupyter-lab --port=7787 --no-browser &',
                       shell=SHELL,
                       check=True,
                       stdout=fp,
                       stderr=subprocess.STDOUT)

    ssh_jpylab = f"ssh -t -L 7787:localhost:7787 -L 8787:localhost:8787 {userid}@theta.alcf.anl.gov ssh -t -L 7787:localhost:7787 -L 8787:localhost:8787 {MOMNODE} ssh -t -L 7787:localhost:7787 -L 8787:localhost:8787 {host}"
    print(
        f"\nTo connect to JupyterLab and Dask dashboard, execute the following command in a shell on your local machine:\n    {ssh_jpylab}\n"
    )
    logging.info(
        f"To connect to JupyterLab and Dask dashboard, execute the following command in a shell on your local machine:\n    {ssh_jpylab}\n"
    )

    jpylab_url = ''
    counter = 0
    while jpylab_url == '':
        time.sleep(1)
        #f"sed -n '/^[^[].*localhost:7787/p' {LOG_DIRECTORY}jupyterlab.log",
        w = subprocess.run(
            f"jupyter server list | sed -n '/localhost:7787/ s/ .*//p'",
            shell=SHELL,
            capture_output=True,
            text=True)
        jpylab_url = w.stdout.strip()
        counter += 1
        if counter > 20:
            jpylab_url = 'http://localhost:7787/'
            break

    print(
        f"To open JupyterLab, go to (see log file {LOG_DIRECTORY}jupyterlab.log):\n    {jpylab_url}\n"
    )
    logging.info(
        f"To open JupyterLab, go to (see log file {LOG_DIRECTORY}jupyterlab.log):\n    {jpylab_url}\n"
    )

    print(
        f"To open the Dask dashboard, go to:\n    http://localhost:8787/status\n"
    )
    logging.info(
        f"To open the Dask dashboard, go to:\n    http://localhost:8787/status\n"
    )
Example #30
    def start(self):
        if self.tls_ca or self.tls_key or self.tls_cert:
            from distributed.security import Security
            security = Security(
                tls_client_key=self.tls_key,
                tls_client_cert=self.tls_cert,
                tls_ca_file=self.tls_ca,
                require_encryption=True,
            )
        else:
            security = None

        self.client = distributed.Client(self.cluster_address, security=security)
        self.futures = {}