Code Example #1
File: dask_executor.py Project: Nextdoor/airflow
    def test_backfill_integration(self):
        """
        Test that DaskExecutor can be used to backfill example dags
        """
        cluster = LocalCluster()

        dags = [
            dag for dag in self.dagbag.dags.values()
            if dag.dag_id in [
                'example_bash_operator',
                # 'example_python_operator',
            ]
        ]

        for dag in dags:
            dag.clear(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE)

        for i, dag in enumerate(sorted(dags, key=lambda d: d.dag_id)):
            job = BackfillJob(
                dag=dag,
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_first_depends_on_past=True,
                executor=DaskExecutor(
                    cluster_address=cluster.scheduler_address))
            job.run()

        cluster.close()
Code Example #2
File: test_application.py Project: dask/distributed
def test_bokeh_shutsdown_without_cluster___del__(loop):
    c = LocalCluster(2, loop=loop, scheduler_port=0,
                     services={('http', 0): HTTPScheduler})
    proc = c.diagnostics.process
    # don't run the del, as it isn't ever run in python < 3.5 due to cycles
    c.__del__ = lambda self: None
    del c
    start = time()
    while True:
        if proc.poll() is not None:
            break
        assert time() < start + 5
        sleep(0.01)
Code Example #3
class DaskExecutorTest(BaseDaskTest):

    def setUp(self):
        self.dagbag = DagBag(include_examples=True)
        self.cluster = LocalCluster()

    @unittest.skipIf(SKIP_DASK, 'Dask unsupported by this configuration')
    def test_dask_executor_functions(self):
        executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)
        self.assert_tasks_on_executor(executor)

    @unittest.skipIf(SKIP_DASK, 'Dask unsupported by this configuration')
    def test_backfill_integration(self):
        """
        Test that DaskExecutor can be used to backfill example dags
        """
        dags = [
            dag for dag in self.dagbag.dags.values()
            if dag.dag_id in [
                'example_bash_operator',
                # 'example_python_operator',
            ]
        ]

        for dag in dags:
            dag.clear(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE)

        for i, dag in enumerate(sorted(dags, key=lambda d: d.dag_id)):
            job = BackfillJob(
                dag=dag,
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_first_depends_on_past=True,
                executor=DaskExecutor(
                    cluster_address=self.cluster.scheduler_address))
            job.run()

    def tearDown(self):
        self.cluster.close(timeout=5)
Code Example #4
File: dask_executor.py Project: Nextdoor/airflow
    def test_dask_executor_functions(self):
        cluster = LocalCluster()

        executor = DaskExecutor(cluster_address=cluster.scheduler_address)

        # start the executor
        executor.start()

        success_command = 'echo 1'
        fail_command = 'exit 1'

        executor.execute_async(key='success', command=success_command)
        executor.execute_async(key='fail', command=fail_command)

        success_future = next(
            k for k, v in executor.futures.items() if v == 'success')
        fail_future = next(
            k for k, v in executor.futures.items() if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = datetime.datetime.now() + datetime.timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if datetime.datetime.now() > timeout:
                raise ValueError(
                    'The futures should have finished; there is probably '
                    'an error communicating with the Dask cluster.')

        # both tasks should have finished
        self.assertTrue(success_future.done())
        self.assertTrue(fail_future.done())

        # check task exceptions
        self.assertTrue(success_future.exception() is None)
        self.assertTrue(fail_future.exception() is not None)

        cluster.close()
Code Example #5
File: test_with_dask.py Project: tpboudreau/xgboost
def test_dask_missing_value_reg() -> None:
    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X_0 = np.ones((20 // 2, kCols))
            X_1 = np.zeros((20 // 2, kCols))
            X = np.concatenate([X_0, X_1], axis=0)
            np.random.shuffle(X)
            X = da.from_array(X)
            X = X.rechunk(20, 1)
            y = da.random.randint(0, 3, size=20)
            y = y.rechunk(20)  # rechunk returns a new array; reassign to keep it
            regressor = xgb.dask.DaskXGBRegressor(verbosity=1,
                                                  n_estimators=2,
                                                  missing=0.0)
            regressor.client = client
            regressor.set_params(tree_method='hist')
            regressor.fit(X, y, eval_set=[(X, y)])
            dd_predt = regressor.predict(X).compute()

            np_X = X.compute()
            np_predt = regressor.get_booster().predict(
                xgb.DMatrix(np_X, missing=0.0))
            np.testing.assert_allclose(np_predt, dd_predt)
Code Example #6
File: deploy.py Project: JaneliaSciComp/dask-janelia
def get_LocalCluster(threads_per_worker: int = 1,
                     n_workers: int = 0,
                     **kwargs):
    """
    Create a distributed.LocalCluster with defaults that make it more similar to a deployment on the Janelia Compute cluster.
    This function is a light wrapper around the distributed.LocalCluster constructor.

    Parameters
    ----------
    n_workers: int
        The number of workers to start the cluster with. This defaults to 0 here.
    threads_per_worker: int
        The number of threads to assign to each worker.
    **kwargs:
        Additional keyword arguments passed to the LocalCluster constructor
    Examples
    --------

    >>> cluster = get_LocalCluster(threads_per_worker=8)
    """
    return LocalCluster(n_workers=n_workers,
                        threads_per_worker=threads_per_worker,
                        **kwargs)
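A short usage sketch for the helper above (not part of the original project; the Client calls are standard distributed API, and at least one worker is requested explicitly because the helper defaults to zero workers):

from distributed import Client

# Connect a client to the cluster returned by get_LocalCluster,
# run a trivial task, then shut everything down.
cluster = get_LocalCluster(threads_per_worker=2, n_workers=1)
client = Client(cluster)
try:
    print(client.submit(sum, [1, 2, 3]).result())  # expected: 6
finally:
    client.close()
    cluster.close()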
Code Example #7
def test_no_more_workers_than_tasks():
    loop = IOLoop.current()
    cluster = yield LocalCluster(
        0,
        scheduler_port=0,
        silence_logs=False,
        processes=False,
        dashboard_address=None,
        loop=loop,
        asynchronous=True,
    )
    yield cluster._start()
    try:
        adapt = cluster.adapt(minimum=0, maximum=4, interval="10 ms")
        client = yield Client(cluster, asynchronous=True, loop=loop)
        cluster.scheduler.task_duration["slowinc"] = 1000

        yield client.submit(slowinc, 1, delay=0.100)

        assert len(cluster.scheduler.workers) <= 1
    finally:
        yield client.close()
        yield cluster.close()
Code Example #8
async def test_adapt_down():
    """ Ensure that redefining adapt with a lower maximum removes workers """
    async with LocalCluster(
        0,
        asynchronous=True,
        processes=False,
        scheduler_port=0,
        silence_logs=False,
        dashboard_address=None,
    ) as cluster:
        async with Client(cluster, asynchronous=True) as client:
            cluster.adapt(interval="20ms", maximum=5)

            futures = client.map(slowinc, range(1000), delay=0.1)
            while len(cluster.scheduler.workers) < 5:
                await gen.sleep(0.1)

            cluster.adapt(maximum=2)

            start = time()
            while len(cluster.scheduler.workers) != 2:
                await gen.sleep(0.1)
                assert time() < start + 1
Code Example #9
File: test_with_dask.py Project: poker-HuDL/xgboost
def test_dask_regressor() -> None:
    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X, y, w = generate_array(with_weights=True)
            regressor = xgb.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
            assert regressor._estimator_type == "regressor"
            assert sklearn.base.is_regressor(regressor)

            regressor.set_params(tree_method='hist')
            regressor.client = client
            regressor.fit(X, y, sample_weight=w, eval_set=[(X, y)])
            prediction = regressor.predict(X)

            assert prediction.ndim == 1
            assert prediction.shape[0] == kRows

            history = regressor.evals_result()

            assert isinstance(prediction, da.Array)
            assert isinstance(history, dict)

            assert list(history['validation_0'].keys())[0] == 'rmse'
            assert len(history['validation_0']['rmse']) == 2
Code Example #10
def test_worker_keys():
    """ Ensure that redefining adapt with a lower maximum removes workers """
    cluster = yield LocalCluster(
        0,
        asynchronous=True,
        processes=False,
        scheduler_port=0,
        silence_logs=False,
        dashboard_address=None,
    )

    try:
        yield [
            cluster.start_worker(name="a-1"),
            cluster.start_worker(name="a-2"),
            cluster.start_worker(name="b-1"),
            cluster.start_worker(name="b-2"),
        ]

        while len(cluster.scheduler.workers) != 4:
            yield gen.sleep(0.01)

        def key(ws):
            return ws.name.split("-")[0]

        cluster._adaptive_options = {"worker_key": key}

        adaptive = cluster.adapt(minimum=1)
        yield adaptive._adapt()

        while len(cluster.scheduler.workers) == 4:
            yield gen.sleep(0.01)

        names = {ws.name for ws in cluster.scheduler.workers.values()}
        assert names == {"a-1", "a-2"} or names == {"b-1", "b-2"}
    finally:
        yield cluster.close()
Code Example #11
    def test_data_initialization(self):
        '''Assert each worker has the correct amount of data, and DMatrix initialization doesn't
        generate unnecessary copies of data.

        '''
        with LocalCluster(n_workers=2) as cluster:
            with Client(cluster) as client:
                X, y = generate_array()
                n_partitions = X.npartitions
                m = xgb.dask.DaskDMatrix(client, X, y)
                workers = list(xgb.dask._get_client_workers(client).keys())
                rabit_args = client.sync(xgb.dask._get_rabit_args, workers, client)
                n_workers = len(workers)

                def worker_fn(worker_addr, data_ref):
                    with xgb.dask.RabitContext(rabit_args):
                        local_dtrain = xgb.dask._dmatrix_from_worker_map(**data_ref)
                        total = np.array([local_dtrain.num_row()])
                        total = xgb.rabit.allreduce(total, xgb.rabit.Op.SUM)
                        assert total[0] == kRows

                futures = client.map(
                    worker_fn, workers, [m.create_fn_args()] * len(workers),
                    pure=False, workers=workers)
                client.gather(futures)

                has_what = client.has_what()
                cnt = 0
                data = set()
                for k, v in has_what.items():
                    for d in v:
                        cnt += 1
                        data.add(d)

                assert len(data) == cnt
                # Subtract the on disk resource from each worker
                assert cnt - n_workers == n_partitions
Code Example #12
def test_adaptive_local_cluster_multi_workers():
    cluster = yield LocalCluster(0, scheduler_port=0, silence_logs=False,
                                 processes=False, diagnostics_port=None,
                                 asynchronous=True)
    try:
        cluster.scheduler.allowed_failures = 1000
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        c = yield Client(cluster, asynchronous=True)

        futures = c.map(slowinc, range(100), delay=0.01)

        start = time()
        while not cluster.scheduler.workers:
            yield gen.sleep(0.01)
            assert time() < start + 15, alc.log

        yield c.gather(futures)
        del futures

        start = time()
        # while cluster.workers:
        while cluster.scheduler.workers:
            yield gen.sleep(0.01)
            assert time() < start + 15, alc.log

        # assert not cluster.workers
        assert not cluster.scheduler.workers
        yield gen.sleep(0.2)
        # assert not cluster.workers
        assert not cluster.scheduler.workers

        futures = c.map(slowinc, range(100), delay=0.01)
        yield c.gather(futures)

    finally:
        yield c._close()
        yield cluster._close()
Code Example #13
File: test_with_dask.py Project: lookenwu/xgboost
def test_dask_classifier():
    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            y = (y * 10).astype(np.int32)
            classifier = xgb.dask.DaskXGBClassifier(verbosity=1,
                                                    n_estimators=2)
            classifier.client = client
            classifier.fit(X, y, eval_set=[(X, y)])
            prediction = classifier.predict(X)

            assert prediction.ndim == 2
            assert prediction.shape[0] == kRows

            history = classifier.evals_result()

            assert isinstance(prediction, da.Array)
            assert isinstance(history, dict)

            assert list(history.keys())[0] == 'validation_0'
            assert list(history['validation_0'].keys())[0] == 'merror'
            assert len(list(history['validation_0'])) == 1
            assert len(history['validation_0']['merror']) == 2

            assert classifier.n_classes_ == 10

            # Test with dataframe.
            X_d = dd.from_dask_array(X)
            y_d = dd.from_dask_array(y)
            classifier.fit(X_d, y_d)

            assert classifier.n_classes_ == 10
            prediction = classifier.predict(X_d)

            assert prediction.ndim == 2
            assert prediction.shape[0] == kRows
Code Example #14
def test_from_dask_dataframe():
    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            X, y = generate_array()

            X = dd.from_dask_array(X)
            y = dd.from_dask_array(y)

            dtrain = DaskDMatrix(client, X, y)
            booster = xgb.dask.train(
                client, {}, dtrain, num_boost_round=2)['booster']

            prediction = xgb.dask.predict(client, model=booster, data=dtrain)

            assert prediction.ndim == 1
            assert isinstance(prediction, da.Array)
            assert prediction.shape[0] == kRows

            with pytest.raises(ValueError):
                # evals_result is not supported in dask interface.
                xgb.dask.train(
                    client, {}, dtrain, num_boost_round=2, evals_result={})
            # force prediction to be computed
            from_dmatrix = prediction.compute()

            prediction = xgb.dask.predict(client, model=booster, data=X)
            from_df = prediction.compute()

            assert isinstance(prediction, dd.Series)
            assert np.all(prediction.compute().values == from_dmatrix)
            assert np.all(from_dmatrix == from_df.to_numpy())

            series_predictions = xgb.dask.inplace_predict(client, booster, X)
            assert isinstance(series_predictions, dd.Series)
            np.testing.assert_allclose(series_predictions.compute().values,
                                       from_dmatrix)
Code Example #15
File: test_adaptive.py Project: sklam/distributed
def test_adaptive_local_cluster(loop):
    with LocalCluster(0,
                      scheduler_port=0,
                      silence_logs=False,
                      diagnostics_port=None,
                      loop=loop) as cluster:
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        with Client(cluster, loop=loop) as c:
            assert not c.ncores()
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.ncores()

            sleep(0.1)
            assert c.ncores()  # still there after some time

            del future

            start = time()
            while cluster.scheduler.ncores:
                sleep(0.01)
                assert time() < start + 5

            assert not c.ncores()
Code Example #16
File: test_adaptive.py Project: sklam/distributed
def test_avoid_churn():
    """ We want to avoid creating and deleting workers frequently

    Instead we want to wait a few beats before removing a worker in case the
    user is taking a brief pause between work
    """
    cluster = yield LocalCluster(0,
                                 asynchronous=True,
                                 processes=False,
                                 scheduler_port=0,
                                 silence_logs=False,
                                 diagnostics_port=None)
    client = yield Client(cluster, asynchronous=True)
    try:
        adapt = Adaptive(cluster.scheduler, cluster, interval=20, wait_count=5)

        for i in range(10):
            yield client.submit(slowinc, i, delay=0.040)
            yield gen.sleep(0.040)

        assert frequencies(pluck(1, adapt.log)) == {'up': 1}
    finally:
        yield client._close()
        yield cluster._close()
Code Example #17
File: dask-dot-prod2.py Project: MattBBaker/ucx-py
def main(args=None):
    args = parse_args(args)

    if args.protocol == 'ucx':
        sched_str = "ucx://"+ args.server + ":13337"
        client = Client(sched_str)
    elif args.protocol == 'tcp':
        sched_str = "tcp://"+ args.server + ":13337"
        client = Client(sched_str)
    else:
        kwargs = {'n_workers': 2, 'threads_per_worker': 40}
        kwargs['processes'] = args.protocol == 'tcp'
        cluster = LocalCluster(**kwargs)
        client = Client(cluster)

    print(f"Connected to {client}")
    N = int(args.length)
    P = int(args.length)
    RS = da.random.RandomState(RandomState=cupy.random.RandomState)
    #RS = da.random.RandomState(123)
    X = RS.normal(10, 1, size=(N, P))
    #X = da.random.uniform(size=(N, P), chunks=(N/100, P/100))
    X = X.persist()  # persist returns a new collection; keep a reference so the data stays in memory
    print(format_bytes(X.nbytes))

    result = (X + X.T).sum() #(x + x.T).sum().compute()
    start = clock()
    result.compute()
    #with get_task_stream() as ts:
    #    result.compute()
    stop = clock()
    #print(ts.data)
    print(result)
    print(format_bytes(X.nbytes))
    print(f"\tTook {stop - start:0.2f}s")
    time.sleep(1)
Code Example #18
def spawn_cluster_and_client(
        address: Optional[str] = None,
        **kwargs) -> Tuple[Optional[LocalCluster], Optional[Client]]:
    """
    If provided an address, create a Dask Client connection.
    If not provided an address, create a LocalCluster and Client connection.
    If not provided an address, other Dask kwargs are accepted and passed down to the
    LocalCluster object.

    Notes
    -----
    When using this function, the processing machine or container must have networking
    capabilities enabled to function properly.
    """
    cluster = None
    if address is not None:
        client = Client(address)
        log.info(f"Connected to Remote Dask Cluster: {client}")
    else:
        cluster = LocalCluster(**kwargs)
        client = Client(cluster)
        log.info(f"Connected to Local Dask Cluster: {client}")

    return cluster, client
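A minimal usage sketch for spawn_cluster_and_client (illustrative only; with no address given, a LocalCluster is created from the kwargs):

# Hypothetical usage: no address, so a LocalCluster is spawned locally.
cluster, client = spawn_cluster_and_client(n_workers=2)
try:
    print(client.submit(lambda x: x + 1, 41).result())  # expected: 42
finally:
    client.close()
    if cluster is not None:
        cluster.close()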
Code Example #19
def test_adaptive_local_cluster(loop):
    with LocalCluster(0,
                      scheduler_port=0,
                      silence_logs=False,
                      dashboard_address=None,
                      loop=loop) as cluster:
        alc = cluster.adapt(interval="100 ms")
        with Client(cluster, loop=loop) as c:
            assert not c.nthreads()
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.nthreads()

            sleep(0.1)
            assert c.nthreads()  # still there after some time

            del future

            start = time()
            while cluster.scheduler.nthreads:
                sleep(0.01)
                assert time() < start + 5

            assert not c.nthreads()
Code Example #20
async def test_adaptive_local_cluster_multi_workers(cleanup):
    async with LocalCluster(
        0,
        scheduler_port=0,
        silence_logs=False,
        processes=False,
        dashboard_address=None,
        asynchronous=True,
    ) as cluster:

        cluster.scheduler.allowed_failures = 1000
        adapt = cluster.adapt(interval="100 ms")
        async with Client(cluster, asynchronous=True) as c:
            futures = c.map(slowinc, range(100), delay=0.01)

            start = time()
            while not cluster.scheduler.workers:
                await asyncio.sleep(0.01)
                assert time() < start + 15, adapt.log

            await c.gather(futures)
            del futures

            start = time()
            # while cluster.workers:
            while cluster.scheduler.workers:
                await asyncio.sleep(0.01)
                assert time() < start + 15, adapt.log

            # no workers for a while
            for i in range(10):
                assert not cluster.scheduler.workers
                await asyncio.sleep(0.05)

            futures = c.map(slowinc, range(100), delay=0.01)
            await c.gather(futures)
Code Example #21
File: core.py Project: lebedov/dask-drmaa
    def __init__(self, template=None, cleanup_interval=1000, hostname=None,
                 script=None, preexec_commands=(), copy_script=True,
                 ip='',
                 **kwargs):
        """
        Dask workers launched by a DRMAA-compatible cluster

        Parameters
        ----------
        template: dict
            Dictionary specifying options to pass to the DRMAA cluster
            and the worker. Relevant items are:

            jobName: string
                Name of the job as known by the DRMAA cluster.
            args: list
                Extra string arguments to pass to dask-worker
            outputPath: string
                Path to the dask-worker stdout. Must start with ':'.
                Defaults to worker.JOBID.TASKID.out in current directory.
            errorPath: string
                Path to the dask-worker stderr. Must start with ':'
                Defaults to worker.JOBID.TASKID.err in current directory.
            workingDirectory: string
                Where dask-worker runs, defaults to current directory
            nativeSpecification: string
                Options native to the job scheduler

        cleanup_interval: int
            Time interval in seconds at which closed workers are cleaned.
            Defaults to 1000
        hostname: string
            Host on which to start the local scheduler, defaults to localhost
        script: string (optional)
            Path to the dask-worker executable script.
            A temporary file will be made if none is provided (recommended)
        preexec_commands: tuple (optional)
            Commands to be executed first by temporary script. Cannot be
            specified at the same time as script.
        copy_script: bool
            Whether the passed script should be copied to the current working
            directory. This is primarily to work around an issue with SGE.
        ip: string
            IP of the scheduler, default is the empty string
            which will listen on the primary ip address of the host
        **kwargs:
            Additional keyword arguments to be passed to the local scheduler

        Examples
        --------
        >>> from dask_drmaa import DRMAACluster          # doctest: +SKIP
        >>> cluster = DRMAACluster()                     # doctest: +SKIP
        >>> cluster.start_workers(10)                    # doctest: +SKIP

        >>> from distributed import Client               # doctest: +SKIP
        >>> client = Client(cluster)                     # doctest: +SKIP

        >>> future = client.submit(lambda x: x + 1, 10)  # doctest: +SKIP
        >>> future.result()                              # doctest: +SKIP
        11
        """
        self.hostname = hostname or socket.gethostname()
        logger.info("Start local scheduler at %s", self.hostname)
        self.local_cluster = LocalCluster(n_workers=0, ip=ip, **kwargs)

        if script is None:
            fn = os.path.abspath(tempfile.mktemp(
                suffix='.sh',
                prefix='dask-worker-script-',
                dir=os.path.curdir,
            ))
            self.script = fn
            self._should_cleanup_script = True

            script_contents = make_job_script(executable=worker_bin_path,
                                              name='%s.%s' % (JOB_ID, TASK_ID),
                                              preexec=preexec_commands)
            with open(fn, 'wt') as f:
                f.write(script_contents)

            @atexit.register
            def remove_script():
                if os.path.exists(fn):
                    os.remove(fn)

            os.chmod(self.script, 0o777)

        else:
            self._should_cleanup_script = False
            if copy_script:
                with ignoring(EnvironmentError):  # may be in the same path
                    shutil.copy(script, os.path.curdir)  # python 2.x returns None
                    script = os.path.join(os.path.curdir, os.path.basename(script))
                    self._should_cleanup_script = True
            self.script = os.path.abspath(script)
            assert not preexec_commands, "Cannot specify both script and preexec_commands"

        # TODO: check that user-provided script is executable

        self.template = merge(default_template,
                              {'remoteCommand': self.script},
                              template or {})

        self._cleanup_callback = PeriodicCallback(callback=self.cleanup_closed_workers,
                                                  callback_time=cleanup_interval,
                                                  io_loop=self.scheduler.loop)
        self._cleanup_callback.start()

        self.workers = {}  # {job-id: WorkerSpec}
Code Example #22
File: core.py Project: lebedov/dask-drmaa
class DRMAACluster(Cluster):
    def __init__(self, template=None, cleanup_interval=1000, hostname=None,
                 script=None, preexec_commands=(), copy_script=True,
                 ip='',
                 **kwargs):
        """
        Dask workers launched by a DRMAA-compatible cluster

        Parameters
        ----------
        template: dict
            Dictionary specifying options to pass to the DRMAA cluster
            and the worker. Relevant items are:

            jobName: string
                Name of the job as known by the DRMAA cluster.
            args: list
                Extra string arguments to pass to dask-worker
            outputPath: string
                Path to the dask-worker stdout. Must start with ':'.
                Defaults to worker.JOBID.TASKID.out in current directory.
            errorPath: string
                Path to the dask-worker stderr. Must start with ':'
                Defaults to worker.JOBID.TASKID.err in current directory.
            workingDirectory: string
                Where dask-worker runs, defaults to current directory
            nativeSpecification: string
                Options native to the job scheduler

        cleanup_interval: int
            Time interval in seconds at which closed workers are cleaned.
            Defaults to 1000
        hostname: string
            Host on which to start the local scheduler, defaults to localhost
        script: string (optional)
            Path to the dask-worker executable script.
            A temporary file will be made if none is provided (recommended)
        preexec_commands: tuple (optional)
            Commands to be executed first by temporary script. Cannot be
            specified at the same time as script.
        copy_script: bool
            Whether the passed script should be copied to the current working
            directory. This is primarily to work around an issue with SGE.
        ip: string
            IP of the scheduler, default is the empty string
            which will listen on the primary ip address of the host
        **kwargs:
            Additional keyword arguments to be passed to the local scheduler

        Examples
        --------
        >>> from dask_drmaa import DRMAACluster          # doctest: +SKIP
        >>> cluster = DRMAACluster()                     # doctest: +SKIP
        >>> cluster.start_workers(10)                    # doctest: +SKIP

        >>> from distributed import Client               # doctest: +SKIP
        >>> client = Client(cluster)                     # doctest: +SKIP

        >>> future = client.submit(lambda x: x + 1, 10)  # doctest: +SKIP
        >>> future.result()                              # doctest: +SKIP
        11
        """
        self.hostname = hostname or socket.gethostname()
        logger.info("Start local scheduler at %s", self.hostname)
        self.local_cluster = LocalCluster(n_workers=0, ip=ip, **kwargs)

        if script is None:
            fn = os.path.abspath(tempfile.mktemp(
                suffix='.sh',
                prefix='dask-worker-script-',
                dir=os.path.curdir,
            ))
            self.script = fn
            self._should_cleanup_script = True

            script_contents = make_job_script(executable=worker_bin_path,
                                              name='%s.%s' % (JOB_ID, TASK_ID),
                                              preexec=preexec_commands)
            with open(fn, 'wt') as f:
                f.write(script_contents)

            @atexit.register
            def remove_script():
                if os.path.exists(fn):
                    os.remove(fn)

            os.chmod(self.script, 0o777)

        else:
            self._should_cleanup_script = False
            if copy_script:
                with ignoring(EnvironmentError):  # may be in the same path
                    shutil.copy(script, os.path.curdir)  # python 2.x returns None
                    script = os.path.join(os.path.curdir, os.path.basename(script))
                    self._should_cleanup_script = True
            self.script = os.path.abspath(script)
            assert not preexec_commands, "Cannot specify both script and preexec_commands"

        # TODO: check that user-provided script is executable

        self.template = merge(default_template,
                              {'remoteCommand': self.script},
                              template or {})

        self._cleanup_callback = PeriodicCallback(callback=self.cleanup_closed_workers,
                                                  callback_time=cleanup_interval,
                                                  io_loop=self.scheduler.loop)
        self._cleanup_callback.start()

        self.workers = {}  # {job-id: WorkerSpec}

    def adapt(self, **kwargs):
        """ Turn on adaptivity

        For keyword arguments see dask_drmaa.adaptive.Adaptive

        Examples
        --------
        >>> cluster.adapt(minimum=0, maximum=10, interval='500ms')

        See Also
        --------
        Cluster: an interface for other clusters to inherit from
        """
        from .adaptive import Adaptive

        with ignoring(AttributeError):
            self._adaptive.stop()
        if not hasattr(self, '_adaptive_options'):
            self._adaptive_options = {}

        self._adaptive_options.update(kwargs)
        self._adaptive = Adaptive(
            self, self.scheduler, **self._adaptive_options
        )

        return self._adaptive

    @gen.coroutine
    def _start(self):
        pass

    @property
    def scheduler(self):
        return self.local_cluster.scheduler

    def create_job_template(self, **kwargs):
        template = self.template.copy()
        if kwargs:
            template.update(kwargs)
        template['args'] = [self.scheduler_address] + template['args']

        jt = get_session().createJobTemplate()
        valid_attributes = dir(jt)

        for key, value in template.items():
            if key not in valid_attributes:
                raise ValueError("Invalid job template attribute %s" % key)
            setattr(jt, key, value)

        return jt

    def start_workers(self, n=1, **kwargs):
        if n == 0:
            return

        with log_errors():
            with self.create_job_template(**kwargs) as jt:
                ids = get_session().runBulkJobs(jt, 1, n, 1)
                logger.info("Start %d workers. Job ID: %s", len(ids), ids[0].split('.')[0])
                self.workers.update(
                    {jid: WorkerSpec(job_id=jid, kwargs=kwargs,
                                     stdout=worker_out_path_template % dict(jid=jid, ext='out'),
                                     stderr=worker_out_path_template % dict(jid=jid, ext='err'),
                                     )
                     for jid in ids})

    @gen.coroutine
    def stop_workers(self, worker_ids, sync=False):
        if isinstance(worker_ids, str):
            worker_ids = [worker_ids]
        elif worker_ids:
            worker_ids = list(worker_ids)
        else:
            return

        # Let the scheduler gracefully retire workers first
        ids_to_ips = {
            v['name']: k for k, v in self.scheduler.worker_info.items()
        }
        worker_ips = [ids_to_ips[wid]
                      for wid in worker_ids
                      if wid in ids_to_ips]
        retired = yield self.scheduler.retire_workers(workers=worker_ips,
                                                      close_workers=True)
        logger.info("Retired workers %s", retired)
        for wid in list(worker_ids):
            try:
                get_session().control(wid, drmaa.JobControlAction.TERMINATE)
            except drmaa.errors.InvalidJobException:
                pass
            try:
                self.workers.pop(wid)
            except KeyError:
                # If we have multiple callers at once, it may have already
                # been popped off
                pass

        logger.info("Stop workers %s", worker_ids)
        if sync:
            get_session().synchronize(worker_ids, dispose=True)

    @gen.coroutine
    def scale_up(self, n, **kwargs):
        yield [self.start_workers(**kwargs)
               for _ in range(n - len(self.workers))]

    @gen.coroutine
    def scale_down(self, workers):
        workers = set(workers)
        yield self.scheduler.retire_workers(workers=workers)

    def close(self):
        logger.info("Closing DRMAA cluster")
        self.stop_workers(self.workers, sync=True)

        self.local_cluster.close()
        if self._should_cleanup_script and os.path.exists(self.script):
            os.remove(self.script)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def cleanup_closed_workers(self):
        for jid in list(self.workers):
            if get_session().jobStatus(jid) in ('closed', 'done'):
                logger.info("Removing closed worker %s", jid)
                del self.workers[jid]

    def __del__(self):
        try:
            self.close()
        except:
            pass

    def __str__(self):
        return "<%s: %d workers>" % (self.__class__.__name__, len(self.workers))

    __repr__ = __str__
Code Example #23
File: test_with_dask.py Project: Firamis/xgboost-1
def test_aft_survival():
    with LocalCluster(n_workers=1) as cluster:
        with Client(cluster) as client:
            run_aft_survival(client, DaskDMatrix)
Code Example #24
File: test_with_dask.py Project: Firamis/xgboost-1
def test_empty_dmatrix_approx():
    with LocalCluster(n_workers=kWorkers) as cluster:
        with Client(cluster) as client:
            parameters = {'tree_method': 'approx'}
            run_empty_dmatrix_reg(client, parameters)
            run_empty_dmatrix_cls(client, parameters)
Code Example #25
File: test_with_dask.py Project: Firamis/xgboost-1
def test_boost_from_prediction(tree_method):
    if tree_method == 'approx':
        pytest.xfail(reason='test_boost_from_prediction[approx] is flaky')

    from sklearn.datasets import load_breast_cancer
    X, y = load_breast_cancer(return_X_y=True)

    X_ = dd.from_array(X, chunksize=100)
    y_ = dd.from_array(y, chunksize=100)

    with LocalCluster(n_workers=4) as cluster:
        with Client(cluster) as _:
            model_0 = xgb.dask.DaskXGBClassifier(
                learning_rate=0.3,
                random_state=123,
                n_estimators=4,
                tree_method=tree_method,
            )
            model_0.fit(X=X_, y=y_)
            margin = model_0.predict_proba(X_, output_margin=True)

            model_1 = xgb.dask.DaskXGBClassifier(
                learning_rate=0.3,
                random_state=123,
                n_estimators=4,
                tree_method=tree_method,
            )
            model_1.fit(X=X_, y=y_, base_margin=margin)
            predictions_1 = model_1.predict(X_, base_margin=margin)
            proba_1 = model_1.predict_proba(X_, base_margin=margin)

            cls_2 = xgb.dask.DaskXGBClassifier(
                learning_rate=0.3,
                random_state=123,
                n_estimators=8,
                tree_method=tree_method,
            )
            cls_2.fit(X=X_, y=y_)
            predictions_2 = cls_2.predict(X_)
            proba_2 = cls_2.predict_proba(X_)

            cls_3 = xgb.dask.DaskXGBClassifier(
                learning_rate=0.3,
                random_state=123,
                n_estimators=8,
                tree_method=tree_method,
            )
            cls_3.fit(X=X_, y=y_)
            proba_3 = cls_3.predict_proba(X_)

            # compute variance of probability percentages between two of the
            # same model, use this to check to make sure approx is functioning
            # within normal parameters
            expected_variance = np.max(np.abs(proba_3 - proba_2)).compute()

            if expected_variance > 0:
                margin_variance = np.max(np.abs(proba_1 - proba_2)).compute()
                # Ensure the margin variance is less than the expected variance + 10%
                assert np.all(margin_variance <= expected_variance + .1)
            else:
                np.testing.assert_equal(predictions_1.compute(),
                                        predictions_2.compute())
                np.testing.assert_almost_equal(proba_1.compute(),
                                               proba_2.compute())
Code Example #26
File: prediction.py Project: youkawa/LightGBM
import dask.array as da
from distributed import Client, LocalCluster
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error

import lightgbm as lgb

if __name__ == "__main__":
    print("loading data")

    X, y = make_regression(n_samples=1000, n_features=50)

    print("initializing a Dask cluster")

    cluster = LocalCluster(n_workers=2)
    client = Client(cluster)

    print("created a Dask LocalCluster")

    print("distributing training data on the Dask cluster")

    dX = da.from_array(X, chunks=(100, 50))
    dy = da.from_array(y, chunks=(100, ))

    print("beginning training")

    dask_model = lgb.DaskLGBMRegressor(n_estimators=10)
    dask_model.fit(dX, dy)
    assert dask_model.fitted_

    print("done training")
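    # Hedged follow-up (not in the original snippet): since the file is named
    # prediction.py, predicting with the fitted Dask model would look like this.
    # DaskLGBMRegressor.predict returns a dask array; compute() materializes it.
    preds = dask_model.predict(dX).compute()
    print(f"MSE on training data: {mean_squared_error(y, preds):.4f}")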
Code Example #27
File: dask_executor.py Project: caseybrown89/airflow
class DaskExecutorTest(unittest.TestCase):

    def setUp(self):
        self.dagbag = DagBag(include_examples=True)
        self.cluster = LocalCluster()

    @unittest.skipIf(SKIP_DASK, 'Dask unsupported by this configuration')
    def test_dask_executor_functions(self):
        executor = DaskExecutor(cluster_address=self.cluster.scheduler_address)

        # start the executor
        executor.start()

        success_command = 'echo 1'
        fail_command = 'exit 1'

        executor.execute_async(key='success', command=success_command)
        executor.execute_async(key='fail', command=fail_command)

        success_future = next(
            k for k, v in executor.futures.items() if v == 'success')
        fail_future = next(
            k for k, v in executor.futures.items() if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = timezone.utcnow() + timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if timezone.utcnow() > timeout:
                raise ValueError(
                    'The futures should have finished; there is probably '
                    'an error communicating with the Dask cluster.')

        # both tasks should have finished
        self.assertTrue(success_future.done())
        self.assertTrue(fail_future.done())

        # check task exceptions
        self.assertTrue(success_future.exception() is None)
        self.assertTrue(fail_future.exception() is not None)

    @unittest.skipIf(SKIP_DASK, 'Dask unsupported by this configuration')
    def test_backfill_integration(self):
        """
        Test that DaskExecutor can be used to backfill example dags
        """
        dags = [
            dag for dag in self.dagbag.dags.values()
            if dag.dag_id in [
                'example_bash_operator',
                # 'example_python_operator',
            ]
        ]

        for dag in dags:
            dag.clear(
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE)

        for i, dag in enumerate(sorted(dags, key=lambda d: d.dag_id)):
            job = BackfillJob(
                dag=dag,
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                ignore_first_depends_on_past=True,
                executor=DaskExecutor(
                    cluster_address=self.cluster.scheduler_address))
            job.run()

    def tearDown(self):
        self.cluster.close(timeout=5)
Code Example #28
if __name__ == '__main__':

	if sum(metadata.columns == grouping_variable) < 1:
		exit('Grouping variable not found in metadata.')

	## Load ranking databases
	db_fnames = glob.glob(db_folder)
	def name(fname):
		return os.path.basename(fname).split(".")[0]
	dbs = [RankingDatabase(fname=fname, name=name(fname)) for fname in db_fnames]
	dbs


	## Initialize cluster
	local_cluster = LocalCluster(n_workers=n_cores, threads_per_worker=1, processes=False, memory_limit=memory_limit)
	custom_client = Client(local_cluster)


	## Load TFs
	tf_names = load_tf_names(TFs_file)


	## Collect here regulons passing correlation filter
	cortest_passed_regulons = []


	for i in range(0, iterations):

		## Split to train and test
		data[grouping_variable] = metadata[grouping_variable]
Code Example #29
def run_espei(run_settings):
    """Wrapper around the ESPEI fitting procedure, taking only a settings dictionary.

    Parameters
    ----------
    run_settings : dict
        Dictionary of input settings

    Returns
    -------
    Either a Database (for generate parameters only) or a tuple of (Database, sampler)
    """
    run_settings = get_run_settings(run_settings)
    system_settings = run_settings['system']
    output_settings = run_settings['output']
    generate_parameters_settings = run_settings.get('generate_parameters')
    mcmc_settings = run_settings.get('mcmc')

    # handle verbosity
    verbosity = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG}
    logging.basicConfig(level=verbosity[output_settings['verbosity']])

    # load datasets and handle i/o
    logging.debug('Loading and checking datasets.')
    dataset_path = system_settings['datasets']
    datasets = load_datasets(sorted(recursive_glob(dataset_path, '*.json')))
    if len(datasets.all()) == 0:
        logging.warning(
            'No datasets were found in the path {}. This should be a directory containing dataset files ending in `.json`.'
            .format(dataset_path))
    logging.debug('Finished checking datasets')

    with open(system_settings['phase_models']) as fp:
        phase_models = json.load(fp)

    if generate_parameters_settings is not None:
        refdata = generate_parameters_settings['ref_state']
        excess_model = generate_parameters_settings['excess_model']
        dbf = generate_parameters(phase_models, datasets, refdata,
                                  excess_model)
        dbf.to_file(output_settings['output_db'], if_exists='overwrite')

    if mcmc_settings is not None:
        tracefile = output_settings['tracefile']
        probfile = output_settings['probfile']
        # check that the MCMC output files do not already exist
        # only matters if we are actually running MCMC
        if os.path.exists(tracefile):
            raise OSError(
                'Tracefile "{}" exists and would be overwritten by a new run. Use the ``output.tracefile`` setting to set a different name.'
                .format(tracefile))
        if os.path.exists(probfile):
            raise OSError(
                'Probfile "{}" exists and would be overwritten by a new run. Use the ``output.probfile`` setting to set a different name.'
                .format(probfile))

        # scheduler setup
        if mcmc_settings['scheduler'] == 'MPIPool':
            # check that cores is not an input setting
            if mcmc_settings.get('cores') != None:
                logging.warning("MPI does not take the cores input setting.")
            from emcee.utils import MPIPool
            # code recommended by emcee: if not master, wait for instructions then exit
            client = MPIPool()
            if not client.is_master():
                logging.debug(
                    'MPIPool is not master. Waiting for instructions...')
                client.wait()
                sys.exit(0)
            logging.info("Using MPIPool on {} MPI ranks".format(client.size))
        elif mcmc_settings['scheduler'] == 'dask':
            from distributed import LocalCluster
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if (cores > multiprocessing.cpu_count()):
                cores = multiprocessing.cpu_count()
                logging.warning(
                    "The number of cores chosen is larger than available. "
                    "Defaulting to run on the {} available cores.".format(
                        cores))
            scheduler = LocalCluster(n_workers=cores,
                                     threads_per_worker=1,
                                     processes=True)
            client = ImmediateClient(scheduler)
            client.run(logging.basicConfig,
                       level=verbosity[output_settings['verbosity']])
            logging.info("Running with dask scheduler: %s [%s cores]" %
                         (scheduler, sum(client.ncores().values())))
            try:
                logging.info(
                    "bokeh server for dask scheduler at localhost:{}".format(
                        client.scheduler_info()['services']['bokeh']))
            except KeyError:
                logging.info("Install bokeh to use the dask bokeh server.")
        elif mcmc_settings['scheduler'] == 'emcee':
            from emcee.interruptible_pool import InterruptiblePool
            cores = mcmc_settings.get('cores', multiprocessing.cpu_count())
            if (cores > multiprocessing.cpu_count()):
                cores = multiprocessing.cpu_count()
                logging.warning(
                    "The number of cores chosen is larger than available. "
                    "Defaulting to run on the {} available cores.".format(
                        cores))
            client = InterruptiblePool(processes=cores)
            logging.info("Using multiprocessing on {} cores".format(cores))
        elif mcmc_settings['scheduler'] == 'None':
            client = None
            logging.info(
                "Not using a parallel scheduler. ESPEI is running MCMC on a single core."
            )

        # get a Database
        if mcmc_settings.get('input_db'):
            dbf = Database(mcmc_settings.get('input_db'))

        # load the restart chain if needed
        if mcmc_settings.get('restart_chain'):
            restart_chain = np.load(mcmc_settings.get('restart_chain'))
        else:
            restart_chain = None

        # load the remaining MCMC fitting parameters
        mcmc_steps = mcmc_settings.get('mcmc_steps')
        save_interval = mcmc_settings.get('mcmc_save_interval')
        chains_per_parameter = mcmc_settings.get('chains_per_parameter')
        chain_std_deviation = mcmc_settings.get('chain_std_deviation')
        deterministic = mcmc_settings.get('deterministic')

        dbf, sampler = mcmc_fit(
            dbf,
            datasets,
            scheduler=client,
            mcmc_steps=mcmc_steps,
            chains_per_parameter=chains_per_parameter,
            chain_std_deviation=chain_std_deviation,
            save_interval=save_interval,
            tracefile=tracefile,
            probfile=probfile,
            restart_chain=restart_chain,
            deterministic=deterministic,
        )

        dbf.to_file(output_settings['output_db'], if_exists='overwrite')
        # close the scheduler, if possible
        if hasattr(client, 'close'):
            client.close()
        return dbf, sampler
    return dbf
Code Example #30
    def run(
        self,
        clean: bool = False,
        debug: bool = False,
        **kwargs,
    ):
        """
        Run a flow with your steps.
        Parameters
        ----------
        clean: bool
            Should the local staging directory be cleaned prior to this run.
            Default: False (Do not clean)
        debug: bool
            A debug flag for the developer to use to manipulate how much data runs,
            how it is processed, etc.
            Default: False (Do not debug)
        Notes
        -----
        Documentation on prefect:
        https://docs.prefect.io/core/
        Basic prefect example:
        https://docs.prefect.io/core/
        """
        # Initialize steps
        select_data = steps.SelectData()
        compute_cell_metrics = steps.ComputeCellMetrics()
        gather_test_visualize = steps.GatherTestVisualize()

        # Choose executor
        if debug:
            exe = LocalExecutor()
        else:

            # Create local cluster
            log.info("Creating LocalCluster")
            current_mem_gb = psutil.virtual_memory().available / 2**30
            n_workers = int(current_mem_gb // 4)
            cluster = LocalCluster(n_workers=n_workers)
            log.info("Created LocalCluster")

            # Set distributed_executor_address
            distributed_executor_address = cluster.scheduler_address

            # Batch size on local cluster
            batch_size = int(psutil.cpu_count() // n_workers)

            # Log dashboard URI
            log.info(f"Dask dashboard available at: {cluster.dashboard_link}")

            # Use dask cluster
            exe = DaskExecutor(distributed_executor_address)

        # Configure your flow
        with Flow("polar_express") as flow:
            # If you want to clean the local staging directories pass clean
            # If you want to utilize some debugging functionality pass debug
            # If you don't utilize any of these, just pass the parameters you need.

            # step 1: select cells and store in annotation file
            selected_cells_manifest = select_data(
                clean=clean,
                debug=debug,
                distributed_executor_address=distributed_executor_address,
                batch_size=batch_size,
                **kwargs,  # Allows us to pass `--n {some integer}` or other params
            )

            # step 2: compute metrics for each of the cells
            cell_metrics_manifest = compute_cell_metrics(
                selected_cells_manifest,
                clean=clean,
                debug=debug,
                distributed_executor_address=distributed_executor_address,
                batch_size=batch_size,
                **kwargs,  # Allows us to pass `--n {some integer}` or other params
            )

            # step 3: gather the computed metrics and create visualizations
            gather_test_visualize(
                cell_metrics_manifest,
                clean=clean,
                debug=debug,
                **kwargs,  # Allows us to pass `--n {some integer}` or other params
            )

        # Run flow and get ending state
        state = flow.run(executor=exe)

        # Get and display any outputs you want to see on your local terminal
        log.info(select_data.get_result(state, flow))
        log.info(compute_cell_metrics.get_result(state, flow))
        log.info(gather_test_visualize.get_result(state, flow))
Code Example #31
from arboreto.algo import grnboost2
from arboreto.utils import load_tf_names
from distributed import Client, LocalCluster
from itertools import izip_longest
import pandas as pd
import sys


def grouper(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return izip_longest(*args, fillvalue=fillvalue)


in_file = '../../PlanExp/rawdata/GSE111764.counts.noheader'
tf_file = '../../PlanExp/rawdata/go-regulators-reddien.txt'

sys.stderr.write("\nStarting Dask cluster...")
local_cluster = LocalCluster(n_workers=32,
                             threads_per_worker=1,
                             memory_limit=8e10)
custom_client = Client(local_cluster)
sys.stderr.write("done.\n")

# ex_matrix is a DataFrame with gene names as column names
sys.stderr.write("\nReading count matrix...")
ex_matrix = pd.read_csv(in_file, sep='\t', index_col=0, header=None).T
sys.stderr.write("done.\n")

# tf_names is read using a utility function included in Arboreto
sys.stderr.write("\nLoading putative transcription factors...")
tf_names = load_tf_names(tf_file)
sys.stderr.write("done.\n")
sys.stderr.write("\nPredicting co-expression network in chunks...\n")
i = 0
Code Example #32
from distributed import LocalCluster

c = LocalCluster(processes=False)

print(c.scheduler)  # inspect the in-process scheduler object

from dask_jobqueue import SLURMCluster
from dask.distributed import Client
from distributed import Scheduler
from tornado.ioloop import IOLoop
from threading import Thread


cluster = SLURMCluster(processes=12, queue="DGE", project="davek", memory="36GB")

cluster.start_workers(12)

client = Client(cluster)

def inc(x):
  return x + 1


x = client.submit(inc,10)

L = client.map(inc, range(1000))
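A hedged continuation of the snippet above (standard distributed Client calls, not part of the original):

# Gather the mapped futures back to the local process and summarize.
results = client.gather(L)
print(x.result(), len(results), sum(results))  # 11, 1000, 500500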

Code Example #33
File: core.py Project: h0bot/dask-drmaa
class DRMAACluster(object):
    def __init__(self,
                 template=None,
                 cleanup_interval=1000,
                 hostname=None,
                 script=None,
                 preexec_commands=(),
                 **kwargs):
        """
        Dask workers launched by a DRMAA-compatible cluster

        Parameters
        ----------
        jobName: string
            Name of the job as known by the DRMAA cluster.
        script: string (optional)
            Path to the dask-worker executable script.
            A temporary file will be made if none is provided (recommended)
        args: list
            Extra string arguments to pass to dask-worker
        outputPath: string
        errorPath: string
        workingDirectory: string
            Where dask-worker runs, defaults to current directory
        nativeSpecification: string
            Options native to the job scheduler

        Examples
        --------
        >>> from dask_drmaa import DRMAACluster          # doctest: +SKIP
        >>> cluster = DRMAACluster()                     # doctest: +SKIP
        >>> cluster.start_workers(10)                    # doctest: +SKIP

        >>> from distributed import Client               # doctest: +SKIP
        >>> client = Client(cluster)                     # doctest: +SKIP

        >>> future = client.submit(lambda x: x + 1, 10)  # doctest: +SKIP
        >>> future.result()                              # doctest: +SKIP
        11
        """
        self.hostname = hostname or socket.gethostname()
        logger.info("Start local scheduler at %s", self.hostname)
        self.local_cluster = LocalCluster(n_workers=0, ip='', **kwargs)

        if script is None:
            fn = tempfile.mktemp(suffix='sh',
                                 prefix='dask-worker-script',
                                 dir=os.path.curdir)
            self.script = fn

            script_contents = make_job_script(executable=worker_bin_path,
                                              name='%s.%s' % (JOB_ID, TASK_ID),
                                              preexec=preexec_commands)
            with open(fn, 'wt') as f:
                f.write(script_contents)

            @atexit.register
            def remove_script():
                if os.path.exists(fn):
                    os.remove(fn)

            os.chmod(self.script, 0o777)

        else:
            assert not preexec_commands, "Cannot specify both script and preexec_commands"

        # TODO: check that user-provided script is executable

        self.template = merge(default_template, {'remoteCommand': self.script},
                              template or {})

        self._cleanup_callback = PeriodicCallback(
            callback=self.cleanup_closed_workers,
            callback_time=cleanup_interval,
            io_loop=self.scheduler.loop)
        self._cleanup_callback.start()

        self.workers = {}  # {job-id: WorkerSpec}

    @gen.coroutine
    def _start(self):
        pass

    @property
    def scheduler(self):
        return self.local_cluster.scheduler

    @property
    def scheduler_address(self):
        return self.scheduler.address

    def create_job_template(self, **kwargs):
        template = self.template.copy()
        if kwargs:
            template.update(kwargs)
        template['args'] = [self.scheduler_address] + template['args']

        jt = get_session().createJobTemplate()
        valid_attributes = dir(jt)

        for key, value in template.items():
            if key not in valid_attributes:
                raise ValueError("Invalid job template attribute %s" % key)
            setattr(jt, key, value)

        return jt

    def start_workers(self, n=1, **kwargs):
        with log_errors():
            with self.create_job_template(**kwargs) as jt:
                ids = get_session().runBulkJobs(jt, 1, n, 1)
                logger.info("Start %d workers. Job ID: %s", len(ids),
                            ids[0].split('.')[0])
                self.workers.update({
                    jid: WorkerSpec(
                        job_id=jid,
                        kwargs=kwargs,
                        stdout=worker_out_path_template %
                        dict(jid=jid, kind='out'),
                        stderr=worker_out_path_template %
                        dict(jid=jid, kind='err'),
                    )
                    for jid in ids
                })

    def stop_workers(self, worker_ids, sync=False):
        if isinstance(worker_ids, str):
            worker_ids = [worker_ids]

        for wid in list(worker_ids):
            try:
                get_session().control(wid, drmaa.JobControlAction.TERMINATE)
            except drmaa.errors.InvalidJobException:
                pass
            self.workers.pop(wid)

        logger.info("Stop workers %s", worker_ids)
        if sync:
            get_session().synchronize(worker_ids, dispose=True)

    def close(self):
        logger.info("Closing DRMAA cluster")
        self.local_cluster.close()
        if self.workers:
            self.stop_workers(self.workers, sync=True)

        if os.path.exists(self.script):
            os.remove(self.script)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def cleanup_closed_workers(self):
        for jid in list(self.workers):
            if get_session().jobStatus(jid) in ('closed', 'done'):
                logger.info("Removing closed worker %s", jid)
                del self.workers[jid]

    def __del__(self):
        try:
            self.close()
        except:
            pass

    def __str__(self):
        return "<%s: %d workers>" % (self.__class__.__name__, len(self.workers))

    __repr__ = __str__
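Because DRMAACluster defines __enter__ and __exit__, it can also be driven as a context manager so close() always runs; a minimal usage sketch (the worker count and the submitted lambda are illustrative, not taken from the project):

from distributed import Client
from dask_drmaa import DRMAACluster

with DRMAACluster() as cluster:
    cluster.start_workers(4)
    client = Client(cluster)
    print(client.submit(lambda x: x * 2, 21).result())  # 42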
コード例 #34
0
from dask.distributed import Client
from distributed import LocalCluster

from examples.multiobjective.parallel.zdt1_modified import ZDT1Modified
from jmetal.algorithm.multiobjective.nsgaii import DistributedNSGAII
from jmetal.operator import PolynomialMutation, SBXCrossover
from jmetal.util.termination_criterion import StoppingByEvaluations

if __name__ == '__main__':
    problem = ZDT1Modified()

    # setup Dask client
    client = Client(LocalCluster(n_workers=24))

    ncores = sum(client.ncores().values())
    print(f'{ncores} cores available')

    # creates the algorithm
    max_evaluations = 25000

    algorithm = DistributedNSGAII(
        problem=problem,
        population_size=100,
        mutation=PolynomialMutation(probability=1.0 / problem.number_of_variables, distribution_index=20),
        crossover=SBXCrossover(probability=1.0, distribution_index=20),
        termination_criterion=StoppingByEvaluations(max=max_evaluations),
        number_of_cores=ncores,
        client=client
    )

    algorithm.run()
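Continuing inside the __main__ block, a short hedged sketch of collecting the computed front and shutting the Dask client down (get_result() is jMetalPy's standard accessor; the print is illustrative):

    front = algorithm.get_result()
    print(f'Computed {len(front)} solutions')
    client.close()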
コード例 #36
0
    def setUp(self):
        self.dagbag = DagBag(include_examples=True)
        self.cluster = LocalCluster()