Example No. 1
def test_get_scale_up_kwargs(loop):
    with LocalCluster(0, scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, loop=loop) as cluster:

        alc = Adaptive(cluster.scheduler, cluster, interval=100,
                       scale_factor=3)
        assert alc.get_scale_up_kwargs() == {'n': 1}

        with Client(cluster, loop=loop) as c:
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.ncores()
            assert alc.get_scale_up_kwargs() == {'n': 3}
Example No. 2
def test_min_max():
    loop = IOLoop.current()
    cluster = yield LocalCluster(0, scheduler_port=0, silence_logs=False,
                                 processes=False, diagnostics_port=None,
                                 loop=loop, asynchronous=True)
    yield cluster._start()
    try:
        adapt = Adaptive(cluster.scheduler, cluster, minimum=1, maximum=2,
                         interval=20)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        start = time()
        while not cluster.scheduler.workers:
            yield gen.sleep(0.01)
            assert time() < start + 1

        yield gen.sleep(0.2)
        assert len(cluster.scheduler.workers) == 1
        assert frequencies(pluck(1, adapt.log)) == {'up': 1}

        futures = c.map(slowinc, range(100), delay=0.1)

        start = time()
        while len(cluster.scheduler.workers) < 2:
            yield gen.sleep(0.01)
            assert time() < start + 1

        assert len(cluster.scheduler.workers) == 2
        yield gen.sleep(0.5)
        assert len(cluster.scheduler.workers) == 2
        assert len(cluster.workers) == 2
        assert frequencies(pluck(1, adapt.log)) == {'up': 2}

        del futures

        start = time()
        while len(cluster.scheduler.workers) != 1:
            yield gen.sleep(0.01)
            assert time() < start + 1
        assert frequencies(pluck(1, adapt.log)) == {'up': 2, 'down': 1}
    finally:
        yield c._close()
        yield cluster._close()
Example No. 3
def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0,
                           scheduler_port=0,
                           silence_logs=False,
                           processes=False,
                           diagnostics_port=None,
                           loop=loop,
                           start=False)
    try:
        cluster.scheduler.allowed_failures = 1000
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        futures = c.map(slowinc, range(100), delay=0.01)

        start = time()
        while not cluster.scheduler.worker_info:
            yield gen.sleep(0.01)
            assert time() < start + 15

        yield c._gather(futures)
        del futures

        start = time()
        while cluster.workers:
            yield gen.sleep(0.01)
            assert time() < start + 5

        assert not cluster.workers
        assert not cluster.scheduler.workers
        yield gen.sleep(0.2)
        assert not cluster.workers
        assert not cluster.scheduler.workers

        futures = c.map(slowinc, range(100), delay=0.01)
        yield c._gather(futures)

    finally:
        yield c._close()
        yield cluster._close()
Example No. 4
def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0,
                           scheduler_port=0,
                           silence_logs=False,
                           nanny=False,
                           diagnostics_port=None,
                           loop=loop,
                           start=False)
    alc = Adaptive(cluster.scheduler, cluster, interval=100)
    c = Client(cluster, start=False, loop=loop)
    yield c._start()

    for i in range(20):
        futures = c.map(slowinc, range(100), delay=0.01)
        yield c._gather(futures)
        del futures
        yield gen.sleep(0.1)

    yield c._shutdown()
    yield cluster._close()
Example No. 5
def test_adaptive_local_cluster(loop):
    with LocalCluster(0, scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, loop=loop) as cluster:
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        with Client(cluster, loop=loop) as c:
            assert not c.ncores()
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.ncores()

            sleep(0.1)
            assert c.ncores()  # still there after some time

            del future

            start = time()
            while cluster.scheduler.ncores:
                sleep(0.01)
                assert time() < start + 5

            assert not c.ncores()
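
The tests above all follow the same pattern: start a LocalCluster with zero workers, attach an Adaptive policy to its scheduler, and let pending work drive scale-up while idleness drives scale-down. Below is a minimal synchronous sketch of that pattern, assuming the older Adaptive(scheduler, cluster, ...) constructor these tests exercise and the distributed.deploy.adaptive import path.

from distributed import Client, LocalCluster
from distributed.deploy.adaptive import Adaptive

with LocalCluster(0, scheduler_port=0, diagnostics_port=None) as cluster:
    # Begin with zero workers; the policy adds them only when tasks are pending.
    adapt = Adaptive(cluster.scheduler, cluster, interval=100, minimum=0, maximum=4)
    with Client(cluster) as client:
        future = client.submit(lambda x: x + 1, 1)   # pending work triggers a scale-up
        assert future.result() == 2                  # a worker was created to run the task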
Example No. 6
def test_avoid_churn():
    """ We want to avoid creating and deleting workers frequently

    Instead we want to wait a few beats before removing a worker in case the
    user is taking a brief pause between work
    """
    cluster = yield LocalCluster(0, asynchronous=True, processes=False,
                                 scheduler_port=0, silence_logs=False,
                                 diagnostics_port=None)
    client = yield Client(cluster, asynchronous=True)
    try:
        adapt = Adaptive(cluster.scheduler, cluster, interval=20, wait_count=5)

        for i in range(10):
            yield client.submit(slowinc, i, delay=0.040)
            yield gen.sleep(0.040)

        assert frequencies(pluck(1, adapt.log)) == {'up': 1}
    finally:
        yield client._close()
        yield cluster._close()
Example No. 7
def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0,
                           scheduler_port=0,
                           silence_logs=False,
                           nanny=False,
                           diagnostics_port=None,
                           loop=loop,
                           start=False)
    cluster.scheduler.allowed_failures = 1000
    alc = Adaptive(cluster.scheduler, cluster, interval=100)
    c = Client(cluster, start=False, loop=loop)
    yield c._start()

    futures = c.map(slowinc, range(100), delay=0.01)

    start = time()
    while not cluster.workers:
        yield gen.sleep(0.01)
        assert time() < start + 5

    yield c._gather(futures)
    del futures

    start = time()
    while cluster.workers:
        yield gen.sleep(0.01)
        assert time() < start + 5

    assert not cluster.workers
    yield gen.sleep(0.2)
    assert not cluster.workers

    futures = c.map(slowinc, range(100), delay=0.01)
    yield c._gather(futures)

    yield c._shutdown()
    yield cluster._close()
Example No. 8
    def __init__(
            self,
            pod_template=None,
            name=None,
            namespace=None,
            n_workers=None,
            host=None,
            port=None,
            env=None,
            **kwargs
    ):
        name = name or dask.config.get('kubernetes.name')
        namespace = namespace or dask.config.get('kubernetes.namespace')
        n_workers = n_workers if n_workers is not None else dask.config.get('kubernetes.count.start')
        host = host or dask.config.get('kubernetes.host')
        port = port if port is not None else dask.config.get('kubernetes.port')
        env = env if env is not None else dask.config.get('kubernetes.env')

        if not pod_template and dask.config.get('kubernetes.worker-template', None):
            d = dask.config.get('kubernetes.worker-template')
            pod_template = make_pod_from_dict(d)

        if not pod_template and dask.config.get('kubernetes.worker-template-path', None):
            import yaml
            fn = dask.config.get('kubernetes.worker-template-path')
            fn = fn.format(**os.environ)
            with open(fn) as f:
                d = yaml.safe_load(f)
            pod_template = make_pod_from_dict(d)

        if not pod_template:
            msg = ("Worker pod specification not provided. See KubeCluster "
                   "docstring for ways to specify workers")
            raise ValueError(msg)

        self.cluster = LocalCluster(ip=host or socket.gethostname(),
                                    scheduler_port=port,
                                    n_workers=0, **kwargs)
        try:
            kubernetes.config.load_incluster_config()
        except kubernetes.config.ConfigException:
            kubernetes.config.load_kube_config()

        self.core_api = kubernetes.client.CoreV1Api()

        if namespace is None:
            namespace = _namespace_default()
        
        name = name.format(user=getpass.getuser(),
                           uuid=str(uuid.uuid4())[:10],
                           **os.environ)
        name = escape(name)
        
        self.pod_template = clean_pod_template(pod_template)
        # Default labels that can't be overwritten
        self.pod_template.metadata.labels['dask.pydata.org/cluster-name'] = name
        self.pod_template.metadata.labels['user'] = escape(getpass.getuser())
        self.pod_template.metadata.labels['app'] = 'dask'
        self.pod_template.metadata.labels['component'] = 'dask-worker'
        self.pod_template.metadata.namespace = namespace

        self.pod_template.spec.containers[0].env.append(
            kubernetes.client.V1EnvVar(name='DASK_SCHEDULER_ADDRESS',
                                       value=self.scheduler_address)
        )
        if env:
            self.pod_template.spec.containers[0].env.extend([
                kubernetes.client.V1EnvVar(name=k, value=str(v))
                for k, v in env.items()
            ])
        self.pod_template.metadata.generate_name = name

        finalize(self, _cleanup_pods, self.namespace, self.pod_template.metadata.labels)

        if n_workers:
            self.scale(n_workers)
Example No. 9
    def __init__(self,
                 pod_template=None,
                 name=None,
                 namespace=None,
                 n_workers=None,
                 host=None,
                 port=None,
                 env=None,
                 auth=ClusterAuth.DEFAULT,
                 **kwargs):
        name = name or dask.config.get("kubernetes.name")
        namespace = namespace or dask.config.get("kubernetes.namespace")
        n_workers = (n_workers if n_workers is not None else
                     dask.config.get("kubernetes.count.start"))
        host = host or dask.config.get("kubernetes.host")
        port = port if port is not None else dask.config.get("kubernetes.port")
        env = env if env is not None else dask.config.get("kubernetes.env")

        if not pod_template and dask.config.get("kubernetes.worker-template",
                                                None):
            d = dask.config.get("kubernetes.worker-template")
            d = dask.config.expand_environment_variables(d)
            pod_template = make_pod_from_dict(d)

        if not pod_template and dask.config.get(
                "kubernetes.worker-template-path", None):
            import yaml

            fn = dask.config.get("kubernetes.worker-template-path")
            fn = fn.format(**os.environ)
            with open(fn) as f:
                d = yaml.safe_load(f)
            d = dask.config.expand_environment_variables(d)
            pod_template = make_pod_from_dict(d)

        if not pod_template:
            msg = ("Worker pod specification not provided. See KubeCluster "
                   "docstring for ways to specify workers")
            raise ValueError(msg)

        pod_template = clean_pod_template(pod_template)
        ClusterAuth.load_first(auth)

        self.core_api = kubernetes.client.CoreV1Api()

        if namespace is None:
            namespace = _namespace_default()

        name = name.format(user=getpass.getuser(),
                           uuid=str(uuid.uuid4())[:10],
                           **os.environ)
        name = escape(name)
        self.pod_template = pod_template

        # Default labels that can't be overwritten
        self.pod_template.metadata.labels["dask.org/cluster-name"] = name
        self.pod_template.metadata.labels["user"] = escape(getpass.getuser())
        self.pod_template.metadata.labels["app"] = "dask"
        self.pod_template.metadata.labels["component"] = "dask-worker"
        self.pod_template.metadata.namespace = namespace

        self.cluster = LocalCluster(host=host or socket.gethostname(),
                                    scheduler_port=port,
                                    n_workers=0,
                                    **kwargs)

        # TODO: handle any exceptions here, ensure self.cluster is properly
        # cleaned up.
        self.pod_template.spec.containers[0].env.append(
            kubernetes.client.V1EnvVar(name="DASK_SCHEDULER_ADDRESS",
                                       value=self.scheduler_address))
        if env:
            self.pod_template.spec.containers[0].env.extend([
                kubernetes.client.V1EnvVar(name=k, value=str(v))
                for k, v in env.items()
            ])
        self.pod_template.metadata.generate_name = name

        finalize(self, _cleanup_pods, self.namespace,
                 self.pod_template.metadata.labels)

        if n_workers:
            try:
                self.scale(n_workers)
            except Exception:
                self.cluster.close()
                raise
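
For context, user code typically drives this constructor through a helper such as KubeCluster.from_yaml, which loads a worker pod spec from a file and passes it in as pod_template. Below is a rough usage sketch assuming the classic dask_kubernetes API; the file name and worker count are illustrative and not taken from the code above.

from dask_kubernetes import KubeCluster
from distributed import Client

cluster = KubeCluster.from_yaml('worker-spec.yaml')  # hypothetical pod spec; becomes pod_template above
cluster.scale(3)                                     # request three worker pods
client = Client(cluster)                             # scheduler runs in-process, workers run as pods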