def test_simple(c, s):
    with MarathonCluster(s, cpus=1, mem=256) as mc:
        ac = Adaptive(s, mc)
        ac.adapt()
        yield gen.sleep(0.1)
        assert not s.ncores

        futures = c.map(lambda x: x + 1, range(10))

        start = time()
        while not s.ready:
            yield gen.sleep(0.01)
            assert time() < start + 5

        ac.adapt()

        results = yield c._gather(futures)
        assert s.transition_log

        if s.worker_info:
            tasks = mc.client.list_tasks(app_id=mc.app.id)
            names = {d['name'] for d in s.worker_info.values()}
            assert names == {t.id for t in tasks}

        yield ac._retire_workers()

        start = time()
        while len(s.worker_info) > 1:
            yield gen.sleep(0.01)
            assert time() < start + 5

        assert len(s.who_has) == len(futures)

def adapt(self):
    """ Start up an Adaptive deployment if not already started

    This makes the cluster request resources in accordance with current
    demand on the scheduler
    """
    from distributed.deploy import Adaptive

    if self._adaptive:
        return
    else:
        self._adaptive = Adaptive(self.scheduler, self, startup_cost=5,
                                  key=lambda ws: ws.host)

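# Hedged usage sketch (not from the source): assuming the adapt() method above
# lives on a LocalCluster-style deployment object, a single call is enough to
# let the scheduler grow and shrink the worker pool with demand.  The names
# below are illustrative.
from distributed import Client, LocalCluster

cluster = LocalCluster(0, scheduler_port=0, diagnostics_port=None)
cluster.adapt()                                   # start adaptive scaling once
client = Client(cluster)
assert client.submit(lambda x: x + 1, 1).result() == 2   # work triggers a scale-up
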
def test_get_scale_up_kwargs(loop):
    with LocalCluster(0, scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, loop=loop) as cluster:
        alc = Adaptive(cluster.scheduler, cluster, interval=100,
                       scale_factor=3)
        assert alc.get_scale_up_kwargs() == {'n': 1}

        with Client(cluster, loop=loop) as c:
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.ncores()

            assert alc.get_scale_up_kwargs() == {'n': 3}

def start_cluster(clustertype='local', n=8, adaptive_deploy=False):
    # add the json config file?
    if clustertype == 'local':
        cluster = LocalCluster(n_workers=n)
    elif clustertype == 'LSF':
        cluster = MyLSFCluster(n_workers=n)
    else:
        raise ValueError("Unknown cluster type: %r" % clustertype)

    adaptive_cluster = None
    if adaptive_deploy:
        # turns on cluster size management
        adaptive_cluster = Adaptive(cluster.scheduler, cluster)

    cluster.scheduler.start()
    client = Client(cluster)
    return client, cluster, adaptive_cluster

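# Illustrative call of the start_cluster helper above (the worker count and the
# guard are assumptions, not from the source); with adaptive_deploy=True the
# returned Adaptive object keeps the cluster sized to the scheduler's demand.
if __name__ == '__main__':
    client, cluster, adaptive = start_cluster('local', n=4, adaptive_deploy=True)
    print(client.submit(lambda x: x + 1, 1).result())   # prints 2
    client.close()
    cluster.close()
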
def test_min_max():
    loop = IOLoop.current()
    cluster = yield LocalCluster(0, scheduler_port=0, silence_logs=False,
                                 processes=False, diagnostics_port=None,
                                 loop=loop, asynchronous=True)
    yield cluster._start()
    try:
        adapt = Adaptive(cluster.scheduler, cluster, minimum=1, maximum=2,
                         interval=20)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        start = time()
        while not cluster.scheduler.workers:
            yield gen.sleep(0.01)
            assert time() < start + 1

        yield gen.sleep(0.2)
        assert len(cluster.scheduler.workers) == 1
        assert frequencies(pluck(1, adapt.log)) == {'up': 1}

        futures = c.map(slowinc, range(100), delay=0.1)

        start = time()
        while len(cluster.scheduler.workers) < 2:
            yield gen.sleep(0.01)
            assert time() < start + 1

        assert len(cluster.scheduler.workers) == 2
        yield gen.sleep(0.5)
        assert len(cluster.scheduler.workers) == 2
        assert len(cluster.workers) == 2
        assert frequencies(pluck(1, adapt.log)) == {'up': 2}

        del futures

        start = time()
        while len(cluster.scheduler.workers) != 1:
            yield gen.sleep(0.01)
            assert time() < start + 1

        assert frequencies(pluck(1, adapt.log)) == {'up': 2, 'down': 1}
    finally:
        yield c._close()
        yield cluster._close()

def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0, scheduler_port=0, silence_logs=False,
                           processes=False, diagnostics_port=None,
                           loop=loop, start=False)
    try:
        cluster.scheduler.allowed_failures = 1000
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        futures = c.map(slowinc, range(100), delay=0.01)

        start = time()
        while not cluster.scheduler.worker_info:
            yield gen.sleep(0.01)
            assert time() < start + 15

        yield c._gather(futures)
        del futures

        start = time()
        while cluster.workers:
            yield gen.sleep(0.01)
            assert time() < start + 5

        assert not cluster.workers
        assert not cluster.scheduler.workers
        yield gen.sleep(0.2)
        assert not cluster.workers
        assert not cluster.scheduler.workers

        futures = c.map(slowinc, range(100), delay=0.01)
        yield c._gather(futures)
    finally:
        yield c._close()
        yield cluster._close()

def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0, scheduler_port=0, silence_logs=False,
                           nanny=False, diagnostics_port=None, loop=loop,
                           start=False)
    alc = Adaptive(cluster.scheduler, cluster, interval=100)
    c = Client(cluster, start=False, loop=loop)
    yield c._start()

    for i in range(20):
        futures = c.map(slowinc, range(100), delay=0.01)
        yield c._gather(futures)
        del futures
        yield gen.sleep(0.1)

    yield c._shutdown()
    yield cluster._close()

def test_adaptive_local_cluster(loop):
    with LocalCluster(0, scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, loop=loop) as cluster:
        alc = Adaptive(cluster.scheduler, cluster, interval=100)
        with Client(cluster, loop=loop) as c:
            assert not c.ncores()
            future = c.submit(lambda x: x + 1, 1)
            assert future.result() == 2
            assert c.ncores()

            sleep(0.1)
            assert c.ncores()  # still there after some time

            del future

            start = time()
            while cluster.scheduler.ncores:
                sleep(0.01)
                assert time() < start + 5

            assert not c.ncores()

def test_avoid_churn():
    """ We want to avoid creating and deleting workers frequently

    Instead we want to wait a few beats before removing a worker in case the
    user is taking a brief pause between work
    """
    cluster = yield LocalCluster(0, asynchronous=True, processes=False,
                                 scheduler_port=0, silence_logs=False,
                                 diagnostics_port=None)
    client = yield Client(cluster, asynchronous=True)
    try:
        adapt = Adaptive(cluster.scheduler, cluster, interval=20, wait_count=5)

        for i in range(10):
            yield client.submit(slowinc, i, delay=0.040)
            yield gen.sleep(0.040)

        assert frequencies(pluck(1, adapt.log)) == {'up': 1}
    finally:
        yield client._close()
        yield cluster._close()

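# Hedged sketch expanding on the docstring above: wait_count is the number of
# consecutive adaptive cycles a worker must look idle before it is retired and
# interval is the cycle length in milliseconds, so an idle worker survives
# roughly wait_count * interval ms.  The values below are illustrative.
from distributed import LocalCluster
from distributed.deploy import Adaptive

cluster = LocalCluster(0, scheduler_port=0, diagnostics_port=None)
# ~60 checks one second apart: keep idle workers around for about a minute
adapt = Adaptive(cluster.scheduler, cluster, interval=1000, wait_count=60)
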
def test_adapt(loop):
    thread = Thread(target=loop.start)
    thread.daemon = True
    thread.start()

    s = Scheduler(loop=loop)
    s.start(0)

    with MarathonCluster(s, cpus=1, mem=1000) as mc:
        ac = Adaptive(s, mc)
        with Client(s.address, loop=loop) as c:
            assert not s.ncores

            x = c.submit(lambda x: x + 1, 1)
            x.result()
            assert len(s.ncores) == 1

            del x

            start = time()
            while s.ncores:
                sleep(0.01)
                assert time() < start + 5

def __init__(self, loop=None, nworkers=0, ip=None, scheduler_port=0,
             diagnostics_port=8787, services=None, adaptive=False,
             silence_logs=logging.CRITICAL, **kwargs):
    if services is None:
        services = {}  # avoid mutating a shared default dict

    if silence_logs:
        for l in ['distributed.scheduler',
                  'distributed.worker',
                  'distributed.core',
                  'distributed.nanny']:
            logging.getLogger(l).setLevel(silence_logs)

    self.loop = loop or IOLoop()
    if not self.loop._running:
        self._thread = Thread(target=self.loop.start)
        self._thread.daemon = True
        self._thread.start()
        while not self.loop._running:
            sleep(0.001)

    if diagnostics_port is not None:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
        except ImportError:
            logger.info('To start diagnostics server please install Bokeh')
        else:
            services[('bokeh', diagnostics_port)] = BokehScheduler

    self.scheduler = Scheduler(loop=self.loop, services=services)
    self.workers = MarathonWorkers(self.scheduler, **kwargs)

    if adaptive:
        self.adaptive = Adaptive(self.scheduler, self.workers)

    if ip is None:
        ip = '127.0.0.1'

    self.scheduler_port = scheduler_port

    self.scheduler.start((ip, scheduler_port))
    self.workers.start(nworkers)
    self.status = 'running'
    logging.info('Scheduler address: {}'.format(self.scheduler.address))

def test_adaptive_local_cluster_multi_workers():
    loop = IOLoop.current()
    cluster = LocalCluster(0, scheduler_port=0, silence_logs=False,
                           nanny=False, diagnostics_port=None, loop=loop,
                           start=False)
    cluster.scheduler.allowed_failures = 1000
    alc = Adaptive(cluster.scheduler, cluster, interval=100)
    c = Client(cluster, start=False, loop=loop)
    yield c._start()

    futures = c.map(slowinc, range(100), delay=0.01)

    start = time()
    while not cluster.workers:
        yield gen.sleep(0.01)
        assert time() < start + 5

    yield c._gather(futures)
    del futures

    start = time()
    while cluster.workers:
        yield gen.sleep(0.01)
        assert time() < start + 5

    assert not cluster.workers
    yield gen.sleep(0.2)
    assert not cluster.workers

    futures = c.map(slowinc, range(100), delay=0.01)
    yield c._gather(futures)

    yield c._shutdown()
    yield cluster._close()

def dask_setup(scheduler):
    cluster = KubeCluster()
    adaptive_cluster = Adaptive(scheduler, cluster)

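# Hedged variant of the preload hook above (the file name, function name and the
# minimum/maximum bounds are illustrative assumptions, not from the source).
# A scheduler typically picks such a file up via
#     dask-scheduler --preload scale_k8s.py
# and Adaptive accepts bounds to cap how far the Kubernetes cluster can scale.
def dask_setup_bounded(scheduler):
    from dask_kubernetes import KubeCluster
    from distributed.deploy import Adaptive

    cluster = KubeCluster()
    # keep a reference on the scheduler and cap the pool at 20 workers
    scheduler.adaptive = Adaptive(scheduler, cluster, minimum=1, maximum=20)
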