def test_request_more_than_one(loop):
    """Wait for three workers while 1000 ``slowinc`` tasks are outstanding.

    An ``Adaptive`` instance is attached to the cluster before any work is
    submitted.  The polling loop has no timeout: the test returns only once
    the scheduler reports at least three workers.
    """
    with SGECluster(scheduler_port=0) as cluster:
        adaptive = Adaptive(cluster)
        with Client(cluster, loop=loop) as client:
            # Bound to a name so the futures stay referenced while we poll.
            pending = client.map(slowinc, range(1000), delay=0.2)
            while True:
                if len(cluster.scheduler.workers) >= 3:
                    break
                sleep(0.1)
def test_sge_cpus(loop):
    """A single worker started with ``cpus=2`` is reported with two cores."""
    with SGECluster(scheduler_port=0) as cluster:
        cluster.start_workers(1, cpus=2)
        with Client(cluster, loop=loop) as client:
            # Poll (no timeout) until the scheduler lists at least one worker.
            while True:
                if len(cluster.scheduler.ncores) >= 1:
                    break
                sleep(0.1)

            core_counts = list(cluster.scheduler.ncores.values())
            assert core_counts == [2]
def test_sge_memory(loop):
    """Workers started with 3e9 memory at fraction 0.5 report a 1.5e9 limit."""
    with SGECluster(scheduler_port=0) as cluster:
        cluster.start_workers(2, memory=3e9, memory_fraction=0.5)
        with Client(cluster, loop=loop) as client:
            # Poll (no timeout) until both workers are known to the scheduler.
            while len(cluster.scheduler.ncores) < 2:
                sleep(0.1)

            # Every registered worker must carry the scaled-down limit.
            for info in cluster.scheduler.worker_info.values():
                assert info['memory_limit'] == 1.5e9
def test_dont_request_if_not_enough_tasks(loop):
    """One long-running task must never leave two or more workers tracked.

    The scheduler's recorded duration for ``slowinc`` is set to 1000 before a
    single ``slowinc`` call with ``delay=1000`` is submitted; the cluster's
    worker count is then checked 50 times, 0.1 s apart.
    """
    with SGECluster(scheduler_port=0) as cluster:
        adaptive = Adaptive(cluster)
        with Client(cluster, loop=loop) as client:
            cluster.scheduler.task_duration['slowinc'] = 1000
            # Bound to a name so the future stays referenced during the checks.
            long_task = client.submit(slowinc, 1, delay=1000)

            for _ in range(50):
                sleep(0.1)
                assert len(cluster.workers) < 2
def test_dont_over_request(loop, interval):
    """A single quick task results in exactly one worker, and it stays at one.

    ``interval`` is accepted but not referenced in the body.
    NOTE(review): presumably supplied by a fixture or parametrization defined
    outside this view - confirm.
    """
    with SGECluster(scheduler_port=0) as cluster:
        adaptive = Adaptive(cluster)
        with Client(cluster, loop=loop) as client:
            result = client.submit(inc, 1)
            assert result.result() == 2
            assert len(cluster.scheduler.workers) == 1

            # Re-check five times, 0.2 s apart, that no extra worker shows up.
            for _ in range(5):
                sleep(0.2)
                assert len(cluster.scheduler.workers) == 1
def test_dont_request_on_many_short_tasks(loop):
    """1000 very short tasks must never leave two or more workers tracked.

    ``Adaptive`` is created with ``interval=50`` and ``startup_cost=10``, and
    the scheduler's recorded duration for ``slowinc`` is set to 0.001.
    """
    with SGECluster(scheduler_port=0) as cluster:
        adaptive = Adaptive(cluster, interval=50, startup_cost=10)
        with Client(cluster, loop=loop) as client:
            cluster.scheduler.task_duration['slowinc'] = 0.001
            # Bound to a name so the futures stay referenced during the checks.
            pending = client.map(slowinc, range(1000), delay=0.001)

            # Poll (no timeout) until the scheduler has at least one worker.
            while True:
                if cluster.scheduler.workers:
                    break
                sleep(0.01)

            for _ in range(20):
                sleep(0.1)
                assert len(cluster.workers) < 2
def test_dont_request_if_idle(loop):
    """With all tasks restricted to one worker, fewer than five are tracked.

    One worker is started by hand, 1000 ``slowinc`` tasks are restricted to
    it via ``workers=``, and only then is ``Adaptive`` attached with
    ``interval=2000``.  The cluster's worker count is checked 60 times,
    0.1 s apart.
    """
    with SGECluster(scheduler_port=0) as cluster:
        cluster.start_workers(1)
        with Client(cluster, loop=loop) as client:
            # Poll (no timeout) until the manually started worker registers.
            while True:
                if cluster.scheduler.workers:
                    break
                sleep(0.1)

            only_worker = first(cluster.scheduler.workers)
            pending = client.map(
                slowinc,
                range(1000),
                delay=0.2,
                workers=only_worker,
            )
            adaptive = Adaptive(cluster, interval=2000)

            for _ in range(60):
                sleep(0.1)
                assert len(cluster.workers) < 5
def test_adaptive_memory(loop):
    """A task requesting the ``memory`` resource gets a large-enough worker.

    Submits ``inc(1)`` with ``resources={'memory': 1e9}``, checks the result,
    and checks that the first worker listed in the scheduler's resources
    offers more than 1e9 of ``memory``.  After the future is dropped, the
    client must report no cores within 10 seconds.
    """
    with SGECluster(scheduler_port=0) as cluster:
        adapt = Adaptive(cluster=cluster)
        with Client(cluster, loop=loop) as client:
            future = client.submit(inc, 1, resources={'memory': 1e9})
            assert future.result() == 2
            assert len(cluster.scheduler.ncores) > 0
            r = list(cluster.scheduler.worker_resources.values())[0]
            assert r['memory'] > 1e9

            # Drop the only reference to the future before waiting for the
            # client to report no cores.
            del future

            start = time()
            while client.ncores():
                sleep(0.3)
                assert time() < start + 10

            # TODO: jobs aren't shutting down when process ends
            # (This note used to open a triple-quoted string that was never
            # closed; it is now a plain comment so the module stays parseable.)
def test_adaptive_memory(loop):
    """A ``memory``-constrained task gets a worker, which is later released.

    Uses ``cleanup_interval=100`` on the cluster.  After the result and the
    first worker's ``memory`` resource are checked, the future is dropped;
    the client must then report no cores within 10 seconds, and the
    cluster's ``workers`` must become empty within a further 10 seconds.
    """
    with SGECluster(scheduler_port=0, cleanup_interval=100) as cluster:
        adaptive = Adaptive(cluster)
        with Client(cluster, loop=loop) as client:
            pending = client.submit(inc, 1, resources={'memory': 1e9})
            assert pending.result() == 2
            assert len(cluster.scheduler.ncores) > 0

            all_resources = list(cluster.scheduler.worker_resources.values())
            first_resources = all_resources[0]
            assert first_resources['memory'] > 1e9

            del pending

            # Phase 1: the client stops reporting cores.
            deadline = time() + 10
            while client.ncores():
                sleep(0.3)
                assert time() < deadline

            # Phase 2: the cluster stops tracking workers.
            deadline = time() + 10
            while cluster.workers:
                sleep(0.1)
                assert time() < deadline
def test_adaptive_normal_tasks(loop):
    """A plain ``inc(1)`` submitted with ``Adaptive`` attached returns 2."""
    with SGECluster(scheduler_port=0) as cluster:
        adaptive = Adaptive(cluster)
        with Client(cluster, loop=loop) as client:
            outcome = client.submit(inc, 1)
            assert outcome.result() == 2
sample(config) else: train_task = continue_training if config.should_continue else begin_training if config.cluster: import dask import dask.multiprocessing from dask.distributed import Client from dask_drmaa import SGECluster memory = 2**36 job = dask.delayed(train_task)(config) # start cluster workers cluster = SGECluster() client = Client(cluster) cluster.start_workers(1, memory=memory) print("Web interface opened on port {}".format( client.scheduler_info()["services"]["bokeh"])) # start the actual computation results = dask.compute(job, get=client.get) print(results) # shut down all workers cluster.close() else: res = train_task(config) print(res)
def test_order_warns(loop):
    """``Adaptive(scheduler, cluster)`` must emit a ``FutureWarning``."""
    with SGECluster(scheduler_port=0) as cluster:
        # Fetch the scheduler outside the ``warns`` block so that only the
        # ``Adaptive`` call itself is checked for the warning.
        sched = cluster.scheduler
        with pytest.warns(FutureWarning):
            adaptive = Adaptive(sched, cluster)