def test_map(client):
    # Plain map: each expected result arrives exactly once.
    with client.get_executor() as ex:
        n = 10
        futures_iter = ex.map(inc, range(n))
        remaining = set(range(1, n + 1))
        for value in futures_iter:
            remaining.remove(value)
        assert not remaining

    # A short timeout interrupts iteration partway through the results.
    with client.get_executor(pure=False) as ex:
        n = 10
        futures_iter = ex.map(slowinc, range(n), [0.3] * n, timeout=1.2)
        collected = []
        with pytest.raises(TimeoutError):
            for value in futures_iter:
                collected.append(value)
        assert 2 <= len(collected) < 7

    with client.get_executor(pure=False) as ex:
        n = 10
        # Not consuming the iterator will cancel remaining tasks
        futures_iter = ex.map(slowinc, range(n), [0.3] * n)
        for _ in take(2, futures_iter):
            pass
        # Some tasks still processing
        assert number_of_processing_tasks(client) > 0
        # Garbage collect the iterator => remaining tasks are cancelled
        del futures_iter
        sleep(0.5)
        assert number_of_processing_tasks(client) == 0
def create_merge_tree(func, keys, token):
    """Create a task tree that merges all the keys with a reduction function.

    Parameters
    ----------
    func: callable
        Reduction function that accepts a single list of values to reduce.
    keys: iterable
        Keys to reduce from the source dask graph.
    token: object
        Included in each key of the returned dict.

    This creates a k-ary tree where k depends on the current level and is
    greater the further away a node is from the root node.  This reduces the
    total number of nodes (thereby reducing scheduler overhead), but still
    has beneficial properties of trees.

    For reasonable numbers of keys, N < 1e5, the total number of nodes in the
    tree is roughly ``N**0.78``.  For 1e5 < N < 2e5, it is roughly ``N**0.8``.
    """
    graph = {}
    level = 0
    width = len(keys)
    pending = iter(keys)

    # Collapse one level at a time until a single node remains.
    while width > 1:
        next_width = tree_width(width)
        group_sizes = tree_groups(width, next_width)
        level_keys = [(token, level, i) for i in range(next_width)]
        for size, key in zip(group_sizes, level_keys):
            # Each node reduces a contiguous group of the previous level's keys.
            graph[key] = (func, list(take(size, pending)))
        width = next_width
        pending = iter(level_keys)
        level += 1

    return graph
async def recommendations(self, target: int) -> dict:
    """
    Make scale up/down recommendations based on current state and target
    """
    current = len(self.plan)
    not_yet_arrived = self.requested - self.observed

    # Already at the target size: nothing to do, reset close bookkeeping.
    if target == current:
        self.close_counts.clear()
        return {"status": "same"}

    # Need more workers: recommend scaling up to the target.
    if target > current:
        self.close_counts.clear()
        return {"status": "up", "n": target}

    # target < current: choose workers to retire, preferring ones that
    # were requested but have not been observed yet.
    to_close = set()
    if not_yet_arrived:
        to_close.update(toolz.take(current - target, not_yet_arrived))

    # Still over target after discarding unarrived workers: ask the
    # policy which running workers should go.
    if target < current - len(to_close):
        to_close.update(await self.workers_to_close(target=target))

    # A worker is only firmly closed once it has been recommended for
    # closure on ``wait_count`` consecutive calls.
    firmly_close = set()
    for worker in to_close:
        self.close_counts[worker] += 1
        if self.close_counts[worker] >= self.wait_count:
            firmly_close.add(worker)

    # Clear out counts for workers no longer slated for closure.
    for worker in list(self.close_counts):
        if worker in firmly_close or worker not in to_close:
            del self.close_counts[worker]

    if firmly_close:
        return {"status": "down", "workers": list(firmly_close)}
    return {"status": "same"}
def f(_):
    # Pull the first five items from the "a" subscription; the single
    # (ignored) argument keeps the signature map-compatible.
    subscription = Sub("a")
    return [item for item in toolz.take(5, subscription)]