def func():
    with worker_client() as ee:
        x = ee.submit(inc, 1, workers=a_address)
        y = ee.submit(inc, 2, workers=b_address)
        xx, yy = ee.gather([x, y])
    return xx, yy
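# Note: a_address and b_address in the snippet above are assumed module-level
# worker addresses, and inc is the usual increment helper. A variant sketch
# that passes the addresses in explicitly avoids the free variables:
from distributed import worker_client

def inc(x):
    return x + 1

def func_explicit(a_address, b_address):
    with worker_client() as ee:
        x = ee.submit(inc, 1, workers=a_address)
        y = ee.submit(inc, 2, workers=b_address)
        return ee.gather([x, y])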
def f(i):
    with worker_client(separate_thread=False) as client:
        get_worker().count += 1
        assert get_worker().count <= 3
        sleep(random.random() / 40)
        assert get_worker().count <= 3
        get_worker().count -= 1
    return i
def mysum():
    result = 0
    sub_tasks = [delayed(double)(i) for i in range(100)]
    with worker_client() as lc:
        futures = lc.compute(sub_tasks)
        for f in as_completed(futures):
            result += f.result()
    return result
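# Minimal driver sketch for mysum above (an assumption, not part of the
# original): start a local cluster and submit mysum as a task; double is the
# helper the delayed sub-tasks are assumed to call.
from dask import delayed
from distributed import Client

def double(x):
    return 2 * x

if __name__ == '__main__':
    client = Client()                # starts a local cluster
    future = client.submit(mysum)    # mysum opens worker_client internally
    print(future.result())           # sum(double(i) for i in range(100)) == 9900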
def f(i):
    with worker_client() as c:
        q = Queue('x', client=c)
        for _ in range(100):
            future = q.get()
            x = future.result()
            y = c.submit(inc, x)
            q.put(y)
            sleep(0.01)
        result = q.get().result()
        return result
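# Hypothetical driver for the Queue ping-pong above: seed the shared queue
# with a scattered future, run one task, and read the final value; inc is
# assumed to increment by one.
from distributed import Client, Queue

def inc(x):
    return x + 1

if __name__ == '__main__':
    client = Client()
    q = Queue('x')
    q.put(client.scatter(0))             # seed with a future wrapping 0
    print(client.submit(f, 0).result())  # 100 increments of 0 -> 100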
def func():
    with worker_client() as c:
        correct = True
        for data in [[1, 2], (1, 2), {1, 2}]:
            futures = c.scatter(data)
            correct &= type(futures) == type(data)
        o = object()
        futures = c.scatter({'x': o})
        correct &= get_worker().data['x'] is o
        return correct
def f(i):
    with worker_client() as c:
        v = Variable('x', client=c)
        for _ in range(NITERS):
            future = v.get()
            x = future.result()
            y = c.submit(inc, x)
            v.set(y)
            sleep(0.01 * random.random())
        result = v.get().result()
        sleep(0.1)  # allow fire-and-forget messages to clear
        return result
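# Companion driver sketch for the Variable race above; NITERS and inc are
# assumed helpers. Several concurrent tasks increment a shared counter that
# lives behind the distributed Variable 'x'.
from distributed import Client, Variable

NITERS = 50

def inc(x):
    return x + 1

if __name__ == '__main__':
    client = Client()
    v = Variable('x')
    v.set(client.scatter(0))                       # seed the shared variable
    print(client.gather(client.map(f, range(4))))  # racing incrementers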
def func():
    with worker_client() as c:
        futures = c.scatter([1, 2, 3, 4, 5])
        assert isinstance(futures, (list, tuple))
        assert len(futures) == 5
        x = dict(get_worker().data)
        y = {f.key: i for f, i in zip(futures, [1, 2, 3, 4, 5])}
        assert x == y
        total = c.submit(sum, futures)
        return total.result()
def func():
    with worker_client() as c:
        x = np.ones(5)
        future = c.scatter(x)
        assert future.type == np.ndarray
def f():
    with worker_client() as lc:
        return lc.loop is get_worker().loop
def full_func(*args, **kwargs):
    print('DECORATOR distributed...', args)

    # Pass straight through when the first argument is not an IO path
    if not is_io_path(args[0]):
        return func(*args, **kwargs)

    from distributed import worker_client
    with worker_client(timeout=1000) as e:
        # Collect modification times; this should be processed on a fileworker
        mtimes = e.submit(
            recursive_func_application_with_list_output,
            list(args), get_mtime_from_path,
            resources={'files': 1},
        ).result()
        highest_mtime = np.array(mtimes[1:]).max()

        # If the output exists and is newer than every input, skip recomputation
        if mtimes[0] != -1 and mtimes[0] >= highest_mtime:
            return args[0]

        # Preprocess each input argument (all but the output path) on a fileworker
        nargs = []
        for iarg, arg in enumerate(args):
            if not iarg:
                continue
            res = e.submit(
                recursive_func_application,
                arg, process_input_element,
                resources={'files': 1},
            ).result()
            nargs.append(res)

        result = func(*nargs, **kwargs)

        # Post-process the output; this should be processed on a fileworker
        nresult = e.submit(
            process_output_element,
            result, args[0],
            resources={'files': 1},
        ).result()
        return nresult
def f():
    with worker_client():
        pass
    return threading.current_thread() in get_worker().executor._threads
def func(x):
    with worker_client() as wc:
        y = wc.submit(lambda: 1 + x)
        return wc.gather(y)
def func():
    with worker_client(timeout=0) as wc:
        print("hello")
def mysum():
    with worker_client() as c:
        with c.get_executor() as e:
            return sum(e.map(double, range(30)))
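# Client.get_executor() returns a concurrent.futures-compatible executor, so
# e.map above behaves like ThreadPoolExecutor.map but runs on the cluster.
# A driver sketch, assuming double doubles its input:
from distributed import Client

def double(x):
    return 2 * x

if __name__ == '__main__':
    client = Client()
    print(client.submit(mysum).result())  # 2 * sum(range(30)) == 870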
def f():
    with worker_client():
        return dask.delayed(lambda x: x)(1).compute()
def f(x):
    with worker_client() as c:
        return True
def add(x, y):
    with distributed.worker_client():
        time.sleep(30 * 60)
    return x + y
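# The empty worker_client() block above is not a no-op: entering it secedes
# from the worker's thread pool, so the 30-minute sleep does not occupy a task
# slot. An equivalent sketch using distributed's secede/rejoin directly:
import time
from distributed import secede, rejoin

def add_explicit(x, y):
    secede()             # release this thread's slot in the worker pool
    time.sleep(30 * 60)  # the long blocking wait no longer starves other tasks
    rejoin()             # reacquire a slot before finishing
    return x + y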
def long(delay):
    with worker_client() as c:
        sleep(delay)
def run(
    self,
    matrices: Optional[Union[Union[str, Path], List[Path]]] = None,
    filepath_column: str = "filepath",
    **kwargs,
) -> List[Path]:
    """
    Invert the list of matrices provided. If running in the command line, this
    will look up the prior step's produced manifest for matrix retrieval. If
    running in the workflow, uses the direct output of the prior step.

    Parameters
    ----------
    matrices: Optional[Union[Union[str, Path], List[Path]]]
        A path to a csv manifest to use or directly a list of paths of
        serialized arrays to invert.
        Default: self.step_local_staging_dir.parent / "mappedraw" / manifest.csv
    filepath_column: str
        If providing a path to a csv manifest, the column to use for matrices.
        Default: "filepath"

    Returns
    -------
    inverted: List[Path]
        The list of paths to the inverted matrices.
    """
    # Default matrices value
    if matrices is None:
        matrices = self.step_local_staging_dir.parent / "mappedraw" / "manifest.csv"

    # Get the matrices from the csv if provided a path
    if isinstance(matrices, (str, Path)):
        # Resolve the filepath and check for existence
        matrices = Path(matrices).resolve(strict=True)

        # Read csv
        raw_data = pd.read_csv(matrices)

        # Convert the specified column into a list of paths
        matrices = [Path(f) for f in raw_data[filepath_column]]

    # Storage dir
    inverted_dir = self.step_local_staging_dir / "inverted"

    # Connect to the executor running this task
    with worker_client() as client:
        # Map the inversion function over all matrices
        futures = client.map(
            self._invert_array,
            matrices,
            [inverted_dir for i in range(len(matrices))],
        )

        # Block until all are done
        inversion_infos = client.gather(futures)

    # Configure manifest dataframe for storage tracking
    self.manifest = pd.DataFrame(index=range(len(matrices)), columns=["filepath"])
    for i, path in inversion_infos:
        self.manifest.at[i, "filepath"] = path

    # Save the manifest
    self.manifest.to_csv(self.step_local_staging_dir / "manifest.csv", index=False)

    # Return list of paths
    return list(self.manifest["filepath"])
def _process_table_identifiers(
        pdf: DataFrame,
        dimension_combinations: Optional[List[List[str]]] = None,
        max_combination_length: int = 5) -> List[List[str]]:
    """
    Dask wrapper around extracting identifiers from a single sampled table (pdf).

    This method submits multiple sub-tasks to identify possible identifier
    combinations, waits for them to complete and returns one or more dimension
    combinations.

    Note that the `worker_client` call forces the task to secede from the
    Worker's thread pool, therefore it does not block any other computations
    and cannot cause a deadlock while waiting for sub-tasks to finish.
    """
    with timed_block('[idparser] Computing number of rows took {:.3f} seconds',
                     logger, logging.DEBUG):
        num_rows = len(pdf)

    with timed_block('[idparser] Pruning columns took {:.3f} seconds',
                     logger, logging.DEBUG):
        # filter out columns that contain at least X% null values - null values
        # can't be parts of the primary key
        columns = [
            col for col, count in pdf.count().compute().items()
            if count / num_rows >= NON_NULL_VALUES_RATIO
        ]

    with worker_client(separate_thread=True) as client:  # type: Client
        with timed_block('[idparser] Generating combinations took {:.3f} seconds',
                         logger, logging.DEBUG):
            # explore all possible dimension combinations if none are provided
            if dimension_combinations is None:
                all_possible_combinations = itertools.chain.from_iterable(
                    itertools.combinations(columns, i)
                    for i in range(1, min(max_combination_length, len(columns)) + 1))
                generated_combinations: List[List[str]] = [
                    sorted(combination)
                    for combination in all_possible_combinations
                ]
            else:
                generated_combinations = dimension_combinations

        with timed_block(
                '[idparser] Waiting for all combination tasks took {:.3f} seconds',
                logger, logging.DEBUG):
            with timed_block(
                    '[idparser] Submitting all combination tasks took {:.3f} seconds',
                    logger, logging.DEBUG):
                # submit "per dimension combination" tasks
                futures = client.map(
                    lambda combination: _process_possible_identifier_combination(
                        pdf, combination),
                    generated_combinations,
                    key=[
                        f'comb_{combination}_{str(uuid4())}'
                        for combination in generated_combinations
                    ],
                    # priority=100,
                    # batch_size=32,
                    retries=2,
                )
            results = client.gather(futures)

    return [
        dimensions
        for dimensions, num_duplicates in results
        if num_duplicates == 0
    ]
def func(x):
    with worker_client() as c:
        x = c.submit(inc, x)
        y = c.submit(double, x)
        result = x.result() + y.result()
        return result
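# Driver sketch for the chained-submit pattern above; inc and double are the
# assumed helpers. Note that a future (here x) can be passed straight into a
# further submit call as an argument.
from distributed import Client

def inc(x):
    return x + 1

def double(x):
    return 2 * x

if __name__ == '__main__':
    client = Client()
    print(client.submit(func, 10).result())  # inc(10) + double(inc(10)) == 33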
def f():
    with worker_client() as lc:
        return lc.loop is lc.worker.loop
def go(self):
    with worker_client() as wc:
        futures = [wc.submit(self.go_, pset_i=i, **pset)
                   for i, pset in self.iterpsets()]
        futures = wc.gather(futures)
    return futures