def compute_batch_parallel(fxn, keys):
    """Apply `fxn` to every key using a multi-threaded executor.

    Intended as a building block for subclasses of LazyDict that need an
    implementation of `compute_batch`.

    Because of Python's GIL, this is only faster than a plain loop when
    `fxn` is IO bound.

    Args:
        fxn (Callable): function to be called in parallel
        keys (list): a list of keys

    Returns:
        list: same contents as [fxn(key) for key in keys]
    """
    # Placeholder stored in every slot until the executor reports a value for it.
    missing = Failure.silent('No result returned by SimpleExecutor.')
    outputs = [missing] * len(keys)

    with SimpleExecutor(fxn) as executor:
        # Each job is submitted under its position in `keys`; results come
        # back paired with that position, which tells us which slot to fill.
        for position, key in enumerate(keys):
            executor.submit(position, key)
        for position, value in executor.results():
            outputs[position] = value

    # Sanity check: no slot may still hold the placeholder.
    for output in outputs:
        assert output != missing
    return outputs
def test_context_manager(self):
    """Check that a SimpleExecutor used in a `with` block returns fxn(x) under each submitted key."""
    def double(value):
        return 2 * value

    expected = dict((n, 2 * n) for n in range(10))

    with SimpleExecutor(double, max_workers=2) as executor:
        # Key and input coincide: job n is submitted with input n.
        for n in range(10):
            executor.submit(n, n)
        actual = dict(executor.results())

    assert actual == expected
def _get_all_hits(get_page):
    """Given a function that retrieves a single page of HITs, retrieve all HITs.

    WARNING:
        - this function can be quite slow.
        - results are returned in no particular order.

    Args:
        get_page (Callable[[int, int], list[HIT]]): a function which takes a
            page size and page number, and returns a list of HITs. It is
            called with keyword arguments:
                page_size (int)
                page_number (int)
            The object returned for page 1 must also expose a
            `TotalNumResults` attribute convertible to int.

    Returns:
        generator[HIT]
    """
    page_size = 100  # HITs per page

    # compute the pages that need to be fetched
    search_results = get_page(page_size=page_size, page_number=1)
    total_hits = int(search_results.TotalNumResults)
    # Ceiling division done with integer arithmetic only, so total_pages stays
    # an int (and range() keeps working) even when `/` means true division.
    full_pages, leftover = divmod(total_hits, page_size)
    total_pages = full_pages + bool(leftover)
    page_nums = list(range(1, total_pages + 1))

    # fetch all the pages in parallel
    fetch_page = lambda i: get_page(page_size=page_size, page_number=i)

    with SimpleExecutor(fetch_page) as executor:
        # The page number doubles as the job key.
        for i in page_nums:
            executor.submit(i, i)

        for i, page in verboserate(executor.results(), desc='Fetching pages of HITs', total=total_pages):
            if isinstance(page, Failure):
                # Best effort: report the failed page and keep going with the rest.
                # Parenthesized form prints identically on Python 2 and is valid on Python 3.
                print(page.traceback)
                continue
            for hit in page:
                yield hit