def test_set_write_scopes():
    """Verify that write() honors explicitly-set io scopes.

    Simulates a remote worker by replacing the current io scopes with a
    fake pair, then checks the file lands at the joined scope path.
    """
    target_path = "./tests/fixtures/write_scope5.txt"
    # start from a clean slate so the assertion proves THIS call wrote the file
    if os.path.exists(target_path):
        os.remove(target_path)
    # fake write scopes, such as when called on a remote worker:
    _old_scopes = scoping.current_io_scopes()
    scoping.set_io_scopes(["./tests", "fixtures"])
    try:
        write("test", "write_scope5.txt", mode="w")
        assert os.path.isfile(target_path)
    finally:
        # BUGFIX: restore scopes even when write() or the assert fails,
        # so a failure here cannot poison later tests with fake scopes
        scoping.set_io_scopes(_old_scopes)
def _load_urls(urls, cache=None, **kwargs):
    """Load every url in *urls* concurrently, returning results in input order.

    Best-effort: when a load raises, the exception object itself occupies
    that url's slot in the returned list instead of a loaded page.
    """
    scopes = current_io_scopes()

    def _worker(target_url):
        # worker threads don't inherit the caller's io scopes; re-apply them
        set_io_scopes(scopes)
        return load(target_url, cache=cache, **kwargs)

    results = {}
    with ThreadPoolExecutor(max_workers=8) as pool:
        futures = {pool.submit(_worker, u): u for u in urls}
        for done in as_completed(futures):
            u = futures[done]
            try:
                results[u] = done.result()
            except Exception as exc:
                # deliberate swallow: record the failure in-slot and keep going
                results[u] = exc
                log.error("Loading {} generated an exception: {}".format(
                    u, exc))
    return [results[u] for u in urls]
def load_using_loader(url_or_handle, decompressor, loader, cache, **kwargs):
    """Decompress and deserialize *url_or_handle* via *decompressor* and *loader*.

    When given a url (not an open handle), a DecodeError/ValueError triggers
    exactly one retry with the cache purged; a second failure propagates.
    """
    # already an open handle: no caching/retry machinery applies
    if is_handle(url_or_handle):
        with decompressor(url_or_handle) as stream:
            return loader(stream, **kwargs)

    url = url_or_handle
    try:
        with read_handle(url, cache=cache) as handle, decompressor(handle) as stream:
            return loader(stream, **kwargs)
    except (DecodeError, ValueError):
        log.warning(
            "While loading '%s' an error occurred. Purging cache once and trying again; if this fails we will raise an Exception! Current io scopes: %r",
            url,
            current_io_scopes(),
        )
        # since this may have been cached, it's our responsibility to try again once
        # the recursive call receives a handle, so a repeat DecodeError propagates
        with read_handle(url, cache="purge") as handle:
            return load_using_loader(handle, decompressor, loader, cache, **kwargs)
def batch_save(save_ops: List[Tuple], num_workers: int = 16):
    """Run many save operations concurrently on a thread pool.

    Each entry of *save_ops* is either ``(thing, url)`` or
    ``(thing, url, kwargs_dict)``. Results are returned in input order;
    any worker exception surfaces from its ``future.result()`` call.
    """
    scopes = current_io_scopes()
    save_context = CaptureSaveContext.current_save_context()

    def _worker(op: Tuple):
        # pool threads don't inherit caller io scopes; re-apply them first
        set_io_scopes(scopes)
        if len(op) == 2:
            thing, url = op
            return save(thing, url, save_context=save_context)
        if len(op) == 3:
            thing, url, extra_kwargs = op
            return save(thing, url, save_context=save_context, **extra_kwargs)
        raise ValueError(f'unknown save tuple size: {len(op)}')

    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        futures = [pool.submit(_worker, op) for op in save_ops]
        return [f.result() for f in futures]
def _return_io_scope(io_scope_path):
    """Enter *io_scope_path*, pause briefly, and report the innermost active scope.

    The random sleep encourages thread interleaving so tests can detect
    scope leakage between concurrent workers.
    """
    with io_scope(io_scope_path):
        delay = np.random.uniform(0.05, 0.1)
        time.sleep(delay)
        return current_io_scopes()[-1]