Example 1
0
def test_set_write_scopes():
    """Verify that write() resolves a relative filename against the active IO scopes.

    Installs fake IO scopes (as would happen when called on a remote worker),
    writes through the scoped ``write`` helper, and checks the file lands at
    the path obtained by joining the scope components.
    """
    target_path = "./tests/fixtures/write_scope5.txt"
    # Start from a clean slate so a stale file can't make the assert pass.
    if os.path.exists(target_path):
        os.remove(target_path)

    # fake write scopes, such as when called on a remote worker:
    _old_scopes = scoping.current_io_scopes()
    scoping.set_io_scopes(["./tests", "fixtures"])
    try:
        write("test", "write_scope5.txt", mode="w")

        assert os.path.isfile(target_path)
    finally:
        # Restore the caller's scopes even if write() or the assert fails;
        # otherwise the fake scopes leak into every later test.
        scoping.set_io_scopes(_old_scopes)
Example 2
0
def _load_urls(urls, cache=None, **kwargs):
    """Fetch every URL concurrently and return results in input order.

    A URL whose load raises is represented by the exception object itself
    in the returned list (best-effort semantics); the error is also logged.
    """
    caller_io_scopes = current_io_scopes()

    def _scoped_load(target_url):
        # Worker threads do not inherit the caller's IO scopes; re-apply them
        # before delegating to load().
        set_io_scopes(caller_io_scopes)
        return load(target_url, cache=cache, **kwargs)

    results_by_url = {}
    with ThreadPoolExecutor(max_workers=8) as pool:
        pending = {pool.submit(_scoped_load, u): u for u in urls}
        for done in as_completed(pending):
            target = pending[done]
            try:
                results_by_url[target] = done.result()
            except Exception as exc:
                # Record the exception in place of a result instead of failing
                # the whole batch.
                results_by_url[target] = exc
                log.error("Loading {} generated an exception: {}".format(
                    target, exc))
    # Completion order is arbitrary; re-emit in the caller's order.
    return [results_by_url[u] for u in urls]
Example 3
0
def load_using_loader(url_or_handle, decompressor, loader, cache, **kwargs):
    """Open *url_or_handle*, run it through *decompressor*, and parse it with *loader*.

    If given an already-open handle, decompress and load it directly. If given
    a URL, read it (honoring *cache*); on a DecodeError/ValueError the cache
    entry is assumed stale, so the cache is purged once and the load retried.
    Extra **kwargs are forwarded to *loader*. Returns the loaded result.
    """
    if is_handle(url_or_handle):
        with decompressor(url_or_handle) as decompressor_handle:
            result = loader(decompressor_handle, **kwargs)
    else:
        url = url_or_handle
        try:
            with read_handle(url, cache=cache) as handle:
                with decompressor(handle) as decompressor_handle:
                    result = loader(decompressor_handle, **kwargs)
        except (DecodeError, ValueError):
            log.warning(
                "While loading '%s' an error occurred. Purging cache once and trying again; if this fails we will raise an Exception! Current io scopes: %r",
                url,
                current_io_scopes(),
            )
            # since this may have been cached, it's our responsibility to try again once
            # since we use a handle here, the next DecodeError should propagate upwards
            # (recursing with a handle takes the is_handle branch, which has no
            # try/except, so the retry cannot loop forever)
            with read_handle(url, cache="purge") as handle:
                result = load_using_loader(handle, decompressor, loader, cache,
                                           **kwargs)
    return result
Example 4
0
def batch_save(save_ops: List[Tuple], num_workers: int = 16):
    """Execute a batch of save operations on a thread pool.

    Each entry of *save_ops* is either ``(obj, url)`` or
    ``(obj, url, kwargs_dict)``; anything else raises ValueError.
    Returns the individual save results in submission order.
    """
    caller_io_scopes = current_io_scopes()
    current_save_context = CaptureSaveContext.current_save_context()

    def _scoped_save(op: Tuple):
        # Propagate the caller's IO scopes and save context into the worker.
        set_io_scopes(caller_io_scopes)
        arity = len(op)
        if arity == 2:
            return save(op[0], op[1], save_context=current_save_context)
        if arity == 3:
            return save(op[0],
                        op[1],
                        save_context=current_save_context,
                        **(op[2]))
        raise ValueError(f'unknown save tuple size: {len(op)}')

    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        futures = [pool.submit(_scoped_save, op) for op in save_ops]
        # result() re-raises any worker exception; order matches save_ops.
        return [f.result() for f in futures]
Example 5
0
 def _return_io_scope(io_scope_path):
     """Enter *io_scope_path*, pause briefly, and return the innermost active scope.

     NOTE(review): the random 50-100ms sleep suggests this is a helper for a
     concurrency/scope-isolation test — confirm against the caller. The one-space
     indentation implies this def is nested inside an enclosing scope not shown here.
     """
     with io_scope(io_scope_path):
         time.sleep(np.random.uniform(0.05, 0.1))
         return current_io_scopes()[-1]