Exemple #1
0
def open_cached_render_result(
        crr: CachedRenderResult) -> ContextManager[RenderResult]:
    """
    Yield the RenderResult that `cache_render_result()` stored for `crr`.

    Zero-column tables are never written to the cache, so for them the
    result is rebuilt from `crr` alone. Any other table is loaded through
    `load_cached_render_result()` into a file supplied by
    `tempfile_context()`; the yielded RenderResult is backed by that mmapped
    file on disk, so it doesn't require much physical RAM.

    Raise CorruptCacheError if the cached data does not match `crr`. That can
    mean any of:

        * the cached Parquet file is corrupt;
        * the cached Parquet file is missing;
        * `crr` is stale -- the cached result belongs to a different delta.
          A `Workflow.cooperative_lock()` could detect this case too, should
          the caller want to tell it apart from the other two.

    NOTE(review): this function is a generator, while its annotation promises
    a ContextManager. Presumably it is meant to be wrapped by
    `contextlib.contextmanager`; no decorator is visible here -- confirm.
    """
    metadata = crr.table_metadata

    if metadata.columns:
        with tempfile_context(prefix="cached-render-result") as arrow_path:
            # May raise CorruptCacheError; leaving the `with` block on that
            # error is what cleans up `arrow_path`.
            loaded = load_cached_render_result(crr, arrow_path)

            yield loaded
    else:
        # Nothing was cached for a zero-column table: only the row count and
        # the error/JSON payloads carried by `crr` are needed.
        empty_table = ArrowTable.from_zero_column_metadata(
            TableMetadata(metadata.n_rows, []))
        yield RenderResult(empty_table, crr.errors, crr.json)
Exemple #2
0
def load_cached_render_result(crr: CachedRenderResult,
                              path: Path) -> RenderResult:
    """
    Return the RenderResult that `cache_render_result()` stored for `crr`.

    The cached Parquet file is converted to an Arrow file written at `path`,
    and the returned RenderResult is backed by that mmapped file. It doesn't
    require much physical RAM: the Linux kernel may page out data we aren't
    using. Zero-column tables are never written to the cache, so for them the
    result is rebuilt from `crr` alone and `path` is left untouched.

    Raise CorruptCacheError if the cached data does not match `crr`. That can
    mean any of:

        * the cached Parquet file is corrupt;
        * the cached Parquet file is missing;
        * `crr` is stale -- the cached result belongs to a different delta.
          A `Workflow.cooperative_lock()` could detect this case too, should
          the caller want to tell it apart from the other two.
    """
    metadata = crr.table_metadata

    if not metadata.columns:
        # Nothing was cached for a zero-column table: only the row count and
        # the error/JSON payloads carried by `crr` are needed.
        empty_table = ArrowTable.from_zero_column_metadata(
            TableMetadata(metadata.n_rows, []))
        return RenderResult(empty_table, crr.errors, crr.json)

    # downloaded_parquet_file() raises CorruptCacheError itself. Only the
    # conversion step is guarded, so nothing else gets re-wrapped.
    with downloaded_parquet_file(crr) as parquet_path:
        try:
            cjwparquet.convert_parquet_file_to_arrow_file(parquet_path, path)
        except pyarrow.ArrowIOError as err:
            # Report an unreadable Parquet file as a cache problem, keeping
            # the original error as the cause.
            raise CorruptCacheError from err

    # TODO handle validation errors => CorruptCacheError
    loaded_table = ArrowTable.from_trusted_file(path, metadata)
    return RenderResult(loaded_table, crr.errors, crr.json)