Esempio n. 1
0
async def _step_to_text_stream(step: Step, format: Literal["csv", "json"],
                               *args) -> Awaitable[SubprocessOutputFileLike]:
    """Stream the step's cached render result as CSV or JSON text.

    The cached Parquet file is downloaded and handed to
    `/usr/bin/parquet-to-text-stream`; any extra `*args` are appended to that
    command line. A result with no columns is answered without spawning a
    subprocess: an empty body for "csv", `[]` otherwise.

    Raise CorruptCacheError if there is no cached result or it is invalid.

    Raise OSError if `/usr/bin/parquet-to-text-stream` cannot start.
    """
    # NOTE(review): on an `async def`, the return annotation normally names
    # the awaited value rather than `Awaitable[...]`, and this function can
    # also return `io.BytesIO` -- consider tightening the annotation.
    crr = step.cached_render_result
    if crr is None:
        raise CorruptCacheError

    if not crr.table_metadata.columns:
        # Zero columns: nothing to convert, so skip the subprocess entirely.
        return io.BytesIO(b"" if format == "csv" else b"[]")

    # downloaded_parquet_file() raises CorruptCacheError
    with downloaded_parquet_file(crr) as parquet_path:
        command = ["/usr/bin/parquet-to-text-stream", str(parquet_path), format]
        command.extend(args)
        text_stream = SubprocessOutputFileLike(command)
        # Stay inside the context manager until stdout is ready; after that
        # it's okay to delete the file (i.e., exit the context manager).
        await text_stream.stdout_ready()
        return text_stream
Esempio n. 2
0
def _with_downloaded_cached_render_result(
        ctx: contextlib.ExitStack, maybe_crr: Optional[CachedRenderResult],
        dir: Path) -> Tuple[Optional[Path], TableMetadata]:
    """Download a cached render result's Parquet file for as long as `ctx` lives.

    Return `(parquet_path, table_metadata)` on success. Return
    `(None, TableMetadata())` when `maybe_crr` is None or when the cache
    turns out to be corrupt.

    The download context manager is registered on `ctx`, so it is exited
    when the caller's ExitStack unwinds.
    """
    if maybe_crr is None:
        return None, TableMetadata()

    try:
        download = rendercache.downloaded_parquet_file(maybe_crr, dir=dir)
        path = ctx.enter_context(download)
        return path, maybe_crr.table_metadata
    except rendercache.CorruptCacheError:
        # Most likely a race, and that's acceptable: a missing cache is
        # treated as "there is no input". (Users can see this, but it
        # should be rare.)
        return None, TableMetadata()
Esempio n. 3
0
def wfmodule_public_csv(request: HttpRequest, wf_module: WfModule):
    """Serve the module's cached render result as a CSV attachment.

    When there is no cached result, or the cache is corrupt, queue a render
    and answer 503 with a `Retry-After` header instead.
    """
    def retry_later_response():
        """
        Queue a render and build a response telling the client to retry.

        Publishing URLs that aren't guaranteed to work is a *bug*. Since we
        do publish them, be honest about it: answer with the 500-level
        status code the situation deserves.
        """
        # There is no cached result and no way to know when one will exist,
        # so all the client can do is come back later.
        workflow = wf_module.workflow
        queue_render = async_to_sync(rabbitmq.queue_render)
        queue_render(workflow.id, workflow.last_delta_id)
        retry = HttpResponse(b"", content_type="text/csv", status=503)
        retry["Retry-After"] = "30"
        return retry

    cached_result = wf_module.cached_render_result
    if not cached_result:
        return retry_later_response()

    try:
        with downloaded_parquet_file(cached_result) as parquet_path:
            command = ["/usr/bin/parquet-to-text-stream", str(parquet_path), "csv"]
            csv_stream = SubprocessOutputFileLike(command)
            # It's okay to delete the file now (i.e., exit the context manager)
    except CorruptCacheError:
        return retry_later_response()

    download_name = "Workflow %d - %s-%d.csv" % (
        cached_result.workflow_id,
        wf_module.module_id_name,
        wf_module.id,
    )
    return FileResponse(
        csv_stream,
        as_attachment=True,
        filename=download_name,
        content_type="text/csv; charset=utf-8; header=present",
    )
Esempio n. 4
0
def _execute_step_save(
    workflow: Workflow, step: Step, result: LoadedRenderResult
) -> SaveResult:
    """Call rendercache.cache_render_result() and build notifications.OutputDelta.

    All this runs synchronously within a database lock. (It's a separate
    function so that when we're done awaiting it, we can continue executing in
    a context that doesn't use a database thread.)

    An OutputDelta is only built when the step has notifications enabled, the
    workflow has an owner, a usable stale cached result existed before the
    save, and the freshly cached result differs from it (in status, errors,
    json, table metadata or Parquet contents).

    Return a SaveResult holding the step's new cached render result and
    either the OutputDelta or None.

    Raise UnneededExecution if the Step has changed in the interim.
    """
    # raises UnneededExecution
    with contextlib.ExitStack() as exit_stack:
        # Everything entered on exit_stack (the step lock and any downloaded
        # Parquet files) is released when this `with` block exits.
        safe_step = exit_stack.enter_context(locked_step(workflow, step))
        # Only gather the "before" state when a notification could actually
        # be sent: notifications enabled and someone to notify.
        if safe_step.notifications and workflow.owner_id is not None:
            stale_crr = safe_step.get_stale_cached_render_result()
            if stale_crr is None:
                stale_parquet_file = None
            elif stale_crr.status == "ok":
                try:
                    stale_parquet_file = exit_stack.enter_context(
                        rendercache.downloaded_parquet_file(stale_crr)
                    )
                except rendercache.CorruptCacheError:
                    # No, let's not send an email. Corrupt cache probably means
                    # we've been messing with our codebase.
                    logger.exception(
                        "Ignoring CorruptCacheError on workflow %d, step %d because we are about to overwrite it",
                        workflow.id,
                        step.id,
                    )
                    # Forget the stale result so the comparison below is
                    # skipped entirely.
                    stale_crr = None
                    stale_parquet_file = None
            else:
                # status is 'error'/'unreachable'. There's no Parquet file.
                stale_parquet_file = None
        else:
            stale_crr = None
            stale_parquet_file = None

        # Overwrite the cache. From here on, safe_step.cached_render_result
        # is read back as the "fresh" result.
        rendercache.cache_render_result(
            workflow, safe_step, step.last_relevant_delta_id, result
        )

        is_changed = False  # nothing to email, usually
        if stale_crr is not None:
            fresh_crr = safe_step.cached_render_result

            # Cheap comparison first: anything stored alongside the table.
            if (
                fresh_crr.status != stale_crr.status
                or fresh_crr.errors != stale_crr.errors
                or fresh_crr.json != stale_crr.json
                or fresh_crr.table_metadata != stale_crr.table_metadata
            ):
                # Output other than table data has changed (e.g., nRows)
                is_changed = True

            # Reaching this branch means the statuses are equal, so the stale
            # status is "ok" too and stale_parquet_file was downloaded above.
            if not is_changed and fresh_crr.status == "ok":
                # Download the new parquet file and compare to the old one
                fresh_parquet_file = exit_stack.enter_context(
                    rendercache.downloaded_parquet_file(fresh_crr)
                )
                is_changed = not cjwparquet.are_files_equal(
                    stale_parquet_file, fresh_parquet_file
                )

        if is_changed:
            with connection.cursor() as cursor:
                # Don't import cjworkbench.models.userprofile: it relies on
                # settings.FREE_TIER_USAGE_LIMITS, but renderer doesn't set it.
                #
                # TODO nix django-ORM.
                cursor.execute(
                    """
                    SELECT locale_id
                    FROM cjworkbench_userprofile
                    WHERE user_id = %s
                    """,
                    [safe_step.workflow.owner_id],
                )
                # NOTE(review): assumes the owner always has a userprofile
                # row; fetchone() returns None otherwise -- confirm.
                locale_id = cursor.fetchone()[0]
            maybe_delta = notifications.OutputDelta(
                user=safe_step.workflow.owner,
                workflow=safe_step.workflow,
                step=safe_step,
                locale_id=locale_id,
            )
        else:
            maybe_delta = None

        return SaveResult(safe_step.cached_render_result, maybe_delta)