async def _step_to_text_stream(
    step: Step, format: Literal["csv", "json"], *args
) -> SubprocessOutputFileLike:
    """Download the step's cached result and stream it as CSV/JSON.

    Return a file-like object that streams the subprocess's stdout. (This is
    an ``async def``, so the annotation names the *awaited* value — annotating
    ``Awaitable[...]`` here would make callers' awaits look double-wrapped.)

    Raise CorruptCacheError if there is no cached result or it is invalid.

    Raise OSError if `/usr/bin/parquet-to-text-stream` cannot start.
    """
    cached_result = step.cached_render_result
    if cached_result is None:
        raise CorruptCacheError
    if not cached_result.table_metadata.columns:
        # Zero-column result: emit a valid empty stream without spawning a
        # subprocess. (An empty CSV is b""; an empty JSON array is b"[]".)
        if format == "csv":
            return io.BytesIO(b"")
        else:
            return io.BytesIO(b"[]")

    with downloaded_parquet_file(cached_result) as parquet_path:
        output = SubprocessOutputFileLike(
            ["/usr/bin/parquet-to-text-stream", str(parquet_path), format, *args]
        )
        await output.stdout_ready()
        # It's okay to delete the file now (i.e., exit the context manager)
    return output
def _with_downloaded_cached_render_result(
    ctx: contextlib.ExitStack, maybe_crr: Optional[CachedRenderResult], dir: Path
) -> Tuple[Optional[Path], TableMetadata]:
    """Download `maybe_crr`'s Parquet file into `dir`; return (path, metadata).

    The downloaded file is registered on `ctx`, so it survives until the
    caller's ExitStack closes. Return `(None, TableMetadata())` when there is
    no cached render result or the cache turns out to be corrupt.
    """
    no_input = (None, TableMetadata())
    if maybe_crr is None:
        return no_input
    try:
        parquet_path = ctx.enter_context(
            rendercache.downloaded_parquet_file(maybe_crr, dir=dir)
        )
    except rendercache.CorruptCacheError:
        # This is probably a race. That's okay. Treat missing cache as,
        # "there is no input". (This is user-visible but likely uncommon.)
        return no_input
    return (parquet_path, maybe_crr.table_metadata)
def wfmodule_public_csv(request: HttpRequest, wf_module: WfModule):
    """Serve the step's cached render result as a public CSV download.

    When no valid cached result exists, queue a render and answer 503 with a
    Retry-After header so the client can come back once rendering finishes.
    """

    def schedule_render_and_suggest_retry():
        """
        Schedule a render and return a response asking the user to retry.

        It is a *bug* that we publish URLs that aren't guaranteed to work.
        Because we publish URLs that do not work, let's be transparent and
        give them the 500-level error code they deserve.
        """
        # We don't have a cached result, and we don't know how long it'll
        # take to get one. The user will simply need to try again....
        nonlocal wf_module
        workflow = wf_module.workflow
        async_to_sync(rabbitmq.queue_render)(workflow.id, workflow.last_delta_id)
        retry_response = HttpResponse(b"", content_type="text/csv", status=503)
        retry_response["Retry-After"] = "30"
        return retry_response

    crr = wf_module.cached_render_result
    if not crr:
        return schedule_render_and_suggest_retry()

    try:
        with downloaded_parquet_file(crr) as parquet_path:
            csv_stream = SubprocessOutputFileLike(
                ["/usr/bin/parquet-to-text-stream", str(parquet_path), "csv"]
            )
        # It's okay to delete the file now (i.e., exit the context manager)
    except CorruptCacheError:
        return schedule_render_and_suggest_retry()

    download_name = "Workflow %d - %s-%d.csv" % (
        crr.workflow_id,
        wf_module.module_id_name,
        wf_module.id,
    )
    return FileResponse(
        csv_stream,
        as_attachment=True,
        filename=download_name,
        content_type="text/csv; charset=utf-8; header=present",
    )
def _execute_step_save(
    workflow: Workflow, step: Step, result: LoadedRenderResult
) -> SaveResult:
    """Call rendercache.cache_render_result() and build notifications.OutputDelta.

    All this runs synchronously within a database lock. (It's a separate
    function so that when we're done awaiting it, we can continue executing in
    a context that doesn't use a database thread.)

    Raise UnneededExecution if the Step has changed in the interim.
    """
    # raises UnneededExecution
    with contextlib.ExitStack() as exit_stack:
        safe_step = exit_stack.enter_context(locked_step(workflow, step))
        # Only download the stale (pre-render) result when we might send a
        # notification email: notifications are enabled and there's an owner.
        if safe_step.notifications and workflow.owner_id is not None:
            stale_crr = safe_step.get_stale_cached_render_result()
            if stale_crr is None:
                stale_parquet_file = None
            elif stale_crr.status == "ok":
                try:
                    stale_parquet_file = exit_stack.enter_context(
                        rendercache.downloaded_parquet_file(stale_crr)
                    )
                except rendercache.CorruptCacheError:
                    # No, let's not send an email. Corrupt cache probably means
                    # we've been messing with our codebase.
                    logger.exception(
                        "Ignoring CorruptCacheError on workflow %d, step %d because we are about to overwrite it",
                        workflow.id,
                        step.id,
                    )
                    stale_crr = None
                    stale_parquet_file = None
            else:
                # status is 'error'/'unreachable'. There's no Parquet file.
                stale_parquet_file = None
        else:
            stale_crr = None
            stale_parquet_file = None
        # Overwrite the cache with the fresh result. (The stale file, if any,
        # was downloaded above, before this overwrite.)
        rendercache.cache_render_result(
            workflow, safe_step, step.last_relevant_delta_id, result
        )
        is_changed = False  # nothing to email, usually
        if stale_crr is not None:
            fresh_crr = safe_step.cached_render_result
            if (
                fresh_crr.status != stale_crr.status
                or fresh_crr.errors != stale_crr.errors
                or fresh_crr.json != stale_crr.json
                or fresh_crr.table_metadata != stale_crr.table_metadata
            ):
                # Output other than table data has changed (e.g., nRows)
                is_changed = True
            if not is_changed and fresh_crr.status == "ok":
                # Download the new parquet file and compare to the old one
                fresh_parquet_file = exit_stack.enter_context(
                    rendercache.downloaded_parquet_file(fresh_crr)
                )
                is_changed = not cjwparquet.are_files_equal(
                    stale_parquet_file, fresh_parquet_file
                )
        if is_changed:
            with connection.cursor() as cursor:
                # Don't import cjworkbench.models.userprofile: it relies on
                # settings.FREE_TIER_USAGE_LIMITS, but renderer doesn't set it.
                #
                # TODO nix django-ORM.
                cursor.execute(
                    """
                    SELECT locale_id
                    FROM cjworkbench_userprofile
                    WHERE user_id = %s
                    """,
                    [safe_step.workflow.owner_id],
                )
                locale_id = cursor.fetchone()[0]
            maybe_delta = notifications.OutputDelta(
                user=safe_step.workflow.owner,
                workflow=safe_step.workflow,
                step=safe_step,
                locale_id=locale_id,
            )
        else:
            maybe_delta = None
        return SaveResult(safe_step.cached_render_result, maybe_delta)