def create_stored_object(
    workflow_id: int,
    wf_module_id: int,
    path: Path,
    stored_at: Optional[timezone.datetime] = None,
) -> StoredObject:
    """
    Write and return a new StoredObject.

    The caller should call enforce_storage_limits() after calling this.

    Raise IntegrityError if a database race prevents saving this. Raise a
    minio error if writing to minio failed. In case of partial completion, a
    StoredObject will exist in the database but no file will be saved in
    minio.
    """
    if stored_at is None:
        stored_at = timezone.now()
    key = _build_key(workflow_id, wf_module_id)
    size = path.stat().st_size
    stored_object = StoredObject.objects.create(
        stored_at=stored_at,
        wf_module_id=wf_module_id,
        bucket=BUCKET,
        key=key,
        size=size,
        hash="unhashed",
    )
    minio.fput_file(BUCKET, key, path)
    return stored_object
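# Usage sketch (illustrative, not part of the original module). It assumes a
# `workflow.cooperative_lock()` context manager (used elsewhere in this file)
# and an `enforce_storage_limits(wf_module)` helper with the semantics the
# docstring above asks for; that helper's exact signature is an assumption.
def _example_store_file(workflow: Workflow, wf_module: WfModule, path: Path) -> None:
    with workflow.cooperative_lock():  # serialize concurrent writers
        # May raise IntegrityError or a minio error; see docstring above.
        create_stored_object(workflow.id, wf_module.id, path)
        enforce_storage_limits(wf_module)  # assumed helper: prune old objects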
def import_zipfile(path: Path) -> clientside.Module:
    """
    Save a zipfile to database and minio and build a `clientside.Module`.

    Raise `WorkbenchModuleImportError` if `path` points to an invalid module.
    Otherwise, do not raise any errors one can sensibly recover from.
    """
    temp_zipfile = ModuleZipfile(path)
    validate_zipfile(temp_zipfile)  # raise WorkbenchModuleImportError
    module_id = temp_zipfile.module_id
    version = temp_zipfile.version
    module_spec = temp_zipfile.get_spec()
    js_module = temp_zipfile.get_optional_js_module() or ""

    minio.fput_file(
        minio.ExternalModulesBucket, "%s/%s" % (module_id, path.name), path
    )
    # Look up by (id_name, source_version_hash); update spec and js_module if
    # a row already exists, create the row otherwise. Keeping the mutable
    # fields in `defaults` prevents a duplicate-row race on reimport.
    ModuleVersion.objects.update_or_create(
        id_name=module_id,
        source_version_hash=version,
        defaults={"spec": asdict(module_spec), "js_module": js_module},
    )

    return clientside.Module(module_spec, js_module)
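# Usage sketch (illustrative, not part of the original module). Only
# import_zipfile() and WorkbenchModuleImportError come from this codebase;
# the logging setup is an assumption.
def _example_import(path: Path) -> Optional[clientside.Module]:
    import logging

    logger = logging.getLogger(__name__)
    try:
        return import_zipfile(path)
    except WorkbenchModuleImportError:
        # Invalid module: the zipfile failed validation. This is the one
        # recoverable error; report it and let the caller decide what to do.
        logger.exception("Module import failed for %s", path.name)
        return None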
def cache_render_result(
    workflow: Workflow, wf_module: WfModule, delta_id: int, result: RenderResult
) -> None:
    """
    Save `result` for later viewing.

    Raise AssertionError if `delta_id` is not what we expect.

    Since this alters data, be sure to call it within a lock:

        with workflow.cooperative_lock():
            wf_module.refresh_from_db()  # may change delta_id
            cache_render_result(workflow, wf_module, delta_id, result)
    """
    assert delta_id == wf_module.last_relevant_delta_id
    assert result is not None

    json_bytes = json_encode(result.json).encode("utf-8")
    if not result.table.metadata.columns:
        if result.errors:
            status = "error"
        else:
            status = "unreachable"
    else:
        status = "ok"

    wf_module.cached_render_result_delta_id = delta_id
    wf_module.cached_render_result_errors = result.errors
    wf_module.cached_render_result_error = ""  # DELETEME
    wf_module.cached_render_result_quick_fixes = []  # DELETEME
    wf_module.cached_render_result_status = status
    wf_module.cached_render_result_json = json_bytes
    wf_module.cached_render_result_columns = result.table.metadata.columns
    wf_module.cached_render_result_nrows = result.table.metadata.n_rows

    # Now we get to the part where things can end up inconsistent. Try to
    # err on the side of not-caching when that happens.
    delete_parquet_files_for_wf_module(
        workflow.id, wf_module.id
    )  # makes old cache inconsistent
    wf_module.save(update_fields=WF_MODULE_FIELDS)  # makes new cache inconsistent

    if result.table.metadata.columns:
        # only write non-zero-column tables
        with tempfile_context() as parquet_path:
            parquet.write(parquet_path, result.table.table)
            minio.fput_file(
                BUCKET,
                parquet_key(workflow.id, wf_module.id, delta_id),
                parquet_path,
            )  # makes new cache consistent
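# Usage sketch (illustrative, not part of the original module). It expands the
# docstring's lock example into a full caller: after refreshing from the
# database, re-check last_relevant_delta_id, because a newer delta may have
# arrived while rendering. `render_wf_module()` is a hypothetical stand-in for
# the real renderer.
def _example_render_and_cache(workflow: Workflow, wf_module: WfModule, delta_id: int) -> None:
    result = render_wf_module(wf_module)  # hypothetical: returns a RenderResult
    with workflow.cooperative_lock():
        wf_module.refresh_from_db()  # may change last_relevant_delta_id
        if wf_module.last_relevant_delta_id != delta_id:
            return  # stale render: caching now would fail the assertion above
        cache_render_result(workflow, wf_module, delta_id, result)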