def _step_delete_secret_and_build_delta(
    workflow: Workflow, step: Step, param: str
) -> Optional[clientside.Update]:
    """Delete a secret from `step`, or raise.

    Return a `clientside.Update`, or `None` if the database is not modified.

    Raise Workflow.DoesNotExist if the Workflow was deleted.
    """
    with workflow.cooperative_lock():  # raises Workflow.DoesNotExist
        try:
            step.refresh_from_db()
        except Step.DoesNotExist:
            return None  # no-op

        if step.secrets.get(param) is None:
            return None  # no-op

        step.secrets = dict(step.secrets)  # shallow copy
        del step.secrets[param]
        step.save(update_fields=["secrets"])

        return clientside.Update(
            steps={step.id: clientside.StepUpdate(secrets=step.secret_metadata)}
        )

def _do_set_notifications(scope, step: Step, notifications: bool):
    step.notifications = notifications
    step.save(update_fields=["notifications"])
    if notifications:
        server.utils.log_user_event_from_scope(
            scope, "Enabled email notifications", {"stepId": step.id}
        )

def get_migrated_params(
    step: Step, *, module_zipfile: ModuleZipfile = None
) -> Dict[str, Any]:
    """Read `step.params`, calling migrate_params() or using cache fields.

    Call this within a `Workflow.cooperative_lock()`.

    If migrate_params() was already called for this version of the module,
    return the cached value. See `step.cached_migrated_params` and
    `step.cached_migrated_params_module_version`.

    Raise `ModuleError` if migration fails.

    Raise `KeyError` if the module was deleted.

    Raise `RuntimeError` (unrecoverable) if there is a problem loading or
    executing the module. (Modules are validated before import, so this
    should not happen.)

    The result may be invalid. Call `validate()`, which raises `ValueError`,
    to detect that case.

    TODO avoid holding the database lock whilst executing stuff on the
    kernel. (This will involve auditing and modifying all callers to handle
    new error cases.)
    """
    if module_zipfile is None:
        # raise KeyError
        module_zipfile = MODULE_REGISTRY.latest(step.module_id_name)

    stale = (
        module_zipfile.version == "develop"
        # works if cached version (and thus cached _result_) is None
        or module_zipfile.version != step.cached_migrated_params_module_version
    )

    if not stale:
        return step.cached_migrated_params
    else:
        # raise ModuleError
        params = invoke_migrate_params(module_zipfile, step.params)
        step.cached_migrated_params = params
        step.cached_migrated_params_module_version = module_zipfile.version
        try:
            step.save(
                update_fields=[
                    "cached_migrated_params",
                    "cached_migrated_params_module_version",
                ]
            )
        except ValueError:
            # Step was deleted, so we get:
            # "ValueError: Cannot force an update in save() with no primary key."
            pass
        return params
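
# A minimal usage sketch for get_migrated_params(), not part of the original
# module. It follows the locking contract from the docstring above; the helper
# name and the empty-dict fallback are assumptions for illustration.
def _example_read_params(workflow: Workflow, step: Step) -> Dict[str, Any]:
    with workflow.cooperative_lock():  # docstring requires the lock be held
        step.refresh_from_db()
        try:
            return get_migrated_params(step)
        except (KeyError, ModuleError):
            # Module was deleted, or migration failed; caller sees "no params"
            return {}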

def _do_mark_result_unchanged(
    workflow_id: int, step: Step, now: datetime.datetime
) -> None:
    """Do database manipulations for mark_result_unchanged().

    Modify `step` in-place.

    Raise Step.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    with _locked_step(workflow_id, step):
        step.is_busy = False
        step.last_update_check = now
        step.save(update_fields=["is_busy", "last_update_check"])

def _step_set_secret_and_build_delta(
    workflow: Workflow, step: Step, param: str, secret: str
) -> Optional[clientside.Update]:
    """Write a new secret to `step`, or raise.

    Return a `clientside.Update`, or `None` if the database is not modified.

    Raise Workflow.DoesNotExist if the Workflow was deleted.
    """
    with workflow.cooperative_lock():  # raises Workflow.DoesNotExist
        try:
            step.refresh_from_db()
        except Step.DoesNotExist:
            return None  # no-op

        if step.secrets.get(param, {}).get("secret") == secret:
            return None  # no-op

        try:
            module_zipfile = MODULE_REGISTRY.latest(step.module_id_name)
        except KeyError:
            raise HandlerError(
                f"BadRequest: ModuleZipfile {step.module_id_name} does not exist"
            )
        module_spec = module_zipfile.get_spec()
        if not any(
            p.type == "secret" and p.secret_logic.provider == "string"
            for p in module_spec.param_fields
        ):
            raise HandlerError("BadRequest: param is not a secret string parameter")

        created_at = datetime.datetime.now()
        created_at_str = (
            created_at.strftime("%Y-%m-%dT%H:%M:%S")
            + "."
            + created_at.strftime("%f")[0:3]  # milliseconds
            + "Z"
        )

        step.secrets = {
            **step.secrets,
            param: {"name": created_at_str, "secret": secret},
        }
        step.save(update_fields=["secrets"])

        return clientside.Update(
            steps={step.id: clientside.StepUpdate(secrets=step.secret_metadata)}
        )
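
# A small sketch (not in the original) demonstrating the secret "name"
# timestamp format built above: ISO 8601 with millisecond precision and a
# "Z" suffix. The sample datetime is invented.
_example_created_at = datetime.datetime(2021, 4, 20, 13, 59, 1, 123456)
assert (
    _example_created_at.strftime("%Y-%m-%dT%H:%M:%S")
    + "."
    + _example_created_at.strftime("%f")[0:3]
    + "Z"
) == "2021-04-20T13:59:01.123Z"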

@contextmanager  # assumes `from contextlib import contextmanager` at module top
def _locked_step(workflow_id: int, step: Step):
    """Refresh step from database and yield with workflow lock.

    Raise Workflow.DoesNotExist or Step.DoesNotExist in the event of a race.
    (Even a soft-deleted Step or Tab raises Step.DoesNotExist, to simulate
    hard deletion -- because sooner or later soft-delete won't be a thing
    any more.)
    """
    # raise Workflow.DoesNotExist
    with Workflow.lookup_and_cooperative_lock(id=workflow_id):
        # raise Step.DoesNotExist
        step.refresh_from_db()
        if step.is_deleted or step.tab.is_deleted:
            raise Step.DoesNotExist("soft-deleted")
        yield
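
# A minimal usage sketch for _locked_step(), not part of the original module.
# It mirrors the pattern in _do_mark_result_unchanged() and _do_create_result();
# the helper name and the swallow-the-race behavior are assumptions.
def _example_clear_busy(workflow_id: int, step: Step) -> None:
    try:
        with _locked_step(workflow_id, step):  # refreshes `step`, holds lock
            step.is_busy = False
            step.save(update_fields=["is_busy"])
    except (Workflow.DoesNotExist, Step.DoesNotExist):
        pass  # a concurrent delete won the race; nothing to update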

def _get_workflow_as_clientside_update(
    user, session, workflow_id: int
) -> WorkflowUpdateData:
    """Return (clientside.Update, delta_id).

    Raise Workflow.DoesNotExist if a race deletes the Workflow.

    The purpose of this method is to hide races from users who disconnect
    and reconnect while changes are being made. It's okay for things to be
    slightly off, as long as users don't notice. (Long-term, we can build a
    better, more-correct synchronization strategy.)
    """
    with Workflow.authorized_lookup_and_cooperative_lock(
        "read", user, session, pk=workflow_id
    ) as workflow_lock:
        workflow = workflow_lock.workflow
        update = clientside.Update(
            workflow=workflow.to_clientside(),
            tabs={tab.slug: tab.to_clientside() for tab in workflow.live_tabs},
            steps={
                step.id: step.to_clientside()
                for step in Step.live_in_workflow(workflow)
            },
        )
        return WorkflowUpdateData(update, workflow.last_delta_id)

def _load_step_and_service(
    workflow: Workflow, step_id: int, param: str
) -> Tuple[Step, oauth.OAuthService]:
    """Load Step and OAuthService from the database, or raise.

    Raise Step.DoesNotExist if the Step is deleted or missing.

    Raise SecretDoesNotExist if the Step does not have the given param.

    Invoke this within a Workflow.cooperative_lock().
    """
    # raises Step.DoesNotExist
    step = Step.live_in_workflow(workflow).get(pk=step_id)

    # raises RuntimeError; KeyError is caught and translated just below
    try:
        module_zipfile = MODULE_REGISTRY.latest(step.module_id_name)
    except KeyError:
        raise SecretDoesNotExist(f"Module {step.module_id_name} does not exist")
    module_spec = module_zipfile.get_spec()
    for field in module_spec.param_fields:
        if (
            isinstance(field, ParamField.Secret)
            and field.id_name == param
            and (
                isinstance(field.secret_logic, ParamField.Secret.Logic.Oauth1a)
                or isinstance(field.secret_logic, ParamField.Secret.Logic.Oauth2)
            )
        ):
            service_name = field.secret_logic.service
            service = oauth.OAuthService.lookup_or_none(service_name)
            if service is None:
                raise OauthServiceNotConfigured(
                    f'OAuth not configured for "{service_name}" service'
                )
            return step, service
    else:
        raise SecretDoesNotExist(f"Param {param} does not point to an OAuth secret")

def make_init_state(
    request, workflow: Workflow, modules: Dict[str, ModuleZipfile]
) -> Dict[str, Any]:
    """Build a dict to embed as JSON in `window.initState` in HTML.

    Raise Http404 if the workflow disappeared.

    Side-effect: update workflow.last_viewed_at.
    """
    try:
        with workflow.cooperative_lock():  # raise DoesNotExist on race
            if request.user.is_anonymous:
                user = None
            else:
                lock_user_by_id(request.user.id, for_write=False)
                user = query_clientside_user(request.user.id)
            workflow.last_viewed_at = datetime.datetime.now()
            workflow.save(update_fields=["last_viewed_at"])
            state = clientside.Init(
                user=user,
                workflow=workflow.to_clientside(),
                tabs={tab.slug: tab.to_clientside() for tab in workflow.live_tabs},
                steps={
                    step.id: step.to_clientside(
                        force_module_zipfile=modules.get(step.module_id_name)
                    )
                    for step in Step.live_in_workflow(workflow).prefetch_related(
                        "tab"
                    )
                },
                modules={
                    module_id: clientside.Module(
                        spec=module.get_spec(),
                        js_module=module.get_optional_js_module(),
                    )
                    for module_id, module in modules.items()
                },
                blocks={
                    block.slug: block.to_clientside()
                    for block in workflow.blocks.all()
                },
                settings={
                    "bigTableRowsPerTile": settings.BIG_TABLE_ROWS_PER_TILE,
                    "bigTableColumnsPerTile": settings.BIG_TABLE_COLUMNS_PER_TILE,
                },
            )
    except Workflow.DoesNotExist:
        raise Http404("Workflow was recently deleted")

    ctx = JsonizeContext(request.locale_id, modules)
    return jsonize_clientside_init(state, ctx)

def _write_step_position(workflow: Workflow, step_id: int) -> None:
    """Write position in DB, or raise (Workflow|Tab|Step).DoesNotExist."""
    with workflow.cooperative_lock():  # raises Workflow.DoesNotExist
        # Raises Step.DoesNotExist, e.g. if tab.is_deleted
        step = Step.live_in_workflow(workflow).get(pk=step_id)
        tab = step.tab
        tab.selected_step_position = step.order
        tab.save(update_fields=["selected_step_position"])
        workflow.selected_tab_position = tab.position
        workflow.save(update_fields=["selected_tab_position"])

def load_database_objects(workflow_id: int, step_id: int) -> DatabaseObjects:
    """Query Step info.

    Raise `Step.DoesNotExist` or `Workflow.DoesNotExist` if the step was
    deleted.

    Catch a `ModuleError` from migrate_params() and return it as part of the
    `DatabaseObjects`.
    """
    with Workflow.lookup_and_cooperative_lock(id=workflow_id) as workflow:
        # raise Step.DoesNotExist
        step = Step.live_in_workflow(workflow).get(id=step_id)

        # module_zipfile
        try:
            module_zipfile = MODULE_REGISTRY.latest(step.module_id_name)
        except KeyError:
            module_zipfile = None

        # migrated_params_or_error
        if module_zipfile is None:
            migrated_params_or_error = {}
        else:
            try:
                migrated_params_or_error = cjwstate.params.get_migrated_params(
                    step, module_zipfile=module_zipfile
                )  # raise ModuleError
            except ModuleError as err:
                migrated_params_or_error = err

        # stored_object
        try:
            stored_object = step.stored_objects.get(
                stored_at=step.stored_data_version
            )
        except StoredObject.DoesNotExist:
            stored_object = None

        # input_crr
        try:
            # raise Step.DoesNotExist -- but we'll catch this one
            prev_module = step.tab.live_steps.get(order=step.order - 1)
            input_crr = prev_module.cached_render_result  # may be None
        except Step.DoesNotExist:
            input_crr = None

        return DatabaseObjects(
            step,
            module_zipfile,
            migrated_params_or_error,
            stored_object,
            input_crr,
        )
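
# A minimal sketch (not in the original) of consuming load_database_objects().
# It assumes DatabaseObjects is a named tuple whose third slot is named
# `migrated_params_or_error`, matching the positional construction above.
def _example_params_or_empty(workflow_id: int, step_id: int) -> Dict[str, Any]:
    objects = load_database_objects(workflow_id, step_id)
    if isinstance(objects.migrated_params_or_error, ModuleError):
        return {}  # migration failed; treat as "no params"
    return objects.migrated_params_or_error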

def clear_cached_render_result_for_step(step: Step) -> None:
    """Delete a CachedRenderResult, if it exists.

    This deletes the Parquet file from disk, _then_ empties relevant
    database fields and saves them (and only them).
    """
    delete_parquet_files_for_step(step.workflow_id, step.id)

    step.cached_render_result_delta_id = None
    step.cached_render_result_errors = []
    step.cached_render_result_json = b"null"
    step.cached_render_result_status = None
    step.cached_render_result_columns = None
    step.cached_render_result_nrows = None

    step.save(update_fields=STEP_FIELDS)

def _do_create_result(
    workflow_id: int, step: Step, result: FetchResult, now: datetime.datetime
) -> None:
    """Do database manipulations for create_result().

    Modify `step` in-place.

    Do *not* do the logic in SetStepDataVersion. We're creating a new
    version, not doing something undoable.

    Raise Step.DoesNotExist or Workflow.DoesNotExist in case of a race.
    """
    with _locked_step(workflow_id, step):
        storedobjects.create_stored_object(
            workflow_id, step.id, result.path, stored_at=now
        )
        storedobjects.delete_old_files_to_enforce_storage_limits(step=step)
        # Assume caller sends new list to clients via SetStepDataVersion
        step.fetch_errors = result.errors
        step.is_busy = False
        step.last_update_check = now
        step.save(update_fields=["fetch_errors", "is_busy", "last_update_check"])

def _do_try_set_autofetch(
    scope, step: Step, auto_update_data: bool, update_interval: int
):
    # We may ROLLBACK; if we do, we need to remember the old values
    old_auto_update_data = step.auto_update_data
    old_update_interval = step.update_interval

    check_quota = (
        auto_update_data
        and step.auto_update_data
        and update_interval < step.update_interval
    ) or (auto_update_data and not step.auto_update_data)

    quota_exceeded = None
    try:
        with transaction.atomic():
            step.auto_update_data = auto_update_data
            step.update_interval = update_interval
            if auto_update_data:
                step.next_update = datetime.datetime.now() + datetime.timedelta(
                    seconds=update_interval
                )
            else:
                step.next_update = None
            step.save(
                update_fields=["auto_update_data", "update_interval", "next_update"]
            )

            # Now before we commit, let's see if we've surpassed the user's
            # limit; roll back if we have.
            #
            # Only rollback if we're _increasing_ our fetch count. If we're
            # lowering it, allow that -- even if the user is over limit, we
            # still want to commit because it's an improvement.
            if check_quota:
                autofetches = autofetch.list_autofetches_json(scope)
                if autofetches["nFetchesPerDay"] > autofetches["maxFetchesPerDay"]:
                    raise AutofetchQuotaExceeded(autofetches)
    except AutofetchQuotaExceeded as err:
        step.auto_update_data = old_auto_update_data
        step.update_interval = old_update_interval
        quota_exceeded = err.autofetches

    retval = {
        "isAutofetch": step.auto_update_data,
        "fetchInterval": step.update_interval,
    }
    if quota_exceeded is not None:
        retval["quotaExceeded"] = quota_exceeded  # a dict
    return retval
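
# Hedged illustration (not part of the original) of _do_try_set_autofetch()'s
# return shape, derived from the code above; the numbers are invented. The
# "quotaExceeded" dict carries at least the keys the quota check reads:
#
#   {"isAutofetch": True, "fetchInterval": 300}
#   {"isAutofetch": False, "fetchInterval": 3600,
#    "quotaExceeded": {"nFetchesPerDay": 30, "maxFetchesPerDay": 10, ...}}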

def cache_render_result(
    workflow: Workflow, step: Step, delta_id: int, result: LoadedRenderResult
) -> None:
    """Save `result` for later viewing.

    Raise AssertionError if `delta_id` is not what we expect.

    Since this alters data, call it within a lock:

        with workflow.cooperative_lock():
            step.refresh_from_db()  # may change delta_id
            cache_render_result(workflow, step, delta_id, result)
    """
    assert delta_id == step.last_relevant_delta_id
    assert result is not None

    json_bytes = json_encode(result.json).encode("utf-8")
    if not result.columns:
        if result.errors:
            status = "error"
        else:
            status = "unreachable"
    else:
        status = "ok"

    step.cached_render_result_delta_id = delta_id
    step.cached_render_result_errors = result.errors
    step.cached_render_result_status = status
    step.cached_render_result_json = json_bytes
    step.cached_render_result_columns = result.columns
    step.cached_render_result_nrows = result.table.num_rows

    # Now we get to the part where things can end up inconsistent. Try to
    # err on the side of not-caching when that happens.
    delete_parquet_files_for_step(
        workflow.id, step.id
    )  # makes old cache inconsistent
    step.save(update_fields=STEP_FIELDS)  # makes new cache inconsistent

    if result.table.num_columns:  # only write non-zero-column tables
        with tempfile_context() as parquet_path:
            cjwparquet.write(parquet_path, result.table)
            s3.fput_file(
                BUCKET, parquet_key(workflow.id, step.id, delta_id), parquet_path
            )  # makes new cache consistent

def _get_workflow_as_clientside_update(self) -> WorkflowUpdateData:
    """Return (clientside.Update, delta_id).

    Raise Workflow.DoesNotExist if a race deletes the Workflow.
    """
    with self._lookup_requested_workflow_with_auth_and_cooperative_lock() as workflow_lock:
        workflow = workflow_lock.workflow
        update = clientside.Update(
            workflow=workflow.to_clientside(),
            tabs={tab.slug: tab.to_clientside() for tab in workflow.live_tabs},
            steps={
                step.id: step.to_clientside()
                for step in Step.live_in_workflow(workflow)
            },
        )
        return WorkflowUpdateData(update, workflow.last_delta_id)

def _do_set_file_upload_api_token(step: Step, api_token: Optional[str]):
    step.file_upload_api_token = api_token
    step.save(update_fields=["file_upload_api_token"])

def _load_workflow_and_step_sync(
    request: HttpRequest,
    workflow_id_or_secret_id: Union[int, str],
    step_slug: str,
    accessing: Literal["all", "chart", "table"],
) -> Tuple[Workflow, Step]:
    """Load (Workflow, Step) from database, or raise Http404 or PermissionDenied.

    `Step.tab` will be loaded. (`Step.tab.workflow_id` is needed to access
    the render cache.)

    To avoid PermissionDenied:

    * The workflow must be public; OR
    * The user must be workflow owner, editor or viewer; OR
    * The user must be workflow report-viewer and the step must be a chart
      or table in the report.
    """
    try:
        if isinstance(workflow_id_or_secret_id, int):
            search = {"id": workflow_id_or_secret_id}
            has_secret = False
        else:
            search = {"secret_id": workflow_id_or_secret_id}
            has_secret = True

        with Workflow.lookup_and_cooperative_lock(**search) as workflow_lock:
            workflow = workflow_lock.workflow
            if (
                has_secret
                or workflow.public
                or workflow.request_authorized_owner(request)
            ):
                need_report_auth = False
            elif request.user is None or request.user.is_anonymous:
                raise PermissionDenied()
            else:
                try:
                    acl_entry = workflow.acl.filter(email=request.user.email).get()
                except AclEntry.DoesNotExist:
                    raise PermissionDenied()
                if acl_entry.role in {Role.VIEWER, Role.EDITOR}:
                    need_report_auth = False
                elif acl_entry.role == Role.REPORT_VIEWER:
                    need_report_auth = True
                else:
                    raise PermissionDenied()  # role we don't handle yet

            step = (
                Step.live_in_workflow(workflow.id)
                .select_related("tab")
                .get(slug=step_slug)
            )  # or Step.DoesNotExist

            if need_report_auth:  # user is report-viewer
                if workflow.has_custom_report:
                    if (
                        accessing == "chart"
                        and workflow.blocks.filter(step_id=step.id).exists()
                    ):
                        pass  # the step is a chart
                    elif (
                        accessing == "table"
                        and workflow.blocks.filter(tab_id=step.tab_id).exists()
                        and not step.tab.live_steps.filter(order__gt=step.order)
                    ):
                        pass  # step is a table (last step of a report-included tab)
                    else:
                        raise PermissionDenied()
                else:
                    # Auto-report: all Charts are allowed; everything else is not
                    try:
                        if accessing == "chart" and (
                            MODULE_REGISTRY.latest(step.module_id_name)
                            .get_spec()
                            .html_output
                        ):
                            pass
                        else:
                            raise PermissionDenied()
                    except KeyError:  # not a module
                        raise PermissionDenied()

            return workflow, step
    except (Workflow.DoesNotExist, Step.DoesNotExist):
        raise Http404()
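
# A minimal Django-view sketch (not in the original) showing the intended call
# pattern; the view name and response body are assumptions, and it assumes
# django.http.JsonResponse is importable. Http404 and PermissionDenied
# propagate to Django's standard 404/403 handling.
def _example_chart_view(request: HttpRequest, workflow_id: int, step_slug: str):
    workflow, step = _load_workflow_and_step_sync(
        request, workflow_id, step_slug, accessing="chart"
    )
    return JsonResponse({"workflowId": workflow.id, "stepSlug": step.slug})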

def _workflow_has_notifications(workflow_id: int) -> bool:
    """Detect whether a workflow sends email on changes."""
    return Step.live_in_workflow(workflow_id).filter(notifications=True).exists()

def _load_step_by_slug(workflow: Workflow, step_slug: str) -> Step:
    """Return a Step, or raise HandlerError."""
    try:
        return Step.live_in_workflow(workflow).get(slug=step_slug)
    except Step.DoesNotExist:
        raise HandlerError("DoesNotExist: Step not found")

def _load_step_by_id(workflow: Workflow, step_id: int) -> Step:
    """Return a Step, or raise HandlerError."""
    try:
        return Step.live_in_workflow(workflow).get(id=step_id)
    except Step.DoesNotExist:
        raise HandlerError("DoesNotExist: Step not found")

def _do_set_collapsed(step: Step, is_collapsed: bool):
    step.is_collapsed = is_collapsed
    step.save(update_fields=["is_collapsed"])

def _do_clear_unseen_notification(step: Step):
    step.has_unseen_notification = False
    step.save(update_fields=["has_unseen_notification"])