def setUp(self):
    super().setUp()
    self.ctx = contextlib.ExitStack()
    self.chroot_context = self.ctx.enter_context(EDITABLE_CHROOT.acquire_context())
    self.basedir = self.ctx.enter_context(
        self.chroot_context.tempdir_context(prefix="basedir-")
    )

def setUp(self):
    super().setUp()
    self.ctx = contextlib.ExitStack()
    self.chroot_context = self.ctx.enter_context(EDITABLE_CHROOT.acquire_context())
    self.basedir = self.ctx.enter_context(self.chroot_context.tempdir_context())
    self.output_path = self.ctx.enter_context(
        self.chroot_context.tempfile_context(dir=self.basedir)
    )

def setUp(self):
    super().setUp()
    self.ctx = contextlib.ExitStack()
    self.chroot_context = self.ctx.enter_context(EDITABLE_CHROOT.acquire_context())
    basedir = self.ctx.enter_context(
        self.chroot_context.tempdir_context(prefix="test_wf_module-")
    )
    self.output_path = self.ctx.enter_context(
        self.chroot_context.tempfile_context(prefix="output-", dir=basedir)
    )

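# The setUp() variants above enter contexts on an ExitStack but never exit
# them. A minimal sketch of the matching teardown, assuming nothing else
# closes self.ctx:
def tearDown(self):
    self.ctx.close()  # unwinds tempfile/tempdir/chroot contexts in reverse order
    super().tearDown()
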
@contextlib.contextmanager  # the body yields, so it is meant as a context manager
def _execute(self, workflow, flow, tab_results, expect_log_level=logging.DEBUG):
    with EDITABLE_CHROOT.acquire_context() as chroot_context:
        with chroot_context.tempdir_context(prefix="test_tab") as tempdir:
            with chroot_context.tempfile_context(
                prefix="execute-tab-output", suffix=".arrow", dir=tempdir
            ) as out_path:
                with self.assertLogs("renderer.execute", level=expect_log_level):
                    result = self.run_with_async_db(
                        execute_tab_flow(
                            chroot_context, workflow, flow, tab_results, out_path
                        )
                    )
                    yield result

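# Hypothetical usage of the _execute() helper above. `_build_fixtures` is an
# assumed stand-in, not a helper from the original suite:
def test_execute_tab_flow(self):
    workflow, flow, tab_results = self._build_fixtures()  # assumed helper
    with self._execute(workflow, flow, tab_results) as result:
        self.assertEqual(result.errors, [])  # assumes RenderResult exposes .errors
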
def setUp(self):
    super().setUp()
    self.ctx = contextlib.ExitStack()
    self.chroot_context = self.ctx.enter_context(EDITABLE_CHROOT.acquire_context())
    basedir = self.ctx.enter_context(
        self.chroot_context.tempdir_context(prefix="test_step-")
    )
    self.empty_table_path = self.ctx.enter_context(
        self.chroot_context.tempfile_context(prefix="empty-table-", dir=basedir)
    )
    with pa.ipc.RecordBatchFileWriter(self.empty_table_path, pa.schema([])):
        pass
    self.output_path = self.ctx.enter_context(
        self.chroot_context.tempfile_context(prefix="output-", dir=basedir)
    )

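# Why the empty writer block above works: RecordBatchFileWriter finalizes a
# valid zero-column, zero-row Arrow file on close. A quick sketch of reading
# it back with stock pyarrow:
import pyarrow as pa
from pathlib import Path

def read_empty_table(path: Path) -> pa.Table:
    # read_all() on the zero-column file returns an empty pa.Table
    # (num_columns == 0, num_rows == 0).
    return pa.ipc.open_file(path).read_all()
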
async def execute_workflow(workflow: Workflow, delta_id: int) -> None:
    """
    Ensure all `workflow.tabs[*].live_wf_modules` cache fresh render results.

    Raise UnneededExecution if the inputs become stale (at which point we
    don't care about results any more).

    WEBSOCKET NOTES: each wf_module is executed in turn. After each
    execution, we notify clients of its new columns and status.
    """
    # raises UnneededExecution
    pending_tab_flows = await _load_tab_flows(workflow, delta_id)

    # tab_results: keep track of each tab's output. (Outputs are used as
    # inputs into other tabs.) Before render begins, all outputs are `None`.
    # We'll execute tabs dependencies-first; if a WfModule depends on a tab
    # result we haven't rendered yet, that's because it _couldn't_ be
    # rendered first -- prompting a `TabCycleError`.
    #
    # `tab_results.keys()` returns tabs in the Workflow's tab order -- that
    # is, the order the user determines.
    tab_results: Dict[Tab, Optional[RenderResult]] = {
        flow.tab: None for flow in pending_tab_flows
    }
    output_paths = []

    # Execute one tab_flow at a time.
    #
    # We don't hold a DB lock throughout the loop: the loop can take a long
    # time; it might be run multiple times simultaneously (even on different
    # computers); and `await` doesn't work with locks.
    with EDITABLE_CHROOT.acquire_context() as chroot_context:
        with chroot_context.tempdir_context("render-") as basedir:

            async def execute_tab_flow_into_new_file(
                tab_flow: TabFlow,
            ) -> RenderResult:
                nonlocal workflow, tab_results, output_paths
                output_path = basedir / (
                    "tab-output-%s.arrow" % tab_flow.tab_slug.replace("/", "-")
                )
                return await execute_tab_flow(
                    chroot_context, workflow, tab_flow, tab_results, output_path
                )

            while pending_tab_flows:
                ready_flows, dependent_flows = partition_ready_and_dependent(
                    pending_tab_flows
                )

                if not ready_flows:
                    # All remaining flows are dependent -- meaning they all
                    # have cycles. Execute them last; they can detect their
                    # cycles through `tab_results`.
                    break

                for tab_flow in ready_flows:
                    result = await execute_tab_flow_into_new_file(tab_flow)
                    tab_results[tab_flow.tab] = result

                pending_tab_flows = dependent_flows  # iterate

            # Now `pending_tab_flows` only contains flows with cycles.
            # Execute them. No need to update `tab_results`: if tab1 and tab2
            # depend on each other, they should have the same error ("Cycle").
            for tab_flow in pending_tab_flows:
                await execute_tab_flow_into_new_file(tab_flow)

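# A minimal sketch of partition_ready_and_dependent(), assuming each TabFlow
# exposes its own slug and the slugs of tabs it reads from (the attribute
# name `input_tab_slugs` is hypothetical): a flow is "ready" when none of
# its inputs is still pending.
from typing import List, Tuple

def partition_ready_and_dependent(
    flows: List[TabFlow],
) -> Tuple[List[TabFlow], List[TabFlow]]:
    pending_slugs = frozenset(flow.tab_slug for flow in flows)
    ready: List[TabFlow] = []
    dependent: List[TabFlow] = []
    for flow in flows:
        if pending_slugs & frozenset(flow.input_tab_slugs):  # assumed attribute
            dependent.append(flow)  # waits on a tab we haven't rendered yet
        else:
            ready.append(flow)  # every input is already in tab_results
    return ready, dependent
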
async def fetch(
    *, workflow_id: int, wf_module_id: int, now: Optional[timezone.datetime] = None
) -> None:
    # 1. Load database objects
    #    - missing WfModule? Return prematurely
    #    - database error? _exit(1)
    #    - module_zipfile missing/invalid? user-visible error
    #    - migrate_params() fails? user-visible error
    # 2. Calculate result
    #    2a. Build fetch kwargs
    #    2b. Call fetch (no errors possible -- LoadedModule catches them)
    # 3. Save result (and send delta)
    #    - database errors? _exit(1)
    #    - other error (bug in `save`)? Log exception and ignore
    # 4. Update WfModule last-fetch time
    #    - database errors? _exit(1)
    with crash_on_database_error():
        logger.info(
            "begin fetch(workflow_id=%d, wf_module_id=%d)", workflow_id, wf_module_id
        )
        try:
            (
                wf_module,
                module_zipfile,
                migrated_params,
                stored_object,
                input_crr,
            ) = await load_database_objects(workflow_id, wf_module_id)
        except (Workflow.DoesNotExist, WfModule.DoesNotExist):
            logger.info(
                "Skipping fetch of deleted WfModule %d-%d", workflow_id, wf_module_id
            )
            return

    # Prepare secrets -- mangle user values so modules have all they need.
    #
    # This can involve, e.g., HTTP requests to OAuth2 token servers.
    #
    # TODO unit-test this code path
    if module_zipfile is None:
        secrets = {}
    else:
        module_spec = module_zipfile.get_spec()
        secrets = await fetcher.secrets.prepare_secrets(
            module_spec.param_fields, wf_module.secrets
        )

    if now is None:
        now = timezone.now()

    with contextlib.ExitStack() as ctx:
        chroot_context = ctx.enter_context(EDITABLE_CHROOT.acquire_context())
        basedir = ctx.enter_context(chroot_context.tempdir_context(prefix="fetch-"))
        output_path = ctx.enter_context(
            chroot_context.tempfile_context(prefix="fetch-result-", dir=basedir)
        )
        # get last_fetch_result (This can't error.)
        last_fetch_result = _stored_object_to_fetch_result(
            ctx, stored_object, wf_module.fetch_errors, dir=basedir
        )
        result = await asyncio.get_event_loop().run_in_executor(
            None,
            fetch_or_wrap_error,
            ctx,
            chroot_context,
            basedir,
            wf_module.module_id_name,
            module_zipfile,
            migrated_params,
            secrets,
            last_fetch_result,
            input_crr,
            output_path,
        )

        try:
            with crash_on_database_error():
                if last_fetch_result is not None and versions.are_fetch_results_equal(
                    last_fetch_result, result
                ):
                    await save.mark_result_unchanged(workflow_id, wf_module, now)
                else:
                    await save.create_result(workflow_id, wf_module, result, now)
        except asyncio.CancelledError:
            raise
        except Exception:
            # Log exceptions but keep going.
            # TODO [adamhooper, 2019-09-12] really? I think we don't want this.
            # Make `fetch.save()` robust, then nix this handler.
            logger.exception(f"Error fetching {wf_module}")

    with crash_on_database_error():
        await update_next_update_time(workflow_id, wf_module, now)

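# The "_exit(1)" notes above describe crash_on_database_error(). A minimal
# sketch, assuming django.db.DatabaseError is the error class in play: treat
# any database failure as fatal so the process supervisor restarts the
# worker instead of leaving it in an unknown state.
import contextlib
import logging
import os

from django.db import DatabaseError

logger = logging.getLogger(__name__)

@contextlib.contextmanager
def crash_on_database_error():
    try:
        yield
    except DatabaseError:
        logger.exception("Fatal database error; exiting")
        os._exit(1)  # skip cleanup; the supervisor restarts the process
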
async def fetch(
    *, workflow_id: int, step_id: int, now: Optional[datetime.datetime] = None
) -> None:
    # 1. Load database objects
    #    - missing Step? Return prematurely
    #    - database error? Raise
    #    - module_zipfile missing/invalid? user-visible error
    #    - migrate_params() fails? user-visible error
    # 2. Calculate result
    #    2a. Build fetch kwargs
    #    2b. Call fetch (no errors possible -- LoadedModule catches them)
    # 3. Save result (and create SetStepDataVersion => queueing a render)
    #    - database errors? Raise
    #    - rabbitmq errors? Raise
    #    - other error (bug in `save`)? Raise
    # 4. Update Step last-fetch time
    #    - database errors? Raise
    logger.info("begin fetch(workflow_id=%d, step_id=%d)", workflow_id, step_id)
    try:
        (
            step,
            module_zipfile,
            migrated_params,
            stored_object,
            input_crr,
        ) = await load_database_objects(workflow_id, step_id)
    except (Workflow.DoesNotExist, Step.DoesNotExist):
        logger.info("Skipping fetch of deleted Step %d-%d", workflow_id, step_id)
        return

    # Prepare secrets -- mangle user values so modules have all they need.
    #
    # This can involve, e.g., HTTP requests to OAuth2 token servers.
    #
    # TODO unit-test this code path
    if module_zipfile is None:
        secrets = {}
    else:
        module_spec = module_zipfile.get_spec()
        secrets = await fetcher.secrets.prepare_secrets(
            module_spec.param_fields, step.secrets
        )

    if now is None:
        now = datetime.datetime.now()

    with contextlib.ExitStack() as exit_stack:
        chroot_context = exit_stack.enter_context(EDITABLE_CHROOT.acquire_context())
        basedir = exit_stack.enter_context(
            chroot_context.tempdir_context(prefix="fetch-")
        )
        output_path = exit_stack.enter_context(
            chroot_context.tempfile_context(prefix="fetch-result-", dir=basedir)
        )
        # get last_fetch_result (This can't error.)
        last_fetch_result = _stored_object_to_fetch_result(
            exit_stack, stored_object, step.fetch_errors, dir=basedir
        )
        result = await asyncio.get_event_loop().run_in_executor(
            None,
            fetch_or_wrap_error,
            exit_stack,
            chroot_context,
            basedir,
            step.module_id_name,
            module_zipfile,
            migrated_params,
            secrets,
            last_fetch_result,
            input_crr,
            output_path,
        )

        if last_fetch_result is not None and versions.are_fetch_results_equal(
            last_fetch_result, result
        ):
            await save.mark_result_unchanged(workflow_id, step, now)
        else:
            await save.create_result(workflow_id, step, result, now)

    await update_next_update_time(workflow_id, step, now)

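# Design note: both fetch() variants push fetch_or_wrap_error into the
# default executor because it is synchronous and potentially slow; awaiting
# run_in_executor keeps the event loop responsive. A self-contained sketch
# of the pattern (blocking_work is a stand-in, not a real helper):
import asyncio

def blocking_work(x: int) -> int:
    return x * 2  # stand-in for slow, synchronous work

async def main() -> None:
    # get_running_loop() is the modern spelling of get_event_loop() inside a
    # coroutine; None selects the default ThreadPoolExecutor.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, blocking_work, 21)
    assert result == 42

asyncio.run(main())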