def test_mark_result_unchanged(self, send_update):
    # mark_result_unchanged() is expected to clear the busy flag and record
    # the check time, while leaving the step's existing fetch errors in
    # place and storing no new objects.
    send_update.side_effect = async_noop
    foo_error = FetchError(I18nMessage("foo", {}, "module"))

    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().steps.create(
        order=0,
        slug="step-1",
        is_busy=True,
        fetch_errors=[foo_error],
    )
    checked_at = datetime.datetime(2019, 10, 22, 12, 22)

    self.run_with_async_db(
        save.mark_result_unchanged(workflow.id, step, checked_at)
    )

    def assert_step_state():
        self.assertEqual(step.fetch_errors, [foo_error])
        self.assertEqual(step.is_busy, False)
        self.assertEqual(step.last_update_check, checked_at)

    self.assertEqual(step.stored_objects.count(), 0)
    # First check the in-memory object, then what was actually persisted.
    assert_step_state()
    step.refresh_from_db()
    assert_step_state()

    # Clients must be told the step is idle and when it was last checked.
    expected_update = clientside.Update(
        steps={
            step.id: clientside.StepUpdate(
                is_busy=False, last_fetched_at=checked_at
            )
        }
    )
    send_update.assert_called_with(workflow.id, expected_update)
def test_different_errors(self):
    # Two results that share a path but carry different error messages
    # must not compare as equal.
    result_foo = FetchResult(
        self.old_path, [FetchError(I18nMessage("foo", {}, None))]
    )
    result_bar = FetchResult(
        self.old_path, [FetchError(I18nMessage("bar", {}, None))]
    )
    self.assertFalse(are_fetch_results_equal(result_foo, result_bar))
def test_fetch_truncate(self):
    # A three-row DataFrame is expected to come back as two rows plus a
    # truncation warning. (The row limit itself is configured outside this
    # test -- presumably by the fixture; confirm there.)
    def fetch(params):
        return pd.DataFrame({"A": [1, 2, 3]})

    with tempfile_context(dir=self.basedir) as outfile:
        result = self._test_fetch(fetch, output_filename=outfile.name)

        truncate_warning = I18nMessage(
            "py.cjwkernel.pandas.types.ProcessResult.truncate_in_place_if_too_big.warning",
            {"old_number": 3, "new_number": 2},
            None,
        )
        expected_result = FetchResult(
            outfile, errors=[FetchError(truncate_warning)]
        )
        self.assertEqual(result, expected_result)

        # The written Parquet file must hold only the surviving rows.
        actual_table = read_parquet_as_arrow(
            outfile, [Column("A", ColumnType.Number("{:,}"))]
        )
        expected_table = make_table(make_column("A", [1, 2]))
        assert_arrow_table_equals(actual_table, expected_table)
def test_fetch_return_error(self):
    # A fetch() that returns a bare string is expected to yield that string
    # as the only error, and an empty output file.
    async def fetch(params):
        return "bad things"

    with tempfile_context(dir=self.basedir) as outfile:
        result = self._test_fetch(fetch, output_filename=outfile.name)

        expected_errors = [FetchError(TODO_i18n("bad things"))]
        self.assertEqual(result.errors, expected_errors)
        self.assertEqual(outfile.read_bytes(), b"")
def test_fetch_return_tuple_path_and_error(self):
    # A fetch() that returns (path, "message") is expected to surface the
    # message as a single error.
    with tempfile_context(dir=self.basedir) as outfile:

        async def fetch(params):
            outfile.write_text("xyz")
            return outfile, "foo"

        result = self._test_fetch(fetch, output_filename=outfile.name)

        expected_errors = [FetchError(TODO_i18n("foo"))]
        self.assertEqual(result.errors, expected_errors)
def test_fetch_return_errors(self):
    # A fetch() that returns a list of (id, arguments, source) tuples is
    # expected to yield one FetchError per tuple.
    with tempfile_context(dir=self.basedir) as outfile:

        async def fetch(params):
            return [("message.id", {"k": "v"}, "module")]

        result = self._test_fetch(fetch, output_filename=outfile.name)

        expected_errors = [
            FetchError(I18nMessage("message.id", {"k": "v"}, "module"))
        ]
        self.assertEqual(result.errors, expected_errors)
def test_fetch_return_tuple_path_and_errors(self):
    # A fetch() that returns (path, [message tuples]) is expected to yield
    # one FetchError per tuple, in the same order.
    with tempfile_context(dir=self.basedir) as outfile:

        async def fetch(params):
            outfile.write_text("xyz")
            messages = [
                ("foo", {"a": "b"}, "module"),
                ("bar", {"b": 1}, "cjwmodule"),
            ]
            return (outfile, messages)

        result = self._test_fetch(fetch, output_filename=outfile.name)

        expected_errors = [
            FetchError(I18nMessage("foo", {"a": "b"}, "module")),
            FetchError(I18nMessage("bar", {"b": 1}, "cjwmodule")),
        ]
        self.assertEqual(result.errors, expected_errors)
def user_visible_bug_fetch_result(output_path: Path, message: str) -> FetchResult:
    """Build the FetchResult shown to users when fetch hits an internal bug.

    Overwrites ``output_path`` with zero bytes, then returns a FetchResult
    pointing at that empty file with a single translated error that embeds
    ``message`` as its error code.
    """
    output_path.write_bytes(b"")  # the result file is intentionally empty

    bug_message = trans(
        "py.fetcher.fetch.user_visible_bug_during_fetch",
        default=(
            "Something unexpected happened. We have been notified and are "
            "working to fix it. If this persists, contact us. Error code: {message}"
        ),
        arguments={"message": message},
    )
    return FetchResult(path=output_path, errors=[FetchError(bug_message)])
def test_default_render_returns_fetch_result(self):
    # Functionality used by libraryofcongress
    #
    # TODO nix this functionality.
    #
    # With an empty input table, render is expected to return the fetched
    # table and to turn the fetch error into a render error.
    with ModuleTestEnv() as env:
        with parquet_file({"A": [2]}, dir=env.basedir) as parquet_path:
            fetch_result = FetchResult(
                path=parquet_path,
                errors=[FetchError(TODO_i18n("A warning"))],
            )
            outcome = env.call_render(
                make_table(), {}, fetch_result=fetch_result
            )

            expected_result = RenderResult(
                [RenderError(TODO_i18n("A warning"))]
            )
            self.assertEqual(outcome.result, expected_result)

            expected_table = make_table(make_column("A", [2]))
            assert_arrow_table_equals(outcome.read_table(), expected_table)
def test_create_result(self, send_update):
    # create_result() is expected to store the fetched file, clear the
    # step's previous fetch errors and busy flag, record the check time,
    # notify clients and add a SetStepDataVersion delta to the workflow.
    send_update.side_effect = async_noop

    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().steps.create(
        order=0,
        slug="step-1",
        is_busy=True,
        fetch_errors=[FetchError(I18nMessage("foo", {}, "module"))],
    )
    fetched_at = datetime.datetime(2019, 10, 22, 12, 22)

    with parquet_file({"A": [1], "B": ["x"]}) as parquet_path:
        self.run_with_async_db(
            save.create_result(
                workflow.id, step, FetchResult(parquet_path), fetched_at
            )
        )

    def assert_step_state():
        self.assertEqual(step.fetch_errors, [])
        self.assertEqual(step.is_busy, False)
        self.assertEqual(step.last_update_check, fetched_at)

    self.assertEqual(step.stored_objects.count(), 1)
    # First check the in-memory object, then what was actually persisted.
    assert_step_state()
    step.refresh_from_db()
    assert_step_state()

    expected_update = clientside.Update(
        steps={
            step.id: clientside.StepUpdate(
                is_busy=False, last_fetched_at=fetched_at
            )
        }
    )
    send_update.assert_called_with(workflow.id, expected_update)

    workflow.refresh_from_db()
    last_delta = workflow.deltas.last()
    self.assertEqual(last_delta.command_name, SetStepDataVersion.__name__)
def fetch_or_wrap_error(
    exit_stack: contextlib.ExitStack,
    chroot_context: ChrootContext,
    basedir: Path,
    module_id_name: str,
    module_zipfile: ModuleZipfile,
    migrated_params_or_error: Union[Dict[str, Any], ModuleError],
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    maybe_input_crr: Optional[CachedRenderResult],
    output_path: Path,
):
    """Run a module's fetch and turn expected failures into a FetchResult.

    Failures handled here are reported through the returned FetchResult's
    ``.errors`` rather than raised to the caller.

    This function is slow; consider calling it through
    ``EventLoop.run_in_executor()``. (It is not async because the work it
    drives -- compiling the module, running fetch() -- is sandboxed in
    separate processes.)

    Situations handled:

    * ``module_zipfile is None`` (module was deleted): returns a
      "module was deleted" error. ``None`` is accepted despite the
      annotation.
    * ``migrated_params_or_error`` is a ``ModuleError``: logged, then
      reported as a user-visible bug.
    * ``migrated_params_or_error`` fails schema validation
      (``ValueError``): logged, then reported as a user-visible bug.
    * ``invoke_fetch()`` raises ``ModuleError``: logged, then reported as a
      user-visible bug. (Presumably this covers module timeouts and
      crashes -- confirm against cjwkernel.errors.)

    Downloading the cached input and cleaning the params are expected not
    to raise, so they are not guarded.
    """
    # module_zipfile=None is allowed
    if module_zipfile is None:
        logger.info("fetch() deleted module '%s'", module_id_name)
        deleted_message = trans(
            "py.fetcher.fetch.no_loaded_module",
            default="Cannot fetch: module was deleted",
        )
        return FetchResult(output_path, [FetchError(deleted_message)])

    schema = module_zipfile.get_spec().param_schema
    module_filename = module_zipfile.path.name  # used in log/error text below

    if isinstance(migrated_params_or_error, ModuleError):
        # Raise and immediately catch the error so logger.exception() has
        # an active exception to record.
        try:
            raise migrated_params_or_error
        except ModuleError:
            # We'll always get here
            logger.exception(
                "%s:migrate_params() raised error", module_filename
            )
            return user_visible_bug_fetch_result(
                output_path,
                format_for_user_debugging(migrated_params_or_error),
            )

    raw_params = migrated_params_or_error
    try:
        schema.validate(raw_params)
    except ValueError:
        logger.exception(
            "Invalid return value from %s:migrate_params()", module_filename
        )
        return user_visible_bug_fetch_result(
            output_path,
            "%s:migrate_params() output invalid params" % module_filename,
        )

    # get input_metadata, input_parquet_path. (This can't error.)
    input_parquet_path, input_metadata = _download_cached_render_result(
        exit_stack, maybe_input_crr, dir=basedir
    )

    # Clean params, so they're of the correct type. (This can't error.)
    cleaned_params = fetchprep.clean_value(schema, raw_params, input_metadata)

    # Only file names are handed to invoke_fetch(), not full paths.
    if input_parquet_path is not None:
        input_parquet_filename = input_parquet_path.name
    else:
        input_parquet_filename = None

    # actually fetch
    try:
        return invoke_fetch(
            module_zipfile,
            chroot_context=chroot_context,
            basedir=basedir,
            params=cleaned_params,
            secrets=secrets,
            last_fetch_result=last_fetch_result,
            input_parquet_filename=input_parquet_filename,
            output_filename=output_path.name,
        )
    except ModuleError as err:
        logger.exception("Error calling %s:fetch()", module_filename)
        return user_visible_bug_fetch_result(
            output_path, format_for_user_debugging(err)
        )
def _dict_to_fetch_error(value: Dict[str, Any]) -> FetchError:
    """Build a FetchError from its dict form.

    Reads ``value["message"]`` and converts it with
    ``_dict_to_i18n_message``. Raises ``KeyError`` if ``value`` has no
    ``"message"`` key.

    The return annotation previously said ``RenderError``, but the function
    constructs and returns a ``FetchError``.
    """
    return FetchError(_dict_to_i18n_message(value["message"]))
def _err(self, message: I18nMessage) -> FetchResult:
    """Return a FetchResult at ``self.output_path`` carrying one error."""
    errors = [FetchError(message)]
    return FetchResult(self.output_path, errors)