def test_default_render_returns_fetch_result(self):
    """A module with no custom render() passes the fetch result straight through."""
    # Functionality used by libraryofcongress
    with ExitStack() as ctx:
        input_arrow_table = ctx.enter_context(
            arrow_table_context({"A": [1]}, dir=self.basedir)
        )
        # Only the parquet file's basename crosses the Thrift boundary; it is
        # resolved relative to basedir on the other side.
        parquet_filename = Path(
            ctx.enter_context(parquet_file({"A": [2]}, dir=self.basedir)).name
        ).name
        out_filename = ctx.enter_context(tempfile_context(dir=self.basedir)).name
        thrift_result = module.render_thrift(
            ttypes.RenderRequest(
                str(self.basedir),
                input_arrow_table.to_thrift(),
                Params({}).to_thrift(),
                ttypes.Tab("tab-1", "Tab 1"),
                ttypes.FetchResult(
                    parquet_filename,
                    [RenderError(I18nMessage.TODO_i18n("A warning")).to_thrift()],
                ),
                out_filename,
            )
        )
        result = RenderResult.from_thrift(thrift_result, self.basedir)
        # Expect the *fetched* table ({"A": [2]}) plus its warning — not the
        # input table ({"A": [1]}).
        assert_render_result_equals(
            result,
            RenderResult(
                arrow_table({"A": [2]}),
                [RenderError(I18nMessage.TODO_i18n("A warning"))],
            ),
        )
def test_mark_result_unchanged(self, send_update):
    """mark_result_unchanged clears is_busy, stamps last_update_check and notifies clients."""
    send_update.side_effect = async_noop
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().steps.create(
        order=0,
        slug="step-1",
        is_busy=True,
        fetch_errors=[RenderError(I18nMessage("foo", {}, "module"))],
    )
    check_time = datetime.datetime(2019, 10, 22, 12, 22)

    self.run_with_async_db(
        save.mark_result_unchanged(workflow.id, step, check_time)
    )

    # No new StoredObject: the fetched data was unchanged.
    self.assertEqual(step.stored_objects.count(), 0)

    # The in-memory step object was mutated...
    self.assertEqual(
        step.fetch_errors, [RenderError(I18nMessage("foo", {}, "module"))]
    )
    self.assertEqual(step.is_busy, False)
    self.assertEqual(step.last_update_check, check_time)

    # ...and the same values were persisted to the database.
    step.refresh_from_db()
    self.assertEqual(
        step.fetch_errors, [RenderError(I18nMessage("foo", {}, "module"))]
    )
    self.assertEqual(step.is_busy, False)
    self.assertEqual(step.last_update_check, check_time)

    # Clients were told the step is idle again.
    send_update.assert_called_with(
        workflow.id,
        clientside.Update(
            steps={
                step.id: clientside.StepUpdate(
                    is_busy=False, last_fetched_at=check_time
                )
            }
        ),
    )
def test_quick_fixes(self):
    """Text columns wanted as numbers yield convert-to-number quick fixes."""
    err = PromptingError([
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B", "C"], "text", frozenset({"number"})),
    ])
    result = err.as_render_errors()
    self.assertEqual(
        result,
        [
            # One RenderError per WrongColumnType; each carries a quick fix
            # that prepends a "converttexttonumber" step for its columns.
            RenderError(
                I18nMessage(
                    "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.before_convert_buttons",
                    {
                        "columns": 1,
                        "0": "A",
                        "found_type": "text",
                    },
                    None,
                ),
                [
                    QuickFix(
                        I18nMessage(
                            "py.renderer.execute.types.PromptingError.WrongColumnType.general.quick_fix",
                            {"wanted_type": "number"},
                            None,
                        ),
                        QuickFixAction.PrependStep("converttexttonumber", {"colnames": ["A"]}),
                    )
                ],
            ),
            RenderError(
                I18nMessage(
                    "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.before_convert_buttons",
                    {
                        "columns": 2,
                        "0": "B",
                        "1": "C",
                        "found_type": "text",
                    },
                    None,
                ),
                [
                    QuickFix(
                        I18nMessage(
                            "py.renderer.execute.types.PromptingError.WrongColumnType.general.quick_fix",
                            {"wanted_type": "number"},
                            None,
                        ),
                        QuickFixAction.PrependStep(
                            "converttexttonumber", {"colnames": ["B", "C"]}),
                    )
                ],
            ),
        ],
    )
def test_different_errors(self):
    """Fetch results whose error messages differ must compare unequal."""
    error_foo = RenderError(I18nMessage("foo", {}, None))
    error_bar = RenderError(I18nMessage("bar", {}, None))
    self.assertFalse(
        are_fetch_results_equal(
            FetchResult(self.old_path, [error_foo]),
            FetchResult(self.old_path, [error_bar]),
        )
    )
def test_fetch_result_happy_path(self):
    """render() receives the step's stored fetch data plus its fetch errors."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
        fetch_errors=[
            RenderError(I18nMessage("foo", {}, "module")),
            RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
        ],
    )
    with parquet_file({"A": [1]}) as path:
        so = create_stored_object(workflow.id, step.id, path)
    # Point the step at the stored fetch data we just created.
    step.stored_data_version = so.stored_at
    step.save(update_fields=["stored_data_version"])
    # The module body itself asserts it received errors + data intact.
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": True},
        python_code=textwrap.dedent(
            """
            import pyarrow as pa
            import pandas as pd
            from pandas.testing import assert_frame_equal
            from cjwkernel.types import RenderError, I18nMessage

            def render(table, params, *, fetch_result, **kwargs):
                assert fetch_result.errors == [
                    RenderError(I18nMessage("foo", {}, "module")),
                    RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                ]
                fetch_dataframe = pa.parquet.read_table(str(fetch_result.path))
                assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                return pd.DataFrame()
            """
        ),
    )
    with self.assertLogs(level=logging.INFO):
        self.run_with_async_db(
            execute_step(
                chroot_context=self.chroot_context,
                workflow=workflow,
                step=step,
                module_zipfile=module_zipfile,
                params={},
                tab_name=tab.name,
                input_path=self.empty_table_path,
                input_table_columns=[],
                tab_results={},
                output_path=self.output_path,
            )
        )
async def do_download(
    sheet_id: str, sheet_mime_type: str, oauth2_client: oauth2.Client, output_path: Path
) -> FetchResult:
    """
    Download spreadsheet from Google.

    If `sheet_mime_type` is 'application/vnd.google-apps.spreadsheet', use
    GDrive API to _export_ a text/csv. Otherwise, use GDrive API to _download_
    the file.
    """
    if sheet_mime_type == "application/vnd.google-apps.spreadsheet":
        url = _generate_google_sheet_url(sheet_id)
        sheet_mime_type = "text/csv"
    else:
        url = _generate_gdrive_file_url(sheet_id)
        # and use the passed sheet_mime_type

    url, headers, _ = oauth2_client.add_token(url, headers={})

    # User-facing messages for the HTTP statuses we know how to explain.
    known_status_messages = {
        401: "Invalid credentials. Please reconnect to Google Drive.",
        403: (
            "You chose a file your logged-in user cannot access. "
            "Please reconnect to Google Drive or choose a different file."
        ),
        404: "File not found. Please choose a different file.",
    }

    try:
        await httpfile.download(url, output_path, headers=headers, ssl=SSL_CONTEXT)
    except HttpError.NotSuccess as err:
        message = known_status_messages.get(err.response.status_code)
        if message is not None:
            return TODO_i18n_fetch_error(output_path, message)
        # HACK: *err.i18n_message because i18n_message is a tuple
        # compatible with I18nMessage() ctor
        return FetchResult(
            output_path, errors=[RenderError(I18nMessage(*err.i18n_message))]
        )
    except HttpError as err:
        # HACK: *err.i18n_message because i18n_message is a tuple
        # compatible with I18nMessage() ctor
        return FetchResult(
            output_path, errors=[RenderError(I18nMessage(*err.i18n_message))]
        )
    return FetchResult(output_path)
def test_double_clear(self):
    """Clearing an already-cleared cached render result must not raise."""
    cached = RenderResult(
        arrow_table({"A": [1]}),
        [RenderError(I18nMessage("X", {}, None), [])],
        {},
    )
    cache_render_result(self.workflow, self.step, 1, cached)
    clear_cached_render_result_for_step(self.step)
    clear_cached_render_result_for_step(self.step)  # second clear: don't crash
def test_deleted_module(self):
    """Rendering a step whose module was uninstalled yields a noModule error."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="deleted_module",
        last_relevant_delta_id=workflow.last_delta_id,
    )

    result = self.run_with_async_db(
        execute_step(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=None,  # simulates the deleted module
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
    )

    # The render produced no table data at all...
    self.assertEqual(result.columns, [])
    self.assertEqual(self.output_path.read_bytes(), b"")

    # ...and the cached render result records the noModule error.
    step.refresh_from_db()
    expected_errors = [
        RenderError(I18nMessage("py.renderer.execute.step.noModule", {}, None))
    ]
    self.assertEqual(step.cached_render_result.errors, expected_errors)
def test_render_without_input_or_loads_data_raises_no_loaded_data(self):
    """A module that neither loads data nor receives input renders NoLoadedDataError."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )
    module_zipfile = create_module_zipfile(
        "x",
        spec_kwargs={"loads_data": False},
        python_code="def render(table, params): return None",
    )

    result = self.run_with_async_db(
        execute_step(
            self.chroot_context,
            workflow,
            step,
            module_zipfile,
            {},
            Tab(tab.slug, tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
    )

    expected = RenderResult(
        errors=[
            RenderError(
                I18nMessage("py.renderer.execute.step.NoLoadedDataError", {}, None)
            )
        ]
    )
    assert_render_result_equals(result, expected)
def test_deleted_module(self):
    """execute_step with module_zipfile=None produces the noModule render error."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    step = tab.steps.create(
        order=0,
        slug="step-1",
        module_id_name="deleted_module",
        last_relevant_delta_id=workflow.last_delta_id,
    )

    result = self.run_with_async_db(
        execute_step(
            self.chroot_context,
            workflow,
            step,
            None,
            {},
            tab.to_arrow(),
            RenderResult(),
            {},
            self.output_path,
        )
    )

    expected = RenderResult(
        errors=[
            RenderError(I18nMessage("py.renderer.execute.step.noModule", {}, None))
        ]
    )
    assert_render_result_equals(result, expected)

    # The same errors must land in the cached render result.
    step.refresh_from_db()
    self.assertEqual(step.cached_render_result.errors, expected.errors)
def parse_csv(
    path: Path,
    *,
    output_path: Path,
    encoding: Optional[str],
    delimiter: Optional[str],
    has_header: bool,
    autoconvert_text_to_numbers: bool,
) -> RenderResult:
    """Parse the CSV at `path`, write Arrow data to `output_path`, and return
    a RenderResult carrying the table plus any parse warnings as errors."""
    parsed = _parse_csv(
        path,
        encoding=encoding,
        delimiter=delimiter,
        has_header=has_header,
        autoconvert_text_to_numbers=autoconvert_text_to_numbers,
    )

    # Persist the parsed table in Arrow IPC file format.
    with pyarrow.ipc.RecordBatchFileWriter(
        output_path.as_posix(), schema=parsed.table.schema
    ) as writer:
        writer.write_table(parsed.table)

    metadata = infer_table_metadata(parsed.table)
    if len(metadata.columns) == 0:
        # A zero-column table is represented as an empty ArrowTable.
        arrow_table = ArrowTable()
    else:
        arrow_table = ArrowTable(output_path, parsed.table, metadata)

    # TODO when we support i18n, this will be even simpler....
    if parsed.warnings:
        en_message = "\n".join(str(warning) for warning in parsed.warnings)
        errors = [RenderError(I18nMessage.TODO_i18n(en_message))]
    else:
        errors = []
    return RenderResult(arrow_table, errors)
def test_render_arrow_table_settings(self):
    """render() receives `settings`; values it reports surface as render errors."""

    def render(arrow_table, params, output_path, *, settings, **kwargs):
        # Echo a settings value back through the error channel.
        return [("x", {"n": settings.MAX_ROWS_PER_TABLE})]

    result = self._test_render(render, {"A": [1]})
    expected = [RenderError(I18nMessage("x", {"n": 12}, None))]
    self.assertEqual(result.errors, expected)
def test_render_xlsx_bad_content(self):
    """Corrupt .xlsx content yields a readable TODO_i18n error, not a crash."""
    with tempfile_context("fetch-") as http_path:
        # Store non-xlsx bytes behind an xlsx content-type header.
        httpfile.write(
            http_path,
            {"url": "http://example.com/hello"},
            "200 OK",
            [("content-type", XLSX_MIME_TYPE)],
            io.BytesIO("ceçi n'est pas une .xlsx".encode("utf-8")),
        )
        result = render_arrow(
            ArrowTable(),
            P(has_header=True),
            "tab-x",
            FetchResult(http_path),
            self.output_path,
        )
        # Expect an empty table plus the Excel-reader's error message.
        self.assertEqual(
            result,
            RenderResult(
                ArrowTable(),
                [
                    RenderError(
                        I18nMessage.TODO_i18n(
                            'Error reading Excel file: Unsupported format, or corrupt file: Expected BOF record; found b"ce\\xc3\\xa7i n\'"'
                        )
                    )
                ],
            ),
        )
def test_quick_fixes_no_conversions_yet(self):
    """A type mismatch with no available converter produces zero quick fixes."""
    # Let's see how our users get stuck and *then* decide whether to build
    # other, more esoteric converters. [2021-05-03, adamhooper] *I* would
    # love a UNIX timestamp <=> integer converter; but would other users be
    # too confused if a quick-fix suggested to add one in the wrong place?
    err = PromptingError([
        PromptingError.WrongColumnType(["A"], "timestamp", frozenset({"number"}))
    ])
    result = err.as_render_errors()
    self.assertEqual(
        result,
        [
            # No timestamp -> number converter exists, so the quick-fix
            # list is empty.
            RenderError(
                I18nMessage(
                    "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.without_convert_buttons",
                    {
                        "columns": 1,
                        "0": "A",
                        "found_type": "timestamp",
                        "best_wanted_type": "number",
                    },
                    None,
                ),
                [],
            ),
        ],
    )
def test_execute_migrate_params_module_error_gives_default_params(self):
    """If migrate_params() crashes, render with the spec's default params."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    create_module_zipfile(
        "mod",
        spec_kwargs={
            "loads_data": True,
            "parameters": [{
                "id_name": "x",
                "type": "string",
                "default": "def"
            }],
        },
        # migrate_params raises NameError; render echoes whatever params it got.
        python_code=textwrap.dedent("""
            import json
            def render(table, params):
                return "params: " + json.dumps(params)
            def migrate_params(params):
                cause_module_error() # NameError
        """),
    )
    step = tab.steps.create(
        order=0, slug="step-1", module_id_name="mod", params={"x": "good"}
    )
    self._execute(workflow)
    step.refresh_from_db()
    # The saved params {"x": "good"} were ignored; defaults were rendered.
    self.assertEqual(
        step.cached_render_result_errors,
        [RenderError(TODO_i18n('params: {"x": "def"}'))],
    )
def test_double_clear(self):
    """Clearing a cleared cached render result must be a no-op, not an error."""
    cached = RenderResult(
        arrow_table({"A": [1]}),
        [RenderError(I18nMessage("X", []), [])],
        {},
    )
    cache_render_result(self.workflow, self.wf_module, self.delta.id, cached)
    clear_cached_render_result_for_wf_module(self.wf_module)
    clear_cached_render_result_for_wf_module(self.wf_module)  # second clear: don't crash
def _stored_object_to_fetch_result(
    ctx: contextlib.ExitStack,
    stored_object: Optional[StoredObject],
    wf_module_fetch_error: str,
    dir: Path,
) -> Optional[FetchResult]:
    """
    Given a StoredObject (or None), return a FetchResult (or None).

    This cannot error. Any errors lead to a `None` return value.
    """
    if stored_object is None:
        return None
    try:
        last_fetch_path = ctx.enter_context(
            storedobjects.downloaded_file(stored_object, dir=dir)
        )
        # A non-empty fetch-error string becomes a single TODO_i18n error.
        errors = (
            [RenderError(I18nMessage.TODO_i18n(wf_module_fetch_error))]
            if wf_module_fetch_error
            else []
        )
        return FetchResult(last_fetch_path, errors)
    except FileNotFoundError:
        # The stored file vanished; treat as "no fetch result".
        return None
def test_quick_fixes_convert_to_text(self):
    """Columns wanted as text get a single convert-to-text quick fix."""
    err = PromptingError([
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
    ])
    result = err.as_render_errors()
    self.assertEqual(
        result,
        [
            # One error covering both columns, with one quick fix that
            # prepends a "converttotext" step for them.
            RenderError(
                I18nMessage(
                    "py.renderer.execute.types.PromptingError.WrongColumnType.as_error_message.shouldBeText",
                    {
                        "columns": 2,
                        "0": "A",
                        "1": "B"
                    },
                    None,
                ),
                [
                    QuickFix(
                        I18nMessage(
                            "py.renderer.execute.types.PromptingError.WrongColumnType.as_quick_fixes.shouldBeText",
                            {},
                            None,
                        ),
                        QuickFixAction.PrependStep(
                            "converttotext", {"colnames": ["A", "B"]}),
                    )
                ],
            )
        ],
    )
def test_render_fetch_error(self):
    """Fetch errors pass straight through render, alongside an empty table."""
    fetch_errors = [RenderError(I18nMessage("x", {"y": "z"}))]
    with tempfile_context() as empty_path:
        fetch_result = FetchResult(empty_path, fetch_errors)
        with self.render(P(), fetch_result) as result:
            assert_arrow_table_equals(result.table, ArrowTable())
            self.assertEqual(result.errors, fetch_errors)
def test_execute_migrate_params_module_error_gives_default_params(self):
    """If migrate_params() crashes, render falls back to spec default params."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    delta1 = workflow.last_delta
    create_module_zipfile(
        "mod",
        spec_kwargs={
            "parameters": [{
                "id_name": "x",
                "type": "string",
                "default": "def"
            }]
        },
        # migrate_params raises NameError; render echoes whatever params it got.
        python_code=textwrap.dedent("""
            import json
            def render(table, params):
                return "params: " + json.dumps(params)
            def migrate_params(params):
                cause_module_error() # NameError
        """),
    )
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        last_relevant_delta_id=delta1.id,
        module_id_name="mod",
        params={"x": "good"},
    )
    self._execute(workflow)
    wf_module.refresh_from_db()
    # The saved params {"x": "good"} were ignored; defaults were rendered.
    self.assertEqual(
        wf_module.cached_render_result_errors,
        [RenderError(I18nMessage.TODO_i18n('params: {"x": "def"}'))],
    )
def test_render_error(self):
    """Non-JSON content uploaded as .json yields a parse error and no table."""
    path = self._file(b"A,B\nx,y", suffix=".json")
    result = upload.render_arrow(
        ArrowTable(),
        {"file": path, "has_header": True},
        "tab-x",
        None,
        self.output_path,
    )
    assert_arrow_table_equals(result.table, {})
    expected_error = RenderError(
        message=I18nMessage(
            id="TODO_i18n",
            args={"text": "JSON parse error at byte 0: Invalid value."},
        ),
        quick_fixes=[],
    )
    self.assertEqual(result.errors, [expected_error])
def test_report_module_error(self):
    """A module killed by signal surfaces a user-facing 'unexpected' error."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="x",
        last_relevant_delta_id=workflow.last_delta_id,
    )

    def render(*args, fetch_result, **kwargs):
        # Simulate the kernel reporting a SIGKILLed module process.
        raise ModuleExitedError(-9, "")

    with self._stub_module(render):
        result = self.run_with_async_db(
            execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            ))
    # The exit signal is named in the user-visible error message.
    self.assertEqual(
        result,
        RenderResult(errors=[
            RenderError(
                I18nMessage.TODO_i18n(
                    "Something unexpected happened. We have been notified and are "
                    "working to fix it. If this persists, contact us. Error code: "
                    "SIGKILL"))
        ]),
    )
def test_deleted_module(self):
    """A wf_module whose module was uninstalled renders a delete-this-step error."""
    workflow = Workflow.create_and_init()
    tab = workflow.tabs.first()
    wf_module = tab.wf_modules.create(
        order=0,
        slug="step-1",
        module_id_name="deleted_module",
        last_relevant_delta_id=workflow.last_delta_id,
    )

    result = self.run_with_async_db(
        execute_wfmodule(
            self.chroot_context,
            workflow,
            wf_module,
            {},
            tab.to_arrow(),
            RenderResult(),
            {},
            self.output_path,
        )
    )

    expected = RenderResult(
        errors=[
            RenderError(
                I18nMessage.TODO_i18n(
                    "Please delete this step: an administrator uninstalled its code."
                )
            )
        ]
    )
    self.assertEqual(result, expected)

    # The error is also persisted to the cached render result.
    wf_module.refresh_from_db()
    self.assertEqual(wf_module.cached_render_result.errors, expected.errors)
def test_fetch_nothing(self):
    """Fetching with no file selected reports a choose-a-file error."""
    with tempfile_context(prefix="output-") as output_path:
        result = fetch_arrow(P(file=None), {}, None, None, output_path)
        expected_errors = [
            RenderError(I18nMessage.TODO_i18n("Please choose a file"))
        ]
        self.assertEqual(result.errors, expected_errors)
def render(*args, fetch_result, **kwargs):
    # The fetch result's errors and parquet data must arrive intact.
    expected_errors = [RenderError(I18nMessage.TODO_i18n("maybe an error"))]
    self.assertEqual(fetch_result.errors, expected_errors)
    fetched = pyarrow.parquet.read_table(str(fetch_result.path))
    assert_arrow_table_equals(fetched, {"A": [1]})
    return RenderResult()
def as_render_error(self) -> RenderError:
    """Build a RenderError that describes this error."""
    message = trans(
        "py.renderer.execute.types.PromptingError.CannotCoerceValueToNumber",
        default="“{value}” is not a number. Please enter a number.",
        arguments={"value": self.value},
    )
    return RenderError(message)
def fetch_arrow(
    params: Dict[str, Any],
    secrets: Dict[str, Any],
    last_fetch_result,
    input_table_parquet_path,
    output_path: Path,
) -> FetchResult:
    """Validate params and secrets, then download the chosen Google Drive file."""

    def _choose_file_error() -> FetchResult:
        # Shared error for the "no file selected" cases below.
        return FetchResult(
            output_path,
            errors=[RenderError(I18nMessage.TODO_i18n("Please choose a file"))],
        )

    file_meta = params["file"]
    if not file_meta:
        return _choose_file_error()

    # Ignore file_meta['url']. That's for the client's web browser, not for
    # an API request.
    sheet_id = file_meta["id"]
    if not sheet_id:
        # [adamhooper, 2019-12-06] has this ever happened?
        return _choose_file_error()

    # backwards-compat for old entries without 'mimeType', 2018-06-13
    sheet_mime_type = file_meta.get(
        "mimeType", "application/vnd.google-apps.spreadsheet"
    )

    secret = secrets.get("google_credentials")
    if not secret:
        return TODO_i18n_fetch_error(output_path, "Please connect to Google Drive.")
    if "error" in secret:
        return FetchResult(
            output_path,
            errors=[RenderError(I18nMessage.from_dict(secret["error"]))],
        )
    assert "secret" in secret

    oauth2_client = oauth2.Client(
        client_id=None,  # unneeded
        token_type=secret["secret"]["token_type"],
        access_token=secret["secret"]["access_token"],
    )
    return asyncio.run(
        do_download(sheet_id, sheet_mime_type, oauth2_client, output_path)
    )
def test_fetch_return_tuple_path_and_error(self):
    """A (path, str) fetch return becomes a FetchResult with one TODO_i18n error."""
    with tempfile_context(dir=self.basedir) as outfile:

        async def fetch(params):
            outfile.write_text("xyz")
            return outfile, "foo"

        result = self._test_fetch(fetch, output_filename=outfile.name)
        expected = [RenderError(I18nMessage.TODO_i18n("foo"))]
        self.assertEqual(result.errors, expected)
def test_render_deprecated_parquet_warning(self):
    """Fetch-time warnings stored with parquet data survive render unchanged."""
    errors = [RenderError(I18nMessage.TODO_i18n("truncated table"))]
    with parquet_file({"A": [1, 2], "B": [3, 4]}) as fetched_path:
        fetch_result = FetchResult(fetched_path, errors)
        with self.render(P(), fetch_result) as result:
            assert_arrow_table_equals(result.table, {"A": [1, 2], "B": [3, 4]})
            self.assertEqual(result.errors, errors)
def test_fetch_return_tuple_path_and_errors(self):
    """A (path, [tuple, ...]) fetch return maps each tuple to an I18nMessage error."""
    with tempfile_context(dir=self.basedir) as outfile:

        async def fetch(params):
            outfile.write_text("xyz")
            return (
                outfile,
                [("foo", {"a": "b"}, "module"), ("bar", {"b": 1}, "cjwmodule")],
            )

        result = self._test_fetch(fetch, output_filename=outfile.name)
        expected = [
            RenderError(I18nMessage("foo", {"a": "b"}, "module")),
            RenderError(I18nMessage("bar", {"b": 1}, "cjwmodule")),
        ]
        self.assertEqual(result.errors, expected)