Example #1
0
 def test_default_render_returns_fetch_result(self):
     """Render a request carrying a fetch result; expect its table and warning back.

     Functionality used by libraryofcongress.
     """
     with ExitStack() as stack:
         input_table = stack.enter_context(
             arrow_table_context({"A": [1]}, dir=self.basedir)
         )
         fetched_file = stack.enter_context(
             parquet_file({"A": [2]}, dir=self.basedir)
         )
         output_file = stack.enter_context(tempfile_context(dir=self.basedir))
         warning = RenderError(I18nMessage.TODO_i18n("A warning"))

         # Only the bare filename of the fetched file goes into the request.
         request = ttypes.RenderRequest(
             str(self.basedir),
             input_table.to_thrift(),
             Params({}).to_thrift(),
             ttypes.Tab("tab-1", "Tab 1"),
             ttypes.FetchResult(
                 Path(fetched_file.name).name, [warning.to_thrift()]
             ),
             output_file.name,
         )
         result = RenderResult.from_thrift(
             module.render_thrift(request), self.basedir
         )
         assert_render_result_equals(
             result,
             RenderResult(
                 arrow_table({"A": [2]}),
                 [RenderError(I18nMessage.TODO_i18n("A warning"))],
             ),
         )
Example #2
0
    def test_mark_result_unchanged(self, send_update):
        """mark_result_unchanged() keeps fetch_errors, un-busies the step and sends an update."""
        send_update.side_effect = async_noop
        workflow = Workflow.create_and_init()
        step = workflow.tabs.first().steps.create(
            order=0,
            slug="step-1",
            is_busy=True,
            fetch_errors=[RenderError(I18nMessage("foo", {}, "module"))],
        )
        checked_at = datetime.datetime(2019, 10, 22, 12, 22)

        self.run_with_async_db(
            save.mark_result_unchanged(workflow.id, step, checked_at)
        )
        self.assertEqual(step.stored_objects.count(), 0)

        def assert_step_fields():
            self.assertEqual(
                step.fetch_errors,
                [RenderError(I18nMessage("foo", {}, "module"))],
            )
            self.assertEqual(step.is_busy, False)
            self.assertEqual(step.last_update_check, checked_at)

        # Check the in-memory object first, then the values reloaded from the DB.
        assert_step_fields()
        step.refresh_from_db()
        assert_step_fields()

        expected_update = clientside.Update(
            steps={
                step.id: clientside.StepUpdate(
                    is_busy=False, last_fetched_at=checked_at
                )
            }
        )
        send_update.assert_called_with(workflow.id, expected_update)
Example #3
0
 def test_quick_fixes(self):
     """Each text-instead-of-number column group yields one error with a convert quick fix."""
     message_id = "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.before_convert_buttons"
     quick_fix_id = "py.renderer.execute.types.PromptingError.WrongColumnType.general.quick_fix"

     def expected_error(message_args, colnames):
         # One RenderError whose single quick fix prepends "converttexttonumber".
         return RenderError(
             I18nMessage(message_id, message_args, None),
             [
                 QuickFix(
                     I18nMessage(quick_fix_id, {"wanted_type": "number"}, None),
                     QuickFixAction.PrependStep(
                         "converttexttonumber", {"colnames": colnames}
                     ),
                 )
             ],
         )

     wanted_types = frozenset({"number"})
     err = PromptingError(
         [
             PromptingError.WrongColumnType(["A"], "text", wanted_types),
             PromptingError.WrongColumnType(["B", "C"], "text", wanted_types),
         ]
     )
     result = err.as_render_errors()

     expected = [
         expected_error(
             {"columns": 1, "0": "A", "found_type": "text"},
             ["A"],
         ),
         expected_error(
             {"columns": 2, "0": "B", "1": "C", "found_type": "text"},
             ["B", "C"],
         ),
     ]
     self.assertEqual(result, expected)
Example #4
0
 def test_different_errors(self):
     """Two fetch results with the same path but different errors compare unequal."""
     foo_result = FetchResult(
         self.old_path, [RenderError(I18nMessage("foo", {}, None))]
     )
     bar_result = FetchResult(
         self.old_path, [RenderError(I18nMessage("bar", {}, None))]
     )
     self.assertFalse(are_fetch_results_equal(foo_result, bar_result))
Example #5
0
    def test_fetch_result_happy_path(self):
        """Execute a step whose module code asserts on the fetch_result it is given."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        stored_fetch_errors = [
            RenderError(I18nMessage("foo", {}, "module")),
            RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
        ]
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
            fetch_errors=stored_fetch_errors,
        )
        with parquet_file({"A": [1]}) as parquet_path:
            stored_object = create_stored_object(workflow.id, step.id, parquet_path)
        # Point the step at the stored object that was just created.
        step.stored_data_version = stored_object.stored_at
        step.save(update_fields=["stored_data_version"])

        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": True},
            python_code=textwrap.dedent(
                """
                import pyarrow as pa
                import pandas as pd
                from pandas.testing import assert_frame_equal
                from cjwkernel.types import RenderError, I18nMessage

                def render(table, params, *, fetch_result, **kwargs):
                    assert fetch_result.errors == [
                        RenderError(I18nMessage("foo", {}, "module")),
                        RenderError(I18nMessage("bar", {"x": "y"}, "cjwmodule")),
                    ]
                    fetch_dataframe = pa.parquet.read_table(str(fetch_result.path))
                    assert_frame_equal(fetch_dataframe, pd.DataFrame({"A": [1]}))
                    return pd.DataFrame()
                """
            ),
        )

        execute_kwargs = dict(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=module_zipfile,
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(execute_step(**execute_kwargs))
Example #6
0
async def do_download(
    sheet_id: str, sheet_mime_type: str, oauth2_client: oauth2.Client, output_path: Path
) -> FetchResult:
    """
    Download a spreadsheet from Google into `output_path`.

    When `sheet_mime_type` is 'application/vnd.google-apps.spreadsheet', the
    GDrive API is asked to _export_ a text/csv; for any other MIME type the
    GDrive API is asked to _download_ the file as-is.

    HTTP failures are not raised: they are returned as a FetchResult carrying
    an error. A plain `FetchResult(output_path)` is returned on success.
    """
    is_google_sheet = sheet_mime_type == "application/vnd.google-apps.spreadsheet"
    if is_google_sheet:
        url = _generate_google_sheet_url(sheet_id)
        sheet_mime_type = "text/csv"
    else:
        # keep the passed sheet_mime_type
        url = _generate_gdrive_file_url(sheet_id)

    url, headers, _ = oauth2_client.add_token(url, headers={})

    # Status codes that get a hand-written message instead of the generic one.
    status_messages = {
        401: "Invalid credentials. Please reconnect to Google Drive.",
        403: "You chose a file your logged-in user cannot access. Please reconnect to Google Drive or choose a different file.",
        404: "File not found. Please choose a different file.",
    }

    try:
        await httpfile.download(url, output_path, headers=headers, ssl=SSL_CONTEXT)
    except HttpError.NotSuccess as err:
        message = status_messages.get(err.response.status_code)
        if message is not None:
            return TODO_i18n_fetch_error(output_path, message)
        # HACK: *err.i18n_message because i18n_message is a tuple
        # compatible with I18nMessage() ctor
        generic_error = RenderError(I18nMessage(*err.i18n_message))
        return FetchResult(output_path, errors=[generic_error])
    except HttpError as err:
        # HACK: *err.i18n_message because i18n_message is a tuple
        # compatible with I18nMessage() ctor
        generic_error = RenderError(I18nMessage(*err.i18n_message))
        return FetchResult(output_path, errors=[generic_error])

    return FetchResult(output_path)
 def test_double_clear(self):
     """Clearing a step's cached render result twice in a row must not crash."""
     table = arrow_table({"A": [1]})
     errors = [RenderError(I18nMessage("X", {}, None), [])]
     result = RenderResult(table, errors, {})
     cache_render_result(self.workflow, self.step, 1, result)
     for _ in range(2):  # the second pass must not crash
         clear_cached_render_result_for_step(self.step)
Example #8
0
    def test_deleted_module(self):
        """With no module zipfile, the step renders no columns and caches a noModule error."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="deleted_module",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        execution = execute_step(
            chroot_context=self.chroot_context,
            workflow=workflow,
            step=step,
            module_zipfile=None,  # the module is gone
            params={},
            tab_name=tab.name,
            input_path=self.empty_table_path,
            input_table_columns=[],
            tab_results={},
            output_path=self.output_path,
        )
        result = self.run_with_async_db(execution)
        self.assertEqual(result.columns, [])
        self.assertEqual(self.output_path.read_bytes(), b"")

        step.refresh_from_db()
        no_module_error = RenderError(
            I18nMessage("py.renderer.execute.step.noModule", {}, None)
        )
        self.assertEqual(step.cached_render_result.errors, [no_module_error])
Example #9
0
    def test_render_without_input_or_loads_data_raises_no_loaded_data(self):
        """A module with loads_data=False and a default RenderResult() input yields NoLoadedDataError."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )
        module_zipfile = create_module_zipfile(
            "x",
            spec_kwargs={"loads_data": False},
            python_code="def render(table, params): return None",
        )

        execution = execute_step(
            self.chroot_context,
            workflow,
            step,
            module_zipfile,
            {},
            Tab(tab.slug, tab.name),
            RenderResult(),
            {},
            self.output_path,
        )
        result = self.run_with_async_db(execution)

        expected_error = RenderError(
            I18nMessage("py.renderer.execute.step.NoLoadedDataError", {}, None)
        )
        assert_render_result_equals(result, RenderResult(errors=[expected_error]))
Example #10
0
 def test_deleted_module(self):
     """Executing a step with no module zipfile returns, and caches, a noModule error."""
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     step = tab.steps.create(
         order=0,
         slug="step-1",
         module_id_name="deleted_module",
         last_relevant_delta_id=workflow.last_delta_id,
     )
     execution = execute_step(
         self.chroot_context,
         workflow,
         step,
         None,  # no module zipfile
         {},
         tab.to_arrow(),
         RenderResult(),
         {},
         self.output_path,
     )
     result = self.run_with_async_db(execution)

     no_module_message = I18nMessage("py.renderer.execute.step.noModule", {}, None)
     expected = RenderResult(errors=[RenderError(no_module_message)])
     assert_render_result_equals(result, expected)

     # The same errors must have been written to the step's cached result.
     step.refresh_from_db()
     self.assertEqual(step.cached_render_result.errors, expected.errors)
Example #11
0
def parse_csv(
    path: Path,
    *,
    output_path: Path,
    encoding: Optional[str],
    delimiter: Optional[str],
    has_header: bool,
    autoconvert_text_to_numbers: bool,
) -> RenderResult:
    """Parse the CSV at `path`, write the table to `output_path`, return a RenderResult.

    The parsed table is always written to `output_path` with a
    `pyarrow.ipc.RecordBatchFileWriter`. If the inferred metadata has no
    columns, the result wraps a bare `ArrowTable()`; otherwise it wraps the
    written file. All parser warnings are joined with newlines into a single
    RenderError.
    """
    parsed = _parse_csv(
        path,
        encoding=encoding,
        delimiter=delimiter,
        has_header=has_header,
        autoconvert_text_to_numbers=autoconvert_text_to_numbers,
    )

    writer_cm = pyarrow.ipc.RecordBatchFileWriter(
        output_path.as_posix(), schema=parsed.table.schema
    )
    with writer_cm as writer:
        writer.write_table(parsed.table)

    metadata = infer_table_metadata(parsed.table)
    arrow_table = (
        ArrowTable()
        if len(metadata.columns) == 0
        else ArrowTable(output_path, parsed.table, metadata)
    )

    errors = []
    if parsed.warnings:
        # TODO when we support i18n, this will be even simpler....
        en_message = "\n".join(map(str, parsed.warnings))
        errors.append(RenderError(I18nMessage.TODO_i18n(en_message)))

    return RenderResult(arrow_table, errors)
Example #12
0
    def test_render_arrow_table_settings(self):
        """render() receives a `settings` kwarg whose MAX_ROWS_PER_TABLE shows up in the error args."""

        def render(arrow_table, params, output_path, *, settings, **kwargs):
            max_rows = settings.MAX_ROWS_PER_TABLE
            return [("x", {"n": max_rows})]

        result = self._test_render(render, {"A": [1]})
        expected_error = RenderError(I18nMessage("x", {"n": 12}, None))
        self.assertEqual(result.errors, [expected_error])
 def test_render_xlsx_bad_content(self):
     """A fetched body that is not a real .xlsx produces an 'Error reading Excel file' error."""
     bogus_body = "ceçi n'est pas une .xlsx".encode("utf-8")
     with tempfile_context("fetch-") as http_path:
         # The stored response claims the XLSX content type; its bytes are not XLSX.
         httpfile.write(
             http_path,
             {"url": "http://example.com/hello"},
             "200 OK",
             [("content-type", XLSX_MIME_TYPE)],
             io.BytesIO(bogus_body),
         )
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             FetchResult(http_path),
             self.output_path,
         )

     expected_error = RenderError(
         I18nMessage.TODO_i18n(
             'Error reading Excel file: Unsupported format, or corrupt file: Expected BOF record; found b"ce\\xc3\\xa7i n\'"'
         )
     )
     self.assertEqual(result, RenderResult(ArrowTable(), [expected_error]))
Example #14
0
 def test_quick_fixes_no_conversions_yet(self):
     """A timestamp column where a number is wanted yields an error with no quick fixes."""
     # Let's see how our users get stuck and *then* decide whether to build
     # other, more esoteric converters. [2021-05-03, adamhooper] *I* would
     # love a UNIX timestamp <=> integer converter; but would other users be
     # too confused if a quick-fix suggested to add one in the wrong place?
     wrong_type = PromptingError.WrongColumnType(
         ["A"], "timestamp", frozenset({"number"})
     )
     result = PromptingError([wrong_type]).as_render_errors()

     expected_message = I18nMessage(
         "py.renderer.execute.types.PromptingError.WrongColumnType.general.message.without_convert_buttons",
         {
             "columns": 1,
             "0": "A",
             "found_type": "timestamp",
             "best_wanted_type": "number",
         },
         None,
     )
     self.assertEqual(result, [RenderError(expected_message, [])])
Example #15
0
    def test_execute_migrate_params_module_error_gives_default_params(self):
        """When migrate_params() raises, render sees the spec default ("def"), not the stored "good"."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        x_param_spec = {"id_name": "x", "type": "string", "default": "def"}
        create_module_zipfile(
            "mod",
            spec_kwargs={"loads_data": True, "parameters": [x_param_spec]},
            python_code=textwrap.dedent("""
                import json
                def render(table, params): return "params: " + json.dumps(params)
                def migrate_params(params): cause_module_error()  # NameError
                """),
        )
        step = tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
            params={"x": "good"},
        )

        self._execute(workflow)

        step.refresh_from_db()
        expected_errors = [RenderError(TODO_i18n('params: {"x": "def"}'))]
        self.assertEqual(step.cached_render_result_errors, expected_errors)
Example #16
0
 def test_double_clear(self):
     """Clearing a wf_module's cached render result twice in a row must not crash."""
     table = arrow_table({"A": [1]})
     errors = [RenderError(I18nMessage("X", []), [])]
     result = RenderResult(table, errors, {})
     cache_render_result(self.workflow, self.wf_module, self.delta.id, result)
     for _ in range(2):  # the second pass must not crash
         clear_cached_render_result_for_wf_module(self.wf_module)
Example #17
0
def _stored_object_to_fetch_result(
    ctx: contextlib.ExitStack,
    stored_object: Optional[StoredObject],
    wf_module_fetch_error: str,
    dir: Path,
) -> Optional[FetchResult]:
    """
    Turn a StoredObject (or None) into a FetchResult (or None).

    The stored object's file is downloaded into `dir` through a context
    manager that is entered on `ctx`. A truthy `wf_module_fetch_error` is
    wrapped into a single RenderError on the result.

    Returns None when `stored_object` is None or when a FileNotFoundError is
    raised while building the result.
    """
    if stored_object is None:
        return None

    try:
        download = storedobjects.downloaded_file(stored_object, dir=dir)
        last_fetch_path = ctx.enter_context(download)
        errors = (
            [RenderError(I18nMessage.TODO_i18n(wf_module_fetch_error))]
            if wf_module_fetch_error
            else []
        )
        return FetchResult(last_fetch_path, errors)
    except FileNotFoundError:
        return None
Example #18
0
 def test_quick_fixes_convert_to_text(self):
     """Columns that should be text yield one error with a "converttotext" quick fix."""
     wrong_type = PromptingError.WrongColumnType(
         ["A", "B"], None, frozenset({"text"})
     )
     result = PromptingError([wrong_type]).as_render_errors()

     expected_message = I18nMessage(
         "py.renderer.execute.types.PromptingError.WrongColumnType.as_error_message.shouldBeText",
         {"columns": 2, "0": "A", "1": "B"},
         None,
     )
     expected_quick_fix = QuickFix(
         I18nMessage(
             "py.renderer.execute.types.PromptingError.WrongColumnType.as_quick_fixes.shouldBeText",
             {},
             None,
         ),
         QuickFixAction.PrependStep("converttotext", {"colnames": ["A", "B"]}),
     )
     self.assertEqual(
         result, [RenderError(expected_message, [expected_quick_fix])]
     )
Example #19
0
 def test_render_fetch_error(self):
     """Errors on the fetch result come back unchanged alongside an ArrowTable()."""
     fetch_errors = [RenderError(I18nMessage("x", {"y": "z"}))]
     with tempfile_context() as empty_path:
         fetch_result = FetchResult(empty_path, fetch_errors)
         with self.render(P(), fetch_result) as result:
             assert_arrow_table_equals(result.table, ArrowTable())
             self.assertEqual(result.errors, fetch_errors)
    def test_execute_migrate_params_module_error_gives_default_params(self):
        """When migrate_params() raises, render sees the spec default ("def"), not the stored "good"."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta1 = workflow.last_delta
        x_param_spec = {"id_name": "x", "type": "string", "default": "def"}
        create_module_zipfile(
            "mod",
            spec_kwargs={"parameters": [x_param_spec]},
            python_code=textwrap.dedent("""
                import json
                def render(table, params): return "params: " + json.dumps(params)
                def migrate_params(params): cause_module_error()  # NameError
                """),
        )
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=delta1.id,
            module_id_name="mod",
            params={"x": "good"},
        )

        self._execute(workflow)

        wf_module.refresh_from_db()
        expected_errors = [
            RenderError(I18nMessage.TODO_i18n('params: {"x": "def"}'))
        ]
        self.assertEqual(wf_module.cached_render_result_errors, expected_errors)
 def test_render_error(self):
     """CSV bytes stored under a ".json" suffix produce a JSON parse error."""
     path = self._file(b"A,B\nx,y", suffix=".json")
     result = upload.render_arrow(
         ArrowTable(),
         {"file": path, "has_header": True},
         "tab-x",
         None,
         self.output_path,
     )
     assert_arrow_table_equals(result.table, {})

     expected_message = I18nMessage(
         id="TODO_i18n",
         args={"text": "JSON parse error at byte 0: Invalid value."},
     )
     expected_errors = [RenderError(message=expected_message, quick_fixes=[])]
     self.assertEqual(result.errors, expected_errors)
    def test_report_module_error(self):
        """A ModuleExitedError(-9, "") raised by render is reported with "Error code: SIGKILL"."""
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        def render(*args, fetch_result, **kwargs):
            raise ModuleExitedError(-9, "")

        with self._stub_module(render):
            execution = execute_wfmodule(
                self.chroot_context,
                workflow,
                wf_module,
                {},
                Tab(tab.slug, tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            result = self.run_with_async_db(execution)

        expected_error = RenderError(
            I18nMessage.TODO_i18n(
                "Something unexpected happened. We have been notified and are "
                "working to fix it. If this persists, contact us. Error code: "
                "SIGKILL"
            )
        )
        self.assertEqual(result, RenderResult(errors=[expected_error]))
 def test_deleted_module(self):
     """A wf_module whose module id is "deleted_module" renders, and caches, a "Please delete this step" error."""
     workflow = Workflow.create_and_init()
     tab = workflow.tabs.first()
     wf_module = tab.wf_modules.create(
         order=0,
         slug="step-1",
         module_id_name="deleted_module",
         last_relevant_delta_id=workflow.last_delta_id,
     )
     execution = execute_wfmodule(
         self.chroot_context,
         workflow,
         wf_module,
         {},
         tab.to_arrow(),
         RenderResult(),
         {},
         self.output_path,
     )
     result = self.run_with_async_db(execution)

     uninstalled_message = I18nMessage.TODO_i18n(
         "Please delete this step: an administrator uninstalled its code."
     )
     expected = RenderResult(errors=[RenderError(uninstalled_message)])
     self.assertEqual(result, expected)

     # The same errors must have been written to the wf_module's cached result.
     wf_module.refresh_from_db()
     self.assertEqual(wf_module.cached_render_result.errors, expected.errors)
 def test_fetch_nothing(self):
     """Fetching with file=None reports "Please choose a file"."""
     expected_errors = [RenderError(I18nMessage.TODO_i18n("Please choose a file"))]
     with tempfile_context(prefix="output-") as output_path:
         result = fetch_arrow(P(file=None), {}, None, None, output_path)
         self.assertEqual(result.errors, expected_errors)
 def render(*args, fetch_result, **kwargs):
     """Stub render: check the fetch_result handed in, then return RenderResult()."""
     expected_errors = [RenderError(I18nMessage.TODO_i18n("maybe an error"))]
     self.assertEqual(fetch_result.errors, expected_errors)
     fetched_table = pyarrow.parquet.read_table(str(fetch_result.path))
     assert_arrow_table_equals(fetched_table, {"A": [1]})
     return RenderResult()
Example #26
0
 def as_render_error(self) -> RenderError:
     """Return a RenderError whose message reports that `self.value` is not a number."""
     message = trans(
         "py.renderer.execute.types.PromptingError.CannotCoerceValueToNumber",
         default="“{value}” is not a number. Please enter a number.",
         arguments={"value": self.value},
     )
     return RenderError(message)
Example #27
0
def fetch_arrow(
    params: Dict[str, Any],
    secrets: Dict[str, Any],
    last_fetch_result,
    input_table_parquet_path,
    output_path: Path,
) -> FetchResult:
    """Fetch the Google Drive file selected in `params["file"]` into `output_path`.

    Returns a FetchResult carrying an error (without downloading anything)
    when no file is chosen, when the file has no id, when the
    "google_credentials" secret is missing, or when that secret holds an
    "error". Otherwise runs `do_download()` to completion and returns its
    result.
    """

    def choose_file_result() -> FetchResult:
        # Shared by the "no file" and "no file id" cases.
        return FetchResult(
            output_path,
            errors=[RenderError(I18nMessage.TODO_i18n("Please choose a file"))],
        )

    file_meta = params["file"]
    if not file_meta:
        return choose_file_result()

    # Ignore file_meta['url']. That's for the client's web browser, not for
    # an API request.
    sheet_id = file_meta["id"]
    if not sheet_id:
        # [adamhooper, 2019-12-06] has this ever happened?
        return choose_file_result()

    # backwards-compat for old entries without 'mimeType', 2018-06-13
    default_mime_type = "application/vnd.google-apps.spreadsheet"
    sheet_mime_type = file_meta.get("mimeType", default_mime_type)

    secret = secrets.get("google_credentials")
    if not secret:
        return TODO_i18n_fetch_error(output_path, "Please connect to Google Drive.")
    if "error" in secret:
        secret_error = RenderError(I18nMessage.from_dict(secret["error"]))
        return FetchResult(output_path, errors=[secret_error])
    assert "secret" in secret

    oauth2_client = oauth2.Client(
        client_id=None,  # unneeded
        token_type=secret["secret"]["token_type"],
        access_token=secret["secret"]["access_token"],
    )

    download = do_download(sheet_id, sheet_mime_type, oauth2_client, output_path)
    return asyncio.run(download)
Example #28
0
    def test_fetch_return_tuple_path_and_error(self):
        """A fetch() returning `(path, "foo")` yields one TODO_i18n error for "foo"."""
        with tempfile_context(dir=self.basedir) as outfile:

            async def fetch(params):
                outfile.write_text("xyz")
                return (outfile, "foo")

            result = self._test_fetch(fetch, output_filename=outfile.name)
            expected_errors = [RenderError(I18nMessage.TODO_i18n("foo"))]
            self.assertEqual(result.errors, expected_errors)
Example #29
0
 def test_render_deprecated_parquet_warning(self):
     """Rendering a fetched parquet file returns its table and the fetch errors unchanged."""
     errors = [RenderError(I18nMessage.TODO_i18n("truncated table"))]
     table_data = {"A": [1, 2], "B": [3, 4]}
     with parquet_file(table_data) as fetched_path:
         fetch_result = FetchResult(fetched_path, errors)
         with self.render(P(), fetch_result) as result:
             assert_arrow_table_equals(result.table, {"A": [1, 2], "B": [3, 4]})
             self.assertEqual(result.errors, errors)
Example #30
0
    def test_fetch_return_tuple_path_and_errors(self):
        """A fetch() returning `(path, [tuples])` yields one RenderError per tuple."""
        with tempfile_context(dir=self.basedir) as outfile:

            async def fetch(params):
                outfile.write_text("xyz")
                error_tuples = [
                    ("foo", {"a": "b"}, "module"),
                    ("bar", {"b": 1}, "cjwmodule"),
                ]
                return (outfile, error_tuples)

            result = self._test_fetch(fetch, output_filename=outfile.name)
            expected_errors = [
                RenderError(I18nMessage("foo", {"a": "b"}, "module")),
                RenderError(I18nMessage("bar", {"b": 1}, "cjwmodule")),
            ]
            self.assertEqual(result.errors, expected_errors)