Esempio n. 1
0
 def test_default_render_returns_fetch_result(self):
     """
     The default render outputs the fetched table and the fetch errors.

     Functionality used by libraryofcongress.
     """

     def make_warning():
         return RenderError(I18nMessage.TODO_i18n("A warning"))

     with ExitStack() as stack:
         input_table = stack.enter_context(
             arrow_table_context({"A": [1]}, dir=self.basedir)
         )
         fetched_file = stack.enter_context(
             parquet_file({"A": [2]}, dir=self.basedir)
         )
         # The request carries only the file's basename.
         fetched_basename = Path(fetched_file.name).name
         output_file = stack.enter_context(tempfile_context(dir=self.basedir))
         request = ttypes.RenderRequest(
             str(self.basedir),
             input_table.to_thrift(),
             Params({}).to_thrift(),
             ttypes.Tab("tab-1", "Tab 1"),
             ttypes.FetchResult(fetched_basename, [make_warning().to_thrift()]),
             output_file.name,
         )
         actual = RenderResult.from_thrift(
             module.render_thrift(request), self.basedir
         )
         expected = RenderResult(arrow_table({"A": [2]}), [make_warning()])
         assert_render_result_equals(actual, expected)
Esempio n. 2
0
    def test_quick_fixes(self):
        """Each WrongColumnType produces one quick fix and one error paragraph."""
        wrong_types = [
            PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
            PromptingError.WrongColumnType(
                ["B", "C"], "datetime", frozenset({"number"})
            ),
        ]
        err = PromptingError(wrong_types)

        actual_quick_fixes = err.as_quick_fixes()
        expected_quick_fixes = [
            QuickFix(
                I18nMessage.TODO_i18n(prompt),
                QuickFixAction.PrependStep(
                    "converttexttonumber", {"colnames": colnames}
                ),
            )
            for prompt, colnames in (
                ("Convert Text to Numbers", ["A"]),
                ("Convert Dates & Times to Numbers", ["B", "C"]),
            )
        ]
        self.assertEqual(actual_quick_fixes, expected_quick_fixes)

        actual_message = err.as_error_str()
        expected_message = (
            "The column “A” must be converted from Text to Numbers.\n\n"
            "The columns “B” and “C” must be converted from Dates & Times to Numbers."
        )
        self.assertEqual(actual_message, expected_message)
Esempio n. 3
0
 def test_render_xlsx_bad_content(self):
     """A non-Excel body served with the XLSX MIME type gives a read error."""
     body = io.BytesIO("ceçi n'est pas une .xlsx".encode("utf-8"))
     headers = [("content-type", XLSX_MIME_TYPE)]
     with tempfile_context("fetch-") as http_path:
         httpfile.write(
             http_path, {"url": "http://example.com/hello"}, "200 OK", headers, body
         )
         result = render_arrow(
             ArrowTable(),
             P(has_header=True),
             "tab-x",
             FetchResult(http_path),
             self.output_path,
         )

     expected_error = RenderError(
         I18nMessage.TODO_i18n(
             'Error reading Excel file: Unsupported format, or corrupt file: Expected BOF record; found b"ce\\xc3\\xa7i n\'"'
         )
     )
     self.assertEqual(result, RenderResult(ArrowTable(), [expected_error]))
Esempio n. 4
0
def parse_csv(
    path: Path,
    *,
    output_path: Path,
    encoding: Optional[str],
    delimiter: Optional[str],
    has_header: bool,
    autoconvert_text_to_numbers: bool,
) -> RenderResult:
    """
    Parse the CSV-like file at `path` and write it to `output_path` as Arrow.

    The parsed table is always written to `output_path`. The returned
    RenderResult points to that file unless the table has zero columns, in
    which case it holds an empty ArrowTable. All parse warnings are joined
    (one per line) into a single RenderError.
    """
    parsed = _parse_csv(
        path,
        encoding=encoding,
        delimiter=delimiter,
        has_header=has_header,
        autoconvert_text_to_numbers=autoconvert_text_to_numbers,
    )
    table = parsed.table

    with pyarrow.ipc.RecordBatchFileWriter(
        output_path.as_posix(), schema=table.schema
    ) as writer:
        writer.write_table(table)

    metadata = infer_table_metadata(table)
    has_columns = len(metadata.columns) > 0
    arrow_table = (
        ArrowTable(output_path, table, metadata) if has_columns else ArrowTable()
    )

    errors = []
    if parsed.warnings:
        # TODO when we support i18n, this will be even simpler....
        en_message = "\n".join(str(warning) for warning in parsed.warnings)
        errors.append(RenderError(I18nMessage.TODO_i18n(en_message)))

    return RenderResult(arrow_table, errors)
    def test_report_module_error(self):
        """A module that exits with -9 is reported with error code SIGKILL."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=workflow.last_delta_id,
        )

        def render(*args, fetch_result, **kwargs):
            raise ModuleExitedError(-9, "")

        with self._stub_module(render):
            pending = execute_wfmodule(
                self.chroot_context,
                workflow,
                step,
                {},
                Tab(first_tab.slug, first_tab.name),
                RenderResult(),
                {},
                self.output_path,
            )
            result = self.run_with_async_db(pending)

        expected_message = I18nMessage.TODO_i18n(
            "Something unexpected happened. We have been notified and are "
            "working to fix it. If this persists, contact us. Error code: "
            "SIGKILL"
        )
        self.assertEqual(
            result, RenderResult(errors=[RenderError(expected_message)])
        )
Esempio n. 6
0
def _stored_object_to_fetch_result(
    ctx: contextlib.ExitStack,
    stored_object: Optional[StoredObject],
    wf_module_fetch_error: str,
    dir: Path,
) -> Optional[FetchResult]:
    """
    Convert a StoredObject (or None) into a FetchResult (or None).

    The stored file is downloaded into `dir`; the download's context manager
    is entered on `ctx`, so `ctx` controls when it is exited.

    Return `None` when `stored_object` is `None` or when a
    `FileNotFoundError` is raised. A non-empty `wf_module_fetch_error`
    becomes the FetchResult's single error.
    """
    if stored_object is None:
        return None

    try:
        downloaded_path = ctx.enter_context(
            storedobjects.downloaded_file(stored_object, dir=dir)
        )
        errors = (
            [RenderError(I18nMessage.TODO_i18n(wf_module_fetch_error))]
            if wf_module_fetch_error
            else []
        )
        return FetchResult(downloaded_path, errors)
    except FileNotFoundError:
        return None
 def test_fetch_nothing(self):
     """Fetching with no file selected asks the user to choose a file."""
     expected_errors = [RenderError(I18nMessage.TODO_i18n("Please choose a file"))]
     with tempfile_context(prefix="output-") as output_path:
         fetch_result = fetch_arrow(P(file=None), {}, None, None, output_path)
         self.assertEqual(fetch_result.errors, expected_errors)
    def test_execute_migrate_params_module_error_gives_default_params(self):
        """When migrate_params() raises, the step is rendered with default params."""
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        initial_delta = workflow.last_delta

        # render() echoes its params; migrate_params() always fails.
        module_code = textwrap.dedent("""
                import json
                def render(table, params): return "params: " + json.dumps(params)
                def migrate_params(params): cause_module_error()  # NameError
                """)
        param_spec = {"id_name": "x", "type": "string", "default": "def"}
        create_module_zipfile(
            "mod",
            spec_kwargs={"parameters": [param_spec]},
            python_code=module_code,
        )
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            last_relevant_delta_id=initial_delta.id,
            module_id_name="mod",
            params={"x": "good"},
        )

        self._execute(workflow)

        step.refresh_from_db()
        expected_errors = [RenderError(I18nMessage.TODO_i18n('params: {"x": "def"}'))]
        self.assertEqual(step.cached_render_result_errors, expected_errors)
 def test_deleted_module(self):
     """A step whose module is not installed renders a "please delete" error."""
     workflow = Workflow.create_and_init()
     first_tab = workflow.tabs.first()
     step = first_tab.wf_modules.create(
         order=0,
         slug="step-1",
         module_id_name="deleted_module",
         last_relevant_delta_id=workflow.last_delta_id,
     )

     pending = execute_wfmodule(
         self.chroot_context,
         workflow,
         step,
         {},
         first_tab.to_arrow(),
         RenderResult(),
         {},
         self.output_path,
     )
     result = self.run_with_async_db(pending)

     expected_message = I18nMessage.TODO_i18n(
         "Please delete this step: an administrator uninstalled its code."
     )
     expected = RenderResult(errors=[RenderError(expected_message)])
     self.assertEqual(result, expected)

     # The same errors must have been cached on the step.
     step.refresh_from_db()
     self.assertEqual(step.cached_render_result.errors, expected.errors)
 def render(*args, fetch_result, **kwargs):
     # Stub render(): check the fetch result handed to the module, then
     # return an empty RenderResult.
     expected_errors = [RenderError(I18nMessage.TODO_i18n("maybe an error"))]
     self.assertEqual(fetch_result.errors, expected_errors)
     fetched_table = pyarrow.parquet.read_table(str(fetch_result.path))
     assert_arrow_table_equals(fetched_table, {"A": [1]})
     return RenderResult()
Esempio n. 11
0
def fetch_arrow(
    params: Dict[str, Any],
    secrets: Dict[str, Any],
    last_fetch_result,
    input_table_parquet_path,
    output_path: Path,
) -> FetchResult:
    """
    Download the file selected in `params["file"]` into `output_path`.

    Return a FetchResult carrying an error, without downloading, when no
    file (or no file ID) is selected, when the "google_credentials" secret
    is missing, or when that secret holds an "error". Otherwise run
    `do_download()` to completion and return its result.
    """

    def choose_file_result() -> FetchResult:
        return FetchResult(
            output_path,
            errors=[RenderError(I18nMessage.TODO_i18n("Please choose a file"))],
        )

    file_meta = params["file"]
    if not file_meta:
        return choose_file_result()

    # Ignore file_meta['url']. That's for the client's web browser, not for
    # an API request.
    sheet_id = file_meta["id"]
    if not sheet_id:
        # [adamhooper, 2019-12-06] has this ever happened?
        return choose_file_result()

    # backwards-compat for old entries without 'mimeType', 2018-06-13
    sheet_mime_type = file_meta.get(
        "mimeType", "application/vnd.google-apps.spreadsheet"
    )

    credentials = secrets.get("google_credentials")
    if not credentials:
        return TODO_i18n_fetch_error(output_path, "Please connect to Google Drive.")
    if "error" in credentials:
        secret_error = RenderError(I18nMessage.from_dict(credentials["error"]))
        return FetchResult(output_path, errors=[secret_error])
    assert "secret" in credentials

    token = credentials["secret"]
    oauth2_client = oauth2.Client(
        client_id=None,  # unneeded
        token_type=token["token_type"],
        access_token=token["access_token"],
    )

    download = do_download(sheet_id, sheet_mime_type, oauth2_client, output_path)
    return asyncio.run(download)
Esempio n. 12
0
 def test_render_deprecated_parquet_warning(self):
     """Errors stored with a fetched Parquet file are kept in the render output."""
     fetch_errors = [RenderError(I18nMessage.TODO_i18n("truncated table"))]
     table_data = {"A": [1, 2], "B": [3, 4]}
     with parquet_file({"A": [1, 2], "B": [3, 4]}) as fetched_path:
         fetch_result = FetchResult(fetched_path, fetch_errors)
         with self.render(P(), fetch_result) as result:
             assert_arrow_table_equals(result.table, table_data)
             self.assertEqual(result.errors, fetch_errors)
Esempio n. 13
0
    def test_fetch_return_tuple_path_and_error(self):
        """A fetch() returning (path, message) reports the message as an error."""
        with tempfile_context(dir=self.basedir) as output_path:

            async def fetch(params):
                output_path.write_text("xyz")
                return output_path, "foo"

            result = self._test_fetch(fetch, output_filename=output_path.name)
            expected_errors = [RenderError(I18nMessage.TODO_i18n("foo"))]
            self.assertEqual(result.errors, expected_errors)
Esempio n. 14
0
    def test_render_truncate(self):
        """A three-row render output is cut to two rows, with a warning."""

        def render(table, params):
            return pd.DataFrame({"A": [1, 2, 3]})

        result = self._test_render(render)

        assert_arrow_table_equals(result.table, {"A": [1, 2]})
        expected_warning = RenderError(
            I18nMessage.TODO_i18n("Truncated output from 3 rows to 2")
        )
        self.assertEqual(result.errors, [expected_warning])
Esempio n. 15
0
def _load_fetch_result(
    wf_module: WfModule, basedir: Path, exit_stack: contextlib.ExitStack
) -> Optional[FetchResult]:
    """
    Download user-selected StoredObject to `basedir`, so render() can read it.

    The download target is a `tempfile_context(prefix="fetch-result-")` file
    inside `basedir`. When the download succeeds, ownership of that temporary
    file's context is transferred to `exit_stack`: the context is exited when
    `exit_stack` closes, not when this function returns.

    Return `None` if:

    * no StoredObject matches `wf_module.stored_data_version` (the user did
      not select one);
    * the selected StoredObject has an empty `bucket` or `key`; or
    * the download raises `FileNotFoundError` -- in which case the temporary
      file's context is exited before returning.

    Otherwise, return a FetchResult pointing to the downloaded file. If
    `wf_module.fetch_error` is set, it becomes the FetchResult's only error.
    """
    try:
        stored_object = wf_module.stored_objects.get(
            stored_at=wf_module.stored_data_version
        )
    except StoredObject.DoesNotExist:
        return None
    if not stored_object.bucket or not stored_object.key:
        return None

    with contextlib.ExitStack() as inner_stack:
        path = inner_stack.enter_context(
            tempfile_context(prefix="fetch-result-", dir=basedir)
        )

        try:
            minio.download(stored_object.bucket, stored_object.key, path)
            # Download succeeded, so we no longer want to delete `path`
            # right _now_ ("now" means, "in inner_stack.close()"). Instead,
            # transfer ownership of `path` to exit_stack.
            exit_stack.callback(inner_stack.pop_all().close)
        except FileNotFoundError:
            # A few StoredObjects -- very old ones with size=0 -- are
            # *intentionally* not in minio. It turns out modules from that era
            # treated empty-file and None as identical. The _modules_ must
            # preserve that logic for backwards compatibility; so it's safe to
            # return `None` here.
            #
            # Other than that, if the file doesn't exist it's a race: either
            # the fetch result is too _new_ (it's in the database but its file
            # hasn't been written yet) or the fetch result is half-deleted (its
            # file was deleted and it's still in the database). In either case,
            # pretend the fetch result does not exist in the database -- i.e.,
            # return `None`.
            return None

    # Reached only after a successful download: `path` is now owned by
    # `exit_stack`, so it outlives the `with` block above.
    if wf_module.fetch_error:
        errors = [RenderError(I18nMessage.TODO_i18n(wf_module.fetch_error))]
    else:
        errors = []
    return FetchResult(path, errors)
 def test_duplicate_column_names_renamed(self):
     """A repeated header "A" is renamed to "A 2", with a warning."""
     result = render_arrow(P(csv="A,A\na,b", has_header_row=True))

     assert_arrow_table_equals(result.table, {"A": ["a"], "A 2": ["b"]})
     expected_warning = RenderError(
         I18nMessage.TODO_i18n("Renamed 1 duplicate column names (see “A 2”)")
     )
     self.assertEqual(result.errors, [expected_warning])
Esempio n. 17
0
    def test_fetch_return_error(self):
        """A fetch() returning a str yields that error and an empty output file."""

        async def fetch(params):
            return "bad things"

        with tempfile_context(dir=self.basedir) as output_path:
            result = self._test_fetch(fetch, output_filename=output_path.name)
            expected_errors = [RenderError(I18nMessage.TODO_i18n("bad things"))]

            self.assertEqual(result.path, output_path)
            self.assertEqual(result.errors, expected_errors)
            self.assertEqual(output_path.read_bytes(), b"")
Esempio n. 18
0
 def test_not_found(self):
     """An HTTP 404 response gives an empty file and a "File not found" error."""
     self.mock_http_response = MockHttpResponse(404)
     expected_error = RenderError(
         I18nMessage.TODO_i18n("File not found. Please choose a different file.")
     )
     with self.fetch(P(), secrets=secrets(DEFAULT_SECRET)) as result:
         self.assertEqual(result.path.read_bytes(), b"")
         self.assertEqual(result.errors, [expected_error])
Esempio n. 19
0
async def prepare_secret_oauth1a(logic: ParamSpecSecret.Logic.Oauth1a,
                                 value: UserProvidedSecret) -> ModuleSecret:
    """
    Build the OAuth1a secret handed to a module's fetch() call.

    SECURITY: beware: we provide the module with our consumer secret. The
    module can masquerade as Workbench. The module will be able to authenticate
    with the provider as the end user, forever.

    A falsy `value` (e.g., `None`) produces `None`.

    A non-`None` UserProvidedSecret has a "secret" sub-dict with keys:

        * `oauth_token`: OAuth 1.0a access token provided by service for user.
        * `oauth_token_secret`: OAuth 1.0a access token secret provided by
          service for user.

    On success, the result is `value` with its "secret" sub-dict replaced by:

        * `consumer_key`: for signing requests.
        * `consumer_secret`: for signing requests.
        * `resource_owner_key`: `oauth_token` (OAuth 1.0a access token)
        * `resource_owner_secret`: `oauth_token_secret` (OAuth 1.0a access
          token secret)

    A missing `oauth_token` or `oauth_token_secret` is passed on as "".

    Otherwise, ModuleSecret "error" value will be an I18nMessage-compatible
    dict describing the problem.

    All problems that may cause an "error":

        * After the user set a valid secret, Workbench was reconfigured and the
          provider was disabled.
    """
    if not value:
        return None

    service: oauth.OAuth1 = oauth.OAuthService.lookup_or_none(logic.service)
    if not service:
        message = I18nMessage.TODO_i18n(
            "Service %r is no longer configured" % logic.service
        )
        return _secret_error(value, message)

    user_tokens = value.get("secret", {})
    module_secret = {
        "consumer_key": service.consumer_key,
        "consumer_secret": service.consumer_secret,
        "resource_owner_key": user_tokens.get("oauth_token", ""),
        "resource_owner_secret": user_tokens.get("oauth_token_secret", ""),
    }
    return {**value, "secret": module_secret}
 def test_render_deprecated_parquet_warning(self):
     """Errors stored with a fetched Parquet file are kept in the render output."""
     fetch_errors = [RenderError(I18nMessage.TODO_i18n("truncated table"))]
     with parquet_file({"A": [1, 2], "B": [3, 4]}) as fetched_path:
         fetch_result = FetchResult(fetched_path, errors=fetch_errors)
         result = render_arrow(
             ArrowTable(), P(), "tab-x", fetch_result, self.output_path
         )

     assert_arrow_table_equals(result.table, {"A": [1, 2], "B": [3, 4]})
     self.assertEqual(result.errors, fetch_errors)
Esempio n. 21
0
 def test_invalid_auth_error(self):
     """An HTTP 401 response gives an empty file and an "Invalid credentials" error."""
     self.mock_http_response = MockHttpResponse(401)
     expected_error = RenderError(
         I18nMessage.TODO_i18n(
             "Invalid credentials. Please reconnect to Google Drive."
         )
     )
     with self.fetch(P(), secrets=secrets(DEFAULT_SECRET)) as result:
         self.assertEqual(result.path.read_bytes(), b"")
         self.assertEqual(result.errors, [expected_error])
Esempio n. 22
0
 def test_missing_secret_error(self):
     """Fetching with no secrets reports an error and sends no HTTP request."""
     expected_error = RenderError(
         I18nMessage.TODO_i18n("Please connect to Google Drive.")
     )
     with self.fetch(P(), {}) as result:
         self.assertEqual(result.path.read_bytes(), b"")
         self.assertEqual(result.errors, [expected_error])
     # Should not make any request
     self.assertIsNone(self.last_http_requestline)
Esempio n. 23
0
def user_visible_bug_fetch_result(output_path: Path,
                                  message: str) -> FetchResult:
    """
    Empty `output_path` and return a FetchResult reporting an unexpected bug.

    `message` is appended to the generic user-facing text as the error code.
    """
    output_path.write_bytes(b"")  # the result's file is empty
    user_text = (
        "Something unexpected happened. We have been notified and are "
        "working to fix it. If this persists, contact us. Error code: "
        + message
    )
    bug_error = RenderError(I18nMessage.TODO_i18n(user_text))
    return FetchResult(path=output_path, errors=[bug_error])
Esempio n. 24
0
 def test_detect_unknown_file_extension(self):
     """A ".bin" file gives an empty table and an "Unknown file extension" error."""
     with _data_file(b"A,B\nx,y", suffix=".bin") as bin_path:
         result = parse_file(bin_path, output_path=self.output_path)

     assert_arrow_table_equals(result.table, {})
     expected_error = RenderError(
         I18nMessage.TODO_i18n(
             "Unknown file extension '.bin'. Please try a different file."
         )
     )
     self.assertEqual(result.errors, [expected_error])
 def test_missing_secret_error(self):
     """fetch_arrow() with no secret reports an error and sends no HTTP request."""
     expected_error = RenderError(
         I18nMessage.TODO_i18n("Please connect to Google Drive.")
     )
     with tempfile_context() as output_path:
         fetch_result = fetch_arrow(P(), secrets(None), None, None, output_path)
         self.assertEqual(fetch_result.path.read_bytes(), b"")
         self.assertEqual(fetch_result.errors, [expected_error])
     # Should not make any request
     self.assertIsNone(self.last_http_requestline)
Esempio n. 26
0
 def test_no_access_error(self):
     """An HTTP 403 response gives an empty file and a "cannot access" error."""
     self.mock_http_response = MockHttpResponse(403)
     expected_error = RenderError(
         I18nMessage.TODO_i18n(
             "You chose a file your logged-in user cannot access. "
             "Please reconnect to Google Drive or choose a different file."
         )
     )
     with self.fetch(P(), secrets=secrets(DEFAULT_SECRET)) as result:
         self.assertEqual(result.path.read_bytes(), b"")
         self.assertEqual(result.errors, [expected_error])
Esempio n. 27
0
 def test_fetch_http_404(self):
     """An HTTP 404 response gives an empty file and an "Error from server" error."""
     response_headers = [("Content-Length", 0)]
     self.mock_http_response = MockHttpResponse(404, response_headers)
     url = self.build_url("/not-found")
     expected_error = RenderError(
         I18nMessage.TODO_i18n("Error from server: HTTP 404 Not Found")
     )
     with self.fetch(url) as result:
         self.assertEqual(result.path.read_bytes(), b"")
         self.assertEqual(result.errors, [expected_error])
Esempio n. 28
0
def _wrap_render_errors(render_call):
    """
    Call `render_call()` and return its result.

    If it raises ModuleError, return a RenderResult whose single error is
    the generic "Something unexpected happened" text followed by
    `format_for_user_debugging(err)`. Other exceptions propagate.
    """
    try:
        return render_call()
    except ModuleError as err:
        user_text = (
            "Something unexpected happened. We have been notified and are "
            "working to fix it. If this persists, contact us. Error code: "
            + format_for_user_debugging(err)
        )
        return RenderResult(errors=[RenderError(I18nMessage.TODO_i18n(user_text))])
Esempio n. 29
0
def parse_file(
    path: Path,
    *,
    output_path: Path,
    encoding: Optional[str] = None,
    mime_type: Optional[MimeType] = None,
    has_header: bool = True,
) -> RenderResult:
    """
    Parse the file at `path` with the parser for its MIME type.

    When `mime_type` is None it is looked up from the lowercased, joined
    suffixes of `path`; an unknown extension gives a RenderResult holding
    only an error. CSV, TSV and TXT go to `parse_csv()` (TXT with no preset
    delimiter); JSON, XLS and XLSX go to their own parsers.

    Raise RuntimeError for any other MIME type.
    """
    if mime_type is None:
        ext = "".join(path.suffixes).lower()
        try:
            mime_type = MimeType.from_extension(ext)
        except KeyError:
            message = (
                "Unknown file extension %r. Please try a different file."
                % ext
            )
            return RenderResult(errors=[RenderError(I18nMessage.TODO_i18n(message))])

    # Delimiter for each text-like MIME type.
    text_delimiters = {
        MimeType.CSV: ",",
        MimeType.TSV: "\t",
        MimeType.TXT: None,
    }
    if mime_type in text_delimiters:
        return parse_csv(
            path,
            output_path=output_path,
            encoding=encoding,
            delimiter=text_delimiters[mime_type],
            has_header=has_header,
            autoconvert_text_to_numbers=True,
        )

    if mime_type == MimeType.JSON:
        return parse_json(path, output_path=output_path, encoding=encoding)

    if mime_type == MimeType.XLS:
        return parse_xls_file(
            path,
            output_path=output_path,
            has_header=has_header,
            autoconvert_types=True,
        )

    if mime_type == MimeType.XLSX:
        return parse_xlsx_file(
            path,
            output_path=output_path,
            has_header=has_header,
            autoconvert_types=True,
        )

    raise RuntimeError("Unhandled MIME type")
Esempio n. 30
0
    def test_fetch_truncate(self):
        """A three-row fetch output is cut to two rows, with a warning."""

        def fetch(params):
            return pd.DataFrame({"A": [1, 2, 3]})

        with tempfile_context(dir=self.basedir) as output_path:
            result = self._test_fetch(fetch, output_filename=output_path.name)

            expected_warning = RenderError(
                I18nMessage.TODO_i18n("Truncated output from 3 rows to 2")
            )
            self.assertEqual(result.errors, [expected_warning])

            written_table = pa.parquet.read_table(str(output_path), use_threads=False)
            assert_arrow_table_equals(written_table, {"A": [1, 2]})