Esempio n. 1
0
def validate_zipfile(module_zipfile: ModuleZipfile) -> None:
    """Ensure `module_zipfile` is a valid ModuleZipfile.

    The spec is loaded, the module code is compiled (without executing it)
    and validated by the kernel, and the optional HTML and JS assets are
    read.

    Raise `WorkbenchModuleImportError` with an English-language description
    of the flaw otherwise. (This can help module authors fix their mistakes.)
    The original exception is chained as the cause.
    """
    try:
        module_zipfile.get_spec()  # raise KeyError, ValueError, BadZipFile
        # raise KeyError, UnicodeDecodeError, SyntaxError, BadZipFile
        compiled_module = module_zipfile.compile_code_without_executing()
        cjwstate.modules.kernel.validate(compiled_module)  # raise ModuleError
        module_zipfile.get_optional_html()  # raise UnicodeError, BadZipFile
        module_zipfile.get_optional_js_module()  # raise UnicodeError, BadZipFile
    except zipfile.BadZipFile as err:
        raise WorkbenchModuleImportError("Bad zipfile: %s" % str(err)) from err
    except UnicodeError as err:
        # This handler MUST come before `except ValueError`: UnicodeError
        # (and UnicodeDecodeError) are subclasses of ValueError, so the
        # ValueError handler would otherwise swallow encoding problems and
        # misreport them as an invalid .yaml file.
        # NOTE(review): if get_spec() can itself raise UnicodeError for a
        # badly-encoded spec, it is reported with this message too -- confirm
        # whether that case needs its own wording.
        raise WorkbenchModuleImportError(
            "Module Python, HTML or JS code is invalid UTF-8: %s" % str(err)
        ) from err
    except ValueError as err:
        raise WorkbenchModuleImportError(
            "Module .yaml is invalid: %s" % str(err)
        ) from err
    except KeyError as err:
        raise WorkbenchModuleImportError(
            "Zipfile is missing a required file: %s" % str(err)
        ) from err
    except SyntaxError as err:
        raise WorkbenchModuleImportError(
            "Module Python code has a syntax error: %s" % str(err)
        ) from err
    except ModuleError as err:
        raise WorkbenchModuleImportError(
            "Module Python code failed to run: %s" % str(err)
        ) from err
Esempio n. 2
0
def import_zipfile(path: Path) -> clientside.Module:
    """
    Save a zipfile to database and minio and build a `clientside.Module`.

    The zipfile at `path` is validated first; then it is uploaded to the
    external-modules bucket under "{module_id}/{path.name}" and a
    `ModuleVersion` row is written for it.

    Raise `WorkbenchModuleImportError` if `path` points to an invalid module.

    Otherwise, do not raise any errors one can sensibly recover from.
    """
    module_zipfile = ModuleZipfile(path)
    validate_zipfile(module_zipfile)  # raise WorkbenchModuleImportError

    module_id = module_zipfile.module_id
    version = module_zipfile.version
    # Load the spec once and reuse it for both the database row and the
    # return value, instead of asking the zipfile for it a second time.
    module_spec = module_zipfile.get_spec()
    js_module = module_zipfile.get_optional_js_module() or ""

    minio.fput_file(
        minio.ExternalModulesBucket, "%s/%s" % (module_id, path.name), path
    )
    # NOTE(review): no `defaults=` is passed, so all four fields act as
    # lookup keys; an existing row is only matched when spec and js_module
    # are also equal. Confirm this is intended rather than looking up by
    # (id_name, source_version_hash) and updating the rest.
    ModuleVersion.objects.update_or_create(
        id_name=module_id,
        source_version_hash=version,
        spec=asdict(module_spec),
        js_module=js_module,
    )

    return clientside.Module(module_spec, js_module)
Esempio n. 3
0
def download_module_zipfile(
    tempdir: Path,
    module_id: ModuleId,
    version: ModuleVersion,
    *,
    deprecated_spec: Dict[str, Any],
    deprecated_js_module: str,
) -> ModuleZipfile:
    """
    Produce a local-path ModuleZipfile by downloading from minio.

    The "modern" download is tried first; if it raises `FileNotFoundError`,
    the "deprecated" download is tried with `deprecated_spec` and
    `deprecated_js_module`.

    Raise `RuntimeError` (_from_ another kind of error -- `FileNotFoundError`,
    `KeyError`, `ValueError`, `SyntaxError`, `BadZipFile`,
    `UnicodeDecodeError` or more) if the zipfile is not a valid Workbench
    module. The `RuntimeError` message names the zipfile; the original error
    is available as its `__cause__`. We spend the time testing the zipfile
    for validity because A) it's good to catch errors quickly; and B) fetcher,
    renderer and server all need to execute code on each module, so they're
    destined to validate the module anyway.

    The zipfile is always written to "{tempdir}/{module_id}.{version}.zip".
    This function is not re-entrant when called with the same parameters.
    Callers may use locks to avoid trying to download the same data multiple
    times.
    """
    logger.info("download_module_zipfile(%s.%s.zip)", module_id, version)

    zippath = tempdir / ("%s.%s.zip" % (module_id, version))
    try:
        _download_module_zipfile_modern(zippath, module_id, version)
    except FileNotFoundError as original_error:
        try:
            _download_module_zipfile_deprecated(
                zippath,
                module_id,
                version,
                spec=deprecated_spec,
                js_module=deprecated_js_module,
            )
        except FileNotFoundError:
            # Chain from the first (modern) failure, as before, but give the
            # error a message so logs that only show str(err) are useful.
            raise RuntimeError(
                "Module zipfile %s could not be downloaded" % zippath.name
            ) from original_error

    # NOTE(review): the original comment says this may raise ZipfileError; it
    # sits outside the try block below, so such an error is not wrapped in
    # RuntimeError. Left as-is to keep the exception contract unchanged.
    ret = ModuleZipfile(zippath)  # raise ZipfileError
    try:
        # raise KeyError or SyntaxError
        compiled_module = ret.compile_code_without_executing()
        ret.get_spec()  # raise KeyError or ValueError
        cjwstate.modules.kernel.validate(compiled_module)  # raise ModuleError
    except Exception as err:
        # Deliberately broad: any validation failure means "not a valid
        # module", and callers are only expected to handle RuntimeError.
        raise RuntimeError(
            "Module zipfile %s is not a valid module: %r" % (zippath.name, err)
        ) from err
    return ret
Esempio n. 4
0
def _execute_step_pre(
    *,
    basedir: Path,
    exit_stack: contextlib.ExitStack,
    workflow: Workflow,
    step: Step,
    module_zipfile: ModuleZipfile,
    raw_params: Dict[str, Any],
    input_path: Path,
    input_table_columns: List[Column],
    tab_results: Dict[Tab, Optional[StepResult]],
) -> ExecuteStepPreResult:
    """Run the preparatory phase of execute_step().

    While holding the step lock, load the step's fetch result, read the
    module spec and prepare the params for rendering.

    Errors (render() is not called in any of these cases):

    * UnneededExecution: `step` has changed.
    * NoLoadedDataError: there are no input table columns and the module
      spec's `loads_data` is falsy.
    * TabCycleError, TabOutputUnreachableError: the module depends on tabs
      with errors.
    * PromptingError: the module parameters are invalid.

    All this runs synchronously within a database lock. (It's a separate
    function so that when we're done awaiting it, we can continue executing in
    a context that doesn't use a database thread.)

    `tab_results.keys()` must be ordered as the Workflow's tabs are.
    """
    # Acquiring the lock raises UnneededExecution
    with locked_step(workflow, step) as locked:
        loaded_fetch_result = _load_fetch_result(locked, basedir, exit_stack)

        spec = module_zipfile.get_spec()
        # A module that doesn't load data has nothing to work with when
        # there is no input.
        if not (spec.loads_data or input_table_columns):
            raise NoLoadedDataError

        # raise TabCycleError, TabOutputUnreachableError, PromptingError
        prepared = renderprep.prep_params(
            params=raw_params,
            schema=spec.param_schema,
            step_id=step.id,
            input_table_columns=input_table_columns,
            tab_results=tab_results,
            basedir=basedir,
            exit_stack=exit_stack,
        )
        params, tab_outputs, uploaded_files = prepared

        return ExecuteStepPreResult(
            loaded_fetch_result, params, tab_outputs, uploaded_files
        )
Esempio n. 5
0
def _execute_wfmodule_pre(
    basedir: Path,
    exit_stack: contextlib.ExitStack,
    workflow: Workflow,
    wf_module: WfModule,
    module_zipfile: ModuleZipfile,
    raw_params: Dict[str, Any],
    input_table: ArrowTable,
    tab_results: Dict[Tab, Optional[RenderResult]],
) -> ExecuteStepPreResult:
    """
    Run the preparatory phase of execute_wfmodule().

    While holding the wf_module lock, load the fetch result and turn
    `raw_params` into the param values for rendering.

    Errors:

    * UnneededExecution: `wf_module` has changed.
    * TabCycleError, TabOutputUnreachableError: the module depends on tabs
      with errors. (We won't call the render() method in that case.)
    * PromptingError: the module parameters are invalid. (We'll skip
      render() and prompt the user with quickfixes in that case.)

    All this runs synchronously within a database lock. (It's a separate
    function so that when we're done awaiting it, we can continue executing in
    a context that doesn't use a database thread.)

    `tab_results.keys()` must be ordered as the Workflow's tabs are.
    """
    # Acquiring the lock raises UnneededExecution
    with locked_wf_module(workflow, wf_module) as locked:
        loaded_fetch_result = _load_fetch_result(locked, basedir, exit_stack)

        schema = module_zipfile.get_spec().get_param_schema()
        context = renderprep.RenderContext(
            wf_module.id,
            input_table,
            tab_results,
            basedir,
            exit_stack,
            raw_params,  # ugh
        )

        # raise TabCycleError, TabOutputUnreachableError, PromptingError
        param_values = renderprep.get_param_values(schema, raw_params, context)

        return ExecuteStepPreResult(loaded_fetch_result, param_values)
Esempio n. 6
0
def _build_source_catalog(module_zipfile: ModuleZipfile) -> Catalog:
    """Collect a module's source messages into a `default_locale` Catalog.

    Messages come from two places: the module spec (via
    `find_spec_messages()`) and every ".py" entry of the zipfile at
    `module_zipfile.path` (via `find_messages_in_module_code()`).
    """
    catalog = Catalog(default_locale)

    # Messages declared in the module spec.
    spec_messages = find_spec_messages(module_zipfile.get_spec())
    for message_id, source_string in spec_messages.items():
        catalog.add(message_id, string=source_string)

    # Messages found in the module's Python code.
    with zipfile.ZipFile(module_zipfile.path, mode="r") as archive:
        for entry in archive.infolist():
            if not entry.filename.endswith(".py"):
                continue
            with archive.open(entry) as code_io:
                code_messages = find_messages_in_module_code(
                    code_io, entry.filename
                )
                for message_id, properties in code_messages.items():
                    catalog.add(
                        message_id,
                        string=properties["string"],
                        auto_comments=properties["comments"],
                        locations=properties["locations"],
                    )

    return catalog
Esempio n. 7
0
def _get_migrated_params(wf_module: WfModule,
                         module_zipfile: ModuleZipfile) -> Dict[str, Any]:
    """
    Build the Params dict which will be passed to render().

    `module_zipfile` may be None (a deleted module); the result is then `{}`.

    Otherwise the params come from `get_migrated_params()`, with two
    fallbacks so that a valid set of params is always returned:

    * On ModuleError, return the schema's coercion of `None`.
    * If the migrated params fail schema validation (ValueError), log the
      error and return the schema's coercion of the migrated params.

    Either fallback will render the "wrong" thing ... but the front-end
    should show the migrate error (as it's rendering the form) so users
    should figure out the problem. (What's the alternative? Abort the whole
    workflow render? We can't render _any_ module until we've migrated _all_
    modules; and it's hard to imagine showing the user a huge, aborted
    render.)

    Assume we are called within a `workflow.cooperative_lock()`.
    """
    if module_zipfile is None:
        # Deleted module: the renderer will pass the input through to the
        # output, so no params are needed.
        return {}

    schema = module_zipfile.get_spec().get_param_schema()

    try:
        migrated = get_migrated_params(wf_module, module_zipfile=module_zipfile)
    except ModuleError:
        # Per the original note, LoadedModule already logged this error, so
        # it is not logged again here.
        return schema.coerce(None)

    # The module may be buggy and have produced params that don't fit its
    # own schema. Log that, and hand back valid params anyway -- even if
    # they aren't the params the user wants.
    try:
        schema.validate(migrated)
    except ValueError as err:
        logger.exception(
            "%s:migrate_params() gave wrong retval: %s",
            module_zipfile.path.name,
            str(err),
        )
        return schema.coerce(migrated)

    return migrated
Esempio n. 8
0
def fetch_or_wrap_error(
    ctx: contextlib.ExitStack,
    chroot_context: ChrootContext,
    basedir: Path,
    module_id_name: str,
    module_zipfile: ModuleZipfile,
    migrated_params_or_error: Union[Dict[str, Any], ModuleError],
    secrets: Dict[str, Any],
    last_fetch_result: Optional[FetchResult],
    maybe_input_crr: Optional[CachedRenderResult],
    output_path: Path,
):
    """
    Fetch, and do not raise any exceptions worth catching.

    Problems are reported in the return value rather than raised: the result
    is a FetchResult with `.errors`.

    This function is slow indeed. Perhaps call it from
    EventLoop.run_in_executor(). (Why not make it async? Because all the logic
    inside -- compile module, fetch() -- is sandboxed, meaning it gets its own
    processes. We may eventually avoid asyncio entirely in `fetcher`.)

    These problems are all handled:

    * Module was deleted (`module_zipfile is None`)
    * Module times out (`cjwkernel.errors.ModuleTimeoutError`), in `fetch()`.
    * Module crashes (`cjwkernel.errors.ModuleExitedError`), in `fetch()`.
    * migrated_params_or_error is a `ModuleError`
    * migrated_params_or_error is invalid (`ValueError`)
    * input_crr points to a nonexistent file (`FileNotFoundError`) --
      presumably dealt with inside `_download_cached_render_result()`; no
      handler for it is visible in this function.
    """
    # A deleted module arrives here as module_zipfile=None
    if module_zipfile is None:
        logger.info("fetch() deleted module '%s'", module_id_name)
        deleted_module_error = RenderError(
            I18nMessage.trans(
                "py.fetcher.fetch.no_loaded_module",
                default="Cannot fetch: module was deleted",
            )
        )
        return FetchResult(output_path, [deleted_module_error])

    schema = module_zipfile.get_spec().get_param_schema()

    if isinstance(migrated_params_or_error, ModuleError):
        # Re-raise and immediately catch the error: logger.exception() logs
        # the exception currently being handled.
        try:
            raise migrated_params_or_error
        except ModuleError:
            # We'll always get here
            logger.exception(
                "%s:migrate_params() raised error", module_zipfile.path.name
            )
        debug_message = format_for_user_debugging(migrated_params_or_error)
        return user_visible_bug_fetch_result(output_path, debug_message)

    # From here on, the argument is known to be a params dict.
    raw_values = migrated_params_or_error

    try:
        schema.validate(raw_values)
    except ValueError:
        logger.exception(
            "Invalid return value from %s:migrate_params()",
            module_zipfile.path.name,
        )
        return user_visible_bug_fetch_result(
            output_path,
            "%s:migrate_params() output invalid params" % module_zipfile.path.name,
        )

    # get input_metadata, input_parquet_path. (This can't error.)
    input_parquet_path, input_metadata = _download_cached_render_result(
        ctx, maybe_input_crr, dir=basedir
    )

    # Clean params, so they're of the correct type. (This can't error.)
    cleaned = fetchprep.clean_value(schema, raw_values, input_metadata)
    params = Params(cleaned)

    # The module is handed filenames, not paths; there may be no input.
    if input_parquet_path is None:
        input_parquet_filename = None
    else:
        input_parquet_filename = input_parquet_path.name

    # actually fetch
    try:
        return invoke_fetch(
            module_zipfile,
            chroot_context=chroot_context,
            basedir=basedir,
            params=params,
            secrets=secrets,
            last_fetch_result=last_fetch_result,
            input_parquet_filename=input_parquet_filename,
            output_filename=output_path.name,
        )
    except ModuleError as err:
        logger.exception("Error calling %s:fetch()", module_zipfile.path.name)
        return user_visible_bug_fetch_result(
            output_path, format_for_user_debugging(err)
        )