Example #1
    def _coerce_3tuple(
        cls, value, try_fallback_columns: Iterable[Column] = []
    ) -> ProcessResult:
        if isinstance(value[0], str) and isinstance(value[1], dict):
            return cls(errors=[coerce_RenderError(value)])
        elif isinstance(value[0], pd.DataFrame) or value[0] is None:
            dataframe, error, json = value
            if dataframe is None:
                dataframe = pd.DataFrame()
            elif not isinstance(dataframe, pd.DataFrame):
                raise ValueError("Expected DataFrame got %s" % type(dataframe).__name__)
            if json is None:
                json = {}
            elif not isinstance(json, dict):
                raise ValueError("Expected JSON dict, got %s" % type(json).__name__)

            errors = coerce_RenderError_list(error)

            validate_dataframe(dataframe, settings=settings)
            columns = _infer_columns(dataframe, {}, try_fallback_columns)
            return cls(dataframe=dataframe, errors=errors, json=json, columns=columns)
        else:
            raise ValueError(
                "Expected (Dataframe, RenderError, json) or I18nMessage return type; got (%s,%s, %s)"
                % (
                    type(value[0]).__name__,
                    type(value[1]).__name__,
                    type(value[2]).__name__,
                )
            )
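
For orientation, here is a minimal sketch of a module return value this 3-tuple branch would accept. The `render(table, params)` signature is an assumption for illustration; only the (DataFrame, error, json) shape comes from the snippet above.

import pandas as pd

def render(table, params):
    # Any of the three elements may be None; the dict becomes the "json" field.
    return pd.DataFrame({"A": [1, 2]}), "something went wrong", {"meta": 1}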
Example #2
    def coerce(
        cls, value: Any, try_fallback_columns: Iterable[Column] = []
    ) -> ProcessResult:
        """
        Convert any value to a ProcessResult.

        The rules:

        * value is None => return empty dataframe
        * value is a ProcessResult => return it
        * value is a DataFrame => empty error and json
        * value is a ModuleError => errors get populated using the data in it; empty dataframe and json
        * value is a (DataFrame, ModuleError) => empty json (either may be None)
        * value is a (DataFrame, ModuleError, dict) => obvious (any may be None)
        * value is a dict => pass it as kwargs
        * else we generate an error with empty dataframe and json

        `try_fallback_columns` is a List of Columns that should pre-empt
        automatically-generated `columns` but _not_ pre-empt
        `value['column_formats']` if it exists. For example: in a list of
        steps, we use the prior step's output columns as "fallback" definitions
        for _this_ step's output columns, if the module didn't specify others.
        This trick lets us preserve number formats implicitly -- most modules
        needn't worry about them.

        Raise `ValueError` if `value` cannot be coerced -- including if
        `validate_dataframe()` raises an error.
        """
        if value is None:
            return cls(dataframe=pd.DataFrame())
        elif isinstance(value, ProcessResult):
            # TODO ban `ProcessResult` retvals from `fetch()`, then omit this
            # case. ProcessResult should be internal.
            validate_dataframe(value.dataframe, settings=settings)
            return value
        elif isinstance(value, (list, str)):
            return cls(errors=coerce_RenderError_list(value))
        elif isinstance(value, pd.DataFrame):
            validate_dataframe(value, settings=settings)
            columns = _infer_columns(value, {}, try_fallback_columns)
            return cls(dataframe=value, columns=columns)
        elif isinstance(value, dict):
            return cls._coerce_dict(value, try_fallback_columns)
        elif isinstance(value, tuple):
            if len(value) == 2:
                return cls._coerce_2tuple(value, try_fallback_columns)
            elif len(value) == 3:
                return cls._coerce_3tuple(value, try_fallback_columns)
            else:
                raise ValueError(
                    "Expected 2-tuple or 3-tuple return value; got %d-tuple"
                    % len(value)
                )
        else:
            raise ValueError("Invalid return type %s" % type(value).__name__)
Example #3
    def _coerce_2tuple(
        cls, value, try_fallback_columns: Iterable[Column] = []
    ) -> ProcessResult:
        if isinstance(value[0], str) and isinstance(value[1], dict):
            return cls(errors=[coerce_RenderError(value)])
        elif isinstance(value[0], pd.DataFrame) or value[0] is None:
            dataframe, error = value
            if dataframe is None:
                dataframe = pd.DataFrame()

            errors = coerce_RenderError_list(error)

            validate_dataframe(dataframe, settings=settings)
            columns = _infer_columns(dataframe, {}, try_fallback_columns)
            return cls(dataframe=dataframe, errors=errors, columns=columns)
        else:
            raise ValueError(
                "Expected (Dataframe, RenderError) or (str, dict) return type; got (%s,%s)"
                % (type(value[0]).__name__, type(value[1]).__name__)
            )
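
A sketch of the two 2-tuple shapes this branch distinguishes. The i18n message id and arguments are invented for illustration.

import pandas as pd

# (DataFrame, error): the second element is coerced to a list of RenderErrors.
shape_a = (pd.DataFrame({"A": [1]}), "something went wrong")

# (str, dict): treated as a single i18n-style error message.
shape_b = ("mymodule.error.tooFewRows", {"count": 3})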
Example #4
    def _coerce_dict(
        cls, value, try_fallback_columns: Iterable[Column] = []
    ) -> ProcessResult:
        if "message" in value and "quickFixes" in value:
            return cls(errors=[coerce_RenderError(value)])
        else:
            value = dict(value)  # shallow copy
            errors = coerce_RenderError_list(value.pop("errors", []))

            # Coerce old-style error and quick_fixes, if it's there
            if "error" in value:
                legacy_error_message = coerce_I18nMessage(value.pop("error"))
                legacy_error_quick_fixes = [
                    coerce_QuickFix(v) for v in value.pop("quick_fixes", [])
                ]
                errors.append(
                    RenderError(legacy_error_message, legacy_error_quick_fixes)
                )
            elif "quick_fixes" in value:
                raise ValueError("You cannot return quick fixes without an error")

            dataframe = value.pop("dataframe", pd.DataFrame())
            validate_dataframe(dataframe, settings=settings)

            column_formats = value.pop("column_formats", {})
            value["columns"] = _infer_columns(
                dataframe, column_formats, try_fallback_columns
            )

            try:
                return cls(dataframe=dataframe, errors=errors, **value)
            except TypeError as err:
                raise ValueError(
                    (
                        "ProcessResult input must only contain {dataframe, "
                        "errors, json, column_formats} keys"
                    )
                ) from err
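
A sketch of the dict shapes `_coerce_dict()` handles, based on the keys it pops above. The column format string is illustrative, not taken from the snippet.

import pandas as pd

new_style = {
    "dataframe": pd.DataFrame({"A": [1]}),
    "errors": ["something went wrong"],
    "json": {"meta": 1},
    "column_formats": {"A": "{:,d}"},  # illustrative format string
}

legacy_style = {
    "dataframe": pd.DataFrame({"A": [1]}),
    "error": "something went wrong",  # old-style single error message
    "quick_fixes": [],                # only allowed alongside "error"
}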
Example #5
def eval_process(code, table):
    """Runs `code`'s "process" method; return (retval, error, log).

    stdout, stderr, exception tracebacks, and error messages will all be
    written to log. (The UX is: log is displayed as a monospaced console to the
    user -- presumably the person who wrote the code.)

    If user code raises an Exception `err`, a message built from `str(err)`
    (with the offending line number) is returned as the error.

    This function isn't a sandbox! It isn't secure! It merely captures
    output.

    This should never raise an exception. An exception is a bug in this
    _module_. A bug in _user code_ must be presented to the user. (TODO handle
    timeout, out-of-memory.)
    """
    log = io.StringIO()
    eval_globals = {"pd": pd, "np": np, "math": math}

    def ret(dataframe: pd.DataFrame = EMPTY_DATAFRAME, error: str = ""):
        """Usage: `return ret(table, message)`"""
        log.write(error)
        return dataframe, error, log.getvalue()

    try:
        compiled_code = compile(code, "your code", "exec")
    except SyntaxError as err:
        return ret(error="Line %d: %s" % (err.lineno, err))
    except ValueError:
        # Apparently this is another thing that compile() can raise
        return ret(error="Your code contains null bytes")

    # Override sys.stdout and sys.stderr ... but only in the context of
    # `process()`. After `process()`, the module needs its original values
    # again so it can send a Thrift object over stdout and log errors (which
    # should never happen) to stderr.
    #
    # This function's sandbox isn't perfect, but we aren't protecting anything
    # dangerous. Writing to the _original_ `sys.stdout` and `sys.stderr` can at
    # worst cause a single `ModuleExitedError`, which would email us. That's
    # the security risk: an email to us.
    with redirect_stdout(log), redirect_stderr(log):
        try:
            exec(compiled_code, eval_globals)  # raise any exception

            if "process" not in eval_globals:
                return ret(error='Please define a "process(table)" function')
            process = eval_globals["process"]
            if len(signature(process).parameters) != 1:
                return ret(
                    error="Please make your process(table) function accept exactly 1 argument"
                )

            retval = process(table)  # raise any exception
        except Exception:
            # An error in the code or in process()
            etype, value, tb = sys.exc_info()
            tb = tb.tb_next  # omit this method from the stack trace
            traceback.print_exception(etype, value, tb)
            return ret(error=f"Line {tb.tb_lineno}: {etype.__name__}: {value}")

    if isinstance(retval, pd.DataFrame):
        try:
            validate_dataframe(retval)  # raise ValueError
        except ValueError as err:
            return ret(error="Unhandled DataFrame: %s" % str(err))
        return ret(retval)
    elif isinstance(retval, str):
        return ret(error=retval)
    else:
        return ret(error=("Please make process(table) return a pd.DataFrame. "
                          "(Yours returned a %s.)" % type(retval).__name__))