Beispiel #1
0
def _vl_filter_to_predicates(expr, filter_):
    """
    Translate a Vega-Lite field predicate into a list of ibis predicates.

    Parameters
    ----------
    expr
        Expression whose column ``filter_["field"]`` is compared.
    filter_
        The value stored under the ``filter`` key of a Vega-Lite transform.

    Returns
    -------
    A list of boolean expressions, or ``None`` when the filter cannot be
    translated: it is not a mapping, it has no top-level ``field``, or it
    uses an operator other than range/equal/gt/lt/lte/gte.
    """
    # Vega-Lite also allows expression strings ("datum.x > 1") and logical
    # compositions ({"and": [...]}); neither has a top-level "field", so
    # indexing them with "field" would raise instead of falling back.
    if not isinstance(filter_, dict) or "field" not in filter_:
        return None

    # https://vega.github.io/vega-lite/docs/filter.html#field-predicate
    field_expr = expr[filter_["field"]]
    if "range" in filter_:
        lower, upper = filter_["range"]
        return [field_expr >= lower, field_expr <= upper]
    if "equal" in filter_:
        return [field_expr == filter_["equal"]]
    if "gt" in filter_:
        return [field_expr > filter_["gt"]]
    if "lt" in filter_:
        return [field_expr < filter_["lt"]]
    if "lte" in filter_:
        return [field_expr <= filter_["lte"]]
    if "gte" in filter_:
        return [field_expr >= filter_["gte"]]
    return None


def update_spec(expr: ibis.Expr, spec: dict):
    """
    Move as many Vega-Lite transforms as possible from ``spec`` into ``expr``.

    ``spec`` is modified in place: every ``groupby``, ``aggregate`` and
    ``filter`` entry that could be applied to the expression is removed from
    its transform, transforms that end up empty are dropped, and the
    ``transform`` key is deleted when nothing is left. Entries that cannot be
    translated are put back so they stay in the spec.

    Parameters
    ----------
    expr: ibis.Expr
        The expression the transforms are applied to.
    spec: dict
        A Vega-Lite spec; only its ``transform`` list is read and rewritten.

    Returns
    -------
    The new expression with the translated transforms applied.
    """
    original_expr = expr

    # logic modified from
    # https://github.com/vega/vega-lite-transforms2sql/blob/3b360144305a6cec79792036049e8a920e4d2c9e/transforms2sql.ts#L7
    for transform in spec.get("transform", []):
        groupby = transform.pop("groupby", None)
        if groupby:
            if not all(field in expr.columns for field in groupby):
                # A grouping field is missing from the expression (it was
                # produced by a transform we could not process), so leave
                # this whole transform in the spec.
                transform["groupby"] = groupby
                continue
            expr = expr.group_by(groupby)

        aggregate = transform.pop("aggregate", None)
        if aggregate:
            expr = expr.aggregate([
                vl_aggregate_to_grouping_expr(original_expr, a)
                for a in aggregate
            ])

        filter_ = transform.pop("filter", None)
        if filter_:
            preds = _vl_filter_to_predicates(original_expr, filter_)
            if preds is None:
                # put the filter back if we cannot translate it
                transform["filter"] = filter_
                continue
            expr = expr.filter(preds)

    # remove empty transforms
    spec["transform"] = [t for t in spec.get("transform", []) if t]
    # remove key if empty
    if not spec["transform"]:
        del spec["transform"]

    return expr
Beispiel #2
0
def empty(expr: ibis.Expr) -> pandas.DataFrame:
    """
    Build a DataFrame with no rows from the schema of an ibis expression.

    A frame holding only the expression's column names is created and the
    expression's schema is applied to it.

    https://github.com/ibis-project/ibis/issues/1676#issuecomment-441472528
    """
    schema = expr.schema()
    blank_frame = pandas.DataFrame(columns=expr.columns)
    return schema.apply_to(blank_frame)
Beispiel #3
0
def timeunit(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega time unit transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/timeunit/

    It transforms it into the Ibis truncate expression.
    https://docs.ibis-project.org/generated/ibis.expr.api.TimestampValue.truncate.html

    Two columns are added: the truncated timestamp (start of the unit) and
    that start plus one unit (end of the unit).

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform. It must hold
        exactly the keys ``type``, ``field``, ``as`` and ``units``. The
        dictionary is not modified.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the transformed expression

    Raises
    ------
    NotImplementedError
        If the transform has keys other than the four above, or if ``units``
        is not one of the supported unit lists.
    """
    # Consume keys from a copy so the caller's spec survives and can be
    # applied again.
    params = dict(transform)

    # Pop outside the assert: under ``python -O`` asserts are stripped, and a
    # pop inside one would leave "type" behind and trip the check below.
    kind = params.pop("type")
    assert kind == "timeunit"

    field = expr[params.pop("field")]
    as_start, as_end = params.pop("as")
    units = params.pop("units")
    if params:
        raise NotImplementedError(
            f"timeunit transform: {list(params)} keys are not supported")

    # (vega units, ibis truncate unit, ibis.interval keyword)
    supported = (
        (["year"], "Y", "years"),
        (["year", "month"], "M", "months"),
        (["year", "month", "date"], "D", "days"),
        (["year", "month", "date", "hours"], "h", "hours"),
        (["year", "month", "date", "hours", "minutes"], "m", "minutes"),
        (["year", "month", "date", "hours", "minutes", "seconds"],
         "s", "seconds"),
        (["year", "month", "date", "hours", "minutes", "seconds",
          "milliseconds"], "ms", "milliseconds"),
    )
    for candidate, truncate_unit, interval_kwarg in supported:
        if units == candidate:
            break
    else:
        raise NotImplementedError(
            f"timeunit transform: {units} units are not supported")

    start = field.truncate(truncate_unit)
    delta = ibis.interval(**{interval_kwarg: 1})
    return expr.mutate([start.name(as_start), (start + delta).name(as_end)])
def aggregate(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a single-operation aggregate transform to an ibis expression.

    ``groupby``, ``ops``, ``fields`` and ``as`` must each contain exactly one
    item; unpacking raises ``ValueError`` otherwise. The expression is
    grouped by the one key, and the one field is reduced with the method
    named by ``_translate_op(op)`` and given the ``as`` name.
    """
    [group_key] = transform["groupby"]
    [op_name] = transform["ops"]
    [source_field] = transform["fields"]
    [output_name] = transform["as"]

    grouped = expr.group_by(group_key)
    column = expr[source_field]
    reduce_column = getattr(column, _translate_op(op_name))
    return grouped.aggregate([reduce_column().name(output_name)])
Beispiel #5
0
def collect(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Sort an ibis expression as described by ``transform["sort"]``.

    ``field`` and the optional ``order`` are passed through ``promote_list``
    and paired up; a missing ``order`` means "ascending" for every field.
    Each sort key is ``(field, ascending)``, where ``ascending`` is True
    only when the order is exactly "ascending".
    """
    sort_spec = transform["sort"]
    fields = promote_list(sort_spec["field"])
    fallback_orders = ["ascending"] * len(fields)
    orders = promote_list(sort_spec.get("order", fallback_orders))
    assert len(fields) == len(orders)

    sort_keys = [
        (name, direction == "ascending")
        for name, direction in zip(fields, orders)
    ]
    return expr.sort_by(sort_keys)
def aggregate(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply an aggregate transform to an ibis expression.

    The expression is grouped by ``transform["groupby"]`` and one aggregation
    is built with ``_aggregate`` for each (field, op, as) triple. The three
    lists are zipped, so the shortest one decides how many aggregations are
    produced. Missing ``fields`` and ``as`` default to ``[None]``.
    """
    group_keys = transform["groupby"]
    # It's undocumented, but an undefined "ops" value defaults to ["count"]
    # https://github.com/vega/vega/blob/4d10f9da0df0833c90ff259bbd0960f7cb05e3bf/packages/vega-transforms/src/Aggregate.js#L159-L161
    op_names = transform.get("ops", ["count"])
    source_fields = transform.get("fields", [None])
    output_names = transform.get("as", [None])

    grouped = expr.group_by(group_keys)
    metrics = [
        _aggregate(expr, source, op_name, alias)
        for source, op_name, alias in zip(source_fields, op_names, output_names)
    ]
    return grouped.aggregate(metrics)
Beispiel #7
0
def raise_unsupported_expr_error(expr: ibis.Expr):
    """Raise an unsupported-operation error naming the expression's op type.

    Parameters
    ----------
    expr : ibis.Expr
        The expression whose operation (``expr.op()``) is reported.

    Raises
    ------
    com.UnsupportedOperationError
        Always; the message contains the type of the operation.
    """
    operation_type = type(expr.op())
    raise com.UnsupportedOperationError(
        "OmniSciDB backend doesn't support {} operation!".format(
            operation_type))
    def maybe_add_random_sort(
        self, data_client: ibis.client, table: ibis.Expr
    ) -> ibis.Expr:
        """Return ``table`` sorted randomly when the client type supports it.

        The exact type of ``data_client`` is looked up in
        ``RANDOM_SORT_SUPPORTS``. If it is present, the table is sorted by a
        ``RandomSortKey`` built from the mapped value. Otherwise the table is
        returned unchanged, with a warning logged unless the client type is
        ``TeradataClient``.
        """
        client_type = type(data_client)

        if client_type in RANDOM_SORT_SUPPORTS:
            random_key = RandomSortKey(RANDOM_SORT_SUPPORTS[client_type])
            return table.sort_by(random_key.to_expr())

        # Teradata 'SAMPLE' is random by nature and does not require a sort
        # by, so it is the one unsupported client that stays silent.
        if client_type != TeradataClient:
            logging.warning(
                "Data Client %s Does Not Enforce Random Sort on Sample",
                str(client_type),
            )
        return table
Beispiel #9
0
def apply(expr: ibis.Expr, transforms: Any) -> ibis.Expr:
    """Apply transform or transforms to the expression.

    Parameters
    ----------
    expr: ibis.Expr
        The expression to transform.
    transforms: Any
        ``None``, a transform specification, or a list of transform
        specifications. Each specification must be valid according to
        Vega's transform schema. A bin transform whose ``extent`` refers to
        a signal has that ``extent`` entry replaced in place by the field
        recorded for the signal.

    Returns
    -------
    expr_transformed : ibis.Expr
        The transformed expression; ``expr`` itself when ``transforms`` is
        ``None``.
    """
    if transforms is None:
        return expr
    transforms = promote_list(transforms)

    # Pass 1: for every bin transform, expose the source field under the
    # name of the first output column. The filter runs before the binning
    # yet refers to the field the binning creates; see the signals in
    # https://vega.github.io/editor/#/gist/9c7d4dee819450e59cf7381f4d47fee0/example.vl.json
    # TODO: Bring this up with Dominik and see why this is
    for spec in transforms:
        if spec["type"] != "bin":
            continue
        source_column = expr[spec["field"]]
        expr = expr.mutate(source_column.name(spec["as"][0]))

    # Pass 2: extent transforms are not delegated; they only record which
    # field each signal stands for. This can probably be removed once
    # https://github.com/vega/vega-lite/issues/5320 is fixed.
    field_by_signal = {}
    for spec in transforms:
        if spec["type"] == "extent":
            assert spec.keys() == {"field", "signal_", "type"}
            field_by_signal[spec["signal_"]] = spec["field"]
            continue
        # Swap a signal-based extent for the field recorded for that signal
        if "extent" in spec and "signal" in spec["extent"]:
            signal_name = spec["extent"]["signal"]
            spec["extent"] = field_by_signal.pop(signal_name)
        expr = _delegate_transform(spec, expr)
    return expr
Beispiel #10
0
def formula(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega formula transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/formula/

    The vega expression in ``transform["expr"]`` is evaluated with
    ``eval_vegajs`` and the result is added as a column named
    ``transform["as"]``. Vega expressions are not fully implemented, so not
    every expression will work.

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the transformed expression
    """
    output_name = transform["as"]
    vega_expression = transform["expr"]
    derived = eval_vegajs(vega_expression, expr)
    return expr.mutate(derived.name(output_name))
Beispiel #11
0
def bin(transform: BinTransform, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega bin transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/bin/

    The minimum and maximum of the column named by ``transform["extent"]``
    are executed eagerly, each inside its own tracing span. The range is
    split into ``transform["maxbins"]`` equal-width bins, and two columns
    named by ``transform["as"]`` are added with the left and right edge of
    the bin each row of ``transform["field"]`` falls in.
    """
    source = expr[transform["field"]]
    left_name, right_name = transform["as"]
    bin_count = transform["maxbins"]
    extent_column = expr[transform["extent"]]

    def _execute_traced(span_name, build_scalar):
        # Build, log and run one scalar query inside a tracing span.
        with tracer.start_span(span_name) as span:
            scalar = build_scalar()
            span.log_kv({"sql": scalar.compile()})
            return scalar.execute()

    # Precompute min/max or else we get
    # "Expression 'xxx' is not being grouped" errors
    lower = _execute_traced("bin_transform:min", extent_column.min)
    upper = _execute_traced("bin_transform:max", extent_column.max)

    width = (upper - lower) / bin_count

    # The _float casts work around
    # https://github.com/ibis-project/ibis/issues/1934
    bin_index = ((source / _float(width)) - _float(lower / width)).floor()
    left_edge = (lower + (bin_index * width)).name(left_name)
    right_edge = ((lower + width) + (bin_index * width)).name(right_name)

    # Add the two edge columns; existing columns are kept.
    return expr.mutate([left_edge, right_edge])
Beispiel #12
0
def collect(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega collect transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/collect/

    Only the ``sort`` entry is used: its ``field`` and optional ``order``
    (default "ascending" for every field) are turned into sort keys.

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the transformed expression
    """
    sort_spec = transform["sort"]
    fields = promote_list(sort_spec["field"])
    all_ascending = ["ascending"] * len(fields)
    orders = promote_list(sort_spec.get("order", all_ascending))
    assert len(fields) == len(orders)

    # Each key is (field, ascending); anything but "ascending" is descending
    sort_keys = []
    for name, direction in zip(fields, orders):
        sort_keys.append((name, direction == "ascending"))
    return expr.sort_by(sort_keys)
Beispiel #13
0
def formula(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Add a computed column to an ibis expression.

    ``transform["expr"]`` is evaluated against ``expr`` with ``eval_vegajs``
    and the result is added as a column named ``transform["as"]``.
    """
    output_name = transform["as"]
    vega_expression = transform["expr"]
    computed = eval_vegajs(vega_expression, expr)
    return expr.mutate(computed.name(output_name))
Beispiel #14
0
def vl_aggregate_to_grouping_expr(expr: ibis.Expr, a: dict) -> ibis.Expr:
    """
    Build the named aggregation expression for one aggregate entry.

    The method named by ``translate_op(a["op"])`` is called on the column
    ``a["field"]`` when a field is given, otherwise on ``expr`` itself. The
    result is named ``a["as"]``.
    """
    target = expr[a["field"]] if "field" in a else expr
    method_name = translate_op(a["op"])
    reduction = getattr(target, method_name)
    return reduction().name(a["as"])
Beispiel #15
0
def get_client(expr: ibis.Expr) -> ibis.client.Client:
    """
    Return the ``source`` of the table operation underlying ``expr``.

    Follows ``expr.op().table.op().source``, so the expression's operation
    must expose a ``table`` attribute.
    """
    expr_op = expr.op()
    table_op = expr_op.table.op()
    return table_op.source
Beispiel #16
0
def filter(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Filter an ibis expression with a vega expression.

    ``transform["expr"]`` is evaluated against ``expr`` with ``eval_vegajs``
    and the result is used as the filter predicate.
    """
    predicate = eval_vegajs(transform["expr"], expr)
    return expr.filter(predicate)