def timeunit(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega time unit transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/timeunit/

    The field is truncated to the finest requested unit with the ibis
    truncate expression, which yields the start of the interval; the end of
    the interval is that start plus one interval of the same unit.
    https://docs.ibis-project.org/generated/ibis.expr.api.TimestampValue.truncate.html

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform. The keys
        ``type`` (must be ``"timeunit"``), ``field``, ``as`` (a pair of
        output column names) and ``units`` are read. The dictionary is not
        modified.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the transformed expression

    Raises
    ------
    AssertionError
        If ``transform["type"]`` is not ``"timeunit"``.
    KeyError
        If one of the required keys is missing.
    NotImplementedError
        If the transform carries extra keys or an unsupported ``units`` list.
    """
    # Work on a shallow copy so the caller's spec survives and can be reused.
    spec = dict(transform)

    # The pop must not live inside an ``assert``: under ``python -O`` the
    # statement is stripped, "type" would stay in the spec and be reported
    # below as an unsupported key. AssertionError is kept as the exception
    # type for backward compatibility.
    kind = spec.pop("type")
    if kind != "timeunit":
        raise AssertionError(f"expected a 'timeunit' transform, got {kind!r}")

    field = expr[spec.pop("field")]
    as_start, as_end = spec.pop("as")
    units = spec.pop("units")
    if spec:
        raise NotImplementedError(
            f"timeunit transform: {list(spec)} keys are not supported")

    # Vega units from coarsest to finest, each with the matching ibis
    # truncate unit and ``ibis.interval`` keyword. A supported ``units`` list
    # is a non-empty prefix of the vega names, in this order.
    granularities = (
        ("year", "Y", "years"),
        ("month", "M", "months"),
        ("date", "D", "days"),
        ("hours", "h", "hours"),
        ("minutes", "m", "minutes"),
        ("seconds", "s", "seconds"),
        ("milliseconds", "ms", "milliseconds"),
    )
    depth = len(units) if isinstance(units, list) else 0
    expected = [vega_name for vega_name, _, _ in granularities[:depth]]
    if depth == 0 or units != expected:
        raise NotImplementedError(
            f"timeunit transform: {units} units are not supported")

    _, truncate_unit, interval_keyword = granularities[depth - 1]
    start = field.truncate(truncate_unit)
    delta = ibis.interval(**{interval_keyword: 1})
    return expr.mutate([start.name(as_start), (start + delta).name(as_end)])
def apply(expr: ibis.Expr, transforms: Any) -> ibis.Expr:
    """Apply transform or transforms to the expression.

    Parameters
    ----------
    expr: ibis.Expr
        The expression to transform.
    transforms: dict, list or None
        A transform specification or list of transform specifications.
        Each specification must be valid according to Vega's transform
        schema. ``None`` leaves ``expr`` untouched. The specifications are
        not modified.

    Returns
    -------
    expr_transformed : ibis.Expr
        The transformed expression.

    Raises
    ------
    AssertionError
        If an ``extent`` transform has keys other than ``field``,
        ``signal_`` and ``type``.
    KeyError
        If a transform references an extent signal that no preceding
        ``extent`` transform defined.
    """
    if transforms is None:
        return expr
    transforms = promote_list(transforms)

    # First traverse list of transforms, and find any that create bins.
    # The resulting bin fields, we create as the source fields,
    # because, for some reason, the filter happens before
    # the binning, but it refers to the field created by the binning.
    # See the signals in https://vega.github.io/editor/#/gist/9c7d4dee819450e59cf7381f4d47fee0/example.vl.json
    # as an example.
    # TODO: Bring this up with Dominik and see why this is
    for t in transforms:
        if t["type"] == "bin":
            expr = expr.mutate(expr[t["field"]].name(t["as"][0]))

    # Have extra processing for extents that create signals;
    # can probably remove once https://github.com/vega/vega-lite/issues/5320 is fixed.
    signal_to_field = {}
    for t in transforms:
        if t["type"] == "extent":
            assert {"field", "signal_", "type"} == t.keys()
            signal_to_field[t["signal_"]] = t["field"]
            continue

        # Hand the handler a shallow copy so neither the rewrite below nor
        # any handler-side mutation leaks into the caller's specification.
        spec = dict(t)

        # Change binning that references a signal extent to the actual field.
        # Only a mapping can be a signal reference; a plain string extent
        # must not be substring-matched against "signal".
        extent = spec.get("extent")
        if isinstance(extent, dict) and "signal" in extent:
            # Look up without consuming, so several transforms may share
            # one extent signal.
            spec["extent"] = signal_to_field[extent["signal"]]

        expr = _delegate_transform(spec, expr)
    return expr
def formula(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega formula transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/formula/

    The vega expression is evaluated with ``eval_vegajs``; vega expressions
    are not fully implemented, so not every expression will work.

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform. ``as`` names
        the output column and ``expr`` holds the vega expression source.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the transformed expression
    """
    # Read "as" before "expr", so a spec missing both reports "as" first.
    output_name, vega_source = transform["as"], transform["expr"]
    derived_column = eval_vegajs(vega_source, expr).name(output_name)
    return expr.mutate(derived_column)
def bin(transform: BinTransform, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega bin transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/bin/

    The minimum and maximum of the ``extent`` column are executed eagerly,
    the range between them is divided into ``maxbins`` equal-width bins, and
    two columns holding each row's lower and upper bin edge are added.

    Parameters
    ----------
    transform: BinTransform
        The vega bin transform. ``field`` is the column to bin, ``as`` the
        pair of output column names, ``maxbins`` the number of bins and
        ``extent`` the name of the column whose min/max bound the bins.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: ``expr`` with the two bin-edge columns added
    """
    source_column = expr[transform["field"]]
    lower_name, upper_name = transform["as"]
    bin_count = transform["maxbins"]
    extent_column = expr[transform["extent"]]

    def _execute_traced(span_name, build_aggregate):
        """Build, log and execute one scalar aggregate inside a tracing span."""
        with tracer.start_span(span_name) as span:
            aggregate = build_aggregate()
            span.log_kv({"sql": aggregate.compile()})
            return aggregate.execute()

    # Precompute min/max or else we get
    # "Expression 'xxx' is not being grouped"
    # errors
    lowest = _execute_traced("bin_transform:min", extent_column.min)
    highest = _execute_traced("bin_transform:max", extent_column.max)

    # NOTE(review): when max equals min the width is zero and the divisions
    # below look like they would fail -- confirm callers rule that out.
    width = (highest - lowest) / bin_count

    # Cast these to floats to work around
    # https://github.com/ibis-project/ibis/issues/1934
    bin_index = ((source_column / _float(width)) - _float(lowest / width)).floor()

    lower_edge = (lowest + (bin_index * width)).name(lower_name)
    upper_edge = ((lowest + width) + (bin_index * width)).name(upper_name)

    # Add the two bin-edge columns to the expression.
    return expr.mutate([lower_edge, upper_edge])
def formula(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega formula transform to an ibis expression.
    https://vega.github.io/vega/docs/transforms/formula/

    Parameters
    ----------
    transform: dict
        The vega transform. ``expr`` is the vega expression to evaluate and
        ``as`` the name of the column that receives the result.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: ``expr`` with the computed column added
    """
    # NOTE(review): a function with this same name is defined earlier in the
    # file; this later definition is the one that ends up bound -- confirm
    # the duplicate is intended.
    result_name = transform["as"]
    vega_expression = transform["expr"]
    return expr.mutate(eval_vegajs(vega_expression, expr).name(result_name))