def _field_predicates(field_expr, predicate: dict):
    """Translate a Vega-Lite field predicate into a list of boolean expressions.

    Parameters
    ----------
    field_expr
        The column expression the predicate is tested against.
    predicate : dict
        A Vega-Lite field predicate (already known to contain "field").

    Returns
    -------
    list or None
        The comparison expressions, or ``None`` when the predicate uses an
        operator that is not translated here.
    """
    if "range" in predicate:
        lower, upper = predicate["range"]
        return [field_expr >= lower, field_expr <= upper]
    if "equal" in predicate:
        return [field_expr == predicate["equal"]]
    if "gt" in predicate:
        return [field_expr > predicate["gt"]]
    if "lt" in predicate:
        return [field_expr < predicate["lt"]]
    if "lte" in predicate:
        return [field_expr <= predicate["lte"]]
    if "gte" in predicate:
        return [field_expr >= predicate["gte"]]
    return None


def update_spec(expr: ibis.Expr, spec: dict):
    """
    Takes in an ibis expression and a spec, updating the spec and returning a
    new ibis expr.

    As many Vega-Lite transforms as possible are moved out of ``spec`` and
    into the returned expression. Logic modified from
    https://github.com/vega/vega-lite-transforms2sql/blob/3b360144305a6cec79792036049e8a920e4d2c9e/transforms2sql.ts#L7

    Anything that cannot be translated is left in ``spec`` untouched so that
    Vega-Lite still evaluates it:

    * a ``groupby`` that is not paired with an ``aggregate`` (for example on a
      joinaggregate or window transform),
    * an aggregate whose ``groupby`` names a column missing from the current
      expression,
    * filters that are expression strings, logical compositions, predicates
      with a ``timeUnit``, or field predicates with an unhandled operator.

    Parameters
    ----------
    expr : ibis.Expr
        The expression the transforms are applied to.
    spec : dict
        A Vega-Lite spec. Mutated in place: translated keys are removed from
        each transform, empty transforms are dropped, and the "transform" key
        is deleted when nothing is left.

    Returns
    -------
    ibis.Expr
        The expression with the translated transforms applied.
    """
    original_expr = expr

    for transform in spec.get("transform", []):
        aggregate = transform.get("aggregate")
        if aggregate:
            groupby = transform.get("groupby")
            if groupby:
                if not all(field in expr.columns for field in groupby):
                    # A grouping field is not in the expression, because it
                    # comes from an earlier aggregate we could not process.
                    # Leave the whole transform for Vega-Lite.
                    continue
                expr = expr.group_by(groupby)
            expr = expr.aggregate([
                vl_aggregate_to_grouping_expr(original_expr, a)
                for a in aggregate
            ])
            transform.pop("groupby", None)
            del transform["aggregate"]

        # https://vega.github.io/vega-lite/docs/filter.html#field-predicate
        filter_ = transform.get("filter")
        is_plain_field_predicate = (
            isinstance(filter_, dict)
            and "field" in filter_
            # A timeUnit predicate tests a derived value, not the raw column.
            and "timeUnit" not in filter_
        )
        if is_plain_field_predicate:
            preds = _field_predicates(original_expr[filter_["field"]], filter_)
            if preds is not None:
                expr = expr.filter(preds)
                del transform["filter"]

    # remove empty transforms
    spec["transform"] = [t for t in spec.get("transform", []) if t]
    # remove key if empty
    if not spec["transform"]:
        del spec["transform"]
    return expr
def empty(expr: ibis.Expr) -> pandas.DataFrame:
    """
    Build an empty DataFrame for an ibis expression, based on its schema.

    A frame with the expression's column names and no rows is created and
    then handed to the schema's ``apply_to``.
    https://github.com/ibis-project/ibis/issues/1676#issuecomment-441472528

    Parameters
    ----------
    expr : ibis.Expr
        The expression whose columns and schema are used.

    Returns
    -------
    pandas.DataFrame
        Whatever ``expr.schema().apply_to`` returns for the row-less frame.
    """
    schema = expr.schema()
    blank_frame = pandas.DataFrame(columns=expr.columns)
    return schema.apply_to(blank_frame)
# Maps the supported "units" lists to (truncate unit, ibis.interval keyword).
_TIMEUNIT_RESOLUTIONS = {
    ("year",): ("Y", "years"),
    ("year", "month"): ("M", "months"),
    ("year", "month", "date"): ("D", "days"),
    ("year", "month", "date", "hours"): ("h", "hours"),
    ("year", "month", "date", "hours", "minutes"): ("m", "minutes"),
    ("year", "month", "date", "hours", "minutes", "seconds"): ("s", "seconds"),
    (
        "year",
        "month",
        "date",
        "hours",
        "minutes",
        "seconds",
        "milliseconds",
    ): ("ms", "milliseconds"),
}


def timeunit(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega time unit transform to an ibis expression.

    https://vega.github.io/vega/docs/transforms/timeunit/

    It transforms it into the Ibis truncate expression.
    https://docs.ibis-project.org/generated/ibis.expr.api.TimestampValue.truncate.html

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform. It must hold
        the keys "type", "field", "as" (two output names) and "units", and
        nothing else. The dictionary is not modified.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the transformed expression, with one column holding the
        truncated timestamp and one holding that value plus one unit.

    Raises
    ------
    KeyError
        If a required key is missing.
    NotImplementedError
        If the transform has extra keys or an unsupported "units" list.
    """
    # Work on a copy: keys are popped to detect unsupported options, and the
    # caller's spec must stay reusable.
    params = dict(transform)

    # Pop outside the assert, so the key is still consumed under ``python -O``.
    kind = params.pop("type")
    assert kind == "timeunit"

    field = expr[params.pop("field")]
    as_start, as_end = params.pop("as")
    units = params.pop("units")
    if params:
        raise NotImplementedError(
            f"timeunit transform: {list(params)} keys are not supported")

    try:
        resolution = _TIMEUNIT_RESOLUTIONS.get(tuple(units))
    except TypeError:
        # Not iterable or not hashable: treat as unsupported.
        resolution = None
    if resolution is None:
        raise NotImplementedError(
            f"timeunit transform: {units} units are not supported")

    truncate_unit, interval_keyword = resolution
    start = field.truncate(truncate_unit)
    delta = ibis.interval(**{interval_keyword: 1})
    return expr.mutate([start.name(as_start), (start + delta).name(as_end)])
def aggregate(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a single-measure vega aggregate transform to an ibis expression.

    Parameters
    ----------
    transform: dict
        The vega transform. "groupby", "ops", "fields" and "as" must each hold
        exactly one entry; unpacking fails otherwise.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the expression grouped by the single key and aggregated
        into one named measure.
    """
    (group_key,) = transform["groupby"]
    (operation,) = transform["ops"]
    (source_field,) = transform["fields"]
    (output_name,) = transform["as"]

    grouped = expr.group_by(group_key)
    # The translated op name is looked up as a method on the column.
    reducer = getattr(expr[source_field], _translate_op(operation))
    return grouped.aggregate([reducer().name(output_name)])
def collect(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Sort an expression according to the "sort" entry of a vega collect transform.

    Parameters
    ----------
    transform: dict
        The vega transform. ``transform["sort"]`` must hold "field" and may
        hold "order"; every field is sorted ascending when "order" is absent.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the sorted expression
    """
    sort_spec = transform["sort"]
    fields = promote_list(sort_spec["field"])
    default_orders = ["ascending"] * len(fields)
    orders = promote_list(sort_spec.get("order", default_orders))
    assert len(fields) == len(orders)

    # Each sort key is (field, ascending?).
    sort_keys = []
    for name, direction in zip(fields, orders):
        sort_keys.append((name, direction == "ascending"))
    return expr.sort_by(sort_keys)
def aggregate(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega aggregate transform to an ibis expression.

    https://vega.github.io/vega/docs/transforms/aggregate/

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform. All keys are
        optional:

        * "groupby": fields to group by; when missing or empty the whole
          table is aggregated as a single group.
        * "ops": aggregate operations, defaulting to ``["count"]``.
        * "fields": fields to aggregate, defaulting to ``[None]``.
        * "as": output names. Missing names are passed on as ``None``, so a
          short or absent list never drops a measure.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the aggregated expression
    """
    groupby = transform.get("groupby")
    # It's undocumented, but an undefined "ops" value defaults to ["count"]
    # https://github.com/vega/vega/blob/4d10f9da0df0833c90ff259bbd0960f7cb05e3bf/packages/vega-transforms/src/Aggregate.js#L159-L161
    ops = transform.get("ops") or ["count"]
    fields = transform.get("fields") or [None]
    measures = list(zip(fields, ops))

    # Pad the names so that zipping below cannot truncate the measures.
    names = list(transform.get("as") or [])
    names.extend([None] * (len(measures) - len(names)))

    aggregations = [
        _aggregate(expr, field, op, name)
        for (field, op), name in zip(measures, names)
    ]
    target = expr.group_by(groupby) if groupby else expr
    return target.aggregate(aggregations)
def raise_unsupported_expr_error(expr: ibis.Expr):
    """Raise an unsupported-operation error naming the expression's op type.

    Parameters
    ----------
    expr : ibis.Expr
        The expression whose ``op()`` type is reported in the message.

    Raises
    ------
    com.UnsupportedOperationError
        Always.
    """
    operation_type = type(expr.op())
    raise com.UnsupportedOperationError(
        "OmniSciDB backend doesn't support {} operation!".format(operation_type)
    )
def maybe_add_random_sort(
    self, data_client: ibis.client, table: ibis.Expr
) -> ibis.Expr:
    """Return a randomly sorted query if it is supported for the client.

    The exact type of ``data_client`` is looked up in ``RANDOM_SORT_SUPPORTS``.
    When it is found, ``table`` is sorted by a ``RandomSortKey`` built from the
    mapped value. Otherwise ``table`` is returned unchanged, and a warning is
    logged unless the client is exactly ``TeradataClient``.
    """
    client_type = type(data_client)

    if client_type in RANDOM_SORT_SUPPORTS:
        sort_key = RandomSortKey(RANDOM_SORT_SUPPORTS[client_type]).to_expr()
        return table.sort_by(sort_key)

    # Teradata 'SAMPLE' is random by nature and does not require a sort by
    if client_type != TeradataClient:
        logging.warning(
            "Data Client %s Does Not Enforce Random Sort on Sample",
            str(client_type),
        )
    return table
def apply(expr: ibis.Expr, transforms: Any) -> ibis.Expr:
    """Apply transform or transforms to the expression.

    Parameters
    ----------
    expr: ibis.Expr
        The expression to transform.
    transforms: dict, list or None
        A transform specification or list of transform specifications.
        Each specification must be valid according to Vega's transform
        schema. ``None`` returns ``expr`` unchanged. The specifications are
        not modified.

    Returns
    -------
    expr_transformed : ibis.Expr
        The transformed expression.

    Raises
    ------
    KeyError
        If a transform refers to an extent signal that no earlier "extent"
        transform defined.
    """
    if transforms is None:
        return expr
    transforms = promote_list(transforms)

    # First traverse list of transforms, and find any that create bins.
    # The resulting bin fields, we create as the source fields,
    # because, for some reason, the filter happens before
    # the binning, but it refers to the field created by the binning.
    # See the signals in https://vega.github.io/editor/#/gist/9c7d4dee819450e59cf7381f4d47fee0/example.vl.json
    # as an example
    # TODO: find out upstream why the filter precedes the binning
    for t in transforms:
        if t["type"] == "bin":
            expr = expr.mutate(expr[t["field"]].name(t["as"][0]))

    # Have extra processing for extents that create signals
    # can probably remove once https://github.com/vega/vega-lite/issues/5320 is fixed.
    signal_mapping = {}
    for t in transforms:
        if t["type"] == "extent":
            assert {"field", "signal_", "type"} == t.keys()
            signal_mapping[t["signal_"]] = t["field"]
            continue

        # Change binning that references a signal extent to the field itself.
        extent = t.get("extent")
        if isinstance(extent, dict) and "signal" in extent:
            # Look the signal up without consuming it, so several transforms
            # can share one extent. Substitute on a shallow copy, so the
            # caller's spec is left intact and can be applied again.
            t = {**t, "extent": signal_mapping[extent["signal"]]}
        expr = _delegate_transform(t, expr)
    return expr
def formula(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega formula transform to an ibis expression.

    https://vega.github.io/vega/docs/transforms/formula/

    The vega expression under "expr" is evaluated with ``eval_vegajs``, which
    is not fully implemented, so not every expression will work.

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform. Reads the
        "as" (output column name) and "expr" (vega expression) keys.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the expression with the computed column added
    """
    output_name, vega_expression = transform["as"], transform["expr"]
    computed = eval_vegajs(vega_expression, expr)
    return expr.mutate(computed.name(output_name))
def bin(transform: BinTransform, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega bin transform to an ibis expression.

    https://vega.github.io/vega/docs/transforms/bin/

    The minimum and maximum of the extent column are executed eagerly, each
    inside its own tracing span. They are then used as plain values to build
    the bin edges.

    Parameters
    ----------
    transform: BinTransform
        Reads "field" (column to bin), "as" (names of the left and right edge
        columns), "maxbins" and "extent" (column whose min/max bound the bins).
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: ``expr`` with the two edge columns added via ``mutate``
    """
    field = expr[transform["field"]]
    as_left, as_right = transform["as"]
    maxbins = transform["maxbins"]
    extent = expr[transform["extent"]]

    # Precompute min/max or else we get
    # "Expression 'xxx' is not being grouped"
    # errors
    bounds = {}
    for bound in ("min", "max"):
        with tracer.start_span(f"bin_transform:{bound}") as span:
            bound_expr = getattr(extent, bound)()
            span.log_kv({"sql": bound_expr.compile()})
            bounds[bound] = bound_expr.execute()
    lower, upper = bounds["min"], bounds["max"]

    # NOTE(review): when upper == lower the width is 0 and the divisions
    # below presumably fail -- confirm how a constant column should be binned.
    width = (upper - lower) / maxbins

    # Cast these to floats to work around
    # https://github.com/ibis-project/ibis/issues/1934
    bin_index = ((field / _float(width)) - _float(lower / width)).floor()

    left = (lower + (bin_index * width)).name(as_left)
    right = ((lower + width) + (bin_index * width)).name(as_right)

    # Add the two new edge columns.
    return expr.mutate([left, right])
def collect(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega collect transform to an ibis expression.

    https://vega.github.io/vega/docs/transforms/collect/

    Parameters
    ----------
    transform: dict
        A JSON-able dictionary representing the vega transform. Its "sort"
        entry holds "field" and optionally "order"; a missing "order" means
        ascending for every field.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: the transformed expression
    """
    sort = transform["sort"]
    fields = promote_list(sort["field"])
    orders = promote_list(sort.get("order", ["ascending"] * len(fields)))
    assert len(fields) == len(orders)

    # Pair each field with an "is ascending" flag.
    ascending_flags = [order == "ascending" for order in orders]
    return expr.sort_by(list(zip(fields, ascending_flags)))
def formula(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Add a column computed from a vega expression.

    Parameters
    ----------
    transform: dict
        Reads "as" (name of the new column) and "expr" (the vega expression
        passed to ``eval_vegajs``).
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: ``expr`` mutated with the named column
    """
    target_name = transform["as"]
    evaluated = eval_vegajs(transform["expr"], expr)
    return expr.mutate(evaluated.name(target_name))
def vl_aggregate_to_grouping_expr(expr: ibis.Expr, a: dict) -> ibis.Expr:
    """
    Turn one Vega-Lite aggregate entry into a named aggregation expression.

    Parameters
    ----------
    expr: ibis.Expr
        The expression to aggregate. When ``a`` has a "field" key, that column
        is aggregated; otherwise the operation is called on ``expr`` itself.
    a: dict
        The aggregate entry. Reads "op", "as" and optionally "field".

    Returns
    -------
    The result of calling the translated operation, named ``a["as"]``.
    """
    target = expr[a["field"]] if "field" in a else expr
    # The translated op name is looked up as a method on the target.
    reducer = getattr(target, translate_op(a["op"]))
    return reducer().name(a["as"])
def get_client(expr: ibis.Expr) -> ibis.client.Client:
    """
    Return the ``source`` reached through ``expr.op().table.op()``.

    Parameters
    ----------
    expr: ibis.Expr
        An expression whose operation exposes a ``table`` attribute.

    Returns
    -------
    The ``source`` attribute of the table's operation.
    """
    outer_op = expr.op()
    table_op = outer_op.table.op()
    return table_op.source
def filter(transform: dict, expr: ibis.Expr) -> ibis.Expr:
    """
    Apply a vega filter transform to an ibis expression.

    Parameters
    ----------
    transform: dict
        Reads "expr", the vega expression passed to ``eval_vegajs``.
    expr: ibis.Expr
        The expression to which to apply the transform.

    Returns
    -------
    transformed_expr: ``expr`` filtered by the evaluated predicate
    """
    predicate = eval_vegajs(transform["expr"], expr)
    return expr.filter(predicate)