class ExistsSubquery(ops.Node):
    """Helper node holding a foreign table and a collection of predicates.

    The expression type produced for this node is ``ExistsExpr``.
    """

    # Both arguments are accepted as-is: ``rlz.noop`` is the rule used for
    # them (presumably performing no validation -- confirm against ``rlz``).
    foreign_table = Arg(rlz.noop)
    predicates = Arg(rlz.noop)

    def output_type(self):
        """Return the expression class used to wrap this node."""
        return ExistsExpr
class CategoryLabel(ops.ValueOp):
    """Value operation attaching string labels to a category argument.

    The result is a string value shaped like ``arg``.
    """

    arg = Arg(rlz.category)
    labels = Arg(rlz.noop)
    nulls = Arg(rlz.noop, default=None)
    output_type = rlz.shape_like('arg', dt.string)

    def _validate(self):
        """Check that exactly one label was supplied per category.

        Raises
        ------
        ValueError
            If ``len(labels)`` differs from the cardinality of ``arg``'s type.
        """
        expected = self.arg.type().cardinality
        if len(self.labels) == expected:
            return
        message = 'Number of labels must match number of categories: {}'
        raise ValueError(message.format(expected))
class Between(ops.ValueOp, ops.BooleanValueOp):
    """Boolean operation over an argument and a lower and upper bound."""

    arg = Arg(rlz.any)
    lower_bound = Arg(rlz.any)
    upper_bound = Arg(rlz.any)

    def output_type(self):
        """Return a boolean output type shaped like the operation's arguments.

        Raises
        ------
        TypeError
            If ``arg`` is not comparable with either bound.
        """
        value, low, high = self.args
        # The lower bound is checked first; the first failure raises
        # without looking at the remaining bound.
        for bound in (low, high):
            if not rlz.comparable(value, bound):
                raise TypeError('Arguments are not comparable')
        return rlz.shape_like(self.args, dt.boolean)
def wrapper(f):
    """Wrap the Python callable ``f`` as a BigQuery JavaScript UDF builder.

    ``input_type``, ``output_type``, ``strict`` and ``libraries`` are free
    variables taken from the enclosing scope.

    Parameters
    ----------
    f : callable
        Function whose source is compiled to JavaScript.

    Returns
    -------
    callable
        A function with ``f``'s signature that builds the UDF node and
        returns its expression.  The generated ``CREATE TEMPORARY FUNCTION``
        statement is exposed as the ``js`` attribute of both the returned
        function and every node it creates.

    Raises
    ------
    TypeError
        If ``f`` is not callable.
    """
    if not callable(f):
        raise TypeError('f must be callable, got {}'.format(f))

    signature = inspect.signature(f)
    parameter_names = signature.parameters.keys()

    # One validated argument per (parameter, declared type) pair.  zip()
    # stops at the shorter sequence, so surplus names or types are ignored.
    udf_node_fields = collections.OrderedDict(
        (param, Arg(rlz.value(param_type)))
        for param, param_type in zip(parameter_names, input_type)
    )
    udf_node_fields['output_type'] = (
        lambda self, output_type=output_type: rlz.shape_like(
            self.args, dtype=output_type))
    udf_node_fields['__slots__'] = ('js', )

    udf_node = create_udf_node(f.__name__, udf_node_fields)

    @compiles(udf_node)
    def compiles_udf_node(t, expr):
        # Render a call to the generated function with translated arguments.
        translated_args = [t.translate(arg) for arg in expr.op().args]
        return '{}({})'.format(udf_node.__name__, ', '.join(translated_args))

    type_translation_context = UDFContext()
    return_type = ibis_type_to_bigquery_type(dt.dtype(output_type),
                                             type_translation_context)

    declarations = []
    for param, param_type in zip(parameter_names, input_type):
        bigquery_type = ibis_type_to_bigquery_type(dt.dtype(param_type),
                                                   type_translation_context)
        declarations.append(
            '{name} {type}'.format(name=param, type=bigquery_type))
    bigquery_signature = ', '.join(declarations)

    source = PythonToJavaScriptTranslator(f).compile()

    if strict:
        strict_prefix = repr('use strict') + ';\n'
    else:
        strict_prefix = ''
    call_args = ', '.join(parameter_names)
    if libraries:
        library_options = '\nOPTIONS (\n library={}\n)'.format(
            repr(list(libraries)))
    else:
        library_options = ''

    js = '''\
CREATE TEMPORARY FUNCTION {external_name}({signature})
RETURNS {return_type}
LANGUAGE js AS """
{strict}{source}
return {internal_name}({args});
"""{libraries};'''.format(
        external_name=udf_node.__name__,
        internal_name=f.__name__,
        return_type=return_type,
        source=source,
        signature=bigquery_signature,
        strict=strict_prefix,
        args=call_args,
        libraries=library_options,
    )

    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        node = udf_node(*args, **kwargs)
        node.js = js
        return node.to_expr()

    wrapped.__signature__ = signature
    wrapped.js = js
    return wrapped
def existing_udf(name, input_types, output_type, schema=None, parameters=None):
    """Build an ibis function bound to a UDF that already exists in Postgres.

    Parameters
    ----------
    name : str
        Name of the UDF as defined in the database.
    input_types : List[DataType]
        Types of the UDF arguments, in order.
    output_type : DataType
        Return type of the UDF.
    schema : str, optional
        Schema the UDF is defined in.
    parameters : List[str], optional
        Names for the UDF arguments.  Defaults to ``v0``, ``v1``, ...

    Returns
    -------
    Callable
        Function that builds the UDF node from its arguments and returns the
        corresponding expression.

    Raises
    ------
    ValueError
        If ``parameters`` is given and its length differs from
        ``input_types``.
    """
    n_inputs = len(input_types)
    if parameters is None:
        parameters = ['v{}'.format(position) for position in range(n_inputs)]
    elif n_inputs != len(parameters):
        message = ("Length mismatch in arguments to existing_udf: "
                   "len(input_types)={}, len(parameters)={}")
        raise ValueError(message.format(n_inputs, len(parameters)))

    v.validate_output_type(output_type)

    # One validated argument per (parameter name, declared type) pair.
    udf_node_fields = collections.OrderedDict(
        (param, Arg(rlz.value(param_type)))
        for param, param_type in zip(parameters, input_types)
    )
    udf_node_fields['output_type'] = (
        lambda self, output_type=output_type: rlz.shape_like(
            self.args, dtype=output_type))
    udf_node_fields['resolve_name'] = lambda self: name

    udf_node = _create_udf_node(name, udf_node_fields)

    def _translate_udf(t, expr):
        # Look the function up on ``sa.func``, going through the schema
        # attribute first when one was given.
        namespace = sa.func if schema is None else getattr(sa.func, schema)
        sql_function = getattr(namespace, name)
        return sql_function(*[t.translate(arg) for arg in expr.op().args])

    PostgreSQLCompiler.add_operation(udf_node, _translate_udf)

    def wrapped(*args, **kwargs):
        return udf_node(*args, **kwargs).to_expr()

    return wrapped
class RandomSortKey(ops.SortKey):
    """Sort key node whose expression type is ``RandomSortExpr``."""

    expr = Arg(rlz.any)
    # Plain class attribute (not an ``Arg``), fixed to None.
    value = None

    def equals(self, other, cache=None):
        """Return True when ``other`` is a ``RandomSortKey``.

        Only the type of ``other`` is inspected; ``cache`` is not used.
        """
        return isinstance(other, RandomSortKey)

    def output_type(self):
        """Return the expression class used to wrap this node."""
        return RandomSortExpr

    def resolve_name(self):
        """Return the fixed name ``"RandomSortKey"``."""
        return "RandomSortKey"
class Histogram(BucketLike):
    """Bucketing operation configured by a bin count or a bin width.

    Exactly one of ``nbins`` and ``binwidth`` must be provided.
    """

    arg = Arg(rlz.noop)
    nbins = Arg(rlz.noop, default=None)
    binwidth = Arg(rlz.noop, default=None)
    base = Arg(rlz.noop, default=None)
    closed = Arg(rlz.isin({'left', 'right'}), default='left')
    aux_hash = Arg(rlz.noop, default=None)

    def _validate(self):
        """Ensure exactly one of ``nbins`` / ``binwidth`` is set.

        Raises
        ------
        ValueError
            If neither or both of the two options are given.
        """
        has_nbins = self.nbins is not None
        has_binwidth = self.binwidth is not None
        if has_nbins and has_binwidth:
            raise ValueError('nbins and binwidth are mutually exclusive')
        if not (has_nbins or has_binwidth):
            raise ValueError('Must indicate nbins or binwidth')

    def output_type(self):
        """Return the column expression type of the category data type."""
        # always undefined cardinality (for now)
        column_cls = dt.category.column_type()
        return column_cls
class Bucket(BucketLike):
    """Bucketing operation defined by an explicit list of bucket edges."""

    arg = Arg(rlz.noop)
    buckets = Arg(rlz.noop)
    closed = Arg(rlz.isin({'left', 'right'}), default='left')
    close_extreme = Arg(bool, default=True)
    include_under = Arg(bool, default=False)
    include_over = Arg(bool, default=False)

    def _validate(self):
        """Check that the bucket edges describe at least one bucket.

        Raises
        ------
        ValueError
            If no edge is given, or if a single edge is given without both
            ``include_under`` and ``include_over`` enabled.
        """
        n_edges = len(self.buckets)
        if not n_edges:
            raise ValueError('Must be at least one bucket edge')
        # A single edge only yields buckets through the two open-ended ones.
        if n_edges == 1 and not (self.include_under and self.include_over):
            raise ValueError(
                'If one bucket edge provided, must have '
                'include_under=True and include_over=True')

    @property
    def nbuckets(self):
        """Number of buckets: intervals between edges plus enabled extras."""
        between_edges = len(self.buckets) - 1
        return between_edges + self.include_over + self.include_under
class MyOp(ops.ValueOp):
    """Value operation taking a single array-of-double argument."""

    # Validated as a value of type ``dt.Array(dt.double)``.
    value = Arg(rlz.value(dt.Array(dt.double)))
    # Output type is derived from the ``value`` argument.
    output_type = rlz.typeof('value')
class MyOperation(ops.Node):
    """Node with a single argument declared against ``ir.IntegerValue``."""

    # presumably requires an integer value expression -- confirm how ``Arg``
    # treats a class passed as its validator
    arg = Arg(ir.IntegerValue)
class Log(ops.Node):
    """Node with a required ``arg`` and an optional ``base`` (default None)."""

    # NOTE(review): ``rlz.double`` is *called* here, whereas the other
    # operations in this file pass rules uncalled (e.g. ``rlz.integer``,
    # ``rlz.string``) -- confirm ``rlz.double()`` is the intended spelling.
    arg = Arg(rlz.double())
    base = Arg(rlz.double(), default=None)
class DummyOp(ops.ValueOp):
    """Value operation with a single argument validated by ``rlz.any``."""

    arg = Arg(rlz.any)
class CustomOp(Node):
    """Node with an ``int`` and a ``float`` argument, wrapped by ``CustomExpr``."""

    # ``show=False`` presumably hides this argument from the node's
    # displayed form -- confirm against ``Arg``.
    first_arg = Arg(int, show=False)
    second_arg = Arg(float)

    def output_type(self):
        """Return the expression class used to wrap this node."""
        return CustomExpr
class FooNode(ops.ValueOp):
    """Value operation with one integer argument, producing ``Foo``."""

    value = Arg(rlz.integer)

    def output_type(self):
        """Return a ``Foo`` constructor with ``dtype`` preset to ``dt.int64``."""
        return functools.partial(Foo, dtype=dt.int64)
class StringContains(ops.ValueOp):
    """String operation over ``arg`` and ``substr`` with optional bounds.

    ``start`` and ``end`` are optional integers defaulting to None.
    """

    arg = Arg(rlz.string)
    substr = Arg(rlz.string)
    start = Arg(rlz.integer, default=None)
    end = Arg(rlz.integer, default=None)
    # Boolean result shaped like ``arg``.
    output_type = rlz.shape_like('arg', dt.boolean)
class StringSQLLike(ops.ValueOp):
    """String operation over ``arg`` and ``pattern`` with an optional escape.

    ``escape`` is declared against the builtin ``str`` and defaults to None.
    """

    arg = Arg(rlz.string)
    pattern = Arg(rlz.string)
    escape = Arg(str, default=None)
    # Boolean result shaped like ``arg``.
    output_type = rlz.shape_like('arg', dt.boolean)
class FooNode(ops.ValueOp):
    """Value operation with one array-of-int64 argument, wrapped by ``Foo``."""

    # Validated as a value of type ``dt.Array(dt.int64)``.
    value = Arg(rlz.value(dt.Array(dt.int64)))

    def output_type(self):
        """Return the expression class used to wrap this node."""
        return Foo
class MyExprNode(ops.Node):
    """Node with a string ``foo`` and a numeric ``bar``, wrapped by ``MyExpr``."""

    foo = Arg(rlz.string)
    bar = Arg(rlz.numeric)

    def output_type(self):
        """Return the expression class used to wrap this node."""
        return MyExpr
def wrapper(f):
    """Wrap the Python callable ``f`` as a BigQuery JavaScript UDF builder.

    ``input_type``, ``output_type``, ``strict`` and ``libraries`` are free
    variables taken from the enclosing scope.

    Parameters
    ----------
    f : callable
        Function whose source is compiled to JavaScript.

    Returns
    -------
    callable
        A function with ``f``'s signature that builds the UDF node and
        returns its expression.  The generated ``CREATE TEMPORARY FUNCTION``
        statement is exposed as the ``js`` attribute of both the returned
        function and every node it creates.

    Raises
    ------
    TypeError
        If ``f`` is not callable.
    """
    if not callable(f):
        raise TypeError("f must be callable, got {}".format(f))

    signature = inspect.signature(f)
    parameter_names = signature.parameters.keys()

    # One validated argument per (parameter, declared type) pair.  zip()
    # stops at the shorter sequence, so surplus names or types are ignored.
    udf_node_fields = collections.OrderedDict(
        [(name, Arg(rlz.value(type)))
         for name, type in zip(parameter_names, input_type)] + [
             (
                 "output_type",
                 lambda self, output_type=output_type: rlz.shape_like(
                     self.args, dtype=output_type),
             ),
             ("__slots__", ("js", )),
         ])

    udf_node = create_udf_node(f.__name__, udf_node_fields)

    # The decorator form below is disabled; the compile rule is instead
    # stored in the translator's registry after the function is defined.
    # NOTE(review): the import is assumed to be a live statement because
    # ``BigQueryExprTranslator`` is used just below -- confirm.
    # @compiles(udf_node)
    from ..compiler import BigQueryExprTranslator

    def compiles_udf_node(t, expr):
        # Render a call to the generated function with translated arguments.
        return "{}({})".format(udf_node.__name__,
                               ", ".join(map(t.translate, expr.op().args)))

    BigQueryExprTranslator._registry[udf_node] = compiles_udf_node

    # The same context object is passed to every type translation below.
    type_translation_context = UDFContext()
    return_type = ibis_type_to_bigquery_type(dt.dtype(output_type),
                                             type_translation_context)
    bigquery_signature = ", ".join("{name} {type}".format(
        name=name,
        type=ibis_type_to_bigquery_type(dt.dtype(type),
                                        type_translation_context),
    ) for name, type in zip(parameter_names, input_type))
    source = PythonToJavaScriptTranslator(f).compile()

    # The compiled source is embedded in the function body, followed by a
    # call to the original function name with the declared parameters.
    js = '''\
CREATE TEMPORARY FUNCTION {external_name}({signature})
RETURNS {return_type}
LANGUAGE js AS """
{strict}{source}
return {internal_name}({args});
"""{libraries};'''.format(
        external_name=udf_node.__name__,
        internal_name=f.__name__,
        return_type=return_type,
        source=source,
        signature=bigquery_signature,
        strict=repr("use strict") + ";\n" if strict else "",
        args=", ".join(parameter_names),
        libraries=("\nOPTIONS (\n library={}\n)".format(
            repr(list(libraries))) if libraries else ""),
    )

    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        node = udf_node(*args, **kwargs)
        node.js = js
        return node.to_expr()

    # Expose the original signature and the generated SQL on the wrapper.
    wrapped.__signature__ = signature
    wrapped.js = js
    return wrapped