def __init__(
    self,
    inputs,
    output,
    so_symbol=None,
    lib_path=None,
    name=None,
):
    """Set up a wrapper around a scalar function.

    Parameters
    ----------
    inputs
        Input types, forwarded unchanged to ``ScalarFunction.__init__``.
    output
        Output type; checked with ``v.validate_output_type`` before any
        state is stored on the instance.
    so_symbol : optional
        Stored as ``self.so_symbol``.
        NOTE(review): presumably the symbol name inside the shared
        library -- confirm against the callers.
    lib_path : optional
        Forwarded to ``ImpalaFunction.__init__``.
    name : optional
        Forwarded to ``ImpalaFunction.__init__``.
    """
    # Reject an invalid output type before mutating the instance.
    v.validate_output_type(output)

    self.so_symbol = so_symbol

    # The two bases are initialised explicitly and in this order because
    # ScalarFunction receives ``self.name`` rather than the raw ``name``
    # argument -- presumably ImpalaFunction.__init__ is what sets (and
    # possibly defaults) ``self.name``.
    ImpalaFunction.__init__(self, name=name, lib_path=lib_path)
    ScalarFunction.__init__(self, inputs, output, name=self.name)
def __init__(self, func, func_type, input_type, output_type):
    """Validate the declared types and store them as ibis dtypes.

    Parameters
    ----------
    func
        The wrapped function; stored as ``self.func``.
    func_type
        Stored unchanged as ``self.func_type``.
    input_type
        Iterable of type specifications. It is handed to
        ``v.validate_input_type`` together with ``func`` and then each
        element is converted with ``dt.dtype``.
    output_type
        Type specification, checked with ``v.validate_output_type`` and
        converted with ``dt.dtype``.
    """
    # Both validations run before anything is stored on the instance.
    v.validate_input_type(input_type, func)
    v.validate_output_type(output_type)

    self.func = func
    self.func_type = func_type

    # Normalise every declared type into a dtype object.
    self.input_type = [dt.dtype(declared) for declared in input_type]
    self.output_type = dt.dtype(output_type)
def __init__(self, func, func_type, input_type, output_type):
    """Validate the declared types, store them, and pick a coercion function.

    Parameters
    ----------
    func
        The wrapped function; stored as ``self.func``.
    func_type
        Stored unchanged as ``self.func_type``.
    input_type
        Iterable of type specifications. It is handed to
        ``v.validate_input_type`` together with ``func`` and then each
        element is converted with ``dt.dtype``.
    output_type
        Type specification, checked with ``v.validate_output_type`` and
        converted with ``dt.dtype``.
    """
    # Both validations run before anything is stored on the instance.
    v.validate_input_type(input_type, func)
    v.validate_output_type(output_type)

    self.func = func
    self.func_type = func_type

    # Normalise every declared type into a dtype object.
    self.input_type = [dt.dtype(declared) for declared in input_type]
    self.output_type = dt.dtype(output_type)

    # Must stay last: the helper is called on the fully populated instance
    # (presumably it reads the attributes assigned above).
    self.coercion_fn = self._get_coercion_function()
def existing_udf(name, input_types, output_type, schema=None, parameters=None):
    """Create an ibis function that refers to an existing Postgres UDF.

    The UDF is expected to be defined in the database already; this only
    builds the ibis node for it and registers how that node is translated.

    Parameters
    ----------
    name : str
        Name of the UDF in the database.
    input_types : List[DataType]
        Types of the UDF's arguments, in order.
    output_type : DataType
        Return type of the UDF.
    schema : str, optional
        The schema that the UDF is defined in.
    parameters : List[str], optional
        Names for the arguments of the UDF. Defaults to ``v0``, ``v1``, ...
        (one per entry of ``input_types``).

    Returns
    -------
    Callable
        The wrapped function. Calling it builds the UDF node from its
        arguments and returns ``node.to_expr()``.

    Raises
    ------
    ValueError
        If ``parameters`` is given and its length differs from that of
        ``input_types``.
    """
    if parameters is None:
        parameters = ['v{}'.format(i) for i in range(len(input_types))]
    else:
        n_types, n_params = len(input_types), len(parameters)
        if n_types != n_params:
            mismatch = (
                "Length mismatch in arguments to existing_udf: "
                "len(input_types)={}, len(parameters)={}"
            )
            raise ValueError(mismatch.format(n_types, n_params))

    v.validate_output_type(output_type)

    # Field order matters: the argument fields come first, followed by the
    # ``output_type`` rule and the ``resolve_name`` hook.
    udf_node_fields = collections.OrderedDict()
    for param_name, param_type in zip(parameters, input_types):
        udf_node_fields[param_name] = Arg(rlz.value(param_type))
    # ``output_type`` is bound as a default argument so the lambda keeps the
    # value it was created with.
    udf_node_fields['output_type'] = (
        lambda self, output_type=output_type: rlz.shape_like(
            self.args, dtype=output_type
        )
    )
    udf_node_fields['resolve_name'] = lambda self: name

    udf_node = _create_udf_node(name, udf_node_fields)

    def _translate_udf(t, expr):
        # Resolves to ``sa.func.<schema>.<name>`` when a schema is given,
        # otherwise to ``sa.func.<name>``.
        namespace = sa.func if schema is None else getattr(sa.func, schema)
        sql_function = getattr(namespace, name)
        translated_args = [t.translate(arg) for arg in expr.op().args]
        return sql_function(*translated_args)

    PostgreSQLCompiler.add_operation(udf_node, _translate_udf)

    def wrapped(*args, **kwargs):
        return udf_node(*args, **kwargs).to_expr()

    return wrapped
def validate_func_and_types(self, func):
    """Check ``func`` and this object's declared types.

    Parameters
    ----------
    func
        The object to validate; it must be callable.

    Raises
    ------
    TypeError
        If ``func`` is not callable.
    com.IbisTypeError
        If ``self.output_type`` is not nullable.
    """
    if not callable(func):
        raise TypeError('func must be callable, got {}'.format(func))

    # Check the declared input types against the function, then check the
    # declared output type.
    v.validate_input_type(self.input_type, func)
    v.validate_output_type(self.output_type)

    if self.output_type.nullable:
        return

    raise com.IbisTypeError(
        'Spark does not support non-nullable output types'
    )
def __init__(
    self,
    inputs,
    output,
    update_fn=None,
    init_fn=None,
    merge_fn=None,
    finalize_fn=None,
    serialize_fn=None,
    lib_path=None,
    name=None,
):
    """Set up a wrapper around an aggregate function.

    Parameters
    ----------
    inputs
        Input types, forwarded unchanged to ``AggregateFunction.__init__``.
    output
        Output type; checked with ``v.validate_output_type``.
    update_fn, init_fn, merge_fn, finalize_fn, serialize_fn : optional
        Stored on the instance under the same names.
        NOTE(review): presumably the names of the individual UDA entry
        points -- confirm against the callers.
    lib_path : optional
        Forwarded to ``ImpalaFunction.__init__``.
    name : optional
        Forwarded to ``ImpalaFunction.__init__``.
    """
    # The entry points are recorded first, exactly as given.
    self.init_fn = init_fn
    self.update_fn = update_fn
    self.merge_fn = merge_fn
    self.finalize_fn = finalize_fn
    self.serialize_fn = serialize_fn

    v.validate_output_type(output)

    # The two bases are initialised explicitly and in this order because
    # AggregateFunction receives ``self.name`` rather than the raw ``name``
    # argument -- presumably ImpalaFunction.__init__ is what sets (and
    # possibly defaults) ``self.name``.
    ImpalaFunction.__init__(self, name=name, lib_path=lib_path)
    AggregateFunction.__init__(self, inputs, output, name=self.name)
def udf(input_type, output_type, strict=True, libraries=None):
    '''Define a UDF for BigQuery

    The decorated Python function is compiled to JavaScript and wrapped in a
    ``CREATE TEMPORARY FUNCTION`` statement, which is exposed as the ``js``
    attribute of both the returned callable and every node it creates.

    Parameters
    ----------
    input_type : List[DataType]
        One type per parameter of the decorated function, in order.
    output_type : DataType
    strict : bool
        Whether or not to put a ``'use strict';`` string at the beginning of
        the UDF. Setting to ``False`` is probably a bad idea.
    libraries : List[str]
        A list of Google Cloud Storage URIs pointing to JavaScript source
        code. Note that any symbols (functions, classes, variables, etc.) that
        are exposed in these JavaScript files will be visible inside the UDF.

    Returns
    -------
    wrapper : Callable
        The wrapped function

    Raises
    ------
    TypeError
        If the decorated object is not callable.
    ValueError
        If fewer input types are given than the decorated function has
        parameters.

    Notes
    -----
    - ``INT64`` is not supported as an argument type or a return type, as per
      `the BigQuery documentation
      <https://cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions#sql-type-encodings-in-javascript>`_.
    - `The following example doctest doesn't work for Python 3.8
      <https://github.com/ibis-project/ibis/issues/2085>`_.

    Examples
    --------
    >>> if PY38:
    ...     import pytest; pytest.skip("Issue #2085")
    >>> from ibis_bigquery import udf
    >>> import ibis.expr.datatypes as dt
    >>> @udf(input_type=[dt.double], output_type=dt.double)
    ... def add_one(x):
    ...     return x + 1
    >>> print(add_one.js)
    CREATE TEMPORARY FUNCTION add_one_0(x FLOAT64)
    RETURNS FLOAT64
    LANGUAGE js AS """
    'use strict';
    function add_one(x) {
        return (x + 1);
    }
    return add_one(x);
    """;
    >>> @udf(input_type=[dt.double, dt.double],
    ...      output_type=dt.Array(dt.double))
    ... def my_range(start, stop):
    ...     def gen(start, stop):
    ...         curr = start
    ...         while curr < stop:
    ...             yield curr
    ...             curr += 1
    ...     result = []
    ...     for value in gen(start, stop):
    ...         result.append(value)
    ...     return result
    >>> print(my_range.js)
    CREATE TEMPORARY FUNCTION my_range_0(start FLOAT64, stop FLOAT64)
    RETURNS ARRAY<FLOAT64>
    LANGUAGE js AS """
    'use strict';
    function my_range(start, stop) {
        function* gen(start, stop) {
            let curr = start;
            while ((curr < stop)) {
                yield curr;
                curr += 1;
            }
        }
        let result = [];
        for (let value of gen(start, stop)) {
            result.push(value);
        }
        return result;
    }
    return my_range(start, stop);
    """;
    >>> @udf(
    ...     input_type=[dt.double, dt.double],
    ...     output_type=dt.Struct.from_tuples([
    ...         ('width', 'double'), ('height', 'double')
    ...     ])
    ... )
    ... def my_rectangle(width, height):
    ...     class Rectangle:
    ...         def __init__(self, width, height):
    ...             self.width = width
    ...             self.height = height
    ...
    ...         @property
    ...         def area(self):
    ...             return self.width * self.height
    ...
    ...         def perimeter(self):
    ...             return 2 * (self.width + self.height)
    ...
    ...     return Rectangle(width, height)
    >>> print(my_rectangle.js)
    CREATE TEMPORARY FUNCTION my_rectangle_0(width FLOAT64, height FLOAT64)
    RETURNS STRUCT<width FLOAT64, height FLOAT64>
    LANGUAGE js AS """
    'use strict';
    function my_rectangle(width, height) {
        class Rectangle {
            constructor(width, height) {
                this.width = width;
                this.height = height;
            }
            get area() {
                return (this.width * this.height);
            }
            perimeter() {
                return (2 * (this.width + this.height));
            }
        }
        return (new Rectangle(width, height));
    }
    return my_rectangle(width, height);
    """;
    '''
    v.validate_output_type(output_type)

    # The types are walked twice for every decorated function (node fields
    # and SQL signature), so materialise them once: a one-shot iterable would
    # otherwise be exhausted after the first pass and yield an empty
    # signature.
    input_types = list(input_type)

    if libraries is None:
        libraries = []

    def wrapper(f):
        if not callable(f):
            raise TypeError('f must be callable, got {}'.format(f))

        signature = inspect.signature(f)
        parameter_names = list(signature.parameters)

        # The generated JavaScript calls ``f`` with *every* parameter name,
        # while only the zipped (name, type) pairs are declared in the SQL
        # signature. With too few types the UDF would reference undeclared
        # identifiers, so fail here instead of emitting broken SQL.
        if len(parameter_names) > len(input_types):
            raise ValueError(
                (
                    'Length mismatch in arguments to udf: function {!r} '
                    'takes {} parameter(s) but only {} input type(s) were '
                    'given'
                ).format(
                    f.__name__, len(parameter_names), len(input_types)
                )
            )

        typed_parameters = list(zip(parameter_names, input_types))

        udf_node_fields = collections.OrderedDict(
            [
                (param_name, Arg(rlz.value(param_type)))
                for param_name, param_type in typed_parameters
            ]
            + [
                (
                    'output_type',
                    # ``output_type`` is bound as a default argument so the
                    # lambda keeps the value it was created with.
                    lambda self, output_type=output_type: rlz.shape_like(
                        self.args, dtype=output_type
                    ),
                ),
                # Reserve a slot so each node instance can carry its SQL.
                ('__slots__', ('js',)),
            ]
        )

        udf_node = create_udf_node(f.__name__, udf_node_fields)

        @compiles(udf_node)
        def compiles_udf_node(t, expr):
            # A call site renders as ``<external name>(<translated args>)``.
            return '{}({})'.format(
                udf_node.__name__, ', '.join(map(t.translate, expr.op().args))
            )

        # One context is shared by the return type and all argument types;
        # the return type is translated first, as before.
        type_translation_context = UDFContext()
        return_type = ibis_type_to_bigquery_type(
            dt.dtype(output_type), type_translation_context
        )
        bigquery_signature = ', '.join(
            '{} {}'.format(
                param_name,
                ibis_type_to_bigquery_type(
                    dt.dtype(param_type), type_translation_context
                ),
            )
            for param_name, param_type in typed_parameters
        )
        source = PythonToJavaScriptTranslator(f).compile()
        js = '''\
CREATE TEMPORARY FUNCTION {external_name}({signature})
RETURNS {return_type}
LANGUAGE js AS """
{strict}{source}
return {internal_name}({args});
"""{libraries};'''.format(
            external_name=udf_node.__name__,
            internal_name=f.__name__,
            return_type=return_type,
            source=source,
            signature=bigquery_signature,
            strict=(repr('use strict') + ';\n') if strict else '',
            args=', '.join(parameter_names),
            libraries=(
                '\nOPTIONS (\n    library={}\n)'.format(repr(list(libraries)))
                if libraries
                else ''
            ),
        )

        @functools.wraps(f)
        def wrapped(*args, **kwargs):
            node = udf_node(*args, **kwargs)
            node.js = js
            return node.to_expr()

        # Keep the original signature visible to introspection tools and
        # expose the generated SQL on the callable itself.
        wrapped.__signature__ = signature
        wrapped.js = js
        return wrapped

    return wrapper