def _get_udfs(self, cur, klass):
    """Build one ``klass`` wrapper per function row fetched from ``cur``.

    Parameters
    ----------
    cur : object
        Cursor-like object; ``cur.fetchall()`` must return a sized sequence
        of ``(out_type, sig)`` pairs.
    klass : callable
        Invoked as ``klass(inputs, output, name=name)`` for every row.

    Returns
    -------
    list
        The objects returned by ``klass``, in row order; an empty list when
        the cursor returned no rows.
    """
    from ibis.expr.rules import varargs
    from ibis.expr.datatypes import validate_type

    def _to_type(x):
        # Lower-case the Impala type name, map it to its ibis counterpart
        # and validate the result.
        return validate_type(udf._impala_type_to_ibis(x.lower()))

    rows = cur.fetchall()
    if len(rows) == 0:
        return []

    functions = []
    for return_type, signature in rows:
        name, raw_types = _split_signature(signature)

        inputs = []
        for arg in _type_parser(raw_types).types:
            # _arg_type yields two groups; exactly one is expected to be set
            # (presumably variadic form vs. plain type -- confirm against the
            # pattern definition).
            variadic, plain = _arg_type.match(arg).groups()
            if not plain:
                # TODO: the varargs rule replaces whatever fixed argument
                # types were collected so far rather than being appended
                # (i.e. inputs.append(varargs(t))).
                inputs = varargs(_to_type(variadic))
                break
            inputs.append(_to_type(plain))

        # The output type is mapped to ibis but, unlike the inputs, is not
        # passed through validate_type.
        output = udf._impala_type_to_ibis(return_type.lower())
        functions.append(klass(inputs, output, name=name))
    return functions
def _get_udfs(self, cur, klass):
    """Turn the rows pending on ``cur`` into a list of ``klass`` instances.

    Each fetched row is unpacked as ``(out_type, sig)``. The signature is
    split into a function name and its argument types, the types are mapped
    to ibis types, and ``klass(inputs, output, name=name)`` is called.

    Parameters
    ----------
    cur : object
        Cursor-like object exposing ``fetchall()``.
    klass : callable
        Factory for the wrapper objects that are returned.

    Returns
    -------
    list
        One ``klass`` result per row (empty when no rows were fetched).
    """
    from ibis.expr.rules import varargs
    from ibis.expr.datatypes import validate_type

    def _to_type(x):
        ibis_type = udf._impala_type_to_ibis(x.lower())
        return validate_type(ibis_type)

    def _input_signature(arg_types):
        # Collect validated ibis types for the arguments. As soon as an
        # argument does not match the "simple" group, the whole signature
        # becomes a single varargs rule and parsing stops.
        collected = []
        for arg in arg_types:
            var, simple = _arg_type.match(arg).groups()
            if simple:
                collected.append(_to_type(simple))
                continue
            # TODO: previously collected types are dropped here instead of
            # appending the varargs rule (inputs.append(varargs(t))).
            return varargs(_to_type(var))
        return collected

    fetched = cur.fetchall()
    if not len(fetched) > 0:
        return []

    wrapped = []
    for out_type, sig in fetched:
        name, type_string = _split_signature(sig)
        inputs = _input_signature(_type_parser(type_string).types)
        output = udf._impala_type_to_ibis(out_type.lower())
        wrapped.append(klass(inputs, output, name=name))
    return wrapped
class CoalesceLike(ValueOp):
    """Value operation taking varargs input with an upcast output type.

    According to Impala documentation:
    Return type: same as the initial argument value, except that integer
    values are promoted to BIGINT and floating-point values are promoted to
    DOUBLE; use CAST() when inserting into a smaller numeric column
    """

    # Input rule: varargs of rules.value.
    input_type = rules.varargs(rules.value)

    # Output rule; presumably _coalesce_upcast implements the promotion
    # described in the docstring -- confirm against its definition.
    output_type = _coalesce_upcast
def test_udf_varargs(udfcon, alltypes, udf_ll, test_data_db):
    """Wrap, register, create and execute a varargs-of-double UDF.

    The function is named ``add_numbers_`` plus the first four characters of
    ``util.guid()``. There is no explicit assertion: the test passes when the
    final ``execute()`` call does not raise.
    """
    func_name = f'add_numbers_{util.guid()[:4]}'
    varargs_sig = rules.varargs(rules.double)

    add_numbers = api.wrap_udf(
        udf_ll, varargs_sig, 'double', 'AddNumbers', name=func_name
    )
    add_numbers.register(func_name, test_data_db)
    udfcon.create_function(add_numbers, database=test_data_db)

    # Call the wrapped UDF with two column arguments and run it.
    call_expr = add_numbers(alltypes.double_col, alltypes.double_col)
    call_expr.execute()
def test_udf_varargs(udfcon, alltypes, udf_ll, test_data_db):
    """Exercise a UDF whose input signature is ``varargs(double)``.

    Wraps the ``'AddNumbers'`` symbol from ``udf_ll``, registers it under a
    guid-suffixed name in ``test_data_db``, creates it through ``udfcon`` and
    executes a call with ``double_col`` passed twice. Success means no
    exception was raised.
    """
    suffix = util.guid()[:4]
    udf_name = 'add_numbers_{0}'.format(suffix)

    wrapped = api.wrap_udf(
        udf_ll,
        rules.varargs(rules.double),
        'double',
        'AddNumbers',
        name=udf_name,
    )
    wrapped.register(udf_name, test_data_db)
    udfcon.create_function(wrapped, database=test_data_db)

    wrapped(alltypes.double_col, alltypes.double_col).execute()
def test_udf_varargs(self):
    """Create and execute a varargs-of-double UDF using the test case state.

    Reads ``self.alltypes``, ``self.udf_ll``, ``self.test_data_db`` and
    ``self.con``. The test has no assertion; it passes when executing the
    UDF call does not raise.
    """
    table = self.alltypes
    database = self.test_data_db

    suffix = util.guid()[:4]
    udf_name = 'add_numbers_{0}'.format(suffix)
    signature = rules.varargs(rules.double)

    wrapped = api.wrap_udf(
        self.udf_ll, signature, 'double', 'AddNumbers', name=udf_name
    )
    wrapped.register(udf_name, database)
    self.con.create_function(wrapped, database=database)

    # Two arguments are passed to the wrapped varargs UDF.
    call_expr = wrapped(table.double_col, table.double_col)
    call_expr.execute()
class MADLibAPI(object):
    """
    Class responsible for wrapping all MADLib-on-Impala API functions,
    creating them in a particular Impala database, and registering them for
    use with Ibis.

    On construction every entry of ``_udas`` and ``_udfs`` is wrapped and
    stored as an attribute named after its key (e.g. ``api.linr_fit``), and
    each wrapper is registered against ``database``. The functions are only
    created in Impala when ``create_functions`` is called.

    Parameters
    ----------
    library_path : object
        Forwarded as the first argument to ``wrap_uda`` / ``wrap_udf``.
    database : object
        Database used when registering the wrappers and when creating the
        functions through a client.
    func_prefix : str, optional
        Prefix prepended to every function name. Any falsy value (``None``
        or an empty string) falls back to ``'madlib_'``.
    """

    # name -> (input types, output type, update symbol)
    _udas = {
        'linr_fit': (['string', 'double'], 'string', 'LinrUpdate'),
        'logr_fit': (['string', 'string', 'boolean', 'double', 'double'],
                     'string', 'LogrUpdate'),
        'svm_fit': (['string', 'string', 'boolean', 'double', 'double'],
                    'string', 'SVMUpdate'),
    }

    # name -> (input types, output type, symbol)
    _udfs = {
        'linr_predict': (['string', 'string'], 'double', 'LinrPredict'),
        'logr_predict': (['string', 'string'], 'boolean', 'LogrPredict'),
        'logr_loss': (['string', 'string', 'boolean'], 'double', 'LogrLoss'),
        'svm_predict': (['string', 'string'], 'boolean', 'SVMPredict'),
        'svm_loss': (['string', 'string', 'boolean'], 'double', 'SVMLoss'),
        # Varargs function; its symbol looks like a mangled C++ name rather
        # than a plain identifier.
        'to_array': (rules.varargs(rules.double), 'string',
                     ('_Z7ToArrayPN10impala_udf'
                      '15FunctionContextEiPNS_9DoubleValE')),
        'arrayget': (['int64', 'string'], 'double', 'ArrayGet'),
        'allbytes': ([], 'string', 'AllBytes'),
        'printarray': (['string'], 'string', 'PrintArray'),
        'encodearray': (['string'], 'string', 'EncodeArray'),
        'decodearray': (['string'], 'string', 'DecodeArray'),
    }

    def __init__(self, library_path, database, func_prefix=None):
        self.library_path = library_path
        self.database = database

        # Materialize the keys as lists before concatenating: on Python 3
        # ``dict.keys()`` returns a view that does not support ``+``.
        self.function_names = sorted(list(self._udfs) + list(self._udas))
        self.func_prefix = func_prefix or 'madlib_'

        self._generate_wrappers()
        self._register_functions()

    def _generate_wrappers(self):
        """Wrap every UDA and UDF and expose each as an attribute."""
        for name, (inputs, output, update_sym) in self._udas.items():
            func = wrap_uda(self.library_path, inputs, output, update_sym,
                            name=self.func_prefix + name)
            setattr(self, name, func)

        for name, (inputs, output, sym) in self._udfs.items():
            func = wrap_udf(self.library_path, inputs, output, sym,
                            name=self.func_prefix + name)
            setattr(self, name, func)

    def _register_functions(self):
        """Register every wrapper under its prefixed name in the database."""
        # Enable SQL translation to work correctly
        for name in self.function_names:
            func = getattr(self, name)
            func.register(func.name, self.database)

    def create_functions(self, client):
        """Create every wrapped function in ``self.database`` via ``client``.

        ``client`` must provide ``create_function(func, database=...)``.
        """
        for name in self.function_names:
            func = getattr(self, name)
            client.create_function(func, database=self.database)

    def logistic_regression(self):
        """Placeholder; not implemented (returns ``None``)."""
        pass

    def linear_regression(self):
        """Placeholder; not implemented (returns ``None``)."""
        pass

    def svm(self):
        """Placeholder; not implemented (returns ``None``)."""
        pass