Esempio n. 1
0
    def _get_udfs(self, cur, klass):
        """Build one ``klass`` wrapper per function row fetched from ``cur``.

        Every fetched row is unpacked as ``(out_type, sig)``. The signature
        is split into the function name and its argument types; each argument
        type name is lower-cased, mapped through ``udf._impala_type_to_ibis``
        and passed to ``validate_type``.

        Parameters
        ----------
        cur : cursor-like object
            Must provide ``fetchall()`` yielding ``(out_type, sig)`` rows.
        klass : callable
            Called as ``klass(inputs, output, name=name)`` for each row.

        Returns
        -------
        list
            One ``klass`` result per row; empty when nothing was fetched.
        """
        from ibis.expr.rules import varargs
        from ibis.expr.datatypes import validate_type

        def _as_ibis(type_name):
            # Impala type names are matched in lower case.
            return validate_type(udf._impala_type_to_ibis(type_name.lower()))

        def _parse_inputs(arg_specs):
            # Collect fixed argument types until a variadic one shows up.
            collected = []
            for spec in arg_specs:
                var_name, simple_name = _arg_type.match(spec).groups()
                if not simple_name:
                    # A variadic argument replaces the whole input signature;
                    # fixed arguments collected so far are dropped.
                    # TODO: keep them, i.e. collected.append(varargs(...))
                    return varargs(_as_ibis(var_name))
                collected.append(_as_ibis(simple_name))
            return collected

        rows = cur.fetchall()
        if len(rows) == 0:
            return []

        functions = []
        for return_type, signature in rows:
            func_name, raw_args = _split_signature(signature)
            input_sig = _parse_inputs(_type_parser(raw_args).types)
            # The output type is mapped but not passed through validate_type.
            output_sig = udf._impala_type_to_ibis(return_type.lower())
            functions.append(klass(input_sig, output_sig, name=func_name))
        return functions
Esempio n. 2
0
    def _get_udfs(self, cur, klass):
        """Turn the rows returned by ``cur`` into a list of ``klass`` objects.

        Rows are expected to unpack as ``(out_type, sig)``. For each row the
        signature is split into a name and argument types, the types are
        converted to ibis types, and ``klass(inputs, output, name=name)`` is
        appended to the result.

        Parameters
        ----------
        cur : cursor-like object
            Source of rows, read in full via ``fetchall()``.
        klass : callable
            Factory invoked once per row.

        Returns
        -------
        list
            The created objects, or an empty list when there are no rows.
        """
        from ibis.expr.rules import varargs
        from ibis.expr.datatypes import validate_type

        def _convert(impala_name):
            mapped = udf._impala_type_to_ibis(impala_name.lower())
            return validate_type(mapped)

        fetched = cur.fetchall()
        if len(fetched) == 0:
            return []

        wrapped = []
        for return_type, signature in fetched:
            func_name, arg_spec = _split_signature(signature)

            arg_types = []
            for raw_arg in _type_parser(arg_spec).types:
                variadic, fixed = _arg_type.match(raw_arg).groups()
                if not fixed:
                    # The variadic type becomes the entire input signature and
                    # parsing stops; previously gathered fixed types are lost.
                    # TODO: arg_types.append(varargs(...)) instead
                    arg_types = varargs(_convert(variadic))
                    break
                arg_types.append(_convert(fixed))

            ibis_output = udf._impala_type_to_ibis(return_type.lower())
            wrapped.append(klass(arg_types, ibis_output, name=func_name))
        return wrapped
Esempio n. 3
0
class CoalesceLike(ValueOp):
    """Value operation taking a variable number of value arguments.

    The input signature is ``rules.varargs(rules.value)`` and the output type
    is computed by ``_coalesce_upcast``.
    """

    # According to Impala documentation:
    # Return type: same as the initial argument value, except that integer
    # values are promoted to BIGINT and floating-point values are promoted to
    # DOUBLE; use CAST() when inserting into a smaller numeric column

    # Accept any number of arguments, each of which must be a value expression.
    input_type = rules.varargs(rules.value)
    # NOTE(review): presumably implements the promotion described above --
    # confirm against the definition of _coalesce_upcast.
    output_type = _coalesce_upcast
Esempio n. 4
0
def test_udf_varargs(udfcon, alltypes, udf_ll, test_data_db):
    """Wrap, register, create and execute a UDF with a variadic signature.

    The test makes no assertion on the result; it passes as long as every
    step, including execution, completes without raising.
    """
    # Short random suffix on the function name.
    func_name = f'add_numbers_{util.guid()[:4]}'

    # Any number of double arguments.
    variadic_doubles = rules.varargs(rules.double)
    add_numbers = api.wrap_udf(
        udf_ll, variadic_doubles, 'double', 'AddNumbers', name=func_name
    )
    add_numbers.register(func_name, test_data_db)
    udfcon.create_function(add_numbers, database=test_data_db)

    # Call the variadic function with two arguments and run the query.
    add_numbers(alltypes.double_col, alltypes.double_col).execute()
Esempio n. 5
0
def test_udf_varargs(udfcon, alltypes, udf_ll, test_data_db):
    """Exercise a variadic UDF end to end: wrap, register, create, execute.

    Nothing is asserted about the query result; the test only requires that
    no step raises.
    """
    # Short random suffix on the function name.
    udf_name = 'add_numbers_{0}'.format(util.guid()[:4])

    # Signature accepting any number of doubles.
    signature = rules.varargs(rules.double)
    wrapped = api.wrap_udf(
        udf_ll, signature, 'double', 'AddNumbers', name=udf_name
    )
    wrapped.register(udf_name, test_data_db)
    udfcon.create_function(wrapped, database=test_data_db)

    # Two arguments are passed to the variadic function, then it is executed.
    query = wrapped(alltypes.double_col, alltypes.double_col)
    query.execute()
Esempio n. 6
0
    def test_udf_varargs(self):
        """Wrap, register, create and execute a UDF taking variadic doubles.

        No assertion is made on the result; the test succeeds when execution
        finishes without raising.
        """
        table = self.alltypes

        # Short random suffix on the function name.
        udf_name = 'add_numbers_{0}'.format(util.guid()[:4])

        # Signature accepting any number of doubles.
        signature = rules.varargs(rules.double)
        wrapped = api.wrap_udf(
            self.udf_ll, signature, 'double', 'AddNumbers', name=udf_name
        )
        wrapped.register(udf_name, self.test_data_db)
        self.con.create_function(wrapped, database=self.test_data_db)

        # Invoke the variadic function with two arguments and run it.
        wrapped(table.double_col, table.double_col).execute()
Esempio n. 7
0
class MADLibAPI(object):
    """
    Class responsible for wrapping all MADLib-on-Impala API functions, creating
    them in a particular Impala database, and registering them for use with
    Ibis.

    Each wrapped function is exposed as an instance attribute named after its
    key in ``_udas`` / ``_udfs`` (for example ``api.linr_fit``).

    Parameters
    ----------
    library_path : str
        Path of the library passed to ``wrap_uda`` / ``wrap_udf``.
    database : str
        Database the functions are registered under and created in.
    func_prefix : str, optional
        Prefix prepended to every function name; defaults to ``'madlib_'``.
    """

    # Aggregate functions: name -> (input types, output type, update symbol).
    _udas = {
        'linr_fit': (['string', 'double'], 'string', 'LinrUpdate'),
        'logr_fit': (['string', 'string', 'boolean', 'double',
                      'double'], 'string', 'LogrUpdate'),
        'svm_fit': (['string', 'string', 'boolean', 'double',
                     'double'], 'string', 'SVMUpdate'),
    }

    # Scalar functions: name -> (input types, output type, symbol).
    _udfs = {
        'linr_predict': (['string', 'string'], 'double', 'LinrPredict'),
        'logr_predict': (['string', 'string'], 'boolean', 'LogrPredict'),
        'logr_loss': (['string', 'string', 'boolean'], 'double', 'LogrLoss'),
        'svm_predict': (['string', 'string'], 'boolean', 'SVMPredict'),
        'svm_loss': (['string', 'string', 'boolean'], 'double', 'SVMLoss'),
        # Variadic function, referenced by its full symbol string.
        'to_array': (rules.varargs(rules.double), 'string',
                     ('_Z7ToArrayPN10impala_udf'
                      '15FunctionContextEiPNS_9DoubleValE')),
        'arrayget': (['int64', 'string'], 'double', 'ArrayGet'),
        'allbytes': ([], 'string', 'AllBytes'),
        'printarray': (['string'], 'string', 'PrintArray'),
        'encodearray': (['string'], 'string', 'EncodeArray'),
        'decodearray': (['string'], 'string', 'DecodeArray'),
    }

    def __init__(self, library_path, database, func_prefix=None):
        self.library_path = library_path
        self.database = database

        # dict.keys() returns a view on Python 3, and views do not support
        # ``+``; build explicit lists so this works on Python 2 and 3 alike.
        self.function_names = sorted(list(self._udfs) + list(self._udas))
        self.func_prefix = func_prefix or 'madlib_'

        self._generate_wrappers()
        self._register_functions()

    def _generate_wrappers(self):
        """Create a wrapper per UDA/UDF and attach it as an attribute."""
        for name, (inputs, output, update_sym) in self._udas.items():
            func = wrap_uda(self.library_path,
                            inputs,
                            output,
                            update_sym,
                            name=self.func_prefix + name)
            setattr(self, name, func)

        for name, (inputs, output, sym) in self._udfs.items():
            func = wrap_udf(self.library_path,
                            inputs,
                            output,
                            sym,
                            name=self.func_prefix + name)
            setattr(self, name, func)

    def _register_functions(self):
        """Register every wrapper under its prefixed name in the database."""
        # Enable SQL translation to work correctly
        for name in self.function_names:
            func = getattr(self, name)
            func.register(func.name, self.database)

    def create_functions(self, client):
        """Call ``client.create_function`` for every wrapped function.

        Parameters
        ----------
        client : object
            Must provide ``create_function(func, database=...)``.
        """
        for name in self.function_names:
            func = getattr(self, name)
            client.create_function(func, database=self.database)

    def logistic_regression(self):
        """Placeholder; not implemented yet (returns ``None``)."""
        pass

    def linear_regression(self):
        """Placeholder; not implemented yet (returns ``None``)."""
        pass

    def svm(self):
        """Placeholder; not implemented yet (returns ``None``)."""
        pass