예제 #1
0
    def testMap(self):
        """Check that ``map`` yields a ``MappedExpr`` carrying the expected data type.

        First the type is supplied explicitly through ``rtype``.  When
        ``LESS_PY35`` is false, two functions with a return annotation
        (``float`` and ``Optional[float]``) are mapped without ``rtype`` and
        the resulting data type is expected to be a ``types.Float``.
        """
        explicit = self.expr.id.map(lambda a: float(a + 1), rtype=types.float64)
        self.assertIsInstance(explicit, MappedExpr)
        self.assertIs(explicit._data_type, types.float64)

        if LESS_PY35:
            return

        # The annotated functions are built from source text and executed in a
        # copy of the local namespace, so ``self`` is visible to the snippet.
        plain_source = textwrap.dedent("""
            from typing import Optional
            
            def fun(v) -> float:
                return float(v + 1)
            expr = self.expr.id.map(fun)
            """)
        namespace = locals().copy()
        six.exec_(plain_source, globals(), namespace)
        annotated = namespace['expr']
        self.assertIsInstance(annotated, MappedExpr)
        self.assertIsInstance(annotated._data_type, types.Float)

        optional_source = textwrap.dedent("""
            from typing import Optional
            
            def fun(v) -> Optional[float]:
                return float(v + 1)
            expr = self.expr.id.map(fun)
            """)
        namespace = locals().copy()
        six.exec_(optional_source, globals(), namespace)
        annotated = namespace['expr']
        self.assertIsInstance(annotated, MappedExpr)
        self.assertIsInstance(annotated._data_type, types.Float)
예제 #2
0
    def testApplyFunction(self):
        """Compile a row-wise ``apply`` and run the resulting UDTF on sample rows."""
        def my_func(row):
            return row.name, row.id

        self.engine.compile(self.expr.apply(my_func, axis=1, names=['name', 'id'], types=['string', 'int']))
        udtf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udtf_source, globals(), namespace)
        udtf = namespace[UDF_CLASS_NAME]
        self.assertEqual([('name1', 1), ('name2', 2)],
                         runners.simple_run(udtf, [('name1', 1, None), ('name2', 2, None)]))
예제 #3
0
    def testApplyToSequenceFuntion(self):
        """Compile a reducing row-wise ``apply`` and run the resulting UDF on sample rows."""
        def my_func(row):
            return row.name + str(row.id)

        self.engine.compile(self.expr.apply(my_func, axis=1, reduce=True).rename('test'))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2'],
                         runners.simple_run(udf, [('name', 1, None), ('name', 2, None)]))
 def testSimpleLambda(self):
     """Compile a ``map`` over a lambda and run the resulting UDF on one row."""
     self.engine.compile(self.expr.id.map(lambda x: x + 1))
     udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
     # Execute into an explicit namespace instead of the caller frame's
     # locals: whether names bound by exec become visible through a later
     # ``locals()`` call is implementation-dependent.
     namespace = locals().copy()
     six.exec_(udf_source, globals(), namespace)
     udf = namespace[UDF_CLASS_NAME]
     self.assertSequenceEqual([
         4,
     ], runners.simple_run(udf, [
         (3, ),
     ]))
예제 #5
0
    def testApplyFunction(self):
        """Compile a row-wise ``apply`` and run the resulting UDTF on sample rows."""
        def my_func(row):
            return row.name, row.id

        self.engine.compile(self.expr.apply(my_func, axis=1, names=['name', 'id'], types=['string', 'int']))
        udtf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udtf_source, globals(), namespace)
        udtf = namespace[UDF_CLASS_NAME]
        self.assertEqual([('name1', 1), ('name2', 2)],
                         runners.simple_run(udtf, [('name1', 1, None), ('name2', 2, None)]))
예제 #6
0
    def testApplyToSequenceFuntion(self):
        """Compile a reducing row-wise ``apply`` and run the resulting UDF on sample rows."""
        def my_func(row):
            return row.name + str(row.id)

        self.engine.compile(self.expr.apply(my_func, axis=1, reduce=True).rename('test'))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2'],
                         runners.simple_run(udf, [('name', 1, None), ('name', 2, None)]))
예제 #7
0
    def testApplyGeneratorFunction(self):
        """Compile an ``apply`` over a generator function and run the resulting UDTF.

        Each input row yields one output value per comma-separated part of
        its ``name`` field.
        """
        def my_func(row):
            for n in row.name.split(','):
                yield n

        self.engine.compile(self.expr.apply(my_func, axis=1, names='name'))
        udtf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udtf_source, globals(), namespace)
        udtf = namespace[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2', 'name3', 'name4'],
                         runners.simple_run(udtf, [('name1,name2', 1, None), ('name3,name4', 2, None)]))
예제 #8
0
    def testApplyGeneratorFunction(self):
        """Compile an ``apply`` over a generator function and run the resulting UDTF.

        Each input row yields one output value per comma-separated part of
        its ``name`` field.
        """
        def my_func(row):
            for n in row.name.split(','):
                yield n

        self.engine.compile(self.expr.apply(my_func, axis=1, names='name'))
        udtf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udtf_source, globals(), namespace)
        udtf = namespace[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2', 'name3', 'name4'],
                         runners.simple_run(udtf, [('name1,name2', 1, None), ('name3,name4', 2, None)]))
예제 #9
0
    def testSimpleFunction(self):
        """Compile a ``map`` over a sign-like function and run the resulting UDF."""
        def my_func(x):
            if x < 0:
                return -1
            elif x == 0:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace instead of the caller frame's
        # locals: whether names bound by exec become visible through a later
        # ``locals()`` call is implementation-dependent.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))
예제 #10
0
    def testSimpleFunction(self):
        """Compile a ``map`` over a sign-like function and run the resulting UDF."""
        def my_func(x):
            if x < 0:
                return -1
            elif x == 0:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace instead of the caller frame's
        # locals: whether names bound by exec become visible through a later
        # ``locals()`` call is implementation-dependent.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))
예제 #11
0
    def testGlobalVarFunction(self):
        """Compile a ``map`` over a function that closes over an outer variable.

        ``my_func`` compares its argument with ``global_val`` from the
        enclosing scope; the resulting UDF is then run on sample rows.
        """
        global_val = 10
        def my_func(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-9, ), (10, ), (15, )]))
예제 #12
0
    def testGlobalVarFunction(self):
        """Compile a ``map`` over a function that closes over an outer variable.

        ``my_func`` compares its argument with ``global_val`` from the
        enclosing scope; the resulting UDF is then run on sample rows.
        """
        global_val = 10
        def my_func(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-9, ), (10, ), (15, )]))
예제 #13
0
    def testNestFunction(self):
        """Compile a ``map`` over a function that defines and calls an inner function."""
        def my_func(x):
            def inner(y):
                if y < 0:
                    return -2
                elif y == 0:
                    return 0
                else:
                    return 2
            return inner(x)

        self.engine.compile(self.expr.id.map(my_func))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertSequenceEqual([-2, 0, 2], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))
    def testReduceApply(self):
        """Check that a reducing ``apply`` used as a projected field is a ``MappedExpr``.

        When ``LESS_PY35`` is false, a function with a ``float`` return
        annotation is applied as well and the field's data type is expected
        to be a ``types.Float``.
        """
        name_id = self.expr['name', 'id'].apply(
            lambda row: row.name + row.id, axis=1, reduce=True).rename('nameid')
        projected = self.expr[self.expr.id, name_id]

        self.assertIsInstance(projected._fields[1], MappedExpr)

        if LESS_PY35:
            return

        # The annotated function is built from source text and executed in a
        # copy of the local namespace, so ``self`` is visible to the snippet.
        annotated_source = textwrap.dedent("""
            def fun(r) -> float:
                return r.id + r.fid
            expr = self.expr[self.expr.id, self.expr['id', 'fid'].apply(fun, axis=1, reduce=True).rename('idfid')]
            """)
        namespace = locals().copy()
        six.exec_(annotated_source, globals(), namespace)
        projected = namespace['expr']
        reduced_field = projected._fields[1]
        self.assertIsInstance(reduced_field, MappedExpr)
        self.assertIsInstance(reduced_field._data_type, types.Float)
예제 #15
0
    def testNestFunction(self):
        """Compile a ``map`` over a function that defines and calls an inner function."""
        def my_func(x):
            def inner(y):
                if y < 0:
                    return -2
                elif y == 0:
                    return 0
                else:
                    return 2
            return inner(x)

        self.engine.compile(self.expr.id.map(my_func))
        udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udf_source, globals(), namespace)
        udf = namespace[UDF_CLASS_NAME]
        self.assertSequenceEqual([-2, 0, 2], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))
예제 #16
0
    def testBizarreField(self):
        """Run an ``apply`` UDTF that reads a column named ``'012'``.

        The name is not a valid Python identifier, so the row function reads
        it with ``getattr``.
        """
        def my_func(row):
            return getattr(row, '012') * 2.0

        column_types = [validate_data_type(t)
                        for t in ('string', 'int64', 'float64', 'float64')]
        schema = Schema.from_lists(['name', 'id', 'fid', '012'], column_types)

        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        self.engine.compile(expr.apply(my_func, axis=1, names=['out_col'], types=['float64']))
        udtf_source = list(self.engine._ctx._func_to_udfs.values())[0]
        # Execute into an explicit namespace: inside a function ``locals()`` is
        # only a snapshot, so names bound by exec are not guaranteed to show up
        # in a later ``locals()`` call.
        namespace = locals().copy()
        six.exec_(udtf_source, globals(), namespace)
        udtf = namespace[UDF_CLASS_NAME]
        self.assertEqual([20, 40],
                         runners.simple_run(udtf, [('name1', 1, None, 10), ('name2', 2, None, 20)]))
예제 #17
0
    def testAgg(self):
        """Check that ``agg`` with a user-defined class yields an ``Aggregation``.

        Without annotations the result dtype is expected to equal
        ``types.int64``.  When ``LESS_PY35`` is false, the same aggregator
        with ``getvalue`` annotated as returning ``float`` is expected to
        produce a ``types.Float`` dtype.
        """
        class Agg(object):
            # Summing aggregator that keeps its running total in a one-element list.
            def buffer(self):
                return [0]

            def __call__(self, buffer, val):
                buffer[0] += val

            def merge(self, buffer, pbuffer):
                buffer[0] += pbuffer[0]

            def getvalue(self, buffer):
                return buffer[0]

        aggregated = self.expr.int64.agg(Agg)

        self.assertIsInstance(aggregated, Aggregation)
        self.assertEqual(aggregated.dtype, types.int64)

        if LESS_PY35:
            return

        # The annotated aggregator is built from source text and executed in a
        # copy of the local namespace, so ``self`` is visible to the snippet.
        annotated_source = textwrap.dedent("""
            class Agg(object):
                def buffer(self):
                    return [0]
    
                def __call__(self, buffer, val):
                    buffer[0] += val
    
                def merge(self, buffer, pbuffer):
                    buffer[0] += pbuffer[0]
    
                def getvalue(self, buffer) -> float:
                    return buffer[0]
    
            expr = self.expr.int64.agg(Agg)
            """)
        namespace = locals().copy()
        six.exec_(annotated_source, globals(), namespace)
        aggregated = namespace['expr']
        self.assertIsInstance(aggregated, Aggregation)
        self.assertIsInstance(aggregated.dtype, types.Float)
예제 #18
0
def get_function(source, fun_name):
    """Execute ``source`` in a fresh namespace and return what it binds to ``fun_name``.

    The same new dict serves as both globals and locals for the execution.
    Raises ``KeyError`` when ``source`` does not define ``fun_name``.
    """
    namespace = {}
    six.exec_(source, namespace, namespace)
    return namespace[fun_name]
    def _gen_class_builder_func():
        out_closure = 10

        def _gen_nested_class_obj():
            class BuildCls(BuildBase, metaclass=BuildMeta):
                a = 10

                def b(self, add_val):
                    print(self.a)
                    return self.a + add_val + out_closure

            return BuildCls
        return _gen_nested_class_obj
    """)
    my_locs = locals().copy()
    six.exec_(py3_code, globals(), my_locs)
    _gen_class_builder_func = my_locs.get('_gen_class_builder_func')

# Version-dependent setup: interpreters older than 3.6 get an inline
# ``_gen_format_string_func`` based on ``str.format``; newer ones build
# source text and execute it instead.
if sys.version_info[:2] < (3, 6):

    def _gen_format_string_func():
        # ``out_closure`` is captured by the returned function as a closure variable.
        out_closure = 4.0

        def _format_fun(arg):
            return 'Formatted stuff {0}: {1:>5}'.format(arg, out_closure)

        return _format_fun
else:
    # NOTE(review): the source text below defines ``_gen_format_string_func``
    # with only an assignment in its body and then ``_gen_class_builder_func``;
    # this looks like two snippets spliced together -- confirm against the
    # original file.
    py36_code = textwrap.dedent("""
    def _gen_format_string_func():
        out_closure = 4.0
    def _gen_class_builder_func():
        out_closure = 10

        def _gen_nested_class_obj():
            class BuildCls(BuildBase, metaclass=BuildMeta):
                a = 10

                def b(self, add_val):
                    print(self.a)
                    return self.a + add_val + out_closure

            return BuildCls
        return _gen_nested_class_obj
    """)
    # Execute in a copy of the current namespace and pull the builder back out.
    my_locs = locals().copy()
    # NOTE(review): this executes ``py3_code``, not the ``py36_code`` assigned
    # just above; ``py3_code`` is not defined in the visible code -- verify.
    six.exec_(py3_code, globals(), my_locs)
    # ``.get`` leaves the name bound to None if the executed code did not define it.
    _gen_class_builder_func = my_locs.get('_gen_class_builder_func')


def _gen_nested_fun():
    out_closure = 10

    def _gen_nested_obj():
        # class NestedClass(object):
        def nested_method(add_val):
            return out_closure + add_val

        return nested_method

    return lambda v: _gen_nested_obj()(v)
# under the License.

from odps.tests.core import TestBase
from odps.compat import unittest, six
from odps.models import Schema
from odps.udf.tools import runners
from odps.df.types import validate_data_type
from odps.df.backends.odpssql.engine import ODPSEngine, UDF_CLASS_NAME
from odps.df.expr.expressions import CollectionExpr
from odps.df.expr.tests.core import MockTable

# Required by the cloudpickle tests: the imports below are executed from a
# string into this module's namespace (``globals()`` / ``locals()``) rather
# than written as ordinary import statements.
# NOTE(review): presumably this makes the names available to UDF source that
# the tests exec against ``globals()`` -- confirm.
six.exec_(
    """
import base64
from collections import namedtuple
import inspect
from odps.lib.cloudpickle import *
from odps.lib.importer import *
""", globals(), locals())


class Test(TestBase):
    """Tests that operate on a collection expression over a mock table."""

    def setup(self):
        """Create ``self.expr`` over a mock table with ``name``, ``id`` and ``fid`` columns."""
        column_names = ['name', 'id', 'fid']
        column_types = [validate_data_type(t) for t in ('string', 'int64', 'float64')]
        schema = Schema.from_lists(column_names, column_types)

        mock_table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=mock_table, _schema=schema)
예제 #22
0
 def testSimpleLambda(self):
     """Compile a ``map`` over a lambda and run the resulting UDF on one row."""
     self.engine.compile(self.expr.id.map(lambda x: x + 1))
     udf_source = list(self.engine._ctx._func_to_udfs.values())[0]
     # Execute into an explicit namespace instead of the caller frame's
     # locals: whether names bound by exec become visible through a later
     # ``locals()`` call is implementation-dependent.
     namespace = locals().copy()
     six.exec_(udf_source, globals(), namespace)
     udf = namespace[UDF_CLASS_NAME]
     self.assertSequenceEqual([4, ], runners.simple_run(udf, [(3, ), ]))
예제 #23
0
# under the License.

from odps.tests.core import TestBase
from odps.compat import unittest, six
from odps.models import Schema
from odps.udf.tools import runners
from odps.df.types import validate_data_type
from odps.df.backends.odpssql.engine import ODPSEngine, UDF_CLASS_NAME
from odps.df.expr.expressions import CollectionExpr
from odps.df.expr.tests.core import MockTable

# Required by the cloudpickle tests: the imports below are executed from a
# string into this module's namespace (``globals()`` / ``locals()``) rather
# than written as ordinary import statements.
# NOTE(review): presumably this makes the names available to UDF source that
# the tests exec against ``globals()`` -- confirm.
six.exec_("""
import base64
from collections import namedtuple
import inspect
import functools
from odps.lib.cloudpickle import *
from odps.lib.importer import *
""", globals(), locals())


class Test(TestBase):
    """Tests that operate on a collection expression over a mock table."""

    def setup(self):
        """Create ``self.expr`` over a mock table with ``name``, ``id`` and ``fid`` columns."""
        column_names = ['name', 'id', 'fid']
        column_types = [validate_data_type(t) for t in ('string', 'int64', 'float64')]
        schema = Schema.from_lists(column_names, column_types)

        mock_table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=mock_table, _schema=schema)
예제 #24
0
from odps.udf.tools import runners
from odps.df.types import validate_data_type
from odps.df.backends.odpssql.engine import ODPSSQLEngine, UDF_CLASS_NAME
from odps.df.expr.expressions import CollectionExpr
from odps.df.expr.tests.core import MockTable

# Required by the cloudpickle tests: the statements below are executed from a
# string into this module's namespace (``globals()`` / ``locals()``) rather
# than written as ordinary module-level code.  Besides the imports, the
# snippet defines ``PY2`` and a ``string_type`` alias (``unicode`` on
# Python 2, ``str`` otherwise).
# NOTE(review): presumably this makes the names available to UDF source that
# the tests exec against ``globals()`` -- confirm.
six.exec_("""
import sys
import base64
from collections import namedtuple
import inspect
import functools
from odps.compat import OrderedDict
from odps.lib.cloudpickle import *
from odps.lib.importer import *

PY2 = sys.version_info[0] == 2

if PY2:
    string_type = unicode
else:
    string_type = str
""", globals(), locals())

# Also execute the ``X_NAMED_TUPLE`` code from the codegen module into the
# module namespace, so whatever it defines is available here.
from odps.df.backends.odpssql.codegen import X_NAMED_TUPLE
six.exec_(X_NAMED_TUPLE, globals(), locals())


class ODPSEngine(ODPSSQLEngine):