def testMap(self):
    """Check the result type of ``map`` with an explicit ``rtype`` and with annotations.

    With ``rtype=types.float64`` the mapped expression must carry exactly that
    type.  When ``LESS_PY35`` is false, two functions annotated ``-> float``
    and ``-> Optional[float]`` are defined through ``six.exec_`` (the
    annotation syntax lives in a string so the module still parses on older
    interpreters) and the mapped expression's type must be a ``types.Float``.
    """
    mapped = self.expr.id.map(lambda a: float(a + 1), rtype=types.float64)
    self.assertIsInstance(mapped, MappedExpr)
    self.assertIs(mapped._data_type, types.float64)

    if LESS_PY35:
        return

    # Return annotation is a plain ``float``.
    scope = dict(locals())
    plain_source = textwrap.dedent("""
        from typing import Optional

        def fun(v) -> float:
            return float(v + 1)
        expr = self.expr.id.map(fun)
        """)
    six.exec_(plain_source, globals(), scope)
    mapped = scope['expr']
    self.assertIsInstance(mapped, MappedExpr)
    self.assertIsInstance(mapped._data_type, types.Float)

    # Return annotation is ``Optional[float]``.
    scope = dict(locals())
    optional_source = textwrap.dedent("""
        from typing import Optional

        def fun(v) -> Optional[float]:
            return float(v + 1)
        expr = self.expr.id.map(fun)
        """)
    six.exec_(optional_source, globals(), scope)
    mapped = scope['expr']
    self.assertIsInstance(mapped, MappedExpr)
    self.assertIsInstance(mapped._data_type, types.Float)
def testApplyFunction(self):
    """Compile a row-wise ``apply`` yielding two columns and run the generated UDTF.

    The first value of the engine context's ``_func_to_udfs`` mapping is
    executed as Python source, the class named ``UDF_CLASS_NAME`` is taken
    from the resulting namespace and fed sample rows through
    ``runners.simple_run``.
    """
    def my_func(row):
        return row.name, row.id

    self.engine.compile(
        self.expr.apply(my_func, axis=1, names=['name', 'id'], types=['string', 'int']))
    udtf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace.  Writing into the dict returned by
    # ``locals()`` and reading it back through a second ``locals()`` call is
    # implementation-specific; on Python 3.13+ (PEP 667) every call returns an
    # independent snapshot, so the generated class would not be found.
    namespace = dict(locals())
    six.exec_(udtf, globals(), namespace)
    udtf = namespace[UDF_CLASS_NAME]

    self.assertEqual(
        [('name1', 1), ('name2', 2)],
        runners.simple_run(udtf, [('name1', 1, None), ('name2', 2, None)]))
def testApplyToSequenceFuntion(self):
    """Compile a reducing row-wise ``apply`` and run the generated UDF.

    ``my_func`` concatenates ``row.name`` with ``str(row.id)``; the expression
    is built with ``reduce=True`` and renamed to ``'test'``.  The first value
    of the engine context's ``_func_to_udfs`` mapping is executed as Python
    source and the class named ``UDF_CLASS_NAME`` is run on sample rows.
    """
    def my_func(row):
        return row.name + str(row.id)

    self.engine.compile(self.expr.apply(my_func, axis=1, reduce=True).rename('test'))
    udf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace.  Writing into the dict returned by
    # ``locals()`` and reading it back through a second ``locals()`` call is
    # implementation-specific; on Python 3.13+ (PEP 667) every call returns an
    # independent snapshot, so the generated class would not be found.
    namespace = dict(locals())
    six.exec_(udf, globals(), namespace)
    udf = namespace[UDF_CLASS_NAME]

    self.assertEqual(
        ['name1', 'name2'],
        runners.simple_run(udf, [('name', 1, None), ('name', 2, None)]))
def testSimpleLambda(self):
    """Compile ``map`` with a simple lambda and run the generated UDF.

    The first value of the engine context's ``_func_to_udfs`` mapping is
    executed as Python source; the class named ``UDF_CLASS_NAME`` is then run
    on a single one-column row and must return the input plus one.
    """
    self.engine.compile(self.expr.id.map(lambda x: x + 1))
    udf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace instead of the implicit frame
    # locals.  Names created by a namespace-less exec inside a function are
    # only visible through a later ``locals()`` call on some interpreters; on
    # Python 3.13+ (PEP 667) the exec works on a throw-away snapshot, so the
    # generated class would not be found.
    namespace = dict(locals())
    six.exec_(udf, globals(), namespace)
    udf = namespace[UDF_CLASS_NAME]

    self.assertSequenceEqual([4], runners.simple_run(udf, [(3,)]))
def testApplyGeneratorFunction(self):
    """Compile a row-wise ``apply`` with a generator function and run the UDTF.

    ``my_func`` yields one value per comma-separated piece of ``row.name``.
    The first value of the engine context's ``_func_to_udfs`` mapping is
    executed as Python source and the class named ``UDF_CLASS_NAME`` is run
    on two sample rows, which must expand to four output values.
    """
    def my_func(row):
        for n in row.name.split(','):
            yield n

    self.engine.compile(self.expr.apply(my_func, axis=1, names='name'))
    udtf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace.  Writing into the dict returned by
    # ``locals()`` and reading it back through a second ``locals()`` call is
    # implementation-specific; on Python 3.13+ (PEP 667) every call returns an
    # independent snapshot, so the generated class would not be found.
    namespace = dict(locals())
    six.exec_(udtf, globals(), namespace)
    udtf = namespace[UDF_CLASS_NAME]

    self.assertEqual(
        ['name1', 'name2', 'name3', 'name4'],
        runners.simple_run(udtf, [('name1,name2', 1, None), ('name3,name4', 2, None)]))
def testSimpleFunction(self):
    """Compile ``map`` with a named sign function and run the generated UDF.

    ``my_func`` returns -1, 0 or 1 for negative, zero and positive input.
    The first value of the engine context's ``_func_to_udfs`` mapping is
    executed as Python source and the class named ``UDF_CLASS_NAME`` is run
    on three one-column rows.
    """
    def my_func(x):
        if x < 0:
            return -1
        elif x == 0:
            return 0
        else:
            return 1

    self.engine.compile(self.expr.id.map(my_func))
    udf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace instead of the implicit frame
    # locals.  Names created by a namespace-less exec inside a function are
    # only visible through a later ``locals()`` call on some interpreters; on
    # Python 3.13+ (PEP 667) the exec works on a throw-away snapshot, so the
    # generated class would not be found.
    namespace = dict(locals())
    six.exec_(udf, globals(), namespace)
    udf = namespace[UDF_CLASS_NAME]

    self.assertSequenceEqual(
        [-1, 0, 1],
        runners.simple_run(udf, [(-3,), (0,), (5,)]))
def testGlobalVarFunction(self):
    """Compile ``map`` with a function that closes over an outer variable.

    ``my_func`` compares its argument with ``global_val`` (10), a variable
    of the enclosing test method.  The first value of the engine context's
    ``_func_to_udfs`` mapping is executed as Python source and the class
    named ``UDF_CLASS_NAME`` is run on three one-column rows.
    """
    global_val = 10

    def my_func(x):
        if x < global_val:
            return -1
        elif x == global_val:
            return 0
        else:
            return 1

    self.engine.compile(self.expr.id.map(my_func))
    udf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace.  Writing into the dict returned by
    # ``locals()`` and reading it back through a second ``locals()`` call is
    # implementation-specific; on Python 3.13+ (PEP 667) every call returns an
    # independent snapshot, so the generated class would not be found.
    namespace = dict(locals())
    six.exec_(udf, globals(), namespace)
    udf = namespace[UDF_CLASS_NAME]

    self.assertSequenceEqual(
        [-1, 0, 1],
        runners.simple_run(udf, [(-9,), (10,), (15,)]))
def testNestFunction(self):
    """Compile ``map`` with a function that defines and calls an inner function.

    ``my_func`` delegates to a nested ``inner`` returning -2, 0 or 2 by sign.
    The first value of the engine context's ``_func_to_udfs`` mapping is
    executed as Python source and the class named ``UDF_CLASS_NAME`` is run
    on three one-column rows.
    """
    def my_func(x):
        def inner(y):
            if y < 0:
                return -2
            elif y == 0:
                return 0
            else:
                return 2

        return inner(x)

    self.engine.compile(self.expr.id.map(my_func))
    udf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace.  Writing into the dict returned by
    # ``locals()`` and reading it back through a second ``locals()`` call is
    # implementation-specific; on Python 3.13+ (PEP 667) every call returns an
    # independent snapshot, so the generated class would not be found.
    namespace = dict(locals())
    six.exec_(udf, globals(), namespace)
    udf = namespace[UDF_CLASS_NAME]

    self.assertSequenceEqual(
        [-2, 0, 2],
        runners.simple_run(udf, [(-3,), (0,), (5,)]))
def testReduceApply(self):
    """Check that a reducing ``apply`` used as a projected field is a ``MappedExpr``.

    When ``LESS_PY35`` is false, the same projection is rebuilt through
    ``six.exec_`` with a function annotated ``-> float`` (the annotation
    syntax lives in a string so the module still parses on older
    interpreters); the field's data type must then be a ``types.Float``.
    """
    name_id = self.expr['name', 'id'].apply(
        lambda row: row.name + row.id, axis=1, reduce=True).rename('nameid')
    projected = self.expr[self.expr.id, name_id]
    self.assertIsInstance(projected._fields[1], MappedExpr)

    if LESS_PY35:
        return

    scope = dict(locals())
    annotated_source = textwrap.dedent("""
        def fun(r) -> float:
            return r.id + r.fid
        expr = self.expr[self.expr.id, self.expr['id', 'fid'].apply(fun, axis=1, reduce=True).rename('idfid')]
        """)
    six.exec_(annotated_source, globals(), scope)
    projected = scope['expr']
    reduced_field = projected._fields[1]
    self.assertIsInstance(reduced_field, MappedExpr)
    self.assertIsInstance(reduced_field._data_type, types.Float)
def testBizarreField(self):
    """Compile ``apply`` over a table with a column named ``'012'`` and run the UDTF.

    ``'012'`` is not a valid Python identifier, so ``my_func`` reads it with
    ``getattr`` and doubles it.  The first value of the engine context's
    ``_func_to_udfs`` mapping is executed as Python source and the class
    named ``UDF_CLASS_NAME`` is run on two sample rows.
    """
    def my_func(row):
        return getattr(row, '012') * 2.0

    column_types = [validate_data_type(t)
                    for t in ('string', 'int64', 'float64', 'float64')]
    schema = Schema.from_lists(['name', 'id', 'fid', '012'], column_types)
    table = MockTable(name='pyodps_test_expr_table', schema=schema)
    expr = CollectionExpr(_source_data=table, _schema=schema)

    self.engine.compile(expr.apply(my_func, axis=1, names=['out_col'], types=['float64']))
    udtf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace.  Writing into the dict returned by
    # ``locals()`` and reading it back through a second ``locals()`` call is
    # implementation-specific; on Python 3.13+ (PEP 667) every call returns an
    # independent snapshot, so the generated class would not be found.
    namespace = dict(locals())
    six.exec_(udtf, globals(), namespace)
    udtf = namespace[UDF_CLASS_NAME]

    self.assertEqual(
        [20, 40],
        runners.simple_run(udtf, [('name1', 1, None, 10), ('name2', 2, None, 20)]))
def testAgg(self):
    """Check the result type of ``agg`` with a user-defined aggregator class.

    Without annotations the aggregation over ``self.expr.int64`` must report
    ``types.int64``.  When ``LESS_PY35`` is false, an equivalent class whose
    ``getvalue`` is annotated ``-> float`` is defined through ``six.exec_``
    (the annotation syntax lives in a string so the module still parses on
    older interpreters) and the aggregation's dtype must be a ``types.Float``.
    """
    class Agg(object):
        def buffer(self):
            return [0]

        def __call__(self, buffer, val):
            buffer[0] += val

        def merge(self, buffer, pbuffer):
            buffer[0] += pbuffer[0]

        def getvalue(self, buffer):
            return buffer[0]

    aggregated = self.expr.int64.agg(Agg)
    self.assertIsInstance(aggregated, Aggregation)
    self.assertEqual(aggregated.dtype, types.int64)

    if LESS_PY35:
        return

    scope = dict(locals())
    annotated_source = textwrap.dedent("""
        class Agg(object):
            def buffer(self):
                return [0]

            def __call__(self, buffer, val):
                buffer[0] += val

            def merge(self, buffer, pbuffer):
                buffer[0] += pbuffer[0]

            def getvalue(self, buffer) -> float:
                return buffer[0]

        expr = self.expr.int64.agg(Agg)
        """)
    six.exec_(annotated_source, globals(), scope)
    aggregated = scope['expr']
    self.assertIsInstance(aggregated, Aggregation)
    self.assertIsInstance(aggregated.dtype, types.Float)
def get_function(source, fun_name):
    """Execute ``source`` in a fresh namespace and return the object named ``fun_name``.

    The same dictionary serves as both globals and locals of the executed
    code.  A ``KeyError`` is raised when the code does not define
    ``fun_name``.
    """
    namespace = {}
    six.exec_(source, namespace, namespace)
    return namespace[fun_name]
def _gen_class_builder_func(): out_closure = 10 def _gen_nested_class_obj(): class BuildCls(BuildBase, metaclass=BuildMeta): a = 10 def b(self, add_val): print(self.a) return self.a + add_val + out_closure return BuildCls return _gen_nested_class_obj """) my_locs = locals().copy() six.exec_(py3_code, globals(), my_locs) _gen_class_builder_func = my_locs.get('_gen_class_builder_func') if sys.version_info[:2] < (3, 6): def _gen_format_string_func(): out_closure = 4.0 def _format_fun(arg): return 'Formatted stuff {0}: {1:>5}'.format(arg, out_closure) return _format_fun else: py36_code = textwrap.dedent(""" def _gen_format_string_func(): out_closure = 4.0
def _gen_class_builder_func(): out_closure = 10 def _gen_nested_class_obj(): class BuildCls(BuildBase, metaclass=BuildMeta): a = 10 def b(self, add_val): print(self.a) return self.a + add_val + out_closure return BuildCls return _gen_nested_class_obj """) my_locs = locals().copy() six.exec_(py3_code, globals(), my_locs) _gen_class_builder_func = my_locs.get('_gen_class_builder_func') def _gen_nested_fun(): out_closure = 10 def _gen_nested_obj(): # class NestedClass(object): def nested_method(add_val): return out_closure + add_val return nested_method return lambda v: _gen_nested_obj()(v)
# under the License. from odps.tests.core import TestBase from odps.compat import unittest, six from odps.models import Schema from odps.udf.tools import runners from odps.df.types import validate_data_type from odps.df.backends.odpssql.engine import ODPSEngine, UDF_CLASS_NAME from odps.df.expr.expressions import CollectionExpr from odps.df.expr.tests.core import MockTable # required by cloudpickle tests six.exec_( """ import base64 from collections import namedtuple import inspect from odps.lib.cloudpickle import * from odps.lib.importer import * """, globals(), locals()) class Test(TestBase): def setup(self): datatypes = lambda *types: [validate_data_type(t) for t in types] schema = Schema.from_lists(['name', 'id', 'fid'], datatypes('string', 'int64', 'float64')) table = MockTable(name='pyodps_test_expr_table', schema=schema) self.expr = CollectionExpr(_source_data=table, _schema=schema)
def testSimpleLambda(self):
    """Compile ``map`` with a simple lambda and run the generated UDF.

    The first value of the engine context's ``_func_to_udfs`` mapping is
    executed as Python source; the class named ``UDF_CLASS_NAME`` is then run
    on a single one-column row and must return the input plus one.
    """
    self.engine.compile(self.expr.id.map(lambda x: x + 1))
    udf = list(self.engine._ctx._func_to_udfs.values())[0]

    # Execute into an explicit namespace instead of the implicit frame
    # locals.  Names created by a namespace-less exec inside a function are
    # only visible through a later ``locals()`` call on some interpreters; on
    # Python 3.13+ (PEP 667) the exec works on a throw-away snapshot, so the
    # generated class would not be found.
    namespace = dict(locals())
    six.exec_(udf, globals(), namespace)
    udf = namespace[UDF_CLASS_NAME]

    self.assertSequenceEqual([4], runners.simple_run(udf, [(3,)]))
# under the License. from odps.tests.core import TestBase from odps.compat import unittest, six from odps.models import Schema from odps.udf.tools import runners from odps.df.types import validate_data_type from odps.df.backends.odpssql.engine import ODPSEngine, UDF_CLASS_NAME from odps.df.expr.expressions import CollectionExpr from odps.df.expr.tests.core import MockTable # required by cloudpickle tests six.exec_(""" import base64 from collections import namedtuple import inspect import functools from odps.lib.cloudpickle import * from odps.lib.importer import * """, globals(), locals()) class Test(TestBase): def setup(self): datatypes = lambda *types: [validate_data_type(t) for t in types] schema = Schema.from_lists(['name', 'id', 'fid'], datatypes('string', 'int64', 'float64')) table = MockTable(name='pyodps_test_expr_table', schema=schema) self.expr = CollectionExpr(_source_data=table, _schema=schema)
from odps.udf.tools import runners from odps.df.types import validate_data_type from odps.df.backends.odpssql.engine import ODPSSQLEngine, UDF_CLASS_NAME from odps.df.expr.expressions import CollectionExpr from odps.df.expr.tests.core import MockTable # required by cloudpickle tests six.exec_(""" import sys import base64 from collections import namedtuple import inspect import functools from odps.compat import OrderedDict from odps.lib.cloudpickle import * from odps.lib.importer import * PY2 = sys.version_info[0] == 2 if PY2: string_type = unicode else: string_type = str """, globals(), locals()) from odps.df.backends.odpssql.codegen import X_NAMED_TUPLE six.exec_(X_NAMED_TUPLE, globals(), locals()) class ODPSEngine(ODPSSQLEngine):