Ejemplo n.º 1
0
    def testBizarreField(self):
        def my_func(row):
            return getattr(row, '012') * 2.0

        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid', '012'],
                                   datatypes('string', 'int64', 'float64', 'float64'))

        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        self.engine.compile(expr.apply(my_func, axis=1, names=['out_col'], types=['float64']))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        udtf = get_function(udtf, UDF_CLASS_NAME)
        self.assertEqual([20, 40],
                         runners.simple_run(udtf, [('name1', 1, None, 10), ('name2', 2, None, 20)]))
Ejemplo n.º 2
0
class Test(TestBase):
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid'],
                                    datatypes('string', 'int64', 'float64'))

        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=table, _schema=schema)

        self.engine = ODPSEngine(self.odps)

    def testSimpleLambda(self):
        self.engine.compile(self.expr.id.map(lambda x: x + 1))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf)
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([4, ], runners.simple_run(udf, [(3, ), ]))

    def testSimpleFunction(self):
        def my_func(x):
            if x < 0:
                return -1
            elif x == 0:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf)
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))

    def testNestFunction(self):
        def my_func(x):
            def inner(y):
                if y < 0:
                    return -2
                elif y == 0:
                    return 0
                else:
                    return 2
            return inner(x)

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-2, 0, 2], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))

    def testGlobalVarFunction(self):
        global_val = 10
        def my_func(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-9, ), (10, ), (15, )]))

    def testRefFuncFunction(self):
        global_val = 10
        def my_func1(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1
        def my_func(y):
            return my_func1(y)

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-9, ), (10, ), (15, )]))

    def testApplyToSequenceFuntion(self):
        def my_func(row):
            return row.name + str(row.id)

        self.engine.compile(self.expr.apply(my_func, axis=1, reduce=True).rename('test'))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2'],
                         runners.simple_run(udf, [('name', 1, None), ('name', 2, None)]))

    def testApplyFunction(self):
        def my_func(row):
            return row.name, row.id

        self.engine.compile(self.expr.apply(my_func, axis=1, names=['name', 'id'], types=['string', 'int']))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udtf, globals(), locals())
        udtf = locals()[UDF_CLASS_NAME]
        self.assertEqual([('name1', 1), ('name2', 2)],
                          runners.simple_run(udtf, [('name1', 1, None), ('name2', 2, None)]))

    def testApplyGeneratorFunction(self):
        def my_func(row):
            for n in row.name.split(','):
                yield n

        self.engine.compile(self.expr.apply(my_func, axis=1, names='name'))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udtf, globals(), locals())
        udtf = locals()[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2', 'name3', 'name4'],
                         runners.simple_run(udtf, [('name1,name2', 1, None), ('name3,name4', 2, None)]))
Ejemplo n.º 3
0
class Test(TestBase):
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid'],
                                    datatypes('string', 'int64', 'float64'))

        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=table, _schema=schema)

        self.engine = ODPSEngine(self.odps)

    def testSimpleLambda(self):
        self.engine.compile(self.expr.id.map(lambda x: x + 1))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        udf = get_function(udf, UDF_CLASS_NAME)
        self.assertSequenceEqual([4, ], runners.simple_run(udf, [(3, ), ]))

    def testSimpleFunction(self):
        def my_func(x):
            if x < 0:
                return -1
            elif x == 0:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        udf = get_function(udf, UDF_CLASS_NAME)
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))

    @numpy_case
    def testSimpleNumpyFunction(self):
        import numpy as np

        def my_func(x):
            if x < 0:
                return np.int32(-1)
            elif x == 0:
                return np.int32(0)
            else:
                return np.int32(1)

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        udf = get_function(udf, UDF_CLASS_NAME)
        run_result = runners.simple_run(udf, [(-3, ), (0, ), (5, )])
        self.assertSequenceEqual([-1, 0, 1], run_result)
        self.assertFalse(any(isinstance(v, np.generic) for v in run_result))

    def testNestFunction(self):
        def my_func(x):
            def inner(y):
                if y < 0:
                    return -2
                elif y == 0:
                    return 0
                else:
                    return 2
            return inner(x)

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        udf = get_function(udf, UDF_CLASS_NAME)
        self.assertSequenceEqual([-2, 0, 2], runners.simple_run(udf, [(-3, ), (0, ), (5, )]))

    def testGlobalVarFunction(self):
        global_val = 10

        def my_func(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        udf = get_function(udf, UDF_CLASS_NAME)
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-9, ), (10, ), (15, )]))

    def testRefFuncFunction(self):
        global_val = 10

        def my_func1(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1

        def my_func(y):
            return my_func1(y)

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        udf = get_function(udf, UDF_CLASS_NAME)
        self.assertSequenceEqual([-1, 0, 1], runners.simple_run(udf, [(-9, ), (10, ), (15, )]))

    def testApplyToSequenceFuntion(self):
        def my_func(row):
            return row.name + str(row.id)

        self.engine.compile(self.expr.apply(my_func, axis=1, reduce=True).rename('test'))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        udf = get_function(udf, UDF_CLASS_NAME)
        self.assertEqual(['name1', 'name2'],
                         runners.simple_run(udf, [('name', 1, None), ('name', 2, None)]))

    def testApplyFunction(self):
        def my_func(row):
            return row.name, row.id

        self.engine.compile(self.expr.apply(my_func, axis=1, names=['name', 'id'], types=['string', 'int']))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        udtf = get_function(udtf, UDF_CLASS_NAME)
        self.assertEqual([('name1', 1), ('name2', 2)],
                          runners.simple_run(udtf, [('name1', 1, None), ('name2', 2, None)]))

    @numpy_case
    def testApplyNumpyFunction(self):
        import numpy as np

        def my_func(row):
            return row.name, np.int32(row.id)

        self.engine.compile(self.expr.apply(my_func, axis=1, names=['name', 'id'], types=['string', 'int']))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        udtf = get_function(udtf, UDF_CLASS_NAME)
        run_result = runners.simple_run(udtf, [('name1', 1, None), ('name2', 2, None)])
        self.assertEqual([('name1', 1), ('name2', 2)], run_result)

    def testApplyGeneratorFunction(self):
        def my_func(row):
            for n in row.name.split(','):
                yield n

        self.engine.compile(self.expr.apply(my_func, axis=1, names='name'))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        udtf = get_function(udtf, UDF_CLASS_NAME)
        self.assertEqual(['name1', 'name2', 'name3', 'name4'],
                         runners.simple_run(udtf, [('name1,name2', 1, None), ('name3,name4', 2, None)]))

    def testAggFunction(self):
        class Agg(object):
            def buffer(self):
                return ['']

            def __call__(self, buffer, val):
                if not buffer[0]:
                    buffer[0] = val
                else:
                    buffer[0] += ',' + val

            def merge(self, buffer, pbuffer):
                if not pbuffer[0]:
                    return
                elif not buffer[0]:
                    buffer[0] = pbuffer[0]
                else:
                    buffer[0] += ',' + pbuffer[0]

            def getvalue(self, buffer):
                return buffer[0]

        self.engine.compile(self.expr.name.agg(Agg))
        udaf = list(self.engine._ctx._func_to_udfs.values())[0]
        udaf = get_function(udaf, UDF_CLASS_NAME)
        self.assertEqual(['name1,name2,name3,name4'],
                         runners.simple_run(udaf, [('name1,name2',), ('name3,name4',)]))

    @numpy_case
    def testAggNumpyFunction(self):
        import numpy as np

        class Agg(object):
            def buffer(self):
                return [np.int32(1)]

            def __call__(self, buffer, val):
                buffer[0] *= val

            def merge(self, buffer, pbuffer):
                buffer[0] *= pbuffer[0]

            def getvalue(self, buffer):
                return buffer[0]

        self.engine.compile(self.expr.id.agg(Agg))
        udaf = list(self.engine._ctx._func_to_udfs.values())[0]
        udaf = get_function(udaf, UDF_CLASS_NAME)
        result = runners.simple_run(udaf, [(3,), (6,), (5,)])
        self.assertEqual([90], result)
        self.assertNotIsInstance(result, np.generic)

    @unittest.skipIf(PY26, 'Ignored under Python 2.6')
    def testBizarreField(self):
        def my_func(row):
            return getattr(row, '012') * 2.0

        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid', '012'],
                                   datatypes('string', 'int64', 'float64', 'float64'))

        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        expr = CollectionExpr(_source_data=table, _schema=schema)

        self.engine.compile(expr.apply(my_func, axis=1, names=['out_col'], types=['float64']))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        udtf = get_function(udtf, UDF_CLASS_NAME)
        self.assertEqual([20, 40],
                         runners.simple_run(udtf, [('name1', 1, None, 10), ('name2', 2, None, 20)]))
class Test(TestBase):
    def setup(self):
        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema = Schema.from_lists(['name', 'id', 'fid'],
                                   datatypes('string', 'int64', 'float64'))

        table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=table, _schema=schema)

        self.engine = ODPSEngine(self.odps)

    def testSimpleLambda(self):
        self.engine.compile(self.expr.id.map(lambda x: x + 1))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf)
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([
            4,
        ], runners.simple_run(udf, [
            (3, ),
        ]))

    def testSimpleFunction(self):
        def my_func(x):
            if x < 0:
                return -1
            elif x == 0:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf)
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1],
                                 runners.simple_run(udf, [(-3, ), (0, ),
                                                          (5, )]))

    def testNestFunction(self):
        def my_func(x):
            def inner(y):
                if y < 0:
                    return -2
                elif y == 0:
                    return 0
                else:
                    return 2

            return inner(x)

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-2, 0, 2],
                                 runners.simple_run(udf, [(-3, ), (0, ),
                                                          (5, )]))

    def testGlobalVarFunction(self):
        global_val = 10

        def my_func(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1],
                                 runners.simple_run(udf, [(-9, ), (10, ),
                                                          (15, )]))

    def testRefFuncFunction(self):
        global_val = 10

        def my_func1(x):
            if x < global_val:
                return -1
            elif x == global_val:
                return 0
            else:
                return 1

        def my_func(y):
            return my_func1(y)

        self.engine.compile(self.expr.id.map(my_func))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertSequenceEqual([-1, 0, 1],
                                 runners.simple_run(udf, [(-9, ), (10, ),
                                                          (15, )]))

    def testApplyToSequenceFuntion(self):
        def my_func(row):
            return row.name + str(row.id)

        self.engine.compile(
            self.expr.apply(my_func, axis=1, reduce=True).rename('test'))
        udf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udf, globals(), locals())
        udf = locals()[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2'],
                         runners.simple_run(udf, [('name', 1, None),
                                                  ('name', 2, None)]))

    def testApplyFunction(self):
        def my_func(row):
            return row.name, row.id

        self.engine.compile(
            self.expr.apply(my_func,
                            axis=1,
                            names=['name', 'id'],
                            types=['string', 'int']))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udtf, globals(), locals())
        udtf = locals()[UDF_CLASS_NAME]
        self.assertEqual([('name1', 1), ('name2', 2)],
                         runners.simple_run(udtf, [('name1', 1, None),
                                                   ('name2', 2, None)]))

    def testApplyGeneratorFunction(self):
        def my_func(row):
            for n in row.name.split(','):
                yield n

        self.engine.compile(self.expr.apply(my_func, axis=1, names='name'))
        udtf = list(self.engine._ctx._func_to_udfs.values())[0]
        six.exec_(udtf, globals(), locals())
        udtf = locals()[UDF_CLASS_NAME]
        self.assertEqual(['name1', 'name2', 'name3', 'name4'],
                         runners.simple_run(udtf, [('name1,name2', 1, None),
                                                   ('name3,name4', 2, None)]))