def testString(self):
        data = self._gen_data(5)

        methods_to_fields = [
            (lambda s: s.capitalize(), self.expr.name.capitalize()),
            (lambda s: data[0][0] in s,
             self.expr.name.contains(data[0][0], regex=False)),
            (lambda s: s.count(data[0][0]), self.expr.name.count(data[0][0])),
            (lambda s: s.endswith(data[0][0]),
             self.expr.name.endswith(data[0][0])),
            (lambda s: s.startswith(data[0][0]),
             self.expr.name.startswith(data[0][0])),
            (lambda s: s.find(data[0][0]), self.expr.name.find(data[0][0])),
            (lambda s: s.rfind(data[0][0]), self.expr.name.rfind(data[0][0])),
            (lambda s: s.replace(data[0][0], 'test'),
             self.expr.name.replace(data[0][0], 'test')),
            (lambda s: s[0], self.expr.name.get(0)),
            (lambda s: len(s), self.expr.name.len()),
            (lambda s: s.ljust(10), self.expr.name.ljust(10)),
            (lambda s: s.ljust(20, '*'), self.expr.name.ljust(20,
                                                              fillchar='*')),
            (lambda s: s.rjust(10), self.expr.name.rjust(10)),
            (lambda s: s.rjust(20, '*'), self.expr.name.rjust(20,
                                                              fillchar='*')),
            (lambda s: s * 4, self.expr.name.repeat(4)),
            (lambda s: s[2:10:2], self.expr.name.slice(2, 10, 2)),
            (lambda s: s[-5:-1], self.expr.name.slice(-5, -1)),
            (lambda s: s.title(), self.expr.name.title()),
            (lambda s: s.rjust(20, '0'), self.expr.name.zfill(20)),
            (lambda s: s.isalnum(), self.expr.name.isalnum()),
            (lambda s: s.isalpha(), self.expr.name.isalpha()),
            (lambda s: s.isdigit(), self.expr.name.isdigit()),
            (lambda s: s.isspace(), self.expr.name.isspace()),
            (lambda s: s.isupper(), self.expr.name.isupper()),
            (lambda s: s.istitle(), self.expr.name.istitle()),
            (lambda s: to_str(s).isnumeric(), self.expr.name.isnumeric()),
            (lambda s: to_str(s).isdecimal(), self.expr.name.isdecimal()),
        ]

        fields = [
            it[1].rename('id' + str(i))
            for i, it in enumerate(methods_to_fields)
        ]

        expr = self.expr[fields]

        res = self.engine.execute(expr)
        result = self._get_result(res)

        for i, it in enumerate(methods_to_fields):
            method = it[0]

            first = [method(it[0]) for it in data]
            second = [it[i] for it in result]
            self.assertEqual(first, second)
    def testUnion(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.bigint, types.bigint])
        table_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(name=table_name, schema=schema2)
        expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2))

        self._gen_data(data=data)

        data2 = [
            ['name3', 5, -1],
            ['name4', 6, -2]
        ]

        self.odps.write_table(table2, 0, [table2.new_record(values=d) for d in data2])

        try:
            expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name'])

            res = self.engine.execute(expr)
            result = self._get_result(res)

            expected = [
                ['name1', 4],
                ['name1', 3],
                ['name2', 2],
                ['name3', 5],
                ['name4', 6]
            ]

            result = sorted(result)
            expected = sorted(expected)

            self.assertEqual(len(result), len(expected))
            for e, r in zip(result, expected):
                self.assertEqual([to_str(t) for t in e],
                                 [to_str(t) for t in r])

        finally:
            table2.drop()
Beispiel #3
0
    def testUnion(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        data2 = [
            ['name3', 5, -1],
            ['name4', 6, -2]
        ]

        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    datatypes('string', 'int64', 'int64'))
        table_name = tn('pyodps_test_engine_table2')
        table2 = self._create_table_and_insert_data(table_name, schema2, data2)
        expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

        self._gen_data(data=data)

        try:
            expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name'])

            res = self.engine.execute(expr)
            result = self._get_result(res)

            expected = [
                ['name1', 4],
                ['name1', 3],
                ['name2', 2],
                ['name3', 5],
                ['name4', 6]
            ]

            result = sorted(result)
            expected = sorted(expected)

            self.assertEqual(len(result), len(expected))
            for e, r in zip(result, expected):
                self.assertEqual([to_str(t) for t in e],
                                 [to_str(t) for t in r])

        finally:
            [conn.close() for conn in _engine_to_connections.values()]
            table2.drop()
    def testJoin(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.bigint, types.bigint])
        table_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(name=table_name, schema=schema2)
        expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2))

        self._gen_data(data=data)

        data2 = [
            ['name1', 4, -1],
            ['name2', 1, -2]
        ]

        self.odps.write_table(table2, 0, [table2.new_record(values=d) for d in data2])

        try:
            expr = self.expr.join(expr2)['name', 'id2']

            res = self.engine.execute(expr)
            result = self._get_result(res)

            self.assertEqual(len(result), 5)
            expected = [
                [to_str('name1'), 4],
                [to_str('name2'), 1]
            ]
            self.assertTrue(all(it in expected for it in result))

            expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            self.assertEqual(len(result), 2)
            expected = [to_str('name1'), 4]
            self.assertTrue(all(it == expected for it in result))

        finally:
            table2.drop()
    def testString(self):
        data = self._gen_data(5)

        methods_to_fields = [
            (lambda s: s.capitalize(), self.expr.name.capitalize()),
            (lambda s: data[0][0] in s, self.expr.name.contains(data[0][0], regex=False)),
            (lambda s: s.count(data[0][0]), self.expr.name.count(data[0][0])),
            (lambda s: s.endswith(data[0][0]), self.expr.name.endswith(data[0][0])),
            (lambda s: s.startswith(data[0][0]), self.expr.name.startswith(data[0][0])),
            (lambda s: s.find(data[0][0]), self.expr.name.find(data[0][0])),
            (lambda s: s.rfind(data[0][0]), self.expr.name.rfind(data[0][0])),
            (lambda s: s.replace(data[0][0], 'test'), self.expr.name.replace(data[0][0], 'test')),
            (lambda s: s[0], self.expr.name.get(0)),
            (lambda s: len(s), self.expr.name.len()),
            (lambda s: s.ljust(10), self.expr.name.ljust(10)),
            (lambda s: s.ljust(20, '*'), self.expr.name.ljust(20, fillchar='*')),
            (lambda s: s.rjust(10), self.expr.name.rjust(10)),
            (lambda s: s.rjust(20, '*'), self.expr.name.rjust(20, fillchar='*')),
            (lambda s: s * 4, self.expr.name.repeat(4)),
            (lambda s: s[2: 10: 2], self.expr.name.slice(2, 10, 2)),
            (lambda s: s[-5: -1], self.expr.name.slice(-5, -1)),
            (lambda s: s.title(), self.expr.name.title()),
            (lambda s: s.rjust(20, '0'), self.expr.name.zfill(20)),
            (lambda s: s.isalnum(), self.expr.name.isalnum()),
            (lambda s: s.isalpha(), self.expr.name.isalpha()),
            (lambda s: s.isdigit(), self.expr.name.isdigit()),
            (lambda s: s.isspace(), self.expr.name.isspace()),
            (lambda s: s.isupper(), self.expr.name.isupper()),
            (lambda s: s.istitle(), self.expr.name.istitle()),
            (lambda s: to_str(s).isnumeric(), self.expr.name.isnumeric()),
            (lambda s: to_str(s).isdecimal(), self.expr.name.isdecimal()),
        ]

        fields = [it[1].rename('id'+str(i)) for i, it in enumerate(methods_to_fields)]

        expr = self.expr[fields]

        res = self.engine.execute(expr)
        result = self._get_result(res)

        for i, it in enumerate(methods_to_fields):
            method = it[0]

            first = [method(it[0]) for it in data]
            second = [it[i] for it in result]
            self.assertEqual(first, second)
    def testJoin(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.bigint, types.bigint])
        table_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(name=table_name, schema=schema2)
        expr2 = CollectionExpr(_source_data=table2,
                               _schema=odps_schema_to_df_schema(schema2))

        self._gen_data(data=data)

        data2 = [['name1', 4, -1], ['name2', 1, -2]]

        self.odps.write_table(table2, 0,
                              [table2.new_record(values=d) for d in data2])

        try:
            expr = self.expr.join(expr2)['name', 'id2']

            res = self.engine.execute(expr)
            result = self._get_result(res)

            self.assertEqual(len(result), 5)
            expected = [[to_str('name1'), 4], [to_str('name2'), 1]]
            self.assertTrue(all(it in expected for it in result))

            expr = self.expr.join(expr2, on=['name',
                                             ('id', 'id2')])[self.expr.name,
                                                             expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            self.assertEqual(len(result), 2)
            expected = [to_str('name1'), 4]
            self.assertTrue(all(it == expected for it in result))

        finally:
            table2.drop()
    def testUnion(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.bigint, types.bigint])
        table_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(name=table_name, schema=schema2)
        expr2 = CollectionExpr(_source_data=table2,
                               _schema=odps_schema_to_df_schema(schema2))

        self._gen_data(data=data)

        data2 = [['name3', 5, -1], ['name4', 6, -2]]

        self.odps.write_table(table2, 0,
                              [table2.new_record(values=d) for d in data2])

        try:
            expr = self.expr['name', 'id'].distinct().union(
                expr2[expr2.id2.rename('id'), 'name'])

            res = self.engine.execute(expr)
            result = self._get_result(res)

            expected = [['name1', 4], ['name1', 3], ['name2', 2], ['name3', 5],
                        ['name4', 6]]

            result = sorted(result)
            expected = sorted(expected)

            self.assertEqual(len(result), len(expected))
            for e, r in zip(result, expected):
                self.assertEqual([to_str(t) for t in e],
                                 [to_str(t) for t in r])

        finally:
            table2.drop()
    def testUnion(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.int64, types.int64])

        self._gen_data(data=data)

        data2 = [
            ['name3', 5, -1],
            ['name4', 6, -2]
        ]

        import pandas as pd
        expr2 = CollectionExpr(_source_data=pd.DataFrame(data2, columns=schema2.names),
                               _schema=schema2)

        expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name'])

        res = self.engine.execute(expr)
        result = self._get_result(res)

        expected = [
            ['name1', 4],
            ['name1', 3],
            ['name2', 2],
            ['name3', 5],
            ['name4', 6]
        ]

        result = sorted(result)
        expected = sorted(expected)

        self.assertEqual(len(result), len(expected))
        for e, r in zip(result, expected):
            self.assertEqual([to_str(t) for t in e],
                             [to_str(t) for t in r])
    def testJoin(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.int64, types.int64])

        self._gen_data(data=data)

        data2 = [
            ['name1', 4, -1],
            ['name2', 1, -2]
        ]

        import pandas as pd
        expr2 = CollectionExpr(_source_data=pd.DataFrame(data2, columns=schema2.names),
                               _schema=schema2)

        expr = self.expr.join(expr2)['name', 'id2']

        res = self.engine.execute(expr)
        result = self._get_result(res)

        self.assertEqual(len(result), 5)
        expected = [
            [to_str('name1'), 4],
            [to_str('name2'), 1]
        ]
        self.assertTrue(all(it in expected for it in result))

        expr = self.expr.join(expr2, on=['name', ('id', 'id2')])[self.expr.name, expr2.id2]
        res = self.engine.execute(expr)
        result = self._get_result(res)
        self.assertEqual(len(result), 2)
        expected = [to_str('name1'), 4]
        self.assertTrue(all(it == expected for it in result))
    def testJoin(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.int64, types.int64])

        self._gen_data(data=data)

        data2 = [['name1', 4, -1], ['name2', 1, -2]]

        import pandas as pd
        expr2 = CollectionExpr(_source_data=pd.DataFrame(
            data2, columns=schema2.names),
                               _schema=schema2)

        expr = self.expr.join(expr2)['name', 'id2']

        res = self.engine.execute(expr)
        result = self._get_result(res)

        self.assertEqual(len(result), 5)
        expected = [[to_str('name1'), 4], [to_str('name2'), 1]]
        self.assertTrue(all(it in expected for it in result))

        expr = self.expr.join(expr2, on=['name',
                                         ('id', 'id2')])[self.expr.name,
                                                         expr2.id2]
        res = self.engine.execute(expr)
        result = self._get_result(res)
        self.assertEqual(len(result), 2)
        expected = [to_str('name1'), 4]
        self.assertTrue(all(it == expected for it in result))
    def testUnion(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.int64, types.int64])

        self._gen_data(data=data)

        data2 = [['name3', 5, -1], ['name4', 6, -2]]

        import pandas as pd
        expr2 = CollectionExpr(_source_data=pd.DataFrame(
            data2, columns=schema2.names),
                               _schema=schema2)

        expr = self.expr['name',
                         'id'].distinct().union(expr2[expr2.id2.rename('id'),
                                                      'name'])

        res = self.engine.execute(expr)
        result = self._get_result(res)

        expected = [['name1', 4], ['name1', 3], ['name2', 2], ['name3', 5],
                    ['name4', 6]]

        result = sorted(result)
        expected = sorted(expected)

        self.assertEqual(len(result), len(expected))
        for e, r in zip(result, expected):
            self.assertEqual([to_str(t) for t in e], [to_str(t) for t in r])
    def testElement(self):
        data = self._gen_data(5, nullable_field='name')

        fields = [
            self.expr.name.isnull().rename('name1'),
            self.expr.name.notnull().rename('name2'),
            self.expr.name.fillna('test').rename('name3'),
            self.expr.id.isin([1, 2, 3]).rename('id1'),
            self.expr.id.isin(self.expr.fid.astype('int')).rename('id2'),
            self.expr.id.notin([1, 2, 3]).rename('id3'),
            self.expr.id.notin(self.expr.fid.astype('int')).rename('id4'),
            self.expr.id.between(self.expr.fid, 3).rename('id5'),
            self.expr.name.fillna('test').switch('test', 'test' + self.expr.name.fillna('test'),
                                                 'test2', 'test2' + self.expr.name.fillna('test'),
                                                 default=self.expr.name).rename('name4'),
            self.expr.id.cut([100, 200, 300],
                             labels=['xsmall', 'small', 'large', 'xlarge'],
                             include_under=True, include_over=True).rename('id6')
        ]

        expr = self.expr[fields]

        res = self.engine.execute(expr)
        result = self._get_result(res)

        self.assertEqual(len(data), len(result))

        self.assertEqual(len([it for it in data if it[0] is None]),
                         len([it[0] for it in result if it[0]]))

        self.assertEqual(len([it[0] for it in data if it[0] is not None]),
                         len([it[1] for it in result if it[1]]))

        self.assertEqual([(it[0] if it[0] is not None else 'test') for it in data],
                         [it[2] for it in result])

        self.assertEqual([(it[1] in (1, 2, 3)) for it in data],
                         [it[3] for it in result])

        fids = [int(it[2]) for it in data]
        self.assertEqual([(it[1] in fids) for it in data],
                         [it[4] for it in result])

        self.assertEqual([(it[1] not in (1, 2, 3)) for it in data],
                         [it[5] for it in result])

        self.assertEqual([(it[1] not in fids) for it in data],
                         [it[6] for it in result])

        self.assertEqual([(it[2] <= it[1] <= 3) for it in data],
                         [it[7] for it in result])

        self.assertEqual([to_str('testtest' if it[0] is None else it[0]) for it in data],
                         [to_str(it[8]) for it in result])

        def get_val(val):
            if val <= 100:
                return 'xsmall'
            elif 100 < val <= 200:
                return 'small'
            elif 200 < val <= 300:
                return 'large'
            else:
                return 'xlarge'
        self.assertEqual([to_str(get_val(it[1])) for it in data], [to_str(it[9]) for it in result])
    def testElement(self):
        data = self._gen_data(5, nullable_field='name')

        fields = [
            self.expr.name.isnull().rename('name1'),
            self.expr.name.notnull().rename('name2'),
            self.expr.name.fillna('test').rename('name3'),
            self.expr.id.isin([1, 2, 3]).rename('id1'),
            self.expr.id.isin(self.expr.fid.astype('int')).rename('id2'),
            self.expr.id.notin([1, 2, 3]).rename('id3'),
            self.expr.id.notin(self.expr.fid.astype('int')).rename('id4'),
            self.expr.id.between(self.expr.fid, 3).rename('id5'),
            self.expr.name.fillna('test').switch(
                'test',
                'test' + self.expr.name.fillna('test'),
                'test2',
                'test2' + self.expr.name.fillna('test'),
                default=self.expr.name).rename('name4'),
            self.expr.id.cut([100, 200, 300],
                             labels=['xsmall', 'small', 'large', 'xlarge'],
                             include_under=True,
                             include_over=True).rename('id6')
        ]

        expr = self.expr[fields]

        res = self.engine.execute(expr)
        result = self._get_result(res)

        self.assertEqual(len(data), len(result))

        self.assertEqual(len([it for it in data if it[0] is None]),
                         len([it[0] for it in result if it[0]]))

        self.assertEqual(len([it[0] for it in data if it[0] is not None]),
                         len([it[1] for it in result if it[1]]))

        self.assertEqual([(it[0] if it[0] is not None else 'test')
                          for it in data], [it[2] for it in result])

        self.assertEqual([(it[1] in (1, 2, 3)) for it in data],
                         [it[3] for it in result])

        fids = [int(it[2]) for it in data]
        self.assertEqual([(it[1] in fids) for it in data],
                         [it[4] for it in result])

        self.assertEqual([(it[1] not in (1, 2, 3)) for it in data],
                         [it[5] for it in result])

        self.assertEqual([(it[1] not in fids) for it in data],
                         [it[6] for it in result])

        self.assertEqual([(it[2] <= it[1] <= 3) for it in data],
                         [it[7] for it in result])

        self.assertEqual(
            [to_str('testtest' if it[0] is None else it[0]) for it in data],
            [to_str(it[8]) for it in result])

        def get_val(val):
            if val <= 100:
                return 'xsmall'
            elif 100 < val <= 200:
                return 'small'
            elif 200 < val <= 300:
                return 'large'
            else:
                return 'xlarge'

        self.assertEqual([to_str(get_val(it[1])) for it in data],
                         [to_str(it[9]) for it in result])
Beispiel #14
0
    def testJoin(self):
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        data2 = [['name1', 4, -1], ['name2', 1, -2]]

        datatypes = lambda *types: [validate_data_type(t) for t in types]
        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    datatypes('string', 'int64', 'int64'))
        table_name = tn('pyodps_test_engine_table2')
        table2 = self._create_table_and_insert_data(table_name, schema2, data2)
        expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

        self._gen_data(data=data)

        try:
            expr = self.expr.join(expr2)['name', 'id2']

            res = self.engine.execute(expr)
            result = self._get_result(res)

            self.assertEqual(len(result), 5)
            expected = [[to_str('name1'), 4], [to_str('name2'), 1]]
            self.assertTrue(all(it in expected for it in result))

            expr = self.expr.join(expr2, on=['name',
                                             ('id', 'id2')])[self.expr.name,
                                                             expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            self.assertEqual(len(result), 2)
            expected = [to_str('name1'), 4]
            self.assertTrue(all(it == expected for it in result))

            expr = self.expr.left_join(expr2,
                                       on=['name',
                                           ('id', 'id2')])[self.expr.name,
                                                           expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            expected = [['name1', 4], ['name2', None], ['name1', 4],
                        ['name1', None], ['name1', None]]
            self.assertEqual(len(result), 5)
            self.assertTrue(all(it in expected for it in result))

            expr = self.expr.right_join(expr2,
                                        on=['name',
                                            ('id', 'id2')])[self.expr.name,
                                                            expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            expected = [
                ['name1', 4],
                ['name1', 4],
                [None, 1],
            ]
            self.assertEqual(len(result), 3)
            self.assertTrue(all(it in expected for it in result))

            if self.sql_engine.name != 'mysql':
                expr = self.expr.outer_join(expr2,
                                            on=['name',
                                                ('id', 'id2')])[self.expr.name,
                                                                expr2.id2]
                res = self.engine.execute(expr)
                result = self._get_result(res)
                expected = [
                    ['name1', 4],
                    ['name1', 4],
                    ['name2', None],
                    ['name1', None],
                    ['name1', None],
                    [None, 1],
                ]
                self.assertEqual(len(result), 6)
                self.assertTrue(all(it in expected for it in result))

            grouped = self.expr.groupby('name').agg(
                new_id=self.expr.id.sum()).cache()
            self.engine.execute(self.expr.join(grouped, on='name'))

            if self.sql_engine.name != 'mysql':
                expr = self.expr.join(expr2, on=[
                    'name', ('id', 'id2')
                ])[lambda x: x.groupby(Scalar(1)).sort('name').row_number(), ]
                self.engine.execute(expr)
        finally:
            [conn.close() for conn in _engine_to_connections.values()]
            table2.drop()
    def testJoin(self):
        data = [
            ['name1', 4, 5.3, None, None],
            ['name2', 2, 3.5, None, None],
            ['name1', 4, 4.2, None, None],
            ['name1', 3, 2.2, None, None],
            ['name1', 3, 4.1, None, None],
        ]

        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.bigint, types.bigint])
        table_name = tn('pyodps_test_engine_table2')
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(name=table_name, schema=schema2)
        expr2 = CollectionExpr(_source_data=table2,
                               _schema=odps_schema_to_df_schema(schema2))

        self._gen_data(data=data)

        data2 = [['name1', 4, -1], ['name2', 1, -2]]

        self.odps.write_table(table2, 0, data2)

        try:
            expr = self.expr.join(expr2)['name', 'id2']

            res = self.engine.execute(expr)
            result = self._get_result(res)

            self.assertEqual(len(result), 5)
            expected = [[to_str('name1'), 4], [to_str('name2'), 1]]
            self.assertTrue(all(it in expected for it in result))

            expr = self.expr.join(expr2, on=['name',
                                             ('id', 'id2')])[self.expr.name,
                                                             expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            self.assertEqual(len(result), 2)
            expected = [to_str('name1'), 4]
            self.assertTrue(all(it == expected for it in result))

            expr = self.expr.left_join(expr2,
                                       on=['name',
                                           ('id', 'id2')])[self.expr.name,
                                                           expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            expected = [['name1', 4], ['name2', None], ['name1', 4],
                        ['name1', None], ['name1', None]]
            self.assertEqual(len(result), 5)
            self.assertTrue(all(it in expected for it in result))

            expr = self.expr.right_join(expr2,
                                        on=['name',
                                            ('id', 'id2')])[self.expr.name,
                                                            expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            expected = [
                ['name1', 4],
                ['name1', 4],
                [None, 1],
            ]
            self.assertEqual(len(result), 3)
            self.assertTrue(all(it in expected for it in result))

            expr = self.expr.outer_join(expr2,
                                        on=['name',
                                            ('id', 'id2')])[self.expr.name,
                                                            expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            expected = [
                ['name1', 4],
                ['name1', 4],
                ['name2', None],
                ['name1', None],
                ['name1', None],
                [None, 1],
            ]
            self.assertEqual(len(result), 6)
            self.assertTrue(all(it in expected for it in result))

            grouped = self.expr.groupby('name').agg(
                new_id=self.expr.id.sum()).cache()
            self.engine.execute(self.expr.join(grouped, on='name'))

            expr = self.expr.join(expr2, on=[
                'name', ('id', 'id2')
            ])[lambda x: x.groupby(Scalar(1)).sort('name').row_number(), ]
            self.engine.execute(expr)
        finally:
            table2.drop()