Exemple #1
0
    def testUnion(self):
        """Union a distinct projection of ``self.expr`` with a second table.

        A temporary second table is created and its ``(id2 renamed to id,
        name)`` projection is unioned with the distinct ``(name, id)`` rows
        of ``self.expr``.  The executed result is compared with the expected
        rows after sorting both sides, so row order is not significant.
        """
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        data2 = [
            ['name3', 5, -1],
            ['name4', 6, -2]
        ]

        schema2 = Schema.from_lists(
            ['name', 'id2', 'id3'],
            [validate_data_type(t) for t in ('string', 'int64', 'int64')])
        table_name = tn('pyodps_test_engine_table2')
        table2 = self._create_table_and_insert_data(table_name, schema2, data2)
        expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

        try:
            # Loaded inside the ``try`` so that a failure while preparing the
            # primary data still drops the temporary table created above.
            self._gen_data(data=data)

            expr = self.expr['name', 'id'].distinct().union(expr2[expr2.id2.rename('id'), 'name'])

            res = self.engine.execute(expr)
            result = self._get_result(res)

            expected = [
                ['name1', 4],
                ['name1', 3],
                ['name2', 2],
                ['name3', 5],
                ['name4', 6]
            ]

            # Sort both sides so the comparison does not depend on row order.
            result = sorted(result)
            expected = sorted(expected)

            self.assertEqual(len(result), len(expected))
            for actual_row, expected_row in zip(result, expected):
                # Normalise every cell through ``to_str`` before comparing.
                self.assertEqual([to_str(t) for t in actual_row],
                                 [to_str(t) for t in expected_row])

        finally:
            # Close every cached engine connection first, then remove the
            # temporary table (same order as the original cleanup).
            for conn in _engine_to_connections.values():
                conn.close()
            table2.drop()
Exemple #2
0
    def testJoin(self):
        """Exercise the join variants between ``self.expr`` and a second table.

        Covers a join without explicit keys, then inner / left / right joins
        on ``name`` and ``id == id2``, an outer join (skipped when the SQL
        engine is named ``'mysql'``), a join against a cached aggregation and
        a window function applied on top of a join (also skipped for
        ``'mysql'``).  Each checked case verifies the row count and that every
        returned row is one of the expected rows.
        """
        data = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        data2 = [['name1', 4, -1], ['name2', 1, -2]]

        schema2 = Schema.from_lists(
            ['name', 'id2', 'id3'],
            [validate_data_type(t) for t in ('string', 'int64', 'int64')])
        table_name = tn('pyodps_test_engine_table2')
        table2 = self._create_table_and_insert_data(table_name, schema2, data2)
        expr2 = CollectionExpr(_source_data=table2, _schema=schema2)

        try:
            # Loaded inside the ``try`` so that a failure while preparing the
            # primary data still drops the temporary table created above.
            self._gen_data(data=data)

            # Join without explicit keys.
            # NOTE(review): presumably this joins on the shared ``name``
            # column -- confirm against the join implementation.
            expr = self.expr.join(expr2)['name', 'id2']

            res = self.engine.execute(expr)
            result = self._get_result(res)

            self.assertEqual(len(result), 5)
            expected = [[to_str('name1'), 4], [to_str('name2'), 1]]
            self.assertTrue(all(it in expected for it in result))

            # Inner join on name and id == id2.
            expr = self.expr.join(expr2, on=['name',
                                             ('id', 'id2')])[self.expr.name,
                                                             expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            self.assertEqual(len(result), 2)
            expected = [to_str('name1'), 4]
            self.assertTrue(all(it == expected for it in result))

            # Left join: unmatched left rows are expected with a None id2.
            expr = self.expr.left_join(expr2,
                                       on=['name',
                                           ('id', 'id2')])[self.expr.name,
                                                           expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            expected = [['name1', 4], ['name2', None], ['name1', 4],
                        ['name1', None], ['name1', None]]
            self.assertEqual(len(result), 5)
            self.assertTrue(all(it in expected for it in result))

            # Right join: the unmatched right row is expected with a None name.
            expr = self.expr.right_join(expr2,
                                        on=['name',
                                            ('id', 'id2')])[self.expr.name,
                                                            expr2.id2]
            res = self.engine.execute(expr)
            result = self._get_result(res)
            expected = [
                ['name1', 4],
                ['name1', 4],
                [None, 1],
            ]
            self.assertEqual(len(result), 3)
            self.assertTrue(all(it in expected for it in result))

            # The outer join case is not run when the engine is named 'mysql'.
            if self.sql_engine.name != 'mysql':
                expr = self.expr.outer_join(expr2,
                                            on=['name',
                                                ('id', 'id2')])[self.expr.name,
                                                                expr2.id2]
                res = self.engine.execute(expr)
                result = self._get_result(res)
                expected = [
                    ['name1', 4],
                    ['name1', 4],
                    ['name2', None],
                    ['name1', None],
                    ['name1', None],
                    [None, 1],
                ]
                self.assertEqual(len(result), 6)
                self.assertTrue(all(it in expected for it in result))

            # Joining against a cached aggregation only has to execute; the
            # result is not inspected.
            grouped = self.expr.groupby('name').agg(
                new_id=self.expr.id.sum()).cache()
            self.engine.execute(self.expr.join(grouped, on='name'))

            # A window function over a join only has to execute; this case is
            # also not run when the engine is named 'mysql'.
            if self.sql_engine.name != 'mysql':
                expr = self.expr.join(expr2, on=[
                    'name', ('id', 'id2')
                ])[lambda x: x.groupby(Scalar(1)).sort('name').row_number(), ]
                self.engine.execute(expr)
        finally:
            # Close every cached engine connection first, then remove the
            # temporary table (same order as the original cleanup).
            for conn in _engine_to_connections.values():
                conn.close()
            table2.drop()
Exemple #3
0
 def teardown(self):
     """Release the resources used by a test.

     Closes every connection held in ``_engine_to_connections``, drops
     ``self.table`` and finally closes ``self.conn``, in that order.
     """
     # A plain loop instead of a list comprehension: the calls are made only
     # for their side effect, so no throw-away list needs to be built.
     for conn in _engine_to_connections.values():
         conn.close()
     self.table.drop()
     self.conn.close()