class Test(TestBase):
    """Exercise ``ExecuteContext`` DAG registration on a mock-table expression."""

    def setup(self):
        """Build the shared three-column collection expression and a new context."""
        column_names = ['name', 'id', 'fid']
        column_types = [validate_data_type(type_name)
                        for type_name in ('string', 'int64', 'float64')]
        schema = Schema.from_lists(column_names, column_types)
        mock_table = MockTable(name='pyodps_test_expr_table', schema=schema)

        self.expr = CollectionExpr(_source_data=mock_table, _schema=schema)
        self.ctx = ExecuteContext()

    def testBuildDAG(self):
        """Register three projections and check the context's DAG bookkeeping.

        NOTE(review): the trailing assertions expect ``_expr_to_dag`` to shrink
        as soon as a projection's local name is deleted, so this method must not
        bind any additional name (loop variable, temporary local) to a
        projection -- presumably the context tracks expressions weakly; confirm.
        """
        first = self.expr[self.expr.name, self.expr.id + 1]
        second = self.expr[self.expr.name, self.expr.id + 2]
        third = self.expr[self.expr.name, self.expr.id + 3]

        # Spelled out one by one on purpose: a loop variable would keep the
        # last projection referenced after the loop ends.
        self.ctx.build_dag(first, first.copy_tree())
        self.ctx.build_dag(second, second.copy_tree())
        self.ctx.build_dag(third, third.copy_tree())

        self.assertEqual(len(self.ctx._expr_to_dag), 3)

        # Each registered DAG must report at least one node.
        for position in range(3):
            self.assertGreater(
                len(list(self.ctx._expr_to_dag.values())[position].nodes()), 0)

        # Walking the DAG from ``first`` must yield the very same objects as
        # walking the expression itself (pairs are compared by identity).
        self.assertTrue(all(
            source_node is dag_node
            for source_node, dag_node in zip(
                first.traverse(unique=True),
                self.ctx.get_dag(first).traverse(first))
        ))

        del first
        self.assertEqual(len(self.ctx._expr_to_dag), 2)
        del second, third
        self.assertEqual(len(self.ctx._expr_to_dag), 0)
class Test(TestBase):
    """Tests for how ``ExecuteContext`` records and releases per-expression DAGs."""

    def setup(self):
        """Prepare ``self.expr`` (mock-table collection) and ``self.ctx``."""
        type_names = ("string", "int64", "float64")
        schema = Schema.from_lists(
            ["name", "id", "fid"],
            [validate_data_type(type_name) for type_name in type_names],
        )
        source_table = MockTable(name="pyodps_test_expr_table", schema=schema)

        self.expr = CollectionExpr(_source_data=source_table, _schema=schema)
        self.ctx = ExecuteContext()

    def testBuildDAG(self):
        """Build DAGs for three projections, then delete the projections.

        NOTE(review): the entry count is expected to fall right after each
        ``del``; keep every projection bound to exactly one local name so the
        deletions actually release them (looks refcount-dependent -- confirm).
        """
        proj_a = self.expr[self.expr.name, self.expr.id + 1]
        proj_b = self.expr[self.expr.name, self.expr.id + 2]
        proj_c = self.expr[self.expr.name, self.expr.id + 3]

        # No loop here: a loop variable would outlive the loop and pin a projection.
        self.ctx.build_dag(proj_a, proj_a.copy_tree())
        self.ctx.build_dag(proj_b, proj_b.copy_tree())
        self.ctx.build_dag(proj_c, proj_c.copy_tree())

        self.assertEqual(len(self.ctx._expr_to_dag), 3)

        # All three recorded DAGs must be non-empty.
        for slot in (0, 1, 2):
            self.assertGreater(len(list(self.ctx._expr_to_dag.values())[slot].nodes()), 0)

        # Expression traversal and DAG traversal must agree object-for-object.
        self.assertTrue(
            all(
                from_expr is from_dag
                for from_expr, from_dag in zip(
                    proj_a.traverse(unique=True),
                    self.ctx.get_dag(proj_a).traverse(proj_a),
                )
            )
        )

        del proj_a
        self.assertEqual(len(self.ctx._expr_to_dag), 2)
        del proj_b, proj_c
        self.assertEqual(len(self.ctx._expr_to_dag), 0)
    def setup(self):
        """Create the mock-table collection expression and a fresh execute context."""
        column_types = [validate_data_type(type_name)
                        for type_name in ('string', 'int64', 'float64')]
        schema = Schema.from_lists(['name', 'id', 'fid'], column_types)
        mock_table = MockTable(name='pyodps_test_expr_table', schema=schema)

        # Stored on the instance so the test methods can reach them.
        self.expr = CollectionExpr(_source_data=mock_table, _schema=schema)
        self.ctx = ExecuteContext()
    def setup(self):
        """Set up ``self.expr`` over a three-column mock table, plus ``self.ctx``."""
        field_names = ["name", "id", "fid"]
        field_types = [validate_data_type(label) for label in ("string", "int64", "float64")]
        schema = Schema.from_lists(field_names, field_types)
        backing_table = MockTable(name="pyodps_test_expr_table", schema=schema)

        self.expr = CollectionExpr(_source_data=backing_table, _schema=schema)
        self.ctx = ExecuteContext()
    def testPandasCompilation(self):
        """Compile a two-column projection of a pandas-backed collection.

        The DAG handed to ``PandasCompiler`` comes from ``ExecuteContext.build_dag``;
        the compiled result is checked for size and for the type found at the
        head of each topologically sorted entry.
        """
        import pandas as pd
        import numpy as np

        letters = 'abc'
        frame = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list(letters))
        schema = Schema.from_lists(list(letters), [types.int8] * 3)

        collection = CollectionExpr(_source_data=frame, _schema=schema)
        expr = collection['a', 'b']

        ctx = ExecuteContext()
        source_dag = ctx.build_dag(expr, expr)
        dag = PandasCompiler(source_dag).compile(expr)

        self.assertEqual(len(dag._graph), 4)

        # Expected order: source collection, the two columns, then the projection.
        expected_kinds = (CollectionExpr, Column, Column, ProjectCollectionExpr)
        topos = dag.topological_sort()
        for position, kind in enumerate(expected_kinds):
            self.assertIsInstance(topos[position][0], kind)
Example #6
0
    def testPandasCompilation(self):
        """Compile a two-column projection of a pandas-backed collection.

        The DAG handed to ``PandasCompiler`` comes from ``expr.to_dag()``; the
        compiled result is checked for size and for the type found at the head
        of each topologically sorted entry.
        """
        import pandas as pd
        import numpy as np

        letters = 'abc'
        frame = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list(letters))
        schema = Schema.from_lists(list(letters), [types.int8] * 3)

        collection = CollectionExpr(_source_data=frame, _schema=schema)
        expr = collection['a', 'b']

        # NOTE(review): ``ctx`` is never read below; it is still constructed so
        # this method keeps doing exactly what it did before -- looks removable.
        ctx = ExecuteContext()

        source_dag = expr.to_dag()
        dag = PandasCompiler(source_dag).compile(expr)

        self.assertEqual(len(dag._graph), 4)

        # Expected order: source collection, the two columns, then the projection.
        expected_kinds = (CollectionExpr, Column, Column, ProjectCollectionExpr)
        topos = dag.topological_sort()
        for position, kind in enumerate(expected_kinds):
            self.assertIsInstance(topos[position][0], kind)