class Test(TestBase):
    """Tests for building expression DAGs through an ``ExecuteContext``."""

    def setup(self):
        """Create a mock-table-backed collection expression and a fresh context."""
        column_names = ['name', 'id', 'fid']
        column_types = [
            validate_data_type(type_name)
            for type_name in ('string', 'int64', 'float64')
        ]
        schema = Schema.from_lists(column_names, column_types)
        table = MockTable(name='pyodps_test_expr_table', schema=schema)
        self.expr = CollectionExpr(_source_data=table, _schema=schema)
        self.ctx = ExecuteContext()

    def testBuildDAG(self):
        """Build DAGs for three projections and check they are tracked and dropped.

        Asserts that ``_expr_to_dag`` holds three entries after the builds,
        that each stored DAG has at least one node, and that the entry count
        falls to 2 and then 0 as the local expression names are deleted.
        """
        first = self.expr[self.expr.name, self.expr.id + 1]
        second = self.expr[self.expr.name, self.expr.id + 2]
        third = self.expr[self.expr.name, self.expr.id + 3]

        # Each expression is registered together with a copy of its own tree.
        self.ctx.build_dag(first, first.copy_tree())
        self.ctx.build_dag(second, second.copy_tree())
        self.ctx.build_dag(third, third.copy_tree())

        self.assertEqual(len(self.ctx._expr_to_dag), 3)
        # NOTE(review): DAGs and nodes are deliberately not bound to locals;
        # the assertions below expect entries to disappear as soon as the
        # expression names are deleted, which presumably requires that no
        # other references remain -- confirm against ExecuteContext.
        for position in range(3):
            self.assertGreater(
                len(list(self.ctx._expr_to_dag.values())[position].nodes()), 0)

        # Traversing via the stored DAG must yield the very same node objects
        # as traversing the expression directly.
        self.assertTrue(all(
            direct is via_dag
            for direct, via_dag in zip(first.traverse(unique=True),
                                       self.ctx.get_dag(first).traverse(first))
        ))

        del first
        self.assertEqual(len(self.ctx._expr_to_dag), 2)

        del second, third
        self.assertEqual(len(self.ctx._expr_to_dag), 0)
class Test(TestBase):
    """Exercises ``ExecuteContext.build_dag`` bookkeeping on a mock table."""

    def setup(self):
        """Prepare ``self.expr`` (a three-column collection) and ``self.ctx``."""

        def to_data_types(*type_names):
            # Validate every given type name, keeping the given order.
            return [validate_data_type(type_name) for type_name in type_names]

        schema = Schema.from_lists(
            ["name", "id", "fid"],
            to_data_types("string", "int64", "float64"),
        )
        table = MockTable(name="pyodps_test_expr_table", schema=schema)
        self.expr = CollectionExpr(_source_data=table, _schema=schema)
        self.ctx = ExecuteContext()

    def testBuildDAG(self):
        """Register three projections, then verify tracking and removal.

        The context must report three DAG entries, each with a non-empty node
        collection; the count must then drop to 2 and finally 0 once the
        local expression names are deleted.
        """
        plus_one, plus_two, plus_three = (
            self.expr[self.expr.name, self.expr.id + 1],
            self.expr[self.expr.name, self.expr.id + 2],
            self.expr[self.expr.name, self.expr.id + 3],
        )

        self.ctx.build_dag(plus_one, plus_one.copy_tree())
        self.ctx.build_dag(plus_two, plus_two.copy_tree())
        self.ctx.build_dag(plus_three, plus_three.copy_tree())

        tracked = self.ctx._expr_to_dag  # the mapping itself, not its values
        self.assertEqual(len(tracked), 3)

        # NOTE(review): only an integer index is kept across iterations so
        # that no DAG stays referenced from this frame; the removal checks
        # below presumably depend on that -- confirm against ExecuteContext.
        index = 0
        while index < 3:
            self.assertGreater(len(list(tracked.values())[index].nodes()), 0)
            index += 1

        # Nodes reached through the stored DAG are the identical objects
        # reached by traversing the expression itself.
        self.assertTrue(
            all(
                l is r
                for l, r in zip(
                    plus_one.traverse(unique=True),
                    self.ctx.get_dag(plus_one).traverse(plus_one),
                )
            )
        )

        del plus_one
        self.assertEqual(len(tracked), 2)

        del plus_two
        del plus_three
        self.assertEqual(len(tracked), 0)
def setup(self):
    """Build the shared fixtures: ``self.expr`` and ``self.ctx``.

    ``self.expr`` is a ``CollectionExpr`` over a ``MockTable`` named
    ``pyodps_test_expr_table`` with columns ``name``/``id``/``fid``;
    ``self.ctx`` is a new ``ExecuteContext``.
    """
    names = ['name', 'id', 'fid']
    # Type names are validated in the same order as the column names above.
    data_types = [
        validate_data_type(type_name)
        for type_name in ('string', 'int64', 'float64')
    ]
    schema = Schema.from_lists(names, data_types)

    mock_table = MockTable(name='pyodps_test_expr_table', schema=schema)
    self.expr = CollectionExpr(_source_data=mock_table, _schema=schema)
    self.ctx = ExecuteContext()
def setup(self):
    """Create the collection expression and execution context used by tests."""

    def validated(*type_names):
        # Run each type name through validate_data_type, preserving order.
        return [validate_data_type(type_name) for type_name in type_names]

    schema = Schema.from_lists(
        ["name", "id", "fid"],
        validated("string", "int64", "float64"),
    )
    self.expr = CollectionExpr(
        _source_data=MockTable(name="pyodps_test_expr_table", schema=schema),
        _schema=schema,
    )
    self.ctx = ExecuteContext()
def testPandasCompilation(self):
    """Compile a two-column projection over a DataFrame and inspect the DAG.

    The compiled DAG must contain four entries in ``_graph``; in topological
    order the first element of each entry must be the source collection, two
    columns, and finally the projection.
    """
    # Imported locally, as in the original, rather than at module level.
    import numpy as np
    import pandas as pd

    frame = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list('abc'))
    schema = Schema.from_lists(list('abc'), [types.int8] * 3)
    expr = CollectionExpr(_source_data=frame, _schema=schema)['a', 'b']

    ctx = ExecuteContext()
    # The same expression is passed as both arguments of build_dag.
    built = ctx.build_dag(expr, expr)
    pandas_compiler = PandasCompiler(built)
    dag = pandas_compiler.compile(expr)

    self.assertEqual(len(dag._graph), 4)

    topos = dag.topological_sort()
    expected_types = (CollectionExpr, Column, Column, ProjectCollectionExpr)
    for position, node_type in enumerate(expected_types):
        self.assertIsInstance(topos[position][0], node_type)
def testPandasCompilation(self):
    """Compile a two-column projection over a DataFrame and inspect the DAG.

    The DAG handed to ``PandasCompiler`` comes from ``expr.to_dag()``. After
    compilation ``_graph`` must hold four entries, and in topological order
    the first element of each entry must be the source collection, two
    columns, and finally the projection.
    """
    # Imported locally, as in the original, rather than at module level.
    import pandas as pd
    import numpy as np

    df = pd.DataFrame(np.arange(9).reshape(3, 3), columns=list('abc'))
    schema = Schema.from_lists(list('abc'), [types.int8] * 3)
    expr = CollectionExpr(_source_data=df, _schema=schema)
    expr = expr['a', 'b']

    # The previously created ExecuteContext was never read in this version
    # (the DAG is obtained from the expression itself), so it is dropped.
    compiler = PandasCompiler(expr.to_dag())
    dag = compiler.compile(expr)

    self.assertEqual(len(dag._graph), 4)

    topos = dag.topological_sort()
    expected_types = (CollectionExpr, Column, Column, ProjectCollectionExpr)
    # Index explicitly so a too-short ordering still fails loudly.
    for position, node_type in enumerate(expected_types):
        self.assertIsInstance(topos[position][0], node_type)