def testFilterPrune(self):
    """Check column pruning and SQL compilation for filtered collections.

    Three shapes are exercised: a filter followed by a two-column
    projection, a bare filter, and a filter whose predicate is an
    ``isin`` against a column of another collection.
    """
    # Case 1: filter, then project ``name`` and ``id``.
    expr = self.expr.filter(self.expr.name == 'name1')
    expr = expr['name', 'id']

    new_expr = ColumnPruning(expr.to_dag()).prune()
    # Below the root there must be the filter, and below the filter a
    # non-projection node that still carries source data.
    self.assertIsInstance(new_expr.input, FilterCollectionExpr)
    self.assertNotIsInstance(new_expr.input.input, ProjectCollectionExpr)
    self.assertIsNotNone(new_expr.input.input._source_data)

    expected = (
        'SELECT t1.`name`, t1.`id` \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        'WHERE t1.`name` == \'name1\''
    )
    self.assertEqual(
        to_str(expected),
        to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

    # Case 2: a bare filter stays the root and its input keeps source data.
    expr = self.expr.filter(self.expr.name == 'name1')

    new_expr = ColumnPruning(expr.to_dag()).prune()
    self.assertIsInstance(new_expr, FilterCollectionExpr)
    self.assertIsNotNone(new_expr.input._source_data)

    # Case 3: filter with an ``isin`` predicate against another collection.
    expr = self.expr.filter(self.expr.id.isin(self.expr3.id))

    expected = (
        'SELECT * \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        'WHERE t1.`id` IN (SELECT t3.`id` FROM ( '
        'SELECT t2.`id` FROM mocked_project.`pyodps_test_expr_table2` t2 ) t3)'
    )
    # Must be assertEqual: assertTrue(a, b) treats ``b`` as the failure
    # message and passes for any non-empty ``a``, so the compiled SQL
    # would never actually be compared.
    self.assertEqual(
        to_str(expected),
        to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
def testProjectPrune(self):
    """Check pruning and compiled SQL for plain projections.

    Covers a simple column selection, a projection mixing scalar
    constants with a real column, and a projection built from a
    builtin function call.
    """
    # Simple two-column selection.
    projected = self.expr.select('name', 'id')

    pruned = ColumnPruning(projected.to_dag()).prune()
    self.assertIsInstance(pruned, ProjectCollectionExpr)
    self.assertIsNotNone(pruned.input._source_data)

    expected = (
        'SELECT t1.`name`, t1.`id` \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1'
    )
    actual = ODPSEngine(self.odps).compile(projected, prettify=False)
    self.assertEqual(expected, actual)

    # Scalar and NULL constants projected alongside a real column.
    const_col = Scalar(3).rename('const')
    null_col = NullScalar('string').rename('string_const')
    projected = self.expr[const_col, null_col, self.expr.id]

    expected = (
        'SELECT 3 AS `const`, CAST(NULL AS STRING) AS `string_const`, t1.`id` \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1'
    )
    actual = ODPSEngine(self.odps).compile(projected, prettify=False)
    self.assertEqual(to_str(expected), to_str(actual))

    # Projection of a builtin function applied to the table name.
    table_name = self.expr._source_data.name
    max_pt = BuiltinFunction('max_pt', args=(table_name, ))
    projected = self.expr.select(pt=max_pt)

    expected = (
        "SELECT max_pt('pyodps_test_expr_table') AS `pt` \n"
        "FROM mocked_project.`pyodps_test_expr_table` t1"
    )
    actual = ODPSEngine(self.odps).compile(projected, prettify=False)
    self.assertEqual(to_str(expected), to_str(actual))
def testApplyPrune(self):
    """Check the pruned tree shape for a row-wise ``apply`` over a filter.

    After pruning, the root must be a projection, the node two levels
    below it a filter, and that filter's input must carry source data.
    """

    @output(['name', 'id'], ['string', 'string'])
    def h(row):
        yield row[0], row[1]

    negative_fid = self.expr[self.expr.fid < 0]
    applied = negative_fid.apply(h, axis=1)
    only_id = applied['id', ]

    pruned = ColumnPruning(only_id.to_dag()).prune()

    self.assertIsInstance(pruned, ProjectCollectionExpr)
    filter_node = pruned.input.input
    self.assertIsInstance(filter_node, FilterCollectionExpr)
    self.assertIsNotNone(filter_node.input._source_data)
def testSlicePrune(self):
    """Check pruning and compiled SQL for filter -> slice -> projection."""
    filtered = self.expr.filter(self.expr.fid < 0)
    limited = filtered[:4]
    projected = limited['name', lambda x: x.id + 1]

    pruned = ColumnPruning(projected.to_dag()).prune()
    # Three levels below the root the node must still carry source data.
    self.assertIsNotNone(pruned.input.input.input._source_data)

    expected = (
        "SELECT t1.`name`, t1.`id` + 1 AS `id` \n"
        "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        "WHERE t1.`fid` < 0 \n"
        "LIMIT 4"
    )
    actual = ODPSEngine(self.odps).compile(projected, prettify=False)
    self.assertEqual(to_str(expected), to_str(actual))
def testFilterPartsPrune(self):
    """Check pruning and compiled SQL for a partition filter chain.

    The chain is ``filter_parts`` -> row filter -> projection; the node
    two levels below the pruned root must expose exactly the columns
    ``name``, ``id`` and ``fid``.
    """
    partitioned = self.expr.filter_parts('ds=today')
    filtered = partitioned[lambda x: x.fid < 0]
    projected = filtered['name', lambda x: x.id + 1]

    pruned = ColumnPruning(projected.to_dag()).prune()
    remaining_columns = set(pruned.input.input.schema.names)
    self.assertEqual(remaining_columns, {'name', 'id', 'fid'})

    expected = (
        "SELECT t2.`name`, t2.`id` + 1 AS `id` \n"
        "FROM (\n"
        " SELECT t1.`name`, t1.`id`, t1.`fid` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        " WHERE t1.`ds` == 'today' \n"
        ") t2 \n"
        "WHERE t2.`fid` < 0"
    )
    actual = ODPSEngine(self.odps).compile(projected, prettify=False)
    self.assertEqual(to_str(expected), to_str(actual))
def testLateralViewPrune(self):
    """Check pruning and compiled SQL for an ``explode`` lateral view.

    Also compiles a narrower projection (``id`` and ``hobbies``) taken
    from the lateral-view collection.
    """
    exploded = self.expr4['name', 'id', self.expr4.hobbies.explode()]

    pruned = ColumnPruning(exploded.to_dag()).prune()
    self.assertIsInstance(pruned, LateralViewCollectionExpr)
    self.assertIsNotNone(pruned.input._source_data)

    expected = (
        'SELECT t1.`name`, t1.`id`, t2.`hobbies` \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        'LATERAL VIEW EXPLODE(t1.`hobbies`) t2 AS `hobbies`'
    )
    actual = ODPSEngine(self.odps).compile(exploded, prettify=False)
    self.assertEqual(expected, actual)

    # Narrower projection on top of the lateral view.
    narrowed = exploded[exploded.id, exploded.hobbies]
    expected = (
        'SELECT t1.`id`, t2.`hobbies` \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        'LATERAL VIEW EXPLODE(t1.`hobbies`) t2 AS `hobbies`'
    )
    actual = ODPSEngine(self.odps).compile(narrowed, prettify=False)
    self.assertEqual(expected, actual)
def testValueCountsPrune(self):
    """Check the pruned tree when only ``count`` is taken from ``value_counts``.

    The node two levels below the pruned root must be a projection
    whose schema contains only the ``name`` column.
    """
    counts = self.expr.name.value_counts()
    only_count = counts['count', ]

    pruned = ColumnPruning(only_count.to_dag()).prune()

    inner = pruned.input.input
    self.assertIsInstance(inner, ProjectCollectionExpr)
    self.assertEqual(set(inner.schema.names), {'name'})