def testFilterParts(self):
    """Check ``filter_parts`` argument validation and the expressions it builds.

    Invalid partition specs must raise ``ExpressionError``.  A valid spec on
    ``self.expr2`` must produce a ``FilterPartitionCollectionExpr`` by
    default and a ``FilterCollectionExpr`` when ``exclude=False``.  If pandas
    and ``odps.df`` can be imported, the validation is additionally checked
    on a DataFrame wrapping a pandas frame.
    """
    valid_spec = 'part1=a,part2=1/part1=b,part2=2'

    # Specs that must be rejected.
    self.assertRaises(ExpressionError, lambda: self.expr.filter_parts(None))
    self.assertRaises(ExpressionError, lambda: self.expr.filter_parts('part3=a'))
    # The spec accepted on self.expr2 below is rejected on self.expr.
    # NOTE(review): presumably self.expr has no part1/part2 partition
    # columns -- confirm against the fixture set-up.
    self.assertRaises(ExpressionError, lambda: self.expr.filter_parts(valid_spec))
    # The first clause names part1 without assigning it a value.
    self.assertRaises(
        ExpressionError,
        lambda: self.expr2.filter_parts('part1,part2=1/part1=b,part2=2'))

    # Default call: partition filter expression carrying the predicate string.
    filtered1 = self.expr2.filter_parts(valid_spec)
    self.assertIsInstance(filtered1, FilterPartitionCollectionExpr)
    # Compared with self.expr.schema, not self.expr2.schema.
    # NOTE(review): presumably because partition columns are excluded from
    # the result by default -- confirm against the fixture set-up.
    self.assertEqual(filtered1.schema, self.expr.schema)
    self.assertEqual(filtered1.predicate_string, valid_spec)

    # exclude=False: a plain filter expression is expected instead.
    filtered2 = self.expr2.filter_parts(valid_spec, exclude=False)
    self.assertIsInstance(filtered2, FilterCollectionExpr)

    # Only the optional imports are guarded.  Keeping the check itself out
    # of the ``try`` body ensures an ImportError raised by the code under
    # test fails the test instead of being swallowed silently.
    try:
        import pandas as pd
        from odps.df import DataFrame
    except ImportError:
        # Optional dependency unavailable: the pandas-backed check is not run.
        pass
    else:
        pd_df = pd.DataFrame([['Col1', 1], ['Col2', 2]],
                             columns=['Field1', 'Field2'])
        df = DataFrame(pd_df)
        # 'Fieldd2' is not one of the frame's columns (Field1, Field2).
        self.assertRaises(ExpressionError, lambda: df.filter_parts('Fieldd2=2'))
def testFetchTableSize(self):
    """Exercise ``fetch_data_source_size`` on partition filters over ``self.table``.

    Each case builds a filtered expression from a DataFrame on
    ``self.table``, converts it with ``to_dag(copy=False)`` and checks that
    the reported size is either greater than zero or ``None``.  Filters are
    given first as ``filter_parts`` spec strings, then as column predicates
    passed to ``filter``.
    """
    df = DataFrame(self.table)

    def source_size(filtered):
        # Size reported for the DAG of a single filtered expression.
        graph = filtered.to_dag(copy=False)
        return fetch_data_source_size(graph, df, self.table)

    def expect_size(filtered):
        self.assertGreater(source_size(filtered), 0)

    def expect_no_size(filtered):
        self.assertIsNone(source_size(filtered))

    # Filters written as partition spec strings.
    expect_size(df.filter_parts(self.pt))
    expect_size(df.filter_parts('ds=today,hh=curr,mm=now'))
    expect_no_size(df.filter_parts('ds=today,hh=curr,mm=now2'))
    expect_no_size(df.filter_parts('ds=today,hh=curr'))
    expect_size(df.filter_parts('ds=today,mm=now'))

    # The same kind of filters written as column predicates.
    expect_size(df.filter(df.ds == 'today', df.mm == 'now', df.hh == 'curr'))
    expect_size(df.filter(df.ds == 'today', df.hh == 'curr', df.mm == 'now'))
    expect_no_size(df.filter(df.ds == 'today', df.hh == 'curr', df.mm == 'now2'))
    expect_no_size(df.filter(df.ds == 'today', df.hh == 'curr'))
    expect_size(df.filter(df.ds == 'today', df.mm == 'now'))