def testFilterPartition(self):
        """Exercise ``filter_partition`` error cases and result expression types.

        Covers three things:

        * ``ExpressionError`` is raised for a ``None`` predicate and for a
          partition predicate on ``self.expr``, and for a malformed spec
          (``part1`` without a value) on ``self.expr2``.
        * On ``self.expr2`` the default call yields a
          ``FilterPartitionCollectionExpr`` whose schema equals
          ``self.expr.schema`` and which keeps the predicate string verbatim,
          while ``exclude=False`` yields a ``FilterCollectionExpr``.
        * A pandas-backed ``DataFrame`` raises ``ExpressionError`` for the
          predicate ``'Fieldd2=2'``; this part is skipped silently when an
          ``ImportError`` occurs (e.g. pandas is not installed).
        """
        predicate = 'part1=a/part2=1,part1=b/part2=2'

        # Calls that must be rejected.
        with self.assertRaises(ExpressionError):
            self.expr.filter_partition(None)
        with self.assertRaises(ExpressionError):
            self.expr.filter_partition(predicate)
        with self.assertRaises(ExpressionError):
            self.expr2.filter_partition('part1/part2=1,part1=b/part2=2')

        # Default call: partition-filter expression.
        excluded = self.expr2.filter_partition(predicate)
        self.assertIsInstance(excluded, FilterPartitionCollectionExpr)
        self.assertEqual(excluded.schema, self.expr.schema)
        self.assertEqual(excluded.predicate_string, predicate)

        # exclude=False: plain filter expression.
        included = self.expr2.filter_partition(predicate, exclude=False)
        self.assertIsInstance(included, FilterCollectionExpr)

        # Optional check against a pandas-backed DataFrame.
        try:
            import pandas as pd
            from odps.df import DataFrame

            raw_frame = pd.DataFrame(
                [['Col1', 1], ['Col2', 2]],
                columns=['Field1', 'Field2'],
            )
            wrapped = DataFrame(raw_frame)
            with self.assertRaises(ExpressionError):
                wrapped.filter_partition('Fieldd2=2')
        except ImportError:
            pass
Example #2
0
    def testFetchTableSize(self):
        """Check ``fetch_data_source_size`` for several partition filters.

        A ``DataFrame`` is built over ``self.table`` and filtered either with
        ``filter_partition`` predicate strings or with equivalent ``filter``
        column comparisons.  Each filtered expression is turned into a DAG
        (``to_dag(copy=False)``) and passed to ``fetch_data_source_size``.
        Depending on the filter, the result is expected either to be greater
        than zero or to be ``None``.
        """
        df = DataFrame(self.table)

        def source_size(filtered):
            # Build the DAG first, then ask for the data source size.
            graph = filtered.to_dag(copy=False)
            return fetch_data_source_size(graph, df, self.table)

        # Filters expressed as partition predicate strings.
        self.assertGreater(source_size(df.filter_partition(self.pt)), 0)
        self.assertGreater(
            source_size(df.filter_partition('ds=today,hh=curr,mm=now')), 0)
        self.assertIsNone(
            source_size(df.filter_partition('ds=today,hh=curr,mm=now2')))
        self.assertIsNone(
            source_size(df.filter_partition('ds=today,hh=curr')))
        self.assertGreater(
            source_size(df.filter_partition('ds=today,mm=now')), 0)

        # The same filters expressed as column comparisons; the first two
        # differ only in the order of the conditions.
        self.assertGreater(
            source_size(df.filter(df.ds == 'today', df.mm == 'now',
                                  df.hh == 'curr')), 0)
        self.assertGreater(
            source_size(df.filter(df.ds == 'today', df.hh == 'curr',
                                  df.mm == 'now')), 0)
        self.assertIsNone(
            source_size(df.filter(df.ds == 'today', df.hh == 'curr',
                                  df.mm == 'now2')))
        self.assertIsNone(
            source_size(df.filter(df.ds == 'today', df.hh == 'curr')))
        self.assertGreater(
            source_size(df.filter(df.ds == 'today', df.mm == 'now')), 0)
    def testFilterPartition(self):
        """Verify ``filter_partition`` argument validation and return types.

        * ``self.expr`` raises ``ExpressionError`` for ``None`` and for a
          partition predicate; ``self.expr2`` raises it for a spec in which
          ``part1`` has no value.
        * ``self.expr2.filter_partition(spec)`` returns a
          ``FilterPartitionCollectionExpr`` with the same schema as
          ``self.expr`` and the original predicate string; passing
          ``exclude=False`` returns a ``FilterCollectionExpr`` instead.
        * When pandas and ``odps.df`` import successfully, a pandas-backed
          ``DataFrame`` raises ``ExpressionError`` for ``'Fieldd2=2'``.  An
          ``ImportError`` causes this last check to be skipped.
        """
        spec = 'part1=a/part2=1,part1=b/part2=2'

        # --- rejected inputs -------------------------------------------
        with self.assertRaises(ExpressionError):
            self.expr.filter_partition(None)

        with self.assertRaises(ExpressionError):
            self.expr.filter_partition(spec)

        with self.assertRaises(ExpressionError):
            self.expr2.filter_partition('part1/part2=1,part1=b/part2=2')

        # --- accepted input, default exclude ---------------------------
        result = self.expr2.filter_partition(spec)
        self.assertIsInstance(result, FilterPartitionCollectionExpr)
        self.assertEqual(result.schema, self.expr.schema)
        self.assertEqual(result.predicate_string, spec)

        # --- accepted input, exclude=False -----------------------------
        result_keep = self.expr2.filter_partition(spec, exclude=False)
        self.assertIsInstance(result_keep, FilterCollectionExpr)

        # --- pandas-backed frame (optional dependency) -----------------
        try:
            import pandas as pd
            from odps.df import DataFrame

            rows = [['Col1', 1], ['Col2', 2]]
            pandas_frame = pd.DataFrame(rows, columns=['Field1', 'Field2'])
            odps_frame = DataFrame(pandas_frame)
            with self.assertRaises(ExpressionError):
                odps_frame.filter_partition('Fieldd2=2')
        except ImportError:
            pass