Ejemplo n.º 1
0
    def testFilterParts(self):
        """Check ``filter_parts`` argument validation and result types.

        Invalid partition predicates must raise ``ExpressionError``. A valid
        predicate applied to ``self.expr2`` must produce a
        ``FilterPartitionCollectionExpr`` by default and a
        ``FilterCollectionExpr`` when ``exclude=False`` is passed.
        """
        predicate = 'part1=a,part2=1/part1=b,part2=2'

        # Every one of these predicates must be rejected on self.expr,
        # including the one that self.expr2 accepts further down.
        for rejected in (None, 'part3=a', predicate):
            with self.assertRaises(ExpressionError):
                self.expr.filter_parts(rejected)

        # The leading 'part1' term carries no '=value' part.
        with self.assertRaises(ExpressionError):
            self.expr2.filter_parts('part1,part2=1/part1=b,part2=2')

        # Default call: partition-filter expression that keeps the predicate
        # string and exposes the same schema as self.expr.
        by_partition = self.expr2.filter_parts(predicate)
        self.assertIsInstance(by_partition, FilterPartitionCollectionExpr)
        self.assertEqual(by_partition.schema, self.expr.schema)
        self.assertEqual(by_partition.predicate_string, predicate)

        # With exclude=False the result is a plain filter expression.
        plain_filter = self.expr2.filter_parts(predicate, exclude=False)
        self.assertIsInstance(plain_filter, FilterCollectionExpr)

        # The pandas-backed check is skipped silently when the imports fail.
        try:
            import pandas as pd
            from odps.df import DataFrame

            rows = [['Col1', 1], ['Col2', 2]]
            pandas_frame = pd.DataFrame(rows, columns=['Field1', 'Field2'])
            wrapped = DataFrame(pandas_frame)
            # 'Fieldd2' is not one of the frame's columns.
            with self.assertRaises(ExpressionError):
                wrapped.filter_parts('Fieldd2=2')
        except ImportError:
            pass
Ejemplo n.º 2
0
    def testFetchTableSize(self):
        """Check ``fetch_data_source_size`` for several partition filters.

        Each case builds a filtered expression on a ``DataFrame`` wrapping
        ``self.table``, converts it to a DAG with ``to_dag(copy=False)`` and
        asserts that the reported size is either greater than zero or
        ``None``. The same combinations are exercised through
        ``filter_parts`` strings and through ``filter`` with column
        comparisons.
        """
        df = DataFrame(self.table)

        def size_of(filtered):
            # Size reported for the DAG built from the filtered expression.
            return fetch_data_source_size(
                filtered.to_dag(copy=False), df, self.table)

        def expect_size(filtered):
            self.assertGreater(size_of(filtered), 0)

        def expect_no_size(filtered):
            self.assertIsNone(size_of(filtered))

        # Partition predicates given as strings.
        expect_size(df.filter_parts(self.pt))
        expect_size(df.filter_parts('ds=today,hh=curr,mm=now'))
        expect_no_size(df.filter_parts('ds=today,hh=curr,mm=now2'))
        expect_no_size(df.filter_parts('ds=today,hh=curr'))
        expect_size(df.filter_parts('ds=today,mm=now'))

        # The same combinations written as column comparisons; the first two
        # differ only in the order of the conditions.
        expect_size(
            df.filter(df.ds == 'today', df.mm == 'now', df.hh == 'curr'))
        expect_size(
            df.filter(df.ds == 'today', df.hh == 'curr', df.mm == 'now'))
        expect_no_size(
            df.filter(df.ds == 'today', df.hh == 'curr', df.mm == 'now2'))
        expect_no_size(df.filter(df.ds == 'today', df.hh == 'curr'))
        expect_size(df.filter(df.ds == 'today', df.mm == 'now'))