def testCacheTable(self):
        self.engine._selecter.force_odps = True

        df = self.odps_df.join(self.pd_df, 'name').cache()
        df2 = df.sort('id_x')

        dag = self.engine.compile(df2)
        self.assertEqual(len(dag.nodes()), 3)

        result = self.engine.execute(df2).values

        df3 = DataFrame(self.odps_df.to_pandas())
        expected = self.pd_engine.execute(
            df3.join(self.pd_df, 'name').sort('id_x')).values
        self.assertTrue(result.equals(expected))

        self.assertEqual(len(self.engine._generated_table_names), 2)

        table = context.get_cached(df)
        self.assertEqual(len(self.engine.execute(df)), len(expected))

        self.assertIs(context.get_cached(df), table)
        if not isinstance(table, SeahawksTable):
            self.assertEqual(context.get_cached(df).lifecycle, 1)

        df4 = df[df.id_x < 3].count()
        result = self.engine.execute(df4)
        self.assertEqual(result, 2)

        self.assertEqual(context.get_cached(df4), 2)
Beispiel #2
0
    def testUseCache(self):
        self.engine._selecter.force_odps = True

        df_cache = self.odps_df[self.odps_df['name'] == 'name1'].cache()
        df = df_cache[df_cache.id * 2, df_cache.exclude('id')]
        self.assertEqual(len(self.engine.execute(df, head=10)), 2)

        context.get_cached(df_cache).drop()

        self.assertEqual(len(self.engine.execute(df_cache['name', df_cache.id * 2], head=10)), 2)
        self.assertTrue(context.is_cached(df_cache))
        self.assertTrue(self.odps.exist_table(context.get_cached(df_cache).name))
Beispiel #3
0
    def testUseCache(self):
        self.engine._selecter.force_odps = True

        df = self.odps_df[self.odps_df['name'] == 'name1']
        self.assertEqual(len(self.engine.execute(df, head=10)), 2)

        context.get_cached(df).drop()

        self.assertRaises(ODPSError,
                          lambda: self.engine.execute(df['name', 'id']))

        def plot(**_):
            pass

        self.assertRaises(ODPSError, lambda: df.plot(x='id', plot_func=plot))
    def testCache(self):
        data = self._gen_data(10, value_range=(-1000, 1000))

        expr = self.expr[self.expr.id < 10].cache()
        cnt = expr.count()

        dag = self.engine.compile(expr)
        self.assertEqual(len(dag.nodes()), 2)

        res = self.engine.execute(cnt)
        self.assertEqual(len([it for it in data if it[1] < 10]), res)
        self.assertTrue(context.is_cached(expr))

        table = context.get_cached(expr)
        self.assertIsInstance(table, SeahawksTable)