def testCacheTable(self):
    """Check caching of a joined collection with ODPS execution forced.

    Verifies that the sorted, cached join compiles to a three-node DAG,
    that its values equal those produced by the pandas engine for the same
    join, that the cached object is unchanged after executing the cached
    expression again, and that a count derived from the cached expression
    is itself cached as a plain value.
    """
    self.engine._selecter.force_odps = True

    cached_join = self.odps_df.join(self.pd_df, 'name').cache()
    sorted_join = cached_join.sort('id_x')

    plan = self.engine.compile(sorted_join)
    self.assertEqual(len(plan.nodes()), 3)

    actual = self.engine.execute(sorted_join).values

    # Reference result: the same join and sort evaluated by the pandas engine.
    local_df = DataFrame(self.odps_df.to_pandas())
    reference_expr = local_df.join(self.pd_df, 'name').sort('id_x')
    expected = self.pd_engine.execute(reference_expr).values
    self.assertTrue(actual.equals(expected))
    self.assertEqual(len(self.engine._generated_table_names), 2)

    cached_table = context.get_cached(cached_join)
    rerun_row_count = len(self.engine.execute(cached_join))
    self.assertEqual(rerun_row_count, len(expected))

    # Executing the cached expression again must not replace the cached object.
    self.assertIs(context.get_cached(cached_join), cached_table)
    # NOTE(review): only the lifecycle assertion is taken to be guarded by
    # this condition — confirm against the original layout.
    if not isinstance(cached_table, SeahawksTable):
        self.assertEqual(context.get_cached(cached_join).lifecycle, 1)

    small_id_count = cached_join[cached_join.id_x < 3].count()
    count_value = self.engine.execute(small_id_count)
    self.assertEqual(count_value, 2)
    self.assertEqual(context.get_cached(small_id_count), 2)
def testUseCache(self):
    """Check that a cached filter is still usable after its cache is dropped.

    Executes a projection over the cached filter, drops the object returned
    by ``context.get_cached``, then executes another projection and asserts
    that the expression is reported as cached and that the table named by
    the cached object exists.
    """
    # NOTE(review): another method named testUseCache is visible in this
    # file; if both live in the same class the later definition shadows
    # this one — confirm.
    self.engine._selecter.force_odps = True

    name_filter = self.odps_df['name'] == 'name1'
    cached_df = self.odps_df[name_filter].cache()

    projected = cached_df[cached_df.id * 2, cached_df.exclude('id')]
    first_rows = self.engine.execute(projected, head=10)
    self.assertEqual(len(first_rows), 2)

    context.get_cached(cached_df).drop()

    reprojected = cached_df['name', cached_df.id * 2]
    second_rows = self.engine.execute(reprojected, head=10)
    self.assertEqual(len(second_rows), 2)

    self.assertTrue(context.is_cached(cached_df))
    cached_table_name = context.get_cached(cached_df).name
    self.assertTrue(self.odps.exist_table(cached_table_name))
def testUseCache(self):
    """Check that using an expression after its cache is dropped raises.

    Executes a filtered collection, drops the object returned by
    ``context.get_cached`` for it, and asserts that both a further
    execution and a ``plot`` call raise ``ODPSError``.
    """
    # NOTE(review): another method named testUseCache is visible in this
    # file; if both live in the same class this definition shadows the
    # earlier one — confirm.
    self.engine._selecter.force_odps = True

    name_filter = self.odps_df['name'] == 'name1'
    filtered = self.odps_df[name_filter]
    head_rows = self.engine.execute(filtered, head=10)
    self.assertEqual(len(head_rows), 2)

    context.get_cached(filtered).drop()

    with self.assertRaises(ODPSError):
        self.engine.execute(filtered['name', 'id'])

    def plot(**_):
        # No-op stand-in passed as plot_func; accepts and ignores any keywords.
        pass

    with self.assertRaises(ODPSError):
        filtered.plot(x='id', plot_func=plot)
def testCache(self):
    """Check that a cached filter compiles to two nodes and is cached.

    Generates test rows, executes a count over the cached ``id < 10``
    filter, compares it with the number of generated rows whose second
    field is below 10, and asserts the cached object is a
    ``SeahawksTable``.
    """
    rows = self._gen_data(10, value_range=(-1000, 1000))

    cached_expr = self.expr[self.expr.id < 10].cache()
    count_expr = cached_expr.count()

    plan = self.engine.compile(cached_expr)
    self.assertEqual(len(plan.nodes()), 2)

    actual_count = self.engine.execute(count_expr)
    # Index 1 of each generated row is compared against the same bound as
    # the filter (presumably the ``id`` column — verify against _gen_data).
    expected_count = sum(1 for row in rows if row[1] < 10)
    self.assertEqual(expected_count, actual_count)

    self.assertTrue(context.is_cached(cached_expr))
    cached_table = context.get_cached(cached_expr)
    self.assertIsInstance(cached_table, SeahawksTable)