def testAsyncExecute(self):
    """Execute three deferred sub-queries asynchronously and check their progress.

    Three filtered expressions are registered on one ``Delay`` and started
    with ``n_parallel=3``.  The test then checks that a second ``execute()``
    on the running delay raises ``RuntimeError``, that the sub-futures are
    observed finishing one after another in registration order, and that the
    overall future is done only after every sub-future has been consumed.
    """
    def make_filter(df, cnt):
        """Select rows whose ``value`` equals ``cnt`` and map ``value`` through a sleeping function."""
        def waiter(val, c):
            # Imported inside the function body; presumably so the mapped
            # function is self-contained when shipped to the engine -- TODO confirm.
            import time
            time.sleep(5 * c)
            return val

        f_df = df[df.value == cnt]
        # partial() binds ``cnt`` to ``val``; the mapped column value arrives
        # as ``c``.  Both are equal here because of the filter above.
        return f_df[f_df.exclude('value'),
                    f_df.value.map(functools.partial(waiter, cnt))]

    delay = Delay()
    filtered = self.df[self.df.id > 0].cache()
    sub_futures = [
        make_filter(filtered, i).execute(delay=delay) for i in range(1, 4)
    ]

    # ``async`` is a reserved keyword since Python 3.7 and can no longer be
    # written as a literal keyword argument; unpacking a dict passes the very
    # same argument name while keeping this file parseable.
    future = delay.execute(n_parallel=3, **{'async': True})

    # The delay is already running, so executing it again must fail.
    self.assertRaises(RuntimeError, delay.execute)

    for idx, sub_future in enumerate(sub_futures):
        self.assertFalse(future.done())
        # Nothing from the current sub-query onwards has finished yet ...
        self.assertFalse(any(f.done() for f in sub_futures[idx:]))
        # ... while everything before it has.
        self.assertTrue(all(f.done() for f in sub_futures[:idx]))
        # result() blocks until this sub-query finishes.
        self.assertEqual(self._get_result(sub_future.result()),
                         [d for d in self.data if d[2] == idx + 1])

    self.assertTrue(all(f.done() for f in sub_futures))
    future.result(timeout=10 * 60)
    self.assertTrue(future.done())
def testPersistExecute(self):
    """Persist two filtered frames into separate partitions through one ``Delay``.

    The target table is recreated with partition columns ``pt`` and ``ds``.
    After the delay has run, each persisted frame must carry a predicate on
    exactly those two columns, and its rows (minus the two trailing partition
    values) must equal the matching subset of ``self.data``.
    """
    def check_persisted(persisted, expected_rows):
        # The predicate of the returned frame names the partition columns.
        predicate_columns = [cond.lhs.name
                             for cond in persisted.predicate.children()]
        self.assertEqual(predicate_columns, ['pt', 'ds'])
        fetched = self._get_result(persisted.execute())
        # Strip the two trailing partition values before comparing.
        self.assertEqual([row[:-2] for row in fetched], expected_rows)

    delay = Delay()
    filtered = self.df[self.df.id > 0].cache()

    persist_table_name = tn('pyodps_test_delay_persist')
    column_names = ['id', 'name', 'value']
    column_types = ['bigint', 'string', 'bigint']
    partition_names = ['pt', 'ds']
    partition_types = ['string', 'string']
    schema = Schema.from_lists(column_names, column_types,
                               partition_names, partition_types)
    self.odps.delete_table(persist_table_name, if_exists=True)
    self.odps.create_table(persist_table_name, schema)

    large_values = filtered[filtered.value > 2]
    small_values = filtered[filtered.value < 2]
    future1 = large_values.persist(
        persist_table_name, partition='pt=a,ds=d1', delay=delay)
    future2 = small_values.persist(
        persist_table_name, partition='pt=a,ds=d2', delay=delay)

    delay.execute()
    df1 = future1.result()
    df2 = future2.result()

    check_persisted(df1, [d for d in self.data if d[2] > 2])
    check_persisted(df2, [d for d in self.data if d[2] < 2])
def testToPandas(self):
    """Check ``to_pandas`` in sync, wrapped, async and delayed modes.

    A seven-column table is recreated and filled with a single row.  That
    row is then read back through ``to_pandas`` on the whole frame and on
    ``col0``, with and without ``wrap=True``, asynchronously, and through a
    ``Delay``.  Finally an asynchronous division by zero must surface
    ``ODPSError`` when its future's result is requested.
    """
    # ``async`` is a reserved keyword since Python 3.7 and cannot be written
    # as a literal keyword argument; unpacking this dict passes the very same
    # argument name while keeping the file parseable.
    async_kw = {'async': True}

    table_name = tn('pyodps_test_mixed_engine_to_pandas')
    self.odps.delete_table(table_name, if_exists=True)
    table2 = self.odps.create_table(
        name=table_name,
        schema=Schema.from_lists(
            ['col%s' % i for i in range(7)],
            ['bigint', 'double', 'string', 'datetime', 'boolean',
             'decimal', 'datetime']))
    expr2 = DataFrame(table2)

    data2 = [[
        1234567, 3.14, 'test', datetime(2016, 6, 1), True,
        Decimal('3.14'), None
    ]]
    self.odps.write_table(table2, 0, data2)

    # ``DataFrame.ix`` was removed in pandas 1.0.  The table holds exactly
    # one row, so positional ``iloc[0]`` selects it.
    pd_df = expr2.to_pandas()
    self.assertSequenceEqual(data2[0], pd_df.iloc[0].tolist())

    wrapped_pd_df = expr2.to_pandas(wrap=True)
    self.assertSequenceEqual(data2[0], list(next(wrapped_pd_df.execute())))

    pd_df_col = expr2.col0.to_pandas()
    self.assertSequenceEqual([data2[0][0]], pd_df_col.tolist())

    wrapped_pd_df_col = expr2.col0.to_pandas(wrap=True)
    self.assertSequenceEqual([data2[0][0]],
                             list(next(wrapped_pd_df_col.execute())))

    pd_df_future = expr2.to_pandas(**async_kw)
    self.assertSequenceEqual(data2[0],
                             pd_df_future.result().iloc[0].tolist())

    wrapped_pd_df_future = expr2.to_pandas(wrap=True, **async_kw)
    self.assertSequenceEqual(
        data2[0], list(next(wrapped_pd_df_future.result().execute())))

    delay = Delay()
    pd_df_future = expr2.to_pandas(delay=delay)
    delay.execute()
    self.assertSequenceEqual(data2[0],
                             pd_df_future.result().iloc[0].tolist())

    # The failure is expected when the result is requested from the future.
    exc_future = (expr2.col0 / 0).to_pandas(**async_kw)
    self.assertRaises(ODPSError, exc_future.result)
def testSyncExecute(self):
    """Run deferred sub-queries synchronously, then reuse the same ``Delay``.

    Two filtered executions are registered and run with a blocking
    ``delay.execute``; both futures must be done afterwards and yield the
    matching rows of ``self.data``.  A third execution is then registered on
    the same delay and must complete after a second ``delay.execute``.
    """
    timeout_seconds = 10 * 60

    delay = Delay()
    filtered = self.df[self.df.id > 0].cache()

    pending = []
    for wanted in range(1, 3):
        subset = filtered[filtered.value == wanted]
        pending.append(subset.execute(delay=delay))
    delay.execute(timeout=timeout_seconds)

    self.assertTrue(all(fut.done() for fut in pending))
    for wanted, fut in enumerate(pending, 1):
        actual = self._get_result(fut.result())
        self.assertEqual(actual,
                         [row for row in self.data if row[2] == wanted])

    # Register one more execution on the delay that has already been run,
    # then run the delay a second time.
    late_future = filtered[filtered.value == 3].execute(delay=delay)
    delay.execute(timeout=timeout_seconds)
    self.assertTrue(late_future.done())
    late_rows = self._get_result(late_future.result())
    self.assertEqual(late_rows, [row for row in self.data if row[2] == 3])