# Example 1
0
    def testAsyncExecute(self):
        """Check asynchronous execution of a ``Delay`` holding three slow queries.

        Three filtered expressions (``value == 1``, ``2`` and ``3``) are
        registered on one ``Delay``.  Each maps its ``value`` column through a
        function that sleeps ``5 * value`` seconds, so the test expects the
        sub-futures to finish in registration order.  While the asynchronous
        run is in flight, a second ``delay.execute()`` is expected to raise
        ``RuntimeError``.

        NOTE(review): the ordering assertions depend on wall-clock timing of
        the sleeps above; confirm they are stable on the target backend.
        """
        def make_filter(df, cnt):
            """Return rows of *df* with ``value == cnt``, slowed by a sleeping map."""
            def waiter(val, c):
                # Imported locally, as in the original; presumably so the
                # function stays self-contained when shipped to the executor.
                import time
                time.sleep(5 * c)
                return val

            f_df = df[df.value == cnt]
            # ``partial`` binds ``cnt`` as ``val``; the mapped column value
            # arrives as ``c``.  Rows are already filtered to value == cnt,
            # so both arguments are equal here.
            return f_df[f_df.exclude('value'),
                        f_df.value.map(functools.partial(waiter, cnt))]

        delay = Delay()
        filtered = self.df[self.df.id > 0].cache()
        sub_futures = [
            make_filter(filtered, i).execute(delay=delay) for i in range(1, 4)
        ]
        # ``async`` is a reserved keyword since Python 3.7, so the keyword
        # argument must be spelled ``async_``.
        future = delay.execute(async_=True, n_parallel=3)
        # Re-executing a delay that is still running must be rejected.
        self.assertRaises(RuntimeError, delay.execute)

        for i in range(1, 4):
            # Before waiting on the i-th sub-future: the overall future and
            # every sub-future from the i-th onward are still pending, while
            # all earlier ones are finished.
            self.assertFalse(future.done())
            self.assertFalse(any(f.done() for f in sub_futures[i - 1:]))
            self.assertTrue(all(f.done() for f in sub_futures[:i - 1]))
            # ``result()`` blocks until the i-th sub-query completes.
            self.assertEqual(self._get_result(sub_futures[i - 1].result()),
                             [d for d in self.data if d[2] == i])
        self.assertTrue(all(f.done() for f in sub_futures))
        future.result(timeout=10 * 60)
        self.assertTrue(future.done())
    def testPersistExecute(self):
        """Persist two filtered frames into separate partitions through one ``Delay``.

        Rows with ``value > 2`` go to partition ``pt=a,ds=d1`` and rows with
        ``value < 2`` go to ``pt=a,ds=d2`` of the same freshly created table.
        After the delay runs, each persisted frame must carry a predicate on
        ``pt`` and ``ds`` and return the expected rows, ignoring the two
        trailing partition columns.
        """
        batch = Delay()
        positive = self.df[self.df.id > 0].cache()

        target_table = tn('pyodps_test_delay_persist')
        column_names = ['id', 'name', 'value']
        column_types = ['bigint', 'string', 'bigint']
        partition_names = ['pt', 'ds']
        partition_types = ['string', 'string']
        table_schema = Schema.from_lists(column_names, column_types,
                                         partition_names, partition_types)
        # Start from a clean table so earlier runs cannot leak rows in.
        self.odps.delete_table(target_table, if_exists=True)
        self.odps.create_table(target_table, table_schema)

        high_future = positive[positive.value > 2].persist(
            target_table, partition='pt=a,ds=d1', delay=batch)
        low_future = positive[positive.value < 2].persist(
            target_table, partition='pt=a,ds=d2', delay=batch)

        batch.execute()
        high_df = high_future.result()
        low_df = low_future.result()

        def verify(persisted, keep):
            """Assert partition predicate and row content of one persisted frame."""
            predicate_columns = [
                cond.lhs.name for cond in persisted.predicate.children()
            ]
            self.assertEqual(predicate_columns, ['pt', 'ds'])
            fetched = self._get_result(persisted.execute())
            # Strip the two partition columns appended to every row.
            without_partitions = [row[:-2] for row in fetched]
            self.assertEqual(without_partitions,
                             [rec for rec in self.data if keep(rec[2])])

        verify(high_df, lambda value: value > 2)
        verify(low_df, lambda value: value < 2)
# Example 3
0
    def testToPandas(self):
        """Check ``to_pandas`` on a one-row table in sync, async and delayed modes.

        A seven-column table is created and filled with a single row (the
        last ``datetime`` column is ``None``).  The row is then read back via
        ``to_pandas`` on the whole expression and on ``col0``, both unwrapped
        and with ``wrap=True``, synchronously, with ``async_=True`` and
        through a ``Delay``.  Finally, an expression dividing by zero is
        expected to surface ``ODPSError`` from the future's ``result``.
        """
        table_name = tn('pyodps_test_mixed_engine_to_pandas')
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(
            name=table_name,
            schema=Schema.from_lists(['col%s' % i for i in range(7)], [
                'bigint', 'double', 'string', 'datetime', 'boolean', 'decimal',
                'datetime'
            ]))
        expr2 = DataFrame(table2)

        data2 = [[
            1234567, 3.14, 'test',
            datetime(2016, 6, 1), True,
            Decimal('3.14'), None
        ]]
        self.odps.write_table(table2, 0, data2)

        # Whole frame, plain pandas result.  ``.ix`` was removed in
        # pandas 1.0; ``.iloc[0]`` selects the first row positionally.
        pd_df = expr2.to_pandas()
        self.assertSequenceEqual(data2[0], pd_df.iloc[0].tolist())

        # Whole frame, wrapped result that can be executed again.
        wrapped_pd_df = expr2.to_pandas(wrap=True)
        self.assertSequenceEqual(data2[0], list(next(wrapped_pd_df.execute())))

        # Single column, plain and wrapped.
        pd_df_col = expr2.col0.to_pandas()
        self.assertSequenceEqual([data2[0][0]], pd_df_col.tolist())

        wrapped_pd_df_col = expr2.col0.to_pandas(wrap=True)
        self.assertSequenceEqual([data2[0][0]],
                                 list(next(wrapped_pd_df_col.execute())))

        # Asynchronous variants.  ``async`` is a reserved keyword since
        # Python 3.7, so the keyword argument must be spelled ``async_``.
        pd_df_future = expr2.to_pandas(async_=True)
        self.assertSequenceEqual(data2[0],
                                 pd_df_future.result().iloc[0].tolist())

        wrapped_pd_df_future = expr2.to_pandas(async_=True, wrap=True)
        self.assertSequenceEqual(
            data2[0], list(next(wrapped_pd_df_future.result().execute())))

        # Deferred variant: the future resolves once the delay is executed.
        delay = Delay()
        pd_df_future = expr2.to_pandas(delay=delay)
        delay.execute()
        self.assertSequenceEqual(data2[0],
                                 pd_df_future.result().iloc[0].tolist())

        # A failing expression must raise from ``result``.
        exc_future = (expr2.col0 / 0).to_pandas(async_=True)
        self.assertRaises(ODPSError, exc_future.result)
# Example 4
0
    def testSyncExecute(self):
        """Run delayed executions synchronously, then reuse the same ``Delay``.

        Two filtered expressions (``value == 1`` and ``value == 2``) are
        registered and executed together; both futures must be done
        afterwards and hold the matching rows.  A third expression
        (``value == 3``) is then registered on the already-executed delay and
        must also complete after a second ``execute`` call.
        """
        wait_seconds = 10 * 60
        batch = Delay()
        positive = self.df[self.df.id > 0].cache()

        pending = []
        for wanted in range(1, 3):
            selection = positive[positive.value == wanted]
            pending.append(selection.execute(delay=batch))
        batch.execute(timeout=wait_seconds)

        self.assertTrue(all(fut.done() for fut in pending))
        for wanted, fut in enumerate(pending, 1):
            actual = self._get_result(fut.result())
            expected = [row for row in self.data if row[2] == wanted]
            self.assertEqual(actual, expected)

        # Register and run one more expression on the already-executed delay.
        late_future = positive[positive.value == 3].execute(delay=batch)
        batch.execute(timeout=wait_seconds)
        self.assertTrue(late_future.done())
        late_actual = self._get_result(late_future.result())
        late_expected = [row for row in self.data if row[2] == 3]
        self.assertEqual(late_actual, late_expected)