def testToCPUExecution(self):
    """Execute a DataFrame and a Series passed through to_gpu then to_cpu.

    The executed results must be pandas objects equal to the raw inputs.
    """
    # Index runs 20, 19, ..., 1 so the frame does not use a default RangeIndex.
    raw_frame = pd.DataFrame(np.random.rand(20, 30),
                             index=np.arange(20, 0, -1))
    frame = from_pandas_df(raw_frame, chunk_size=(13, 21))
    gpu_frame = to_gpu(frame)
    cpu_frame = to_cpu(gpu_frame)

    frame_result = self.executor.execute_dataframe(cpu_frame, concat=True)[0]
    self.assertIsInstance(frame_result, pd.DataFrame)
    pd.testing.assert_frame_equal(frame_result, raw_frame)

    # Repeat the round trip with the first column taken as a Series.
    raw_series = raw_frame.iloc[:, 0]
    series = from_pandas_series(raw_series, chunk_size=(13, 21))
    gpu_series = to_gpu(series)
    cpu_series = to_cpu(gpu_series)

    series_result = self.executor.execute_dataframe(cpu_series, concat=True)[0]
    self.assertIsInstance(series_result, pd.Series)
    pd.testing.assert_series_equal(series_result, raw_series)
def testToGPU(self):
    """Check that to_gpu keeps metadata and flags the op as GPU.

    Covers a DataFrame and a Series, before and after tiling, and checks
    that calling to_gpu on an already converted object returns that object.
    """
    # test dataframe
    values = np.random.rand(10, 10)
    row_labels = np.random.randint(-100, 100, size=(10, ))
    col_labels = [np.random.bytes(10) for _ in range(10)]
    raw = pd.DataFrame(values, index=row_labels, columns=col_labels)

    cpu_df = from_pandas_df(raw)
    gpu_df = to_gpu(cpu_df)

    for attr in ('index_value', 'columns_value'):
        self.assertEqual(getattr(cpu_df, attr), getattr(gpu_df, attr))
    self.assertTrue(gpu_df.op.gpu)
    pd.testing.assert_series_equal(cpu_df.dtypes, gpu_df.dtypes)

    gpu_df = gpu_df.tiles()
    cpu_df = get_tiled(cpu_df)

    self.assertEqual(cpu_df.nsplits, gpu_df.nsplits)
    cpu_chunk = cpu_df.chunks[0]
    for attr in ('index_value', 'columns_value'):
        self.assertEqual(getattr(cpu_chunk, attr),
                         getattr(gpu_df.chunks[0], attr))
    self.assertTrue(gpu_df.chunks[0].op.gpu)
    pd.testing.assert_series_equal(cpu_chunk.dtypes, gpu_df.chunks[0].dtypes)

    # Converting again must hand back the very same object.
    self.assertIs(gpu_df, to_gpu(gpu_df))

    # test series
    raw_series = raw.iloc[:, 0]
    cpu_series = from_pandas_series(raw_series)
    gpu_series = to_gpu(cpu_series)

    self.assertEqual(cpu_series.index_value, gpu_series.index_value)
    self.assertTrue(gpu_series.op.gpu)

    gpu_series = gpu_series.tiles()
    cpu_series = get_tiled(cpu_series)

    self.assertEqual(cpu_series.nsplits, gpu_series.nsplits)
    self.assertEqual(cpu_series.chunks[0].index_value,
                     gpu_series.chunks[0].index_value)
    self.assertTrue(gpu_series.chunks[0].op.gpu)

    self.assertIs(gpu_series, to_gpu(gpu_series))
def testGPUExecution(self):
    """Execute reductions, aggregations and unique() on to_gpu data.

    Every result is compared with the same operation applied to the raw
    pandas object. Non-scalar results are converted with ``to_pandas()``
    (or ``cp.asnumpy`` for the array returned by ``unique``) before the
    comparison.
    """
    df_raw = pd.DataFrame(np.random.rand(30, 3), columns=list('abc'))
    df = to_gpu(md.DataFrame(df_raw, chunk_size=6))

    r = df.sum()
    res = self.executor.execute_dataframe(r, concat=True)[0]
    pd.testing.assert_series_equal(res.to_pandas(), df_raw.sum())

    r = df.kurt()
    res = self.executor.execute_dataframe(r, concat=True)[0]
    pd.testing.assert_series_equal(res.to_pandas(), df_raw.kurt())

    r = df.agg(['sum', 'var'])
    res = self.executor.execute_dataframe(r, concat=True)[0]
    pd.testing.assert_frame_equal(res.to_pandas(), df_raw.agg(['sum', 'var']))

    s_raw = pd.Series(np.random.rand(30))
    s = to_gpu(md.Series(s_raw, chunk_size=6))

    r = s.sum()
    res = self.executor.execute_dataframe(r, concat=True)[0]
    self.assertAlmostEqual(res, s_raw.sum())

    r = s.kurt()
    res = self.executor.execute_dataframe(r, concat=True)[0]
    self.assertAlmostEqual(res, s_raw.kurt())

    r = s.agg(['sum', 'var'])
    res = self.executor.execute_dataframe(r, concat=True)[0]
    pd.testing.assert_series_equal(res.to_pandas(), s_raw.agg(['sum', 'var']))

    # Product of two small integer ranges, so the series has repeated values.
    s_raw = pd.Series(
        np.random.randint(0, 3, size=(30, )) * np.random.randint(0, 5, size=(30, )))
    s = to_gpu(md.Series(s_raw, chunk_size=6))

    r = s.unique()
    res = self.executor.execute_dataframe(r, concat=True)[0]
    # ndarray.sort() sorts in place and returns None, so comparing the
    # return values of two .sort() calls compares None with None and can
    # never fail. np.sort returns sorted copies, so the unique values are
    # actually compared, independent of the order they come back in.
    np.testing.assert_array_equal(
        np.sort(cp.asnumpy(res)), np.sort(s_raw.unique()))
def test_gpu_execution(setup, check_ref_counts):
    """Execute reductions, aggregations and unique() on to_gpu data.

    Every result is compared with the same operation applied to the raw
    pandas object. Non-scalar results are converted with ``to_pandas()``
    (or ``cp.asnumpy`` for the array returned by ``unique``) before the
    comparison. ``setup`` and ``check_ref_counts`` are fixtures requested
    by name only; the body does not reference them.
    """
    df_raw = pd.DataFrame(np.random.rand(30, 3), columns=list('abc'))
    df = to_gpu(md.DataFrame(df_raw, chunk_size=6))

    r = df.sum()
    res = r.execute().fetch()
    pd.testing.assert_series_equal(res.to_pandas(), df_raw.sum())

    r = df.kurt()
    res = r.execute().fetch()
    pd.testing.assert_series_equal(res.to_pandas(), df_raw.kurt())

    r = df.agg(['sum', 'var'])
    res = r.execute().fetch()
    pd.testing.assert_frame_equal(res.to_pandas(), df_raw.agg(['sum', 'var']))

    s_raw = pd.Series(np.random.rand(30))
    s = to_gpu(md.Series(s_raw, chunk_size=6))

    r = s.sum()
    res = r.execute().fetch()
    assert pytest.approx(res) == s_raw.sum()

    r = s.kurt()
    res = r.execute().fetch()
    assert pytest.approx(res) == s_raw.kurt()

    r = s.agg(['sum', 'var'])
    res = r.execute().fetch()
    pd.testing.assert_series_equal(res.to_pandas(), s_raw.agg(['sum', 'var']))

    # Product of two small integer ranges, so the series has repeated values.
    s_raw = pd.Series(
        np.random.randint(0, 3, size=(30, )) * np.random.randint(0, 5, size=(30, )))
    s = to_gpu(md.Series(s_raw, chunk_size=6))

    r = s.unique()
    res = r.execute().fetch()
    # ndarray.sort() sorts in place and returns None, so comparing the
    # return values of two .sort() calls compares None with None and can
    # never fail. np.sort returns sorted copies, so the unique values are
    # actually compared, independent of the order they come back in.
    np.testing.assert_array_equal(
        np.sort(cp.asnumpy(res)), np.sort(s_raw.unique()))
def test_to_gpu():
    """Check that to_gpu keeps metadata and flags the op as GPU.

    Covers a DataFrame and a Series, before and after tiling, and checks
    that calling to_gpu on an already converted object returns that object.
    """
    # test dataframe
    values = np.random.rand(10, 10)
    row_labels = np.random.randint(-100, 100, size=(10, ))
    col_labels = [np.random.bytes(10) for _ in range(10)]
    raw = pd.DataFrame(values, index=row_labels, columns=col_labels)

    cpu_df = from_pandas_df(raw)
    gpu_df = to_gpu(cpu_df)

    for attr in ('index_value', 'columns_value'):
        assert getattr(cpu_df, attr) == getattr(gpu_df, attr)
    assert gpu_df.op.gpu is True
    pd.testing.assert_series_equal(cpu_df.dtypes, gpu_df.dtypes)

    cpu_df, gpu_df = tile(cpu_df, gpu_df)

    assert cpu_df.nsplits == gpu_df.nsplits
    cpu_chunk = cpu_df.chunks[0]
    for attr in ('index_value', 'columns_value'):
        assert getattr(cpu_chunk, attr) == getattr(gpu_df.chunks[0], attr)
    assert gpu_df.chunks[0].op.gpu is True
    pd.testing.assert_series_equal(cpu_chunk.dtypes, gpu_df.chunks[0].dtypes)

    # Converting again must hand back the very same object.
    assert gpu_df is to_gpu(gpu_df)

    # test series
    raw_series = raw.iloc[:, 0]
    cpu_series = from_pandas_series(raw_series)
    gpu_series = to_gpu(cpu_series)

    assert cpu_series.index_value == gpu_series.index_value
    assert gpu_series.op.gpu is True

    cpu_series, gpu_series = tile(cpu_series, gpu_series)

    assert cpu_series.nsplits == gpu_series.nsplits
    assert cpu_series.chunks[0].index_value == gpu_series.chunks[0].index_value
    assert gpu_series.chunks[0].op.gpu is True

    assert gpu_series is to_gpu(gpu_series)
def testToCPU(self):
    """Check that to_cpu applied to a to_gpu DataFrame keeps its metadata.

    The result must carry the same index/columns/dtypes as the source
    DataFrame, have a non-GPU op, and be returned unchanged by a second
    to_cpu call.
    """
    values = np.random.rand(10, 10)
    row_labels = np.random.randint(-100, 100, size=(10,))
    col_labels = [np.random.bytes(10) for _ in range(10)]
    raw = pd.DataFrame(values, index=row_labels, columns=col_labels)

    source_df = from_pandas_df(raw)
    gpu_df = to_gpu(source_df)
    cpu_df = to_cpu(gpu_df)

    for attr in ('index_value', 'columns_value'):
        self.assertEqual(getattr(source_df, attr), getattr(cpu_df, attr))
    self.assertFalse(cpu_df.op.gpu)
    pd.testing.assert_series_equal(source_df.dtypes, cpu_df.dtypes)

    # NOTE(review): the return value of tiles() is discarded and the source
    # DataFrame is never explicitly tiled here, unlike testToGPU which
    # rebinds both. Presumably tiles() updates the objects in place in
    # this code path; confirm.
    cpu_df.tiles()

    self.assertEqual(source_df.nsplits, cpu_df.nsplits)
    source_chunk = source_df.chunks[0]
    for attr in ('index_value', 'columns_value'):
        self.assertEqual(getattr(source_chunk, attr),
                         getattr(cpu_df.chunks[0], attr))
    self.assertFalse(cpu_df.chunks[0].op.gpu)
    pd.testing.assert_series_equal(source_chunk.dtypes,
                                   cpu_df.chunks[0].dtypes)

    # Converting again must hand back the very same object.
    self.assertIs(cpu_df, to_cpu(cpu_df))
def test_to_cpu():
    """Check that to_cpu applied to a to_gpu DataFrame keeps its metadata.

    The result must carry the same index/columns/dtypes as the source
    DataFrame, have ``op.gpu`` equal to False before and after tiling, and
    be returned unchanged by a second to_cpu call.
    """
    values = np.random.rand(10, 10)
    row_labels = np.random.randint(-100, 100, size=(10, ))
    col_labels = [np.random.bytes(10) for _ in range(10)]
    raw = pd.DataFrame(values, index=row_labels, columns=col_labels)

    source_df = from_pandas_df(raw)
    gpu_df = to_gpu(source_df)
    cpu_df = to_cpu(gpu_df)

    for attr in ('index_value', 'columns_value'):
        assert getattr(source_df, attr) == getattr(cpu_df, attr)
    assert cpu_df.op.gpu is False
    pd.testing.assert_series_equal(source_df.dtypes, cpu_df.dtypes)

    source_df, cpu_df = tile(source_df, cpu_df)

    assert source_df.nsplits == cpu_df.nsplits
    source_chunk = source_df.chunks[0]
    for attr in ('index_value', 'columns_value'):
        assert getattr(source_chunk, attr) == getattr(cpu_df.chunks[0], attr)
    assert cpu_df.chunks[0].op.gpu is False
    pd.testing.assert_series_equal(source_chunk.dtypes, cpu_df.chunks[0].dtypes)

    # Converting again must hand back the very same object.
    assert cpu_df is to_cpu(cpu_df)