Esempio n. 1
0
    def testToCPUExecution(self):
        """Execute a DataFrame and a Series after a to_gpu -> to_cpu round trip.

        Each executed result must be a pandas object equal to the data it
        was built from.
        """
        # DataFrame case: 20x30 random values with a descending integer index.
        raw_frame = pd.DataFrame(np.random.rand(20, 30),
                                 index=np.arange(20, 0, -1))
        gpu_frame = to_gpu(from_pandas_df(raw_frame, chunk_size=(13, 21)))
        cpu_frame = to_cpu(gpu_frame)

        frame_result = self.executor.execute_dataframe(
            cpu_frame, concat=True)[0]
        self.assertIsInstance(frame_result, pd.DataFrame)
        pd.testing.assert_frame_equal(frame_result, raw_frame)

        # Series case: the first column of the same frame.
        raw_series = raw_frame.iloc[:, 0]
        gpu_series = to_gpu(
            from_pandas_series(raw_series, chunk_size=(13, 21)))
        cpu_series = to_cpu(gpu_series)

        series_result = self.executor.execute_dataframe(
            cpu_series, concat=True)[0]
        self.assertIsInstance(series_result, pd.Series)
        pd.testing.assert_series_equal(series_result, raw_series)
Esempio n. 2
0
    def testToGPU(self):
        """Check that to_gpu keeps metadata and flags the op as GPU.

        Covers a DataFrame and a Series, both before and after tiling, and
        checks that to_gpu on an already converted object returns that
        same object.
        """
        # --- DataFrame ---
        random_index = np.random.randint(-100, 100, size=(10, ))
        random_columns = [np.random.bytes(10) for _ in range(10)]
        raw_frame = pd.DataFrame(np.random.rand(10, 10),
                                 index=random_index,
                                 columns=random_columns)
        frame = from_pandas_df(raw_frame)
        gpu_frame = to_gpu(frame)

        self.assertEqual(frame.index_value, gpu_frame.index_value)
        self.assertEqual(frame.columns_value, gpu_frame.columns_value)
        self.assertTrue(gpu_frame.op.gpu)
        pd.testing.assert_series_equal(frame.dtypes, gpu_frame.dtypes)

        # Chunk-level checks on the tiled objects.
        gpu_frame = gpu_frame.tiles()
        frame = get_tiled(frame)
        first_chunk = frame.chunks[0]
        first_gpu_chunk = gpu_frame.chunks[0]

        self.assertEqual(frame.nsplits, gpu_frame.nsplits)
        self.assertEqual(first_chunk.index_value, first_gpu_chunk.index_value)
        self.assertEqual(first_chunk.columns_value,
                         first_gpu_chunk.columns_value)
        self.assertTrue(first_gpu_chunk.op.gpu)
        pd.testing.assert_series_equal(first_chunk.dtypes,
                                       first_gpu_chunk.dtypes)

        # Converting again must hand back the very same object.
        self.assertIs(gpu_frame, to_gpu(gpu_frame))

        # --- Series ---
        raw_series = raw_frame.iloc[:, 0]
        series = from_pandas_series(raw_series)
        gpu_series = to_gpu(series)

        self.assertEqual(series.index_value, gpu_series.index_value)
        self.assertTrue(gpu_series.op.gpu)

        gpu_series = gpu_series.tiles()
        series = get_tiled(series)

        self.assertEqual(series.nsplits, gpu_series.nsplits)
        self.assertEqual(series.chunks[0].index_value,
                         gpu_series.chunks[0].index_value)
        self.assertTrue(gpu_series.chunks[0].op.gpu)

        self.assertIs(gpu_series, to_gpu(gpu_series))
Esempio n. 3
0
    def testGPUExecution(self):
        """Compare GPU-executed reductions, aggregations and unique() to pandas.

        A DataFrame and two Series are moved to GPU with ``to_gpu``; the
        results of ``sum``, ``kurt``, ``agg`` and ``unique`` are checked
        against the same operations on the raw pandas data.
        """
        def run(tileable):
            # Execute through the test executor and return the single
            # concatenated result.
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # DataFrame reductions / aggregation.
        df_raw = pd.DataFrame(np.random.rand(30, 3), columns=list('abc'))
        df = to_gpu(md.DataFrame(df_raw, chunk_size=6))

        pd.testing.assert_series_equal(run(df.sum()).to_pandas(),
                                       df_raw.sum())
        pd.testing.assert_series_equal(run(df.kurt()).to_pandas(),
                                       df_raw.kurt())
        pd.testing.assert_frame_equal(run(df.agg(['sum', 'var'])).to_pandas(),
                                      df_raw.agg(['sum', 'var']))

        # Series reductions / aggregation.
        s_raw = pd.Series(np.random.rand(30))
        s = to_gpu(md.Series(s_raw, chunk_size=6))

        self.assertAlmostEqual(run(s.sum()), s_raw.sum())
        self.assertAlmostEqual(run(s.kurt()), s_raw.kurt())
        pd.testing.assert_series_equal(run(s.agg(['sum', 'var'])).to_pandas(),
                                       s_raw.agg(['sum', 'var']))

        # Series.unique on integer data that contains repeated values.
        s_raw = pd.Series(
            np.random.randint(0, 3, size=(30, )) *
            np.random.randint(0, 5, size=(30, )))
        s = to_gpu(md.Series(s_raw, chunk_size=6))

        res = run(s.unique())
        # ndarray.sort() sorts in place and returns None, so comparing the
        # return values of .sort() would compare None with None and always
        # pass. np.sort returns sorted copies, so the values are compared.
        np.testing.assert_array_equal(
            np.sort(cp.asnumpy(res)),
            np.sort(s_raw.unique()))
Esempio n. 4
0
def test_gpu_execution(setup, check_ref_counts):
    """Compare GPU-executed reductions, aggregations and unique() to pandas.

    A DataFrame and two Series are moved to GPU with ``to_gpu``; the fetched
    results of ``sum``, ``kurt``, ``agg`` and ``unique`` are checked against
    the same operations on the raw pandas data.

    ``setup`` and ``check_ref_counts`` are pytest fixtures that are requested
    but not referenced in the body.
    """
    # DataFrame reductions / aggregation.
    df_raw = pd.DataFrame(np.random.rand(30, 3), columns=list('abc'))
    df = to_gpu(md.DataFrame(df_raw, chunk_size=6))

    r = df.sum()
    res = r.execute().fetch()
    pd.testing.assert_series_equal(res.to_pandas(), df_raw.sum())

    r = df.kurt()
    res = r.execute().fetch()
    pd.testing.assert_series_equal(res.to_pandas(), df_raw.kurt())

    r = df.agg(['sum', 'var'])
    res = r.execute().fetch()
    pd.testing.assert_frame_equal(res.to_pandas(), df_raw.agg(['sum', 'var']))

    # Series reductions / aggregation.
    s_raw = pd.Series(np.random.rand(30))
    s = to_gpu(md.Series(s_raw, chunk_size=6))

    r = s.sum()
    res = r.execute().fetch()
    assert pytest.approx(res) == s_raw.sum()

    r = s.kurt()
    res = r.execute().fetch()
    assert pytest.approx(res) == s_raw.kurt()

    r = s.agg(['sum', 'var'])
    res = r.execute().fetch()
    pd.testing.assert_series_equal(res.to_pandas(), s_raw.agg(['sum', 'var']))

    # Series.unique on integer data that contains repeated values.
    s_raw = pd.Series(
        np.random.randint(0, 3, size=(30, )) *
        np.random.randint(0, 5, size=(30, )))
    s = to_gpu(md.Series(s_raw, chunk_size=6))

    r = s.unique()
    res = r.execute().fetch()
    # ndarray.sort() sorts in place and returns None, so comparing the return
    # values of .sort() would compare None with None and always pass.
    # np.sort returns sorted copies, so the actual values are compared.
    np.testing.assert_array_equal(
        np.sort(cp.asnumpy(res)),
        np.sort(s_raw.unique()))
Esempio n. 5
0
def test_to_gpu():
    """Check that to_gpu keeps metadata and flags the op as GPU.

    Covers a DataFrame and a Series, both before and after tiling, and
    checks that to_gpu on an already converted object returns that object.
    """
    # --- DataFrame ---
    random_index = np.random.randint(-100, 100, size=(10, ))
    random_columns = [np.random.bytes(10) for _ in range(10)]
    raw_frame = pd.DataFrame(np.random.rand(10, 10),
                             index=random_index,
                             columns=random_columns)
    frame = from_pandas_df(raw_frame)
    gpu_frame = to_gpu(frame)

    assert frame.index_value == gpu_frame.index_value
    assert frame.columns_value == gpu_frame.columns_value
    assert gpu_frame.op.gpu is True
    pd.testing.assert_series_equal(frame.dtypes, gpu_frame.dtypes)

    # Chunk-level checks on the tiled pair.
    frame, gpu_frame = tile(frame, gpu_frame)
    first_chunk = frame.chunks[0]
    first_gpu_chunk = gpu_frame.chunks[0]

    assert frame.nsplits == gpu_frame.nsplits
    assert first_chunk.index_value == first_gpu_chunk.index_value
    assert first_chunk.columns_value == first_gpu_chunk.columns_value
    assert first_gpu_chunk.op.gpu is True
    pd.testing.assert_series_equal(first_chunk.dtypes, first_gpu_chunk.dtypes)

    # Converting again must hand back the very same object.
    assert gpu_frame is to_gpu(gpu_frame)

    # --- Series ---
    raw_series = raw_frame.iloc[:, 0]
    series = from_pandas_series(raw_series)
    gpu_series = to_gpu(series)

    assert series.index_value == gpu_series.index_value
    assert gpu_series.op.gpu is True

    series, gpu_series = tile(series, gpu_series)

    assert series.nsplits == gpu_series.nsplits
    assert series.chunks[0].index_value == gpu_series.chunks[0].index_value
    assert gpu_series.chunks[0].op.gpu is True

    assert gpu_series is to_gpu(gpu_series)
Esempio n. 6
0
    def testToCPU(self):
        """Check that to_cpu after to_gpu keeps metadata and clears the GPU flag.

        Also checks that to_cpu on an object that is already on CPU returns
        that same object.
        """
        random_index = np.random.randint(-100, 100, size=(10,))
        random_columns = [np.random.bytes(10) for _ in range(10)]
        raw_frame = pd.DataFrame(np.random.rand(10, 10),
                                 index=random_index,
                                 columns=random_columns)
        frame = from_pandas_df(raw_frame)
        cpu_frame = to_cpu(to_gpu(frame))

        self.assertEqual(frame.index_value, cpu_frame.index_value)
        self.assertEqual(frame.columns_value, cpu_frame.columns_value)
        self.assertFalse(cpu_frame.op.gpu)
        pd.testing.assert_series_equal(frame.dtypes, cpu_frame.dtypes)

        # NOTE(review): the return value of tiles() is discarded and `frame`
        # is never tiled explicitly; this presumably relies on tiles()
        # updating both objects in place -- confirm against the library
        # version this test targets.
        cpu_frame.tiles()

        self.assertEqual(frame.nsplits, cpu_frame.nsplits)
        self.assertEqual(frame.chunks[0].index_value,
                         cpu_frame.chunks[0].index_value)
        self.assertEqual(frame.chunks[0].columns_value,
                         cpu_frame.chunks[0].columns_value)
        self.assertFalse(cpu_frame.chunks[0].op.gpu)
        pd.testing.assert_series_equal(frame.chunks[0].dtypes,
                                       cpu_frame.chunks[0].dtypes)

        # Converting again must hand back the very same object.
        self.assertIs(cpu_frame, to_cpu(cpu_frame))
Esempio n. 7
0
def test_to_cpu():
    """Check that to_cpu after to_gpu keeps metadata and clears the GPU flag.

    Also checks that to_cpu on an object that is already on CPU returns that
    same object.
    """
    random_index = np.random.randint(-100, 100, size=(10, ))
    random_columns = [np.random.bytes(10) for _ in range(10)]
    raw_frame = pd.DataFrame(np.random.rand(10, 10),
                             index=random_index,
                             columns=random_columns)
    frame = from_pandas_df(raw_frame)
    cpu_frame = to_cpu(to_gpu(frame))

    assert frame.index_value == cpu_frame.index_value
    assert frame.columns_value == cpu_frame.columns_value
    assert cpu_frame.op.gpu is False
    pd.testing.assert_series_equal(frame.dtypes, cpu_frame.dtypes)

    # Chunk-level checks on the tiled pair.
    frame, cpu_frame = tile(frame, cpu_frame)
    first_chunk = frame.chunks[0]
    first_cpu_chunk = cpu_frame.chunks[0]

    assert frame.nsplits == cpu_frame.nsplits
    assert first_chunk.index_value == first_cpu_chunk.index_value
    assert first_chunk.columns_value == first_cpu_chunk.columns_value
    assert first_cpu_chunk.op.gpu is False
    pd.testing.assert_series_equal(first_chunk.dtypes, first_cpu_chunk.dtypes)

    # Converting again must hand back the very same object.
    assert cpu_frame is to_cpu(cpu_frame)