Example #1
0
    def testGPUExecution(self):
        """Check sort_values and sort_index on data moved to the GPU.

        Each result is converted back with ``to_pandas()`` and compared with
        the equivalent pandas operation on the raw data.

        The sort_values checks are repeated for every value of the
        ``PSRS_DISTINCT_COL`` environment variable listed in ``distinct_opts``
        (only ``'0'`` on Windows, ``'0'`` and ``'1'`` elsewhere). The variable's
        previous value is restored when the test finishes so the override does
        not leak into other tests.
        """
        distinct_opts = ['0'] if sys.platform.lower().startswith('win') else [
            '0', '1'
        ]

        # Remember the incoming setting; the loop below overwrites it.
        previous_distinct = os.environ.get('PSRS_DISTINCT_COL')
        try:
            # test sort_values
            for add_distinct in distinct_opts:
                os.environ['PSRS_DISTINCT_COL'] = add_distinct

                # test dataframe
                raw = pd.DataFrame(np.random.rand(100, 10),
                                   columns=['a' + str(i) for i in range(10)])
                mdf = DataFrame(raw, chunk_size=30).to_gpu()

                result = self.executor.execute_dataframe(
                    mdf.sort_values(by='a0'), concat=True)[0]
                expected = raw.sort_values(by='a0')
                pd.testing.assert_frame_equal(result.to_pandas(), expected)

                # test series
                raw = pd.Series(np.random.rand(10))
                series = Series(raw).to_gpu()

                result = self.executor.execute_dataframe(series.sort_values(),
                                                         concat=True)[0]
                expected = raw.sort_values()
                pd.testing.assert_series_equal(result.to_pandas(), expected)

            # The sort_index checks stay inside the ``try`` so they still run
            # with the last value assigned by the loop, as they did before.

            # test DataFrame.sort_index
            raw = pd.DataFrame(np.random.rand(10, 10),
                               columns=np.random.rand(10))
            mdf = DataFrame(raw).to_gpu()

            result = self.executor.execute_dataframe(mdf.sort_index(),
                                                     concat=True)[0]
            expected = raw.sort_index()
            pd.testing.assert_frame_equal(result.to_pandas(), expected)

            # test Series.sort_index
            raw = pd.Series(np.random.rand(10, ), index=np.random.rand(10))
            series = Series(raw).to_gpu()

            result = self.executor.execute_dataframe(series.sort_index(),
                                                     concat=True)[0]
            expected = raw.sort_index()
            pd.testing.assert_series_equal(result.to_pandas(), expected)
        finally:
            # Put the environment back exactly as it was found.
            if previous_distinct is None:
                os.environ.pop('PSRS_DISTINCT_COL', None)
            else:
                os.environ['PSRS_DISTINCT_COL'] = previous_distinct
Example #2
0
    def testSortIndexExecution(self):
        """Compare ``sort_index`` results with pandas for DataFrame and Series.

        Covers, in order:

        * row-wise sorting of a DataFrame: unchunked, in place, with several
          chunk sizes, descending, and with ``ignore_index=True``;
        * column-wise sorting (``axis=1``) with the same variations except
          the in-place form;
        * Series sorting, ascending and descending, with several chunk sizes.

        Every result is checked against the equivalent pandas call on the
        raw data.
        """
        raw = pd.DataFrame(np.random.rand(100, 20), index=np.random.rand(100))

        mdf = DataFrame(raw)
        result = self.executor.execute_dataframe(mdf.sort_index(),
                                                 concat=True)[0]
        expected = raw.sort_index()
        pd.testing.assert_frame_equal(result, expected)

        # in-place variant: the sorted tileable replaces ``mdf`` itself
        mdf = DataFrame(raw)
        mdf.sort_index(inplace=True)
        result = self.executor.execute_dataframe(mdf, concat=True)[0]
        expected = raw.sort_index()
        pd.testing.assert_frame_equal(result, expected)

        mdf = DataFrame(raw, chunk_size=30)
        result = self.executor.execute_dataframe(mdf.sort_index(),
                                                 concat=True)[0]
        expected = raw.sort_index()
        pd.testing.assert_frame_equal(result, expected)

        mdf = DataFrame(raw, chunk_size=20)
        result = self.executor.execute_dataframe(
            mdf.sort_index(ascending=False), concat=True)[0]
        expected = raw.sort_index(ascending=False)
        pd.testing.assert_frame_equal(result, expected)

        # The ignore_index cases run on a separate executor backed by a new
        # session's context. NOTE(review): the reason is not visible here.
        executor = ExecutorForTest(storage=new_session().context)

        mdf = DataFrame(raw, chunk_size=10)
        result = executor.execute_dataframe(mdf.sort_index(ignore_index=True),
                                            concat=True)[0]
        try:
            expected = raw.sort_index(ignore_index=True)
        except TypeError:
            # pandas without the ``ignore_index`` keyword (the original note
            # ties this to Python 3.5 environments): relabel the rows by hand.
            expected = raw.sort_index()
            expected.index = pd.RangeIndex(len(expected))
        pd.testing.assert_frame_equal(result, expected)

        # test axis=1
        raw = pd.DataFrame(np.random.rand(10, 10), columns=np.random.rand(10))

        mdf = DataFrame(raw)
        result = self.executor.execute_dataframe(mdf.sort_index(axis=1),
                                                 concat=True)[0]
        expected = raw.sort_index(axis=1)
        pd.testing.assert_frame_equal(result, expected)

        mdf = DataFrame(raw, chunk_size=3)
        result = self.executor.execute_dataframe(mdf.sort_index(axis=1),
                                                 concat=True)[0]
        expected = raw.sort_index(axis=1)
        pd.testing.assert_frame_equal(result, expected)

        mdf = DataFrame(raw, chunk_size=4)
        result = self.executor.execute_dataframe(
            mdf.sort_index(axis=1, ascending=False), concat=True)[0]
        expected = raw.sort_index(axis=1, ascending=False)
        pd.testing.assert_frame_equal(result, expected)

        mdf = DataFrame(raw, chunk_size=4)
        executor = ExecutorForTest(storage=new_session().context)

        result = executor.execute_dataframe(
            mdf.sort_index(axis=1, ignore_index=True), concat=True)[0]
        try:
            expected = raw.sort_index(axis=1, ignore_index=True)
        except TypeError:
            # Same fallback as above, but ``ignore_index`` relabels the axis
            # that was sorted, which is the columns here, not the row index.
            expected = raw.sort_index(axis=1)
            expected.columns = pd.RangeIndex(expected.shape[1])
        pd.testing.assert_frame_equal(result, expected)

        # test series
        raw = pd.Series(np.random.rand(10, ), index=np.random.rand(10))

        series = Series(raw)
        result = self.executor.execute_dataframe(series.sort_index(),
                                                 concat=True)[0]
        expected = raw.sort_index()
        pd.testing.assert_series_equal(result, expected)

        series = Series(raw, chunk_size=2)
        result = self.executor.execute_dataframe(series.sort_index(),
                                                 concat=True)[0]
        expected = raw.sort_index()
        pd.testing.assert_series_equal(result, expected)

        series = Series(raw, chunk_size=3)
        result = self.executor.execute_dataframe(
            series.sort_index(ascending=False), concat=True)[0]
        expected = raw.sort_index(ascending=False)
        pd.testing.assert_series_equal(result, expected)
Example #3
0
def test_sort_index_execution(setup):
    """Compare ``sort_index`` results with pandas for DataFrame and Series.

    Covers, in order:

    * row-wise sorting of a DataFrame: unchunked, in place, with several
      chunk sizes, descending, and with ``ignore_index=True``;
    * column-wise sorting (``axis=1``) with the same variations except the
      in-place form;
    * Series sorting, ascending and descending, with several chunk sizes.

    Every result is obtained with ``.execute().fetch()`` and checked against
    the equivalent pandas call on the raw data.

    ``setup`` is not referenced in the body; presumably it is a pytest fixture
    that prepares the execution session -- confirm against the conftest.
    """
    raw = pd.DataFrame(np.random.rand(100, 20), index=np.random.rand(100))

    mdf = DataFrame(raw)
    result = mdf.sort_index().execute().fetch()
    expected = raw.sort_index()
    pd.testing.assert_frame_equal(result, expected)

    # in-place variant: the sorted tileable replaces ``mdf`` itself
    mdf = DataFrame(raw)
    mdf.sort_index(inplace=True)
    result = mdf.execute().fetch()
    expected = raw.sort_index()
    pd.testing.assert_frame_equal(result, expected)

    mdf = DataFrame(raw, chunk_size=30)
    result = mdf.sort_index().execute().fetch()
    expected = raw.sort_index()
    pd.testing.assert_frame_equal(result, expected)

    mdf = DataFrame(raw, chunk_size=20)
    result = mdf.sort_index(ascending=False).execute().fetch()
    expected = raw.sort_index(ascending=False)
    pd.testing.assert_frame_equal(result, expected)

    mdf = DataFrame(raw, chunk_size=10)
    result = mdf.sort_index(ignore_index=True).execute().fetch()
    try:
        expected = raw.sort_index(ignore_index=True)
    except TypeError:
        # pandas without the ``ignore_index`` keyword (the original note ties
        # this to Python 3.5 environments): relabel the rows by hand.
        expected = raw.sort_index()
        expected.index = pd.RangeIndex(len(expected))
    pd.testing.assert_frame_equal(result, expected)

    # test axis=1
    raw = pd.DataFrame(np.random.rand(10, 10), columns=np.random.rand(10))

    mdf = DataFrame(raw)
    result = mdf.sort_index(axis=1).execute().fetch()
    expected = raw.sort_index(axis=1)
    pd.testing.assert_frame_equal(result, expected)

    mdf = DataFrame(raw, chunk_size=3)
    result = mdf.sort_index(axis=1).execute().fetch()
    expected = raw.sort_index(axis=1)
    pd.testing.assert_frame_equal(result, expected)

    mdf = DataFrame(raw, chunk_size=4)
    result = mdf.sort_index(axis=1, ascending=False).execute().fetch()
    expected = raw.sort_index(axis=1, ascending=False)
    pd.testing.assert_frame_equal(result, expected)

    mdf = DataFrame(raw, chunk_size=4)

    result = mdf.sort_index(axis=1, ignore_index=True).execute().fetch()
    try:
        expected = raw.sort_index(axis=1, ignore_index=True)
    except TypeError:
        # Same fallback as above, but ``ignore_index`` relabels the axis that
        # was sorted, which is the columns here, not the row index.
        expected = raw.sort_index(axis=1)
        expected.columns = pd.RangeIndex(expected.shape[1])
    pd.testing.assert_frame_equal(result, expected)

    # test series
    raw = pd.Series(np.random.rand(10, ), index=np.random.rand(10))

    series = Series(raw)
    result = series.sort_index().execute().fetch()
    expected = raw.sort_index()
    pd.testing.assert_series_equal(result, expected)

    series = Series(raw, chunk_size=2)
    result = series.sort_index().execute().fetch()
    expected = raw.sort_index()
    pd.testing.assert_series_equal(result, expected)

    series = Series(raw, chunk_size=3)
    result = series.sort_index(ascending=False).execute().fetch()
    expected = raw.sort_index(ascending=False)
    pd.testing.assert_series_equal(result, expected)