Beispiel #1
0
def test_dataframe_corr_with(setup):
    """Compare ``DataFrame.corrwith`` with pandas on data containing NaN.

    Runs single-chunk and multi-chunk inputs, both axes, and DataFrame as
    well as Series arguments.  ``method='kendall'`` on chunked input is
    expected to raise.
    """
    def sparse(values):
        # Keep entries above 0.4, replace everything else with NaN.
        return np.where(values > 0.4, values, np.nan)

    def fetch(tileable):
        return tileable.execute().fetch()

    # The draws below share one seeded generator, so their order is kept.
    rng = np.random.RandomState(0)
    raw_df = pd.DataFrame(sparse(rng.rand(20, 10)),
                          columns=list('ABCDEFGHIJ'))
    raw_df2 = pd.DataFrame(sparse(rng.rand(20, 10)),
                           columns=list('ACDEGHIJKL'))
    raw_s = pd.Series(sparse(rng.rand(20)))
    raw_s2 = pd.Series(sparse(rng.rand(10)), index=raw_df2.columns)

    # Single-chunk inputs: results are compared without re-ordering.
    df = DataFrame(raw_df)
    df2 = DataFrame(raw_df2)
    for kwargs in ({}, {'axis': 1}, {'method': 'kendall'}):
        pd.testing.assert_series_equal(
            fetch(df.corrwith(df2, **kwargs)),
            raw_df.corrwith(raw_df2, **kwargs))

    # Multi-chunk inputs.
    df = DataFrame(raw_df, chunk_size=4)
    df2 = DataFrame(raw_df2, chunk_size=6)
    s = Series(raw_s, chunk_size=5)
    s2 = Series(raw_s2, chunk_size=5)

    with pytest.raises(Exception):
        df.corrwith(df2, method='kendall').execute()

    # Both sides are sorted by label before comparison.
    chunked_cases = (
        (df2, raw_df2, {}),
        (df2, raw_df2, {'axis': 1}),
        (s, raw_s, {}),
        (s2, raw_s2, {'axis': 1}),
    )
    for other, raw_other, kwargs in chunked_cases:
        pd.testing.assert_series_equal(
            fetch(df.corrwith(other, **kwargs)).sort_index(),
            raw_df.corrwith(raw_other, **kwargs).sort_index())
Beispiel #2
0
    def testSeriesQuantileExecution(self):
        """Check chunked ``Series.quantile`` against pandas for several ``q`` kinds."""
        pd_series = pd.Series(np.random.rand(10), name='a')
        mars_series = Series(pd_series, chunk_size=3)

        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # q = 0.5 (the default): scalar result
        self.assertEqual(run(mars_series.quantile()), pd_series.quantile())

        # q given as a list: Series result
        pd.testing.assert_series_equal(
            run(mars_series.quantile([0.3, 0.7])),
            pd_series.quantile([0.3, 0.7]))

        # non-default interpolation
        pd.testing.assert_series_equal(
            run(mars_series.quantile([0.3, 0.7], interpolation='midpoint')),
            pd_series.quantile([0.3, 0.7], interpolation='midpoint'))

        # q given as a tensor, built and executed inside the test context
        ctx, executor = self._create_test_context(self.executor)
        with ctx:
            q = tensor([0.3, 0.7])
            quantiles = executor.execute_dataframes(
                [mars_series.quantile(q)])[0]
            pd.testing.assert_series_equal(
                quantiles, pd_series.quantile([0.3, 0.7]))
Beispiel #3
0
def test_series_quantile_execution(setup):
    """Check chunked ``Series.quantile`` against pandas for several ``q`` kinds."""
    pd_series = pd.Series(np.random.rand(10), name='a')
    mars_series = Series(pd_series, chunk_size=3)

    def fetch(tileable):
        return tileable.execute().fetch()

    # q = 0.5 (the default): scalar result, compared exactly
    assert fetch(mars_series.quantile()) == pd_series.quantile()

    # q given as a list, first with default then with midpoint interpolation
    for kwargs in ({}, {'interpolation': 'midpoint'}):
        pd.testing.assert_series_equal(
            fetch(mars_series.quantile([0.3, 0.7], **kwargs)),
            pd_series.quantile([0.3, 0.7], **kwargs))

    # q given as a tensor
    q = tensor([0.3, 0.7])
    pd.testing.assert_series_equal(
        fetch(mars_series.quantile(q)),
        pd_series.quantile([0.3, 0.7]))
Beispiel #4
0
    def testGPUExecution(self):
        """Compare GPU-backed ``sort_values`` / ``sort_index`` with pandas.

        ``sort_values`` is exercised once per ``PSRS_DISTINCT_COL`` option
        (only ``'0'`` when ``sys.platform`` starts with ``'win'``).  The
        variable's previous state is restored when the test finishes, pass
        or fail, so the setting does not leak into later tests running in
        the same process.
        """
        env_key = 'PSRS_DISTINCT_COL'
        previous = os.environ.get(env_key)
        try:
            # test sort_values
            distinct_opts = ['0'] if sys.platform.lower().startswith('win') else [
                '0', '1'
            ]
            for add_distinct in distinct_opts:
                os.environ[env_key] = add_distinct

                # test dataframe
                raw = pd.DataFrame(np.random.rand(100, 10),
                                   columns=['a' + str(i) for i in range(10)])
                mdf = DataFrame(raw, chunk_size=30).to_gpu()

                result = self.executor.execute_dataframe(
                    mdf.sort_values(by='a0'), concat=True)[0]
                expected = raw.sort_values(by='a0')
                pd.testing.assert_frame_equal(result.to_pandas(), expected)

                # test series
                raw = pd.Series(np.random.rand(10))
                series = Series(raw).to_gpu()

                result = self.executor.execute_dataframe(
                    series.sort_values(), concat=True)[0]
                expected = raw.sort_values()
                pd.testing.assert_series_equal(result.to_pandas(), expected)

            # The sort_index checks below run with the last option still set,
            # exactly as they did before the restore logic was added.

            # test DataFrame.sort_index
            raw = pd.DataFrame(np.random.rand(10, 10),
                               columns=np.random.rand(10))
            mdf = DataFrame(raw).to_gpu()

            result = self.executor.execute_dataframe(mdf.sort_index(),
                                                     concat=True)[0]
            expected = raw.sort_index()
            pd.testing.assert_frame_equal(result.to_pandas(), expected)

            # test Series.sort_index
            raw = pd.Series(np.random.rand(10, ), index=np.random.rand(10))
            series = Series(raw).to_gpu()

            result = self.executor.execute_dataframe(series.sort_index(),
                                                     concat=True)[0]
            expected = raw.sort_index()
            pd.testing.assert_series_equal(result.to_pandas(), expected)
        finally:
            # Put the environment back the way it was found.
            if previous is None:
                os.environ.pop(env_key, None)
            else:
                os.environ[env_key] = previous
Beispiel #5
0
    def testSeriesQuantileExecution(self):
        """Check chunked ``Series.quantile`` against pandas for several ``q`` kinds."""
        pd_series = pd.Series(np.random.rand(10), name='a')
        mars_series = Series(pd_series, chunk_size=3)

        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # q = 0.5 (the default): scalar result
        self.assertEqual(run(mars_series.quantile()), pd_series.quantile())

        # q given as a list: Series result
        pd.testing.assert_series_equal(
            run(mars_series.quantile([0.3, 0.7])),
            pd_series.quantile([0.3, 0.7]))

        # non-default interpolation
        pd.testing.assert_series_equal(
            run(mars_series.quantile([0.3, 0.7], interpolation='midpoint')),
            pd_series.quantile([0.3, 0.7], interpolation='midpoint'))

        test_case = self

        class MockSession:
            # Stand-in session object that only carries the test's executor.
            def __init__(self):
                self.executor = test_case.executor

        ctx = LocalContext(MockSession())
        executor = ExecutorForTest('numpy', storage=ctx)
        with ctx:
            # q given as a tensor, built and executed inside the context
            q = tensor([0.3, 0.7])
            quantiles = executor.execute_dataframes(
                [mars_series.quantile(q)])[0]
            pd.testing.assert_series_equal(
                quantiles, pd_series.quantile([0.3, 0.7]))
Beispiel #6
0
    def testToDatetimeExecution(self):
        """Check ``to_datetime`` for scalar, list-like, Series, DataFrame and Index input."""
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # scalar: the executed value is unwrapped with .item() before comparing
        scalar_result = run(to_datetime(1490195805, unit='s'))
        self.assertEqual(pd.to_datetime(scalar_result.item()),
                         pd.to_datetime(1490195805, unit='s'))

        # list-like input wrapped in a chunked tensor
        raw = ['3/11/2000', '3/12/2000', '3/13/2000']
        chunked = tensor(raw, chunk_size=2)
        pd.testing.assert_index_equal(
            run(to_datetime(chunked, infer_datetime_format=True)),
            pd.to_datetime(raw, infer_datetime_format=True))

        # Series input
        raw_series = pd.Series(raw)
        mars_series = Series(raw_series, chunk_size=2)
        pd.testing.assert_series_equal(run(to_datetime(mars_series)),
                                       pd.to_datetime(raw_series))

        # DataFrame input with year / month / day columns
        raw_df = pd.DataFrame({
            'year': [2015, 2016],
            'month': [2, 3],
            'day': [4, 5]
        })
        mars_df = DataFrame(raw_df, chunk_size=(1, 2))
        pd.testing.assert_series_equal(run(to_datetime(mars_df)),
                                       pd.to_datetime(raw_df))

        # Index input
        raw_index = pd.Index([1, 2, 3])
        mars_index = Index(raw_index, chunk_size=2)
        pd.testing.assert_index_equal(run(to_datetime(mars_index)),
                                      pd.to_datetime(raw_index))

        # errors == 'ignore'
        raw = ['13000101']
        pd.testing.assert_index_equal(
            run(to_datetime(raw, format='%Y%m%d', errors='ignore')),
            pd.to_datetime(raw, format='%Y%m%d', errors='ignore'))

        # unit
        pd.testing.assert_index_equal(
            run(to_datetime([1490195805], unit='s')),
            pd.to_datetime([1490195805], unit='s'))

        # origin
        pd.testing.assert_index_equal(
            run(to_datetime([1, 2, 3], unit='D',
                            origin=pd.Timestamp('1960-01-01'))),
            pd.to_datetime([1, 2, 3], unit='D',
                           origin=pd.Timestamp('1960-01-01')))
Beispiel #7
0
    def testDotExecution(self):
        """Check ``dot`` / ``@`` between DataFrame, Series and ndarray operands."""
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        df1_raw = pd.DataFrame(np.random.rand(4, 7))
        df2_raw = pd.DataFrame(np.random.rand(7, 5), columns=list('efghi'))
        s1_raw = pd.Series(np.random.rand(7))
        s2_raw = pd.Series(np.random.rand(7))

        df1 = DataFrame(df1_raw, chunk_size=(3, 2))
        df2 = DataFrame(df2_raw, chunk_size=(3, 4))

        # DataFrame . DataFrame, via the method and via the @ operator
        pd.testing.assert_frame_equal(run(df1.dot(df2)),
                                      df1_raw.dot(df2_raw))
        pd.testing.assert_frame_equal(run(df1 @ df2), df1_raw @ df2_raw)

        series1 = Series(s1_raw, chunk_size=5)

        # DataFrame . Series
        pd.testing.assert_series_equal(run(df1.dot(series1)),
                                       df1_raw.dot(s1_raw))

        # DataFrame . 2-d ndarray
        pd.testing.assert_frame_equal(run(df1.dot(df2_raw.to_numpy())),
                                      df1_raw.dot(df2_raw.to_numpy()))

        # DataFrame . 1-d ndarray
        pd.testing.assert_series_equal(run(df1.dot(s1_raw.to_numpy())),
                                       df1_raw.dot(s1_raw.to_numpy()))

        series2 = Series(s2_raw, chunk_size=4)

        # Series . Series: scalar result, compared approximately
        self.assertAlmostEqual(run(series1.dot(series2)),
                               s1_raw.dot(s2_raw))

        # Series . DataFrame
        pd.testing.assert_series_equal(run(series1.dot(df2)),
                                       s1_raw.dot(df2_raw))

        # Series . 2-d ndarray
        np.testing.assert_almost_equal(run(series1.dot(df2_raw.to_numpy())),
                                       s1_raw.dot(df2_raw.to_numpy()))

        # Series . 1-d ndarray: scalar result, compared approximately
        self.assertAlmostEqual(run(series1.dot(s2_raw.to_numpy())),
                               s1_raw.dot(s2_raw.to_numpy()))
Beispiel #8
0
def _check_sort_values_against_pandas():
    # Run every sort_values scenario once under the current environment.
    def fetch(tileable):
        return tileable.execute().fetch()

    def check_frame(mars_df, pd_df, *args, **kwargs):
        # Same sort_values call on both sides, frames must match exactly.
        pd.testing.assert_frame_equal(
            fetch(mars_df.sort_values(*args, **kwargs)),
            pd_df.sort_values(*args, **kwargs))

    def check_series(mars_series, pd_series, **kwargs):
        pd.testing.assert_series_equal(
            fetch(mars_series.sort_values(**kwargs)),
            pd_series.sort_values(**kwargs))

    df = pd.DataFrame(np.random.rand(100, 10),
                      columns=['a' + str(i) for i in range(10)])

    # test one chunk
    mdf = DataFrame(df)
    check_frame(mdf, df, 'a0')
    check_frame(mdf, df, ['a6', 'a7'], ascending=False)

    # test psrs
    mdf = DataFrame(df, chunk_size=10)
    check_frame(mdf, df, 'a0')
    check_frame(mdf, df, ['a3', 'a4'])

    # test ascending=False
    check_frame(mdf, df, ['a0', 'a1'], ascending=False)
    check_frame(mdf, df, ['a7'], ascending=False)

    # test multiindex
    df2 = df.copy(deep=True)
    df2.columns = pd.MultiIndex.from_product([list('AB'), list('CDEFG')])
    mdf = DataFrame(df2, chunk_size=10)
    check_frame(mdf, df2, [('A', 'C')])

    # test rechunk
    mdf = DataFrame(df, chunk_size=3)
    check_frame(mdf, df, 'a0')
    check_frame(mdf, df, ['a3', 'a4'])

    # test other types
    raw = pd.DataFrame(
        {
            'a': np.random.rand(10),
            'b': np.random.randint(1000, size=10),
            'c': np.random.rand(10),
            'd': [np.random.bytes(10) for _ in range(10)],
            'e': [pd.Timestamp(f'201{i}') for i in range(10)],
            'f': [pd.Timedelta(f'{i} days') for i in range(10)]
        }, )
    mdf = DataFrame(raw, chunk_size=3)

    for label in raw.columns:
        check_frame(mdf, raw, label)
    check_frame(mdf, raw, ['a', 'b', 'e'], ascending=False)

    # test nan
    df = pd.DataFrame({
        'col1': ['A', 'A', 'B', 'B', 'D', 'C'],
        'col2': [2, 1, 9, np.nan, 7, 4],
        'col3': [0, 1, 9, 4, 2, 3],
    })
    check_frame(DataFrame(df), df, ['col2'])
    check_frame(DataFrame(df, chunk_size=3), df, ['col2'])

    # test None (issue #1885)
    df = pd.DataFrame(np.random.rand(1000, 10))

    # Build the 'A' / None column with one direct assignment.  The former
    # chained form ``df[0][mask] = ...`` writes to a temporary and is a
    # silent no-op under pandas copy-on-write, which would leave this
    # scenario sorting plain floats instead of None values.
    df[0] = np.where(df[0] < 0.5, 'A', None)

    check_frame(DataFrame(df), df, [0, 1])
    check_frame(DataFrame(df, chunk_size=100), df, [0, 1])

    # test ignore_index
    df = pd.DataFrame(np.random.rand(10, 3),
                      columns=['a' + str(i) for i in range(3)])

    mdf = DataFrame(df, chunk_size=3)
    result = fetch(mdf.sort_values(['a0', 'a1'], ignore_index=True))
    try:  # for python3.5
        expected = df.sort_values(['a0', 'a1'], ignore_index=True)
    except TypeError:
        expected = df.sort_values(['a0', 'a1'])
        expected.index = pd.RangeIndex(len(expected))

    pd.testing.assert_frame_equal(result, expected)

    # test inplace
    mdf = DataFrame(df)
    mdf.sort_values('a0', inplace=True)
    result = fetch(mdf)
    df.sort_values('a0', inplace=True)

    pd.testing.assert_frame_equal(result, df)

    # test unknown shape
    df = pd.DataFrame({'a': list(range(10)), 'b': np.random.random(10)})
    mdf = DataFrame(df, chunk_size=4)
    filtered = mdf[mdf['a'] > 2]
    check_frame(filtered, df[df['a'] > 2], by='b')

    # test empty dataframe
    df = pd.DataFrame({'a': list(range(10)), 'b': np.random.random(10)})
    mdf = DataFrame(df, chunk_size=4)
    filtered = mdf[mdf['b'] > 100]
    check_frame(filtered, df[df['b'] > 100], by='b')

    # test chunks with zero length
    df = pd.DataFrame({'a': list(range(10)), 'b': np.random.random(10)})
    df.iloc[4:8, 1] = 0

    mdf = DataFrame(df, chunk_size=4)
    filtered = mdf[mdf['b'] != 0]
    check_frame(filtered, df[df['b'] != 0], by='b')

    # test Series.sort_values
    raw = pd.Series(np.random.rand(10))
    check_series(Series(raw), raw)
    check_series(Series(raw, chunk_size=3), raw)
    check_series(Series(raw, chunk_size=2), raw, ascending=False)

    # test empty series
    series = pd.Series(list(range(10)), name='a')
    mseries = Series(series, chunk_size=4)
    filtered = mseries[mseries > 100]
    check_series(filtered, series[series > 100])

    # test series with None
    series = pd.Series(np.arange(1000, ))

    series[series < 500] = 'A'
    series[series != 'A'] = None

    mseries = Series(series, chunk_size=100)
    result = fetch(mseries.sort_values())
    expected = series.sort_values()
    # Only the value order is compared here; index labels are dropped.
    pd.testing.assert_series_equal(result.reset_index(drop=True),
                                   expected.reset_index(drop=True))


def test_sort_values_execution(setup):
    """Compare ``sort_values`` on frames and series with pandas.

    The whole scenario list runs once per ``PSRS_DISTINCT_COL`` option
    (only ``'0'`` when ``sys.platform`` starts with ``'win'``).  The
    variable's previous state is restored afterwards, pass or fail, so the
    setting does not leak into later tests in the same process.
    """
    env_key = 'PSRS_DISTINCT_COL'
    previous = os.environ.get(env_key)
    distinct_opts = ['0'] if sys.platform.lower().startswith('win') else [
        '0', '1'
    ]
    try:
        for add_distinct in distinct_opts:
            os.environ[env_key] = add_distinct
            _check_sort_values_against_pandas()
    finally:
        # Put the environment back the way it was found.
        if previous is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous
Beispiel #9
0
def test_sort_index_execution(setup):
    """Compare ``sort_index`` on frames (both axes) and series with pandas."""
    def fetch(tileable):
        return tileable.execute().fetch()

    def pandas_sorted_ignoring_index(frame, **kwargs):
        # Expected value for ignore_index=True; falls back to relabelling
        # the index by hand when pandas rejects the keyword.
        try:  # for python3.5
            return frame.sort_index(ignore_index=True, **kwargs)
        except TypeError:
            fallback = frame.sort_index(**kwargs)
            fallback.index = pd.RangeIndex(len(fallback))
            return fallback

    raw = pd.DataFrame(np.random.rand(100, 20), index=np.random.rand(100))

    # single chunk
    pd.testing.assert_frame_equal(fetch(DataFrame(raw).sort_index()),
                                  raw.sort_index())

    # in-place sort on a single chunk
    mdf = DataFrame(raw)
    mdf.sort_index(inplace=True)
    pd.testing.assert_frame_equal(fetch(mdf), raw.sort_index())

    # several chunks, ascending then descending
    mdf = DataFrame(raw, chunk_size=30)
    pd.testing.assert_frame_equal(fetch(mdf.sort_index()),
                                  raw.sort_index())

    mdf = DataFrame(raw, chunk_size=20)
    pd.testing.assert_frame_equal(fetch(mdf.sort_index(ascending=False)),
                                  raw.sort_index(ascending=False))

    # ignore_index
    mdf = DataFrame(raw, chunk_size=10)
    result = fetch(mdf.sort_index(ignore_index=True))
    pd.testing.assert_frame_equal(result,
                                  pandas_sorted_ignoring_index(raw))

    # test axis=1
    raw = pd.DataFrame(np.random.rand(10, 10), columns=np.random.rand(10))

    pd.testing.assert_frame_equal(fetch(DataFrame(raw).sort_index(axis=1)),
                                  raw.sort_index(axis=1))

    mdf = DataFrame(raw, chunk_size=3)
    pd.testing.assert_frame_equal(fetch(mdf.sort_index(axis=1)),
                                  raw.sort_index(axis=1))

    mdf = DataFrame(raw, chunk_size=4)
    pd.testing.assert_frame_equal(
        fetch(mdf.sort_index(axis=1, ascending=False)),
        raw.sort_index(axis=1, ascending=False))

    mdf = DataFrame(raw, chunk_size=4)
    result = fetch(mdf.sort_index(axis=1, ignore_index=True))
    pd.testing.assert_frame_equal(
        result, pandas_sorted_ignoring_index(raw, axis=1))

    # test series
    raw = pd.Series(np.random.rand(10, ), index=np.random.rand(10))

    pd.testing.assert_series_equal(fetch(Series(raw).sort_index()),
                                   raw.sort_index())

    series = Series(raw, chunk_size=2)
    pd.testing.assert_series_equal(fetch(series.sort_index()),
                                   raw.sort_index())

    series = Series(raw, chunk_size=3)
    pd.testing.assert_series_equal(
        fetch(series.sort_index(ascending=False)),
        raw.sort_index(ascending=False))
Beispiel #10
0
    def testSortValuesExecution(self):
        """Compare ``sort_values`` on frames and series with pandas."""
        def run(tileable, executor=self.executor):
            return executor.execute_dataframe(tileable, concat=True)[0]

        def check_frame(mars_df, pd_df, *args, **kwargs):
            # Same sort_values call on both sides, frames must match exactly.
            pd.testing.assert_frame_equal(
                run(mars_df.sort_values(*args, **kwargs)),
                pd_df.sort_values(*args, **kwargs))

        def check_series(mars_series, pd_series, **kwargs):
            pd.testing.assert_series_equal(
                run(mars_series.sort_values(**kwargs)),
                pd_series.sort_values(**kwargs))

        df = pd.DataFrame(np.random.rand(100, 10),
                          columns=['a' + str(i) for i in range(10)])

        # test one chunk
        mdf = DataFrame(df)
        check_frame(mdf, df, 'a0')
        check_frame(mdf, df, ['a6', 'a7'], ascending=False)

        # test psrs
        mdf = DataFrame(df, chunk_size=10)
        check_frame(mdf, df, 'a0')
        check_frame(mdf, df, ['a3', 'a4'])

        # test ascending=False
        check_frame(mdf, df, ['a0', 'a1'], ascending=False)
        check_frame(mdf, df, ['a7'], ascending=False)

        # test rechunk
        mdf = DataFrame(df, chunk_size=3)
        check_frame(mdf, df, 'a0')
        check_frame(mdf, df, ['a3', 'a4'])

        # test other types
        raw = pd.DataFrame(
            {
                'a': np.random.rand(10),
                'b': np.random.randint(1000, size=10),
                'c': np.random.rand(10),
                'd': [np.random.bytes(10) for _ in range(10)],
                'e': [pd.Timestamp('201{}'.format(i)) for i in range(10)],
                'f': [pd.Timedelta('{} days'.format(i)) for i in range(10)]
            }, )
        mdf = DataFrame(raw, chunk_size=3)

        for label in raw.columns:
            check_frame(mdf, raw, label)
        check_frame(mdf, raw, ['a', 'b', 'e'], ascending=False)

        # test nan
        df = pd.DataFrame({
            'col1': ['A', 'A', 'B', 'B', 'D', 'C'],
            'col2': [2, 1, 9, np.nan, 7, 4],
            'col3': [0, 1, 9, 4, 2, 3],
        })
        check_frame(DataFrame(df), df, ['col2'])
        check_frame(DataFrame(df, chunk_size=3), df, ['col2'])

        # test ignore_index (run on a dedicated executor backed by a new session)
        executor = ExecutorForTest(storage=new_session().context)

        df = pd.DataFrame(np.random.rand(10, 3),
                          columns=['a' + str(i) for i in range(3)])

        mdf = DataFrame(df, chunk_size=3)
        result = run(mdf.sort_values(['a0', 'a1'], ignore_index=True),
                     executor)
        try:  # for python3.5
            expected = df.sort_values(['a0', 'a1'], ignore_index=True)
        except TypeError:
            expected = df.sort_values(['a0', 'a1'])
            expected.index = pd.RangeIndex(len(expected))

        pd.testing.assert_frame_equal(result, expected)

        # test inplace
        mdf = DataFrame(df)
        mdf.sort_values('a0', inplace=True)
        result = run(mdf)
        df.sort_values('a0', inplace=True)

        pd.testing.assert_frame_equal(result, df)

        # test unknown shape
        df = pd.DataFrame({'a': list(range(10)), 'b': np.random.random(10)})
        mdf = DataFrame(df, chunk_size=4)
        filtered = mdf[mdf['a'] > 2]
        check_frame(filtered, df[df['a'] > 2], by='b')

        # test Series.sort_values
        raw = pd.Series(np.random.rand(10))
        check_series(Series(raw), raw)
        check_series(Series(raw, chunk_size=3), raw)
        check_series(Series(raw, chunk_size=2), raw, ascending=False)
    def testSeriesCorr(self):
        """Compare ``Series.corr`` / ``autocorr`` with pandas, whole and chunked."""
        def sparse_series(values):
            # Keep entries above 0.4, replace everything else with NaN.
            return pd.Series(np.where(values > 0.4, values, np.nan))

        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        # Both series come from one seeded generator, drawn in this order.
        rng = np.random.RandomState(0)
        raw = sparse_series(rng.rand(20))
        raw2 = sparse_series(rng.rand(20))

        # Unchunked input: results are compared for exact equality.
        s = Series(raw)
        s2 = Series(raw2)

        self.assertEqual(run(s.corr(s2)), raw.corr(raw2))
        self.assertEqual(run(s.corr(s2, method='kendall')),
                         raw.corr(raw2, method='kendall'))
        self.assertEqual(run(s.autocorr(2)), raw.autocorr(2))

        # Chunked input: kendall is expected to raise, the rest is compared
        # approximately.
        s = Series(raw, chunk_size=6)
        s2 = Series(raw2, chunk_size=4)

        with self.assertRaises(Exception):
            self.executor.execute_dataframe(s.corr(s2, method='kendall'),
                                            concat=True)

        self.assertAlmostEqual(run(s.corr(s2)), raw.corr(raw2))
        self.assertAlmostEqual(run(s.corr(s2, min_periods=7)),
                               raw.corr(raw2, min_periods=7))
        self.assertAlmostEqual(run(s.autocorr(2)), raw.autocorr(2))
Beispiel #12
0
def test_dot_execution(setup):
    """Compare ``dot`` (and ``@``) on chunked inputs with the pandas results.

    A DataFrame receiver and a Series receiver are each combined with a
    DataFrame, a Series, a 2-d ndarray and a 1-d ndarray operand.
    """
    frame_a = pd.DataFrame(np.random.rand(4, 7))
    frame_b = pd.DataFrame(np.random.rand(7, 5), columns=list('efghi'))
    vec_a = pd.Series(np.random.rand(7))
    vec_b = pd.Series(np.random.rand(7))

    # plain ndarray operands
    matrix_b = frame_b.to_numpy()
    array_a = vec_a.to_numpy()
    array_b = vec_b.to_numpy()

    df1 = DataFrame(frame_a, chunk_size=(3, 2))
    df2 = DataFrame(frame_b, chunk_size=(3, 4))
    series1 = Series(vec_a, chunk_size=5)
    series2 = Series(vec_b, chunk_size=4)

    # df.dot(df)
    pd.testing.assert_frame_equal(df1.dot(df2).execute().fetch(),
                                  frame_a.dot(frame_b))

    # test @
    pd.testing.assert_frame_equal((df1 @ df2).execute().fetch(),
                                  frame_a @ frame_b)

    # df.dot(series)
    pd.testing.assert_series_equal(df1.dot(series1).execute().fetch(),
                                   frame_a.dot(vec_a))

    # df.dot(2d_array)
    pd.testing.assert_frame_equal(df1.dot(matrix_b).execute().fetch(),
                                  frame_a.dot(matrix_b))

    # df.dot(1d_array)
    pd.testing.assert_series_equal(df1.dot(array_a).execute().fetch(),
                                   frame_a.dot(array_a))

    # series.dot(series)
    scalar = series1.dot(series2).execute().fetch()
    assert pytest.approx(scalar) == vec_a.dot(vec_b)

    # series.dot(df)
    pd.testing.assert_series_equal(series1.dot(df2).execute().fetch(),
                                   vec_a.dot(frame_b))

    # series.dot(2d_array)
    np.testing.assert_almost_equal(series1.dot(matrix_b).execute().fetch(),
                                   vec_a.dot(matrix_b))

    # series.dot(1d_array)
    scalar = series1.dot(array_b).execute().fetch()
    assert pytest.approx(scalar) == vec_a.dot(array_b)
Beispiel #13
0
def test_series_corr(setup):
    """Check ``Series.corr`` and ``Series.autocorr`` against pandas.

    The inputs contain NaN; the comparison is exact when no chunk_size is
    given and approximate when the two sides use different chunk sizes.
    """
    rng = np.random.RandomState(0)
    left_vals = rng.rand(20)
    right_vals = rng.rand(20)
    # entries that are not above 0.4 are replaced by NaN
    left_pd = pd.Series(np.where(left_vals > 0.4, left_vals, np.nan))
    right_pd = pd.Series(np.where(right_vals > 0.4, right_vals, np.nan))

    left, right = Series(left_pd), Series(right_pd)

    assert left.corr(right).execute().fetch() == left_pd.corr(right_pd)

    kendall = left.corr(right, method='kendall')
    assert kendall.execute().fetch() == left_pd.corr(right_pd,
                                                     method='kendall')

    assert left.autocorr(2).execute().fetch() == left_pd.autocorr(2)

    left = Series(left_pd, chunk_size=6)
    right = Series(right_pd, chunk_size=4)

    # method='kendall' is expected to raise for these chunked inputs
    with pytest.raises(Exception):
        left.corr(right, method='kendall').execute()

    value = left.corr(right).execute().fetch()
    assert pytest.approx(value) == left_pd.corr(right_pd)

    value = left.corr(right, min_periods=7).execute().fetch()
    assert pytest.approx(value) == left_pd.corr(right_pd, min_periods=7)

    value = left.autocorr(2).execute().fetch()
    assert pytest.approx(value) == left_pd.autocorr(2)
Beispiel #14
0
    def testSortIndexExecution(self):
        # sort_index() on DataFrame (rows and columns) and on Series is
        # compared with pandas, without and with an explicit chunk_size.
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        frame = pd.DataFrame(np.random.rand(100, 20),
                             index=np.random.rand(100))

        pd.testing.assert_frame_equal(run(DataFrame(frame).sort_index()),
                                      frame.sort_index())

        # inplace=True: the sorted data is read back from the object itself
        mdf = DataFrame(frame)
        mdf.sort_index(inplace=True)
        pd.testing.assert_frame_equal(run(mdf), frame.sort_index())

        for chunk_size, kwargs in ((30, {}), (20, {'ascending': False})):
            mdf = DataFrame(frame, chunk_size=chunk_size)
            pd.testing.assert_frame_equal(run(mdf.sort_index(**kwargs)),
                                          frame.sort_index(**kwargs))

        # the ignore_index case runs on its own executor
        executor = ExecutorForTest(storage=new_session().context)

        mdf = DataFrame(frame, chunk_size=10)
        result = executor.execute_dataframe(
            mdf.sort_index(ignore_index=True), concat=True)[0]
        try:  # for python3.5
            expected = frame.sort_index(ignore_index=True)
        except TypeError:
            # emulate ignore_index when sort_index() rejects the keyword
            expected = frame.sort_index()
            expected.index = pd.RangeIndex(len(expected))
        pd.testing.assert_frame_equal(result, expected)

        # test axis=1
        frame = pd.DataFrame(np.random.rand(10, 10),
                             columns=np.random.rand(10))

        pd.testing.assert_frame_equal(
            run(DataFrame(frame).sort_index(axis=1)),
            frame.sort_index(axis=1))

        column_cases = (
            (3, {'axis': 1}),
            (4, {'axis': 1, 'ascending': False}),
        )
        for chunk_size, kwargs in column_cases:
            mdf = DataFrame(frame, chunk_size=chunk_size)
            pd.testing.assert_frame_equal(run(mdf.sort_index(**kwargs)),
                                          frame.sort_index(**kwargs))

        mdf = DataFrame(frame, chunk_size=4)
        executor = ExecutorForTest(storage=new_session().context)

        result = executor.execute_dataframe(
            mdf.sort_index(axis=1, ignore_index=True), concat=True)[0]
        try:  # for python3.5
            expected = frame.sort_index(axis=1, ignore_index=True)
        except TypeError:
            expected = frame.sort_index(axis=1)
            # NOTE(review): this relabels the row index although the sort
            # is along the columns -- confirm that this is intended.
            expected.index = pd.RangeIndex(len(expected))
        pd.testing.assert_frame_equal(result, expected)

        # test series
        values = pd.Series(np.random.rand(10, ), index=np.random.rand(10))

        pd.testing.assert_series_equal(run(Series(values).sort_index()),
                                       values.sort_index())

        for chunk_size, kwargs in ((2, {}), (3, {'ascending': False})):
            series = Series(values, chunk_size=chunk_size)
            pd.testing.assert_series_equal(run(series.sort_index(**kwargs)),
                                           values.sort_index(**kwargs))
Beispiel #15
0
def test_to_datetime_execution(setup):
    """Check ``to_datetime`` against ``pd.to_datetime`` for several inputs.

    Inputs covered: a scalar, a chunked tensor of strings, a Series, a
    DataFrame with year/month/day columns, an Index, and plain lists used
    with the ``errors``, ``unit`` and ``origin`` keywords.
    """
    # scalar
    epoch_seconds = 1490195805
    fetched = to_datetime(epoch_seconds, unit='s').execute().fetch(
        extra_config={
            'check_dtypes': False,
            'check_shape': False
        })
    assert pd.to_datetime(fetched) == pd.to_datetime(epoch_seconds, unit='s')

    # test list like
    date_strings = ['3/11/2000', '3/12/2000', '3/13/2000']
    chunked = tensor(date_strings, chunk_size=2)
    fetched = to_datetime(chunked,
                          infer_datetime_format=True).execute().fetch()
    pd.testing.assert_index_equal(
        fetched, pd.to_datetime(date_strings, infer_datetime_format=True))

    # test series
    string_series = pd.Series(date_strings)
    fetched = to_datetime(Series(string_series,
                                 chunk_size=2)).execute().fetch()
    pd.testing.assert_series_equal(fetched, pd.to_datetime(string_series))

    # test DataFrame
    parts = pd.DataFrame({
        'year': [2015, 2016],
        'month': [2, 3],
        'day': [4, 5]
    })
    fetched = to_datetime(DataFrame(parts,
                                    chunk_size=(1, 2))).execute().fetch()
    pd.testing.assert_series_equal(fetched, pd.to_datetime(parts))

    # test Index
    int_index = pd.Index([1, 2, 3])
    fetched = to_datetime(Index(int_index, chunk_size=2)).execute().fetch()
    pd.testing.assert_index_equal(fetched, pd.to_datetime(int_index))

    # test raises == 'ignore'
    unparsable = ['13000101']
    fetched = to_datetime(unparsable, format='%Y%m%d',
                          errors='ignore').execute().fetch()
    pd.testing.assert_index_equal(
        fetched, pd.to_datetime(unparsable, format='%Y%m%d', errors='ignore'))

    # test unit
    fetched = to_datetime([1490195805], unit='s').execute().fetch()
    pd.testing.assert_index_equal(fetched,
                                  pd.to_datetime([1490195805], unit='s'))

    # test origin
    fetched = to_datetime([1, 2, 3], unit='D',
                          origin=pd.Timestamp('1960-01-01')).execute().fetch()
    pd.testing.assert_index_equal(
        fetched,
        pd.to_datetime([1, 2, 3],
                       unit='D',
                       origin=pd.Timestamp('1960-01-01')))
Beispiel #16
0
    def testSortValuesExecution(self):
        # Run the sort_values checks once per PSRS_DISTINCT_COL setting; only
        # '0' is used when sys.platform starts with 'win'.
        distinct_opts = ['0'] if sys.platform.lower().startswith('win') else [
            '0', '1'
        ]
        # Remember the incoming value so the variable set below does not leak
        # into whatever runs after this test in the same process.
        previous = os.environ.get('PSRS_DISTINCT_COL')
        try:
            for add_distinct in distinct_opts:
                os.environ['PSRS_DISTINCT_COL'] = add_distinct
                self._checkSortValuesExecution()
        finally:
            if previous is None:
                os.environ.pop('PSRS_DISTINCT_COL', None)
            else:
                os.environ['PSRS_DISTINCT_COL'] = previous

    def _checkSortValuesExecution(self):
        # One pass of the sort_values comparisons against pandas, using
        # whatever PSRS_DISTINCT_COL value is currently set.
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        df = pd.DataFrame(np.random.rand(100, 10),
                          columns=['a' + str(i) for i in range(10)])

        # test one chunk
        mdf = DataFrame(df)
        result = run(mdf.sort_values('a0'))
        expected = df.sort_values('a0')

        pd.testing.assert_frame_equal(result, expected)

        result = run(mdf.sort_values(['a6', 'a7'], ascending=False))
        expected = df.sort_values(['a6', 'a7'], ascending=False)

        pd.testing.assert_frame_equal(result, expected)

        # test psrs
        mdf = DataFrame(df, chunk_size=10)
        result = run(mdf.sort_values('a0'))
        expected = df.sort_values('a0')

        pd.testing.assert_frame_equal(result, expected)

        result = run(mdf.sort_values(['a3', 'a4']))
        expected = df.sort_values(['a3', 'a4'])

        pd.testing.assert_frame_equal(result, expected)

        # test ascending=False
        result = run(mdf.sort_values(['a0', 'a1'], ascending=False))
        expected = df.sort_values(['a0', 'a1'], ascending=False)

        pd.testing.assert_frame_equal(result, expected)

        result = run(mdf.sort_values(['a7'], ascending=False))
        expected = df.sort_values(['a7'], ascending=False)

        pd.testing.assert_frame_equal(result, expected)

        # test multiindex
        df2 = df.copy(deep=True)
        df2.columns = pd.MultiIndex.from_product(
            [list('AB'), list('CDEFG')])
        mdf = DataFrame(df2, chunk_size=10)

        result = run(mdf.sort_values([('A', 'C')]))
        expected = df2.sort_values([('A', 'C')])

        pd.testing.assert_frame_equal(result, expected)

        # test rechunk
        mdf = DataFrame(df, chunk_size=3)
        result = run(mdf.sort_values('a0'))
        expected = df.sort_values('a0')

        pd.testing.assert_frame_equal(result, expected)

        result = run(mdf.sort_values(['a3', 'a4']))
        expected = df.sort_values(['a3', 'a4'])

        pd.testing.assert_frame_equal(result, expected)

        # test other types
        raw = pd.DataFrame(
            {
                'a': np.random.rand(10),
                'b': np.random.randint(1000, size=10),
                'c': np.random.rand(10),
                'd': [np.random.bytes(10) for _ in range(10)],
                'e': [pd.Timestamp(f'201{i}') for i in range(10)],
                'f': [pd.Timedelta(f'{i} days') for i in range(10)]
            }, )
        mdf = DataFrame(raw, chunk_size=3)

        for label in raw.columns:
            result = run(mdf.sort_values(label))
            expected = raw.sort_values(label)
            pd.testing.assert_frame_equal(result, expected)

        result = run(mdf.sort_values(['a', 'b', 'e'], ascending=False))
        expected = raw.sort_values(['a', 'b', 'e'], ascending=False)

        pd.testing.assert_frame_equal(result, expected)

        # test nan
        df = pd.DataFrame({
            'col1': ['A', 'A', 'B', 'B', 'D', 'C'],
            'col2': [2, 1, 9, np.nan, 7, 4],
            'col3': [0, 1, 9, 4, 2, 3],
        })
        mdf = DataFrame(df)
        result = run(mdf.sort_values(['col2']))
        expected = df.sort_values(['col2'])

        pd.testing.assert_frame_equal(result, expected)

        mdf = DataFrame(df, chunk_size=3)
        result = run(mdf.sort_values(['col2']))
        expected = df.sort_values(['col2'])

        pd.testing.assert_frame_equal(result, expected)

        # test ignore_index (runs on its own executor)
        executor = ExecutorForTest(storage=new_session().context)

        df = pd.DataFrame(np.random.rand(10, 3),
                          columns=['a' + str(i) for i in range(3)])

        mdf = DataFrame(df, chunk_size=3)
        result = executor.execute_dataframe(
            mdf.sort_values(['a0', 'a1'], ignore_index=True),
            concat=True)[0]
        try:  # for python3.5
            expected = df.sort_values(['a0', 'a1'], ignore_index=True)
        except TypeError:
            # emulate ignore_index when sort_values() rejects the keyword
            expected = df.sort_values(['a0', 'a1'])
            expected.index = pd.RangeIndex(len(expected))

        pd.testing.assert_frame_equal(result, expected)

        # test inplace
        mdf = DataFrame(df)
        mdf.sort_values('a0', inplace=True)
        # execute before sorting the pandas frame in place, as the frame
        # object was handed to DataFrame() above
        result = run(mdf)
        df.sort_values('a0', inplace=True)

        pd.testing.assert_frame_equal(result, df)

        # test unknown shape
        df = pd.DataFrame({
            'a': list(range(10)),
            'b': np.random.random(10)
        })
        mdf = DataFrame(df, chunk_size=4)
        filtered = mdf[mdf['a'] > 2]
        result = run(filtered.sort_values(by='b'))

        pd.testing.assert_frame_equal(result,
                                      df[df['a'] > 2].sort_values(by='b'))

        # test Series.sort_values
        raw = pd.Series(np.random.rand(10))
        series = Series(raw)
        result = run(series.sort_values())
        expected = raw.sort_values()

        pd.testing.assert_series_equal(result, expected)

        series = Series(raw, chunk_size=3)
        result = run(series.sort_values())
        expected = raw.sort_values()

        pd.testing.assert_series_equal(result, expected)

        series = Series(raw, chunk_size=2)
        result = run(series.sort_values(ascending=False))
        expected = raw.sort_values(ascending=False)

        pd.testing.assert_series_equal(result, expected)
    def testDataFrameCorrWith(self):
        # DataFrame.corrwith must agree with pandas for DataFrame and Series
        # operands along both axes, on data that contains NaN.
        def run(tileable):
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        def with_nan(values):
            # entries that are not above 0.4 are replaced by NaN
            return np.where(values > 0.4, values, np.nan)

        # the four draws below come from one seeded generator, in this order
        rng = np.random.RandomState(0)
        frame_vals = rng.rand(20, 10)
        other_vals = rng.rand(20, 10)
        row_vals = rng.rand(20)
        col_vals = rng.rand(10)

        # the two frames share only part of their column labels
        frame_pd = pd.DataFrame(with_nan(frame_vals),
                                columns=list('ABCDEFGHIJ'))
        other_pd = pd.DataFrame(with_nan(other_vals),
                                columns=list('ACDEGHIJKL'))
        rows_pd = pd.Series(with_nan(row_vals))
        cols_pd = pd.Series(with_nan(col_vals), index=other_pd.columns)

        df = DataFrame(frame_pd)
        df2 = DataFrame(other_pd)

        pd.testing.assert_series_equal(run(df.corrwith(df2)),
                                       frame_pd.corrwith(other_pd))

        pd.testing.assert_series_equal(run(df.corrwith(df2, axis=1)),
                                       frame_pd.corrwith(other_pd, axis=1))

        pd.testing.assert_series_equal(
            run(df.corrwith(df2, method='kendall')),
            frame_pd.corrwith(other_pd, method='kendall'))

        df = DataFrame(frame_pd, chunk_size=4)
        df2 = DataFrame(other_pd, chunk_size=6)
        s = Series(rows_pd, chunk_size=5)
        s2 = Series(cols_pd, chunk_size=5)

        # method='kendall' is expected to raise for these chunked inputs
        with self.assertRaises(Exception):
            run(df.corrwith(df2, method='kendall'))

        # both sides are sorted by label before the chunked comparisons
        pd.testing.assert_series_equal(
            run(df.corrwith(df2)).sort_index(),
            frame_pd.corrwith(other_pd).sort_index())

        pd.testing.assert_series_equal(
            run(df.corrwith(df2, axis=1)).sort_index(),
            frame_pd.corrwith(other_pd, axis=1).sort_index())

        pd.testing.assert_series_equal(
            run(df.corrwith(s)).sort_index(),
            frame_pd.corrwith(rows_pd).sort_index())

        pd.testing.assert_series_equal(
            run(df.corrwith(s2, axis=1)).sort_index(),
            frame_pd.corrwith(cols_pd, axis=1).sort_index())