def testDataFrameCorr(self):
        """Compare ``DataFrame.corr`` results against ``pandas.DataFrame.corr``.

        The fixture is a 20x10 block of seeded random floats in which every
        value not greater than 0.4 is replaced by NaN, plus a constant string
        column ``'k'``.  The comparison is run once with the default
        construction and once with ``chunk_size=6``.  With ``chunk_size=6``
        the ``'kendall'`` method is expected to raise.
        """
        def fetch(tileable):
            # Run through the test executor and keep the first item of the
            # returned sequence.
            return self.executor.execute_dataframe(tileable, concat=True)[0]

        values = np.random.RandomState(0).rand(20, 10)
        expected_src = pd.DataFrame(
            np.where(values > 0.4, values, np.nan),
            columns=list('ABCDEFGHIJ'),
        )
        # Non-numeric column riding along with the numeric ones.
        expected_src['k'] = pd.Series(['aaa'] * 20)

        # Default construction: default method and Kendall must both match.
        whole = DataFrame(expected_src)
        for corr_kwargs in ({}, {'method': 'kendall'}):
            pd.testing.assert_frame_equal(
                fetch(whole.corr(**corr_kwargs)),
                expected_src.corr(**corr_kwargs),
            )

        chunked = DataFrame(expected_src, chunk_size=6)

        # NOTE(review): presumably Kendall is unsupported once the data is
        # split into several chunks -- confirm against the implementation.
        with self.assertRaises(Exception):
            self.executor.execute_dataframe(
                chunked.corr(method='kendall'), concat=True)

        # Chunked input: default method, with and without min_periods.
        for corr_kwargs in ({}, {'min_periods': 7}):
            pd.testing.assert_frame_equal(
                fetch(chunked.corr(**corr_kwargs)),
                expected_src.corr(**corr_kwargs),
            )
Exemple #2
0
def test_dataframe_corr(setup):
    """Compare ``DataFrame.corr`` results against ``pandas.DataFrame.corr``.

    Builds a 20x10 frame of seeded random floats where every value not
    greater than 0.4 becomes NaN, adds a constant string column ``'k'``, and
    checks the executed result against pandas for the default construction
    and for ``chunk_size=6``.  With ``chunk_size=6`` the ``'kendall'`` method
    is expected to raise.

    ``setup`` is a fixture argument that the body never references.
    """
    noise = np.random.RandomState(0).rand(20, 10)
    pdf = pd.DataFrame(
        np.where(noise > 0.4, noise, np.nan),
        columns=list('ABCDEFGHIJ'),
    )
    # Non-numeric column riding along with the numeric ones.
    pdf['k'] = pd.Series(['aaa'] * 20)

    def assert_corr_matches(frame, **corr_kwargs):
        # Execute and fetch the result first, then compute the pandas
        # reference with the very same keyword arguments.
        actual = frame.corr(**corr_kwargs).execute().fetch()
        pd.testing.assert_frame_equal(actual, pdf.corr(**corr_kwargs))

    unchunked = DataFrame(pdf)
    assert_corr_matches(unchunked)
    assert_corr_matches(unchunked, method='kendall')

    split = DataFrame(pdf, chunk_size=6)

    # NOTE(review): presumably Kendall is unsupported once the data is split
    # into several chunks -- confirm against the implementation.
    with pytest.raises(Exception):
        split.corr(method='kendall').execute()

    assert_corr_matches(split)
    assert_corr_matches(split, min_periods=7)