def test_dataframe_corr_with(setup):
    """Compare ``DataFrame.corrwith`` results with pandas on seeded random data.

    Two 20x10 frames whose column labels only partly overlap, plus one
    series of length 20 and one of length 10 (indexed by the second frame's
    columns), are generated with every value not above 0.4 replaced by NaN.
    The checks run first on objects built without ``chunk_size`` and then on
    objects built with explicit ``chunk_size`` values; in the latter case
    ``method='kendall'`` is expected to raise.

    ``setup`` is not referenced in the body (presumably a pytest fixture
    requested for its side effects -- confirm against the conftest).
    """
    def _with_nans(values):
        # Keep entries strictly above 0.4, turn everything else into NaN.
        return np.where(values > 0.4, values, np.nan)

    rng = np.random.RandomState(0)
    # The draws must stay in this order so the seeded data is unchanged.
    raw_df = pd.DataFrame(_with_nans(rng.rand(20, 10)),
                          columns=list('ABCDEFGHIJ'))
    raw_df2 = pd.DataFrame(_with_nans(rng.rand(20, 10)),
                           columns=list('ACDEGHIJKL'))
    raw_s = pd.Series(_with_nans(rng.rand(20)))
    raw_s2 = pd.Series(_with_nans(rng.rand(10)), index=raw_df2.columns)

    # Objects built without chunk_size: results are compared as returned.
    df = DataFrame(raw_df)
    df2 = DataFrame(raw_df2)
    for options in ({}, {'axis': 1}, {'method': 'kendall'}):
        actual = df.corrwith(df2, **options).execute().fetch()
        expected = raw_df.corrwith(raw_df2, **options)
        pd.testing.assert_series_equal(actual, expected)

    # Objects built with explicit chunk_size values.
    df = DataFrame(raw_df, chunk_size=4)
    df2 = DataFrame(raw_df2, chunk_size=6)
    s = Series(raw_s, chunk_size=5)
    s2 = Series(raw_s2, chunk_size=5)

    with pytest.raises(Exception):
        df.corrwith(df2, method='kendall').execute()

    cases = (
        (df2, raw_df2, {}),
        (df2, raw_df2, {'axis': 1}),
        (s, raw_s, {}),
        (s2, raw_s2, {'axis': 1}),
    )
    for other, raw_other, options in cases:
        # Both sides are sorted by index before comparison.
        actual = df.corrwith(other, **options).execute().fetch().sort_index()
        expected = raw_df.corrwith(raw_other, **options).sort_index()
        pd.testing.assert_series_equal(actual, expected)
def testDataFrameCorrWith(self):
    """Compare ``DataFrame.corrwith`` results with pandas via ``self.executor``.

    Two 20x10 frames whose column labels only partly overlap, plus one
    series of length 20 and one of length 10 (indexed by the second frame's
    columns), are generated with every value not above 0.4 replaced by NaN.
    Each result is obtained with
    ``self.executor.execute_dataframe(..., concat=True)[0]``. The checks run
    first on objects built without ``chunk_size`` and then on objects built
    with explicit ``chunk_size`` values; in the latter case
    ``method='kendall'`` is expected to raise.
    """
    def _with_nans(values):
        # Keep entries strictly above 0.4, turn everything else into NaN.
        return np.where(values > 0.4, values, np.nan)

    def _run(tileable):
        # Execute and take the first element of the returned sequence.
        return self.executor.execute_dataframe(tileable, concat=True)[0]

    rng = np.random.RandomState(0)
    # The draws must stay in this order so the seeded data is unchanged.
    raw_df = pd.DataFrame(_with_nans(rng.rand(20, 10)),
                          columns=list('ABCDEFGHIJ'))
    raw_df2 = pd.DataFrame(_with_nans(rng.rand(20, 10)),
                           columns=list('ACDEGHIJKL'))
    raw_s = pd.Series(_with_nans(rng.rand(20)))
    raw_s2 = pd.Series(_with_nans(rng.rand(10)), index=raw_df2.columns)

    # Objects built without chunk_size: results are compared as returned.
    df = DataFrame(raw_df)
    df2 = DataFrame(raw_df2)
    for options in ({}, {'axis': 1}, {'method': 'kendall'}):
        actual = _run(df.corrwith(df2, **options))
        expected = raw_df.corrwith(raw_df2, **options)
        pd.testing.assert_series_equal(actual, expected)

    # Objects built with explicit chunk_size values.
    df = DataFrame(raw_df, chunk_size=4)
    df2 = DataFrame(raw_df2, chunk_size=6)
    s = Series(raw_s, chunk_size=5)
    s2 = Series(raw_s2, chunk_size=5)

    with self.assertRaises(Exception):
        # Both building and executing the operation sit inside the context,
        # and the result is deliberately not indexed here.
        self.executor.execute_dataframe(
            df.corrwith(df2, method='kendall'), concat=True)

    cases = (
        (df2, raw_df2, {}),
        (df2, raw_df2, {'axis': 1}),
        (s, raw_s, {}),
        (s2, raw_s2, {'axis': 1}),
    )
    for other, raw_other, options in cases:
        # Both sides are sorted by index before comparison.
        actual = _run(df.corrwith(other, **options)).sort_index()
        expected = raw_df.corrwith(raw_other, **options).sort_index()
        pd.testing.assert_series_equal(actual, expected)