def tearDownClass(cls):
    """Reset ``compute.default_index_type``, then run the parent class teardown.

    The option is reset *before* delegating to the parent so that this
    class's own cleanup happens while whatever the parent class set up is
    still in place (teardown in reverse order of setup).
    """
    # Undo this class's own option change first.
    # NOTE(review): presumably reset_option relies on fixtures owned by the
    # parent test case -- confirm against the base class.
    reset_option('compute.default_index_type')
    super(DistributedDefaultIndexTest, cls).tearDownClass()
def tearDownClass(cls):
    """Reset the plotting options, then delegate to the parent class teardown."""
    # Options are reset in this fixed order: max_rows first, sample_ratio second.
    for option_name in ('plotting.max_rows', 'plotting.sample_ratio'):
        reset_option(option_name)
    super(DataFramePlotTest, cls).tearDownClass()
def tearDownClass(cls):
    """Reset ``compute.default_index_type``, then run the parent class teardown."""
    option_name = "compute.default_index_type"
    reset_option(option_name)
    super(DistributedOneByOneDefaultIndexTest, cls).tearDownClass()
def tearDownClass(cls):
    """Reset ``compute.default_index_type``, then run the parent class teardown.

    The option is reset *before* delegating to the parent so that this
    class's own cleanup happens while whatever the parent class set up is
    still in place (teardown in reverse order of setup).
    """
    # Undo this class's own option change first.
    # NOTE(review): presumably reset_option relies on fixtures owned by the
    # parent test case -- confirm against the base class.
    reset_option('compute.default_index_type')
    super(OneByOneDefaultIndexTest, cls).tearDownClass()
def tearDownClass(cls):
    """Reset ``compute.ops_on_diff_frames``, then run the parent class teardown."""
    option_name = "compute.ops_on_diff_frames"
    reset_option(option_name)
    super(OpsOnDiffFramesGroupByTest, cls).tearDownClass()
def tearDownClass(cls):
    """Reset ``compute.ops_on_diff_frames``, then run the parent class teardown."""
    option_name = "compute.ops_on_diff_frames"
    reset_option(option_name)
    super().tearDownClass()
def tearDownClass(cls):
    """Reset the plotting options, then delegate to the parent class teardown."""
    # Options are reset in this fixed order: max_rows first, sample_ratio second.
    for option_name in ("plotting.max_rows", "plotting.sample_ratio"):
        reset_option(option_name)
    super().tearDownClass()
def tearDownClass(cls):
    """Reset ``display.max_rows``, then run the parent class teardown.

    Overriding ``tearDownClass`` without delegating to the parent skips any
    class-level cleanup the base test case performs, so the parent
    implementation is called explicitly after this class's own option has
    been reset.
    """
    reset_option("display.max_rows")
    # Without this call the inherited class-level teardown would never run.
    super().tearDownClass()
def tearDownClass(cls):
    """Reset ``compute.ops_on_diff_frames``, then run the parent class teardown."""
    option_name = 'compute.ops_on_diff_frames'
    reset_option(option_name)
    super(OpsOnDiffFramesDisabledTest, cls).tearDownClass()
def tearDownClass(cls):
    """Reset ``plotting.max_rows``, then run the parent class teardown."""
    option_name = 'plotting.max_rows'
    reset_option(option_name)
    super(SeriesPlotTest, cls).tearDownClass()
def tearDownClass(cls):
    """Reset the plotting-related options, then run the parent class teardown.

    The pandas-level ``plotting.backend`` option is only reset when the
    installed pandas is version 0.25 or newer; the ``reset_option`` calls
    for ``plotting.backend`` and ``plotting.max_rows`` always run.
    """
    pandas_has_backend_option = LooseVersion(pd.__version__) >= LooseVersion("0.25")
    if pandas_has_backend_option:
        pd.reset_option("plotting.backend")

    for option_name in ("plotting.backend", "plotting.max_rows"):
        reset_option(option_name)

    super().tearDownClass()
def test_transform(self):
    """Compare Koalas and pandas results of ``groupby(...).transform``.

    The same checks run twice: first on a 6-row frame, then -- with
    ``compute.shortcut_limit`` set to 1000 -- on an 1800-row frame. Each
    pass covers flat column labels and then multi-index column labels.
    The second pass also verifies that a non-callable argument raises
    ``TypeError``. The option is reset even if an assertion fails.
    """

    def make_frames(repeat):
        # Build the pandas frame and its Koalas counterpart from the same data.
        pandas_frame = pd.DataFrame(
            {
                'a': [1, 2, 3, 4, 5, 6] * repeat,
                'b': [1, 1, 2, 3, 5, 8] * repeat,
                'c': [1, 4, 9, 16, 25, 36] * repeat
            },
            columns=['a', 'b', 'c'])
        return pandas_frame, koalas.DataFrame(pandas_frame)

    def check_flat_columns(kdf, pdf):
        # Single key, list of keys, and a column selected after grouping.
        self.assert_eq(
            kdf.groupby("b").transform(lambda x: x + 1).sort_index(),
            pdf.groupby("b").transform(lambda x: x + 1).sort_index())
        self.assert_eq(
            kdf.groupby(['a', 'b']).transform(lambda x: x * x).sort_index(),
            pdf.groupby(['a', 'b']).transform(lambda x: x * x).sort_index())
        self.assert_eq(
            kdf.groupby(['b'])['a'].transform(lambda x: x).sort_index(),
            pdf.groupby(['b'])['a'].transform(lambda x: x).sort_index())

    def check_multi_index_columns(kdf, pdf):
        # Relabel both frames in place with the same multi-index columns.
        columns = pd.MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'c')])
        pdf.columns = columns
        kdf.columns = columns
        self.assert_eq(
            kdf.groupby(("x", "b")).transform(lambda x: x + 1).sort_index(),
            pdf.groupby(("x", "b")).transform(lambda x: x + 1).sort_index())
        self.assert_eq(
            kdf.groupby([('x', 'a'), ('x', 'b')]).transform(lambda x: x * x).sort_index(),
            pdf.groupby([('x', 'a'), ('x', 'b')]).transform(lambda x: x * x).sort_index())

    # First pass: small frame.
    pdf, kdf = make_frames(repeat=1)
    check_flat_columns(kdf, pdf)
    check_multi_index_columns(kdf, pdf)

    # Second pass: 1800 rows with the shortcut limit explicitly set to 1000.
    set_option('compute.shortcut_limit', 1000)
    try:
        pdf, kdf = make_frames(repeat=300)
        check_flat_columns(kdf, pdf)

        with self.assertRaisesRegex(TypeError, "<class 'int'> object is not callable"):
            kdf.groupby("b").transform(1)

        check_multi_index_columns(kdf, pdf)
    finally:
        reset_option('compute.shortcut_limit')
def tearDownClass(cls):
    """Reset ``display.max_rows``, then run the parent class teardown."""
    option_name = "display.max_rows"
    reset_option(option_name)
    super(ReprTest, cls).tearDownClass()
def test_apply(self):
    """Compare Koalas and pandas results of ``groupby(...).apply``.

    The same checks run twice: first on a 6-row frame, then -- with
    ``compute.shortcut_limit`` set to 1000 -- on an 1800-row frame. Each
    pass covers flat column labels and then multi-index column labels.
    The second pass also verifies that a non-callable argument raises
    ``TypeError``. The option is reset even if an assertion fails.
    """

    def make_frames(repeat):
        # Build the pandas frame and its Koalas counterpart from the same data.
        pandas_frame = pd.DataFrame(
            {
                'a': [1, 2, 3, 4, 5, 6] * repeat,
                'b': [1, 1, 2, 3, 5, 8] * repeat,
                'c': [1, 4, 9, 16, 25, 36] * repeat
            },
            columns=['a', 'b', 'c'])
        return pandas_frame, koalas.DataFrame(pandas_frame)

    def check_flat_columns(kdf, pdf):
        # Single key, list of keys, and a column selected after grouping.
        self.assert_eq(
            kdf.groupby("b").apply(lambda x: x + 1).sort_index(),
            pdf.groupby("b").apply(lambda x: x + 1).sort_index())
        self.assert_eq(
            kdf.groupby(['a', 'b']).apply(lambda x: x * x).sort_index(),
            pdf.groupby(['a', 'b']).apply(lambda x: x * x).sort_index())
        self.assert_eq(
            kdf.groupby(['b'])['a'].apply(lambda x: x).sort_index(),
            pdf.groupby(['b'])['a'].apply(lambda x: x).sort_index())

    def check_multi_index_columns(kdf, pdf):
        # Relabel both frames in place with the same multi-index columns.
        columns = pd.MultiIndex.from_tuples([('x', 'a'), ('x', 'b'), ('y', 'c')])
        pdf.columns = columns
        kdf.columns = columns
        self.assert_eq(
            kdf.groupby(("x", "b")).apply(lambda x: x + 1).sort_index(),
            pdf.groupby(("x", "b")).apply(lambda x: x + 1).sort_index())
        self.assert_eq(
            kdf.groupby([('x', 'a'), ('x', 'b')]).apply(lambda x: x * x).sort_index(),
            pdf.groupby([('x', 'a'), ('x', 'b')]).apply(lambda x: x * x).sort_index())

    # First pass: small frame.
    pdf, kdf = make_frames(repeat=1)
    check_flat_columns(kdf, pdf)
    check_multi_index_columns(kdf, pdf)

    # Inputs smaller than 'compute.shortcut_limit' take a shortcut that uses
    # the collected pandas dataframe directly. The limit is set to 1000
    # explicitly here and the frame below has 1800 rows.
    set_option('compute.shortcut_limit', 1000)
    try:
        pdf, kdf = make_frames(repeat=300)
        check_flat_columns(kdf, pdf)

        with self.assertRaisesRegex(TypeError, "<class 'int'> object is not callable"):
            kdf.groupby("b").apply(1)

        check_multi_index_columns(kdf, pdf)
    finally:
        reset_option('compute.shortcut_limit')
def tearDownClass(cls):
    """Reset ``plotting.max_rows``, then run the parent class teardown.

    The option is reset *before* delegating to the parent so that this
    class's own cleanup happens while whatever the parent class set up is
    still in place (teardown in reverse order of setup).
    """
    # Undo this class's own option change first.
    # NOTE(review): presumably reset_option relies on fixtures owned by the
    # parent test case -- confirm against the base class.
    reset_option('plotting.max_rows')
    super(DataFramePlotTest, cls).tearDownClass()