Ejemplo n.º 1
0
 def tearDownClass(cls):
     """Reset 'compute.default_index_type', then run the parent teardown.

     The option is reset before chaining to the parent class so that this
     class's own cleanup happens first and the parent's teardown runs last
     (the reverse of the usual set-up order).
     """
     reset_option('compute.default_index_type')
     super(DistributedDefaultIndexTest, cls).tearDownClass()
Ejemplo n.º 2
0
 def tearDownClass(cls):
     """Reset the two plotting options, then run the parent teardown."""
     # Options are reset in this fixed order before chaining to the parent.
     for option_name in ('plotting.max_rows', 'plotting.sample_ratio'):
         reset_option(option_name)
     super(DataFramePlotTest, cls).tearDownClass()
Ejemplo n.º 3
0
 def tearDownClass(cls):
     """Reset 'compute.default_index_type', then run the parent teardown."""
     option_name = "compute.default_index_type"
     reset_option(option_name)
     parent = super(DistributedOneByOneDefaultIndexTest, cls)
     parent.tearDownClass()
Ejemplo n.º 4
0
 def tearDownClass(cls):
     """Reset 'compute.default_index_type', then run the parent teardown.

     The option is reset before chaining to the parent class so that this
     class's own cleanup happens first and the parent's teardown runs last
     (the reverse of the usual set-up order).
     """
     reset_option('compute.default_index_type')
     super(OneByOneDefaultIndexTest, cls).tearDownClass()
 def tearDownClass(cls):
     """Reset 'compute.ops_on_diff_frames', then run the parent teardown."""
     option_name = "compute.ops_on_diff_frames"
     reset_option(option_name)
     parent = super(OpsOnDiffFramesGroupByTest, cls)
     parent.tearDownClass()
Ejemplo n.º 6
0
 def tearDownClass(cls):
     """Reset 'compute.ops_on_diff_frames', then run the parent teardown."""
     option_name = "compute.ops_on_diff_frames"
     reset_option(option_name)
     # Chain to the next class in the MRO once the option has been reset.
     super().tearDownClass()
Ejemplo n.º 7
0
 def tearDownClass(cls):
     """Reset the two plotting options, then run the parent teardown."""
     # Options are reset in this fixed order before chaining to the parent.
     for option_name in ("plotting.max_rows", "plotting.sample_ratio"):
         reset_option(option_name)
     super().tearDownClass()
Ejemplo n.º 8
0
 def tearDownClass(cls):
     """Reset 'display.max_rows', then run the parent teardown.

     The parent ``tearDownClass`` is chained after the option reset so that
     class-level cleanup defined by base classes is not skipped.
     """
     reset_option("display.max_rows")
     super().tearDownClass()
Ejemplo n.º 9
0
 def tearDownClass(cls):
     """Reset 'compute.ops_on_diff_frames', then run the parent teardown."""
     option_name = 'compute.ops_on_diff_frames'
     reset_option(option_name)
     parent = super(OpsOnDiffFramesDisabledTest, cls)
     parent.tearDownClass()
Ejemplo n.º 10
0
 def tearDownClass(cls):
     """Reset 'plotting.max_rows', then run the parent teardown."""
     option_name = 'plotting.max_rows'
     reset_option(option_name)
     parent = super(SeriesPlotTest, cls)
     parent.tearDownClass()
Ejemplo n.º 11
0
 def tearDownClass(cls):
     """Reset the plotting options, then run the parent teardown.

     pandas' own "plotting.backend" option is reset only when the installed
     pandas is 0.25 or newer; the two options passed to ``reset_option`` are
     reset unconditionally.
     """
     installed = LooseVersion(pd.__version__)
     minimum = LooseVersion("0.25")
     # NOTE(review): presumably older pandas has no "plotting.backend"
     # option to reset - confirm.
     if installed >= minimum:
         pd.reset_option("plotting.backend")
     for option_name in ("plotting.backend", "plotting.max_rows"):
         reset_option(option_name)
     super().tearDownClass()
Ejemplo n.º 12
0
    def test_transform(self):
        """Check that Koalas ``groupby(...).transform`` matches pandas.

        The same comparisons run in two passes: first on a 6-row frame, then
        on an 1800-row frame with 'compute.shortcut_limit' set to 1000. Each
        pass covers plain columns followed by multi-index columns; the second
        pass also checks that a non-callable argument raises ``TypeError``.
        The option is reset in a ``finally`` clause, so it is restored even
        if an assertion fails.
        """

        def build_frames(repeat):
            # Return the pandas frame and a Koalas frame created from it.
            source = pd.DataFrame(
                {
                    'a': [1, 2, 3, 4, 5, 6] * repeat,
                    'b': [1, 1, 2, 3, 5, 8] * repeat,
                    'c': [1, 4, 9, 16, 25, 36] * repeat
                },
                columns=['a', 'b', 'c'])
            return source, koalas.DataFrame(source)

        def compare(kdf, pdf, by, func):
            # The Koalas result is evaluated first, then the pandas one.
            self.assert_eq(
                kdf.groupby(by).transform(func).sort_index(),
                pdf.groupby(by).transform(func).sort_index())

        def check_plain_columns(kdf, pdf):
            compare(kdf, pdf, "b", lambda x: x + 1)
            compare(kdf, pdf, ['a', 'b'], lambda x: x * x)
            # Series group-by: a single column selected after grouping.
            self.assert_eq(
                kdf.groupby(['b'])['a'].transform(lambda x: x).sort_index(),
                pdf.groupby(['b'])['a'].transform(lambda x: x).sort_index())

        def check_multi_index_columns(kdf, pdf):
            # Relabel both frames in place with two-level column names.
            labels = pd.MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                                ('y', 'c')])
            pdf.columns = labels
            kdf.columns = labels
            compare(kdf, pdf, ("x", "b"), lambda x: x + 1)
            compare(kdf, pdf, [('x', 'a'), ('x', 'b')], lambda x: x * x)

        # First pass: small frame, option left untouched.
        pdf, kdf = build_frames(1)
        check_plain_columns(kdf, pdf)
        check_multi_index_columns(kdf, pdf)

        # Second pass: 1800 rows with the limit explicitly set to 1000.
        # NOTE(review): presumably this pushes the frame past the shortcut
        # limit so the non-shortcut code path is exercised - confirm.
        set_option('compute.shortcut_limit', 1000)
        try:
            pdf, kdf = build_frames(300)
            check_plain_columns(kdf, pdf)
            with self.assertRaisesRegex(
                    TypeError, "<class 'int'> object is not callable"):
                kdf.groupby("b").transform(1)
            check_multi_index_columns(kdf, pdf)
        finally:
            reset_option('compute.shortcut_limit')
Ejemplo n.º 13
0
 def tearDownClass(cls):
     """Reset 'display.max_rows', then run the parent teardown."""
     option_name = "display.max_rows"
     reset_option(option_name)
     parent = super(ReprTest, cls)
     parent.tearDownClass()
Ejemplo n.º 14
0
    def test_apply(self):
        """Check that Koalas ``groupby(...).apply`` matches pandas.

        The same comparisons run in two passes: first on a 6-row frame, then
        on an 1800-row frame with 'compute.shortcut_limit' set to 1000. Each
        pass covers plain columns followed by multi-index columns; the second
        pass also checks that a non-callable argument raises ``TypeError``.
        The option is reset in a ``finally`` clause, so it is restored even
        if an assertion fails.
        """

        def build_frames(repeat):
            # Return the pandas frame and a Koalas frame created from it.
            source = pd.DataFrame(
                {
                    'a': [1, 2, 3, 4, 5, 6] * repeat,
                    'b': [1, 1, 2, 3, 5, 8] * repeat,
                    'c': [1, 4, 9, 16, 25, 36] * repeat
                },
                columns=['a', 'b', 'c'])
            return source, koalas.DataFrame(source)

        def compare(kdf, pdf, by, func):
            # The Koalas result is evaluated first, then the pandas one.
            self.assert_eq(
                kdf.groupby(by).apply(func).sort_index(),
                pdf.groupby(by).apply(func).sort_index())

        def check_plain_columns(kdf, pdf):
            compare(kdf, pdf, "b", lambda x: x + 1)
            compare(kdf, pdf, ['a', 'b'], lambda x: x * x)
            # Series group-by: a single column selected after grouping.
            self.assert_eq(
                kdf.groupby(['b'])['a'].apply(lambda x: x).sort_index(),
                pdf.groupby(['b'])['a'].apply(lambda x: x).sort_index())

        def check_multi_index_columns(kdf, pdf):
            # Relabel both frames in place with two-level column names.
            labels = pd.MultiIndex.from_tuples([('x', 'a'), ('x', 'b'),
                                                ('y', 'c')])
            pdf.columns = labels
            kdf.columns = labels
            compare(kdf, pdf, ("x", "b"), lambda x: x + 1)
            compare(kdf, pdf, [('x', 'a'), ('x', 'b')], lambda x: x * x)

        # First pass: small frame, option left untouched.
        pdf, kdf = build_frames(1)
        check_plain_columns(kdf, pdf)
        check_multi_index_columns(kdf, pdf)

        # Frames smaller than 'compute.shortcut_limit' take a shortcut that
        # uses the collected pandas dataframe directly, so the limit is set
        # to 1000 explicitly before the second, 1800-row pass.
        set_option('compute.shortcut_limit', 1000)
        try:
            pdf, kdf = build_frames(300)
            check_plain_columns(kdf, pdf)
            with self.assertRaisesRegex(
                    TypeError, "<class 'int'> object is not callable"):
                kdf.groupby("b").apply(1)
            check_multi_index_columns(kdf, pdf)
        finally:
            reset_option('compute.shortcut_limit')
Ejemplo n.º 15
0
 def tearDownClass(cls):
     """Reset 'plotting.max_rows', then run the parent teardown.

     The option is reset before chaining to the parent class so that this
     class's own cleanup happens first and the parent's teardown runs last
     (the reverse of the usual set-up order).
     """
     reset_option('plotting.max_rows')
     super(DataFramePlotTest, cls).tearDownClass()