Exemple #1
0
    def test_repr_dataframe(self):
        """Check the DataFrame ``repr`` around the ``max_display_count`` boundary.

        The test asserts that:

        * a frame built with ``ks.range(ReprTest.max_display_count)`` has no
          "Showing only the first" notice and its repr equals the repr of the
          pandas conversion;
        * a frame with one more row does carry the notice;
        * with the "display.max_rows" option set to ``None`` the repr of the
          larger frame equals the pandas repr again.
        """
        truncation_notice = "Showing only the first"
        limit = ReprTest.max_display_count

        kdf = ks.range(limit)
        # assertNotIn/assertIn print both operands on failure, unlike
        # assertTrue, which only reports "False is not true".
        self.assertNotIn(truncation_notice, repr(kdf))
        self.assert_eq(repr(kdf), repr(kdf.to_pandas()))

        kdf = ks.range(limit + 1)
        self.assertIn(truncation_notice, repr(kdf))

        with option_context("display.max_rows", None):
            kdf = ks.range(limit + 1)
            self.assert_eq(repr(kdf), repr(kdf.to_pandas()))
Exemple #2
0
    def test_repr_indexes(self):
        """Check the index ``repr`` around the ``max_display_count`` boundary.

        The test asserts that:

        * the index of ``ks.range(ReprTest.max_display_count)`` has no
          "Showing only the first" notice and its repr equals the repr of the
          pandas conversion;
        * the index of a frame with one more row does carry the notice;
        * with the "display.max_rows" option set to ``None`` the repr of the
          larger index equals the pandas repr again.
        """
        truncation_notice = "Showing only the first"
        limit = ReprTest.max_display_count

        kidx = ks.range(limit).index
        # assertNotIn/assertIn print both operands on failure, unlike
        # assertTrue, which only reports "False is not true".
        self.assertNotIn(truncation_notice, repr(kidx))
        self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        kidx = ks.range(limit + 1).index
        self.assertIn(truncation_notice, repr(kidx))

        with option_context("display.max_rows", None):
            kidx = ks.range(limit + 1).index
            self.assert_eq(repr(kidx), repr(kidx.to_pandas()))
    def test_repr_indexes(self):
        """Check ``repr`` of Index and MultiIndex around ``max_display_count``.

        For both a single-level index (from ``ks.range``) and a two-level
        ``ks.MultiIndex`` (from tuples ``(100 * i, i)``) the test asserts that:

        * with exactly ``ReprTest.max_display_count`` entries there is no
          "Showing only the first" notice and the repr equals the repr of the
          pandas conversion;
        * with one more entry the notice is present and the repr starts with
          the repr of the first ``max_display_count`` entries of the pandas
          index;
        * with the "display.max_rows" option set to ``None`` the repr of the
          larger index equals the pandas repr again.
        """
        truncation_notice = "Showing only the first"
        limit = ReprTest.max_display_count

        # --- single-level Index -------------------------------------------
        kidx = ks.range(limit).index
        # assertNotIn/assertIn print both operands on failure, unlike
        # assertTrue, which only reports "False is not true".
        self.assertNotIn(truncation_notice, repr(kidx))
        self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        kidx = ks.range(limit + 1).index
        self.assertIn(truncation_notice, repr(kidx))
        # Round-trip through a Series to take the first `limit` entries.
        expected_prefix = repr(kidx.to_pandas().to_series().head(limit).index)
        self.assertTrue(repr(kidx).startswith(expected_prefix))

        with option_context("display.max_rows", None):
            kidx = ks.range(limit + 1).index
            self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        # --- MultiIndex ---------------------------------------------------
        kidx = ks.MultiIndex.from_tuples([(100 * i, i) for i in range(limit)])
        self.assertNotIn(truncation_notice, repr(kidx))
        self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        kidx = ks.MultiIndex.from_tuples(
            [(100 * i, i) for i in range(limit + 1)]
        )
        self.assertIn(truncation_notice, repr(kidx))
        # Round-trip through a DataFrame to take the first `limit` entries.
        expected_prefix = repr(kidx.to_pandas().to_frame().head(limit).index)
        self.assertTrue(repr(kidx).startswith(expected_prefix))

        with option_context("display.max_rows", None):
            kidx = ks.MultiIndex.from_tuples(
                [(100 * i, i) for i in range(limit + 1)]
            )
            self.assert_eq(repr(kidx), repr(kidx.to_pandas()))
Exemple #4
0
 def test_axis_on_dataframe(self):
     """Compare row-wise (``axis=1``) statistics between Koalas and pandas.

     The frame is deliberately larger than the 'compute.shortcut_limit'
     value set here (1000): below that limit a shortcut is taken that
     works on the collected pandas DataFrame directly.
     """
     # Statistics to compare, in the order they are checked.
     stat_names = (
         "count",
         "var",
         "std",
         "max",
         "min",
         "sum",
         "kurtosis",
         "skew",
         "mean",
     )
     repeat = 300

     with option_context('compute.shortcut_limit', 1000):
         columns = {
             'A': [1, -2, 3, -4, 5] * repeat,
             'B': [1., -2, 3, -4, 5] * repeat,
             'C': [-6., -7, -8, -9, 10] * repeat,
             'D': [True, False, True, False, False] * repeat,
         }
         pdf = pd.DataFrame(columns)
         kdf = ks.from_pandas(pdf)

         for stat in stat_names:
             actual = getattr(kdf, stat)(axis=1)
             expected = getattr(pdf, stat)(axis=1)
             self.assert_eq(actual, expected)
Exemple #5
0
    def test_axis_on_dataframe(self):
        """Compare row-wise (``axis=1``) statistics between Koalas and pandas.

        The frame is deliberately larger than the "compute.shortcut_limit"
        value set here (1000): below that limit a shortcut is taken that
        works on the collected pandas DataFrame directly.

        Every statistic is checked twice: once with default arguments and
        once with ``numeric_only=True``.
        """
        # (method name, extra keyword arguments), in the order they are checked.
        stat_calls = [
            ("count", {}),
            ("var", {}),
            ("var", {"ddof": 0}),
            ("std", {}),
            ("std", {"ddof": 0}),
            ("max", {}),
            ("min", {}),
            ("sum", {}),
            ("product", {}),
            ("kurtosis", {}),
            ("skew", {}),
            ("mean", {}),
            ("sem", {}),
            ("sem", {"ddof": 0}),
        ]
        # With numeric_only=True the pandas result of these statistics is
        # cast to float before being compared.
        float_cast_stats = {"max", "min", "sum", "product"}
        repeat = 300

        with option_context("compute.shortcut_limit", 1000):
            columns = {
                "A": [1, -2, 3, -4, 5] * repeat,
                "B": [1.0, -2, 3, -4, 5] * repeat,
                "C": [-6.0, -7, -8, -9, 10] * repeat,
                "D": [True, False, True, False, False] * repeat,
            }
            pdf = pd.DataFrame(columns, index=range(10, 15001, 10))
            kdf = ks.from_pandas(pdf)

            # Pass 1: default arguments.
            for stat, extra in stat_calls:
                actual = getattr(kdf, stat)(axis=1, **extra)
                expected = getattr(pdf, stat)(axis=1, **extra)
                self.assert_eq(actual, expected)

            # Pass 2: numeric_only=True.
            for stat, extra in stat_calls:
                actual = getattr(kdf, stat)(axis=1, numeric_only=True, **extra)
                expected = getattr(pdf, stat)(axis=1, numeric_only=True, **extra)
                if stat in float_cast_stats:
                    expected = expected.astype(float)
                self.assert_eq(actual, expected)
Exemple #6
0
 def test_sampled_plot_with_ratio(self):
     """Check that sampling honours the "plotting.sample_ratio" option.

     With the ratio set to 0.5, the fraction of rows returned by
     ``SampledPlotBase().get_sampled`` for a 2500-row frame must round
     (to one decimal) to 0.5.
     """
     total_rows = 2500
     with option_context("plotting.sample_ratio", 0.5):
         values = np.random.rand(total_rows, 4)
         kdf = ks.from_pandas(pd.DataFrame(values, columns=["a", "b", "c", "d"]))
         sampled = SampledPlotBase().get_sampled(kdf)
         observed_ratio = round(len(sampled) / total_rows, 1)
         self.assertEqual(observed_ratio, 0.5)