def test_repr_dataframe(self):
    """Check when the repr of a ``ks`` DataFrame carries the truncation notice.

    Three cases are exercised, using ``ReprTest.max_display_count`` as the
    row-count boundary:

    * exactly at the boundary, the notice is absent and the repr equals the
      repr of the pandas conversion;
    * one row above the boundary, the notice is present;
    * one row above the boundary with ``"display.max_rows"`` set to ``None``,
      the repr again equals the repr of the pandas conversion.
    """
    marker = "Showing only the first"
    limit = ReprTest.max_display_count

    # At the boundary: nothing is cut off.
    kdf = ks.range(limit)
    # assertNotIn/assertIn report both operands on failure, unlike
    # assertTrue(x in y), which only reports "False is not true".
    self.assertNotIn(marker, repr(kdf))
    self.assert_eq(repr(kdf), repr(kdf.to_pandas()))

    # One row past the boundary: the notice must show up.
    kdf = ks.range(limit + 1)
    self.assertIn(marker, repr(kdf))

    # Same oversized frame, but with the "display.max_rows" option set to None.
    with option_context("display.max_rows", None):
        kdf = ks.range(limit + 1)
        self.assert_eq(repr(kdf), repr(kdf.to_pandas()))
def test_repr_indexes(self):
    """Check when the repr of a ``ks`` index carries the truncation notice.

    The index is taken from ``ks.range(n)`` with ``n`` at and just above
    ``ReprTest.max_display_count``:

    * at the boundary, the notice is absent and the repr equals the repr of
      the pandas conversion;
    * one element above the boundary, the notice is present;
    * one element above the boundary with ``"display.max_rows"`` set to
      ``None``, the repr again equals the repr of the pandas conversion.
    """
    marker = "Showing only the first"
    limit = ReprTest.max_display_count

    # At the boundary: nothing is cut off.
    kidx = ks.range(limit).index
    # assertNotIn/assertIn report both operands on failure, unlike
    # assertTrue(x in y), which only reports "False is not true".
    self.assertNotIn(marker, repr(kidx))
    self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

    # One element past the boundary: the notice must show up.
    kidx = ks.range(limit + 1).index
    self.assertIn(marker, repr(kidx))

    # Same oversized index, but with the "display.max_rows" option set to None.
    with option_context("display.max_rows", None):
        kidx = ks.range(limit + 1).index
        self.assert_eq(repr(kidx), repr(kidx.to_pandas()))
def test_repr_indexes(self):
    """Check repr truncation for a range-derived index and a MultiIndex.

    For each kind of index the same scenario runs, using
    ``ReprTest.max_display_count`` as the boundary:

    * at the boundary, the truncation notice is absent and the repr equals
      the repr of the pandas conversion;
    * one element above the boundary, the notice is present and the repr
      starts with the repr of the pandas index cut down to the boundary;
    * one element above the boundary with ``"display.max_rows"`` set to
      ``None``, the repr equals the repr of the pandas conversion.
    """
    marker = "Showing only the first"
    limit = ReprTest.max_display_count

    def check_truncation(make_index, head_of_pandas_index):
        # make_index(n) builds a ks index with n elements.
        # head_of_pandas_index(pidx) returns the first `limit` elements of a
        # pandas index as a pandas index.

        # At the boundary: nothing is cut off.
        kidx = make_index(limit)
        # assertNotIn/assertIn report both operands on failure, unlike
        # assertTrue(x in y), which only reports "False is not true".
        self.assertNotIn(marker, repr(kidx))
        self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        # One element past the boundary: the notice appears after the repr
        # of the first `limit` elements.
        kidx = make_index(limit + 1)
        self.assertIn(marker, repr(kidx))
        expected_prefix = repr(head_of_pandas_index(kidx.to_pandas()))
        self.assertTrue(
            repr(kidx).startswith(expected_prefix),
            msg="repr does not start with the repr of the truncated pandas index",
        )

        # Same oversized index with the "display.max_rows" option set to None.
        with option_context("display.max_rows", None):
            kidx = make_index(limit + 1)
            self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

    # Single-level index taken from ks.range(n).
    check_truncation(
        lambda n: ks.range(n).index,
        lambda pidx: pidx.to_series().head(limit).index,
    )

    # Two-level MultiIndex built from (100 * i, i) tuples.
    check_truncation(
        lambda n: ks.MultiIndex.from_tuples([(100 * i, i) for i in range(n)]),
        lambda pidx: pidx.to_frame().head(limit).index,
    )
def test_axis_on_dataframe(self):
    """Compare row-wise (``axis=1``) statistics between ``ks`` and pandas.

    The frame is intentionally large (1500 rows). Data smaller than
    'compute.shortcut_limit' takes a shortcut that uses the collected pandas
    dataframe directly, so the limit is pinned to 1000 explicitly to keep
    this test off that shortcut.
    """
    # Reduction methods checked, in order; each is called with axis=1 only.
    row_wise_stats = (
        "count",
        "var",
        "std",
        "max",
        "min",
        "sum",
        "kurtosis",
        "skew",
        "mean",
    )
    repeats = 300

    with option_context('compute.shortcut_limit', 1000):
        column_data = {
            'A': [1, -2, 3, -4, 5] * repeats,
            'B': [1., -2, 3, -4, 5] * repeats,
            'C': [-6., -7, -8, -9, 10] * repeats,
            'D': [True, False, True, False, False] * repeats,
        }
        pdf = pd.DataFrame(column_data)
        kdf = ks.from_pandas(pdf)

        for stat in row_wise_stats:
            actual = getattr(kdf, stat)(axis=1)
            expected = getattr(pdf, stat)(axis=1)
            self.assert_eq(actual, expected)
def test_axis_on_dataframe(self):
    """Compare row-wise (``axis=1``) statistics between ``ks`` and pandas.

    The frame is intentionally large (1500 rows). Data smaller than
    'compute.shortcut_limit' takes a shortcut that uses the collected pandas
    dataframe directly, so the limit is pinned to 1000 explicitly to keep
    this test off that shortcut.

    Every statistic is checked twice: once with its plain arguments and once
    with ``numeric_only=True`` added.
    """
    # (method name, extra keyword arguments), in the order they are checked.
    stat_calls = [
        ("count", {}),
        ("var", {}),
        ("var", {"ddof": 0}),
        ("std", {}),
        ("std", {"ddof": 0}),
        ("max", {}),
        ("min", {}),
        ("sum", {}),
        ("product", {}),
        ("kurtosis", {}),
        ("skew", {}),
        ("mean", {}),
        ("sem", {}),
        ("sem", {"ddof": 0}),
    ]
    # In the numeric_only pass, the pandas result of these methods is cast to
    # float before it is compared with the ks result.
    float_cast_stats = {"max", "min", "sum", "product"}
    repeats = 300

    with option_context("compute.shortcut_limit", 1000):
        pdf = pd.DataFrame(
            {
                "A": [1, -2, 3, -4, 5] * repeats,
                "B": [1.0, -2, 3, -4, 5] * repeats,
                "C": [-6.0, -7, -8, -9, 10] * repeats,
                "D": [True, False, True, False, False] * repeats,
            },
            index=range(10, 15001, 10),
        )
        kdf = ks.from_pandas(pdf)

        # Pass 1: plain arguments.
        for stat, extra in stat_calls:
            actual = getattr(kdf, stat)(axis=1, **extra)
            expected = getattr(pdf, stat)(axis=1, **extra)
            self.assert_eq(actual, expected)

        # Pass 2: the same calls with numeric_only=True.
        for stat, extra in stat_calls:
            actual = getattr(kdf, stat)(axis=1, numeric_only=True, **extra)
            expected = getattr(pdf, stat)(axis=1, numeric_only=True, **extra)
            if stat in float_cast_stats:
                expected = expected.astype(float)
            self.assert_eq(actual, expected)
def test_sampled_plot_with_ratio(self):
    """Check the size of the sample drawn under ``"plotting.sample_ratio"``.

    With the option set to 0.5, a 2500-row frame of random values is passed
    to ``SampledPlotBase().get_sampled``; the length of the result divided
    by 2500, rounded to one decimal place, must equal 0.5.
    """
    row_count = 2500

    with option_context("plotting.sample_ratio", 0.5):
        values = np.random.rand(row_count, 4)
        pdf = pd.DataFrame(values, columns=["a", "b", "c", "d"])
        kdf = ks.from_pandas(pdf)

        sampled = SampledPlotBase().get_sampled(kdf)

        # Rounding to one decimal leaves room for the sample size to vary.
        observed_ratio = round(len(sampled) / row_count, 1)
        self.assertEqual(observed_ratio, 0.5)