예제 #1
0
    def test_expanding_error(self):
        """Verify the errors raised for invalid ``expanding`` arguments.

        A negative ``min_periods`` is expected to raise ``ValueError``, and
        constructing ``Expanding`` directly on an ``int`` is expected to raise
        ``TypeError``.
        """
        negative_min_periods = "min_periods must be >= 0"
        with self.assertRaisesRegex(ValueError, negative_min_periods):
            ps.range(10).expanding(-1)

        not_series_or_frame = (
            "psdf_or_psser must be a series or dataframe; however, got:.*int"
        )
        with self.assertRaisesRegex(TypeError, not_series_or_frame):
            Expanding(1, 2)
예제 #2
0
    def test_rolling_error(self):
        """Verify the errors raised for invalid ``rolling`` arguments.

        A negative ``window`` or ``min_periods`` is expected to raise
        ``ValueError``; constructing ``Rolling`` directly on an ``int`` is
        expected to raise ``TypeError``.
        """
        negative_window = "window must be >= 0"
        with self.assertRaisesRegex(ValueError, negative_window):
            ps.range(10).rolling(window=-1)

        negative_min_periods = "min_periods must be >= 0"
        with self.assertRaisesRegex(ValueError, negative_min_periods):
            ps.range(10).rolling(window=1, min_periods=-1)

        not_series_or_frame = (
            "psdf_or_psser must be a series or dataframe; however, got:.*int"
        )
        with self.assertRaisesRegex(TypeError, not_series_or_frame):
            Rolling(1, 2)
예제 #3
0
    def test_html_repr(self):
        """Check the HTML repr of a DataFrame around the display row limit.

        Covers three situations: a frame with exactly
        ``ReprTest.max_display_count`` rows, a frame with one row more, and a
        frame with one row more while ``display.max_rows`` is set to ``None``.
        """
        limit = ReprTest.max_display_count

        # At the limit: no truncation notice, and the HTML equals pandas' HTML.
        kdf = ps.range(limit)
        # assertNotIn/assertIn report both operands on failure, unlike
        # assertTrue which only reports "False is not true".
        self.assertNotIn("Showing only the first", kdf._repr_html_())
        self.assertEqual(kdf._repr_html_(), kdf.to_pandas()._repr_html_())

        # One row over the limit: the truncation notice must be present.
        kdf = ps.range(limit + 1)
        self.assertIn("Showing only the first", kdf._repr_html_())

        # With display.max_rows set to None the HTML equals pandas' HTML again.
        with option_context("display.max_rows", None):
            kdf = ps.range(limit + 1)
            self.assertEqual(kdf._repr_html_(), kdf.to_pandas()._repr_html_())
예제 #4
0
    def test_repr_dataframe(self):
        """Check ``repr`` of a DataFrame around the display row limit.

        Covers three situations: a frame with exactly
        ``ReprTest.max_display_count`` rows, a frame with one row more, and a
        frame with one row more while ``display.max_rows`` is set to ``None``.
        """
        limit = ReprTest.max_display_count

        # At the limit: no truncation notice, and the repr equals pandas' repr.
        kdf = ps.range(limit)
        # assertNotIn/assertIn report both operands on failure, unlike
        # assertTrue which only reports "False is not true".
        self.assertNotIn("Showing only the first", repr(kdf))
        self.assert_eq(repr(kdf), repr(kdf.to_pandas()))

        # One row over the limit: the notice is present and the output begins
        # with the repr of the first ``limit`` pandas rows.
        kdf = ps.range(limit + 1)
        self.assertIn("Showing only the first", repr(kdf))
        expected_prefix = repr(kdf.to_pandas().head(limit))
        self.assertTrue(repr(kdf).startswith(expected_prefix))

        # With display.max_rows set to None the repr equals pandas' repr again.
        with option_context("display.max_rows", None):
            kdf = ps.range(limit + 1)
            self.assert_eq(repr(kdf), repr(kdf.to_pandas()))
예제 #5
0
    def test_resample_error(self):
        """Verify the errors raised by ``resample`` for unsupported input.

        First checks that a ``ps.range`` frame is rejected with
        ``NotImplementedError``, then checks several invalid rule / option
        combinations on a datetime-indexed series, each expected to raise
        ``ValueError``.
        """
        range_frame = ps.range(10)
        needs_datetime_index = "resample currently works only for DatetimeIndex"
        with self.assertRaisesRegex(NotImplementedError, needs_datetime_index):
            range_frame.resample("3Y").sum()

        # Datetime-indexed fixture; the last index entry is NaT.
        timestamps = [
            datetime.datetime(2012, 1, 2),
            datetime.datetime(2012, 5, 3),
            datetime.datetime(2022, 5, 3),
            pd.NaT,
        ]
        ones = np.ones(len(timestamps))
        pdf = pd.DataFrame(ones, index=pd.DatetimeIndex(timestamps), columns=["A"])
        psdf = ps.from_pandas(pdf)

        # (rule, extra keyword arguments, expected ValueError pattern)
        invalid_calls = [
            ("3W", {}, "rule code W-SUN is not supported"),
            ("0Y", {}, "rule offset must be positive"),
            ("3Y", {"closed": "middle"}, "invalid closed: 'middle'"),
            ("3Y", {"label": "both"}, "invalid label: 'both'"),
        ]
        for rule, options, pattern in invalid_calls:
            with self.assertRaisesRegex(ValueError, pattern):
                psdf.A.resample(rule, **options).sum()
예제 #6
0
    def test_ewm_error(self):
        """Verify the errors raised for invalid ``ewm`` arguments.

        Constructing ``ExponentialMoving`` directly on an ``int`` is expected
        to raise ``TypeError``; each invalid keyword combination passed to
        ``ewm(...).mean()`` is expected to raise ``ValueError``.
        """
        not_series_or_frame = (
            "psdf_or_psser must be a series or dataframe; however, got:.*int"
        )
        with self.assertRaisesRegex(TypeError, not_series_or_frame):
            ExponentialMoving(1, 2)

        psdf = ps.range(10)

        # (keyword arguments for ewm, expected ValueError pattern)
        invalid_calls = [
            ({"min_periods": -1, "alpha": 0.5}, "min_periods must be >= 0"),
            ({"com": -0.1}, "com must be >= 0"),
            ({"span": 0.7}, "span must be >= 1"),
            ({"halflife": 0}, "halflife must be > 0"),
            ({"alpha": 1.7}, "alpha must be in"),
            ({}, "Must pass one of com, span, halflife, or alpha"),
            (
                {"com": 0.5, "alpha": 0.7},
                "com, span, halflife, and alpha are mutually exclusive",
            ),
        ]
        for ewm_kwargs, pattern in invalid_calls:
            with self.assertRaisesRegex(ValueError, pattern):
                psdf.ewm(**ewm_kwargs).mean()
예제 #7
0
    def test_resample_error(self):
        """Verify the errors raised by ``resample`` on frames and series.

        Checks, in order: resampling a ``ps.range`` frame and its ``id``
        column, invalid rule / option combinations, a non-timestamp ``on``
        column, and inputs for which no aggregation column is available.
        """
        needs_datetime_index = "resample currently works only for DatetimeIndex"
        on_needs_timestamp = "`on` currently works only for TimestampType"
        nothing_to_aggregate = "No available aggregation columns!"

        psdf = ps.range(10)
        with self.assertRaisesRegex(NotImplementedError, needs_datetime_index):
            psdf.resample("3Y").sum()
        with self.assertRaisesRegex(NotImplementedError, needs_datetime_index):
            psdf.id.resample("3Y").sum()

        # Datetime-indexed fixture; the last index entry is NaT.
        timestamps = [
            datetime.datetime(2012, 1, 2),
            datetime.datetime(2012, 5, 3),
            datetime.datetime(2022, 5, 3),
            pd.NaT,
        ]
        ones = np.ones(len(timestamps))
        pdf = pd.DataFrame(ones, index=pd.DatetimeIndex(timestamps), columns=["A"])
        psdf = ps.from_pandas(pdf)

        # (rule, extra keyword arguments, expected ValueError pattern)
        invalid_calls = [
            ("3W", {}, "rule code W-SUN is not supported"),
            ("0Y", {}, "rule offset must be positive"),
            ("3Y", {"closed": "middle"}, "invalid closed: 'middle'"),
            ("3Y", {"label": "both"}, "invalid label: 'both'"),
        ]
        for rule, options, pattern in invalid_calls:
            with self.assertRaisesRegex(ValueError, pattern):
                psdf.A.resample(rule, **options).sum()

        # Column A is passed as ``on`` for both the series and the frame form.
        with self.assertRaisesRegex(NotImplementedError, on_needs_timestamp):
            psdf.A.resample("2D", on=psdf.A).sum()
        with self.assertRaisesRegex(NotImplementedError, on_needs_timestamp):
            psdf[["A"]].resample("2D", on=psdf.A).sum()

        # A string column and an empty column selection are both expected to
        # be rejected with the same message.
        psdf["B"] = ["a", "b", "c", "d"]
        with self.assertRaisesRegex(ValueError, nothing_to_aggregate):
            psdf.B.resample("2D").sum()
        with self.assertRaisesRegex(ValueError, nothing_to_aggregate):
            psdf[[]].resample("2D").sum()
예제 #8
0
    def test_interpolate_error(self):
        """Verify the errors raised for invalid ``interpolate`` arguments.

        ``method="quadratic"`` is expected to raise ``NotImplementedError``
        and ``limit=0`` is expected to raise ``ValueError``.
        """
        psdf = ps.range(10)

        linear_only = "interpolate currently works only for method='linear'"
        with self.assertRaisesRegex(NotImplementedError, linear_only):
            psdf.interpolate(method="quadratic")

        non_positive_limit = "limit must be > 0"
        with self.assertRaisesRegex(ValueError, non_positive_limit):
            psdf.interpolate(limit=0)
예제 #9
0
    def test_repr_indexes(self):
        """Check ``repr`` of indexes around the display row limit.

        The same three situations are covered for the index of a ``ps.range``
        frame and for a two-level ``MultiIndex``: exactly
        ``ReprTest.max_display_count`` entries, one entry more, and one entry
        more while ``display.max_rows`` is set to ``None``.
        """
        limit = ReprTest.max_display_count

        # --- Index taken from a ps.range frame ---
        kidx = ps.range(limit).index
        # assertNotIn/assertIn report both operands on failure, unlike
        # assertTrue which only reports "False is not true".
        self.assertNotIn("Showing only the first", repr(kidx))
        self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        kidx = ps.range(limit + 1).index
        self.assertIn("Showing only the first", repr(kidx))
        # The expected prefix is built by truncating through a Series, then
        # taking that Series' index.
        expected_prefix = repr(kidx.to_pandas().to_series().head(limit).index)
        self.assertTrue(repr(kidx).startswith(expected_prefix))

        with option_context("display.max_rows", None):
            kidx = ps.range(limit + 1).index
            self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        # --- MultiIndex built from (100 * i, i) tuples ---
        kidx = ps.MultiIndex.from_tuples([(100 * i, i) for i in range(limit)])
        self.assertNotIn("Showing only the first", repr(kidx))
        self.assert_eq(repr(kidx), repr(kidx.to_pandas()))

        kidx = ps.MultiIndex.from_tuples([(100 * i, i) for i in range(limit + 1)])
        self.assertIn("Showing only the first", repr(kidx))
        # Here the truncation goes through a DataFrame instead of a Series.
        expected_prefix = repr(kidx.to_pandas().to_frame().head(limit).index)
        self.assertTrue(repr(kidx).startswith(expected_prefix))

        with option_context("display.max_rows", None):
            kidx = ps.MultiIndex.from_tuples(
                [(100 * i, i) for i in range(limit + 1)]
            )
            self.assert_eq(repr(kidx), repr(kidx.to_pandas()))
예제 #10
0
    def test_repr_series(self):
        """Check ``repr`` of Series around the display row limit.

        The same three situations (exactly ``ReprTest.max_display_count``
        rows, one row more, and one row more with ``display.max_rows`` set to
        ``None``) are covered for a named series, for the same series after
        ``rename()`` with no argument, and, when the pyspark version is at
        least 2.4, for a series built from a ``MultiIndex``.
        """
        limit = ReprTest.max_display_count

        # --- The ``id`` column of a ps.range frame ---
        kser = ps.range(limit).id
        # assertNotIn/assertIn report both operands on failure, unlike
        # assertTrue which only reports "False is not true".
        self.assertNotIn("Showing only the first", repr(kser))
        self.assert_eq(repr(kser), repr(kser.to_pandas()))

        kser = ps.range(limit + 1).id
        self.assertIn("Showing only the first", repr(kser))
        expected_prefix = repr(kser.to_pandas().head(limit))
        self.assertTrue(repr(kser).startswith(expected_prefix))

        with option_context("display.max_rows", None):
            kser = ps.range(limit + 1).id
            self.assert_eq(repr(kser), repr(kser.to_pandas()))

        # --- The same column after rename() with no argument ---
        kser = ps.range(limit).id.rename()
        self.assertNotIn("Showing only the first", repr(kser))
        self.assert_eq(repr(kser), repr(kser.to_pandas()))

        kser = ps.range(limit + 1).id.rename()
        self.assertIn("Showing only the first", repr(kser))
        expected_prefix = repr(kser.to_pandas().head(limit))
        self.assertTrue(repr(kser).startswith(expected_prefix))

        with option_context("display.max_rows", None):
            kser = ps.range(limit + 1).id.rename()
            self.assert_eq(repr(kser), repr(kser.to_pandas()))

        # --- Series derived from a MultiIndex (version-gated) ---
        if LooseVersion(pyspark.__version__) >= LooseVersion("2.4"):
            kser = ps.MultiIndex.from_tuples(
                [(100 * i, i) for i in range(limit)]
            ).to_series()
            self.assertNotIn("Showing only the first", repr(kser))
            self.assert_eq(repr(kser), repr(kser.to_pandas()))

            kser = ps.MultiIndex.from_tuples(
                [(100 * i, i) for i in range(limit + 1)]
            ).to_series()
            self.assertIn("Showing only the first", repr(kser))
            expected_prefix = repr(kser.to_pandas().head(limit))
            self.assertTrue(repr(kser).startswith(expected_prefix))

            with option_context("display.max_rows", None):
                kser = ps.MultiIndex.from_tuples(
                    [(100 * i, i) for i in range(limit + 1)]
                ).to_series()
                self.assert_eq(repr(kser), repr(kser.to_pandas()))
예제 #11
0
 def test_frame_apply_negative(self):
     """Verify ``spark.apply`` raises ``ValueError`` when the function returns an int."""
     wrong_output_type = "The output of the function.* pyspark.sql.DataFrame.*int"
     with self.assertRaisesRegex(ValueError, wrong_output_type):
         ps.range(10).spark.apply(lambda scol: 1)
예제 #12
0
 def test_expanding_repr(self):
     """Verify the ``repr`` text of an ``expanding(5)`` object."""
     expanding = ps.range(10).expanding(5)
     self.assertEqual(repr(expanding), "Expanding [min_periods=5]")
예제 #13
0
 def psdf2(self):
     """Return the frame produced by ``ps.range(1002)``."""
     row_count = 1002
     return ps.range(row_count)