예제 #1
0
    def test_toPandas_respect_session_timezone(self):
        # Exercises the "respectSessionTimeZone" pandas setting while the
        # session time zone is pinned to America/New_York:
        #   1. under either setting, the two frames returned by
        #      _toPandas_arrow_toggle (presumably the non-Arrow and the Arrow
        #      conversion, judging by the names) must be equal;
        #   2. the frame obtained with the setting on must differ from the one
        #      obtained with it off;
        #   3. running every TimestampType column of the "off" frame through
        #      _check_series_convert_timestamps_local_tz must reproduce the
        #      "on" frame.
        respect_key = "spark.sql.execution.pandas.respectSessionTimeZone"
        tz_key = "spark.sql.session.timeZone"
        session_tz = "America/New_York"

        frame = self.spark.createDataFrame(self.data, schema=self.schema)

        # Pass 1: session time zone is set but not respected.
        with self.sql_conf({respect_key: False, tz_key: session_tz}):
            plain_ignored, arrow_ignored = self._toPandas_arrow_toggle(frame)
            assert_frame_equal(arrow_ignored, plain_ignored)

        # Pass 2: session time zone is set and respected.
        with self.sql_conf({respect_key: True, tz_key: session_tz}):
            plain_respected, arrow_respected = self._toPandas_arrow_toggle(frame)
            assert_frame_equal(arrow_respected, plain_respected)

            # The setting must have a visible effect on the result.
            frames_identical = plain_respected.equals(plain_ignored)
            self.assertFalse(frames_identical)

            from pyspark.sql.types import (
                _check_series_convert_timestamps_local_tz as to_local_tz,
            )
            expected = plain_ignored.copy()
            timestamp_columns = [
                f.name for f in self.schema
                if isinstance(f.dataType, TimestampType)
            ]
            # Only timestamp columns are converted; everything else is left
            # exactly as it came out of the first pass.
            for column in timestamp_columns:
                expected[column] = to_local_tz(expected[column], session_tz)
            assert_frame_equal(plain_respected, expected)
예제 #2
0
    def test_toPandas_respect_session_timezone(self):
        # Compares toPandas results with
        # "spark.sql.execution.pandas.respectSessionTimeZone" switched off and
        # on, keeping "spark.sql.session.timeZone" at America/New_York:
        #   * in both modes the pair of frames from _toPandas_arrow_toggle
        #     (presumably without/with Arrow, going by the names) must match;
        #   * the "on" frame must not equal the "off" frame;
        #   * converting the TimestampType columns of the "off" frame with
        #     _check_series_convert_timestamps_local_tz must yield the "on"
        #     frame.
        timezone = "America/New_York"

        def conf_for(respect_session_tz):
            # Key order is kept the same as it is handed to sql_conf.
            return {
                "spark.sql.execution.pandas.respectSessionTimeZone": respect_session_tz,
                "spark.sql.session.timeZone": timezone,
            }

        source_df = self.spark.createDataFrame(self.data, schema=self.schema)

        with self.sql_conf(conf_for(False)):
            unadjusted, unadjusted_arrow = self._toPandas_arrow_toggle(source_df)
            self.assertPandasEqual(unadjusted_arrow, unadjusted)

        with self.sql_conf(conf_for(True)):
            adjusted, adjusted_arrow = self._toPandas_arrow_toggle(source_df)
            self.assertPandasEqual(adjusted_arrow, adjusted)

            # Respecting the session time zone has to change the output.
            self.assertFalse(adjusted.equals(unadjusted))

            from pyspark.sql.types import (
                _check_series_convert_timestamps_local_tz as convert_to_local,
            )
            corrected = unadjusted.copy()
            for field in self.schema:
                if not isinstance(field.dataType, TimestampType):
                    continue  # non-timestamp columns are compared untouched
                column = field.name
                corrected[column] = convert_to_local(corrected[column], timezone)
            self.assertPandasEqual(adjusted, corrected)