def create_array(s, t):
    # Extension arrays (e.g. pandas nullable dtypes) convert themselves via
    # the __arrow_array__ protocol and track their own nulls, so no explicit
    # mask is passed for them.
    if hasattr(s.array, "__arrow_array__"):
        mask = None
    else:
        mask = s.isnull()
    # Ensure timestamp series are in expected form for Spark internal representation
    if t is not None and pa.types.is_timestamp(t) and t.tz is not None:
        s = _check_series_convert_timestamps_internal(s, self._timezone)
    elif t is not None and pa.types.is_map(t):
        s = _convert_dict_to_map_items(s)
    elif is_categorical_dtype(s.dtype):
        # Note: This can be removed once minimum pyarrow version is >= 0.16.1
        s = s.astype(s.dtypes.categories.dtype)
    try:
        array = pa.Array.from_pandas(s, mask=mask, type=t, safe=self._safecheck)
    except ValueError as e:
        if self._safecheck:
            error_msg = (
                "Exception thrown when converting pandas.Series (%s) to "
                + "Arrow Array (%s). It can be caused by overflows or other "
                + "unsafe conversions warned by Arrow. Arrow safe type check "
                + "can be disabled by using SQL config "
                + "`spark.sql.execution.pandas.convertToArrowArraySafely`."
            )
            raise ValueError(error_msg % (s.dtype, t)) from e
        else:
            raise e
    return array
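The `safe=self._safecheck` flag is what the error message describes: with safe checking enabled, pyarrow rejects lossy casts instead of silently truncating values. A minimal standalone sketch of that behavior, using only public pandas/pyarrow APIs (the example values are illustrative, not from the source):

import pandas as pd
import pyarrow as pa

s = pd.Series([1.5, 2.0, 3.7])

try:
    # safe=True rejects the lossy float -> int64 cast; pyarrow raises
    # ArrowInvalid, a ValueError subclass, which is why the except clause
    # in create_array above catches ValueError.
    pa.Array.from_pandas(s, type=pa.int64(), safe=True)
except ValueError as e:
    print("safe check failed:", e)

# safe=False performs the same cast, silently truncating 1.5 -> 1 and 3.7 -> 3.
print(pa.Array.from_pandas(s, type=pa.int64(), safe=False))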
# Earlier, simpler variant: always computes a null mask, converts every
# timestamp column (not just timezone-aware ones), and wraps any Arrow
# error in a RuntimeError rather than chaining a ValueError.
def create_array(s, t):
    mask = s.isnull()
    # Ensure timestamp series are in expected form for Spark internal representation
    if t is not None and pa.types.is_timestamp(t):
        s = _check_series_convert_timestamps_internal(s, self._timezone)
    try:
        array = pa.Array.from_pandas(s, mask=mask, type=t, safe=self._safecheck)
    except pa.ArrowException as e:
        error_msg = "Exception thrown when converting pandas.Series (%s) to Arrow " + \
                    "Array (%s). It can be caused by overflows or other unsafe " + \
                    "conversions warned by Arrow. Arrow safe type check can be " + \
                    "disabled by using SQL config " + \
                    "`spark.sql.execution.pandas.convertToArrowArraySafely`."
        raise RuntimeError(error_msg % (s.dtype, t), e)
    return array
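Both variants pass `mask=s.isnull()` so that pandas missing values come out as Arrow nulls; the first variant additionally skips the mask when the backing array implements `__arrow_array__`, since such extension arrays carry their own null information. A hedged standalone sketch of that difference (the `Int64` nullable dtype is my choice of example, not from the source):

import pandas as pd
import pyarrow as pa

# Plain NumPy-backed Series: nulls are communicated via an explicit mask.
s = pd.Series([1.0, None, 3.0])
print(pa.Array.from_pandas(s, mask=s.isnull(), type=pa.float64()))

# Nullable extension dtype: the array converts itself via __arrow_array__
# and tracks its own nulls, so no mask is supplied.
s2 = pd.Series([1, None, 3], dtype="Int64")
print(hasattr(s2.array, "__arrow_array__"))  # True on recent pandas
print(pa.Array.from_pandas(s2, mask=None, type=pa.int64()))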