예제 #1
0
 def create_array(s, t):
     """Build an Arrow array from the pandas Series ``s`` using Arrow type ``t``.

     ``t`` may be ``None``, in which case it is passed through unchanged to
     ``pa.Array.from_pandas``.

     NOTE(review): ``self`` is not a parameter here; it is presumably captured
     from an enclosing method that is not visible in this snippet.

     Raises:
         ValueError: if ``pa.Array.from_pandas`` raises ``ValueError``. When
             ``self._safecheck`` is truthy the error is re-raised with a
             message pointing at the safe-conversion SQL config; otherwise
             the original error is re-raised as-is.
     """
     # The null mask is computed from the series as received, before any of
     # the conversions below. No mask is passed when the underlying array
     # exposes ``__arrow_array__``.
     null_mask = None if hasattr(s.array, "__arrow_array__") else s.isnull()

     typed = t is not None
     # Ensure timestamp series are in expected form for Spark internal representation
     if typed and pa.types.is_timestamp(t) and t.tz is not None:
         s = _check_series_convert_timestamps_internal(s, self._timezone)
     elif typed and pa.types.is_map(t):
         s = _convert_dict_to_map_items(s)
     elif is_categorical_dtype(s.dtype):
         # Note: This can be removed once minimum pyarrow version is >= 0.16.1
         categories_dtype = s.dtypes.categories.dtype
         s = s.astype(categories_dtype)

     try:
         return pa.Array.from_pandas(
             s, mask=null_mask, type=t, safe=self._safecheck
         )
     except ValueError as e:
         if not self._safecheck:
             # Safe checking is off, so the hint below would not apply.
             raise e
         template = (
             "Exception thrown when converting pandas.Series (%s) to "
             "Arrow Array (%s). It can be caused by overflows or other "
             "unsafe conversions warned by Arrow. Arrow safe type check "
             "can be disabled by using SQL config "
             "`spark.sql.execution.pandas.convertToArrowArraySafely`."
         )
         raise ValueError(template % (s.dtype, t)) from e
예제 #2
0
 def create_array(s, t):
     """Build an Arrow array from the pandas Series ``s`` using Arrow type ``t``.

     ``t`` may be ``None``, in which case it is passed through unchanged to
     ``pa.Array.from_pandas``.

     NOTE(review): ``self`` is not a parameter here; it is presumably captured
     from an enclosing method that is not visible in this snippet.

     Raises:
         RuntimeError: if the conversion raises ``pa.ArrowException``. The
             Arrow error is attached as ``__cause__`` so the message stays a
             plain string instead of a tuple containing the exception.
     """
     # The null mask is computed before the timestamp conversion below
     # replaces ``s``.
     mask = s.isnull()
     # Ensure timestamp series are in expected form for Spark internal representation
     if t is not None and pa.types.is_timestamp(t):
         s = _check_series_convert_timestamps_internal(s, self._timezone)
     try:
         array = pa.Array.from_pandas(s, mask=mask, type=t, safe=self._safecheck)
     except pa.ArrowException as e:
         error_msg = (
             "Exception thrown when converting pandas.Series (%s) to Arrow "
             "Array (%s). It can be caused by overflows or other unsafe "
             "conversions warned by Arrow. Arrow safe type check can be "
             "disabled by using SQL config "
             "`spark.sql.execution.pandas.convertToArrowArraySafely`."
         )
         # Chain the Arrow error explicitly rather than passing it as a second
         # positional argument, which made str(exc) render as a tuple repr.
         raise RuntimeError(error_msg % (s.dtype, t)) from e
     return array