Example #1
0
 def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Subtract ``right`` from the timestamp operand ``left``.

     Both operands are cast to long before subtracting, so the result is an
     integer column rather than the 'timedelta64[ns]' that pandas produces.
     A ``UserWarning`` describing this difference is emitted on every
     successful subtraction.

     Parameters
     ----------
     left : IndexOpsLike
         The left-hand operand.
     right : Any
         Either an ``IndexOpsMixin`` whose Spark data type is
         ``TimestampType`` / ``TimestampNTZType``, or a ``datetime.datetime``.

     Raises
     ------
     TypeError
         If ``right`` is neither of the supported kinds.
     """
     _sanitize_list_like(right)

     # pandas returns 'timedelta64[ns]' for datetime64[ns] subtraction; here the
     # operands are cast to integers instead, hence the warning text below.
     behavior_note = (
         "Note that there is a behavior difference of timestamp subtraction. "
         "The timestamp subtraction returns an integer in seconds, "
         "whereas pandas returns 'timedelta64[ns]'."
     )

     right_is_timestamp_ops = isinstance(right, IndexOpsMixin) and isinstance(
         right.spark.data_type, (TimestampType, TimestampNTZType)
     )
     if right_is_timestamp_ops:
         warnings.warn(behavior_note, UserWarning)
         return left.astype("long") - right.astype("long")

     if not isinstance(right, datetime.datetime):
         raise TypeError(
             "Datetime subtraction can only be applied to datetime series.")

     # Scalar datetime: subtract a literal column from the left column.
     warnings.warn(behavior_note, UserWarning)
     left_as_long = left.astype("long").spark.column
     right_as_long = self._cast_spark_column_timestamp_to_long(SF.lit(right))
     difference = left_as_long - right_as_long
     long_field = left._internal.data_fields[0].copy(
         dtype=np.dtype("int64"), spark_type=LongType()
     )
     return cast(
         SeriesOrIndex,
         left._with_new_scol(difference, field=long_field),
     )
Example #2
0
    def astype(self, index_ops: IndexOpsLike,
               dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        """Cast ``index_ops`` to ``dtype``, dispatching on the resolved type.

        The requested ``dtype`` is resolved with ``pandas_on_spark_type`` into
        a (dtype, Spark type) pair, and the conversion is delegated to the
        helper matching that pair: categorical, boolean, string, or any other
        type.
        """
        resolved_dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(resolved_dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, resolved_dtype, spark_type)

        if isinstance(spark_type, BooleanType):
            # Spark cannot cast binary to boolean directly, so go through
            # str first and then cast that result to boolean.
            as_text = index_ops.astype(str)
            return as_text.astype(bool)

        if isinstance(spark_type, StringType):
            return _as_string_type(index_ops, resolved_dtype)

        return _as_other_type(index_ops, resolved_dtype, spark_type)
Example #3
0
 def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Subtract ``right`` from the timestamp operand ``left``.

     Both operands are cast to long before subtracting, so the result is an
     integer column rather than the 'timedelta64[ns]' that pandas produces.
     A ``UserWarning`` describing this difference is emitted on every
     successful subtraction.

     Parameters
     ----------
     left : IndexOpsLike
         The left-hand operand.
     right : Any
         Either an ``IndexOpsMixin`` whose Spark data type is
         ``TimestampType``, or a ``datetime.datetime``.

     Raises
     ------
     TypeError
         If ``right`` is neither of the supported kinds.
     """
     # pandas returns 'timedelta64[ns]' for datetime64[ns] subtraction; here the
     # operands are cast to integers instead, hence the warning text below.
     behavior_note = (
         "Note that there is a behavior difference of timestamp subtraction. "
         "The timestamp subtraction returns an integer in seconds, "
         "whereas pandas returns 'timedelta64[ns]'."
     )

     right_is_timestamp_ops = isinstance(right, IndexOpsMixin) and isinstance(
         right.spark.data_type, TimestampType
     )
     if right_is_timestamp_ops:
         warnings.warn(behavior_note, UserWarning)
         return left.astype("long") - right.astype("long")

     if not isinstance(right, datetime.datetime):
         raise TypeError(
             "datetime subtraction can only be applied to datetime series.")

     warnings.warn(behavior_note, UserWarning)

     def _minus_literal(scol):
         # Cast both the column and the datetime literal to long, then subtract.
         literal_as_long = SF.lit(right).cast(as_spark_type("long"))
         return scol.astype("long") - literal_as_long

     return cast(SeriesOrIndex, left.spark.transform(_minus_literal))