Beispiel #1
0
 def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Subtract a timestamp operand from the timestamp-typed ``left``.

     Both operands are cast to long before subtracting, so the result is an
     integer column rather than the 'timedelta64[ns]' that pandas returns
     from 'datetime64[ns]' subtraction. A ``UserWarning`` announcing that
     difference is emitted on every successful call.

     Parameters
     ----------
     left : series/index holding the timestamps (the minuend).
     right : a timestamp-typed series/index, or a ``datetime.datetime``.

     Raises
     ------
     TypeError
         If ``right`` is neither of the accepted kinds.
     """
     _sanitize_list_like(right)

     behavior_note = (
         "Note that there is a behavior difference of timestamp subtraction. "
         "The timestamp subtraction returns an integer in seconds, "
         "whereas pandas returns 'timedelta64[ns]'.")

     right_is_timestamp_series = isinstance(right, IndexOpsMixin) and isinstance(
         right.spark.data_type, (TimestampType, TimestampNTZType))
     if right_is_timestamp_series:
         warnings.warn(behavior_note, UserWarning)
         return left.astype("long") - right.astype("long")

     if not isinstance(right, datetime.datetime):
         raise TypeError(
             "Datetime subtraction can only be applied to datetime series.")

     # Scalar datetime: build the long-typed difference column by hand and
     # attach a field describing the resulting int64/LongType column.
     warnings.warn(behavior_note, UserWarning)
     left_as_long = left.astype("long").spark.column
     right_as_long = self._cast_spark_column_timestamp_to_long(SF.lit(right))
     difference = left_as_long - right_as_long
     long_field = left._internal.data_fields[0].copy(
         dtype=np.dtype("int64"), spark_type=LongType())
     return cast(
         SeriesOrIndex,
         left._with_new_scol(difference, field=long_field))
 def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Reflected multiplication: compute ``right * left`` for a scalar ``right``.

     Raises
     ------
     TypeError
         If ``right`` is not a ``numbers.Number``.
     """
     _sanitize_list_like(right)
     if isinstance(right, numbers.Number):
         numeric_right = transform_boolean_operand_to_numeric(right)
         reflected_multiply = column_op(Column.__rmul__)
         return reflected_multiply(left, numeric_right)
     raise TypeError(
         "Multiplication can not be applied to given types.")
Beispiel #3
0
    def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left - right`` for a numeric ``left``.

        ``right`` is converted with ``transform_boolean_operand_to_numeric``
        using the Spark type of ``left`` before the subtraction is applied.

        Raises
        ------
        TypeError
            If ``right`` is not accepted by
            ``is_valid_operand_for_numeric_arithmetic``.
        """
        _sanitize_list_like(right)
        if is_valid_operand_for_numeric_arithmetic(right):
            subtrahend = transform_boolean_operand_to_numeric(
                right, spark_type=left.spark.data_type
            )
            subtract = column_op(Column.__sub__)
            return subtract(left, subtrahend)
        raise TypeError("Subtraction can not be applied to given types.")
Beispiel #4
0
    def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Reflected subtraction: compute ``right - left`` for a ``timedelta`` scalar.

        Raises
        ------
        TypeError
            If ``right`` is not a ``timedelta``.
        """
        # Imported locally, as in the original (presumably to avoid a circular import).
        from pyspark.pandas.base import column_op

        _sanitize_list_like(right)

        if not isinstance(right, timedelta):
            raise TypeError("Timedelta subtraction can only be applied to timedelta series.")

        reflected_subtract = column_op(Column.__rsub__)
        return reflected_subtract(left, right)
Beispiel #5
0
 def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Reflected modulo: compute ``right % left`` for a non-bool numeric scalar.

     ``left`` is first converted with ``transform_boolean_operand_to_numeric``
     to the Spark type derived from ``type(right)``.

     Raises
     ------
     TypeError
         If ``right`` is not a ``numbers.Number`` or is a ``bool``.
     """
     _sanitize_list_like(right)
     is_non_bool_number = isinstance(right, numbers.Number) and not isinstance(right, bool)
     if not is_non_bool_number:
         raise TypeError(
             "Modulo can not be applied to %s and the given type." % self.pretty_name
         )
     target_type = as_spark_type(type(right))
     numeric_left = transform_boolean_operand_to_numeric(left, spark_type=target_type)
     return right % numeric_left
Beispiel #6
0
    def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Reflected exponentiation: compute ``right ** left`` for a scalar ``right``.

        When ``right`` equals 1 the result is ``right`` itself for every row;
        otherwise the result is ``Column.__rpow__(left, right)``.

        Raises
        ------
        TypeError
            If ``right`` is not a ``numbers.Number``.
        """
        _sanitize_list_like(right)
        if not isinstance(right, numbers.Number):
            raise TypeError("Exponentiation can not be applied to given types.")

        numeric_right = transform_boolean_operand_to_numeric(right)

        def reflected_power(exponent_scol: Column, base: Any) -> Column:
            # Special-case a base of 1 so the row value does not depend on the
            # exponent column (presumably to match pandas for null exponents).
            base_is_one = F.when(SF.lit(base == 1), base)
            return base_is_one.otherwise(Column.__rpow__(exponent_scol, base))

        return column_op(reflected_power)(left, numeric_right)
Beispiel #7
0
    def rmod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Reflected modulo: compute ``right % left`` for a scalar ``right``.

        The result is built as ``((right % left) + left) % left``.

        Raises
        ------
        TypeError
            If ``right`` is not a ``numbers.Number``.
        """
        _sanitize_list_like(right)
        if not isinstance(right, numbers.Number):
            raise TypeError("Modulo can not be applied to given types.")

        numeric_right = transform_boolean_operand_to_numeric(right)

        def reflected_mod(divisor: Column, dividend: Any) -> Column:
            # Adding the divisor and reducing again moves the remainder onto
            # the divisor's sign (presumably to match Python/pandas semantics).
            remainder = dividend % divisor
            return (remainder + divisor) % divisor

        return column_op(reflected_mod)(left, numeric_right)
Beispiel #8
0
    def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left % right`` for a numeric ``left``.

        The result is built as ``((left % right) + right) % right``.

        Raises
        ------
        TypeError
            If ``right`` is not accepted by
            ``is_valid_operand_for_numeric_arithmetic``.
        """
        _sanitize_list_like(right)
        if not is_valid_operand_for_numeric_arithmetic(right):
            raise TypeError("Modulo can not be applied to given types.")

        def sign_adjusted_mod(dividend: Column, divisor: Any) -> Column:
            # Adding the divisor and reducing again moves the remainder onto
            # the divisor's sign (presumably to match Python/pandas semantics).
            remainder = dividend % divisor
            return (remainder + divisor) % divisor

        numeric_right = transform_boolean_operand_to_numeric(
            right, spark_type=left.spark.data_type
        )
        return column_op(sign_adjusted_mod)(left, numeric_right)
Beispiel #9
0
    def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left * right``.

        If ``right`` is a string-typed series/index, the result is
        ``SF.repeat`` applied with ``right`` as the string and ``left`` as
        the count. Otherwise ordinary numeric multiplication is performed.

        Raises
        ------
        TypeError
            If ``right`` is neither a string series/index nor accepted by
            ``is_valid_operand_for_numeric_arithmetic``.
        """
        _sanitize_list_like(right)

        right_is_string_series = isinstance(right, IndexOpsMixin) and isinstance(
            right.spark.data_type, StringType
        )
        if right_is_string_series:
            # Operand order is swapped on purpose: the string column comes first.
            repeat_string = column_op(SF.repeat)
            return repeat_string(right, left)

        if is_valid_operand_for_numeric_arithmetic(right):
            multiplier = transform_boolean_operand_to_numeric(
                right, spark_type=left.spark.data_type
            )
            return column_op(Column.__mul__)(left, multiplier)

        raise TypeError("Multiplication can not be applied to given types.")
Beispiel #10
0
    def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Reflected concatenation: prepend the ``bytes`` scalar ``right`` to ``left``.

        Raises
        ------
        TypeError
            If ``right`` is not a ``bytes`` instance.
        """
        _sanitize_list_like(right)

        if not isinstance(right, bytes):
            raise TypeError(
                "Concatenation can not be applied to %s and the given type." %
                self.pretty_name)

        # The literal goes first because this is the reflected operation.
        concatenated = F.concat(SF.lit(right), left.spark.column)
        return cast(SeriesOrIndex, left._with_new_scol(concatenated))
Beispiel #11
0
    def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Concatenate ``right`` onto the end of the binary-typed ``left``.

        ``right`` may be a binary-typed series/index or a ``bytes`` scalar
        (which is wrapped in a literal column).

        Raises
        ------
        TypeError
            If ``right`` is neither of the accepted kinds.
        """
        _sanitize_list_like(right)

        right_is_binary_series = isinstance(right, IndexOpsMixin) and isinstance(
            right.spark.data_type, BinaryType)
        if right_is_binary_series:
            suffix = right
        elif isinstance(right, bytes):
            suffix = SF.lit(right)
        else:
            raise TypeError(
                "Concatenation can not be applied to %s and the given type." %
                self.pretty_name)

        return column_op(F.concat)(left, suffix)
Beispiel #12
0
    def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left | right``, turning a missing scalar into a null literal.

        A ``right`` that is not a ``Column`` is wrapped in a literal; if
        ``pd.isna`` reports it as missing, the literal is ``None``.
        """
        _sanitize_list_like(right)

        def null_aware_or(left_scol: Column, operand: Any) -> Column:
            if isinstance(operand, Column):
                return left_scol | operand
            # Scalars: NA-like values become a null literal instead of being
            # passed to SF.lit as-is.
            literal = SF.lit(None) if pd.isna(operand) else SF.lit(operand)
            return left_scol | literal

        return column_op(null_aware_or)(left, right)
Beispiel #13
0
    def rtruediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Reflected true division: compute ``right / left`` for a scalar ``right``.

        Rows where ``left`` equals 0 yield ``inf / right``; all other rows
        yield ``right / left``.

        Raises
        ------
        TypeError
            If ``right`` is not a ``numbers.Number``.
        """
        _sanitize_list_like(right)
        if not isinstance(right, numbers.Number):
            raise TypeError("True division can not be applied to given types.")

        def reflected_truediv(divisor: Column, dividend: Any) -> Column:
            divisor_is_zero = divisor == 0
            # Zero divisor: use inf / dividend instead of dividing by zero
            # (presumably so the sign follows the dividend, as in pandas).
            on_zero = SF.lit(np.inf).__div__(dividend)
            regular = SF.lit(dividend).__truediv__(divisor)
            return F.when(divisor_is_zero, on_zero).otherwise(regular)

        numeric_right = transform_boolean_operand_to_numeric(
            right, spark_type=left.spark.data_type
        )
        return numpy_column_op(reflected_truediv)(left, numeric_right)
Beispiel #14
0
    def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left / right`` with explicit handling of a zero divisor.

        Rows where ``right`` is non-zero or null yield ``left / right``; the
        remaining rows (``right`` equal to 0) yield ``inf / left``.

        Raises
        ------
        TypeError
            If ``right`` is not accepted by
            ``is_valid_operand_for_numeric_arithmetic``.
        """
        _sanitize_list_like(right)
        if not is_valid_operand_for_numeric_arithmetic(right):
            raise TypeError("True division can not be applied to given types.")

        def guarded_truediv(numerator: Column, denominator: Any) -> Column:
            can_divide = SF.lit(denominator != 0) | SF.lit(denominator).isNull()
            quotient = numerator.__div__(denominator)
            # Zero divisor: use inf / numerator instead of dividing by zero
            # (presumably so the sign follows the numerator, as in pandas).
            on_zero = SF.lit(np.inf).__div__(numerator)
            return F.when(can_divide, quotient).otherwise(on_zero)

        numeric_right = transform_boolean_operand_to_numeric(
            right, spark_type=left.spark.data_type
        )
        return numpy_column_op(guarded_truediv)(left, numeric_right)
Beispiel #15
0
    def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left - right`` for a timedelta-typed ``left``.

        ``right`` may be a series/index whose Spark type is
        ``DayTimeIntervalType``, or a ``timedelta`` scalar.

        Raises
        ------
        TypeError
            If ``right`` is neither of the accepted kinds.
        """
        # Imported locally, as in the original (presumably to avoid a circular import).
        from pyspark.pandas.base import column_op

        _sanitize_list_like(right)

        right_is_interval_series = isinstance(right, IndexOpsMixin) and isinstance(
            right.spark.data_type, DayTimeIntervalType
        )
        if not (right_is_interval_series or isinstance(right, timedelta)):
            raise TypeError("Timedelta subtraction can only be applied to timedelta series.")

        subtract = column_op(Column.__sub__)
        return subtract(left, right)
Beispiel #16
0
 def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Compute ``left - right`` for a boolean-typed ``left``.

     ``left`` is converted with ``transform_boolean_operand_to_numeric`` to
     the Spark type of ``right`` (derived from ``type(right)`` for a scalar,
     or taken from the series/index otherwise) before subtracting.

     Raises
     ------
     TypeError
         If ``right`` is rejected by
         ``is_valid_operand_for_numeric_arithmetic(right, allow_bool=False)``.
     """
     _sanitize_list_like(right)
     if not is_valid_operand_for_numeric_arithmetic(right, allow_bool=False):
         raise TypeError(
             "Subtraction can not be applied to %s and the given type." % self.pretty_name
         )

     if isinstance(right, numbers.Number):
         target_type = as_spark_type(type(right))
     else:
         # Anything that passed validation and is not a number is expected
         # to be a series/index.
         assert isinstance(right, IndexOpsMixin)
         target_type = right.spark.data_type

     numeric_left = transform_boolean_operand_to_numeric(left, spark_type=target_type)
     return numeric_left - right
 def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Reflected date subtraction: compute ``right - left`` for a ``datetime.date``.

     The result is an integer (long) number of days rather than the
     'timedelta64[ns]' pandas returns; a ``UserWarning`` announcing that
     difference is emitted on every successful call.

     Raises
     ------
     TypeError
         If ``right`` is not a ``datetime.date``, or is a
         ``datetime.datetime`` (which subclasses ``date`` but is rejected).
     """
     _sanitize_list_like(right)

     behavior_note = ("Note that there is a behavior difference of date subtraction. "
                      "The date subtraction returns an integer in days, "
                      "whereas pandas returns 'timedelta64[ns]'.")

     is_plain_date = isinstance(right, datetime.date) and not isinstance(
         right, datetime.datetime)
     if not is_plain_date:
         raise TypeError(
             "Date subtraction can only be applied to date series.")

     warnings.warn(behavior_note, UserWarning)
     # datediff is applied as (left, right); negating it yields the
     # reflected difference.
     left_minus_right = column_op(F.datediff)(left, SF.lit(right)).astype("long")
     return -left_minus_right
Beispiel #18
0
    def __or__(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left | right`` for a boolean ``left``, never returning nulls.

        If ``right`` is a series/index with an extension dtype, the operation
        is delegated to ``right.__or__(left)``. Otherwise a missing scalar
        yields ``False``, and any row where ``left`` or the OR result is null
        is also replaced by ``False``.
        """
        _sanitize_list_like(right)

        right_has_extension_dtype = isinstance(right, IndexOpsMixin) and isinstance(
            right.dtype, extension_dtypes
        )
        if right_has_extension_dtype:
            return right.__or__(left)

        def null_to_false_or(left_scol: Column, operand: Any) -> Column:
            if isinstance(operand, Column) or not pd.isna(operand):
                combined = left_scol | SF.lit(operand)
                has_null = left_scol.isNull() | combined.isNull()
                return F.when(has_null, False).otherwise(combined)
            # Missing scalar operand: the whole result is False.
            return SF.lit(False)

        return column_op(null_to_false_or)(left, right)
Beispiel #19
0
    def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left ** right`` with two special cases checked first.

        Rows where ``left`` equals 1 yield ``left``; otherwise rows where
        ``right`` equals 0 yield 1; all remaining rows yield
        ``Column.__pow__(left, right)``.

        Raises
        ------
        TypeError
            If ``right`` is not accepted by
            ``is_valid_operand_for_numeric_arithmetic``.
        """
        _sanitize_list_like(right)
        if not is_valid_operand_for_numeric_arithmetic(right):
            raise TypeError("Exponentiation can not be applied to given types.")

        def guarded_power(base: Column, exponent: Any) -> Column:
            # The special cases bypass the general power expression
            # (presumably so null operands still give pandas' results).
            base_is_one = F.when(base == 1, base)
            or_zero_exponent = base_is_one.when(SF.lit(exponent) == 0, 1)
            return or_zero_exponent.otherwise(Column.__pow__(base, exponent))

        numeric_right = transform_boolean_operand_to_numeric(
            right, spark_type=left.spark.data_type
        )
        return column_op(guarded_power)(left, numeric_right)
    def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left ^ right`` for boolean operands, keeping nulls.

        Both sides are cast to integer, combined with ``bitwiseXOR`` and cast
        back to boolean. A missing scalar ``right`` becomes a null literal.

        Raises
        ------
        TypeError
            If ``_is_boolean_type(right)`` is false.
        """
        _sanitize_list_like(right)

        if not _is_boolean_type(right):
            raise TypeError("XOR can not be applied to given types.")

        def nullable_xor(left_scol: Column, operand: Any) -> Column:
            if isinstance(operand, Column):
                operand_scol = operand
            elif pd.isna(operand):
                operand_scol = SF.lit(None)
            else:
                operand_scol = SF.lit(operand)
            as_int = left_scol.cast("integer").bitwiseXOR(
                operand_scol.cast("integer"))
            return as_int.cast("boolean")

        return column_op(nullable_xor)(left, right)
Beispiel #21
0
 def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Compute ``left * right`` for a boolean-typed ``left``.

     Boolean operands (a ``bool`` scalar or a boolean-typed series/index)
     are handled as ``left.__and__(right)``. For any other valid operand,
     ``left`` is converted with ``transform_boolean_operand_to_numeric`` to
     the operand's Spark type and then multiplied.

     Raises
     ------
     TypeError
         If ``right`` is not accepted by
         ``is_valid_operand_for_numeric_arithmetic``.
     """
     _sanitize_list_like(right)
     if not is_valid_operand_for_numeric_arithmetic(right):
         raise TypeError(
             "Multiplication can not be applied to %s and the given type." % self.pretty_name
         )

     # bool must be tested before numbers.Number because bool is a Number.
     if isinstance(right, bool):
         return left.__and__(right)

     if isinstance(right, numbers.Number):
         target_type = as_spark_type(type(right))
     else:
         assert isinstance(right, IndexOpsMixin)
         right_is_boolean_series = isinstance(right, IndexOpsMixin) and isinstance(
             right.spark.data_type, BooleanType
         )
         if right_is_boolean_series:
             return left.__and__(right)
         target_type = right.spark.data_type

     numeric_left = transform_boolean_operand_to_numeric(left, spark_type=target_type)
     return numeric_left * right
    def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute ``left ^ right`` for a boolean ``left``, never returning nulls.

        If ``right`` is a series/index with an extension dtype, the operation
        is delegated to ``right ^ left``. Otherwise both sides are cast to
        integer, combined with ``bitwiseXOR``, cast back to boolean, and any
        null result is replaced by ``False``.

        Raises
        ------
        TypeError
            If ``right`` is not an extension-dtype series/index and
            ``_is_valid_for_logical_operator(right)`` is false.
        """
        _sanitize_list_like(right)

        right_has_extension_dtype = isinstance(right, IndexOpsMixin) and isinstance(
            right.dtype, extension_dtypes)
        if right_has_extension_dtype:
            return right ^ left

        if not _is_valid_for_logical_operator(right):
            raise TypeError("XOR can not be applied to given types.")

        def null_to_false_xor(left_scol: Column, operand: Any) -> Column:
            if isinstance(operand, Column):
                operand_scol = operand
            elif pd.isna(operand):
                operand_scol = SF.lit(None)
            else:
                operand_scol = SF.lit(operand)
            xored = left_scol.cast("integer").bitwiseXOR(
                operand_scol.cast("integer")).cast("boolean")
            return F.when(xored.isNull(), False).otherwise(xored)

        return column_op(null_to_false_xor)(left, right)
    def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Concatenate the array-typed ``right`` onto the array-typed ``left``.

        The element types must be equal, unless both are ``NumericType``.

        Raises
        ------
        TypeError
            If ``right`` is not a series/index of ``ArrayType``, or if the
            element types differ and are not both numeric.
        """
        _sanitize_list_like(right)

        right_is_array_series = isinstance(right, IndexOpsMixin) and isinstance(
            right.spark.data_type, ArrayType
        )
        if not right_is_array_series:
            raise TypeError(
                "Concatenation can not be applied to %s and the given type." % self.pretty_name
            )

        left_element = cast(ArrayType, left.spark.data_type).elementType
        right_element = cast(ArrayType, right.spark.data_type).elementType

        both_numeric = isinstance(left_element, NumericType) and isinstance(
            right_element, NumericType
        )
        if left_element != right_element and not both_numeric:
            raise TypeError(
                "Concatenation can only be applied to %s of the same type" % self.pretty_name
            )

        concatenate = column_op(F.concat)
        return concatenate(left, right)
    def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
        """Compute the element-wise comparison ``left > right``."""
        # Imported locally, as in the original (presumably to avoid a circular import).
        from pyspark.pandas.base import column_op

        _sanitize_list_like(right)
        greater_than = column_op(Column.__gt__)
        return greater_than(left, right)
Beispiel #25
0
 def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Compute ``left != right`` by delegating to ``_compare``.

     The call is flagged as an equality comparison via
     ``is_equality_comparison=True``.
     """
     _sanitize_list_like(right)
     not_equal = Column.__ne__
     return _compare(left, right, not_equal, is_equality_comparison=True)
Beispiel #26
0
 def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Compute the element-wise comparison ``left > right``."""
     _sanitize_list_like(right)
     greater_than = column_op(Column.__gt__)
     return greater_than(left, right)
Beispiel #27
0
 def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Compute ``left >= right`` by delegating to ``_compare``."""
     _sanitize_list_like(right)
     greater_or_equal = Column.__ge__
     return _compare(left, right, greater_or_equal)
Beispiel #28
0
 def xor(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
     """Reject XOR for this data type.

     ``right`` is still passed through ``_sanitize_list_like`` first, so any
     error that helper raises takes precedence.

     Raises
     ------
     TypeError
         Always, once ``right`` has been sanitized.
     """
     _sanitize_list_like(right)
     unsupported_message = "XOR can not be applied to given types."
     raise TypeError(unsupported_message)