Example #1
    def astype(self, index_ops: T_IndexOps, dtype: Union[str, type,
                                                         Dtype]) -> T_IndexOps:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            if isinstance(dtype, extension_dtypes):
                scol = index_ops.spark.column.cast(spark_type)
            else:
                if isinstance(index_ops.spark.data_type,
                              (FloatType, DoubleType)):
                    scol = F.when(
                        index_ops.spark.column.isNull()
                        | F.isnan(index_ops.spark.column),
                        F.lit(True),
                    ).otherwise(index_ops.spark.column.cast(spark_type))
                else:  # DecimalType
                    scol = F.when(index_ops.spark.column.isNull(),
                                  F.lit(False)).otherwise(
                                      index_ops.spark.column.cast(spark_type))
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype, null_str=str(np.nan))
        else:
            return _as_other_type(index_ops, dtype, spark_type)
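
A quick usage sketch, under the assumption that this snippet is the numeric astype from pyspark.pandas' data-type-ops layer and is reached through the public Series API:

    import pyspark.pandas as ps

    psser = ps.Series([0.0, 1.5, None])
    # None is stored as a null double; the isNull/isnan branch above maps
    # it to True, mirroring pandas' float-to-bool cast.
    print(psser.astype(bool).to_list())  # expected: [False, True, True]
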
Example #2
    def astype(self, index_ops: T_IndexOps, dtype: Union[str, type,
                                                         Dtype]) -> T_IndexOps:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            if isinstance(dtype, extension_dtypes):
                scol = F.when(
                    index_ops.spark.column.isNotNull(),
                    F.when(index_ops.spark.column, "True").otherwise("False"),
                )
            else:
                null_str = str(None)
                casted = F.when(index_ops.spark.column,
                                "True").otherwise("False")
                scol = F.when(index_ops.spark.column.isNull(),
                              null_str).otherwise(casted)
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        else:
            return _as_other_type(index_ops, dtype, spark_type)
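
Assuming this is the boolean-to-string path of pyspark.pandas' BooleanOps, a small sketch of the observable behavior:

    import pyspark.pandas as ps

    psser = ps.Series([True, False])
    # Booleans render as the Python literals; on the non-extension path,
    # nulls would take null_str = str(None), i.e. the string "None".
    print(psser.astype(str).to_list())  # expected: ['True', 'False']
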
Example #3
 def isnull(self, index_ops: T_IndexOps) -> T_IndexOps:
     return index_ops._with_new_scol(
         index_ops.spark.column.isNull(),
         field=index_ops._internal.data_fields[0].copy(
             dtype=np.dtype("bool"),
             spark_type=BooleanType(),
             nullable=False),
     )
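
The copied field pins the result to dtype bool / BooleanType and marks it non-nullable, so the null mask needs no schema re-inference. A minimal sketch through the usual pyspark.pandas Series API:

    import pyspark.pandas as ps

    psser = ps.Series([1.0, None, 3.0])
    print(psser.isnull().to_list())  # expected: [False, True, False]
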
Example #4
 def mul(self, left: T_IndexOps, right: Any) -> IndexOpsLike:
     if not is_valid_operand_for_numeric_arithmetic(right):
         raise TypeError(
             "Multiplication can not be applied to %s and the given type." % self.pretty_name
         )
     if isinstance(right, bool):
         return left.__and__(right)
     elif isinstance(right, numbers.Number):
         left = left.spark.transform(lambda scol: scol.cast(as_spark_type(type(right))))
         return left * right
     else:
         assert isinstance(right, IndexOpsMixin)
         if isinstance(right.spark.data_type, BooleanType):
             return left.__and__(right)
         else:
             left = transform_boolean_operand_to_numeric(left, right.spark.data_type)
             return left * right
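
This looks like BooleanOps.mul: multiplying by a bool degrades to logical AND, while a number forces a numeric cast first. A sketch under that assumption:

    import pyspark.pandas as ps

    psser = ps.Series([True, False, True])
    print((psser * 10).to_list())    # expected: [10, 0, 10] (bools cast to long first)
    print((psser * True).to_list())  # expected: [True, False, True] (logical AND)
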
Example #5
    def astype(self, index_ops: T_IndexOps, dtype: Union[str, type,
                                                         Dtype]) -> T_IndexOps:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype) and dtype.categories is None:
            return index_ops.copy()

        categories = index_ops.dtype.categories
        if len(categories) == 0:
            scol = F.lit(None)
        else:
            kvs = chain(*[(F.lit(code), F.lit(category))
                          for code, category in enumerate(categories)])
            map_scol = F.create_map(*kvs)
            scol = map_scol.getItem(index_ops.spark.column)
        return index_ops._with_new_scol(
            scol.alias(
                index_ops._internal.data_spark_column_names[0])).astype(dtype)
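
Here create_map builds literal code-to-category pairs and getItem translates the stored codes back into category values before the final cast. A usage sketch, assuming this is the categorical astype:

    import pyspark.pandas as ps

    psser = ps.Series(["a", "b", "a"], dtype="category")
    # Stored internally as codes (0, 1, 0); the map restores the labels.
    print(psser.astype(str).to_list())  # expected: ['a', 'b', 'a']
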
Example #6
 def radd(self, left: T_IndexOps, right: Any) -> IndexOpsLike:
     if isinstance(right, bytes):
         return cast(
             IndexOpsLike, left._with_new_scol(F.concat(F.lit(right), left.spark.column))
         )
     else:
         raise TypeError(
             "Concatenation can not be applied to %s and the given type." % self.pretty_name
         )
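
This appears to be the binary (bytes) radd, covering the reflected form with a bytes literal on the left; a sketch (that BinaryType round-trips to Python bytes here is an assumption):

    import pyspark.pandas as ps

    psser = ps.Series([b"spark", b"pandas"])
    print((b"py" + psser).to_list())  # expected: [b'pyspark', b'pypandas']
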
Example #7
 def rmul(self, left: T_IndexOps, right: Any) -> IndexOpsLike:
     if isinstance(right, bool):
         return left.__and__(right)
     elif isinstance(right, numbers.Number):
         left = left.spark.transform(lambda scol: scol.cast(as_spark_type(type(right))))
         return right * left
     else:
         raise TypeError(
             "Multiplication can not be applied to %s and the given type." % self.pretty_name
         )
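
The reflected counterpart of Example #4; a sketch assuming the same BooleanOps dispatch:

    import pyspark.pandas as ps

    psser = ps.Series([True, False])
    print((3 * psser).to_list())     # expected: [3, 0]
    print((True * psser).to_list())  # expected: [True, False]
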
Example #8
 def radd(self, left: T_IndexOps, right: Any) -> IndexOpsLike:
     if isinstance(right, str):
         return cast(
             IndexOpsLike,
             left._with_new_scol(F.concat(
                 F.lit(right), left.spark.column)),  # TODO: dtype?
         )
     else:
         raise TypeError(
             "string addition can only be applied to string series or literals."
         )
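
A sketch of the reflected string concatenation, assuming this radd belongs to pyspark.pandas' StringOps:

    import pyspark.pandas as ps

    psser = ps.Series(["spark", "pandas"])
    print(("py" + psser).to_list())  # expected: ['pyspark', 'pypandas']
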
Example #9
 def sub(self, left: T_IndexOps, right: Any) -> IndexOpsLike:
     # Note that timestamp subtraction casts the arguments to integer, which mimics pandas
     # only partially: pandas returns 'timedelta64[ns]' from 'datetime64[ns]' subtraction.
     msg = (
         "Note that there is a behavior difference of timestamp subtraction. "
         "The timestamp subtraction returns an integer in seconds, "
         "whereas pandas returns 'timedelta64[ns]'.")
     if isinstance(right, IndexOpsMixin) and isinstance(
             right.spark.data_type, TimestampType):
         warnings.warn(msg, UserWarning)
         return left.astype("long") - right.astype("long")
     elif isinstance(right, datetime.datetime):
         warnings.warn(msg, UserWarning)
         return cast(
             IndexOpsLike,
             left.spark.transform(lambda scol: scol.astype("long") - F.lit(
                 right).cast(as_spark_type("long"))),
         )
     else:
         raise TypeError(
             "datetime subtraction can only be applied to datetime series.")
Example #10
    def astype(self, index_ops: T_IndexOps, dtype: Union[str, type,
                                                         Dtype]) -> T_IndexOps:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)

        if isinstance(spark_type, BooleanType):
            if isinstance(dtype, extension_dtypes):
                scol = index_ops.spark.column.cast(spark_type)
            else:
                scol = F.when(index_ops.spark.column.isNull(),
                              SF.lit(False)).otherwise(
                                  F.length(index_ops.spark.column) > 0)
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype)
        else:
            return _as_other_type(index_ops, dtype, spark_type)
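
SF here is presumably pyspark.pandas' internal spark.functions module, whose lit mirrors F.lit. A sketch of the string-to-bool cast, where truth is non-emptiness and nulls become False:

    import pyspark.pandas as ps

    psser = ps.Series(["spark", ""])
    print(psser.astype(bool).to_list())  # expected: [True, False]
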
Example #11
 def isnull(self, index_ops: T_IndexOps) -> T_IndexOps:
     return index_ops._with_new_scol(index_ops.spark.column.isNull())
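
Unlike Example #3, no InternalField is supplied, so the result's metadata is re-inferred; from the user's side the behavior is the same. A sketch on an Index (the exact return container is an assumption):

    import pyspark.pandas as ps

    psidx = ps.Index([1.0, None, 3.0])
    print(psidx.isnull())  # expected elementwise: [False, True, False]
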