Code example #1
    def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if is_integer_dtype(dtype) and not isinstance(dtype, extension_dtypes):
            if index_ops.hasnans:
                raise ValueError(
                    "Cannot convert %s with missing values to integer" % self.pretty_name
                )
        elif is_bool_dtype(dtype) and not isinstance(dtype, extension_dtypes):
            if index_ops.hasnans:
                raise ValueError("Cannot convert %s with missing values to bool" % self.pretty_name)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            if isinstance(dtype, extension_dtypes):
                scol = index_ops.spark.column.cast(spark_type)
            else:
                scol = F.when(
                    index_ops.spark.column.isNull() | F.isnan(index_ops.spark.column),
                    SF.lit(True),
                ).otherwise(index_ops.spark.column.cast(spark_type))
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=index_ops._internal.data_fields[0].copy(dtype=dtype, spark_type=spark_type),
            )
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype, null_str=str(np.nan))
        else:
            return _as_other_type(index_ops, dtype, spark_type)
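Usage note: a minimal sketch of what the branches above imply at call time, assuming pyspark with the pandas API on Spark is installed; the sample values and expected outputs are illustrative, not taken from the example's source.

import pyspark.pandas as ps

# Non-zero doubles cast to True, zero to False (plain bool, no missing values).
psser = ps.Series([1.5, 0.0, 2.0])
print(psser.astype(bool).to_list())   # expected: [True, False, True]

# With missing values, the hasnans guard above raises before any cast happens.
try:
    ps.Series([1.0, None]).astype(int)
except ValueError as exc:
    print(exc)   # "Cannot convert ... with missing values to integer"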
Code example #2
File: boolean_ops.py  Project: wwjiang007/spark
    def astype(self, index_ops: IndexOpsLike,
               dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            if isinstance(dtype, extension_dtypes):
                scol = F.when(
                    index_ops.spark.column.isNotNull(),
                    F.when(index_ops.spark.column, "True").otherwise("False"),
                )
            else:
                null_str = str(None)
                casted = F.when(index_ops.spark.column,
                                "True").otherwise("False")
                scol = F.when(index_ops.spark.column.isNull(),
                              null_str).otherwise(casted)
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        else:
            return _as_other_type(index_ops, dtype, spark_type)
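Usage note: a hedged sketch of the StringType branch above for a plain (non-extension) boolean series; pyspark.pandas is assumed to be available and the output is illustrative.

import pyspark.pandas as ps

# True/False are rendered as the strings "True"/"False"; a null value
# would fall into the str(None) branch and become the string "None".
psser = ps.Series([True, False, True])
print(psser.astype(str).to_list())   # expected: ['True', 'False', 'True']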
Code example #3
File: datetime_ops.py  Project: kangdw0x80/spark
    def astype(self, index_ops: T_IndexOps, dtype: Union[str, type, Dtype]) -> T_IndexOps:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            if isinstance(dtype, extension_dtypes):
                # seems like a pandas bug?
                scol = F.when(index_ops.spark.column.isNull(), str(pd.NaT)).otherwise(
                    index_ops.spark.column.cast(spark_type)
                )
            else:
                null_str = str(pd.NaT)
                casted = index_ops.spark.column.cast(spark_type)
                scol = F.when(index_ops.spark.column.isNull(),
                              null_str).otherwise(casted)
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        else:
            return _as_other_type(index_ops, dtype, spark_type)
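Usage note: a sketch of the datetime-to-string branch above; the timestamp formatting comes from Spark's string cast, so the printed values are illustrative.

import pandas as pd
import pyspark.pandas as ps

psser = ps.Series(pd.to_datetime(["2022-01-01", None]))
# Nulls are replaced with str(pd.NaT), i.e. "NaT"; other values use
# Spark's cast-to-string representation of the timestamp.
print(psser.astype(str).to_list())   # e.g. ['2022-01-01 00:00:00', 'NaT']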
Code example #4
    def astype(self, index_ops: IndexOpsLike,
               dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            if isinstance(dtype, extension_dtypes):
                scol = F.when(
                    index_ops.spark.column.isNotNull(),
                    F.when(index_ops.spark.column, "True").otherwise("False"),
                )
                nullable = index_ops.spark.nullable
            else:
                null_str = str(pd.NA) if isinstance(self, BooleanExtensionOps) else str(None)
                casted = F.when(index_ops.spark.column,
                                "True").otherwise("False")
                scol = F.when(index_ops.spark.column.isNull(),
                              null_str).otherwise(casted)
                nullable = False
            return index_ops._with_new_scol(
                scol,
                field=index_ops._internal.data_fields[0].copy(
                    dtype=dtype, spark_type=spark_type, nullable=nullable),
            )
        else:
            return _as_other_type(index_ops, dtype, spark_type)
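Usage note: this variant differs from code example #2 mainly in the null placeholder, which is str(pd.NA) when self is a BooleanExtensionOps. A hedged sketch, assuming pyspark.pandas is installed:

import pyspark.pandas as ps

# With the nullable "boolean" extension dtype, the null placeholder is
# str(pd.NA) == "<NA>"; with plain NumPy bool it would be "None".
psser = ps.Series([True, False, None], dtype="boolean")
print(psser.astype(str).to_list())   # illustrative: ['True', 'False', '<NA>']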
Code example #5
    def astype(self, index_ops: T_IndexOps, dtype: Union[str, type, Dtype]) -> T_IndexOps:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            if isinstance(dtype, extension_dtypes):
                scol = index_ops.spark.column.cast(spark_type)
            else:
                if isinstance(index_ops.spark.data_type,
                              (FloatType, DoubleType)):
                    scol = F.when(
                        index_ops.spark.column.isNull()
                        | F.isnan(index_ops.spark.column),
                        F.lit(True),
                    ).otherwise(index_ops.spark.column.cast(spark_type))
                else:  # DecimalType
                    scol = F.when(index_ops.spark.column.isNull(), F.lit(False)).otherwise(
                        index_ops.spark.column.cast(spark_type)
                    )
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype, null_str=str(np.nan))
        else:
            return _as_other_type(index_ops, dtype, spark_type)
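Usage note: the boolean branch above distinguishes float/double columns (null/NaN mapped to True) from decimal columns (null mapped to False). The sketch below is illustrative only for the variant shown here; code example #1 adds a hasnans guard that raises instead.

import pyspark.pandas as ps

# Float/double column: the F.isnan / isNull branch maps the missing value to True.
psser = ps.Series([1.5, 0.0, None])
print(psser.astype(bool).to_list())   # with this variant: [True, False, True]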
Code example #6
def _non_fractional_astype(index_ops: IndexOpsLike, dtype: Dtype,
                           spark_type: DataType) -> IndexOpsLike:
    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    elif isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)
    elif isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype, null_str=str(np.nan))
    else:
        return _as_other_type(index_ops, dtype, spark_type)
Code example #7
    def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype, null_str=str(np.nan))
        else:
            return _as_other_type(index_ops, dtype, spark_type)
Code example #8
    def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            raise TypeError("cannot astype a %s to [bool]" % self.pretty_name)
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype, null_str=str(pd.NaT))
        else:
            return _as_other_type(index_ops, dtype, spark_type)
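Usage note: this datetime-like variant (the pd.NaT placeholder suggests a datetime/date ops class) rejects boolean casts outright. A quick sketch of the error path; the exact message depends on self.pretty_name.

import pandas as pd
import pyspark.pandas as ps

psser = ps.Series(pd.to_datetime(["2022-01-01"]))
try:
    psser.astype(bool)
except TypeError as exc:
    print(exc)   # "cannot astype a ... to [bool]"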
Code example #9
File: null_ops.py  Project: Halo9Pan/dive-spark
    def astype(self, index_ops: Union["Index", "Series"],
               dtype: Union[str, type, Dtype]) -> Union["Index", "Series"]:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return _as_bool_type(index_ops, dtype)
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype)
        else:
            return _as_other_type(index_ops, dtype, spark_type)
Code example #10
    def astype(self, index_ops: IndexOpsLike,
               dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            # Spark cannot cast binary to boolean directly, so cast
            # binary to str first and then cast the result to boolean.
            return index_ops.astype(str).astype(bool)
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype)
        else:
            return _as_other_type(index_ops, dtype, spark_type)
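Usage note: a sketch of the two-step binary-to-boolean cast above; the expected result assumes the bytes round-trip through the intermediate string cast, so treat it as illustrative.

import pyspark.pandas as ps

psser = ps.Series([b"abc", b""])
# Delegates to psser.astype(str).astype(bool); with the string rule
# "non-null and length > 0" this is expected to give [True, False].
print(psser.astype(bool).to_list())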
Code example #11
    def astype(self, index_ops: IndexOpsLike,
               dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)
        elif isinstance(spark_type, BooleanType):
            return index_ops._with_new_scol(
                index_ops.spark.column.isNotNull(),
                field=index_ops._internal.data_fields[0].copy(
                    dtype=np.dtype(bool),
                    spark_type=spark_type,
                    nullable=False),
            )
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype, null_str=str(pd.NaT))
        else:
            return _as_other_type(index_ops, dtype, spark_type)
Code example #12
    def astype(self, index_ops: Union["Index", "Series"],
               dtype: Union[str, type, Dtype]) -> Union["Index", "Series"]:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)

        if isinstance(spark_type, BooleanType):
            if isinstance(dtype, extension_dtypes):
                scol = index_ops.spark.column.cast(spark_type)
            else:
                scol = F.when(index_ops.spark.column.isNull(), F.lit(False)).otherwise(
                    F.length(index_ops.spark.column) > 0
                )
            return index_ops._with_new_scol(
                scol.alias(index_ops._internal.data_spark_column_names[0]),
                field=InternalField(dtype=dtype),
            )
        elif isinstance(spark_type, StringType):
            return _as_string_type(index_ops, dtype)
        else:
            return _as_other_type(index_ops, dtype, spark_type)
Code example #13
    def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
        dtype, spark_type = pandas_on_spark_type(dtype)

        if isinstance(dtype, CategoricalDtype):
            return _as_categorical_type(index_ops, dtype, spark_type)

        if isinstance(spark_type, BooleanType):
            if isinstance(dtype, extension_dtypes):
                scol = index_ops.spark.column.cast(spark_type)
            else:
                scol = F.when(index_ops.spark.column.isNull(), SF.lit(False)).otherwise(
                    F.length(index_ops.spark.column) > 0
                )
            return index_ops._with_new_scol(
                scol,
                field=index_ops._internal.data_fields[0].copy(dtype=dtype, spark_type=spark_type),
            )
        elif isinstance(spark_type, StringType):
            null_str = str(pd.NA) if isinstance(self, StringExtensionOps) else str(None)
            return _as_string_type(index_ops, dtype, null_str=null_str)
        else:
            return _as_other_type(index_ops, dtype, spark_type)
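Usage note: a sketch of the string-to-boolean rule above (null becomes False, otherwise length > 0); pyspark.pandas is assumed to be available and the output is illustrative.

import pyspark.pandas as ps

psser = ps.Series(["spark", "", None])
# Nulls hit the SF.lit(False) branch; non-null strings are F.length(...) > 0.
print(psser.astype(bool).to_list())   # expected: [True, False, False]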