def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype``, refusing missing values for plain int/bool targets.

    The requested ``dtype`` is resolved with ``pandas_on_spark_type`` and the
    cast is then dispatched on the resolved pandas dtype / Spark type:
    categorical, string and "other" targets go to the shared ``_as_*_type``
    helpers, while boolean targets are built inline.

    Raises
    ------
    ValueError
        If the resolved dtype is an integer or bool dtype that is not one of
        ``extension_dtypes`` and ``index_ops.hasnans`` is true.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)
    is_extension = isinstance(dtype, extension_dtypes)

    # ``hasnans`` is consulted only for the target dtypes that need the check.
    if not is_extension:
        if is_integer_dtype(dtype):
            if index_ops.hasnans:
                raise ValueError(
                    "Cannot convert %s with missing values to integer" % self.pretty_name
                )
        elif is_bool_dtype(dtype):
            if index_ops.hasnans:
                raise ValueError(
                    "Cannot convert %s with missing values to bool" % self.pretty_name
                )

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype, null_str=str(np.nan))
    if not isinstance(spark_type, BooleanType):
        return _as_other_type(index_ops, dtype, spark_type)

    # Boolean target.
    column = index_ops.spark.column
    if is_extension:
        scol = column.cast(spark_type)
    else:
        # Null / NaN entries become True; everything else is a plain cast.
        # NOTE(review): the ``hasnans`` check above presumably makes this
        # null/NaN arm unreachable for non-extension bool targets -- confirm.
        is_missing = column.isNull() | F.isnan(column)
        scol = F.when(is_missing, SF.lit(True)).otherwise(column.cast(spark_type))

    internal = index_ops._internal
    return index_ops._with_new_scol(
        scol.alias(internal.data_spark_column_names[0]),
        field=internal.data_fields[0].copy(dtype=dtype, spark_type=spark_type),
    )
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype``, rendering string targets as "True"/"False".

    Categorical, boolean and "other" targets are delegated to the shared
    ``_as_*_type`` helpers. For a string target the column is used directly as
    a condition and mapped to the literals ``"True"`` / ``"False"``:

    * extension dtype: null entries get no value from the ``when`` expression;
    * otherwise: null entries are replaced with ``str(None)``.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)
    if not isinstance(spark_type, StringType):
        return _as_other_type(index_ops, dtype, spark_type)

    # String target.
    column = index_ops.spark.column
    bool_text = F.when(column, "True").otherwise("False")
    if isinstance(dtype, extension_dtypes):
        scol = F.when(column.isNotNull(), bool_text)
    else:
        scol = F.when(column.isNull(), str(None)).otherwise(bool_text)

    column_name = index_ops._internal.data_spark_column_names[0]
    return index_ops._with_new_scol(
        scol.alias(column_name),
        field=InternalField(dtype=dtype),
    )
def astype(self, index_ops: T_IndexOps, dtype: Union[str, type, Dtype]) -> T_IndexOps:
    """Cast ``index_ops`` to ``dtype``, writing nulls as ``str(pd.NaT)`` for string targets.

    Categorical, boolean and "other" targets are delegated to the shared
    ``_as_*_type`` helpers. For a string target, null entries are replaced with
    ``str(pd.NaT)`` and all other entries are cast to the Spark string type.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)
    if not isinstance(spark_type, StringType):
        return _as_other_type(index_ops, dtype, spark_type)

    # String target. Extension and non-extension dtypes are built with the
    # exact same expression here (nulls -> str(pd.NaT)); for the extension
    # case this seems like a pandas' bug?
    column = index_ops.spark.column
    scol = F.when(column.isNull(), str(pd.NaT)).otherwise(column.cast(spark_type))

    column_name = index_ops._internal.data_spark_column_names[0]
    return index_ops._with_new_scol(
        scol.alias(column_name),
        field=InternalField(dtype=dtype),
    )
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype``, rendering string targets as "True"/"False".

    Categorical, boolean and "other" targets are delegated to the shared
    ``_as_*_type`` helpers. For a string target the column is used directly as
    a condition and mapped to the literals ``"True"`` / ``"False"``:

    * extension dtype: null entries get no value from the ``when`` expression,
      and the result field keeps ``index_ops.spark.nullable``;
    * otherwise: null entries are replaced with ``str(pd.NA)`` when ``self`` is
      a ``BooleanExtensionOps`` and with ``str(None)`` if not, and the result
      field is marked non-nullable.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)
    if not isinstance(spark_type, StringType):
        return _as_other_type(index_ops, dtype, spark_type)

    # String target.
    column = index_ops.spark.column
    bool_text = F.when(column, "True").otherwise("False")
    if isinstance(dtype, extension_dtypes):
        scol = F.when(column.isNotNull(), bool_text)
        nullable = index_ops.spark.nullable
    else:
        if isinstance(self, BooleanExtensionOps):
            null_str = str(pd.NA)
        else:
            null_str = str(None)
        scol = F.when(column.isNull(), null_str).otherwise(bool_text)
        nullable = False

    new_field = index_ops._internal.data_fields[0].copy(
        dtype=dtype, spark_type=spark_type, nullable=nullable
    )
    return index_ops._with_new_scol(scol, field=new_field)
def astype(self, index_ops: T_IndexOps, dtype: Union[str, type, Dtype]) -> T_IndexOps:
    """Cast ``index_ops`` to ``dtype`` with source-type-specific null handling for bool.

    Categorical, string and "other" targets are delegated to the shared
    ``_as_*_type`` helpers (string targets use ``str(np.nan)`` as the null
    string). For a boolean target:

    * extension dtype: plain Spark cast;
    * float / double source column: null or NaN entries become ``True``;
    * any other source column (DecimalType): null entries become ``False``.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype, null_str=str(np.nan))
    if not isinstance(spark_type, BooleanType):
        return _as_other_type(index_ops, dtype, spark_type)

    # Boolean target.
    column = index_ops.spark.column
    if isinstance(dtype, extension_dtypes):
        scol = column.cast(spark_type)
    elif isinstance(index_ops.spark.data_type, (FloatType, DoubleType)):
        is_missing = column.isNull() | F.isnan(column)
        scol = F.when(is_missing, F.lit(True)).otherwise(column.cast(spark_type))
    else:
        # DecimalType
        scol = F.when(column.isNull(), F.lit(False)).otherwise(column.cast(spark_type))

    column_name = index_ops._internal.data_spark_column_names[0]
    return index_ops._with_new_scol(
        scol.alias(column_name),
        field=InternalField(dtype=dtype),
    )
def _non_fractional_astype(
    index_ops: IndexOpsLike, dtype: Dtype, spark_type: DataType
) -> IndexOpsLike:
    """Dispatch a cast of ``index_ops`` to the matching ``_as_*_type`` helper.

    The helper is chosen from the already-resolved ``dtype`` / ``spark_type``
    pair: categorical dtype first, then boolean and string Spark types, and
    ``_as_other_type`` for everything else. String targets use ``str(np.nan)``
    as the null string.
    """
    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype, null_str=str(np.nan))
    return _as_other_type(index_ops, dtype, spark_type)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype`` through the shared ``_as_*_type`` helpers.

    ``dtype`` is resolved with ``pandas_on_spark_type``; the helper is then
    picked by categorical dtype first, then boolean and string Spark types,
    with ``_as_other_type`` as the fallback. String targets use
    ``str(np.nan)`` as the null string.
    """
    resolved = pandas_on_spark_type(dtype)
    dtype, spark_type = resolved

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype, null_str=str(np.nan))
    return _as_other_type(index_ops, dtype, spark_type)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype``; boolean targets are not supported.

    Categorical, string and "other" targets are delegated to the shared
    ``_as_*_type`` helpers. String targets use ``str(pd.NaT)`` as the null
    string.

    Raises
    ------
    TypeError
        If the resolved Spark type is ``BooleanType``.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        raise TypeError("cannot astype a %s to [bool]" % self.pretty_name)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype, null_str=str(pd.NaT))
    return _as_other_type(index_ops, dtype, spark_type)
def astype(
    self, index_ops: Union["Index", "Series"], dtype: Union[str, type, Dtype]
) -> Union["Index", "Series"]:
    """Cast ``index_ops`` to ``dtype`` through the shared ``_as_*_type`` helpers.

    ``dtype`` is resolved with ``pandas_on_spark_type``; the helper is then
    picked by categorical dtype first, then boolean and string Spark types,
    with ``_as_other_type`` as the fallback. ``_as_string_type`` is called
    without an explicit null string.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        return _as_bool_type(index_ops, dtype)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype)
    return _as_other_type(index_ops, dtype, spark_type)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype``, going through ``str`` for boolean targets.

    Categorical, string and "other" targets are delegated to the shared
    ``_as_*_type`` helpers. A boolean target is produced by chaining
    ``astype(str)`` and ``astype(bool)`` on ``index_ops``.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, BooleanType):
        # Cannot cast binary to boolean in Spark.
        # We should cast binary to str first, and cast it to boolean
        as_text = index_ops.astype(str)
        return as_text.astype(bool)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype)
    return _as_other_type(index_ops, dtype, spark_type)
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype``; boolean targets report whether a value is present.

    Categorical, string and "other" targets are delegated to the shared
    ``_as_*_type`` helpers (string targets use ``str(pd.NaT)`` as the null
    string). For a boolean target the result column is ``isNotNull()`` of the
    source column, and the result field is always ``np.dtype(bool)`` and
    non-nullable, regardless of the requested bool dtype.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype, null_str=str(pd.NaT))
    if not isinstance(spark_type, BooleanType):
        return _as_other_type(index_ops, dtype, spark_type)

    # Boolean target.
    has_value = index_ops.spark.column.isNotNull()
    bool_field = index_ops._internal.data_fields[0].copy(
        dtype=np.dtype(bool), spark_type=spark_type, nullable=False
    )
    return index_ops._with_new_scol(has_value, field=bool_field)
def astype(
    self, index_ops: Union["Index", "Series"], dtype: Union[str, type, Dtype]
) -> Union["Index", "Series"]:
    """Cast ``index_ops`` to ``dtype``; plain bool targets test for a non-empty value.

    Categorical, string and "other" targets are delegated to the shared
    ``_as_*_type`` helpers. For a boolean target:

    * extension dtype: plain Spark cast;
    * otherwise: null entries become ``False`` and every other entry becomes
      ``length(column) > 0``.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)
    if isinstance(spark_type, StringType):
        return _as_string_type(index_ops, dtype)
    if not isinstance(spark_type, BooleanType):
        return _as_other_type(index_ops, dtype, spark_type)

    # Boolean target.
    column = index_ops.spark.column
    if isinstance(dtype, extension_dtypes):
        scol = column.cast(spark_type)
    else:
        non_empty = F.length(column) > 0
        scol = F.when(column.isNull(), F.lit(False)).otherwise(non_empty)

    column_name = index_ops._internal.data_spark_column_names[0]
    return index_ops._with_new_scol(
        scol.alias(column_name),
        field=InternalField(dtype=dtype),
    )
def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike:
    """Cast ``index_ops`` to ``dtype``; plain bool targets test for a non-empty value.

    Categorical and "other" targets are delegated to the shared ``_as_*_type``
    helpers. String targets go to ``_as_string_type`` with ``str(pd.NA)`` as
    the null string when ``self`` is a ``StringExtensionOps`` and ``str(None)``
    if not. For a boolean target:

    * extension dtype: plain Spark cast;
    * otherwise: null entries become ``False`` and every other entry becomes
      ``length(column) > 0``.
    """
    dtype, spark_type = pandas_on_spark_type(dtype)

    if isinstance(dtype, CategoricalDtype):
        return _as_categorical_type(index_ops, dtype, spark_type)

    if isinstance(spark_type, StringType):
        if isinstance(self, StringExtensionOps):
            null_str = str(pd.NA)
        else:
            null_str = str(None)
        return _as_string_type(index_ops, dtype, null_str=null_str)

    if not isinstance(spark_type, BooleanType):
        return _as_other_type(index_ops, dtype, spark_type)

    # Boolean target.
    column = index_ops.spark.column
    if isinstance(dtype, extension_dtypes):
        scol = column.cast(spark_type)
    else:
        non_empty = F.length(column) > 0
        scol = F.when(column.isNull(), SF.lit(False)).otherwise(non_empty)

    bool_field = index_ops._internal.data_fields[0].copy(dtype=dtype, spark_type=spark_type)
    return index_ops._with_new_scol(scol, field=bool_field)