def integral_extension_dtypes(self):
    return (
        [
            "Int8",
            "Int16",
            "Int32",
            "Int64",
            Int8Dtype(),
            Int16Dtype(),
            Int32Dtype(),
            Int64Dtype(),
        ]
        if extension_dtypes_available
        else []
    )
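# Minimal sketch (a hypothetical helper, not from the original file) of why the
# list above mixes string aliases with dtype instances: pandas treats "Int8"
# and pd.Int8Dtype() as the same nullable dtype, so tests can parametrize over
# both spellings. Assumes a pandas build where extension dtypes are available.
def _demo_dtype_spellings(self):
    import pandas as pd

    s_alias = pd.Series([1, None], dtype="Int8")
    s_instance = pd.Series([1, None], dtype=pd.Int8Dtype())
    # Both spellings resolve to the identical extension dtype.
    assert s_alias.dtype == pd.Int8Dtype()
    assert s_instance.dtype == pd.Int8Dtype()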
def test_as_spark_type_extension_dtypes(self):
    from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype

    type_mapper = {
        Int8Dtype(): ByteType(),
        Int16Dtype(): ShortType(),
        Int32Dtype(): IntegerType(),
        Int64Dtype(): LongType(),
    }

    for extension_dtype, spark_type in type_mapper.items():
        self.assertEqual(as_spark_type(extension_dtype), spark_type)
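# Hedged companion sketch (hypothetical test name, not in the original suite):
# the mapping above should round-trip through spark_type_to_pandas_dtype
# (defined below) when use_extension_dtypes=True, since the two functions
# implement inverse mappings for these integral extension dtypes.
def test_extension_dtypes_round_trip(self):
    from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype

    for extension_dtype in [Int8Dtype(), Int16Dtype(), Int32Dtype(), Int64Dtype()]:
        spark_type = as_spark_type(extension_dtype)
        self.assertEqual(
            spark_type_to_pandas_dtype(spark_type, use_extension_dtypes=True),
            extension_dtype,
        )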
def spark_type_to_pandas_dtype(
    spark_type: types.DataType, *, use_extension_dtypes: bool = False
) -> Dtype:
    """Return the pandas dtype corresponding to the given Spark DataType."""
    if use_extension_dtypes:
        if extension_dtypes_available:
            # IntegralType
            if isinstance(spark_type, types.ByteType):
                return Int8Dtype()
            elif isinstance(spark_type, types.ShortType):
                return Int16Dtype()
            elif isinstance(spark_type, types.IntegerType):
                return Int32Dtype()
            elif isinstance(spark_type, types.LongType):
                return Int64Dtype()

        if extension_object_dtypes_available:
            # BooleanType
            if isinstance(spark_type, types.BooleanType):
                return BooleanDtype()
            # StringType
            elif isinstance(spark_type, types.StringType):
                return StringDtype()

        # FractionalType
        if extension_float_dtypes_available:
            if isinstance(spark_type, types.FloatType):
                return Float32Dtype()
            elif isinstance(spark_type, types.DoubleType):
                return Float64Dtype()

    if isinstance(
        spark_type,
        (
            types.DateType,
            types.NullType,
            types.ArrayType,
            types.MapType,
            types.StructType,
            types.UserDefinedType,
        ),
    ):
        return np.dtype("object")
    elif isinstance(spark_type, types.TimestampType):
        return np.dtype("datetime64[ns]")
    else:
        return np.dtype(to_arrow_type(spark_type).to_pandas_dtype())
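# Hedged usage sketch (a hypothetical helper, not part of the module): shows
# the two modes of spark_type_to_pandas_dtype. With use_extension_dtypes=True
# the nullable pandas dtypes above are returned; otherwise the Arrow-based
# default path yields plain NumPy dtypes. Assumes a pandas installation where
# extension dtypes are available.
def _demo_spark_type_to_pandas_dtype():
    from pandas import Int32Dtype

    # Extension path: IntegerType maps to the nullable Int32 dtype.
    assert (
        spark_type_to_pandas_dtype(types.IntegerType(), use_extension_dtypes=True)
        == Int32Dtype()
    )

    # Default path: the Arrow conversion yields the plain NumPy dtype.
    assert spark_type_to_pandas_dtype(types.IntegerType()) == np.dtype("int32")

    # Object fallback: nested types such as arrays come back as object dtype.
    assert spark_type_to_pandas_dtype(
        types.ArrayType(types.IntegerType())
    ) == np.dtype("object")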