def test_bool_ext_ops(self):
    """Check which ops class ``DataTypeOps`` builds for a Spark ``BooleanType``.

    With extension object dtypes available, pairing pandas ``BooleanDtype``
    with ``BooleanType`` must produce a ``BooleanExtensionOps`` instance.
    Otherwise, pairing a plain ``ExtensionDtype`` with ``BooleanType`` must
    produce a ``BooleanOps`` instance.
    """
    from pyspark.pandas.typedef.typehints import extension_object_dtypes_available

    if not extension_object_dtypes_available:
        fallback_ops = DataTypeOps(ExtensionDtype(), BooleanType())
        self.assertIsInstance(fallback_ops, BooleanOps)
        return

    # Imported lazily: only reached when the availability flag is set.
    from pandas import BooleanDtype

    extension_ops = DataTypeOps(BooleanDtype(), BooleanType())
    self.assertIsInstance(extension_ops, BooleanExtensionOps)
def test_as_spark_type_extension_object_dtypes(self):
    """Check ``as_spark_type`` for the pandas object extension dtypes.

    ``BooleanDtype()`` must map to ``BooleanType()`` and ``StringDtype()``
    must map to ``StringType()``.
    """
    from pandas import BooleanDtype, StringDtype

    # (pandas extension dtype, expected Spark type) pairs, checked in order.
    expectations = (
        (BooleanDtype(), BooleanType()),
        (StringDtype(), StringType()),
    )
    for pandas_dtype, expected_spark_type in expectations:
        actual_spark_type = as_spark_type(pandas_dtype)
        self.assertEqual(actual_spark_type, expected_spark_type)
def spark_type_to_pandas_dtype(
    spark_type: types.DataType, *, use_extension_dtypes: bool = False
) -> Dtype:
    """Return the pandas dtype corresponding to the given Spark DataType.

    Parameters
    ----------
    spark_type : types.DataType
        The Spark type to translate.
    use_extension_dtypes : bool, default False
        When true, and ``extension_dtypes_available`` is set, the integral
        Spark types are mapped to pandas nullable integer dtypes. Boolean and
        string types, and float and double types, are mapped to their pandas
        extension dtypes only if ``extension_object_dtypes_available`` and
        ``extension_float_dtypes_available`` are set, respectively.

    Returns
    -------
    Dtype
        A pandas extension dtype when one applies; otherwise a NumPy dtype:
        ``object`` for date, null, array, map, struct and user-defined types,
        ``datetime64[ns]`` for timestamps, and for everything else the pandas
        dtype reported by the type's Arrow conversion.
    """

    def _first_match(candidates):
        # Scan (Spark class, dtype factory) pairs in order; instantiate the
        # dtype of the first class that spark_type is an instance of.
        for spark_cls, make_dtype in candidates:
            if isinstance(spark_type, spark_cls):
                return make_dtype()
        return None

    if use_extension_dtypes and extension_dtypes_available:
        # IntegralType
        matched = _first_match(
            (
                (types.ByteType, Int8Dtype),
                (types.ShortType, Int16Dtype),
                (types.IntegerType, Int32Dtype),
                (types.LongType, Int64Dtype),
            )
        )
        if matched is not None:
            return matched

        # BooleanType / StringType
        if extension_object_dtypes_available:
            matched = _first_match(
                (
                    (types.BooleanType, BooleanDtype),
                    (types.StringType, StringDtype),
                )
            )
            if matched is not None:
                return matched

        # FractionalType
        if extension_float_dtypes_available:
            matched = _first_match(
                (
                    (types.FloatType, Float32Dtype),
                    (types.DoubleType, Float64Dtype),
                )
            )
            if matched is not None:
                return matched

    # No extension dtype applied: fall back to NumPy dtypes.
    object_backed_types = (
        types.DateType,
        types.NullType,
        types.ArrayType,
        types.MapType,
        types.StructType,
        types.UserDefinedType,
    )
    if isinstance(spark_type, object_backed_types):
        return np.dtype("object")

    if isinstance(spark_type, types.TimestampType):
        return np.dtype("datetime64[ns]")

    arrow_type = to_arrow_type(spark_type)
    return np.dtype(arrow_type.to_pandas_dtype())
def object_extension_dtypes(self):
    """Return the object extension dtypes to exercise.

    The list holds the string aliases ``"boolean"`` and ``"string"`` followed
    by ``BooleanDtype()`` and ``StringDtype()`` instances. It is empty when
    ``extension_object_dtypes_available`` is false.
    """
    if not extension_object_dtypes_available:
        return []

    dtype_aliases = ["boolean", "string"]
    dtype_instances = [BooleanDtype(), StringDtype()]
    return dtype_aliases + dtype_instances