def test_generic_ext_type_register(registered_period_type):
    """Exercise the two failure modes of ``pa.register_extension_type``.

    ``registered_period_type`` is requested only for its setup side effect;
    its value is never read here.  (Presumably a pytest fixture that has
    already registered a period type -- the decorator is not visible in
    this excerpt.)
    """
    # Case 1: a plain storage type is not an extension type.  The call must
    # be rejected cleanly with TypeError instead of crashing the process.
    not_an_extension_type = pa.string()
    with pytest.raises(TypeError):
        pa.register_extension_type(not_an_extension_type)

    # Case 2: registering the period type a second time must raise KeyError.
    # The instance is built outside the ``raises`` block so that an error in
    # the constructor itself cannot be mistaken for the expected failure.
    duplicate = PeriodType('D')
    with pytest.raises(KeyError):
        pa.register_extension_type(duplicate)
def registered_period_type():
    """Register ``PeriodType('D')`` around the code that consumes this generator.

    Generator-style setup/teardown: the statement before the bare ``yield``
    is the setup, the statements after it are the teardown.  Nothing is
    yielded to the consumer.
    """
    # Setup: make the extension type known to pyarrow.
    pa.register_extension_type(PeriodType('D'))

    yield

    # Teardown: best-effort removal of the registration.  A KeyError from
    # the unregister call is deliberately ignored (presumably it means the
    # name is no longer registered -- confirm against pyarrow's docs).
    try:
        pa.unregister_extension_type('pandas.period')
    except KeyError:
        pass
def registered_period_type(request):
    """Register the parametrised extension type and hand it to the consumer.

    Parameters
    ----------
    request
        Object whose ``param`` attribute holds the extension type instance
        to register (presumably pytest's fixture ``request`` -- the
        decorator and its ``params`` are not visible in this excerpt).

    Yields
    ------
    tuple
        ``(extension_type, array_class)`` where ``array_class`` is whatever
        the type's ``__arrow_ext_class__()`` returns.
    """
    # Setup: look up the array class first, then register the type, in the
    # same order as the original code.
    ext_type = request.param
    array_cls = ext_type.__arrow_ext_class__()
    pa.register_extension_type(ext_type)

    yield ext_type, array_cls

    # Teardown: best-effort removal of the registration under the fixed
    # name 'test.period'; a KeyError from the call is deliberately ignored.
    try:
        pa.unregister_extension_type('test.period')
    except KeyError:
        pass
return type(self) == type(other) and self.freq == other.freq else: return NotImplemented def __hash__(self) -> int: return hash((str(self), self.freq)) def to_pandas_dtype(self): import pandas as pd return pd.PeriodDtype(freq=self.freq) # register the type with a dummy instance _period_type = ArrowPeriodType("D") pyarrow.register_extension_type(_period_type) class ArrowIntervalType(pyarrow.ExtensionType): @deprecate_kwarg(old_arg_name="closed", new_arg_name="inclusive") def __init__(self, subtype, inclusive: IntervalInclusiveType) -> None: # attributes need to be set first before calling # super init (as that calls serialize) assert inclusive in VALID_INCLUSIVE self._inclusive: IntervalInclusiveType = inclusive if not isinstance(subtype, pyarrow.DataType): subtype = pyarrow.type_for_alias(str(subtype)) self._subtype = subtype storage_type = pyarrow.struct([("left", subtype), ("right", subtype)]) pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")
if not isinstance(subtype, pa.DataType): subtype = pa.type_for_alias(str(subtype)) self._subtype = subtype pa.ExtensionType.__init__(self, pa.list_(subtype, 2), "dask-ms.complex") def to_pandas_dtype(self): return np.result_type(self._subtype.to_pandas_dtype(), np.complex64) def __eq__(self, other): return (isinstance(other, ComplexType) and self._subtype == other._subtype) def __arrow_ext_serialize__(self): return b"" @classmethod def __arrow_ext_deserialize__(cls, storage_type, serialized): return ComplexType(storage_type.value_type) def __arrow_ext_class__(self): return ComplexArray if pa: _complex_type = ComplexType("float32") _tensor_type = TensorType((1, ), "float32") pa.register_extension_type(_complex_type) pa.register_extension_type(_tensor_type)
metadata["node_type"], metadata["mask_parameters"], metadata["node_parameters"], metadata["record_is_tuple"], metadata["record_is_scalar"], ) @property def num_buffers(self): return self.storage_type.num_buffers @property def num_fields(self): return self.storage_type.num_fields pyarrow.register_extension_type( AwkwardArrowType(pyarrow.null(), None, None, None, None, None, None)) # order is important; _string_like[:2] vs _string_like[::2] _string_like = ( pyarrow.string(), pyarrow.large_string(), pyarrow.binary(), pyarrow.large_binary(), ) _pyarrow_to_numpy_dtype = { pyarrow.date32(): (True, np.dtype("M8[D]")), pyarrow.date64(): (False, np.dtype("M8[ms]")), pyarrow.time32("s"): (True, np.dtype("M8[s]")), pyarrow.time32("ms"): (True, np.dtype("M8[ms]")), pyarrow.time64("us"): (False, np.dtype("M8[us]")),
@classmethod def __arrow_ext_deserialize__(cls, storage_type, serialized): # return an instance of this subclass given the serialized # metadata. metadata = json.loads(serialized.decode()) shape = metadata["shape"] subtype = pa.type_for_alias(metadata["subtype"]) return ArrowTensorType(shape=shape, subtype=subtype) def to_pandas_dtype(self): return TensorDtype() # register the type with a dummy instance _tensor_type = ArrowTensorType((1, ), pa.float32()) pa.register_extension_type(_tensor_type) class registry_type(type): """Fix registry lookup for extension types. It appears that parquet stores `str(TensorDtype)`, yet the lookup tries to match it to `TensorDtype.name`. """ def __str__(self): try: return self.name except AttributeError: return self.__name__