def test_generic_ext_type_register(registered_period_type):
    """Check that invalid and duplicate registrations raise cleanly.

    ``registered_period_type`` is requested only for its side effect: a
    period extension type is expected to be registered already when the
    body runs.
    """
    # A plain, non-extension type must be rejected with TypeError
    # (the original concern was that this should not segfault).
    with pytest.raises(TypeError):
        pa.register_extension_type(pa.string())

    # Registering an equivalent period type a second time must raise
    # KeyError.
    duplicate_type = PeriodType('D')
    with pytest.raises(KeyError):
        pa.register_extension_type(duplicate_type)
def registered_period_type():
    """Register a ``PeriodType('D')`` around the code run at the ``yield``.

    Setup registers the type with pyarrow; teardown unregisters the
    ``'pandas.period'`` name again.

    NOTE(review): no ``@pytest.fixture`` decorator is visible on this
    generator, yet a test requests it by name -- confirm the decorator
    is applied in the real file.
    """
    # setup
    pa.register_extension_type(PeriodType('D'))
    yield
    # teardown: best effort, a KeyError raised while unregistering is
    # deliberately ignored
    try:
        pa.unregister_extension_type('pandas.period')
    except KeyError:
        pass
Exemple #3
0
def registered_period_type(request):
    """Register the extension type carried by ``request.param``.

    Yields a ``(type, array_class)`` pair, where ``array_class`` is
    whatever the type's ``__arrow_ext_class__()`` returns. Afterwards
    the ``'test.period'`` name is unregistered again.

    NOTE(review): reading ``request.param`` suggests this is meant to be
    a parametrized pytest fixture -- the decorator is not visible here.
    """
    # setup
    ext_type = request.param
    array_class = ext_type.__arrow_ext_class__()
    pa.register_extension_type(ext_type)
    yield ext_type, array_class
    # teardown: best effort, a KeyError raised while unregistering is
    # deliberately ignored
    try:
        pa.unregister_extension_type('test.period')
    except KeyError:
        pass
Exemple #4
0
            return type(self) == type(other) and self.freq == other.freq
        else:
            return NotImplemented

    def __hash__(self) -> int:
        """Hash the type on its string form combined with its ``freq``."""
        identity = (str(self), self.freq)
        return hash(identity)

    def to_pandas_dtype(self):
        """Return a pandas ``PeriodDtype`` built from this type's ``freq``."""
        # pandas is imported at call time rather than at module import.
        import pandas as pd

        frequency = self.freq
        return pd.PeriodDtype(freq=frequency)


# Register the period extension type with pyarrow when this module is
# executed. Registration takes an instance, so a throwaway one with
# frequency "D" is created for that purpose.
_period_type = ArrowPeriodType("D")
pyarrow.register_extension_type(_period_type)


class ArrowIntervalType(pyarrow.ExtensionType):
    """pyarrow extension type registered under the name "pandas.interval".

    Values are stored as a struct with two fields, ``left`` and
    ``right``, that share one subtype.
    """

    @deprecate_kwarg(old_arg_name="closed", new_arg_name="inclusive")
    def __init__(self, subtype, inclusive: IntervalInclusiveType) -> None:
        """Build the type from a bound subtype and an ``inclusive`` value.

        ``subtype`` may be a ``pyarrow.DataType`` or anything whose
        ``str()`` is a type alias understood by
        ``pyarrow.type_for_alias``. ``inclusive`` must be one of
        ``VALID_INCLUSIVE``.
        """
        # The private attributes have to exist before the base-class
        # initializer runs, because that initializer calls serialize.
        assert inclusive in VALID_INCLUSIVE
        self._inclusive: IntervalInclusiveType = inclusive

        if isinstance(subtype, pyarrow.DataType):
            arrow_subtype = subtype
        else:
            arrow_subtype = pyarrow.type_for_alias(str(subtype))
        self._subtype = arrow_subtype

        bound_fields = [("left", arrow_subtype), ("right", arrow_subtype)]
        pyarrow.ExtensionType.__init__(
            self, pyarrow.struct(bound_fields), "pandas.interval"
        )
Exemple #5
0
        if not isinstance(subtype, pa.DataType):
            subtype = pa.type_for_alias(str(subtype))

        self._subtype = subtype
        pa.ExtensionType.__init__(self, pa.list_(subtype, 2),
                                  "dask-ms.complex")

    def to_pandas_dtype(self):
        """Return the subtype's pandas dtype promoted with ``complex64``.

        The promotion is delegated to ``np.result_type``.
        """
        component_dtype = self._subtype.to_pandas_dtype()
        return np.result_type(component_dtype, np.complex64)

    def __eq__(self, other):
        """Equal only to another ``ComplexType`` with an equal subtype."""
        if not isinstance(other, ComplexType):
            return False
        return self._subtype == other._subtype

    def __arrow_ext_serialize__(self):
        """Return the serialized metadata, which is always empty bytes."""
        payload = b""
        return payload

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        """Rebuild a ``ComplexType`` from the storage type alone.

        ``serialized`` is not consulted; the subtype is read from
        ``storage_type.value_type``.
        """
        element_type = storage_type.value_type
        return ComplexType(element_type)

    def __arrow_ext_class__(self):
        """Return ``ComplexArray`` as the array class for this type."""
        array_class = ComplexArray
        return array_class


# Register the extension types with pyarrow, but only when ``pa`` is
# truthy.
# NOTE(review): presumably ``pa`` is a falsy placeholder when pyarrow
# could not be imported -- confirm against the import at the top of the file.
if pa:
    # Registration takes instances, so throwaway "float32" ones are built.
    _complex_type = ComplexType("float32")
    _tensor_type = TensorType((1, ), "float32")
    pa.register_extension_type(_complex_type)
    pa.register_extension_type(_tensor_type)
Exemple #6
0
                metadata["node_type"],
                metadata["mask_parameters"],
                metadata["node_parameters"],
                metadata["record_is_tuple"],
                metadata["record_is_scalar"],
            )

        @property
        def num_buffers(self):
            """Number of buffers, taken from the underlying storage type."""
            storage = self.storage_type
            return storage.num_buffers

        @property
        def num_fields(self):
            """Number of fields, taken from the underlying storage type."""
            storage = self.storage_type
            return storage.num_fields

    # Register AwkwardArrowType with pyarrow using a placeholder
    # instance: a null storage type and None for every other argument.
    pyarrow.register_extension_type(
        AwkwardArrowType(pyarrow.null(), None, None, None, None, None, None))

    # Order is important because the tuple is consumed through slices:
    # _string_like[:2] selects (string, large_string), while
    # _string_like[::2] selects (string, binary).
    _string_like = (
        pyarrow.string(),
        pyarrow.large_string(),
        pyarrow.binary(),
        pyarrow.large_binary(),
    )

    _pyarrow_to_numpy_dtype = {
        pyarrow.date32(): (True, np.dtype("M8[D]")),
        pyarrow.date64(): (False, np.dtype("M8[ms]")),
        pyarrow.time32("s"): (True, np.dtype("M8[s]")),
        pyarrow.time32("ms"): (True, np.dtype("M8[ms]")),
        pyarrow.time64("us"): (False, np.dtype("M8[us]")),
Exemple #7
0
    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        """Rebuild an ``ArrowTensorType`` from its serialized metadata.

        ``serialized`` is decoded and parsed as JSON; its ``"shape"``
        entry is used as-is and its ``"subtype"`` entry is resolved with
        ``pa.type_for_alias``. ``storage_type`` is not consulted.
        """
        decoded = json.loads(serialized.decode())
        return ArrowTensorType(
            shape=decoded["shape"],
            subtype=pa.type_for_alias(decoded["subtype"]),
        )

    def to_pandas_dtype(self):
        """Return a newly constructed ``TensorDtype`` as the pandas dtype."""
        pandas_dtype = TensorDtype()
        return pandas_dtype


# Register the tensor extension type with pyarrow when this module is
# executed. Registration takes an instance, so a throwaway one (shape
# (1,), float32 subtype) is created for that purpose.
_tensor_type = ArrowTensorType((1, ), pa.float32())
pa.register_extension_type(_tensor_type)


class registry_type(type):
    """Metaclass that makes ``str(cls)`` agree with ``cls.name``.

    Works around a registry lookup problem for extension types: it
    appears that parquet stores ``str(TensorDtype)``, whereas the lookup
    tries to match against ``TensorDtype.name``.
    """

    def __str__(self):
        """Return the class's ``name`` attribute, else its ``__name__``."""
        return getattr(self, "name", self.__name__)