コード例 #1
0
def _get_extension_dtypes(table, columns_metadata, extension_columns):
    """
    Determine which columns should be converted to a pandas extension dtype.

    The 'numpy_type' field in the column metadata stores the string
    representation of the original pandas dtype (and, despite its name,
    not the 'pandas_type' field).
    Based on this string representation, a pandas/numpy dtype is constructed
    and then we can check if this dtype supports conversion from arrow
    (i.e. whether it defines ``__from_arrow__``).

    Parameters
    ----------
    table : object
        Object whose ``schema`` can be iterated to yield fields exposing
        ``name`` and ``type`` attributes. It is only consulted when
        `extension_columns` is None.
    columns_metadata : iterable of mapping
        Per-column pandas metadata; each entry is read through its 'name'
        and 'numpy_type' keys.
    extension_columns : iterable of str or None
        Column names that must be converted to an extension dtype. If None,
        the columns are inferred from `columns_metadata` and from the
        extension types found in ``table.schema``.

    Returns
    -------
    dict
        Mapping of column name to the pandas dtype to convert that column to.
        Empty when the installed pandas has no extension dtype support and
        `extension_columns` is None.

    Raises
    ------
    ValueError
        If `extension_columns` is given but the installed pandas does not
        support extension dtypes, if a requested column has no entry in
        `columns_metadata`, if its dtype is not an extension dtype, or if
        that dtype does not define ``__from_arrow__``.
    """
    ext_columns = {}

    # older pandas version that does not yet support extension dtypes
    if _pandas_api.extension_dtype is None:
        if extension_columns is not None:
            raise ValueError(
                "Converting to pandas ExtensionDtypes is not supported")
        return ext_columns

    if extension_columns is None:
        # infer the extension columns from the pandas metadata
        for col_meta in columns_metadata:
            name = col_meta['name']
            dtype = col_meta['numpy_type']
            if dtype in _pandas_supported_numpy_types:
                # pandas_dtype is expensive, so avoid doing this for types
                # that are certainly numpy dtypes
                continue
            pandas_dtype = _pandas_api.pandas_dtype(dtype)
            if (isinstance(pandas_dtype, _pandas_api.extension_dtype)
                    and hasattr(pandas_dtype, "__from_arrow__")):
                ext_columns[name] = pandas_dtype

        # infer from extension type in the schema; these entries are written
        # after the metadata-based ones and therefore take precedence
        for field in table.schema:
            typ = field.type
            if not isinstance(typ, pa.BaseExtensionType):
                continue
            try:
                pandas_dtype = typ.to_pandas_dtype()
            except NotImplementedError:
                # the extension type defines no pandas dtype: leave the
                # column out of the result
                continue
            ext_columns[field.name] = pandas_dtype

    else:
        # get the extension dtype for the specified columns
        for name in extension_columns:
            # first metadata entry describing this column, if any
            col_meta = next(
                (meta for meta in columns_metadata if meta['name'] == name),
                None)
            if col_meta is None:
                # previously this surfaced as an opaque
                # "IndexError: list index out of range"
                raise ValueError(
                    "column {!r} was not found in the pandas metadata"
                    .format(name))
            pandas_dtype = _pandas_api.pandas_dtype(col_meta['numpy_type'])
            if not isinstance(pandas_dtype, _pandas_api.extension_dtype):
                raise ValueError("not an extension dtype")
            if not hasattr(pandas_dtype, "__from_arrow__"):
                raise ValueError("this column does not support to be "
                                 "converted to extension dtype")
            ext_columns[name] = pandas_dtype

    return ext_columns
コード例 #2
0
def _get_extension_dtypes(table, columns_metadata, types_mapper=None):
    """
    Work out which columns should become pandas extension dtypes.

    Three sources are consulted in order, each one overriding entries
    written by the previous ones for the same column name:

    1. The pandas column metadata. Its 'numpy_type' field holds the string
       representation of the original pandas dtype (despite the name, it is
       this field and not 'pandas_type' that is used). A dtype is built from
       that string and kept when it is an extension dtype that defines
       ``__from_arrow__``.
    2. Arrow extension types in ``table.schema`` that can provide a pandas
       dtype through ``to_pandas_dtype()``.
    3. `types_mapper`, if given: it is called with the arrow type of every
       field and any non-None result is used for that field.

    Parameters
    ----------
    table : object
        Object whose ``schema`` can be iterated to yield fields exposing
        ``name`` and ``type`` attributes.
    columns_metadata : iterable of mapping
        Per-column pandas metadata; the column name is read from
        'field_name', falling back to 'name' when that key is missing.
    types_mapper : callable, optional
        Function taking an arrow type and returning a pandas dtype or None.

    Returns
    -------
    dict
        Mapping of column name to pandas dtype. Empty when the installed
        pandas has no extension dtype support.
    """
    # older pandas version that does not yet support extension dtypes
    if _pandas_api.extension_dtype is None:
        return {}

    result = {}

    # pass 1: pandas metadata
    for meta in columns_metadata:
        try:
            column = meta['field_name']
        except KeyError:
            column = meta['name']
        dtype_repr = meta['numpy_type']

        if dtype_repr in _pandas_supported_numpy_types:
            # certainly a plain numpy dtype; skip the expensive
            # pandas_dtype construction
            continue

        candidate = _pandas_api.pandas_dtype(dtype_repr)
        if (isinstance(candidate, _pandas_api.extension_dtype)
                and hasattr(candidate, "__from_arrow__")):
            result[column] = candidate

    # pass 2: arrow extension types found in the schema
    for field in table.schema:
        arrow_type = field.type
        if not isinstance(arrow_type, pa.BaseExtensionType):
            continue
        try:
            converted = arrow_type.to_pandas_dtype()
        except NotImplementedError:
            # this extension type has no pandas counterpart
            continue
        result[field.name] = converted

    # pass 3: user-specified mapping of arrow types to pandas dtypes
    if not types_mapper:
        return result

    for field in table.schema:
        mapped = types_mapper(field.type)
        if mapped is not None:
            result[field.name] = mapped

    return result