def coerce_arrow(array: pa.Array) -> pa.Array:
    """Coerce a pyarrow array into a representation this module can ingest.

    The following conversions are applied, based on ``array.type``:

    * types whose string form contains ``"timestamp"`` are cast to
      millisecond timestamps (unsafe cast), then to ``int64`` and finally to
      ``date64``; a warning is emitted because timezone information may be
      lost;
    * ``Decimal128`` arrays are cast to ``float64``;
    * dictionary-encoded string arrays are cast to ``large_utf8``.

    When the input exposes ``num_chunks`` and holds more than one chunk,
    string and list data are first widened to their "large" variants and the
    chunks are then combined.

    Parameters
    ----------
    array
        The pyarrow array (or chunked array) to coerce.

    Returns
    -------
    pa.Array
        The coerced array; multi-chunk inputs are returned with their chunks
        combined.

    Raises
    ------
    ValueError
        If ``array`` is dictionary encoded with a non-string value type.
    """
    # also coerces timezone to naive representation
    # units are accounted for by pyarrow
    if "timestamp" in str(array.type):
        warnings.warn(
            "Conversion of (potentially) timezone aware to naive datetimes. TZ information may be lost",
        )
        ts_ms = pa.compute.cast(array, pa.timestamp("ms"), safe=False)
        ms = pa.compute.cast(ts_ms, pa.int64())
        # drop intermediates as soon as possible to keep peak memory down
        del ts_ms
        array = pa.compute.cast(ms, pa.date64())
        del ms
    # note: Decimal256 could not be cast to float
    elif isinstance(array.type, pa.Decimal128Type):
        array = pa.compute.cast(array, pa.float64())
    # simplest solution is to cast to (large)-string arrays
    # this copies the data and is expensive
    elif isinstance(array.type, pa.DictionaryType):
        if pa.types.is_string(array.type.value_type):
            array = pa.compute.cast(array, pa.large_utf8())
        else:
            raise ValueError(
                "polars does not support dictionary encoded types other than strings"
            )

    if hasattr(array, "num_chunks") and array.num_chunks > 1:
        # widen to the "large" variants before the chunks are combined
        if pa.types.is_string(array.type):
            array = pa.compute.cast(array, pa.large_utf8())
        elif pa.types.is_list(array.type):
            # ``pa.large_list`` requires the element type; calling it without
            # arguments raises TypeError, so forward the source list's value type.
            array = pa.compute.cast(array, pa.large_list(array.type.value_type))
        array = array.combine_chunks()
    return array
def coerce_arrow(array: pa.Array) -> pa.Array:
    """Normalise a pyarrow array before it is handed over for ingestion.

    * Types whose string form contains ``"timestamp"`` are cast to
      millisecond timestamps (unsafe cast), then ``int64``, then ``date64``.
      A warning is raised first, since timezone information may be lost.
    * ``Decimal128`` arrays become ``float64``.
    * Inputs exposing ``num_chunks`` with more than one chunk are merged into
      a single chunk; string data is widened to ``large_utf8`` beforehand and
      list data is rebuilt chunk by chunk via ``pl.from_arrow(...).to_arrow()``.

    Parameters
    ----------
    array
        The pyarrow array (or chunked array) to normalise.

    Returns
    -------
    pa.Array
        The normalised array.
    """
    dtype = array.type

    # also coerces timezone to naive representation
    # units are accounted for by pyarrow
    if "timestamp" in str(dtype):
        warnings.warn(
            "Conversion of (potentially) timezone aware to naive datetimes. TZ information may be lost",
        )
        # rebinding one temporary releases each intermediate as soon as the
        # next cast has been produced
        converted = pa.compute.cast(array, pa.timestamp("ms"), safe=False)
        converted = pa.compute.cast(converted, pa.int64())
        array = pa.compute.cast(converted, pa.date64())
        del converted
    # note: Decimal256 could not be cast to float
    elif isinstance(dtype, pa.Decimal128Type):
        array = pa.compute.cast(array, pa.float64())

    # nothing to merge for plain arrays or single-chunk inputs
    if getattr(array, "num_chunks", 1) <= 1:
        return array

    # we have to coerce before combining chunks, because pyarrow panics if
    # offsets overflow
    if pa.types.is_string(array.type):
        array = pa.compute.cast(array, pa.large_utf8())
    elif pa.types.is_list(array.type):
        # pyarrow does not seem to support casting from list to largelist,
        # so every chunk is round-tripped and the re-allocation happens on
        # the polars/arrow side
        array = pa.chunked_array(
            [pl.from_arrow(chunk).to_arrow() for chunk in array.iterchunks()]
        )
    return array.combine_chunks()