def sequence_from_anyvalue_or_object(name: str, values: Sequence[Any]) -> "PySeries":
    """
    Fallback conversion for sequences no dedicated constructor handles.

    The values are first handed to ``PySeries.new_from_anyvalues``. If that
    raises ``RuntimeError``, an object Series is built instead through
    ``PySeries.new_object`` (called with ``False`` as its third argument).
    """
    try:
        pyseries = PySeries.new_from_anyvalues(name, values)
    except RuntimeError:
        # raised if we cannot convert to Wrap<AnyValue>
        pyseries = PySeries.new_object(name, values, False)
    return pyseries
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
    strict: bool = True,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name passed to the underlying constructor.
    values
        Sequence to convert.
    dtype
        Explicit Polars dtype. When given, the constructor is looked up with
        ``polars_type_to_constructor``; otherwise the type of the first
        non-None value decides how the sequence is converted.
    strict
        Forwarded unchanged to the selected constructor.

    Raises
    ------
    ImportError
        If date/datetime or nested values are encountered while
        ``_PYARROW_AVAILABLE`` is false.
    ValueError
        If ``py_type_to_arrow_type`` rejects the inferred nested type.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values, strict)
        # Date types are built by the looked-up constructor and then cast.
        if dtype == Date32:
            pyseries = pyseries.cast(str(pl.Date32), True)
        elif dtype == Date64:
            pyseries = pyseries.cast(str(pl.Date64), True)
        return pyseries

    value = _get_first_non_none(values)
    dtype_ = type(value) if value is not None else float

    if dtype_ == date or dtype_ == datetime:
        if not _PYARROW_AVAILABLE:
            raise ImportError(
                "'pyarrow' is required for converting a Sequence of date or datetime values to a PySeries."
            )
        return arrow_to_pyseries(name, pa.array(values))

    if dtype_ == list or dtype_ == tuple or dtype_ == pl.Series:
        nested_value = _get_first_non_none(value)
        # Fall back to float when the first nested sequence holds no non-None
        # item. The check must be on `nested_value`: `value` itself is never
        # None in this branch, so testing it would yield `NoneType` here.
        nested_dtype = type(nested_value) if nested_value is not None else float

        if not _PYARROW_AVAILABLE:
            raise ImportError(
                f"'pyarrow' is required for converting a Sequence of {nested_dtype} to a PySeries."
            )

        try:
            nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
        except ValueError as e:
            raise ValueError(
                f"Cannot construct Series from sequence of {nested_dtype}."
            ) from e

        try:
            arrow_values = pa.array(values, pa.large_list(nested_arrow_dtype))
            return arrow_to_pyseries(name, arrow_values)
        # failure expected for mixed sequences like `[[12], "foo", 9]`
        except pa.lib.ArrowInvalid:
            return PySeries.new_object(name, values, strict)

    constructor = py_type_to_constructor(dtype_)
    return constructor(name, values, strict)
def arrow_to_pyseries(name: str, values: "pa.Array", rechunk: bool = True) -> "PySeries":
    """
    Construct a PySeries from an Arrow array.

    The input is first passed through ``coerce_arrow``. If the result exposes
    ``num_chunks`` it is converted chunk by chunk (or combined when there is
    at most one chunk) and, when ``rechunk`` is true, rechunked in place.
    Anything without ``num_chunks`` is converted directly.
    """
    array = coerce_arrow(values)

    # No `num_chunks` attribute: hand the array over as-is.
    if not hasattr(array, "num_chunks"):
        return PySeries.from_arrow(name, array)

    if array.num_chunks > 1:
        chunks = array.iterchunks()
        # Seed the series with the first chunk, then append the remainder.
        pys = PySeries.from_arrow(name, next(chunks))
        for chunk in chunks:
            pys.append(PySeries.from_arrow(name, chunk))
    else:
        pys = PySeries.from_arrow(name, array.combine_chunks())

    if rechunk:
        pys.rechunk(in_place=True)
    return pys
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name passed to the underlying constructor.
    values
        Sequence to convert.
    dtype
        Explicit Polars dtype. When given, the constructor is looked up with
        ``polars_type_to_constructor``; otherwise the type of the first
        non-None value decides how the sequence is converted.

    Raises
    ------
    ValueError
        If ``py_type_to_arrow_type`` rejects the inferred nested type.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values)
        # Date types are built by the looked-up constructor and then cast.
        if dtype == Date32:
            pyseries = pyseries.cast_date32()
        elif dtype == Date64:
            pyseries = pyseries.cast_date64()
        return pyseries

    value = _get_first_non_none(values)
    dtype_ = type(value) if value is not None else float

    if dtype_ == date or dtype_ == datetime:
        return arrow_to_pyseries(name, pa.array(values))

    if dtype_ == list or dtype_ == tuple:
        nested_value = _get_first_non_none(value)
        # Fall back to float when the first nested sequence holds no non-None
        # item. The check must be on `nested_value`: `value` itself is never
        # None in this branch, so testing it would yield `NoneType` here.
        nested_dtype = type(nested_value) if nested_value is not None else float

        try:
            nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
        except ValueError as e:
            raise ValueError(
                f"Cannot construct Series from sequence of {nested_dtype}."
            ) from e

        try:
            arrow_values = pa.array(values, pa.large_list(nested_arrow_dtype))
            return arrow_to_pyseries(name, arrow_values)
        # failure expected for mixed sequences like `[[12], "foo", 9]`
        except pa.lib.ArrowInvalid:
            return PySeries.new_object(name, values)

    constructor = py_type_to_constructor(dtype_)
    return constructor(name, values)
def numpy_to_pyseries(
    name: str, values: np.ndarray, nullable: bool = True, strict: bool = True
) -> "PySeries":
    """
    Construct a PySeries from a numpy array.

    Parameters
    ----------
    name
        Name passed to the underlying constructor.
    values
        Array to convert. Non-C-contiguous input is copied with ``np.array``
        first.
    nullable
        Passed as the third constructor argument for float32/float64 arrays.
    strict
        Passed as the third constructor argument for every other
        one-dimensional dtype.

    Notes
    -----
    Arrays that are not one-dimensional are stored via
    ``PySeries.new_object``.
    """
    # Query the array flags instead of `values.data.contiguous`: building the
    # memoryview behind `.data` raises ValueError for dtypes that cannot be
    # exported through the buffer protocol (e.g. datetime64).
    if not values.flags["C_CONTIGUOUS"]:
        values = np.array(values)

    if len(values.shape) == 1:
        dtype = values.dtype.type
        constructor = numpy_type_to_constructor(dtype)
        if dtype == np.float32 or dtype == np.float64:
            return constructor(name, values, nullable)
        else:
            return constructor(name, values, strict)
    else:
        return PySeries.new_object(name, values)
def numpy_to_pyseries(
    name: str, values: np.ndarray, strict: bool = True, nan_to_null: bool = False
) -> "PySeries":
    """
    Construct a PySeries from a numpy array.

    Non-C-contiguous input is copied with ``np.array`` first. A
    one-dimensional array is dispatched to the constructor returned by
    ``numpy_type_to_constructor``; float32/float64 arrays receive
    ``nan_to_null`` as the third argument, every other dtype receives
    ``strict``. Arrays of any other dimensionality become an object Series.
    """
    if not values.flags["C_CONTIGUOUS"]:
        values = np.array(values)

    # Anything that is not 1-D is stored as objects.
    if len(values.shape) != 1:
        return PySeries.new_object(name, values, strict)

    scalar_type = values.dtype.type
    build = numpy_type_to_constructor(scalar_type)
    # Float constructors take the NaN-handling flag in place of `strict`.
    third_arg = nan_to_null if scalar_type in (np.float32, np.float64) else strict
    return build(name, values, third_arg)
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
    strict: bool = True,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name passed to the underlying constructor.
    values
        Sequence to convert.
    dtype
        Explicit Polars dtype. When given, the constructor is looked up with
        ``polars_type_to_constructor``; otherwise the type of the first
        non-None value decides how the sequence is converted.
    strict
        Forwarded unchanged to the selected constructor.

    Raises
    ------
    ImportError
        If date/datetime/timedelta values are encountered while
        ``_PYARROW_AVAILABLE`` is false.
    ValueError
        If ``py_type_to_arrow_type`` rejects the inferred nested type.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values, strict)
        if dtype in (Date, Datetime, Duration, Time, Categorical):
            pyseries = pyseries.cast(dtype, True)
        return pyseries

    else:
        value = _get_first_non_none(values)
        dtype_ = type(value) if value is not None else float

        if dtype_ in {date, datetime, timedelta}:
            if not _PYARROW_AVAILABLE:  # pragma: no cover
                raise ImportError(
                    "'pyarrow' is required for converting a Sequence of date or datetime values to a PySeries."
                )
            # let arrow infer dtype if not timedelta
            # arrow uses microsecond durations by default, not supported yet.
            return arrow_to_pyseries(name, pa.array(values))

        elif dtype_ == list or dtype_ == tuple:
            nested_value = _get_first_non_none(value)
            # Fall back to float when the first nested sequence holds no
            # non-None item. The check must be on `nested_value`: `value`
            # itself is never None in this branch.
            nested_dtype = type(nested_value) if nested_value is not None else float

            # recursively call Series constructor
            if nested_dtype == list:
                return sequence_to_pyseries(
                    name=name,
                    values=[
                        sequence_to_pyseries(name, seq, dtype=None, strict=strict)
                        for seq in values
                    ],
                    dtype=None,
                    strict=strict,
                )

            # logs will show a panic if we infer wrong dtype
            # and its hard to error from rust side
            # to reduce the likelihood of this happening
            # we check the inner types of a sample of the leading elements
            # if that check fails, we will hit the PySeries.new_object
            if not _PYARROW_AVAILABLE:
                # check lists for consistent inner types
                if isinstance(value, list):
                    count = 0
                    equal_to_inner = True
                    for lst in values:
                        if lst is None:
                            # a null entry has no inner values to inspect
                            continue
                        if not isinstance(lst, (list, tuple)):
                            # mixed sequence such as `[[12], "foo", 9]`
                            equal_to_inner = False
                            break
                        for vl in lst:
                            equal_to_inner = type(vl) == nested_dtype
                            if not equal_to_inner or count > 50:
                                break
                            count += 1
                        # Propagate the inner `break`; without this a later
                        # sub-list would overwrite a detected mismatch.
                        if not equal_to_inner or count > 50:
                            break
                    if equal_to_inner:
                        dtype = py_type_to_dtype(nested_dtype)
                        try:
                            return PySeries.new_list(name, values, dtype)
                        # NOTE(review): BaseException is presumably deliberate
                        # so that panics raised from the Rust side are caught
                        # too - confirm before narrowing.
                        except BaseException:
                            pass
                # pass we create an object if we get here
            else:
                try:
                    nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
                except ValueError as e:  # pragma: no cover
                    raise ValueError(
                        f"Cannot construct Series from sequence of {nested_dtype}."
                    ) from e
                try:
                    arrow_values = pa.array(values, pa.large_list(nested_arrow_dtype))
                    return arrow_to_pyseries(name, arrow_values)
                except pa.lib.ArrowInvalid:
                    pass

            # Convert mixed sequences like `[[12], "foo", 9]`
            return PySeries.new_object(name, values, strict)

        elif dtype_ == pli.Series:
            return PySeries.new_series_list(name, [v.inner() for v in values], strict)

        elif dtype_ == PySeries:
            return PySeries.new_series_list(name, values, strict)

        else:
            constructor = py_type_to_constructor(dtype_)

            if constructor == PySeries.new_object:
                # Prefer a numpy-based constructor over an object Series when
                # one is registered for this Python type.
                np_constructor = numpy_type_to_constructor(dtype_)
                if np_constructor is not None:
                    values = np.array(values)  # type: ignore
                    constructor = np_constructor

            return constructor(name, values, strict)
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: PolarsDataType | None = None,
    strict: bool = True,
) -> PySeries:
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name passed to the underlying constructor.
    values
        Sequence to convert.
    dtype
        Requested dtype. A Polars dtype selects its constructor directly,
        except for ``List`` and for temporal dtypes paired with non-integer
        values, which are routed through the Python-type based handling
        below. A Python temporal type paired with integer values is first
        translated with ``py_type_to_dtype``.
    strict
        Forwarded unchanged to the selected constructor.

    Raises
    ------
    ImportError
        If temporal values must be converted while ``_PYARROW_AVAILABLE`` is
        false.
    """
    dtype_: type | None = None
    nested_dtype: PolarsDataType | type | None = None
    temporal_unit: str | None = None

    # empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32
    # lists defer to subsequent handling; identify nested type
    elif dtype == List:
        nested_dtype = getattr(dtype, "inner", None)
        dtype_ = list

    # infer temporal type handling
    py_temporal_types = {date, datetime, timedelta, time}
    pl_temporal_types = {Date, Datetime, Duration, Time}

    value = _get_first_non_none(values)
    if value is not None:
        if dtype in py_temporal_types and isinstance(value, int):
            dtype = py_type_to_dtype(dtype)  # construct from integer
        elif (
            dtype in pl_temporal_types or type(dtype) in pl_temporal_types
        ) and not isinstance(value, int):
            temporal_unit = getattr(dtype, "tu", None)
            dtype_ = dtype_to_py_type(dtype)  # type: ignore[arg-type]

    if (dtype is not None) and is_polars_dtype(dtype) and (dtype_ is None):
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values, strict)

        if dtype in (Date, Datetime, Duration, Time, Categorical):
            pyseries = pyseries.cast(dtype, True)
        return pyseries

    else:
        if dtype_ is None:
            dtype_ = float if (value is None) else type(value)

        if dtype_ in py_temporal_types:
            if not _PYARROW_AVAILABLE:  # pragma: no cover
                raise ImportError(
                    "'pyarrow' is required for converting a Sequence of date or datetime values to a PySeries."
                )
            # let arrow infer dtype if not timedelta
            # arrow uses microsecond durations by default, not supported yet.
            arrow_dtype = (
                dtype_to_arrow_type(dtype)
                if (dtype is not None and temporal_unit)
                else None
            )
            return arrow_to_pyseries(name, pa.array(values, type=arrow_dtype))

        elif dtype_ == list or dtype_ == tuple:
            if nested_dtype is None:
                nested_value = _get_first_non_none(value)
                nested_dtype = type(nested_value) if nested_value is not None else float

            # recursively call Series constructor
            if nested_dtype == list:
                return sequence_to_pyseries(
                    name=name,
                    values=[
                        sequence_to_pyseries(name, seq, dtype=None, strict=strict)
                        for seq in values
                    ],
                    dtype=None,
                    strict=strict,
                )

            # logs will show a panic if we infer wrong dtype
            # and its hard to error from rust side
            # to reduce the likelihood of this happening
            # we check the inner types of a sample of the leading elements
            # if that check fails, we will hit the PySeries.new_object
            if not _PYARROW_AVAILABLE:
                # check lists for consistent inner types
                if isinstance(value, list):
                    count = 0
                    equal_to_inner = True
                    for lst in values:
                        if lst is None:
                            # a null entry has no inner values to inspect
                            continue
                        if not isinstance(lst, (list, tuple)):
                            # mixed sequence such as `[[12], "foo", 9]`
                            equal_to_inner = False
                            break
                        for vl in lst:
                            equal_to_inner = type(vl) == nested_dtype
                            if not equal_to_inner or count > 50:
                                break
                            count += 1
                        # Propagate the inner `break`; without this a later
                        # sub-list would overwrite a detected mismatch.
                        if not equal_to_inner or count > 50:
                            break
                    if equal_to_inner:
                        dtype = py_type_to_dtype(nested_dtype)
                        try:
                            return PySeries.new_list(name, values, dtype)
                        # NOTE(review): BaseException is presumably deliberate
                        # so that panics raised from the Rust side are caught
                        # too - confirm before narrowing.
                        except BaseException:
                            pass
                # pass we create an object if we get here
            else:
                try:
                    to_arrow_type = (
                        dtype_to_arrow_type
                        if is_polars_dtype(nested_dtype)
                        else py_type_to_arrow_type
                    )
                    nested_arrow_dtype = to_arrow_type(
                        nested_dtype  # type: ignore[arg-type]
                    )
                except ValueError:  # pragma: no cover
                    return sequence_from_anyvalue_or_object(name, values)
                try:
                    arrow_values = pa.array(values, pa.large_list(nested_arrow_dtype))
                    return arrow_to_pyseries(name, arrow_values)
                except (pa.lib.ArrowInvalid, pa.lib.ArrowTypeError):
                    pass

            # Convert mixed sequences like `[[12], "foo", 9]`
            return PySeries.new_object(name, values, strict)

        elif dtype_ == pli.Series:
            return PySeries.new_series_list(name, [v.inner() for v in values], strict)

        elif dtype_ == PySeries:
            return PySeries.new_series_list(name, values, strict)

        else:
            constructor = py_type_to_constructor(dtype_)

            if constructor == PySeries.new_object:
                # The helper already tries `PySeries.new_from_anyvalues` and
                # falls back to an object Series on RuntimeError, so delegate
                # once instead of attempting the AnyValue conversion twice.
                return sequence_from_anyvalue_or_object(name, values)

            return constructor(name, values, strict)
def arrow_to_pyseries(name: str, values: pa.Array) -> "PySeries":
    """
    Construct a PySeries from an Arrow array.

    The array is passed through ``coerce_arrow`` and the result is handed to
    ``PySeries.from_arrow`` together with ``name``.
    """
    return PySeries.from_arrow(name, coerce_arrow(values))