def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
    strict: bool = True,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name passed to the PySeries constructor.
    values
        Sequence of Python values to convert.
    dtype
        Polars data type used to pick the constructor. When omitted, the
        type is inferred from the first non-None element of ``values``; an
        empty sequence defaults to ``Float32``.
    strict
        Forwarded unchanged to the selected PySeries constructor.

    Returns
    -------
    PySeries

    Raises
    ------
    ImportError
        If pyarrow is not available and ``values`` holds dates, datetimes or
        nested sequences.
    ValueError
        If ``py_type_to_arrow_type`` rejects the inner type of a nested
        sequence.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values, strict)
        # Date types are built by the constructor first and cast afterwards.
        if dtype == Date32:
            pyseries = pyseries.cast(str(pl.Date32), True)
        elif dtype == Date64:
            pyseries = pyseries.cast(str(pl.Date64), True)
        return pyseries

    # No explicit dtype: infer it from the first non-None element.
    value = _get_first_non_none(values)
    dtype_ = type(value) if value is not None else float

    if dtype_ in (date, datetime):
        if not _PYARROW_AVAILABLE:
            raise ImportError(
                "'pyarrow' is required for converting a Sequence of date or datetime values to a PySeries."
            )
        return arrow_to_pyseries(name, pa.array(values))

    if dtype_ in (list, tuple, pl.Series):
        nested_value = _get_first_non_none(value)
        # Fall back to float when the first inner sequence is empty or holds
        # only None. The check must be on `nested_value`: `value` is never
        # None in this branch, so testing it would make the fallback dead
        # code and leave NoneType as the inferred inner type.
        nested_dtype = type(nested_value) if nested_value is not None else float

        if not _PYARROW_AVAILABLE:
            raise ImportError(
                f"'pyarrow' is required for converting a Sequence of {nested_dtype} to a PySeries."
            )

        try:
            nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
        except ValueError as e:
            raise ValueError(
                f"Cannot construct Series from sequence of {nested_dtype}."
            ) from e

        try:
            arrow_values = pa.array(values, pa.large_list(nested_arrow_dtype))
            return arrow_to_pyseries(name, arrow_values)
        # failure expected for mixed sequences like `[[12], "foo", 9]`
        except pa.lib.ArrowInvalid:
            return PySeries.new_object(name, values, strict)

    constructor = py_type_to_constructor(dtype_)
    return constructor(name, values, strict)
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name passed to the PySeries constructor.
    values
        Sequence of Python values to convert.
    dtype
        Polars data type used to pick the constructor. When omitted, the
        type is inferred from the first non-None element of ``values``; an
        empty sequence defaults to ``Float32``.

    Returns
    -------
    PySeries

    Raises
    ------
    ValueError
        If ``py_type_to_arrow_type`` rejects the inner type of a nested
        sequence.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values)
        # Date types are built by the constructor first and cast afterwards.
        if dtype == Date32:
            pyseries = pyseries.cast_date32()
        elif dtype == Date64:
            pyseries = pyseries.cast_date64()
        return pyseries

    # No explicit dtype: infer it from the first non-None element.
    value = _get_first_non_none(values)
    dtype_ = type(value) if value is not None else float

    if dtype_ in (date, datetime):
        return arrow_to_pyseries(name, pa.array(values))

    if dtype_ in (list, tuple):
        nested_value = _get_first_non_none(value)
        # Fall back to float when the first inner sequence is empty or holds
        # only None. The check must be on `nested_value`: `value` is never
        # None in this branch, so testing it would make the fallback dead
        # code and leave NoneType as the inferred inner type.
        nested_dtype = type(nested_value) if nested_value is not None else float

        try:
            nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
        except ValueError as e:
            raise ValueError(
                f"Cannot construct Series from sequence of {nested_dtype}."
            ) from e

        try:
            arrow_values = pa.array(values, pa.large_list(nested_arrow_dtype))
            return arrow_to_pyseries(name, arrow_values)
        # failure expected for mixed sequences like `[[12], "foo", 9]`
        except pa.lib.ArrowInvalid:
            return PySeries.new_object(name, values)

    constructor = py_type_to_constructor(dtype_)
    return constructor(name, values)
def _inner_types_consistent(
    values: Sequence[Any], inner_type: type, limit: int = 50
) -> bool:
    """Return True if a sample of the nested elements all have exactly ``inner_type``."""
    checked = 0
    for row in values:
        # Rows that are not sequences (None, scalars, strings) mean the input
        # is not a homogeneous list-of-lists; report a mismatch rather than
        # failing while trying to iterate them.
        if not isinstance(row, (list, tuple)):
            return False
        for item in row:
            # Exact type comparison on purpose: isinstance() would accept
            # bool where int was inferred.
            if type(item) != inner_type:
                return False
            checked += 1
            if checked > limit:
                # Only a bounded sample is inspected.
                return True
    return True


def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
    strict: bool = True,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.

    Parameters
    ----------
    name
        Name passed to the PySeries constructor.
    values
        Sequence of Python values to convert.
    dtype
        Polars data type used to pick the constructor. When omitted, the
        type is inferred from the first non-None element of ``values``; an
        empty sequence defaults to ``Float32``.
    strict
        Forwarded unchanged to the selected PySeries constructor.

    Returns
    -------
    PySeries

    Raises
    ------
    ImportError
        If pyarrow is not available and ``values`` holds date, datetime or
        timedelta objects.
    ValueError
        If pyarrow is available and ``py_type_to_arrow_type`` rejects the
        inner type of a nested sequence.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values, strict)
        # These types are built by the constructor first and cast afterwards.
        if dtype in (Date, Datetime, Duration, Time, Categorical):
            pyseries = pyseries.cast(dtype, True)
        return pyseries

    # No explicit dtype: infer it from the first non-None element.
    value = _get_first_non_none(values)
    dtype_ = type(value) if value is not None else float

    if dtype_ in {date, datetime, timedelta}:
        if not _PYARROW_AVAILABLE:  # pragma: no cover
            raise ImportError(
                "'pyarrow' is required for converting a Sequence of date or datetime values to a PySeries."
            )
        # let arrow infer dtype if not timedelta
        # arrow uses microsecond durations by default, not supported yet.
        return arrow_to_pyseries(name, pa.array(values))

    if dtype_ == list or dtype_ == tuple:
        nested_value = _get_first_non_none(value)
        # Fall back to float when the first inner sequence is empty or holds
        # only None. The check must be on `nested_value`: `value` is never
        # None in this branch, so testing it would make the fallback dead
        # code and leave NoneType as the inferred inner type.
        nested_dtype = type(nested_value) if nested_value is not None else float

        # Lists of lists: build one PySeries per row, then combine them by
        # calling this function again on the resulting PySeries objects.
        if nested_dtype == list:
            return sequence_to_pyseries(
                name=name,
                values=[
                    sequence_to_pyseries(name, seq, dtype=None, strict=strict)
                    for seq in values
                ],
                dtype=None,
                strict=strict,
            )

        if not _PYARROW_AVAILABLE:
            # logs will show a panic if we infer wrong dtype
            # and its hard to error from rust side
            # to reduce the likelihood of this happening
            # we check the inner types of a sample of the elements first;
            # if the check fails, we will hit the PySeries.new_object
            if isinstance(value, list) and _inner_types_consistent(
                values, nested_dtype
            ):
                inner_dtype = py_type_to_dtype(nested_dtype)
                try:
                    return PySeries.new_list(name, values, inner_dtype)
                # NOTE(review): BaseException looks deliberate here; a panic
                # raised on the Rust side presumably surfaces outside the
                # Exception hierarchy. Confirm before narrowing.
                except BaseException:
                    pass
            # we create an object series if we get here
        else:
            try:
                nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
            except ValueError as e:  # pragma: no cover
                raise ValueError(
                    f"Cannot construct Series from sequence of {nested_dtype}."
                ) from e

            try:
                arrow_values = pa.array(values, pa.large_list(nested_arrow_dtype))
                return arrow_to_pyseries(name, arrow_values)
            except pa.lib.ArrowInvalid:
                pass

        # Convert mixed sequences like `[[12], "foo", 9]`
        return PySeries.new_object(name, values, strict)

    if dtype_ == pli.Series:
        return PySeries.new_series_list(name, [v.inner() for v in values], strict)

    if dtype_ == PySeries:
        return PySeries.new_series_list(name, values, strict)

    constructor = py_type_to_constructor(dtype_)

    # When only the generic object constructor matches, try a numpy-specific
    # constructor for this type and convert the values to an ndarray for it.
    if constructor == PySeries.new_object:
        np_constructor = numpy_type_to_constructor(dtype_)
        if np_constructor is not None:
            values = np.array(values)  # type: ignore
            constructor = np_constructor

    return constructor(name, values, strict)