コード例 #1
0
ファイル: construction.py プロジェクト: elferherrera/polars
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
    strict: bool = True,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values, strict)
        if dtype == Date32:
            pyseries = pyseries.cast(str(pl.Date32), True)
        elif dtype == Date64:
            pyseries = pyseries.cast(str(pl.Date64), True)
        return pyseries

    else:
        value = _get_first_non_none(values)
        dtype_ = type(value) if value is not None else float

        if dtype_ == date or dtype_ == datetime:
            if not _PYARROW_AVAILABLE:
                raise ImportError(
                    "'pyarrow' is required for converting a Sequence of date or datetime values to a PySeries."
                )
            return arrow_to_pyseries(name, pa.array(values))

        elif dtype_ == list or dtype_ == tuple or dtype_ == pl.Series:
            nested_value = _get_first_non_none(value)
            nested_dtype = type(nested_value) if value is not None else float

            if not _PYARROW_AVAILABLE:
                raise ImportError(
                    f"'pyarrow' is required for converting a Sequence of {nested_dtype} to a PySeries."
                )

            try:
                nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
            except ValueError as e:
                raise ValueError(
                    f"Cannot construct Series from sequence of {nested_dtype}."
                ) from e

            try:
                arrow_values = pa.array(values,
                                        pa.large_list(nested_arrow_dtype))
                return arrow_to_pyseries(name, arrow_values)
            # failure expected for mixed sequences like `[[12], "foo", 9]`
            except pa.lib.ArrowInvalid:
                return PySeries.new_object(name, values, strict)

        else:
            constructor = py_type_to_constructor(dtype_)
            return constructor(name, values, strict)
コード例 #2
0
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values)
        if dtype == Date32:
            pyseries = pyseries.cast_date32()
        elif dtype == Date64:
            pyseries = pyseries.cast_date64()
        return pyseries

    else:
        value = _get_first_non_none(values)
        dtype_ = type(value) if value is not None else float

        if dtype_ == date or dtype_ == datetime:
            return arrow_to_pyseries(name, pa.array(values))

        elif dtype_ == list or dtype_ == tuple:
            nested_value = _get_first_non_none(value)
            nested_dtype = type(nested_value) if value is not None else float

            try:
                nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
            except ValueError as e:
                raise ValueError(
                    f"Cannot construct Series from sequence of {nested_dtype}."
                ) from e

            try:
                arrow_values = pa.array(values,
                                        pa.large_list(nested_arrow_dtype))
                return arrow_to_pyseries(name, arrow_values)
            # failure expected for mixed sequences like `[[12], "foo", 9]`
            except pa.lib.ArrowInvalid:
                return PySeries.new_object(name, values)

        else:
            constructor = py_type_to_constructor(dtype_)
            return constructor(name, values)
コード例 #3
0
ファイル: construction.py プロジェクト: yutiansut/polars
def sequence_to_pyseries(
    name: str,
    values: Sequence[Any],
    dtype: Optional[Type[DataType]] = None,
    strict: bool = True,
) -> "PySeries":
    """
    Construct a PySeries from a sequence.
    """
    # Empty sequence defaults to Float32 type
    if not values and dtype is None:
        dtype = Float32

    if dtype is not None:
        constructor = polars_type_to_constructor(dtype)
        pyseries = constructor(name, values, strict)

        if dtype in (Date, Datetime, Duration, Time, Categorical):
            pyseries = pyseries.cast(dtype, True)

        return pyseries

    else:
        value = _get_first_non_none(values)
        dtype_ = type(value) if value is not None else float

        if dtype_ in {date, datetime, timedelta}:
            if not _PYARROW_AVAILABLE:  # pragma: no cover
                raise ImportError(
                    "'pyarrow' is required for converting a Sequence of date or datetime values to a PySeries."
                )
            # let arrow infer dtype if not timedelta
            # arrow uses microsecond durations by default, not supported yet.
            return arrow_to_pyseries(name, pa.array(values))

        elif dtype_ == list or dtype_ == tuple:
            nested_value = _get_first_non_none(value)
            nested_dtype = type(nested_value) if value is not None else float

            # recursively call Series constructor
            if nested_dtype == list:
                return sequence_to_pyseries(
                    name=name,
                    values=[
                        sequence_to_pyseries(name,
                                             seq,
                                             dtype=None,
                                             strict=strict) for seq in values
                    ],
                    dtype=None,
                    strict=strict,
                )

            # logs will show a panic if we infer wrong dtype
            # and its hard to error from rust side
            # to reduce the likelihood of this happening
            # we infer the dtype of first 100 elements
            # if all() fails, we will hit the PySeries.new_object
            if not _PYARROW_AVAILABLE:
                # check lists for consistent inner types
                if isinstance(value, list):
                    count = 0
                    equal_to_inner = True
                    for lst in values:
                        for vl in lst:
                            equal_to_inner = type(vl) == nested_dtype
                            if not equal_to_inner or count > 50:
                                break
                            count += 1
                    if equal_to_inner:
                        dtype = py_type_to_dtype(nested_dtype)
                        try:
                            return PySeries.new_list(name, values, dtype)
                        except BaseException:
                            pass
                # pass we create an object if we get here
            else:
                try:
                    nested_arrow_dtype = py_type_to_arrow_type(nested_dtype)
                except ValueError as e:  # pragma: no cover
                    raise ValueError(
                        f"Cannot construct Series from sequence of {nested_dtype}."
                    ) from e

                try:
                    arrow_values = pa.array(values,
                                            pa.large_list(nested_arrow_dtype))
                    return arrow_to_pyseries(name, arrow_values)
                except pa.lib.ArrowInvalid:
                    pass

            # Convert mixed sequences like `[[12], "foo", 9]`
            return PySeries.new_object(name, values, strict)

        elif dtype_ == pli.Series:
            return PySeries.new_series_list(name, [v.inner() for v in values],
                                            strict)
        elif dtype_ == PySeries:
            return PySeries.new_series_list(name, values, strict)

        else:
            constructor = py_type_to_constructor(dtype_)

            if constructor == PySeries.new_object:
                np_constructor = numpy_type_to_constructor(dtype_)
                if np_constructor is not None:
                    values = np.array(values)  # type: ignore
                    constructor = np_constructor

            return constructor(name, values, strict)