def apply(
    exprs: List[Union[str, "pli.Expr"]],
    f: Callable[[List["pli.Series"]], Union["pli.Series", Any]],
    return_dtype: Optional[Type[DataType]] = None,
) -> "pli.Expr":
    """
    Apply a custom function in a GroupBy context.

    The behavior depends on the context the expression is used in:

    * Select/Project
        Don't do this, use `map`.
    * GroupBy
        Expected type of `f`: Callable[[Series], Series].
        Applies a python function over each group.

    Parameters
    ----------
    exprs
        Input Series to f
    f
        Function to apply over the input
    return_dtype
        dtype of the output Series

    Returns
    -------
    Expr
    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    # apply_groups=True is what distinguishes `apply` from `map`.
    grouped_udf = _map_mul(pyexprs, f, return_dtype, apply_groups=True)
    return pli.wrap_expr(grouped_udf)
def map_binary(
    a: Union[str, "pli.Expr"],
    b: Union[str, "pli.Expr"],
    f: Callable[["pli.Series", "pli.Series"], "pli.Series"],
    return_dtype: Optional[Type[DataType]] = None,
) -> "pli.Expr":
    """
    Map a custom function over two columns and produce a single Series result.

    .. deprecated:: 0.10.4
        use `map` or `apply`

    Parameters
    ----------
    a
        Input Series a.
    b
        Input Series b.
    f
        Function to apply.
    return_dtype
        Output type of the udf.
    """
    # Plain strings are treated as column names.
    lhs = col(a) if isinstance(a, str) else a
    rhs = col(b) if isinstance(b, str) else b
    binary_udf = pybinary_function(lhs._pyexpr, rhs._pyexpr, f, return_dtype)
    return pli.wrap_expr(binary_udf)
def first(
    column: Optional[Union[str, "pli.Series"]] = None,
) -> Union["pli.Expr", Any]:
    """
    Get the first value.

    What is returned depends on the type of the input:

    - None -> expression to take first column of a context.
    - str -> syntactic sugar for `pl.col(..).first()`
    - Series -> Take first value in `Series`

    Raises
    ------
    IndexError
        If a `Series` is passed and it is empty.
    """
    if isinstance(column, pli.Series):
        # Eager path: index straight into the Series.
        if column.len() == 0:
            raise IndexError(
                "The series is empty, so no first value can be returned."
            )
        return column[0]

    if column is None:
        return pli.wrap_expr(_first())

    return col(column).first()
def arange(
    low: int | pli.Expr | pli.Series,
    high: int | pli.Expr | pli.Series,
    step: int = 1,
    *,
    eager: bool = False,
) -> pli.Expr | pli.Series:
    """
    Create a range expression.

    This can be used in a `select`, `with_column` etc. Be sure that the range size
    is equal to the DataFrame you are collecting.

    Parameters
    ----------
    low
        Lower bound of range.
    high
        Upper bound of range.
    step
        Step size of the range.
    eager
        If eager evaluation is `True`, a Series is returned instead of an Expr.

    Examples
    --------
    >>> df.lazy().filter(pl.col("foo") < pl.arange(0, 100)).collect()  # doctest: +SKIP

    """
    low_expr = pli.expr_to_lit_or_expr(low, str_to_lit=False)
    high_expr = pli.expr_to_lit_or_expr(high, str_to_lit=False)

    if not eager:
        return pli.wrap_expr(pyarange(low_expr._pyexpr, high_expr._pyexpr, step))

    # Eager mode: evaluate the lazy range against a throwaway one-row frame
    # and pull the resulting column out as a Series.
    scratch = pli.DataFrame({"a": [1]})
    range_expr = arange(low_expr, high_expr, step).alias("arange")
    return scratch.select(range_expr)["arange"]
def arg_where(condition: pli.Expr | pli.Series,
              eager: bool = False) -> pli.Expr | pli.Series:
    """
    Return indices where `condition` evaluates `True`.

    Parameters
    ----------
    condition
        Boolean expression to evaluate
    eager
        If `True`, `condition` must be a `Series`; it is evaluated immediately
        and a `Series` is returned instead of an expression.

    Raises
    ------
    ValueError
        If `eager=True` and `condition` is not a `Series`.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
    >>> df.select(
    ...     [
    ...         pl.arg_where(pl.col("a") % 2 == 0),
    ...     ]
    ... ).to_series()
    shape: (2,)
    Series: 'a' [u32]
    [
        1
        3
    ]

    """
    if not eager:
        cond_expr = pli.expr_to_lit_or_expr(condition, str_to_lit=True)
        return pli.wrap_expr(py_arg_where(cond_expr._pyexpr))

    if not isinstance(condition, pli.Series):
        raise ValueError(
            f"expected 'Series' in 'arg_where' if 'eager=True', got {type(condition)}"
        )

    # Route the Series through a frame so the lazy implementation can be reused.
    frame = condition.to_frame()
    selected = frame.select(arg_where(pli.col(condition.name)))
    return selected.to_series()
def repeat(
    value: float | int | str | bool | None,
    n: pli.Expr | int,
    *,
    eager: bool = False,
    name: str | None = None,
) -> pli.Expr | pli.Series:
    """
    Repeat a single value n times.

    Parameters
    ----------
    value
        Value to repeat.
    n
        Repeat `n` times.
    eager
        Run eagerly and collect into a `Series`.
    name
        Only used in `eager` mode. As expression, use `alias`.
    """
    if not eager:
        # Lazy path: an integer count is lifted into a literal expression.
        n_expr = lit(n) if isinstance(n, int) else n
        return pli.wrap_expr(_repeat(value, n_expr._pyexpr))

    series_name = "" if name is None else name
    dtype = py_type_to_dtype(type(value))
    return pli.Series._repeat(series_name, value, n, dtype)  # type: ignore[arg-type]
def repeat(
    value: Optional[Union[float, int, str, bool]],
    n: Union["pli.Expr", int],
    *,
    eager: bool = False,
    name: Optional[str] = None,
) -> Union["pli.Expr", "pli.Series"]:
    """
    Repeat a single value n times.

    Parameters
    ----------
    value
        Value to repeat.
    n
        Repeat `n` times.
    eager
        Run eagerly and collect into a `Series`.
    name
        Only used in `eager` mode. As expression, use `alias`.
    """
    if eager:
        # Eager path: build the Series directly; an unnamed Series gets "".
        resolved_name = name if name is not None else ""
        dtype = py_type_to_dtype(type(value))
        repeated = pli.Series._repeat(resolved_name, value, n, dtype)  # type: ignore
        return repeated

    count_expr = lit(n) if isinstance(n, int) else n
    return pli.wrap_expr(_repeat(value, count_expr._pyexpr))
def apply(
    exprs: list[str | pli.Expr],
    f: Callable[[list[pli.Series]], pli.Series | Any],
    return_dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    Apply a custom function in a GroupBy context.

    The behavior depends on the context the expression is used in:

    * Select
        Don't use apply, use `map`.
    * GroupBy
        Expected type of `f`: Callable[[Series], Series].
        Applies a python function over each group.

    Parameters
    ----------
    exprs
        Input Series to f
    f
        Function to apply over the input
    return_dtype
        dtype of the output Series

    Returns
    -------
    Expr
    """
    inputs = pli.selection_to_pyexpr_list(exprs)
    # apply_groups=True is what distinguishes `apply` from `map`.
    per_group = _map_mul(inputs, f, return_dtype, apply_groups=True)
    return pli.wrap_expr(per_group)
def map_binary(
    a: str | pli.Expr,
    b: str | pli.Expr,
    f: Callable[[pli.Series, pli.Series], pli.Series],
    return_dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    Map a custom function over two columns and produce a single Series result.

    .. deprecated:: 0.10.4
        Use :func:`map` or :func:`apply` instead.

    Parameters
    ----------
    a
        Input Series a.
    b
        Input Series b.
    f
        Function to apply.
    return_dtype
        Output type of the udf.
    """
    # Plain strings are treated as column names.
    left = col(a) if isinstance(a, str) else a
    right = col(b) if isinstance(b, str) else b
    mapped = pybinary_function(left._pyexpr, right._pyexpr, f, return_dtype)
    return pli.wrap_expr(mapped)
def fold(
    acc: pli.IntoExpr,
    f: Callable[[pli.Series, pli.Series], pli.Series],
    exprs: Sequence[pli.Expr | str] | pli.Expr,
) -> pli.Expr:
    """
    Accumulate over multiple columns horizontally/ row wise with a left fold.

    Parameters
    ----------
    acc
        Accumulator Expression. This is the value that will be initialized when
        the fold starts. For a sum this could for instance be lit(0).
    f
        Function to apply over the accumulator and the value.
        Fn(acc, value) -> new_value
    exprs
        Expressions to aggregate over. May also be a wildcard expression.
    """
    acc_expr = pli.expr_to_lit_or_expr(acc, str_to_lit=True)

    # A single expression, e.g. pl.col("*"), is wrapped so it can be handled
    # like any other sequence of expressions.
    expr_seq = [exprs] if isinstance(exprs, pli.Expr) else exprs

    pyexprs = pli.selection_to_pyexpr_list(expr_seq)
    return pli.wrap_expr(pyfold(acc_expr._pyexpr, f, pyexprs))
def otherwise(self, expr: Union[pli.Expr, int, float, str]) -> pli.Expr:
    """
    Values to return in case of the predicate being `False`.

    See Also: the `when` function.
    """
    fallback = pli.expr_to_lit_or_expr(expr)
    finished = self._pywhenthen.otherwise(fallback._pyexpr)
    return pli.wrap_expr(finished)
def _datetime(
    year: pli.Expr | str,
    month: pli.Expr | str,
    day: pli.Expr | str,
    hour: pli.Expr | str | None = None,
    minute: pli.Expr | str | None = None,
    second: pli.Expr | str | None = None,
    millisecond: pli.Expr | str | None = None,
) -> pli.Expr:
    """
    Create polars `Datetime` from distinct time components.

    Strings are interpreted as column names, not as string literals.

    Parameters
    ----------
    year
        column or literal.
    month
        column or literal, ranging from 1-12.
    day
        column or literal, ranging from 1-31.
    hour
        column or literal, ranging from 0-23.
    minute
        column or literal, ranging from 0-59.
    second
        column or literal, ranging from 0-59.
    millisecond
        column or literal, ranging from 0-999.

    Returns
    -------
    Expr of type `pl.Datetime`
    """
    year_expr = pli.expr_to_lit_or_expr(year, str_to_lit=False)
    month_expr = pli.expr_to_lit_or_expr(month, str_to_lit=False)
    day_expr = pli.expr_to_lit_or_expr(day, str_to_lit=False)

    # The time-of-day components are optional; an omitted component is
    # forwarded as None rather than being converted to an expression.
    if hour is not None:
        hour = pli.expr_to_lit_or_expr(hour, str_to_lit=False)._pyexpr
    if minute is not None:
        minute = pli.expr_to_lit_or_expr(minute, str_to_lit=False)._pyexpr
    if second is not None:
        second = pli.expr_to_lit_or_expr(second, str_to_lit=False)._pyexpr
    if millisecond is not None:
        millisecond = pli.expr_to_lit_or_expr(millisecond, str_to_lit=False)._pyexpr

    return pli.wrap_expr(
        py_datetime(
            year_expr._pyexpr,
            month_expr._pyexpr,
            day_expr._pyexpr,
            hour,
            minute,
            second,
            millisecond,
        )
    )
def struct(
    exprs: Union[Sequence[Union["pli.Expr", str]], "pli.Expr"],
) -> "pli.Expr":
    """
    Collect several columns into a Series of dtype Struct.

    Parameters
    ----------
    exprs
        Columns/Expressions to collect into a Struct

    Examples
    --------
    >>> pl.DataFrame(
    ...     {
    ...         "int": [1, 2],
    ...         "str": ["a", "b"],
    ...         "bool": [True, None],
    ...         "list": [[1, 2], [3]],
    ...     }
    ... ).select([pl.struct(pl.all()).alias("my_struct")])
    shape: (2, 1)
    ┌───────────────────────┐
    │ my_struct             │
    │ ---                   │
    │ struct{int, ... list} │
    ╞═══════════════════════╡
    │ {1,"a",true,[1, 2]}   │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ {2,"b",null,[3]}      │
    └───────────────────────┘

    Only collect specific columns as a struct:

    >>> df = pl.DataFrame(
    ...     {"a": [1, 2, 3, 4], "b": ["one", "two", "three", "four"], "c": [9, 8, 7, 6]}
    ... )
    >>> df.with_column(pl.struct(pl.col(["a", "b"])).alias("a_and_b"))
    shape: (4, 4)
    ┌─────┬───────┬─────┬───────────────────────────────┐
    │ a   ┆ b     ┆ c   ┆ a_and_b                       │
    │ --- ┆ ---   ┆ --- ┆ ---                           │
    │ i64 ┆ str   ┆ i64 ┆ struct[2]{'a': i64, 'b': str} │
    ╞═════╪═══════╪═════╪═══════════════════════════════╡
    │ 1   ┆ one   ┆ 9   ┆ {1,"one"}                     │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ two   ┆ 8   ┆ {2,"two"}                     │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ three ┆ 7   ┆ {3,"three"}                   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 4   ┆ four  ┆ 6   ┆ {4,"four"}                    │
    └─────┴───────┴─────┴───────────────────────────────┘

    """
    fields = pli.selection_to_pyexpr_list(exprs)
    struct_expr = _as_struct(fields)
    return pli.wrap_expr(struct_expr)
def otherwise(self, expr: pli.Expr | int | float | str | None) -> pli.Expr:
    """
    Values to return in case of the predicate being `False`.

    See Also
    --------
    when : Start another when, then, otherwise layer.
    then : Values to return in case of the predicate being `True`.
    """
    fallback_expr = pli.expr_to_lit_or_expr(expr)
    completed = self._pywhenthen.otherwise(fallback_expr._pyexpr)
    return pli.wrap_expr(completed)
def concat_list(
    exprs: Union[Sequence[Union[str, "pli.Expr", "pli.Series"]], "pli.Expr"],
) -> "pli.Expr":
    """
    Concat the arrays in a Series dtype List in linear time.

    Parameters
    ----------
    exprs
        Columns to concat into a List Series

    Examples
    --------
    Create lagged columns and collect them into a list. This mimics a rolling window.

    >>> df = pl.DataFrame(
    ...     {
    ...         "A": [1.0, 2.0, 9.0, 2.0, 13.0],
    ...     }
    ... )
    >>> (
    ...     df.with_columns(
    ...         [pl.col("A").shift(i).alias(f"A_lag_{i}") for i in range(3)]
    ...     ).select(
    ...         [
    ...             pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias(
    ...                 "A_rolling"
    ...             )
    ...         ]
    ...     )
    ... )
    shape: (5, 1)
    ┌─────────────────┐
    │ A_rolling       │
    │ ---             │
    │ list [f64]      │
    ╞═════════════════╡
    │ [null, null, 1] │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [null, 1, 2]    │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [1, 2, 9]       │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [2, 9, 2]       │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [9, 2, 13]      │
    └─────────────────┘

    """
    columns = pli.selection_to_pyexpr_list(exprs)
    list_expr = _concat_lst(columns)
    return pli.wrap_expr(list_expr)
def concat_str(
    exprs: Union[Sequence[Union["pli.Expr", str]], "pli.Expr"],
    sep: str = "",
) -> "pli.Expr":
    """
    Horizontally Concat Utf8 Series in linear time. Non utf8 columns are cast to utf8.

    Parameters
    ----------
    exprs
        Columns to concat into a Utf8 Series
    sep
        String value that will be used to separate the values.
    """
    parts = pli.selection_to_pyexpr_list(exprs)
    joined = _concat_str(parts, sep)
    return pli.wrap_expr(joined)
def concat_str(exprs: Sequence[pli.Expr | str] | pli.Expr,
               sep: str = "") -> pli.Expr:
    """
    Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

    Parameters
    ----------
    exprs
        Columns to concat into a Utf8 Series.
    sep
        String value that will be used to separate the values.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 3],
    ...         "b": ["dogs", "cats", None],
    ...         "c": ["play", "swim", "walk"],
    ...     }
    ... )
    >>> df.with_columns(
    ...     [
    ...         pl.concat_str(
    ...             [
    ...                 pl.col("a") * 2,
    ...                 pl.col("b"),
    ...                 pl.col("c"),
    ...             ],
    ...             sep=" ",
    ...         ).alias("full_sentence"),
    ...     ]
    ... )
    shape: (3, 4)
    ┌─────┬──────┬──────┬───────────────┐
    │ a   ┆ b    ┆ c    ┆ full_sentence │
    │ --- ┆ ---  ┆ ---  ┆ ---           │
    │ i64 ┆ str  ┆ str  ┆ str           │
    ╞═════╪══════╪══════╪═══════════════╡
    │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ null ┆ walk ┆ null          │
    └─────┴──────┴──────┴───────────────┘

    """
    pieces = pli.selection_to_pyexpr_list(exprs)
    concatenated = _concat_str(pieces, sep)
    return pli.wrap_expr(concatenated)
def min(column: str | list[pli.Expr | str] | pli.Series) -> pli.Expr | Any:
    """
    Get the minimum value.

    Can be used horizontally or vertically.

    Parameters
    ----------
    column
        Column(s) to be used in aggregation. Will lead to different behavior based on
        the input:

        - Union[str, Series] -> aggregate the minimum value of that column.
        - List[Expr] -> aggregate the minimum value horizontally.
    """
    if isinstance(column, pli.Series):
        # Eager path: reduce the Series directly.
        return column.min()
    if isinstance(column, list):
        # A list of expressions is reduced across columns.
        exprs = pli.selection_to_pyexpr_list(column)
        return pli.wrap_expr(_min_exprs(exprs))
    return col(column).min()
def cov(
    a: str | pli.Expr,
    b: str | pli.Expr,
) -> pli.Expr:
    """
    Compute the covariance between two columns/ expressions.

    Parameters
    ----------
    a
        Column name or Expression.
    b
        Column name or Expression.
    """
    # Plain strings are treated as column names.
    first_expr = col(a) if isinstance(a, str) else a
    second_expr = col(b) if isinstance(b, str) else b
    covariance = pycov(first_expr._pyexpr, second_expr._pyexpr)
    return pli.wrap_expr(covariance)
def spearman_rank_corr(
    a: Union[str, "pli.Expr"],
    b: Union[str, "pli.Expr"],
) -> "pli.Expr":
    """
    Compute the spearman rank correlation between two columns.

    Parameters
    ----------
    a
        Column name or Expression.
    b
        Column name or Expression.
    """
    # Plain strings are treated as column names.
    x_expr = col(a) if isinstance(a, str) else a
    y_expr = col(b) if isinstance(b, str) else b
    rank_corr = pyspearman_rank_corr(x_expr._pyexpr, y_expr._pyexpr)
    return pli.wrap_expr(rank_corr)
def count(column: str | pli.Series | None = None) -> pli.Expr | int:
    """
    Count the number of values in this column/context.

    Parameters
    ----------
    column
        If dtype is:

        * ``pl.Series`` : count the values in the series.
        * ``str`` : count the values in this column.
        * ``None`` : count the number of values in this context.
    """
    if isinstance(column, pli.Series):
        # Eager path: the count is the Series length.
        return column.len()
    if column is not None:
        return col(column).count()
    return pli.wrap_expr(_count())
def pearson_corr(
    a: str | pli.Expr,
    b: str | pli.Expr,
) -> pli.Expr:
    """
    Compute the pearson's correlation between two columns.

    Parameters
    ----------
    a
        Column name or Expression.
    b
        Column name or Expression.
    """
    # Plain strings are treated as column names.
    x_expr = col(a) if isinstance(a, str) else a
    y_expr = col(b) if isinstance(b, str) else b
    correlation = pypearson_corr(x_expr._pyexpr, y_expr._pyexpr)
    return pli.wrap_expr(correlation)
def count(
    column: Optional[Union[str, "pli.Series"]] = None,
) -> Union["pli.Expr", int]:
    """
    Count the number of values in this column/context.

    Parameters
    ----------
    column
        If dtype is:

        pl.Series -> count the values in the series
        str -> count the values in this column
        None -> count the number of values in this context
    """
    if isinstance(column, pli.Series):
        # Eager path: the count is the Series length.
        return column.len()
    if column is not None:
        return col(column).count()
    return pli.wrap_expr(_count())
def argsort_by(
    exprs: List[Union["pli.Expr", str]],
    reverse: Union[List[bool], bool] = False,
) -> "pli.Expr":
    """
    Find the indexes that would sort the columns.

    Argsort by multiple columns. The first column will be used for the ordering.
    If there are duplicates in the first column, the second column will be used to
    determine the ordering and so on.

    Parameters
    ----------
    exprs
        Columns used to determine the ordering.
    reverse
        Default is ascending.
    """
    if isinstance(reverse, list):
        reverse_flags = reverse
    else:
        # A single flag is broadcast to every sort column.
        reverse_flags = [reverse] * len(exprs)

    sort_keys = pli.selection_to_pyexpr_list(exprs)
    return pli.wrap_expr(pyargsort_by(sort_keys, reverse_flags))
def max(
    column: Union[str, List[Union["pli.Expr", str]], "pli.Series"],
) -> Union["pli.Expr", Any]:
    """
    Get the maximum value. Can be used horizontally or vertically.

    Parameters
    ----------
    column
        Column(s) to be used in aggregation. Will lead to different behavior based on
        the input:

        - Union[str, Series] -> aggregate the maximum value of that column.
        - List[Expr] -> aggregate the maximum value horizontally.
    """
    if isinstance(column, pli.Series):
        # Eager path: reduce the Series directly.
        return column.max()

    if isinstance(column, list):
        # A list of expressions is reduced across columns.
        horizontal = pli.selection_to_pyexpr_list(column)
        return pli.wrap_expr(_max_exprs(horizontal))

    return col(column).max()
def last(column: str | pli.Series | None = None) -> pli.Expr | Any:
    """
    Get the last value.

    Depending on the input type this function does different things:

    - None -> expression to take last column of a context.
    - str -> syntactic sugar for `pl.col(..).last()`
    - Series -> Take last value in `Series`

    Raises
    ------
    IndexError
        If a `Series` is passed and it is empty.
    """
    if column is None:
        return pli.wrap_expr(_last())

    if isinstance(column, pli.Series):
        # Eager path: returns the element itself rather than an expression.
        if column.len() > 0:
            return column[-1]
        raise IndexError("The series is empty, so no last value can be returned.")

    return col(column).last()
def map(
    exprs: list[str] | list[pli.Expr],
    f: Callable[[list[pli.Series]], pli.Series],
    return_dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    Map a custom function over multiple columns/expressions.

    Produces a single Series result.

    Parameters
    ----------
    exprs
        Input Series to f
    f
        Function to apply over the input
    return_dtype
        dtype of the output Series

    Returns
    -------
    Expr
    """
    inputs = pli.selection_to_pyexpr_list(exprs)
    # apply_groups=False is what distinguishes `map` from `apply`.
    mapped = _map_mul(inputs, f, return_dtype, apply_groups=False)
    return pli.wrap_expr(mapped)
def argsort_by(
    exprs: pli.Expr | str | Sequence[pli.Expr | str],
    reverse: list[bool] | bool = False,
) -> pli.Expr:
    """
    Find the indexes that would sort the columns.

    Argsort by multiple columns. The first column will be used for the ordering.
    If there are duplicates in the first column, the second column will be used to
    determine the ordering and so on.

    Parameters
    ----------
    exprs
        Columns used to determine the ordering.
    reverse
        Default is ascending.
    """
    # A str is itself a Sequence, so it is singled out explicitly before
    # deciding whether the input needs wrapping in a list.
    is_single = isinstance(exprs, str) or not isinstance(exprs, Sequence)
    sort_columns = [exprs] if is_single else exprs

    # A single flag is broadcast to every sort column.
    if isinstance(reverse, bool):
        reverse_flags = [reverse] * len(sort_columns)
    else:
        reverse_flags = reverse

    sort_keys = pli.selection_to_pyexpr_list(sort_columns)
    return pli.wrap_expr(pyargsort_by(sort_keys, reverse_flags))
def lit(
    value: None | (float | int | str | date | datetime | pli.Series | np.ndarray | Any),
    dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    A literal value.

    Parameters
    ----------
    value
        Value that should be used as a `literal`.
    dtype
        Optionally define a dtype.

    Examples
    --------
    Literal integer:

    >>> pl.lit(1)  # doctest: +IGNORE_RESULT

    Literal str:

    >>> pl.lit("foo")  # doctest: +IGNORE_RESULT

    Literal datetime:

    >>> from datetime import datetime
    >>> pl.lit(datetime(2021, 1, 20))  # doctest: +IGNORE_RESULT

    Literal Null:

    >>> pl.lit(None)  # doctest: +IGNORE_RESULT

    Literal eager Series:

    >>> pl.lit(pl.Series("a", [1, 2, 3]))  # doctest: +IGNORE_RESULT

    """
    # `datetime` is a subclass of `date`, so it has to be tested first.
    if isinstance(value, datetime):
        tu = "ns" if in_nanoseconds_window(value) else "ms"
        timestamp = _datetime_to_pl_timestamp(value, tu)
        return lit(timestamp).cast(Datetime).dt.and_time_unit(tu)

    if isinstance(value, timedelta):
        tu = "ns" if timedelta_in_nanoseconds_window(value) else "ms"
        duration = _timedelta_to_pl_timedelta(value, tu)
        return lit(duration).cast(Duration).dt.and_time_unit(tu, dtype=Duration)

    if isinstance(value, date):
        # Go through a datetime literal and cast it down to Date.
        as_datetime = datetime(value.year, value.month, value.day)
        return lit(as_datetime).cast(Date)

    if isinstance(value, pli.Series):
        series_name = value.name
        series_expr = pli.wrap_expr(pylit(value._s))
        # Only alias when the Series actually carries a name.
        return series_expr if series_name == "" else series_expr.alias(series_name)

    if _NUMPY_AVAILABLE and isinstance(value, np.ndarray):
        return lit(pli.Series("", value))

    if dtype:
        return pli.wrap_expr(pylit(value)).cast(dtype)

    # numpy literals like np.float32(0)
    # have an item
    if hasattr(value, "item"):
        value = value.item()  # type: ignore[union-attr]
    return pli.wrap_expr(pylit(value))
def col(
    name: (str | list[str] | Sequence[PolarsDataType] | pli.Series | PolarsDataType),
) -> pli.Expr:
    """
    A column in a DataFrame.

    Can be used to select:

    - a single column by name
    - all columns by using a wildcard `"*"`
    - column by regular expression if the regex starts with `^` and ends with `$`

    Parameters
    ----------
    name
        A string that holds the name of the column. A list of names, a Series
        (converted to a list), a data type, or a list of data types is also
        accepted.

    Raises
    ------
    ValueError
        If a non-empty list is passed whose first element is neither a string
        nor a data type.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "ham": [1, 2, 3],
    ...         "hamburger": [11, 22, 33],
    ...         "foo": [3, 2, 1],
    ...     }
    ... )
    >>> df.select(pl.col("foo"))
    shape: (3, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    ├╌╌╌╌╌┤
    │ 2   │
    ├╌╌╌╌╌┤
    │ 1   │
    └─────┘
    >>> df.select(pl.col("*"))
    shape: (3, 3)
    ┌─────┬───────────┬─────┐
    │ ham ┆ hamburger ┆ foo │
    │ --- ┆ ---       ┆ --- │
    │ i64 ┆ i64       ┆ i64 │
    ╞═════╪═══════════╪═════╡
    │ 1   ┆ 11        ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 22        ┆ 2   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 33        ┆ 1   │
    └─────┴───────────┴─────┘
    >>> df.select(pl.col("^ham.*$"))
    shape: (3, 2)
    ┌─────┬───────────┐
    │ ham ┆ hamburger │
    │ --- ┆ ---       │
    │ i64 ┆ i64       │
    ╞═════╪═══════════╡
    │ 1   ┆ 11        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ 22        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ 33        │
    └─────┴───────────┘
    >>> df.select(pl.col("*").exclude("ham"))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘
    >>> df.select(pl.col(["hamburger", "foo"]))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘

    """
    if isinstance(name, pli.Series):
        name = name.to_list()  # type: ignore[assignment]

    # note: we need the typing.cast call here twice to make mypy happy under Python 3.7
    # On Python 3.10, it is not needed. We use cast as it works across versions, ignoring
    # the typing error would lead to unneeded ignores under Python 3.10.
    if isclass(name) and issubclass(cast(type, name), DataType):
        name = [cast(type, name)]

    if isinstance(name, DataType):
        return pli.wrap_expr(_dtype_cols([name]))

    if not isinstance(name, list):
        return pli.wrap_expr(pycol(name))

    # From here on `name` is a list; its first element decides how the whole
    # list is interpreted.
    if len(name) == 0 or isinstance(name[0], str):
        return pli.wrap_expr(pycols(name))

    head = name[0]
    head_is_dtype = (isclass(head) and issubclass(head, DataType)) or isinstance(
        head, DataType
    )
    if not head_is_dtype:
        raise ValueError("did expect argument of List[str] or List[DataType]")
    return pli.wrap_expr(_dtype_cols(name))