def col(name: Union[str, tp.List[str]]) -> "pl.Expr":
    """
    Reference one or more DataFrame columns as an expression.

    The selector can be:

    * a single column name
    * the wildcard `"*"`, which selects all columns
    * a regular expression, if it starts with `^` and ends with `$`
    * a list of column names

    Parameters
    ----------
    name
        A string holding the column name (or wildcard / regex pattern), or a
        list of such strings.

    Returns
    -------
    pl.Expr
        The result of ``pl.lazy.expr.wrap_expr`` applied to the native column
        expression.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "ham": [1, 2, 3],
    ...         "hamburger": [11, 22, 33],
    ...         "foo": [3, 2, 1],
    ...     }
    ... )
    >>> df.select(col("foo"))
    shape: (3, 1)
    ╭─────╮
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    ├╌╌╌╌╌┤
    │ 2   │
    ├╌╌╌╌╌┤
    │ 1   │
    ╰─────╯
    >>> df.select(col("*"))
    shape: (3, 3)
    ╭─────┬───────────┬─────╮
    │ ham ┆ hamburger ┆ foo │
    │ --- ┆ ---       ┆ --- │
    │ i64 ┆ i64       ┆ i64 │
    ╞═════╪═══════════╪═════╡
    │ 1   ┆ 11        ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 22        ┆ 2   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 33        ┆ 1   │
    ╰─────┴───────────┴─────╯
    >>> df.select(col("^ham.*$"))
    shape: (3, 2)
    ╭─────┬───────────╮
    │ ham ┆ hamburger │
    │ --- ┆ ---       │
    │ i64 ┆ i64       │
    ╞═════╪═══════════╡
    │ 1   ┆ 11        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ 22        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ 33        │
    ╰─────┴───────────╯
    >>> df.select(col("*").exclude("ham"))
    shape: (3, 2)
    ╭───────────┬─────╮
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    ╰───────────┴─────╯
    >>> df.select(col(["hamburger", "foo"]))
    shape: (3, 2)
    ╭───────────┬─────╮
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    ╰───────────┴─────╯

    """
    # A list goes to the multi-column constructor; anything else is treated as
    # a single name and goes to the single-column constructor.
    inner = pycols(name) if isinstance(name, list) else pycol(name)
    return pl.lazy.expr.wrap_expr(inner)
def col(
    name: (
        str | list[str] | Sequence[PolarsDataType] | pli.Series | PolarsDataType
    ),
) -> pli.Expr:
    """
    A column in a DataFrame.

    Can be used to select:

    - a single column by name
    - all columns by using a wildcard `"*"`
    - column by regular expression if the regex starts with `^` and ends with `$`
    - several columns at once, by passing a list, tuple or Series of names
    - columns via a data type (class or instance), or a list/tuple of data
      types, which is handed to the dtype-based selector ``_dtype_cols``

    Parameters
    ----------
    name
        A string that holds the name of the column, a list/tuple/Series of such
        strings, a single data type, or a list/tuple of data types.

    Raises
    ------
    ValueError
        If a non-empty list/tuple/Series is passed whose first element is
        neither a string nor a data type.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "ham": [1, 2, 3],
    ...         "hamburger": [11, 22, 33],
    ...         "foo": [3, 2, 1],
    ...     }
    ... )
    >>> df.select(pl.col("foo"))
    shape: (3, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    ├╌╌╌╌╌┤
    │ 2   │
    ├╌╌╌╌╌┤
    │ 1   │
    └─────┘
    >>> df.select(pl.col("*"))
    shape: (3, 3)
    ┌─────┬───────────┬─────┐
    │ ham ┆ hamburger ┆ foo │
    │ --- ┆ ---       ┆ --- │
    │ i64 ┆ i64       ┆ i64 │
    ╞═════╪═══════════╪═════╡
    │ 1   ┆ 11        ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 22        ┆ 2   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 33        ┆ 1   │
    └─────┴───────────┴─────┘
    >>> df.select(pl.col("^ham.*$"))
    shape: (3, 2)
    ┌─────┬───────────┐
    │ ham ┆ hamburger │
    │ --- ┆ ---       │
    │ i64 ┆ i64       │
    ╞═════╪═══════════╡
    │ 1   ┆ 11        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ 22        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ 33        │
    └─────┴───────────┘
    >>> df.select(pl.col("*").exclude("ham"))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘
    >>> df.select(pl.col(["hamburger", "foo"]))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘

    """
    if isinstance(name, pli.Series):
        name = name.to_list()  # type: ignore[assignment]
    elif isinstance(name, tuple):
        # The signature advertises Sequence[...], so normalise tuples to a
        # list; otherwise they would skip the multi-column branch below and
        # be passed to the single-name constructor.
        name = list(name)

    # note: we need the typing.cast call here twice to make mypy happy under
    # Python 3.7. On Python 3.10, it is not needed. We use cast as it works
    # across versions; ignoring the typing error would lead to unneeded
    # ignores under Python 3.10.
    if isclass(name) and issubclass(cast(type, name), DataType):
        # A bare dtype class is wrapped in a list so it is handled by the
        # list branch below.
        name = [cast(type, name)]

    if isinstance(name, DataType):
        return pli.wrap_expr(_dtype_cols([name]))

    if isinstance(name, list):
        # Only the first element is inspected to decide how to dispatch; an
        # empty list goes down the column-name path.
        if not name or isinstance(name[0], str):
            return pli.wrap_expr(pycols(name))

        first = name[0]
        if (isclass(first) and issubclass(first, DataType)) or isinstance(
            first, DataType
        ):
            return pli.wrap_expr(_dtype_cols(name))

        raise ValueError("did expect argument of List[str] or List[DataType]")

    return pli.wrap_expr(pycol(name))