def ibis_schema_apply_to(schema: sch.Schema, df: dd.DataFrame) -> dd.DataFrame:
    """Coerce the columns of a dask DataFrame to match an Ibis schema.

    Each column named in `schema` is looked up in `df`. A column is passed
    through `convert` when its current dtype differs from the dask dtype
    produced by the schema's type, or when the schema type is a string type
    (string columns are always converted).

    Parameters
    ----------
    schema : ibis.schema.Schema
        Mapping of column names to Ibis data types.
    df : dask.dataframe.DataFrame
        Frame whose columns are converted.

    Returns
    -------
    df : dask.dataframe.DataFrame
        The same object that was passed in.

    Notes
    -----
    Mutates `df`: converted columns are assigned back onto it.
    """
    for name, ibis_type in schema.items():
        target_dtype = ibis_type.to_dask()
        series = df[name]
        current_dtype = series.dtype

        try:
            differs = target_dtype != current_dtype
        except TypeError:
            # Some dtypes coming from dask cannot be compared; treat them
            # as different so the column still gets converted.
            differs = True

        if not (differs or isinstance(ibis_type, dt.String)):
            continue

        df[name] = convert(current_dtype, ibis_type, series)

    return df
def fmt_schema(schema: sch.Schema) -> str:
    """Format `schema` as an indented, column-aligned listing.

    Each column is rendered as its name, left-justified to the width of the
    longest name, followed by its type. When `ibis.options.repr.table_columns`
    is set and is smaller than the number of columns, the listing is
    shortened with `fmt_truncated`, using a vertical ellipsis centered on the
    width of the first column's name.

    Parameters
    ----------
    schema
        Ibis schema to format

    Returns
    -------
    str
        Formatted schema, passed through `util.indent` with ``spaces=2``.
        A schema with no columns formats to an empty listing.
    """
    names = schema.names
    # `default=0` keeps a schema with no columns from raising ValueError,
    # which `max` does on an empty iterable.
    maxlen = max(map(len, names), default=0)
    cols = [f"{name:<{maxlen}} {typ}" for name, typ in schema.items()]

    depth = ibis.options.repr.table_columns
    if depth is not None and depth < len(cols):
        first_column_name = names[0]
        raw = fmt_truncated(
            cols,
            depth=depth,
            sep="\n",
            ellipsis=util.VERTICAL_ELLIPSIS.center(len(first_column_name)),
        )
    else:
        raw = "\n".join(cols)

    return util.indent(raw, spaces=2)
def _schema_to_sqlalchemy_columns(schema: sch.Schema) -> list[sa.Column]:
    """Build one SQLAlchemy column per entry of an Ibis schema.

    Every ``(name, type)`` pair in `schema` becomes ``sa.column(name, ...)``,
    with the Ibis type translated by `to_sqla_type`. Order follows
    ``schema.items()``.
    """
    # NOTE(review): the call is lowercase `sa.column`, while the return
    # annotation names `sa.Column` - confirm which one is intended.
    return [
        sa.column(col_name, to_sqla_type(ibis_type))
        for col_name, ibis_type in schema.items()
    ]