Example #1
0
def ibis_schema_apply_to(schema: sch.Schema, df: dd.DataFrame) -> dd.DataFrame:
    """Convert the columns of a dask DataFrame to match an Ibis schema.

    Parameters
    ----------
    schema : ibis.schema.Schema
        Maps each column name to the Ibis type it should have.
    df : dask.dataframe.DataFrame
        Frame whose columns are converted.

    Returns
    -------
    df : dask.dataframe.DataFrame
        The same object that was passed in.

    Notes
    -----
    Mutates `df`: converted columns are assigned back onto it.
    """
    for name, ibis_type in schema.items():
        expected = ibis_type.to_dask()
        series = df[name]
        actual = series.dtype

        try:
            differs = expected != actual
        except TypeError:
            # Some dtypes coming from dask cannot be compared; treat them as
            # different so the column still goes through `convert`.
            differs = True

        # String columns are always converted, even when the dtypes agree.
        if not (differs or isinstance(ibis_type, dt.String)):
            continue

        df[name] = convert(actual, ibis_type, series)

    return df
Example #2
0
def fmt_schema(schema: sch.Schema) -> str:
    """Format `schema` as an aligned, indented block of ``name type`` lines.

    Column names are left-aligned and padded to the width of the longest
    name. When ``ibis.options.repr.table_columns`` is set and smaller than
    the number of columns, the listing is shortened with `fmt_truncated`.

    Parameters
    ----------
    schema
        Ibis schema to format

    Returns
    -------
    str
        Formatted schema
    """
    names = schema.names
    # `default=0` keeps an empty schema from raising ValueError in `max`.
    maxlen = max(map(len, names), default=0)
    cols = [f"{name:<{maxlen}} {typ}" for name, typ in schema.items()]
    depth = ibis.options.repr.table_columns
    if depth is not None and depth < len(cols):
        first_column_name = names[0]
        raw = fmt_truncated(
            cols,
            depth=depth,
            sep="\n",
            # Center the ellipsis marker under the first column's name.
            ellipsis=util.VERTICAL_ELLIPSIS.center(len(first_column_name)),
        )
    else:
        raw = "\n".join(cols)

    return util.indent(raw, spaces=2)
Example #3
0
 def fetch_from_cursor(
     self,
     cursor: duckdb.DuckDBPyConnection,
     schema: sch.Schema,
 ):
     """Fetch the cursor's result and apply `schema` to it.

     The result is read with ``cursor.cursor.fetch_df()`` and returned after
     being passed through ``schema.apply_to``.
     """
     return schema.apply_to(cursor.cursor.fetch_df())
Example #4
0
 def fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
     """Build a DataFrame from `cursor` records and apply `schema` to it.

     Column labels come from ``cursor.keys()``. A non-empty result is handed
     to ``self._to_geodataframe`` when `geospatial_supported` is truthy;
     otherwise the schema-applied frame is returned as-is.
     """
     column_names = cursor.keys()
     records = pd.DataFrame.from_records(
         cursor, columns=column_names, coerce_float=True
     )
     typed = schema.apply_to(records)

     # Geo conversion is skipped for empty results or without geo support.
     if not (len(typed) and geospatial_supported):
         return typed
     return self._to_geodataframe(typed, schema)
Example #5
0
def schema(
    pairs: Iterable[tuple[str, dt.DataType]]
    | Mapping[str, dt.DataType]
    | None = None,
    names: Iterable[str] | None = None,
    types: Iterable[str | dt.DataType] | None = None,
) -> sch.Schema:
    """Validate and return a Schema object.

    Parameters
    ----------
    pairs
        List or dictionary of name, type pairs. Mutually exclusive with
        `names` and `types`.
    names
        Field names. Mutually exclusive with `pairs`.
    types
        Field types. Mutually exclusive with `pairs`.

    Returns
    -------
    Schema
        An ibis schema

    Examples
    --------
    >>> from ibis import schema
    >>> sc = schema([('foo', 'string'),
    ...              ('bar', 'int64'),
    ...              ('baz', 'boolean')])
    >>> sc2 = schema(names=['foo', 'bar', 'baz'],
    ...              types=['string', 'int64', 'boolean'])
    """
    # Without `pairs`, build the schema from the parallel names/types inputs.
    if pairs is None:
        return Schema(names, types)
    return Schema.from_dict(dict(pairs))
Example #6
0
    def create_table(
        self,
        table_name: str,
        obj: dd.DataFrame = None,
        schema: sch.Schema = None,
    ):
        """Store a table in ``self.dictionary`` under `table_name`.

        Parameters
        ----------
        table_name
            Key under which the table is stored.
        obj
            Object to store as-is. Takes precedence over `schema`.
        schema
            Used only when `obj` is None: a single-partition dask frame is
            built from a column-only pandas frame and passed through
            ``schema.apply_to``.

        Raises
        ------
        com.IbisError
            If both `obj` and `schema` are None.
        """
        if obj is None and schema is None:
            # NOTE(review): the message says "expr" although the parameter is
            # named `obj` -- confirm before rewording, callers may match it.
            raise com.IbisError('Must pass expr or schema')

        if obj is not None:
            table = obj
        else:
            column_names = [
                toolz.first(pair) for pair in ibis_schema_to_dask(schema)
            ]
            empty_frame = dd.from_pandas(
                pd.DataFrame(columns=column_names),
                npartitions=1,
            )
            table = schema.apply_to(empty_frame)

        self.dictionary[table_name] = table
Example #7
0
 def _get_table_schema(self, name):
     """Return the Schema built from the entry in ``self._tables`` for `name`."""
     table_fields = self._tables[name]
     return Schema.from_tuples(table_fields)
Example #8
0
 def _get_table_schema(self, name):
     """Return the Schema for `name`, looked up in ``self._tables``.

     Backtick characters are removed from `name` before the lookup.
     """
     unquoted = name.replace('`', '')
     table_fields = self._tables[unquoted]
     return Schema.from_tuples(table_fields)
Example #9
0
 def get_schema(self, name):
     """Return the Schema for `name`, looked up in ``MOCK_TABLES``.

     Backtick characters are removed from `name` before the lookup.
     """
     unquoted = name.replace('`', '')
     table_fields = MOCK_TABLES[unquoted]
     return Schema.from_tuples(table_fields)
Example #10
0
def _schema_to_sqlalchemy_columns(schema: sch.Schema) -> list[sa.Column]:
    """Create one SQLAlchemy column per field of `schema`.

    Each field's Ibis type is translated with `to_sqla_type`.
    """
    # NOTE(review): this calls lowercase `sa.column`, while the return
    # annotation names `sa.Column` -- confirm which construct is intended.
    columns = []
    for field_name, ibis_type in schema.items():
        columns.append(sa.column(field_name, to_sqla_type(ibis_type)))
    return columns
Example #11
0
 def _get_table_schema(self, name):
     """Return the Schema built from the entry in ``self._tables`` for `name`."""
     # `name` is used as the lookup key as-is; backticks are not stripped.
     table_fields = self._tables[name]
     return Schema.from_tuples(table_fields)