def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the indicated table and database

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None
      Not used by this implementation.

    Returns
    -------
    schema : ibis Schema
    """
    # Query the table details a single time; every column contributes a
    # (name, parsed type) pair, in the order the connection reports them.
    return sch.schema(
        [
            (col.name, MapDDataType.parse(col.type))
            for col in self.con.get_table_details(table_name)
        ]
    )
def schema_from_table(table, schema=None):
    """Build an ibis schema from the columns of a SQLAlchemy ``Table``.

    Parameters
    ----------
    table : sa.Table
    schema : mapping, optional
        Explicit types keyed by column name. A column found here takes its
        type from this mapping instead of from the SQLAlchemy column.

    Returns
    -------
    schema
        The result of ``sch.schema`` applied to one ``(name, dtype)`` pair
        per column of `table`.
    """
    overrides = {} if schema is None else schema

    def resolve(name, column):
        # An explicit override wins over the type declared on the column.
        if name in overrides:
            return dt.dtype(overrides[name])
        # Use the dialect of the bound engine when there is one, otherwise
        # a default SQLAlchemyDialect instance.
        dialect = getattr(table.bind, 'dialect', SQLAlchemyDialect())
        return dt.dtype(dialect, column.type, nullable=column.nullable)

    return sch.schema(
        [
            (name, resolve(name, column))
            for name, column in table.columns.items()
        ]
    )
def infer_pandas_schema(df, schema=None):
    """Infer an ibis schema from the dtypes of a pandas ``DataFrame``.

    Parameters
    ----------
    df : pandas.DataFrame
    schema : mapping, optional
        Explicit types keyed by column name. A column found here takes its
        type from this mapping and is not inferred.

    Returns
    -------
    schema
        The result of ``sch.schema`` applied to one ``(name, dtype)`` pair
        per column of `df`.

    Raises
    ------
    TypeError
        If a column name is not a string, or if an object-dtype column is
        inferred as ``'mixed'`` or ``'decimal'``.
    """
    schema = schema if schema is not None else {}

    pairs = []
    # ``Series.items`` is used because ``Series.iteritems`` was removed in
    # pandas 2.0.
    for column_name, pandas_dtype in df.dtypes.items():
        if not isinstance(column_name, str):
            raise TypeError(
                'Column names must be strings to use the pandas backend'
            )

        if column_name in schema:
            ibis_dtype = dt.dtype(schema[column_name])
        elif pandas_dtype == np.object_:
            # Object columns carry no element type, so inspect the values.
            inferred_dtype = infer_pandas_dtype(df[column_name], skipna=True)
            if inferred_dtype in {'mixed', 'decimal'}:
                # TODO: in principle we can handle decimal (added in pandas
                # 0.23)
                raise TypeError(
                    'Unable to infer type of column {0!r}. Try instantiating '
                    'your table from the client with client.table('
                    "'my_table', schema={{{0!r}: <explicit type>}})".format(
                        column_name
                    )
                )
            ibis_dtype = _inferable_pandas_dtypes[inferred_dtype]
        else:
            ibis_dtype = dt.dtype(pandas_dtype)

        pairs.append((column_name, ibis_dtype))

    return sch.schema(pairs)
def table(schema, arg):
    """A table argument.

    Parameters
    ----------
    schema : Union[sch.Schema, List[Tuple[str, dt.DataType]]]
        A validator for the table's columns. Only column subset validators are
        currently supported. Accepts any arguments that `sch.schema` accepts.
        See the example for usage.
    arg : The validatable argument.

    Returns
    -------
    arg
        The argument itself, when it passes validation.

    Raises
    ------
    com.IbisTypeError
        If `arg` is not a table expression, or if its schema does not
        compare ``>=`` to ``sch.schema(schema)``.

    Examples
    --------
    The following op will accept an argument named ``'table'``. Note that the
    ``schema`` argument specifies rules for columns that are required to be in
    the table: ``time``, ``group`` and ``value1``. These must match the types
    specified in the column rules. Column ``value2`` is optional, but if
    present it must be of the specified type. The table may have extra columns
    not specified in the schema.
    """
    # Raise instead of asserting: asserts are stripped under ``python -O``,
    # and a type error here should look like every other rule failure.
    if not isinstance(arg, ir.TableExpr):
        raise com.IbisTypeError(
            'Argument is not a table; got type {}'.format(type(arg).__name__)
        )

    if arg.schema() >= sch.schema(schema):
        return arg

    raise com.IbisTypeError(
        'Argument is not a table with column subset of {}'.format(schema)
    )
def infer_parquet_schema(schema):
    """Infer an ibis schema from a parquet schema.

    The input is converted with ``schema.to_arrow_schema()``; every field is
    mapped to an ibis dtype, and fields whose name matches
    ``__index_level_<digits>__`` are left out of the result.
    """
    # Convert every field first (including the ones dropped below), then
    # filter by name.
    converted = [
        (dt.dtype(field.type, nullable=field.nullable), field.name)
        for field in schema.to_arrow_schema()
    ]
    return sch.schema(
        [
            (name, ibis_dtype)
            for ibis_dtype, name in converted
            if re.match(r'^__index_level_\d+__$', name) is None
        ]
    )
def schema(self):
    """Return the schema of ``self.expr``.

    Table expressions, expression lists and ``sch.HasSchema`` objects report
    their own schema; a value expression yields a one-column schema made of
    its name and type.

    Raises
    ------
    ValueError
        If ``self.expr`` is none of the above.
    """
    expr = self.expr

    if isinstance(expr, (ir.TableExpr, ir.ExprList, sch.HasSchema)):
        return expr.schema()

    if isinstance(expr, ir.ValueExpr):
        single_column = (expr.get_name(), expr.type())
        return sch.schema([single_column])

    raise ValueError(
        'Expression with type {} does not have a '
        'schema'.format(type(expr))
    )
def table(self, name, path=None, schema=None, **kwargs):
    """Return a table expression for the file-backed table `name`.

    Parameters
    ----------
    name : str
        Must be one of ``self.list_tables(path)``.
    path : optional
        Directory containing the file; ``self.root`` when omitted.
    schema : optional
        Schema passed to the sample read; an empty schema when omitted or
        falsy.
    **kwargs
        Forwarded both to ``_read_csv`` and to ``self.table_class``.

    Raises
    ------
    AttributeError
        If `name` is not a known table.
    """
    if name not in self.list_tables(path):
        raise AttributeError(name)

    directory = self.root if path is None else path
    source_file = directory / f"{name}.{self.extension}"

    # Read a 50-row sample; the table's schema is inferred from it.
    read_schema = schema or sch.schema([])
    sample = _read_csv(
        source_file, schema=read_schema, header=0, nrows=50, **kwargs
    )
    inferred = sch.infer(sample)

    expr = self.table_class(name, inferred, self, **kwargs).to_expr()

    # Remember which file backs this table name.
    self.dictionary[name] = source_file
    return expr
def schema_from_table(table, schema=None):
    """Build an ibis schema from the columns of a SQLAlchemy ``Table``.

    Parameters
    ----------
    table : sa.Table
    schema : mapping, optional
        Explicit types keyed by column name. A column found here takes its
        type from this mapping instead of from the SQLAlchemy column.

    Returns
    -------
    schema
        The result of ``sch.schema`` applied to one ``(name, dtype)`` pair
        per column of `table`.
    """
    overrides = {} if schema is None else schema

    return sch.schema(
        [
            (
                name,
                # An explicit override wins over the declared column type.
                dt.dtype(overrides[name])
                if name in overrides
                else dt.dtype(column.type, nullable=column.nullable),
            )
            for name, column in table.columns.items()
        ]
    )
def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the given table and database.

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None
      Not used by this implementation.

    Returns
    -------
    schema : ibis Schema
    """
    column_types = {}
    for col in self.con.get_table_details(table_name):
        # Look up the dtype factory for the reported SQL type and
        # instantiate it with the column's nullability.
        make_dtype = omniscidb_dtypes.sql_to_ibis_dtypes[col.type]
        column_types[col.name] = make_dtype(nullable=col.nullable)

    return sch.schema(list(column_types.items()))
def table(self, name, path=None, schema=None, **kwargs):
    """Return a table expression for the file-backed table `name`.

    The file ``<path>/<name>.<self.extension>`` is sampled (first 50 rows)
    with ``_read_csv``, the schema is inferred from that sample, and the
    file location is recorded in ``self.dictionary``. `path` defaults to
    ``self.root``; `kwargs` are forwarded to both ``_read_csv`` and
    ``self.table_class``.

    Raises
    ------
    AttributeError
        If `name` is not in ``self.list_tables(path)``.
    """
    known_tables = self.list_tables(path)
    if name not in known_tables:
        raise AttributeError(name)

    if path is None:
        path = self.root
    filename = "{}.{}".format(name, self.extension)
    location = path / filename

    # The caller's schema (or an empty one) only guides the sample read;
    # the table itself gets the schema inferred from the sample.
    sample = _read_csv(
        location,
        schema=schema or sch.schema([]),
        header=0,
        nrows=50,
        **kwargs
    )
    table_schema = sch.infer(sample)

    result = self.table_class(name, table_schema, self, **kwargs).to_expr()
    self.dictionary[name] = location
    return result
def schema(self):
    """Return the schema of the expression.

    Returns
    -------
    Schema
        The expression's own schema when it has one, otherwise a
        single-column schema built from a value expression's name and type.

    Raises
    ------
    ValueError
        if self.expr doesn't have a schema.
    """
    has_own_schema = isinstance(
        self.expr, (ir.TableExpr, ir.ExprList, sch.HasSchema)
    )
    if has_own_schema:
        return self.expr.schema()

    if not isinstance(self.expr, ir.ValueExpr):
        raise ValueError(
            'Expression with type {} does not have a '
            'schema'.format(type(self.expr))
        )

    column_name = self.expr.get_name()
    column_type = self.expr.type()
    return sch.schema([(column_name, column_type)])
def ast_schema(self, query_ast):
    """Return the schema of the expression behind a query AST.

    Parameters
    ----------
    query_ast
        The object's ``dml`` attribute is inspected, or the object itself
        when it has no ``dml``. The expression is taken from ``parent_expr``
        if present, else from ``table_set``, else ``None``.

    Returns
    -------
    Schema

    Raises
    ------
    ValueError
        if the resolved expression doesn't have a schema.
    """
    dml = getattr(query_ast, 'dml', query_ast)
    expr = getattr(dml, 'parent_expr', getattr(dml, 'table_set', None))

    if isinstance(expr, (ir.TableExpr, sch.HasSchema)):
        return expr.schema()
    elif isinstance(expr, ir.ValueExpr):
        return sch.schema([(expr.get_name(), expr.type())])
    else:
        # Report the expression that was actually inspected; ``self`` is not
        # consulted anywhere else in this method.
        raise ValueError(
            'Expression with type {} does not have a '
            'schema'.format(type(expr))
        )
def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the indicated table and database

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None

    Returns
    -------
    schema : ibis Schema
    """
    target = self._fully_qualified_name(table_name, database)
    describe_result = self.raw_sql('DESC {0}'.format(target), results=True)

    # Only the first element of the three-part result is used; its first two
    # entries are taken as the column names and the type names to parse.
    data, _, _ = describe_result
    names, type_names = data[:2]
    parsed_types = [ClickhouseDataType.parse(t) for t in type_names]

    return sch.schema(names, parsed_types)
def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the indicated table and database

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None
      Not used by this implementation.

    Returns
    -------
    schema : ibis Schema
    """
    # Query the table details a single time; every column contributes a
    # (name, parsed type) pair, in the order the connection reports them.
    return sch.schema(
        [
            (col.name, OmniSciDBDataType.parse(col.type))
            for col in self.con.get_table_details(table_name)
        ]
    )
def get_schema(
    self,
    table_name: str,
    database: str | None = None,
) -> sch.Schema:
    """Return a Schema object for the indicated table and database.

    Parameters
    ----------
    table_name
        May be fully qualified
    database
        Database name

    Returns
    -------
    sch.Schema
        Ibis schema
    """
    target = self._fully_qualified_name(table_name, database)

    # The second element of the raw_sql result is not needed here.
    data, _ = self.raw_sql(f'DESC {target}')

    names = data[0]
    types = [ClickhouseDataType.parse(type_name) for type_name in data[1]]
    return sch.schema(names, types)
def spark_dataframe_schema(df):
    """Infer the schema of a Spark SQL `DataFrame` object."""
    # df.schema is a pt.StructType; convert it to an ibis dtype and reuse
    # that dtype's field names and field types for the schema.
    struct_dtype = dt.dtype(df.schema)
    field_names = struct_dtype.names
    field_types = struct_dtype.types
    return sch.schema(field_names, field_types)
def _get_schema_using_query(self, query):
    """Build a schema from the result of executing `query`.

    ``self._execute`` must return three items; the first is ignored and the
    other two are passed to ``sch.schema`` as column names and column types.
    """
    executed = self._execute(query)
    _, names, types = executed
    return sch.schema(names, types)
def schema(self):
    """Return a schema built from ``self.names()`` and ``self.types()``."""
    names = self.names()
    types = self.types()
    return sch.schema(names, types)
def _get_schema_using_query(self, query, **kwargs):
    """Build a schema from the column metadata returned for `query`.

    `kwargs` are forwarded to ``self.raw_sql``. Only the second element of
    its result is used; it is transposed into names and type names
    (presumably it holds ``(name, type name)`` pairs - confirm against
    ``raw_sql``), and each type name is parsed with
    ``ClickhouseDataType.parse``.
    """
    _, columns = self.raw_sql(query, **kwargs)
    names, type_names = zip(*columns)
    parsed_types = [ClickhouseDataType.parse(t) for t in type_names]
    return sch.schema(names, parsed_types)
def infer_pyarrow_schema(schema):
    """Infer an ibis schema from an iterable of pyarrow fields.

    Each field contributes its name and the ibis dtype derived from its
    ``type`` and ``nullable`` attributes.
    """
    pairs = []
    for field in schema:
        ibis_dtype = dt.dtype(field.type, nullable=field.nullable)
        pairs.append((field.name, ibis_dtype))
    return sch.schema(pairs)
def schema_from_series(s):
    """Build an ibis schema from the ``(label, value)`` pairs of `s`.

    `s` is presumably a pandas Series mapping column names to types (confirm
    against callers). ``items()`` is used because ``Series.iteritems`` was
    removed in pandas 2.0.
    """
    return sch.schema(tuple(s.items()))
def infer_pyarrow_schema(schema: pa.Schema) -> sch.Schema:
    """Infer an ibis schema from a pyarrow schema.

    Each field contributes its name and the ibis dtype derived from its
    ``type`` and ``nullable`` attributes.
    """

    def to_pair(field):
        return field.name, dt.dtype(field.type, nullable=field.nullable)

    return sch.schema([to_pair(field) for field in schema])
def schema_from_series(s):
    """Build an ibis schema from the ``(label, value)`` pairs of `s`.

    `s` is presumably a pandas Series mapping column names to types (confirm
    against callers). ``items()`` is used because ``Series.iteritems`` was
    removed in pandas 2.0.
    """
    return sch.schema(tuple(s.items()))