def table(schema, name=None):
    """
    Build a table expression that is not bound to any physical table.

    The result can be used to compose expressions (handy in tests), but it
    cannot be executed until it is bound to a real table.

    Parameters
    ----------
    schema : ibis Schema, list, or other
      A Schema instance is used as-is. A list is converted with
      ``Schema.from_tuples``; anything else is handed to
      ``Schema.from_dict``.
    name : string, default None
      Name for table

    Returns
    -------
    table : TableExpr
    """
    if isinstance(schema, Schema):
        resolved = schema
    elif isinstance(schema, list):
        resolved = Schema.from_tuples(schema)
    else:
        resolved = Schema.from_dict(schema)

    return TableExpr(_ops.UnboundTable(resolved, name=name))
def get_schema(self, table_name, database=None):
    """
    Look up the column names and types of a table via ``DESCRIBE``.

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None

    Returns
    -------
    schema : ibis Schema
      Column names are lower-cased. Type names are lower-cased and then
      translated through ``udf._impala_to_ibis_type``; names with no entry
      in that mapping are passed through unchanged.
    """
    target = self._fully_qualified_name(table_name, database)
    rows = self.con.fetchall('DESCRIBE {0}'.format(target))

    # Each row is unpacked as (name, type, comment); the comment column is
    # not used, but the three-way unpack is kept so the row shape is still
    # checked.
    raw_names, raw_types, _comments = zip(*rows)

    type_map = udf._impala_to_ibis_type
    lowered_types = [t.lower() for t in raw_types]
    ibis_types = [type_map.get(t, t) for t in lowered_types]

    column_names = [n.lower() for n in raw_names]
    return Schema(column_names, ibis_types)
def get_partition_schema(self, table_name, database=None):
    """
    For partitioned tables, return the schema (names and types) for the
    partition columns

    The partition columns are taken to be the leading columns of the
    ``SHOW PARTITIONS`` result whose names also appear in the table's
    schema; scanning stops at the first column that is not a table column.

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string, default None

    Returns
    -------
    partition_schema : ibis Schema
      Empty when none of the leading result columns match a table column.
    """
    qualified_name = self._fully_qualified_name(table_name, database)

    schema = self.get_schema(table_name, database=database)
    name_to_type = dict(zip(schema.names, schema.types))

    query = 'SHOW PARTITIONS {0}'.format(qualified_name)

    # Accumulate names and types separately rather than unpacking
    # ``zip(*pairs)``: the unpack raises an opaque ValueError when no
    # column matched, whereas two (possibly empty) lists always work.
    # NOTE(review): assumes Schema accepts empty name/type lists -- confirm.
    pnames = []
    ptypes = []
    with self._execute(query, results=True) as cur:
        result = self._fetch_from_cursor(cur)

        for col in result.columns:
            if col not in name_to_type:
                # First non-table column ends the partition-key prefix
                # (presumably the per-partition statistics columns follow).
                break
            pnames.append(col)
            ptypes.append(name_to_type[col])

    return Schema(pnames, ptypes)
def _result_schema(self):
    """
    Derive the output schema from the grouping and aggregate expressions.

    Returns
    -------
    schema : ibis Schema
      One column per expression in ``self.by`` followed by
      ``self.agg_exprs``, using each expression's name and type.
    """
    # All exprs must be named, otherwise get_name() cannot supply a column
    # name. Name and type are read together so each expression is visited
    # exactly once, in order.
    columns = [(e.get_name(), e.type()) for e in self.by + self.agg_exprs]

    names = [col_name for col_name, _ in columns]
    types = [col_type for _, col_type in columns]
    return Schema(names, types)
def _get_schema_using_query(self, query):
    """
    Execute ``query`` and build a Schema from the cursor's description.

    Parameters
    ----------
    query : string

    Returns
    -------
    schema : ibis Schema
      Field names are lower-cased.
    """
    with self._execute(query, results=True) as cursor:
        # Draining the results resets the state of the cursor and closes
        # the operation
        cursor.fetchall()
        description = cursor.description
        raw_names, ibis_types = self._adapt_types(description)

    # per #321: most Impala tables already have lower case field names, but
    # Avro-backed data can, depending on the Impala version, keep the case
    # declared in the Avro schema in the metastore. Conforming everything
    # to lower case avoids having to deal with that.
    lowered = [field.lower() for field in raw_names]

    return Schema(lowered, ibis_types)
def __init__(self, table_expr, proj_exprs):
    """
    Initialize the node from a table expression and its selections.

    Parameters
    ----------
    table_expr : table expression
      The table the selections are validated against.
    proj_exprs : list
      Each item is either a column name (string), which is looked up on
      ``table_expr``, a value expression, or a table expression whose
      whole schema is included.

    Raises
    ------
    NotImplementedError
      If an item is neither a ValueExpr nor a table expression.
    """
    from ibis.expr.analysis import ExprValidator

    # Selections have to be compatible with the input table: either scalar
    # expressions or array expressions that originate from the same root
    # table expression
    checker = ExprValidator([table_expr])

    # Collect the output column names/types while normalizing selections
    col_names = []
    col_types = []
    selections = []

    for item in proj_exprs:
        # A bare string is shorthand for a column of the input table
        resolved = table_expr[item] if isinstance(item, py_string) else item

        checker.assert_valid(resolved)

        if isinstance(resolved, ValueExpr):
            col_names.append(resolved.get_name())
            col_types.append(resolved.type())
        elif rules.is_table(resolved):
            # A table selection contributes all of its columns
            sub_schema = resolved.schema()
            col_names.extend(sub_schema.names)
            col_types.extend(sub_schema.types)
        else:
            raise NotImplementedError

        selections.append(resolved)

    # Building the Schema is where name uniqueness gets validated
    # (per the original comment; the check itself lives in Schema)
    HasSchema.__init__(self, Schema(col_names, col_types))

    # The selections are passed to Node as a single nested list argument
    Node.__init__(self, [table_expr] + [selections])

    self.table = table_expr
    self.selections = selections
def _get_table_schema(self, name):
    """
    Return the Schema registered under ``name`` in ``self._tables``.

    Backtick characters are removed from ``name`` before the lookup, and
    the stored value is converted with ``Schema.from_tuples``.
    """
    unquoted = name.replace('`', '')
    columns = self._tables[unquoted]
    return Schema.from_tuples(columns)
def schema(pairs=None, names=None, types=None):
    """
    Construct a Schema from pairs, or from parallel names and types.

    Parameters
    ----------
    pairs : default None
      When given (not None), passed to ``Schema.from_tuples``; ``names``
      and ``types`` are then ignored.
    names : default None
    types : default None
      Used together as ``Schema(names, types)`` when ``pairs`` is None.

    Returns
    -------
    schema : ibis Schema
    """
    if pairs is None:
        return Schema(names, types)
    return Schema.from_tuples(pairs)