def denormalized(self):
    """
    Produce the flat, denormalized view of the column metadata for queries.

    Internally the metadata is nested (``self.data`` maps to mappings of
    column lists). This walks that structure and emits one plain record per
    column, leaving out every column whose ``jx_type`` is in ``STRUCT``.

    :return: a ListContainer built from ``self.name``, the flat records and a
             schema of ``SIMPLE_METADATA_COLUMNS``
    """
    records = []
    with self.locker:
        # refresh the metadata before reading it, all under the same lock
        self._update_meta()
        for _table_key, columns_by_name in self.data.items():
            for _column_key, candidates in columns_by_name.items():
                for col in candidates:
                    if col.jx_type in STRUCT:
                        continue
                    records.append({
                        "table": col.es_index,
                        "name": untyped_column(col.name)[0],
                        "cardinality": col.cardinality,
                        "es_column": col.es_column,
                        "es_index": col.es_index,
                        "last_updated": col.last_updated,
                        "count": col.count,
                        "nested_path": [unnest_path(step) for step in col.nested_path],
                        "es_type": col.es_type,
                        "type": col.jx_type,
                    })

    # function-scope import, kept local exactly as the original code had it
    from jx_python.containers.list_usingPythonList import ListContainer

    flat_schema = jx_base.Schema(META_COLUMNS_NAME, SIMPLE_METADATA_COLUMNS)
    return ListContainer(self.name, data=records, schema=flat_schema)
def _nest_column(self, column):
    """
    Move a nested-typed column out of its current table into a child table.

    The child table is named from the fact table plus the untyped column
    name. It is created when the database reports nothing about it, then the
    column is added to it and dropped from the table it used to live in.

    :param column: the column to nest; its ``es_column`` must carry the
                   nested SQL type, otherwise ``Log.error`` is called
    :return: None
    """
    new_path, type_ = untyped_column(column.es_column)
    if type_ != SQL_NESTED_TYPE:
        Log.error("only nested types can be nested")

    destination_table = concat_field(self.fact_name, new_path)
    existing_table = concat_field(self.fact_name, column.nested_path[0])

    # FIND THE INNER COLUMNS WE WILL BE MOVING
    moving_columns = []
    for candidate in self.columns:
        if destination_table == column.es_index:
            continue
        if column.es_column != candidate.es_column:
            continue
        moving_columns.append(candidate)
        # NOTE(review): new_path is a single path, while nested_path is
        # indexed and concatenated like a list elsewhere in this method -
        # confirm that assigning the bare path here is intended
        candidate.nested_path = new_path

    # TODO: IF THERE ARE CHILD TABLES, WE MUST UPDATE THEIR RELATIONS TOO?

    # LOAD THE COLUMNS
    if not self.namespace.db.about(destination_table):
        # DEFINE A NEW TABLE
        parent_reference = (
            "FOREIGN KEY " + sql_iso(quoted_PARENT) +
            " REFERENCES " + quote_column(existing_table) + sql_iso(quoted_UID)
        )
        table_definition = sql_iso(sql_list([
            quoted_UID + "INTEGER",
            quoted_PARENT + "INTEGER",
            quoted_ORDER + "INTEGER",
            "PRIMARY KEY " + sql_iso(quoted_UID),
            parent_reference
        ]))
        command = SQL_CREATE + quote_column(destination_table) + table_definition
        with self.namespace.db.transaction() as t:
            t.execute(command)
            self.add_table([new_path]+column.nested_path)

    # TEST IF THERE IS ANY DATA IN THE NEW NESTED ARRAY
    if not moving_columns:
        return

    column.es_index = destination_table
    with self.namespace.db.transaction() as t:
        t.execute(
            "ALTER TABLE " + quote_column(destination_table) +
            " ADD COLUMN " + quote_column(column.es_column) + " " + column.es_type
        )

        # Deleting parent columns: rename the old table aside, rebuild it
        # without the moved column, then drop the renamed copy
        for moved in moving_columns:
            dropped_name = moved.es_column
            tmp_table = "tmp_" + existing_table
            probe = t.query(
                SQL_SELECT + SQL_STAR + SQL_FROM + quote_column(existing_table) +
                SQL_LIMIT + SQL_ZERO
            )
            current_names = list(map(text, probe.header))
            t.execute(
                "ALTER TABLE " + quote_column(existing_table) +
                " RENAME TO " + quote_column(tmp_table)
            )
            t.execute(
                SQL_CREATE + quote_column(existing_table) + SQL_AS + SQL_SELECT +
                sql_list([quote_column(n) for n in current_names if n != dropped_name]) +
                SQL_FROM + quote_column(tmp_table)
            )
            t.execute("DROP TABLE " + quote_column(tmp_table))
def _load_from_database(self):
    """
    Rebuild the snowflake paths and column metadata from the database.

    Lists every table recorded in ``sqlite_master`` (ordered by name), skips
    tables and columns whose names start with ``__``, registers each table's
    full nested path under its base table in ``self._snowflakes``, and calls
    ``self.add`` with a ``Column`` for every remaining database column.

    :return: None
    """
    # FIND ALL TABLES
    listing = self.db.query(sql_query({
        "from": "sqlite_master",
        "where": {"eq": {"type": "table"}},
        "orderby": "name"
    }))
    tables = wrap([dict(zip(listing.header, record)) for record in listing.data])

    previous_path = ["."]
    for table in tables:
        if table.name.startswith("__"):
            continue
        base_table, nested_path = tail_field(table.name)

        # FIND COMMON NESTED PATH SUFFIX
        # keep the tail of the previous table's path starting at the first
        # entry that is a prefix of this table's path; nothing when no entry
        # matches, or when this table is the root (".")
        ancestors = []
        if nested_path != ".":
            for depth, candidate in enumerate(previous_path):
                if startswith_field(nested_path, candidate):
                    ancestors = previous_path[depth:]
                    break

        full_nested_path = [nested_path] + ancestors
        self._snowflakes[literal_field(base_table)] += [full_nested_path]

        # LOAD THE COLUMNS
        for _cid, name, dtype, _notnull, _default, _pk in self.db.about(table.name):
            if name.startswith("__"):
                continue
            cname, ctype = untyped_column(name)
            self.add(
                Column(
                    name=cname,
                    jx_type=coalesce(sql_type_to_json_type.get(ctype), IS_NULL),
                    nested_path=full_nested_path,
                    es_type=dtype,
                    es_column=name,
                    es_index=table.name,
                    last_updated=Date.now()
                )
            )

        previous_path = full_nested_path
def _flatten(data, uid, parent_id, order, full_path, nested_path, row=None, guid=None):
    """
    Pull one (sub)document apart into rows, recursing into nested values.

    Rows are appended to the ``rows`` of the matching ``doc_collection``
    entry, and any column that must be added or nested is recorded in
    ``required_changes``. ``self``, ``doc_collection``, ``snowflake`` and
    ``required_changes`` are not parameters; they come from the enclosing
    scope.

    :param data: the data we are pulling apart
    :param uid: the uid we are giving this doc
    :param parent_id: the parent id of this (sub)doc
    :param order: the number of siblings before this one
    :param full_path: path to this (sub)doc
    :param nested_path: list of paths, deepest first
    :param row: we will be filling this; a new row is made (and appended)
                when it is not given
    :param guid: stored under GUID in the row, only when a new row is made
    :return: None; results are delivered through the side effects above
    """
    table = concat_field(self.name, nested_path[0])
    insertion = doc_collection[nested_path[0]]
    if not row:
        # no row was handed in (or it was empty): start one for this (sub)doc
        row = {GUID: guid, UID: uid, PARENT: parent_id, ORDER: order}
        insertion.rows.append(row)

    if is_data(data):
        items = [(concat_field(full_path, k), v) for k, v in wrap(data).leaves()]
    else:
        # PRIMITIVE VALUES
        items = [(full_path, data)]

    for cname, v in items:
        jx_type = get_jx_type(v)
        if jx_type is None:
            # values with no jx type are skipped
            continue

        insertion = doc_collection[nested_path[0]]
        # look for a known column with this name, first among the columns
        # active for this insertion, then among the snowflake's columns
        if jx_type == NESTED:
            c = first(cc for cc in insertion.active_columns + snowflake.columns if cc.jx_type in STRUCT and untyped_column(cc.name)[0] == cname)
        else:
            c = first(cc for cc in insertion.active_columns + snowflake.columns if cc.jx_type == jx_type and cc.name == cname)

        if isinstance(c, list):
            Log.error("confused")

        if not c:
            # WHAT IS THE NESTING LEVEL FOR THIS PATH?
            # NOTE(review): deeper_nested_path starts as a string but is
            # compared by len() against entries of query_paths, and its value
            # is not read again before being reassigned further down; the new
            # Column uses nested_path instead - confirm this is intended
            deeper_nested_path = "."
            for path in snowflake.query_paths:
                if startswith_field(cname, path[0]) and len(deeper_nested_path) < len(path):
                    deeper_nested_path = path

            # unknown column: describe it and record the schema change needed
            c = Column(name=cname, jx_type=jx_type, es_type=json_type_to_sqlite_type.get(jx_type, jx_type), es_column=typed_column(cname, json_type_to_sql_type.get(jx_type)), es_index=table, cardinality=0, nested_path=nested_path, last_updated=Date.now())
            if jx_type == NESTED:
                snowflake.query_paths.append(c.es_column)
                required_changes.append({'nest': c})
            else:
                insertion.active_columns.add(c)
                required_changes.append({"add": c})

        elif c.jx_type == NESTED and jx_type == OBJECT:
            # ALWAYS PROMOTE OBJECTS TO NESTED
            jx_type = NESTED
            v = [v]
        elif len(c.nested_path) < len(nested_path):
            # the known column has a shorter nested path than the current
            # one: replace it with a column at the current path and copy the
            # values already collected for it into new rows
            from_doc = doc_collection.get(c.nested_path[0], None)
            column = c.es_column
            from_doc.active_columns.remove(c)
            snowflake._remove_column(c)
            required_changes.append({"nest": c})
            deep_c = Column(name=cname, jx_type=jx_type, es_type=json_type_to_sqlite_type.get(jx_type, jx_type), es_column=typed_column(cname, json_type_to_sql_type.get(jx_type)), es_index=table, nested_path=nested_path, last_updated=Date.now())
            snowflake._add_column(deep_c)
            snowflake._drop_column(c)
            # NOTE(review): c was already removed from active_columns a few
            # lines above - confirm remove() tolerates a missing item
            from_doc.active_columns.remove(c)

            for r in from_doc.rows:
                r1 = unwrap(r)
                if column in r1:
                    row1 = {UID: self.container.next_uid(), PARENT: r1["__id__"], ORDER: 0, column: r1[column]}
                    insertion.rows.append(row1)

        elif len(c.nested_path) > len(nested_path):
            # the known column has a longer nested path: start a fresh row
            # in that column's collection, parented on this doc
            insertion = doc_collection[c.nested_path[0]]
            row = {UID: self.container.next_uid(), PARENT: uid, ORDER: order}
            insertion.rows.append(row)

        # BE SURE TO NEST VALUES, IF NEEDED
        if jx_type == NESTED:
            deeper_nested_path = [cname] + nested_path
            if not doc_collection.get(cname):
                doc_collection[cname] = Data(active_columns=Queue(), rows=[])
            # each element becomes its own sub-document with a new uid, this
            # doc as parent, and its position in the sequence as order
            for i, r in enumerate(v):
                child_uid = self.container.next_uid()
                _flatten(r, child_uid, uid, i, cname, deeper_nested_path)
        elif jx_type == OBJECT:
            # objects keep filling the current row
            _flatten(v, uid, parent_id, order, cname, nested_path, row=row)
        elif c.jx_type:
            row[c.es_column] = v
column_number = len(outer_selects) outer_selects.append(sql) index_to_column[column_number] = ColumnMapping( push_name=s.name, push_column_name=s.name, push_column=si, push_child=".", pull=get_column(column_number), sql=sql, column_alias=quote_column(s.name), type=sql_type_to_json_type["n"] ) elif s.aggregate == "count" and (not query.edges and not query.groupby): value = s.value.var columns = [c.es_column for c in self.snowflake.columns if untyped_column(c.es_column)[0] == value] sql = SQL("+").join(sql_count(quote_column(col)) for col in columns) column_number = len(outer_selects) outer_selects.append(sql_alias(sql, _make_column_name(column_number))) index_to_column[column_number] = ColumnMapping( push_name=s.name, push_column_name=s.name, push_column=si, push_child=".", pull=get_column(column_number), sql=sql, column_alias=_make_column_name(column_number), type=sql_type_to_json_type["n"] ) elif s.aggregate == "percentile": if not isinstance(s.percentile, (int, float)):
def query_metadata(self, query):
    """
    Answer a metadata query by listing the columns of this container's tables.

    Only a single ``eq`` filter on ``table`` or on ``name`` is supported, and
    neither edges nor groupby are allowed; ``Log.error`` is called otherwise.
    Each result record is (table, name, type, nested_path).

    :param query: the query; its ``from`` entry is replaced with ``self``
    :return: Data in the shape chosen by ``query.format``: ``"cube"`` gives
             column-wise data with a ``rownum`` edge, ``"table"`` gives a
             header plus row tuples, anything else gives a list of dicts
    """
    # the previous ``from`` value is captured but not used afterwards
    frum, query['from'] = query['from'], self
    schema = self.snowflake.tables["."].schema
    query = QueryOp.wrap(query, schema)
    columns = self.snowflake.columns
    where = query.where
    table_name = None
    column_name = None

    if query.edges or query.groupby:
        Log.error("Aggregates(groupby or edge) are not supported")

    if where.op == "eq" and where.lhs.var == "table":
        table_name = mo_json.json2value(where.rhs.json)
    elif where.op == "eq" and where.lhs.var == "name":
        column_name = mo_json.json2value(where.rhs.json)
    else:
        Log.error("Only simple filters are expected like: \"eq\" on table and column name")

    # relative table paths and their full names, kept in step with each other
    # (the original looped over an undefined name here)
    table_paths = list(self.tables.keys())
    tables = [concat_field(self.snowflake.fact_name, path) for path in table_paths]

    metadata = []
    # make sure the GUID column is reported; tolerate an empty column list
    if not columns or columns[-1].es_column != GUID:
        columns.append(Column(
            name=GUID,
            jx_type=STRING,
            es_column=GUID,
            es_index=self.snowflake.fact_name,
            nested_path=["."]
        ))

    for tname, table in zip(table_paths, tables):
        # `!= None` is kept as written: the filter values come from
        # json2value and may not be the plain None singleton
        if table_name != None and table_name != table:
            continue

        for col in columns:
            cname, _ = untyped_column(col.es_column)
            if column_name != None and column_name != cname:
                continue

            # columns carry their type as jx_type everywhere else in this file
            metadata.append((
                table,
                relative_field(col.name, tname),
                col.jx_type,
                unwraplist(col.nested_path)
            ))

    header = ["table", "name", "type", "nested_path"]

    if query.format == "cube":
        return Data(
            meta={"format": "cube"},
            data=dict(zip(header, zip(*metadata))),
            edges=[{
                "name": "rownum",
                "domain": {
                    "type": "rownum",
                    "min": 0,
                    "max": len(metadata),
                    "interval": 1
                }
            }]
        )
    elif query.format == "table":
        return Data(
            meta={"format": "table"},
            header=header,
            data=metadata
        )
    else:
        return Data(
            meta={"format": "list"},
            data=[dict(zip(header, r)) for r in metadata]
        )
def column(self, prefix):
    """
    Find the columns of the fact table whose untyped name matches ``prefix``.

    ``prefix`` is joined to ``self.nested_path`` and stripped of its type
    suffix; a column matches when its own untyped name equals that result.
    The GUID column and columns of type OBJECT or EXISTS are never returned.

    :param prefix: the column name, relative to this schema's nested path
    :return: set of matching columns (empty when nothing matches)
    """
    # untyped_column returns a (name, type) pair; only the name takes part in
    # the comparison below (comparing against the whole pair never matched)
    # NOTE(review): confirm concat_field accepts self.nested_path as given
    full_name, _ = untyped_column(concat_field(self.nested_path, prefix))

    matches = set()
    for c in self.snowflake.namespace.columns.find(self.snowflake.fact_name):
        name, _ = untyped_column(c.name)
        if name != full_name or name == GUID:
            continue
        if c.jx_type in [OBJECT, EXISTS]:
            continue
        matches.add(c)
    return matches