def _make_range_domain(self, domain, column_name): width = (domain.max - domain.min) / domain.interval digits = Math.floor(Math.log10(width - 1)) if digits == 0: value = "a.value" else: value = "+".join("1" + ("0" * j) + "*" + unicode(chr(ord(b'a') + j)) + ".value" for j in range(digits + 1)) if domain.interval == 1: if domain.min == 0: domain = "SELECT " + value + " " + column_name + \ "\nFROM __digits__ a" else: domain = "SELECT (" + value + ") + " + quote_value(domain.min) + " " + column_name + \ "\nFROM __digits__ a" else: if domain.min == 0: domain = "SELECT " + value + " * " + quote_value(domain.interval) + " " + column_name + \ "\nFROM __digits__ a" else: domain = "SELECT (" + value + " * " + quote_value(domain.interval) + ") + " + quote_value( domain.min) + " " + column_name + \ "\nFROM __digits__ a" for j in range(digits): domain += "\nJOIN __digits__ " + unicode( chr(ord(b'a') + j + 1)) + " ON 1=1" domain += "\nWHERE " + value + " < " + quote_value(width) return domain
def _insert(self, collection): for nested_path, details in collection.items(): active_columns = wrap(list(details.active_columns)) rows = details.rows table_name = concat_field(self.name, nested_path) if table_name == self.name: # DO NOT REQUIRE PARENT OR ORDER COLUMNS meta_columns = [UID] else: meta_columns = [UID, PARENT, ORDER] all_columns = meta_columns + active_columns.es_column prefix = "INSERT INTO " + quote_table(table_name) + \ "(" + ",".join(map(quote_table, all_columns)) + ")" # BUILD THE RECORDS records = " UNION ALL ".join( "\nSELECT " + ",".join(quote_value(row.get(c)) for c in all_columns) for row in unwrap(rows)) self.db.execute(prefix + records)
def _set_op(self, query, frum): # GET LIST OF COLUMNS primary_nested_path = join_field(split_field(frum)[1:]) vars_ = UNION([s.value.vars() for s in listwrap(query.select)]) nest_to_alias = { nested_path: "__" + unichr(ord('a') + i) + "__" for i, (nested_path, sub_table) in enumerate(self.nested_tables.items()) } active_columns = {".": []} for cname, cols in self.columns.items(): if any(startswith_field(cname, v) for v in vars_): for c in cols: if c.type in STRUCT: continue nest = c.nested_path[0] active = active_columns.get(nest) if not active: active = active_columns[nest] = [] active.append(c) # ANY VARS MENTIONED WITH NO COLUMNS? for v in vars_: if not any( startswith_field(cname, v) for cname in self.columns.keys()): active_columns["."].append( Column(names={self.name: v}, type="null", es_column=".", es_index=".", nested_path=["."])) # EVERY COLUMN, AND THE INDEX IT TAKES UP index_to_column = {} # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE) index_to_uid = {} # FROM NESTED PATH TO THE INDEX OF UID sql_selects = [ ] # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE) nest_to_alias = { nested_path: "__" + unichr(ord('a') + i) + "__" for i, (nested_path, sub_table) in enumerate(self.nested_tables.items()) } sorts = [] if query.sort: for s in query.sort: col = s.value.to_sql(self)[0] for t, sql in col.sql.items(): json_type = sql_type_to_json_type[t] if json_type in STRUCT: continue column_number = len(sql_selects) # SQL HAS ABS TABLE REFERENCE column_alias = _make_column_name(column_number) sql_selects.append(sql + " AS " + column_alias) if s.sort == -1: sorts.append(column_alias + " IS NOT NULL") sorts.append(column_alias + " DESC") else: sorts.append(column_alias + " IS NULL") sorts.append(column_alias) primary_doc_details = Data() # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED for nested_path, sub_table in self.nested_tables.items(): nested_doc_details = { "sub_table": sub_table, "children": [], "index_to_column": {}, "nested_path": [nested_path ] # fake the real nested path, we only look at [0] anyway } # INSERT INTO TREE if not primary_doc_details: primary_doc_details = nested_doc_details else: def place(parent_doc_details): if startswith_field(nested_path, parent_doc_details['nested_path'][0]): for c in parent_doc_details['children']: if place(c): return True parent_doc_details['children'].append( nested_doc_details) place(primary_doc_details) alias = nested_doc_details['alias'] = nest_to_alias[nested_path] # WE ALWAYS ADD THE UID AND ORDER column_number = index_to_uid[nested_path] = nested_doc_details[ 'id_coord'] = len(sql_selects) sql_select = alias + "." + quoted_UID sql_selects.append(sql_select + " AS " + _make_column_name(column_number)) if nested_path != ".": sql_select = alias + "." + quote_table(ORDER) sql_selects.append(sql_select + " AS " + _make_column_name(column_number)) # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM if nested_path not in active_columns: continue if primary_nested_path == nested_path: # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE si = 0 for s in listwrap(query.select): try: column_number = len(sql_selects) s.pull = get_column(column_number) db_columns = s.value.to_sql(self) if isinstance(s.value, LeavesOp): for column in db_columns: for t, unsorted_sql in column.sql.items(): json_type = sql_type_to_json_type[t] if json_type in STRUCT: continue column_number = len(sql_selects) # SQL HAS ABS TABLE REFERENCE column_alias = _make_column_name( column_number) sql_selects.append(unsorted_sql + " AS " + column_alias) index_to_column[ column_number] = nested_doc_details[ 'index_to_column'][ column_number] = Data( push_name=concat_field( s.name, column.name), push_column=si, push_child=".", pull=get_column( column_number), sql=unsorted_sql, type=json_type, nested_path=[nested_path] # fake the real nested path, we only look at [0] anyway ) si += 1 else: for column in db_columns: for t, unsorted_sql in column.sql.items(): json_type = sql_type_to_json_type[t] if json_type in STRUCT: continue column_number = len(sql_selects) # SQL HAS ABS TABLE REFERENCE column_alias = _make_column_name( column_number) sql_selects.append(unsorted_sql + " AS " + column_alias) index_to_column[ column_number] = nested_doc_details[ 'index_to_column'][ column_number] = Data( push_name=s.name, push_column=si, push_child=column.name, pull=get_column( column_number), sql=unsorted_sql, type=json_type, nested_path=[nested_path] # fake the real nested path, we only look at [0] anyway ) finally: si += 1 elif startswith_field(nested_path, primary_nested_path): # ADD REQUIRED COLUMNS, FOR DEEP STUFF for ci, c in enumerate(active_columns[nested_path]): if c.type in STRUCT: continue column_number = len(sql_selects) nested_path = c.nested_path unsorted_sql = nest_to_alias[ nested_path[0]] + "." + quote_table(c.es_column) column_alias = _make_column_name(column_number) sql_selects.append(unsorted_sql + " AS " + column_alias) index_to_column[column_number] = nested_doc_details[ 'index_to_column'][column_number] = Data( push_name=s.name, push_column=si, push_child=relative_field(c.name, s.name), pull=get_column(column_number), sql=unsorted_sql, type=c.type, nested_path=nested_path) where_clause = query.where.to_sql(self, boolean=True)[0].sql.b unsorted_sql = self._make_sql_for_one_nest_in_set_op( ".", sql_selects, where_clause, active_columns, index_to_column) for n, _ in self.nested_tables.items(): sorts.append(COLUMN + unicode(index_to_uid[n])) ordered_sql = ("SELECT * FROM (\n" + unsorted_sql + "\n)" + "\nORDER BY\n" + ",\n".join(sorts) + "\nLIMIT " + quote_value(query.limit)) result = self.db.query(ordered_sql) def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id, parent_id_coord): """ :param rows: REVERSED STACK OF ROWS (WITH push() AND pop()) :param row: CURRENT ROW BEING EXTRACTED :param nested_doc_details: { "nested_path": wrap_nested_path(nested_path), "index_to_column": map from column number to column details "children": all possible direct decedents' nested_doc_details } :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop) :param parent_id_coord: the column number for the parent id (so we ca extract from each row) :return: the nested property (usually an array) """ previous_doc_id = None doc = Data() output = [] id_coord = nested_doc_details['id_coord'] while True: doc_id = row[id_coord] if doc_id == None or (parent_id_coord is not None and row[parent_id_coord] != parent_doc_id): rows.append(row) # UNDO output = unwraplist(output) return output if output else None if doc_id != previous_doc_id: previous_doc_id = doc_id doc = Data() curr_nested_path = nested_doc_details['nested_path'][0] if isinstance(query.select, list) or isinstance( query.select.value, LeavesOp): # ASSIGN INNER PROPERTIES for i, c in nested_doc_details[ 'index_to_column'].items(): value = row[i] if value == None: continue if value == '': continue relative_path = relative_field( concat_field(c.push_name, c.push_child), curr_nested_path) if relative_path == ".": doc = value else: doc[relative_path] = value else: # ASSIGN INNER PROPERTIES for i, c in nested_doc_details[ 'index_to_column'].items(): value = row[i] if value is not None: relative_path = relative_field( c.push_child, curr_nested_path) if relative_path == ".": doc = value else: doc[relative_path] = value output.append(doc) # ASSIGN NESTED ARRAYS for child_details in nested_doc_details['children']: child_id = row[child_details['id_coord']] if child_id is not None: nested_value = _accumulate_nested( rows, row, child_details, doc_id, id_coord) if nested_value is not None: path = child_details['nested_path'][0] doc[path] = nested_value try: row = rows.pop() except IndexError: output = unwraplist(output) return output if output else None cols = tuple(index_to_column.values()) if query.format == "cube": num_rows = len(result.data) num_cols = MAX([c.push_column for c in cols]) + 1 if len(cols) else 0 map_index_to_name = {c.push_column: c.push_name for c in cols} temp_data = [[None] * num_rows for _ in range(num_cols)] for rownum, d in enumerate(result.data): for c in cols: if c.push_child == ".": temp_data[c.push_column][rownum] = c.pull(d) else: column = temp_data[c.push_column][rownum] if column is None: column = temp_data[c.push_column][rownum] = {} column[c.push_child] = c.pull(d) output = Data( meta={"format": "cube"}, data={n: temp_data[c] for c, n in map_index_to_name.items()}, edges=[{ "name": "rownum", "domain": { "type": "rownum", "min": 0, "max": num_rows, "interval": 1 } }]) return output elif query.format == "table": num_column = MAX([c.push_column for c in cols]) + 1 header = [None] * num_column for c in cols: # header[c.push_column] = c.push_name sf = split_field(c.push_name) if len(sf) == 0: header[c.push_column] = "." elif len(sf) == 1: header[c.push_column] = sf[0] else: # TABLES ONLY USE THE FIRST-LEVEL PROPERTY NAMES # PUSH ALL DEEPER NAMES TO CHILD header[c.push_column] = sf[0] c.push_child = join_field(sf[1:] + split_field(c.push_child)) output_data = [] for d in result.data: row = [None] * num_column for c in cols: set_column(row, c.push_column, c.push_child, c.pull(d)) output_data.append(row) return Data(meta={"format": "table"}, header=header, data=output_data) else: rows = list(reversed(unwrap(result.data))) row = rows.pop() output = Data(meta={"format": "list"}, data=listwrap( _accumulate_nested(rows, row, primary_doc_details, None, None))) return output
def update(self, command): """ :param command: EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT """ command = wrap(command) # REJECT DEEP UPDATES touched_columns = command.set.keys() | set(listwrap(command['clear'])) for c in self.get_leaves(): if c.name in touched_columns and c.nested_path and len( c.name) > len(c.nested_path[0]): Log.error("Deep update not supported") # ADD NEW COLUMNS where = jx_expression(command.where) _vars = where.vars() _map = { v: c.es_column for v in _vars for c in self.columns.get(v, Null) if c.type not in STRUCT } where_sql = where.map(_map).to_sql() new_columns = set(command.set.keys()) - set(self.columns.keys()) for new_column_name in new_columns: nested_value = command.set[new_column_name] ctype = get_type(nested_value) column = Column(names={self.name: new_column_name}, type=ctype, es_index=self.name, es_column=typed_column(new_column_name, ctype)) self.add_column(column) # UPDATE THE NESTED VALUES for nested_column_name, nested_value in command.set.items(): if get_type(nested_value) == "nested": nested_table_name = concat_field(self.name, nested_column_name) nested_table = nested_tables[nested_column_name] self_primary_key = ",".join( quote_table(c.es_column) for u in self.uid for c in self.columns[u]) extra_key_name = UID_PREFIX + "id" + unicode(len(self.uid)) extra_key = [e for e in nested_table.columns[extra_key_name]][0] sql_command = "DELETE FROM " + quote_table(nested_table.name) + \ "\nWHERE EXISTS (" + \ "\nSELECT 1 " + \ "\nFROM " + quote_table(nested_table.name) + " n" + \ "\nJOIN (" + \ "\nSELECT " + self_primary_key + \ "\nFROM " + quote_table(self.name) + \ "\nWHERE " + where_sql + \ "\n) t ON " + \ " AND ".join( "t." + quote_table(c.es_column) + " = n." + quote_table(c.es_column) for u in self.uid for c in self.columns[u] ) + \ ")" self.db.execute(sql_command) # INSERT NEW RECORDS if not nested_value: continue doc_collection = {} for d in listwrap(nested_value): nested_table.flatten(d, Data(), doc_collection, path=nested_column_name) prefix = "INSERT INTO " + quote_table(nested_table.name) + \ "(" + \ self_primary_key + "," + \ _quote_column(extra_key) + "," + \ ",".join( quote_table(c.es_column) for c in doc_collection.get(".", Null).active_columns ) + ")" # BUILD THE PARENT TABLES parent = "\nSELECT " + \ self_primary_key + \ "\nFROM " + quote_table(self.name) + \ "\nWHERE " + jx_expression(command.where).to_sql() # BUILD THE RECORDS children = " UNION ALL ".join( "\nSELECT " + quote_value(i) + " " + quote_table(extra_key.es_column) + "," + ",".join( quote_value(row[c.name]) + " " + quote_table(c.es_column) for c in doc_collection.get(".", Null).active_columns) for i, row in enumerate( doc_collection.get(".", Null).rows)) sql_command = prefix + \ "\nSELECT " + \ ",".join( "p." + quote_table(c.es_column) for u in self.uid for c in self.columns[u] ) + "," + \ "c." + _quote_column(extra_key) + "," + \ ",".join( "c." + quote_table(c.es_column) for c in doc_collection.get(".", Null).active_columns ) + \ "\nFROM (" + parent + ") p " + \ "\nJOIN (" + children + \ "\n) c on 1=1" self.db.execute(sql_command) # THE CHILD COLUMNS COULD HAVE EXPANDED # ADD COLUMNS TO SELF for n, cs in nested_table.columns.items(): for c in cs: column = Column(names={self.name: c.name}, type=c.type, es_index=c.es_index, es_column=c.es_column, nested_path=[nested_column_name] + c.nested_path) if c.name not in self.columns: self.columns[column.name] = {column} elif c.type not in [ c.type for c in self.columns[c.name] ]: self.columns[column.name].add(column) command = "UPDATE " + quote_table(self.name) + " SET " + \ ",\n".join( [ _quote_column(c) + "=" + quote_value(get_if_type(v, c.type)) for k, v in command.set.items() if get_type(v) != "nested" for c in self.columns[k] if c.type != "nested" and len(c.nested_path) == 1 ] + [ _quote_column(c) + "=NULL" for k in listwrap(command['clear']) if k in self.columns for c in self.columns[k] if c.type != "nested" and len(c.nested_path) == 1 ] ) + \ " WHERE " + where_sql self.db.execute(command)
def _edges_op(self, query, frum): index_to_column = {} # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE) outer_selects = [ ] # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE) tables = [] base_table = split_field(frum)[0] path = join_field(split_field(frum)[1:]) nest_to_alias = { nested_path: "__" + unichr(ord('a') + i) + "__" for i, (nested_path, sub_table) in enumerate(self.nested_tables.items()) } columns = self._get_sql_schema(frum) tables = [] for n, a in nest_to_alias.items(): if startswith_field(path, n): tables.append({"nest": n, "alias": a}) tables = jx.sort(tables, {"value": {"length": "nest"}}) from_sql = join_field( [base_table] + split_field(tables[0].nest)) + " " + tables[0].alias previous = tables[0] for t in tables[1::]: from_sql += "\nLEFT JOIN\n" + join_field( [base_table] + split_field(t.nest) ) + " " + t.alias + " ON " + t.alias + "." + PARENT + " = " + previous.alias + "." + GUID # SHIFT THE COLUMN DEFINITIONS BASED ON THE NESTED QUERY DEPTH ons = [] join_types = [] wheres = [] not_ons = ["__exists__ IS NULL"] groupby = [] not_groupby = [] orderby = [] domains = [] select_clause = [ "1 __exists__" # USED TO DISTINGUISH BETWEEN NULL-BECAUSE-LEFT-JOIN OR NULL-BECAUSE-NULL-VALUE ] for edge_index, query_edge in enumerate(query.edges): edge_alias = "e" + unicode(edge_index) if query_edge.value: edge_values = [ p for c in query_edge.value.to_sql(self).sql for p in c.items() ] elif not query_edge.value and any( query_edge.domain.partitions.where): case = "CASE " for pp, p in enumerate(query_edge.domain.partitions): w = p.where.to_sql(self)[0].sql.b t = quote_value(pp) case += " WHEN " + w + " THEN " + t case += " ELSE NULL END " edge_values = [("n", case)] elif query_edge.range: edge_values = query_edge.range.min.to_sql(self)[0].sql.items( ) + query_edge.range.max.to_sql(self)[0].sql.items()
num_push_columns=num_push_columns, push_child=push_child, # CAN NOT HANDLE TUPLES IN COLUMN pull=pull, sql=sql, type=sql_type_to_json_type[json_type]) vals = [v for t, v in edge_values] if query_edge.domain.type == "set": domain_name = "d" + unicode(edge_index) + "c" + unicode( column_index) domain_names = [domain_name] if len(edge_names) > 1: Log.error("Do not know how to handle") if query_edge.value: domain = "\nUNION ALL\n".join( "SELECT " + quote_value(coalesce(p.dataIndex, i)) + " AS rownum, " + quote_value(p.value) + " AS " + domain_name for i, p in enumerate(query_edge.domain.partitions)) if query_edge.allowNulls: domain += "\nUNION ALL\nSELECT " + quote_value( len(query_edge.domain.partitions) ) + " AS rownum, NULL AS " + domain_name where = None join_type = "LEFT JOIN" if query_edge.allowNulls else "JOIN" on_clause = " OR ".join( edge_alias + "." + k + " = " + v for k, (t, v) in zip(domain_names, edge_values)) not_on_clause = None else: domain = "\nUNION ALL\n".join(