def to_sql(self, schema, not_null=False, boolean=False):
    """
    RETURN BOOLEAN SQL COMPARING self.lhs TO self.rhs, ONE SQL TYPE AT A TIME

    :param schema: PASSED THROUGH TO THE OPERANDS' to_sql()
    :param not_null: NOT USED HERE (OPERANDS ARE ALWAYS ASKED WITH not_null=True)
    :param boolean: NOT USED HERE
    :return: WRAPPED LIST WITH ONE {"name", "sql"} ENTRY; THE COMPARISONS ARE
             OR'ED TOGETHER UNDER THE "b" TYPE.  FALSE WHEN NOTHING TO COMPARE
    """
    left = SQLang[self.lhs].partial_eval()
    right = SQLang[self.rhs].partial_eval()
    left_sql = left.to_sql(schema, not_null=True)
    right_sql = right.to_sql(schema, not_null=True)

    # A 'T'/'F' LITERAL FACING A BOOLEAN EXPRESSION IS RE-READ AS A BOOLEAN
    if is_literal(right) and left_sql[0].sql.b != None and right.value in ('T', 'F'):
        right_sql = BooleanOp(right).to_sql(schema)
    if is_literal(left) and right_sql[0].sql.b != None and left.value in ('T', 'F'):
        left_sql = BooleanOp(left).to_sql(schema)

    if len(left_sql) != len(right_sql):
        Log.error("lhs and rhs have different dimensionality!?")

    comparisons = []
    for left_col, right_col in zip(left_sql, right_sql):
        for sql_type in "bsnj":
            right_part = right_col.sql[sql_type]
            left_part = left_col.sql[sql_type]
            right_missing = right_part == None
            left_missing = left_part == None

            if right_missing and left_missing:
                # NEITHER SIDE HAS THIS TYPE, NOTHING TO COMPARE
                continue
            if right_missing:
                comparisons.append(ConcatSQL((left_part, SQL_IS_NULL)))
            elif left_missing:
                comparisons.append(ConcatSQL((right_part, SQL_IS_NULL)))
            else:
                comparisons.append(
                    ConcatSQL((sql_iso(left_part), SQL_EQ, sql_iso(right_part)))
                )

    if comparisons:
        return wrap([{"name": ".", "sql": {"b": JoinSQL(SQL_OR, comparisons)}}])
    return FALSE.to_sql(schema)
def sql_eq(**item):
    """
    BUILD SQL THAT COMPARES EACH VARIABLE TO ITS VALUE (ALL AND'ED TOGETHER)

    A VALUE THAT EQUALS None IS TESTED WITH SQL_IS_NULL INSTEAD OF SQL_EQ

    :param item: keyword parameters representing variable and value
    :return: SQL
    """
    clauses = []
    for name, value in item.items():
        if value != None:
            clause = ConcatSQL((quote_column(name), SQL_EQ, quote_value(value)))
        else:
            clause = ConcatSQL((quote_column(name), SQL_IS_NULL))
        clauses.append(clause)
    return SQL_AND.join(clauses)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    RETURN NUMERIC SQL THAT ADDS ONE FOR EACH OF self.terms THAT IS NOT NULL

    :param schema: PASSED THROUGH TO EACH TERM'S to_sql()
    :param not_null: NOT USED HERE
    :param boolean: NOT USED HERE
    :return: WRAPPED LIST WITH ONE {"name", "sql"} ENTRY OF TYPE "n",
             OR A WRAPPED [{}] WHEN THERE ARE NO TERMS
    """
    acc = []
    for term in self.terms:
        sqls = SQLang[term].to_sql(schema)
        if len(sqls) > 1:
            # MULTI-COLUMN TERM; ADDED AS SQL_TRUE (presumably COUNTS AS ONE - TODO confirm)
            acc.append(SQL_TRUE)
        else:
            for t, v in sqls[0].sql.items():
                if t in ["b", "s", "n"]:
                    # PRIMITIVE: ZERO WHEN NULL, ONE OTHERWISE
                    acc.append(
                        ConcatSQL((
                            SQL_CASE,
                            SQL_WHEN,
                            sql_iso(v),
                            SQL_IS_NULL,
                            SQL_THEN,
                            SQL_ZERO,
                            SQL_ELSE,
                            SQL_ONE,
                            SQL_END,
                        ))
                    )
                else:
                    acc.append(SQL_TRUE)

    if not acc:
        return wrap([{}])
    else:
        # KEY WAS MISSPELLED "nanme"; EVERY OTHER to_sql() RESULT USES "name"
        return wrap([{"name": ".", "sql": {"n": SQL("+").join(acc)}}])
def _insert(self, collection):
    """
    INSERT THE ACCUMULATED ROWS INTO THE TABLE OF EACH NESTED PATH

    ONE INSERT STATEMENT, IN ITS OWN TRANSACTION, IS EXECUTED PER NESTED PATH

    :param collection: MAP FROM nested_path TO details, WHERE details HAS
                       active_columns AND rows
    """
    for nested_path, details in collection.items():
        active_columns = wrap(list(details.active_columns))
        rows = details.rows
        table_name = concat_field(self.name, nested_path)

        if table_name == self.name:
            # DO NOT REQUIRE PARENT OR ORDER COLUMNS
            meta_columns = [GUID, UID]
        else:
            meta_columns = [UID, PARENT, ORDER]

        all_columns = meta_columns + active_columns.es_column

        # ONLY THE PRIMITIVE VALUE COLUMNS
        command = ConcatSQL([
            SQL_INSERT,
            quote_column(table_name),
            sql_iso(sql_list(map(quote_column, all_columns))),
            SQL_VALUES,
            sql_list(
                sql_iso(sql_list(quote_value(row.get(c)) for c in all_columns))
                for row in unwrap(rows)
            ),
        ])

        with self.db.transaction() as t:
            t.execute(command)
def sql_create(table, properties, primary_key=None, unique=None):
    """
    BUILD THE SQL STATEMENT THAT CREATES A TABLE

    :param table: NAME OF THE TABLE TO CREATE
    :param properties: DICT WITH {name: type} PAIRS (type can be plain text)
    :param primary_key: COLUMNS THAT MAKE UP THE PRIMARY KEY
    :param unique: COLUMNS THAT SHOULD BE UNIQUE
    :return: SQL
    """
    acc = [
        SQL_CREATE,
        quote_column(table),
        SQL_OP,
        sql_list([quote_column(k) + SQL(v) for k, v in properties.items()]),
    ]
    if primary_key:
        acc.append(SQL_COMMA)
        acc.append(SQL(" PRIMARY KEY "))
        acc.append(sql_iso(sql_list([quote_column(c) for c in listwrap(primary_key)])))
    if unique:
        acc.append(SQL_COMMA)
        acc.append(SQL(" UNIQUE "))
        acc.append(sql_iso(sql_list([quote_column(c) for c in listwrap(unique)])))

    acc.append(SQL_CP)
    return ConcatSQL(acc)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    RETURN BOOLEAN SQL TESTING self.value AGAINST self.prefix

    A LITERAL PREFIX IS COMPARED WITH SQL_LIKE; ANY OTHER PREFIX IS COMPARED
    TO THE LEADING SUBSTRING OF self.value (SAME LENGTH AS THE PREFIX)

    :param schema: PASSED THROUGH TO THE OPERANDS' to_sql()
    :param not_null: NOT USED HERE
    :param boolean: NOT USED HERE
    :return: WRAPPED LIST WITH ONE {"name", "sql"} ENTRY OF TYPE "b"
    """
    prefix = SQLang[self.prefix].partial_eval()
    if is_literal(prefix):
        value = SQLang[self.value].partial_eval().to_sql(schema)[0].sql.s
        prefix = prefix.to_sql(schema)[0].sql.s
        # NOTE(review): THE `in` TESTS AND replace() ASSUME THE "s" FRAGMENT
        # BEHAVES LIKE TEXT - confirm AGAINST THE SQL CLASS
        if "%" in prefix or "_" in prefix:
            # ESCAPE THE ESCAPE CHARACTER FIRST, THEN THE LIKE WILDCARDS
            # (WAS replaceAll(), WHICH IS NOT A PYTHON STRING METHOD)
            for r in "\\_%":
                prefix = prefix.replace(r, "\\" + r)
            # NOTE(review): THE ESCAPE CHARACTER IS EMITTED AS RAW SQL, NOT AS
            # A QUOTED VALUE - confirm THE DATABASE ACCEPTS THIS
            sql = ConcatSQL((value, SQL_LIKE, prefix, SQL_ESCAPE, SQL("\\")))
        else:
            sql = ConcatSQL((value, SQL_LIKE, prefix))
        # NOTE(review): NO TRAILING WILDCARD IS ADDED TO THE PATTERN HERE -
        # confirm IT IS SUPPLIED ELSEWHERE, OTHERWISE THIS IS A WHOLE-VALUE MATCH
        return wrap([{"name": ".", "sql": {"b": sql}}])
    else:
        # to_sql() REQUIRES THE SCHEMA; IT WAS PREVIOUSLY CALLED WITHOUT IT
        return (
            SqlEqOp([SqlSubstrOp([self.value, ONE, LengthOp(prefix)]), prefix])
            .partial_eval()
            .to_sql(schema)
        )
def quote_column(*path):
    """
    RETURN SQL FOR A COLUMN (OR TABLE) REFERENCE

    :param path: ONE OR MORE TEXT NAMES; EACH IS PASSED THROUGH quote() AND
                 THEY ARE JOINED WITH SQL_DOT
    :return: SQL, WITH SQL_SPACE ON EITHER SIDE OF THE QUOTED NAME
    """
    if not path:
        Log.error("expecting a name")
    if any(not is_text(p) for p in path):
        Log.error("expecting strings, not SQL")
    try:
        return ConcatSQL((
            SQL_SPACE,
            JoinSQL(SQL_DOT, [SQL(quote(p)) for p in path]),
            SQL_SPACE,
        ))
    except Exception as e:
        Log.error("Not expected", cause=e)
def sql_lt(**item):
    """
    BUILD SQL FOR A LESS-THAN (<) COMPARISON OF ONE VARIABLE TO ONE VALUE

    ONLY THE FIRST KEYWORD PARAMETER IS USED

    :param item: keyword parameters representing variable and value
    :return: SQL
    """
    name, value = first(item.items())
    column = quote_column(name)
    return ConcatSQL((column, SQL_LT, quote_value(value)))
def sql_insert(table, records):
    """
    BUILD ONE INSERT STATEMENT COVERING ALL records

    THE COLUMN LIST IS THE UNION OF THE KEYS FOUND IN ALL THE RECORDS

    :param table: NAME OF THE TABLE TO INSERT INTO
    :param records: ONE RECORD, OR A LIST OF RECORDS
    :return: SQL
    """
    records = listwrap(records)
    keys = list(set(k for r in records for k in r.keys()))

    column_list = sql_iso(sql_list(map(quote_column, keys)))
    value_tuples = (
        sql_iso(sql_list([quote_value(r[k]) for k in keys]))
        for r in records
    )
    return ConcatSQL([
        SQL_INSERT,
        quote_column(table),
        column_list,
        SQL_VALUES,
        sql_list(value_tuples),
    ])
def to_sql(self, schema, not_null=False, boolean=False):
    """
    RETURN BOOLEAN SQL TESTING IF self.value IS A MEMBER OF self.superset

    ONLY A LITERAL superset IS SUPPORTED; AN EMPTY ONE GIVES SQL_FALSE

    :param schema: PASSED THROUGH TO self.value's to_sql()
    :param not_null: NOT USED HERE
    :param boolean: NOT USED HERE
    :return: WRAPPED LIST WITH ONE {"name", "sql"} ENTRY OF TYPE "b"
    """
    if not is_op(self.superset, Literal):
        Log.error("Not supported")

    members = json2value(self.superset.json)
    if not members:
        return wrap([{"name": ".", "sql": {"b": SQL_FALSE}}])

    value_sql = SQLang[self.value].to_sql(schema)
    # ONE MEMBERSHIP TEST FOR EACH SQL TYPE THE VALUE CAN TAKE
    tests = (
        sql_iso(ConcatSQL((typed_sql, SQL_IN, quote_list(members))))
        for _, typed_sql in value_sql[0].sql.items()
    )
    return wrap([{"name": ".", "sql": {"b": SQL_OR.join(tests)}}])
def sql(self):
    """
    RETURN {sql_type: SQL} FOR THIS EXPRESSION, ACCOUNTING FOR MISSING VALUES

    self.miss IS REPLACED WITH ITS partial_eval() AS A SIDE EFFECT

    :return: WRAPPED DICT KEYED BY THE SQL TYPE OF self.data_type
             * SQL_NULL WHEN ALWAYS MISSING
             * self.expr WHEN NEVER MISSING
             * OTHERWISE A CASE EXPRESSION GIVING self.expr WHEN NOT MISSING
    """
    self.miss = self.miss.partial_eval()
    sql_type = json_type_to_sql_type[self.data_type]

    if self.miss is TRUE:
        value = SQL_NULL
    elif self.miss is FALSE:
        value = self.expr
    else:
        is_missing = SQLang[self.miss].to_sql(self.schema)[0].sql.b
        value = ConcatSQL((
            SQL_CASE,
            SQL_WHEN,
            SQL_NOT,
            sql_iso(is_missing),
            SQL_THEN,
            self.expr,
            SQL_END,
        ))
    return wrap({sql_type: value})
def to_sql(self, schema, not_null=False, boolean=False):
    """
    RETURN NUMERIC SQL FOR THE LENGTH OF self.term

    LITERAL TEXT AND NUMBERS ARE MEASURED HERE (NUMBERS BY THEIR JSON FORM);
    ANY OTHER LITERAL RETURNS Null.  NON-LITERALS USE THE SQL LENGTH FUNCTION

    :param schema: PASSED THROUGH TO THE TERM'S to_sql()
    :param not_null: PASSED THROUGH TO THE TERM'S to_sql()
    :param boolean: NOT USED HERE
    :return: WRAPPED LIST WITH ONE {"name", "sql"} ENTRY OF TYPE "n", OR Null
    """
    term = SQLang[self.term].partial_eval()

    if not is_literal(term):
        text_sql = term.to_sql(schema, not_null=not_null)[0].sql.s
        length_sql = ConcatSQL((SQL("LENGTH"), sql_iso(text_sql)))
    else:
        literal = term.value
        if isinstance(literal, text):
            length_sql = quote_value(len(literal))
        elif isinstance(literal, (float, int)):
            length_sql = quote_value(len(convert.value2json(literal)))
        else:
            return Null

    return wrap([{"name": ".", "sql": {"n": length_sql}}])
def to_sql(self, schema, not_null=False, boolean=False):
    """
    RETURN A SQL CASE EXPRESSION FOR self.whens, ONE FOR EACH OF THE b, s, n TYPES

    THE LAST ENTRY OF self.whens IS THE ELSE VALUE; EVERY EARLIER ENTRY HAS A
    when CONDITION AND A then VALUE.  A BRANCH WITH NO SQL FOR A GIVEN TYPE
    CONTRIBUTES SQL_NULL

    :param schema: PASSED THROUGH TO EVERY CLAUSE'S to_sql()
    :param not_null: NOT USED HERE
    :param boolean: NOT USED HERE
    :return: WRAPPED LIST WITH ONE {"name", "sql"} ENTRY
    """
    if len(self.whens) == 1:
        return SQLang[self.whens[-1]].to_sql(schema)

    # TRANSLATE EVERY CLAUSE ONCE, THEN REUSE THE RESULT FOR EACH SQL TYPE
    # (PREVIOUSLY EVERY to_sql() WAS REPEATED FOR EACH OF THE THREE TYPES)
    else_sql = SQLang[self.whens[-1]].to_sql(schema)[0].sql
    branches = [
        (
            SQLang[w.when].to_sql(schema, boolean=True)[0].sql.b,
            SQLang[w.then].to_sql(schema)[0].sql,
        )
        for w in reversed(self.whens[0:-1])
    ]

    output = {}
    for t in "bsn":
        acc = SQL_ELSE + coalesce(else_sql[t], SQL_NULL) + SQL_END
        # BRANCHES ARE IN REVERSE ORDER, SO THE FIRST when ENDS UP OUTERMOST
        for when_sql, then_sql in branches:
            acc = ConcatSQL((
                SQL_WHEN,
                when_sql,
                SQL_THEN,
                coalesce(then_sql[t], SQL_NULL),
                acc,
            ))
        output[t] = SQL_CASE + acc
    return wrap([{"name": ".", "sql": output}])
def sql_query(command):
    """
    VERY BASIC QUERY EXPRESSION TO SQL

    HANDLES select (DEFAULT IS ALL COLUMNS), from, where.eq AND orderby

    :param command: jx-expression
    :return: SQL
    """
    command = wrap(command)

    if command.select:
        selection = JoinSQL(SQL_COMMA, map(quote_column, listwrap(command.select)))
    else:
        selection = SQL_STAR

    parts = [SQL_SELECT, selection, SQL_FROM, quote_column(command["from"])]

    if command.where.eq:
        parts.extend((SQL_WHERE, sql_eq(**command.where.eq)))

    if command.orderby:
        ordering = JoinSQL(SQL_COMMA, map(quote_column, listwrap(command.orderby)))
        parts.extend((SQL_ORDERBY, ordering))

    return ConcatSQL(parts)
def where(self, filter):
    """
    WILL NOT PULL WHOLE OBJECT, JUST TOP-LEVEL PROPERTIES

    STRUCT-TYPED COLUMNS, AND COLUMNS WHOSE nested_path DOES NOT HAVE
    EXACTLY ONE STEP, ARE LEFT OUT

    :param filter: jx_expression filter
    :return: list of objects that match
    """
    top_level = [
        c
        for c in self.schema.columns
        if c.jx_type not in STRUCT and len(c.nested_path) == 1
    ]
    column_names = [c.name for c in top_level]
    select = [sql_alias(quote_column(c.es_column), c.name) for c in top_level]

    where_sql = SQLang[jx_expression(filter)].to_sql(self.schema)[0].sql.b
    command = ConcatSQL((
        SQL_SELECT,
        JoinSQL(SQL_COMMA, select),
        SQL_FROM,
        quote_column(self.snowflake.fact_name),
        SQL_WHERE,
        where_sql,
    ))
    result = self.db.query(command)

    # ROWS COME BACK IN select ORDER, PAIR EACH VALUE WITH ITS COLUMN NAME
    return wrap([dict(zip(column_names, r)) for r in result.data])
def sql_alias(value, alias):
    """
    RETURN SQL THAT GIVES value THE NAME alias

    :param value: SQL TO BE NAMED
    :param alias: TEXT NAME (IT IS QUOTED HERE)
    :return: SQL
    """
    if not (isinstance(value, SQL) and is_text(alias)):
        Log.error("Expecting (SQL, text) parameters")
    quoted_alias = quote_column(alias)
    return ConcatSQL((value, SQL_AS, quoted_alias))
def _set_op(self, query):
    """
    RUN A (NON-AGGREGATE) QUERY OVER THE FACT TABLE AND ITS NESTED TABLES

    BUILDS THE SELECT CLAUSES FOR EVERY TABLE IN self.snowflake, EXECUTES ONE
    ORDERED QUERY, REASSEMBLES THE NESTED DOCUMENTS FROM THE FLAT ROWS, AND
    PACKAGES THE RESULT ACCORDING TO query.format

    :param query: THE QUERY; select, sort, where, limit AND format ARE READ
    :return: Data WITH meta.format ONE OF "cube", "table" OR "list"
    """
    # GET LIST OF SELECTED COLUMNS
    vars_ = UNION([
        v.var
        for select in listwrap(query.select)
        for v in select.value.vars()
    ])
    schema = self.schema
    known_vars = schema.keys()

    active_columns = {".": set()}
    for v in vars_:
        for c in schema.leaves(v):
            nest = c.nested_path[0]
            active_columns.setdefault(nest, set()).add(c)

    # ANY VARS MENTIONED WITH NO COLUMNS?
    for v in vars_:
        if not any(startswith_field(cname, v) for cname in known_vars):
            active_columns["."].add(
                Column(
                    name=v,
                    jx_type=IS_NULL,
                    es_column=".",
                    es_index=".",
                    es_type='NULL',
                    nested_path=["."],
                    last_updated=Date.now()
                )
            )

    # EVERY COLUMN, AND THE INDEX IT TAKES UP
    index_to_column = {}  # MAP FROM INDEX TO COLUMN (OR SELECT CLAUSE)
    index_to_uid = {}  # FROM NESTED PATH TO THE INDEX OF UID
    sql_selects = []  # EVERY SELECT CLAUSE (NOT TO BE USED ON ALL TABLES, OF COURSE)
    nest_to_alias = {
        nested_path[0]: "__" + unichr(ord('a') + i) + "__"
        for i, nested_path in enumerate(self.snowflake.query_paths)
    }

    sorts = []
    if query.sort:
        for select in query.sort:
            col = SQLang[select.value].to_sql(schema)[0]
            for t, sql in col.sql.items():
                json_type = sql_type_to_json_type[t]
                if json_type in STRUCT:
                    continue
                column_number = len(sql_selects)
                # SQL HAS ABS TABLE REFERENCE
                column_alias = _make_column_name(column_number)
                sql_selects.append(sql_alias(sql, column_alias))
                if select.sort == -1:
                    sorts.append(quote_column(column_alias) + SQL_IS_NULL)
                    sorts.append(quote_column(column_alias) + " DESC")
                else:
                    sorts.append(quote_column(column_alias) + SQL_IS_NULL)
                    sorts.append(quote_column(column_alias))

    primary_doc_details = Data()
    # EVERY SELECT STATEMENT THAT WILL BE REQUIRED, NO MATTER THE DEPTH
    # WE WILL CREATE THEM ACCORDING TO THE DEPTH REQUIRED
    nested_path = []
    for step, sub_table in self.snowflake.tables:
        nested_path.insert(0, step)
        nested_doc_details = {
            "sub_table": sub_table,
            "children": [],
            "index_to_column": {},
            "nested_path": nested_path
        }

        # INSERT INTO TREE
        if not primary_doc_details:
            primary_doc_details = nested_doc_details
        else:
            def place(parent_doc_details):
                if startswith_field(step, parent_doc_details['nested_path'][0]):
                    for c in parent_doc_details['children']:
                        if place(c):
                            return True
                    parent_doc_details['children'].append(nested_doc_details)

            place(primary_doc_details)

        alias = nested_doc_details['alias'] = nest_to_alias[step]

        # WE ALWAYS ADD THE UID
        column_number = index_to_uid[step] = nested_doc_details['id_coord'] = len(sql_selects)
        sql_select = quote_column(alias, UID)
        sql_selects.append(sql_alias(sql_select, _make_column_name(column_number)))
        if step != ".":
            # ID AND ORDER FOR CHILD TABLES
            index_to_column[column_number] = ColumnMapping(
                sql=sql_select,
                type="number",
                nested_path=nested_path,
                column_alias=_make_column_name(column_number)
            )
            column_number = len(sql_selects)
            sql_select = quote_column(alias, ORDER)
            sql_selects.append(sql_alias(sql_select, _make_column_name(column_number)))
            index_to_column[column_number] = ColumnMapping(
                sql=sql_select,
                type="number",
                nested_path=nested_path,
                column_alias=_make_column_name(column_number)
            )

        # WE DO NOT NEED DATA FROM TABLES WE REQUEST NOTHING FROM
        if step not in active_columns:
            continue

        # ADD SQL SELECT COLUMNS FOR EACH jx SELECT CLAUSE
        si = 0
        for select in listwrap(query.select):
            try:
                column_number = len(sql_selects)
                select.pull = get_column(column_number)
                db_columns = SQLang[select.value].partial_eval().to_sql(schema)

                for column in db_columns:
                    for t, unsorted_sql in column.sql.items():
                        json_type = sql_type_to_json_type[t]
                        if json_type in STRUCT:
                            continue
                        column_number = len(sql_selects)
                        column_alias = _make_column_name(column_number)
                        sql_selects.append(sql_alias(unsorted_sql, column_alias))
                        if startswith_field(schema.path, step) and is_op(select.value, LeavesOp):
                            # ONLY FLATTEN primary_nested_path AND PARENTS, NOT CHILDREN
                            index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                push_name=literal_field(get_property_name(concat_field(select.name, column.name))),
                                push_child=".",
                                push_column_name=get_property_name(concat_field(select.name, column.name)),
                                push_column=si,
                                pull=get_column(column_number),
                                sql=unsorted_sql,
                                type=json_type,
                                column_alias=column_alias,
                                nested_path=nested_path
                            )
                            si += 1
                        else:
                            index_to_column[column_number] = nested_doc_details['index_to_column'][column_number] = ColumnMapping(
                                push_name=select.name,
                                push_child=column.name,
                                push_column_name=select.name,
                                push_column=si,
                                pull=get_column(column_number),
                                sql=unsorted_sql,
                                type=json_type,
                                column_alias=column_alias,
                                nested_path=nested_path
                            )
            finally:
                si += 1

    where_clause = BooleanOp(query.where).partial_eval().to_sql(schema, boolean=True)[0].sql.b
    unsorted_sql = self._make_sql_for_one_nest_in_set_op(
        ".",
        sql_selects,
        where_clause,
        active_columns,
        index_to_column
    )

    # ALWAYS ORDER BY THE UID OF EVERY TABLE, AFTER ANY REQUESTED SORT
    for n, _ in self.snowflake.tables:
        sorts.append(quote_column(COLUMN + text(index_to_uid[n])))

    ordered_sql = ConcatSQL((
        SQL_SELECT,
        SQL_STAR,
        SQL_FROM,
        sql_iso(unsorted_sql),
        SQL_ORDERBY,
        sql_list(sorts),
        SQL_LIMIT,
        quote_value(query.limit)
    ))
    result = self.db.query(ordered_sql)

    def _accumulate_nested(rows, row, nested_doc_details, parent_doc_id, parent_id_coord):
        """
        :param rows: REVERSED STACK OF ROWS (WITH push() AND pop())
        :param row: CURRENT ROW BEING EXTRACTED
        :param nested_doc_details: {
                "nested_path": wrap_nested_path(nested_path),
                "index_to_column": map from column number to column details
                "children": all possible direct descendants' nested_doc_details
             }
        :param parent_doc_id: the id of the parent doc (for detecting when to step out of loop)
        :param parent_id_coord: the column number for the parent id (so we can extract from each row)
        :return: the nested property (usually an array)
        """
        previous_doc_id = None
        doc = Null
        output = []
        id_coord = nested_doc_details['id_coord']

        while True:
            doc_id = row[id_coord]

            if doc_id == None or (parent_id_coord is not None and row[parent_id_coord] != parent_doc_id):
                rows.append(row)  # UNDO PREVIOUS POP (RECORD IS NOT A NESTED RECORD OF parent_doc)
                return output

            if doc_id != previous_doc_id:
                previous_doc_id = doc_id
                doc = Null
                curr_nested_path = nested_doc_details['nested_path'][0]
                index_to_column = nested_doc_details['index_to_column'].items()
                for i, c in index_to_column:
                    value = row[i]
                    if is_list(query.select) or is_op(query.select.value, LeavesOp):
                        # ASSIGN INNER PROPERTIES
                        relative_path = concat_field(c.push_name, c.push_child)
                    else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                        relative_path = c.push_child

                    if relative_path == ".":
                        if exists(value):
                            doc = value
                    elif exists(value):
                        if doc is Null:
                            doc = Data()
                        doc[relative_path] = value

            for child_details in nested_doc_details['children']:
                # EACH NESTED TABLE MUST BE ASSEMBLED INTO A LIST OF OBJECTS
                child_id = row[child_details['id_coord']]
                if child_id is not None:
                    nested_value = _accumulate_nested(rows, row, child_details, doc_id, id_coord)
                    if nested_value != None:
                        push_name = child_details['nested_path'][0]
                        if is_list(query.select) or is_op(query.select.value, LeavesOp):
                            # ASSIGN INNER PROPERTIES
                            # THE LOCAL IS NAMED relative_path SO IT DOES NOT HIDE THE
                            # relative_field() HELPER CALLED HERE (THE HELPER IS
                            # presumably IMPORTED AT THE TOP OF THE FILE - TODO confirm)
                            relative_path = relative_field(push_name, curr_nested_path)
                        else:  # FACT IS EXPECTED TO BE A SINGLE VALUE, NOT AN OBJECT
                            relative_path = "."

                        if relative_path == ".":
                            doc = unwraplist(nested_value)
                        else:
                            doc[relative_path] = unwraplist(nested_value)

            output.append(doc)

            try:
                row = rows.pop()
            except IndexError:
                return output

    cols = tuple([i for i in index_to_column.values() if i.push_name != None])
    rows = list(reversed(unwrap(result.data)))
    if rows:
        row = rows.pop()
        data = _accumulate_nested(rows, row, primary_doc_details, None, None)
    else:
        data = result.data

    if query.format == "cube":
        if is_list(query.select) or is_op(query.select.value, LeavesOp):
            num_rows = len(data)
            temp_data = {c.push_column_name: [None] * num_rows for c in cols}
            for rownum, d in enumerate(data):
                for c in cols:
                    temp_data[c.push_column_name][rownum] = d[c.push_name]
            return Data(
                meta={"format": "cube"},
                data=temp_data,
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }]
            )
        else:
            num_rows = len(data)
            map_index_to_name = {c.push_column: c.push_column_name for c in cols}
            temp_data = [data]

            return Data(
                meta={"format": "cube"},
                data={n: temp_data[c] for c, n in map_index_to_name.items()},
                edges=[{
                    "name": "rownum",
                    "domain": {
                        "type": "rownum",
                        "min": 0,
                        "max": num_rows,
                        "interval": 1
                    }
                }]
            )
    elif query.format == "table":
        if is_list(query.select) or is_op(query.select.value, LeavesOp):
            column_names = [None] * (max(c.push_column for c in cols) + 1)
            for c in cols:
                column_names[c.push_column] = c.push_column_name

            temp_data = []
            for d in data:
                row = [None] * len(column_names)
                for c in cols:
                    row[c.push_column] = d[c.push_name]
                temp_data.append(row)

            return Data(
                meta={"format": "table"},
                header=column_names,
                data=temp_data
            )
        else:
            column_names = listwrap(query.select).name
            return Data(
                meta={"format": "table"},
                header=column_names,
                data=[[d] for d in data]
            )
    else:
        if is_list(query.select) or is_op(query.select.value, LeavesOp):
            temp_data = []
            for d in data:
                row = {}
                for c in cols:
                    row[c.push_column_name] = d[c.push_name]
                temp_data.append(row)
            return Data(meta={"format": "list"}, data=temp_data)
        else:
            return Data(meta={"format": "list"}, data=data)
def delete(self, where):
    """
    DELETE THE ROWS OF THE FACT TABLE THAT MATCH where

    :param where: jx_expression filter
    """
    # to_sql() RETURNS A LIST OF {"name", "sql"} ENTRIES; ONLY THE BOOLEAN SQL
    # OF THE FIRST ENTRY BELONGS IN THE WHERE CLAUSE (SAME AS where() DOES)
    where_sql = SQLang[jx_expression(where)].to_sql(self.schema)[0].sql.b
    with self.db.transaction() as t:
        t.execute(ConcatSQL((
            SQL_DELETE,
            SQL_FROM,
            quote_column(self.snowflake.fact_name),
            SQL_WHERE,
            where_sql,
        )))