def insert_new(self, table_name, candidate_key, new_record):
    """
    INSERT new_record INTO table_name ONLY IF NO EXISTING ROW MATCHES candidate_key.

    Emits a single INSERT ... SELECT ... LEFT JOIN statement so the
    existence check and the insert happen in one command.
    """
    candidate_key = listwrap(candidate_key)

    # one equality (or IS NULL) test per key column
    def _key_test(k):
        if new_record[k] != None:
            return quote_column(k) + "=" + quote_value(new_record[k])
        return quote_column(k) + SQL_IS_NULL

    condition = SQL_AND.join([_key_test(k) for k in candidate_key])

    insert_columns = sql_iso(sql_list(quote_column(c) for c in new_record.keys()))

    # the candidate row, each value aliased to its column name
    aliased_values = sql_list([
        quote_value(v) + " " + quote_column(c)
        for c, v in new_record.items()
    ])
    source = sql_iso(SQL_SELECT + aliased_values + SQL_FROM + "DUAL") + " a"

    # probe for an existing row with the same candidate key
    existing = (
        sql_iso(
            SQL_SELECT + "'dummy' exist " + SQL_FROM + quote_column(table_name)
            + SQL_WHERE + condition + SQL_LIMIT + SQL_ONE
        )
        + " b ON " + SQL_TRUE
    )

    command = (
        "INSERT INTO " + quote_column(table_name) + insert_columns
        + SQL_SELECT + "a.*" + SQL_FROM + source
        + SQL_LEFT_JOIN + existing
        + SQL_WHERE + " exist " + SQL_IS_NULL
    )
    self.execute(command, {})
def _db_load(self):
    """
    Load all column metadata from the backing database, creating the
    metadata table first if it does not yet exist.
    """
    # remember when we loaded, so later polls can ask for newer rows only
    self.last_load = Date.now()

    # does the metadata table exist yet?
    # NOTE(review): "name=" + db_table_name assumes db_table_name renders as a
    # value sqlite_master.name will compare equal to — confirm quoting semantics
    result = self._query(
        SQL_SELECT
        + "name"
        + SQL_FROM
        + "sqlite_master"
        + SQL_WHERE
        + SQL_AND.join(["name=" + db_table_name, "type=" + quote_value("table")])
    )
    if not result.data:
        # first run: create the table; nothing to load
        self._db_create()
        return

    # pull every column record, in deterministic order
    result = self._query(
        SQL_SELECT
        + all_columns
        + SQL_FROM
        + db_table_name
        + SQL_ORDERBY
        + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
    )

    # register each row as a Column under the lock
    with self.locker:
        for r in result.data:
            c = row_to_column(result.header, r)
            self._add(c)
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Return SQL (a boolean column) testing whether self.expr is missing.

    Strings count as missing when NULL or empty; booleans and numbers
    when NULL.  Multi-column values are never missing.
    """
    simplified = SQLang[self.expr].partial_eval()
    missing = simplified.missing().partial_eval()
    if not is_op(missing, MissingOp):
        # the expression already simplified its own missing-ness; delegate
        return missing.to_sql(schema)

    sql = simplified.to_sql(schema)
    if len(sql) > 1:
        # a value spread over several columns is never considered missing
        return wrap([{"name": ".", "sql": {"b": SQL_FALSE}}])

    tests = []
    for col in sql:
        for sql_type, fragment in col.sql.items():
            if sql_type == "s":
                # strings: NULL or empty string both count as missing
                tests.append(
                    sql_iso(sql_iso(fragment) + SQL_IS_NULL)
                    + SQL_OR
                    + sql_iso(sql_iso(fragment) + "=" + SQL_EMPTY_STRING)
                )
            elif sql_type == "b":
                tests.append(sql_iso(fragment) + SQL_IS_NULL)
            elif sql_type == "n":
                tests.append(sql_iso(fragment) + SQL_IS_NULL)

    if not tests:
        # no testable parts: treat as always missing
        return wrap([{"name": ".", "sql": {"b": SQL_TRUE}}])
    return wrap([{"name": ".", "sql": {"b": SQL_AND.join(tests)}}])
def to_sql(self, schema, not_null=False, boolean=False):
    """
    Return SQL (a boolean column) for the conjunction of self.terms.
    """
    if not self.terms:
        # empty conjunction is vacuously true
        return wrap([{"name": ".", "sql": {"b": SQL_TRUE}}])

    if all(self.terms):
        clauses = [
            sql_iso(term.to_sql(schema, boolean=True)[0].sql.b)
            for term in self.terms
        ]
        return wrap([{"name": ".", "sql": {"b": SQL_AND.join(clauses)}}])

    # some term is falsy, so the whole conjunction is false
    return wrap([{"name": ".", "sql": {"b": SQL_FALSE}}])
def single(col, r):
    """
    SQL for one range filter on col; r maps comparison sign to value.
    Uses BETWEEN when both a lower (gte/>=) and upper (lte/<=) bound exist.
    """
    # avoid shadowing the min/max builtins
    lo = coalesce(r["gte"], r[">="])
    hi = coalesce(r["lte"], r["<="])
    if lo != None and hi != None:
        # SPECIAL CASE (BETWEEN)
        return (
            quote_column(col) + SQL(" BETWEEN ")
            + quote_value(lo) + SQL_AND + quote_value(hi)
        )
    return SQL_AND.join(
        quote_column(col) + name2sign[sign] + quote_value(value)
        for sign, value in r.items()
    )
def single(col, r):
    """
    SQL for one range filter on col, quoting through db; r maps
    comparison sign to value.  Uses BETWEEN when both bounds exist.
    """
    # lower/upper instead of min/max (do not shadow builtins)
    lower = coalesce(r["gte"], r[">="])
    upper = coalesce(r["lte"], r["<="])
    if lower != None and upper != None:
        # SPECIAL CASE (BETWEEN)
        return (
            db.quote_column(col) + SQL(" BETWEEN ")
            + db.quote_value(lower) + SQL_AND + db.quote_value(upper)
        )
    return SQL_AND.join(
        db.quote_column(col) + name2sign[sign] + db.quote_value(value)
        for sign, value in r.items()
    )
def sql_eq(**item):
    """
    RETURN SQL FOR COMPARING VARIABLES TO VALUES (AND'ED TOGETHER)

    :param item: keyword parameters representing variable and value
    :return: SQL
    """
    # NULL values require "IS NULL", not "= NULL"
    def _pair(col, val):
        if val != None:
            return ConcatSQL((quote_column(col), SQL_EQ, quote_value(val)))
        return ConcatSQL((quote_column(col), SQL_IS_NULL))

    return SQL_AND.join([_pair(k, v) for k, v in item.items()])
def update(self, table_name, where_slice, new_values):
    """
    UPDATE every row matching where_slice (a Data, eg {"id": 42}),
    setting the column/value pairs in new_values.
    """
    # quote values once; results are SQL fragments keyed by column name
    quoted = quote_param(new_values)

    # NULL matches require "IS NULL", not "= NULL"
    def _match(col, val):
        if val != None:
            return quote_column(col) + "=" + quote_value(val)
        return quote_column(col) + SQL_IS_NULL

    where_clause = SQL_AND.join([_match(k, v) for k, v in where_slice.items()])
    assignments = sql_list([quote_column(k) + "=" + v for k, v in quoted.items()])

    command = (
        "UPDATE " + quote_column(table_name) + "\n"
        + "SET " + assignments
        + SQL_WHERE + where_clause
    )
    self.execute(command, {})
def insert_new(self, table_name, candidate_key, new_record):
    """
    INSERT new_record INTO table_name unless a row with the same
    candidate_key already exists (single INSERT ... SELECT statement).
    """
    candidate_key = listwrap(candidate_key)

    # equality (or IS NULL) test for each key column
    key_tests = []
    for k in candidate_key:
        if new_record[k] != None:
            key_tests.append(
                self.quote_column(k) + "=" + self.quote_value(new_record[k])
            )
        else:
            key_tests.append(self.quote_column(k) + SQL_IS_NULL)
    condition = SQL_AND.join(key_tests)

    column_list = sql_iso(sql_list(self.quote_column(k) for k in new_record.keys()))
    value_row = sql_list([
        self.quote_value(v) + " " + self.quote_column(k)
        for k, v in new_record.items()
    ])

    command = (
        "INSERT INTO " + self.quote_column(table_name) + column_list
        + SQL_SELECT + "a.*" + SQL_FROM
        + sql_iso(SQL_SELECT + value_row + SQL_FROM + "DUAL") + " a"
        + SQL_LEFT_JOIN
        + sql_iso(
            SQL_SELECT + "'dummy' exist " + SQL_FROM
            + self.quote_column(table_name) + SQL_WHERE + condition
            + SQL_LIMIT + SQL_ONE
        ) + " b ON " + SQL_TRUE
        + SQL_WHERE + " exist " + SQL_IS_NULL
    )
    self.execute(command, {})
def update(self, table_name, where_slice, new_values):
    """
    UPDATE rows in table_name.

    where_slice - A Data WHICH WILL BE USED TO MATCH ALL IN table
    eg {"id": 42}
    new_values - A dict WITH COLUMN NAME, COLUMN VALUE PAIRS TO SET
    """
    # quote values once; results are SQL fragments keyed by column name
    new_values = quote_param(new_values)

    # NULL requires "IS NULL", not "= NULL"
    where_clause = SQL_AND.join([
        quote_column(k) + "=" + quote_value(v)
        if v != None
        else quote_column(k) + SQL_IS_NULL
        for k, v in where_slice.items()
    ])

    command = (
        "UPDATE " + quote_column(table_name) + "\n" +
        "SET " +
        sql_list([quote_column(k) + "=" + v for k, v in new_values.items()]) +
        SQL_WHERE + where_clause
    )
    self.execute(command, {})
sql_alias(quote_value(p.value), domain_name) for i, p in enumerate(query_edge.domain.partitions)) if query_edge.allowNulls: domain += (SQL_UNION_ALL + SQL_SELECT + sql_alias( quote_value(len(query_edge.domain.partitions)), quote_column("rownum")) + SQL_COMMA + sql_alias(SQL_NULL, domain_name)) where = None join_type = SQL_LEFT_JOIN if query_edge.allowNulls else SQL_INNER_JOIN on_clause = (SQL_OR.join( join_column(edge_alias, k) + " = " + v for k, v in zip(domain_names, vals)) + SQL_OR + sql_iso( join_column(edge_alias, domain_name) + SQL_IS_NULL + SQL_AND + SQL_AND.join(v + SQL_IS_NULL for v in vals))) null_on_clause = None else: domain = SQL_UNION_ALL.join( SQL_SELECT + sql_alias(quote_value(pp), domain_name) for pp, p in enumerate(query_edge.domain.partitions)) where = None join_type = SQL_LEFT_JOIN if query_edge.allowNulls else SQL_INNER_JOIN on_clause = SQL_AND.join( join_column(edge_alias, k) + " = " + sql for k, (t, sql) in zip(domain_names, edge_values)) null_on_clause = None elif query_edge.domain.type == "range": domain_name = quote_column("d" + text_type(edge_index) + "c0") domain_names = [ domain_name
def update(self, command):
    """
    :param command: EXPECTING dict WITH {"set": s, "clear": c, "where": w} FORMAT
    """
    command = wrap(command)

    # REJECT DEEP UPDATES
    # a touched column whose name is longer than its nested path prefix
    # lives inside a nested table; those updates are not supported here
    touched_columns = command.set.keys() | set(listwrap(command['clear']))
    for c in self.get_leaves():
        if c.name in touched_columns and c.nested_path and len(
                c.name) > len(c.nested_path[0]):
            Log.error("Deep update not supported")

    # ADD NEW COLUMNS
    # translate the where-expression's variables to es_column names
    where = jx_expression(command.where)
    _vars = where.vars()
    _map = {
        v: c.es_column
        for v in _vars
        for c in self.columns.get(v, Null)
        if c.jx_type not in STRUCT
    }
    where_sql = where.map(_map).to_sql()
    new_columns = set(command.set.keys()) - set(self.columns.keys())
    for new_column_name in new_columns:
        nested_value = command.set[new_column_name]
        ctype = get_type(nested_value)
        column = Column(name=new_column_name,
                        jx_type=ctype,
                        es_index=self.sf.fact,
                        es_column=typed_column(new_column_name, ctype))
        self.add_column(column)

    # UPDATE THE NESTED VALUES
    for nested_column_name, nested_value in command.set.items():
        if get_type(nested_value) == "nested":
            # NOTE(review): nested_table_name is computed but never used below
            nested_table_name = concat_field(self.sf.fact, nested_column_name)
            # NOTE(review): nested_tables is not defined in this method or
            # visible scope — presumably self.nested_tables; confirm
            nested_table = nested_tables[nested_column_name]
            # primary key of the fact table, as a quoted column list
            self_primary_key = sql_list(
                quote_column(c.es_column) for u in self.uid
                for c in self.columns[u])
            extra_key_name = UID_PREFIX + "id" + text_type(len(self.uid))
            extra_key = [e for e in nested_table.columns[extra_key_name]][0]

            # delete existing nested rows belonging to the matched parents
            sql_command = ("DELETE" + SQL_FROM +
                           quote_column(nested_table.name) + SQL_WHERE +
                           "EXISTS (" + "\nSELECT 1 " + SQL_FROM +
                           quote_column(nested_table.name) + " n" +
                           SQL_INNER_JOIN + "(" + SQL_SELECT +
                           self_primary_key + SQL_FROM +
                           quote_column(self.sf.fact) + SQL_WHERE +
                           where_sql + "\n) t ON " +
                           SQL_AND.join("t." + quote_column(c.es_column) +
                                        " = n." + quote_column(c.es_column)
                                        for u in self.uid
                                        for c in self.columns[u]) + ")")
            self.db.execute(sql_command)

            # INSERT NEW RECORDS
            if not nested_value:
                continue

            # flatten the nested documents into per-table row collections
            doc_collection = {}
            for d in listwrap(nested_value):
                nested_table.flatten(d,
                                     Data(),
                                     doc_collection,
                                     path=nested_column_name)

            prefix = "INSERT INTO " + quote_column(
                nested_table.name
            ) + sql_iso(
                sql_list([self_primary_key] + [quote_column(extra_key)] + [
                    quote_column(c.es_column)
                    for c in doc_collection.get(".", Null).active_columns
                ]))

            # BUILD THE PARENT TABLES
            # parent keys for every row matched by the where clause
            parent = (SQL_SELECT + self_primary_key + SQL_FROM +
                      quote_column(self.sf.fact) + SQL_WHERE +
                      jx_expression(command.where).to_sql())

            # BUILD THE RECORDS
            # one SELECT per child row, UNION ALL'd together; i becomes
            # the extra (ordinal) key value
            children = SQL_UNION_ALL.join(
                SQL_SELECT + quote_value(i) + " " +
                quote_column(extra_key.es_column) + "," + sql_list(
                    quote_value(row[c.name]) + " " + quote_column(c.es_column)
                    for c in doc_collection.get(".", Null).active_columns)
                for i, row in enumerate(doc_collection.get(".", Null).rows))

            # cross-join parents with children (ON TRUE) to fan out rows
            sql_command = (prefix + SQL_SELECT + sql_list([
                join_column("p", c.es_column) for u in self.uid
                for c in self.columns[u]
            ] + [join_column("c", extra_key)] + [
                join_column("c", c.es_column)
                for c in doc_collection.get(".", Null).active_columns
            ]) + SQL_FROM + sql_iso(parent) + " p" + SQL_INNER_JOIN +
                           sql_iso(children) + " c" + " ON " + SQL_TRUE)
            self.db.execute(sql_command)

            # THE CHILD COLUMNS COULD HAVE EXPANDED
            # ADD COLUMNS TO SELF
            for n, cs in nested_table.columns.items():
                for c in cs:
                    column = Column(name=c.name,
                                    jx_type=c.jx_type,
                                    es_type=c.es_type,
                                    es_index=c.es_index,
                                    es_column=c.es_column,
                                    nested_path=[nested_column_name] +
                                    c.nested_path)
                    if c.name not in self.columns:
                        self.columns[column.name] = {column}
                    elif c.jx_type not in [
                            c.jx_type for c in self.columns[c.name]
                    ]:
                        self.columns[column.name].add(column)

    # finally, apply the shallow sets and clears on the fact table itself
    command = (
        "UPDATE " + quote_column(self.sf.fact) + " SET " + sql_list([
            quote_column(c) + "=" + quote_value(get_if_type(v, c.jx_type))
            for k, v in command.set.items() if get_type(v) != "nested"
            for c in self.columns[k]
            if c.jx_type != "nested" and len(c.nested_path) == 1
        ] + [
            quote_column(c) + "=" + SQL_NULL
            for k in listwrap(command['clear']) if k in self.columns
            for c in self.columns[k]
            if c.jx_type != "nested" and len(c.nested_path) == 1
        ]) + SQL_WHERE + where_sql)
    self.db.execute(command)
def _db_worker(self, please_stop):
    """
    Background worker: repeatedly pull remote column changes from the
    metadata database and push queued local changes, until please_stop.
    """
    while not please_stop:
        try:
            # PULL: fetch rows changed since our last load
            with self._db_transaction():
                result = self._query(
                    SQL_SELECT
                    + all_columns
                    + SQL_FROM
                    + db_table_name
                    + SQL_WHERE
                    + "last_updated > "
                    + quote_value(self.last_load)
                    + SQL_ORDERBY
                    + sql_list(map(quote_column, ["es_index", "name", "es_column"]))
                )

            with self.locker:
                for r in result.data:
                    c = row_to_column(result.header, r)
                    self._add(c)
                    # advance the high-water mark
                    if c.last_updated > self.last_load:
                        self.last_load = c.last_updated

            # PUSH: drain the local change queue
            updates = self.todo.pop_all()
            DEBUG and updates and Log.note(
                "{{num}} columns to push to db", num=len(updates)
            )
            for action, column in updates:
                # retry loop for each individual update
                while not please_stop:
                    try:
                        with self._db_transaction():
                            DEBUG and Log.note(
                                "{{action}} db for {{table}}.{{column}}",
                                action=action,
                                table=column.es_index,
                                column=column.es_column,
                            )
                            if action is EXECUTE:
                                # NOTE(review): here `column` is passed
                                # straight to execute — presumably it is a
                                # raw command, not a Column; confirm
                                self.db.execute(column)
                            elif action is UPDATE:
                                # NOTE(review): "UPDATE"/"SET" are bare
                                # strings with no surrounding spaces —
                                # assumes db_table_name / quote helpers
                                # supply spacing; confirm rendered SQL
                                self.db.execute(
                                    "UPDATE"
                                    + db_table_name
                                    + "SET"
                                    + sql_list(
                                        [
                                            "count=" + quote_value(column.count),
                                            "cardinality="
                                            + quote_value(column.cardinality),
                                            "multi=" + quote_value(column.multi),
                                            "partitions="
                                            + quote_value(
                                                value2json(column.partitions)
                                            ),
                                            "last_updated="
                                            + quote_value(column.last_updated),
                                        ]
                                    )
                                    + SQL_WHERE
                                    + SQL_AND.join(
                                        [
                                            "es_index = "
                                            + quote_value(column.es_index),
                                            "es_column = "
                                            + quote_value(column.es_column),
                                            # only update if ours is newer
                                            "last_updated < "
                                            + quote_value(column.last_updated),
                                        ]
                                    )
                                )
                            elif action is DELETE:
                                self.db.execute(
                                    "DELETE FROM"
                                    + db_table_name
                                    + SQL_WHERE
                                    + SQL_AND.join(
                                        [
                                            "es_index = "
                                            + quote_value(column.es_index),
                                            "es_column = "
                                            + quote_value(column.es_column),
                                        ]
                                    )
                                )
                            else:
                                # any other action is an insert
                                self._db_insert_column(column)
                        # success: stop retrying this update
                        break
                    except Exception as e:
                        e = Except.wrap(e)
                        if "database is locked" in e:
                            # NOTE(review): this waits then BREAKS, abandoning
                            # the update instead of retrying — confirm intent
                            Log.note("metadata database is locked")
                            Till(seconds=1).wait()
                            break
                        else:
                            Log.warning("problem updataing database", cause=e)
        except Exception as e:
            Log.warning("problem updating database", cause=e)

        # pause between polling rounds (or wake early on stop)
        (Till(seconds=10) | please_stop).wait()
def _esfilter2sqlwhere(db, esfilter):
    """
    CONVERT ElassticSearch FILTER TO SQL FILTER
    db - REQUIRED TO PROPERLY QUOTE VALUES AND COLUMN NAMES
    """
    # NOTE(review): db is only forwarded to recursive esfilter2sqlwhere calls;
    # quoting here uses module-level quote_column/quote_value — confirm intent
    esfilter = wrap(esfilter)

    if esfilter is True:
        return SQL_TRUE
    elif esfilter["and"]:
        # conjunction: recurse on each clause
        return sql_iso(SQL_AND.join([esfilter2sqlwhere(db, a) for a in esfilter["and"]]))
    elif esfilter["or"]:
        # disjunction: recurse on each clause
        return sql_iso(SQL_OR.join([esfilter2sqlwhere(db, a) for a in esfilter["or"]]))
    elif esfilter["not"]:
        return SQL_NOT + sql_iso(esfilter2sqlwhere(db, esfilter["not"]))
    elif esfilter.term:
        # exact-match on one or more columns
        return sql_iso(SQL_AND.join([
            quote_column(col) + SQL("=") + quote_value(val)
            for col, val in esfilter.term.items()
        ]))
    elif esfilter.terms:
        # membership test; integer lists get a packed-range optimization
        # NOTE(review): returns the literal strings "FALSE" here and "false"
        # below, rather than SQL_FALSE — confirm this inconsistency is intended
        for col, v in esfilter.terms.items():
            if len(v) == 0:
                return "FALSE"
            try:
                int_list = convert.value2intlist(v)
                has_null = False
                for vv in v:
                    if vv == None:
                        has_null = True
                        break
                if int_list:
                    # pack consecutive ints into ranges when possible
                    filter = int_list_packer(col, int_list)
                    if has_null:
                        return esfilter2sqlwhere(db, {"or": [{"missing": col}, filter]})
                    elif 'terms' in filter and set(filter['terms'].get(col, [])) == set(int_list):
                        # packing did not help; use a plain IN list
                        return quote_column(col) + " in " + quote_list(int_list)
                    else:
                        return esfilter2sqlwhere(db, filter)
                else:
                    if has_null:
                        return esfilter2sqlwhere(db, {"missing": col})
                    else:
                        return "false"
            except Exception as e:
                # fall through to the generic IN list below
                e = Except.wrap(e)
                pass
            return quote_column(col) + " in " + quote_list(v)
    elif esfilter.script:
        # raw SQL fragment, passed through
        return sql_iso(esfilter.script)
    elif esfilter.range:
        name2sign = {
            "gt": SQL(">"),
            "gte": SQL(">="),
            "lte": SQL("<="),
            "lt": SQL("<")
        }

        def single(col, r):
            # one range filter on col; r maps sign -> value
            min = coalesce(r["gte"], r[">="])
            max = coalesce(r["lte"], r["<="])
            if min != None and max != None:
                # SPECIAL CASE (BETWEEN)
                sql = quote_column(col) + SQL(" BETWEEN ") + quote_value(min) + SQL_AND + quote_value(max)
            else:
                sql = SQL_AND.join(
                    quote_column(col) + name2sign[sign] + quote_value(value)
                    for sign, value in r.items()
                )
            return sql

        terms = [single(col, ranges) for col, ranges in esfilter.range.items()]
        if len(terms) == 1:
            output = terms[0]
        else:
            output = sql_iso(SQL_AND.join(terms))
        return output
    elif esfilter.missing:
        # accepts either a field name or {"field": name}
        if isinstance(esfilter.missing, text_type):
            return sql_iso(quote_column(esfilter.missing) + SQL_IS_NULL)
        else:
            return sql_iso(quote_column(esfilter.missing.field) + SQL_IS_NULL)
    elif esfilter.exists:
        # accepts either a field name or {"field": name}
        if isinstance(esfilter.exists, text_type):
            return sql_iso(quote_column(esfilter.exists) + SQL_IS_NOT_NULL)
        else:
            return sql_iso(quote_column(esfilter.exists.field) + SQL_IS_NOT_NULL)
    elif esfilter.match_all:
        return SQL_TRUE
    elif esfilter.instr:
        # substring test per column via SQL instr()
        return sql_iso(SQL_AND.join([
            "instr" + sql_iso(quote_column(col) + ", " + quote_value(val)) + ">0"
            for col, val in esfilter.instr.items()
        ]))
    else:
        Log.error("Can not convert esfilter to SQL: {{esfilter}}", esfilter=esfilter)