def sync_query(
    columns: typing.List[SqlId],
    key: typing.List[SqlId],
    key_table: SqlObject,
    query: str,
    target: SqlObject,
) -> SqlQuery:
    """
    Insert, update, and delete

    Upserts the rows produced by *query* into *target*, then deletes any row
    whose key appears in *key_table* but was not touched by the upsert.

    :param columns: all columns produced by *query*, in order
    :param key: conflict/primary-key columns (subset of *columns*)
    :param key_table: table listing candidate keys to reconcile
    :param query: SELECT statement producing the desired rows
    :param target: table to synchronize
    """
    # Non-key columns that must be updated on conflict.
    data_columns = [SqlId(column) for column in columns if column not in key]
    is_temp = target.names[0].name == "pg_temp"
    if not is_temp:
        # Deterministic row order so concurrent writers take row locks in the
        # same order; unnecessary for session-local temp tables.
        query += f"\nORDER BY {sql_list(SqlNumber(i + 1) for i, _ in enumerate(key))}"
    if data_columns:
        upsert_query = f"""
INSERT INTO {target} ({sql_list(columns)})
{query}
ON CONFLICT ({sql_list(key)}) DO UPDATE
    SET {update_excluded(data_columns)}
RETURNING {sql_list(key)}
""".strip()
    elif not is_temp:
        # Key-only table: DO UPDATE ... WHERE false writes nothing but still
        # takes the conflicting row's lock.
        # NOTE(review): RETURNING only emits rows actually inserted or
        # updated; under WHERE false, conflicting rows are not returned and
        # therefore fall through to the delete below — confirm intended.
        upsert_query = f"""
INSERT INTO {target} ({sql_list(columns)})
{query}
ON CONFLICT ({sql_list(key)}) DO UPDATE
    SET {update_excluded(key)}
    WHERE false
RETURNING {sql_list(key)}
""".strip()
    else:
        # NOTE(review): this branch has no RETURNING clause, but _upsert is
        # referenced by delete_query below; PostgreSQL forbids referencing a
        # data-modifying CTE that lacks RETURNING, so this branch likely
        # errors at runtime — probably needs "RETURNING {sql_list(key)}".
        # TODO confirm and fix.
        upsert_query = f"""
INSERT INTO {target} ({sql_list(columns)})
{query}
ON CONFLICT ({sql_list(key)}) DO NOTHING
"""
    upsert_expression = SqlTableExpr(SqlId("_upsert"), upsert_query)
    # Delete rows whose key is in key_table but absent from the upsert output
    # ("u.* IS NOT DISTINCT FROM NULL" <=> the LEFT JOIN found no match).
    delete_query = f"""
DELETE FROM {target} AS t
USING {key_table} AS k
    LEFT JOIN _upsert AS u ON ({table_fields(SqlId('k'), key)}) = ({table_fields(SqlId('u'), key)})
WHERE ({table_fields(SqlId('t'), key)}) = ({table_fields(SqlId('k'), key)})
    AND u.* IS NOT DISTINCT FROM NULL
""".strip()
    return SqlQuery(delete_query, expressions=[upsert_expression])
def create_setup_function(
    id: str,
    structure: AggStructure,
    aggregates: typing.Dict[str, AggAggregate],
    groups: typing.Dict[str, str],
    target: AggTable,
):
    """
    Yield SQL statements creating the per-session setup function for *id*.

    The generated plpgsql function is idempotent per session: it creates the
    session temp staging table (mirroring the target's group and aggregate
    columns), an empty refresh-marker temp table, and a deferred constraint
    trigger that runs the refresh function at commit.
    """
    refresh_constraint = structure.refresh_constraint()
    refresh_function = structure.refresh_function()
    refresh_table = structure.refresh_table()
    setup_function = structure.setup_function()
    tmp_table = structure.tmp_table()
    group_columns = [SqlId(col) for col in groups]
    aggregate_columns = [SqlId(col) for col in aggregates]
    yield f"""
CREATE FUNCTION {setup_function} () RETURNS void
LANGUAGE plpgsql AS $$
  BEGIN
    -- already set up in this session?
    IF to_regclass({SqlString(str(refresh_table))}) IS NOT NULL THEN
      RETURN;
    END IF;

    -- staging table for pending aggregate rows, cleared at commit
    CREATE TEMP TABLE {tmp_table}
    ON COMMIT DELETE ROWS
    AS SELECT {sql_list(group_columns)}, {sql_list(aggregate_columns)}
    FROM {target.sql}
    WITH NO DATA;

    ALTER TABLE {tmp_table}
      ADD PRIMARY KEY ({sql_list(group_columns)});

    -- zero-column marker table: any row means "refresh pending"
    CREATE TEMP TABLE {refresh_table} (
    )
    ON COMMIT DELETE ROWS;

    -- deferred to commit time, when all staged changes are in place
    CREATE CONSTRAINT TRIGGER {refresh_constraint} AFTER INSERT ON {refresh_table}
    DEFERRABLE INITIALLY DEFERRED
    FOR EACH ROW EXECUTE PROCEDURE {refresh_function}();
  END;
$$
""".strip()
    yield f"""
COMMENT ON FUNCTION {setup_function} IS {SqlString(f"Set up temp tables for {id}")}
""".strip()
def enqueue_sql(
    id: str,
    table: JoinTable,
    foreign: JoinTable,
    structure: Structure,
    key_query: str,
    exprs: typing.List[SqlTableExpr],
    last_expr: typing.Optional[str],
):
    """
    Render SQL that enqueues changed keys for asynchronous processing.

    Upserts the keys produced by *key_query* into the queue table and then
    NOTIFYs listeners on the queue channel.

    :param exprs: CTEs prepended to the insert
    :param last_expr: optional final CTE appended after the insert
    """
    queue_table = structure.queue_table(id)
    local_columns = [local_column(column) for column in table.key]
    # On conflict, re-set the foreign_* iterator columns from the proposed row
    # (presumably resetting the iterator, since only local columns are
    # inserted — TODO confirm) and refresh seq so the item is re-ordered.
    insert = f"""
INSERT INTO {queue_table} ({sql_list(local_columns)})
{key_query}
ORDER BY {sql_list(SqlNumber(i + 1) for i, _ in enumerate(table.key))}
ON CONFLICT ({sql_list(local_columns)}) DO UPDATE
    SET {update_excluded(foreign_column(column) for column in foreign.key)},
        seq = excluded.seq
""".strip()
    query = SqlQuery(insert, expressions=exprs)
    if last_expr is not None:
        query.append(SqlId("_other"), last_expr)
    return f"""
{query};
NOTIFY {SqlId(str(queue_table))};
""".strip()
def sql(
    self,
    key_query: str,
    exprs: typing.List[SqlTableExpr] = [],
    last_expr: typing.Optional[str] = None,
):
    """
    Render SQL that records changed keys and schedules a deferred refresh.

    :param key_query: SELECT producing the changed keys
    :param exprs: CTEs prepended (in order) to the upsert
    :param last_expr: optional final CTE appended after the upsert
    """
    structure = self._structure
    refresh_table = structure.refresh_table()
    upsert = upsert_query(
        columns=self._key,
        key=self._key,
        query=key_query,
        target=structure.key_table(),
    )
    for extra in reversed(exprs):
        upsert.prepend(extra)
    if last_expr is not None:
        upsert.append(SqlId("_other"), last_expr)
    # Ensure temp tables exist, stage the keys, then mark a refresh as
    # pending (at most one marker row).
    return f"""
PERFORM {structure.setup_function()}();
{upsert};
INSERT INTO {refresh_table} SELECT WHERE NOT EXISTS (TABLE {refresh_table});
""".strip()
def create_cleanup(
    id: str, groups: typing.Dict[str, str], structure: AggStructure, target: AggTable
):
    """
    Yield SQL creating a trigger that removes empty aggregate groups.

    After updates on *target*, deletes any group whose _count column is 0
    (assumes *target* maintains a _count aggregate — TODO confirm against the
    aggregate definitions).
    """
    cleanup_function = structure.cleanup_function()
    cleanup_trigger = structure.cleanup_trigger()
    group_columns = [SqlId(group) for group in groups]
    yield f"""
CREATE FUNCTION {cleanup_function} () RETURNS trigger
LANGUAGE plpgsql AS $$
  BEGIN
    DELETE FROM {target.sql} AS t
    USING _new AS n
    WHERE ({table_fields(SqlId("t"), group_columns)}) = ({table_fields(SqlId("n"), group_columns)})
      AND n._count = 0;

    RETURN NULL;
  END;
$$
""".strip()
    yield f"""
COMMENT ON FUNCTION {cleanup_function} IS {SqlString(f'Cleanup records for {id}')}
""".strip()
    -- comment removed
    yield f"""
CREATE TRIGGER {cleanup_trigger} AFTER UPDATE ON {target.sql}
REFERENCING NEW TABLE AS _new
FOR EACH STATEMENT EXECUTE PROCEDURE {cleanup_function}()
""".strip()
    yield f"""
COMMENT ON TRIGGER {cleanup_trigger} ON {target.sql} IS {SqlString(f'Cleanup records for {id}')}
""".strip()
def sql(
    self,
    key_query: str,
    exprs: typing.List[SqlTableExpr] = [],
    last_expr: typing.Optional[str] = None,
):
    """
    Render the SQL that applies *key_query* through the target's query.

    :param key_query: SELECT producing the changed keys
    :param exprs: extra CTEs prepended (in order) to the generated query
    :param last_expr: optional final CTE appended after the target query
    """
    if self._setup is None:  # or deps[0][1].join_mode == JoinJoinMode.ASYNC
        setup_sql = ""
    else:
        setup_sql = f"PERFORM {self._setup.sql}();"
    if self._lock:
        lock_table = self._structure.lock_table()
        # Fix: build target_query (including all prepended/appended
        # expressions) BEFORE interpolating it into `inner`. The previous
        # code formatted `inner` first, referencing target_query before
        # assignment (NameError) and dropping exprs/last_expr.
        target_query = self._target.sql(lock_table)
        for expr in reversed(exprs):
            target_query.prepend(expr)
        if last_expr is not None:
            # NOTE(review): sibling paths name this CTE "_other"; kept as
            # "other_" to preserve existing output — confirm which is meant.
            target_query.append(SqlId("other_"), last_expr)
        inner = f"""
ANALYZE {lock_table};
{target_query};
""".strip()
        # Fix: `structure` was an undefined name here; use self._structure.
        return f"""
{setup_sql}
{lock_sql(self._structure, self._key.names, inner)}
""".strip()
    else:
        key_table = SqlId("_key")
        target_query = self._target.sql(key_table)
        target_query.prepend(SqlTableExpr(key_table, key_query))
        for expr in reversed(exprs):
            target_query.prepend(expr)
        if last_expr is not None:
            target_query.append(SqlId("other_"), last_expr)
        return f"""
{setup_sql}
{target_query};
""".strip()
def create_refresh_function(
    id: str,
    structure: AggStructure,
    aggregates: typing.Dict[str, AggAggregate],
    groups: typing.Dict[str, str],
    target: AggTable,
):
    """
    Yield SQL creating the trigger function that flushes staged aggregates.

    Clears the refresh-marker table, drains the session staging table, and
    merges its rows into the target using each aggregate's combine
    expression on conflict.
    """
    refresh_function = structure.refresh_function()
    refresh_table = structure.refresh_table()
    tmp_table = structure.tmp_table()
    group_columns = [SqlId(col) for col in groups]
    aggregate_columns = [SqlId(col) for col in aggregates]
    # NOTE(review): the main SELECT reads {tmp_table} rather than the _delete
    # CTE. Under PostgreSQL snapshot semantics it sees the pre-delete rows,
    # so the result should match selecting FROM _delete — confirm intent.
    yield f"""
CREATE FUNCTION {refresh_function} () RETURNS trigger
LANGUAGE plpgsql AS $$
  BEGIN
    DELETE FROM {refresh_table};

    WITH _delete AS (
      DELETE FROM {tmp_table}
      RETURNING *
    )
    INSERT INTO {target.sql} AS existing (
      {sql_list(group_columns)},
      {sql_list(aggregate_columns)}
    )
    SELECT
      {sql_list(group_columns)},
      {sql_list(aggregate_columns)}
    FROM {tmp_table}
    ORDER BY {sql_list(SqlNumber(i + 1) for i, _ in enumerate(groups))}
    ON CONFLICT ({sql_list(group_columns)}) DO UPDATE
      SET {sql_list(f'{SqlId(col)} = {agg.combine_expression(col)}' for col, agg in aggregates.items())};

    RETURN NULL;
  END;
$$
""".strip()
def sql(
    self,
    root: SqlObject,
    exprs: typing.List[SqlTableExpr] = [],
    last_expr: typing.Optional[str] = None,
) -> str:
    """
    Build the key query across the dependency chain and dispatch it.

    Walks self._deps composing a SELECT that maps changed rows of *root* to
    target keys, then hands it either to the async enqueue path (when the
    final dep joins asynchronously) or to the action's sql().
    """
    # NOTE(review): `foreign` is derived from deps[0], while the async check
    # below uses deps[-1]; confirm the deps ordering makes these consistent.
    _, last_table = self._deps[0]
    if last_table.join is not None:
        foreign = self._tables[last_table.join]
    else:
        foreign = None
    key_query = ""
    for i, (dep_id, dep) in enumerate(self._deps):
        # The final dep reads from *root* (the changed rows); earlier deps
        # read from their own tables.
        table_sql = root if i == len(self._deps) - 1 else str(dep.sql)
        if dep.target_key is not None:
            # This dep yields the target key directly.
            key_query += f"SELECT DISTINCT {sql_list(f'{k} AS {SqlId(t)}' for t, k in zip(self._key, dep.target_key))}"
            key_query += f"\nFROM"
            key_query += f"\n  {table_sql} AS {SqlId(dep_id)}"
        elif dep.join_mode == JoinJoinMode.ASYNC:
            # Async boundary: select this dep's own key for the queue.
            dep_columns = [SqlId(column) for column in dep.key]
            key_query += (
                f"SELECT DISTINCT {table_fields(SqlId(dep_id), dep_columns)}"
            )
            key_query += f"\nFROM"
            key_query += f"\n  {table_sql} AS {SqlId(dep_id)}"
        else:
            # Interior dep: join into the accumulated FROM clause.
            if dep.join_other is not None:
                key_query += f"\n  {dep.join_other}"
            key_query += f"\n  JOIN {table_sql} AS {SqlId(dep_id)} ON {dep.join_on}"
    last_id, last_table = self._deps[-1]
    if last_table.join_mode == JoinJoinMode.ASYNC:
        # Local import avoids a circular module dependency.
        from .join_async import enqueue_sql

        return enqueue_sql(
            id=last_id,
            table=last_table,
            foreign=foreign,
            structure=self._structure,
            key_query=key_query,
            exprs=exprs,
            last_expr=last_expr,
        )
    return self._action.sql(key_query, exprs=exprs, last_expr=last_expr)
def upsert_query(
    columns: typing.List[SqlId],
    key: typing.List[SqlId],
    query: str,
    target: SqlObject,
) -> SqlQuery:
    """
    Upsert data

    Insert the rows produced by *query* into *target*, updating non-key
    columns on key conflict.

    :param columns: all columns produced by *query*, in order
    :param key: conflict/primary-key columns (subset of *columns*)
    :param query: SELECT statement producing the rows
    :param target: table to upsert into
    """
    # Non-key columns that must be updated on conflict.
    data_columns = [SqlId(column) for column in columns if column not in key]
    is_temp = target.names[0].name == "pg_temp"
    if not is_temp:
        # Deterministic row order so concurrent writers take row locks in the
        # same order; unnecessary for session-local temp tables.
        query += f"\nORDER BY {sql_list(SqlNumber(i + 1) for i, _ in enumerate(key))}"
    # Renamed local from `upsert_query`, which shadowed this function's name.
    if data_columns:
        statement = f"""
INSERT INTO {target} ({sql_list(columns)})
{query}
ON CONFLICT ({sql_list(key)}) DO UPDATE
    SET {update_excluded(data_columns)}
""".strip()
    elif not is_temp:
        # Key-only table: DO UPDATE ... WHERE false writes nothing but still
        # takes the conflicting row's lock, keeping lock order deterministic.
        statement = f"""
INSERT INTO {target} ({sql_list(columns)})
{query}
ON CONFLICT ({sql_list(key)}) DO UPDATE
    SET {update_excluded(key)}
    WHERE false
""".strip()
    else:
        # Fix: .strip() for consistency with the other branches (previously
        # this branch alone kept leading/trailing newlines).
        statement = f"""
INSERT INTO {target} ({sql_list(columns)})
{query}
ON CONFLICT ({sql_list(key)}) DO NOTHING
""".strip()
    return SqlQuery(statement)
def refresh_table(self) -> SqlObject:
    """Session-local (pg_temp) marker table signalling a pending refresh."""
    temp_schema = SqlId("pg_temp")
    return SqlObject(temp_schema, self._name("refresh"))
def key_table(self) -> SqlObject:
    """Session-local (pg_temp) table holding changed keys."""
    temp_schema = SqlId("pg_temp")
    return SqlObject(temp_schema, self._name("key"))
def refresh_constraint(self) -> SqlId:
    """Name of the deferred refresh constraint trigger (equals the id)."""
    constraint_name = self._id
    return SqlId(constraint_name)
def _name(self, name: str) -> SqlId:
    """Build an identifier namespaced by this id: "<id>__<name>".

    Fix: added the missing return annotation for consistency with the
    sibling accessors, which all annotate their return types.
    """
    return SqlId(f"{self._id}__{name}")
def _sql_object(self, name: SqlId) -> SqlObject:
    """Qualify *name* with the configured schema, if one is set.

    Fix: added the missing return annotation for consistency with siblings;
    ternary unfolded into guard form for readability.
    """
    if self._schema is not None:
        return SqlObject(SqlId(self._schema), name)
    return SqlObject(name)
def sql(self) -> SqlObject:
    """Schema-qualified SQL object for this table (bare name if no schema)."""
    if self.schema is None:
        return SqlObject(SqlId(self.name))
    return SqlObject(SqlId(self.schema), SqlId(self.name))
def sql(self) -> SqlId:
    """SQL identifier for this object's name."""
    identifier = self.name
    return SqlId(identifier)
def create_queue(
    id: str,
    table_id: str,
    structure: Structure,
    resolver: KeyResolver,
    tables: typing.Dict[str, JoinTable],
):
    """
    Yield SQL creating the async work queue for *table_id* and its processor.

    The queue table holds one row per changed key of the local table plus an
    iterator position (foreign_* columns) into the joined foreign table. The
    generated process function claims one queue item under an advisory lock,
    resolves the next batch of foreign keys via *resolver*, and either
    advances the iterator or removes the finished item.
    """
    table = tables[table_id]
    dep = table.join
    foreign_table = tables[dep]
    # Advisory-lock namespace: explicit lock_id, or a signed 16-bit hash of
    # the view/table ids.
    if table.lock_id is not None:
        lock_id = table.lock_id
    else:
        digest = hashlib.md5(f"{id}__{table_id}".encode("utf-8")).digest()
        lock_id = int.from_bytes(digest[0:2], "big", signed=True)
    # Shift into the high 16 bits; low 48 bits carry the per-row lock value.
    lock_base = lock_id * (2**48)
    queue_table = structure.queue_table(table_id)
    local_columns = [local_column(column) for column in table.key]
    foreign_columns = [foreign_column(column) for column in foreign_table.key]
    yield f"""
CREATE TABLE {queue_table}
AS SELECT
    {sql_list(f"{SqlObject(SqlId('l'), SqlId(column))} AS {local_column(column)}" for column in table.key)},
    {sql_list(f"{SqlObject(SqlId('f'), SqlId(column))} AS {foreign_column(column)}" for column in foreign_table.key)},
    NULL::bigint AS seq,
    NULL::bigint AS lock
FROM {table.sql} AS l
    CROSS JOIN {foreign_table.sql} AS f
WITH NO DATA
""".strip()
    yield f"""
ALTER TABLE {queue_table}
    ADD PRIMARY KEY ({sql_list(local_columns)}),
    ALTER lock ADD GENERATED BY DEFAULT AS IDENTITY,
    ALTER lock SET NOT NULL,
    ALTER seq ADD GENERATED BY DEFAULT AS IDENTITY,
    ALTER seq SET NOT NULL
""".strip()
    yield f"""
COMMENT ON TABLE {queue_table} IS {SqlString(f"Asynchronous processing of changes to {table.sql}")}
""".strip()
    for column in table.key:
        yield f"""
COMMENT ON COLUMN {queue_table}.{local_column(column)} IS {SqlString(f"{table.sql} key: {SqlId(column)}")}
"""
    for column in foreign_table.key:
        yield f"""
COMMENT ON COLUMN {queue_table}.{foreign_column(column)} IS {SqlString(f"{foreign_table.sql} iterator: {SqlId(column)}")}
"""
    yield f"""
COMMENT ON COLUMN {queue_table}.seq IS 'Order to process'
""".strip()
    yield f"""
COMMENT ON COLUMN {queue_table}.lock IS 'Lock ID'
""".strip()
    yield f"""
CREATE INDEX ON {queue_table} (seq)
""".strip()
    foreign_key_table = SqlObject(SqlId("_foreign_key"))
    item = SqlId("_item")
    # NOTE(review): appears unused — get_item hardcodes _new_item below.
    new_item = SqlId("_new_item")
    # plpgsql fragment: capture the furthest-resolved foreign key as the new
    # iterator position for the claimed queue item.
    # NOTE(review): this ORDER BY passes raw str keys to table_fields where
    # other call sites pass SqlId — confirm table_fields accepts both.
    get_item = f"""
SELECT {table_fields(item, local_columns)}, {table_fields(SqlId("k"), [SqlId(column) for column in foreign_table.key])}, _item.seq, _item.lock
INTO _new_item
FROM {SqlObject(foreign_key_table)} AS k
ORDER BY {table_fields(SqlId("k"), foreign_table.key)} DESC
""".strip()
    # First batch: scan matching foreign rows from the beginning.
    key1_query = f"""
SELECT {SqlId(dep)}.*
FROM {foreign_table.sql} AS {SqlId(dep)}
    JOIN (VALUES ({table_fields(item, local_columns)})) AS {SqlId(table_id)} ({sql_list(SqlId(col) for col in table.key)}) ON {table.join_on}
ORDER BY {sql_list(SqlObject(SqlId(dep), SqlId(name)) for name in foreign_table.key)}
LIMIT max_records
""".strip()
    gather1 = resolver.sql(
        foreign_key_table,
        exprs=[SqlTableExpr(foreign_key_table, key1_query)],
        last_expr=get_item,
    )
    # Subsequent batches: continue strictly after the stored iterator.
    key2_query = f"""
SELECT {SqlId(dep)}.*
FROM {foreign_table.sql} AS {SqlId(dep)}
    JOIN (VALUES ({table_fields(item, local_columns)})) AS {SqlId(table_id)} ({sql_list(SqlId(col) for col in table.key)}) ON {table.join_on}
WHERE ({table_fields(item, foreign_columns)}) < ({table_fields(SqlId(dep), (SqlId(column) for column in foreign_table.key))})
ORDER BY {sql_list(SqlObject(SqlId(dep), SqlId(name)) for name in foreign_table.key)}
LIMIT max_records
""".strip()
    gather2 = resolver.sql(
        foreign_key_table,
        exprs=[SqlTableExpr(foreign_key_table, key2_query)],
        last_expr=get_item,
    )
    process_function = structure.queue_process_function(table_id)
    yield f"""
CREATE FUNCTION {process_function} (max_records bigint) RETURNS bool
LANGUAGE plpgsql AS $$
  DECLARE
    _item {queue_table};
    _new_item {queue_table};
  BEGIN
    -- find item
    SELECT (q.*) INTO _item
    FROM {queue_table} AS q
    WHERE pg_try_advisory_xact_lock({lock_base} + q.lock)
    ORDER BY q.seq
    LIMIT 1;

    IF _item IS NULL THEN
      -- if no item found, exit
      RETURN false;
    END IF;

    IF ({table_fields(item, (foreign_column(column) for column in foreign_table.key))}) IS NULL THEN
      -- if there is no iterator, start at the beginning
      {indent(gather1, 3)}
    ELSE
      -- if there is an iterator, start at the iterator
      {indent(gather2, 3)}
    END IF;

    IF _new_item IS NULL THEN
      -- if the iterator was at the end, remove the queue item
      DELETE FROM {queue_table} AS q
      WHERE ({table_fields(SqlId("q"), local_columns)}, q.seq) = ({table_fields(item, local_columns)}, _item.seq);
    ELSE
      -- update the queue item with the new iterator
      UPDATE {queue_table} AS q
      SET {sql_list(f'{column} = (_new_item).{column}' for column in foreign_columns)},
        seq = nextval(pg_get_serial_sequence({SqlString(str(queue_table))}, 'seq'))
      WHERE ({table_fields(SqlId("q"), local_columns)}, q.seq) = ({table_fields(item, local_columns)}, _item.seq);
    END IF;

    -- notify listeners that the queue has been updated
    NOTIFY {SqlId(str(queue_table))};

    RETURN true;
  END;
$$
""".strip()
    yield f"""
COMMENT ON FUNCTION {process_function} IS {SqlString(f"Refresh for {queue_table}")}
""".strip()
def create_change(
    aggregates: typing.Dict[str, AggAggregate],
    filter: typing.Optional[str],
    source: AggTable,
    id: str,
    consistency: AggConsistency,
    groups: typing.Dict[str, str],
    structure: AggStructure,
    target: AggTable,
):
    """
    Yield SQL creating the change function and its four statement triggers.

    The change function aggregates the transition table (_change) and merges
    the result either directly into the target (IMMEDIATE) or into the
    session staging table flushed at commit (DEFERRED). Triggers pass sign
    +1 for inserts / new update images and -1 for deletes / old update
    images; sign is presumably referenced by the aggregate value or combine
    expressions — TODO confirm.
    """
    change_function = structure.change_function()
    group_columns = [SqlId(col) for col in groups]
    aggregate_columns = [SqlId(col) for col in aggregates]
    where = f"WHERE {filter}" if filter is not None else ""
    if consistency == AggConsistency.DEFERRED:
        setup_function = structure.setup_function()
        refresh_table = structure.refresh_table()
        target_table = structure.tmp_table()
        # Session-local temp table: no deterministic ordering needed.
        order = ""
        setup = f"""
PERFORM {setup_function}();
""".strip()
        finalize = f"""
INSERT INTO {refresh_table} SELECT WHERE NOT EXISTS (TABLE {refresh_table});
""".strip()
    elif consistency == AggConsistency.IMMEDIATE:
        target_table = target.sql
        # Lock groups in deterministic order to avoid deadlocks.
        order = f"ORDER BY {sql_list(SqlNumber(i + 1) for i, _ in enumerate(groups))}"
        setup = ""
        finalize = ""
    # NOTE(review): no else branch — an unrecognized consistency value would
    # leave target_table/order/setup/finalize unbound (NameError); confirm
    # the enum is closed.
    yield f"""
CREATE FUNCTION {change_function} () RETURNS trigger
LANGUAGE plpgsql AS $$
  DECLARE
    sign smallint := TG_ARGV[0]::smallint;
  BEGIN
    {indent(setup, 2)}

    INSERT INTO {target_table} AS existing (
      {sql_list(group_columns)},
      {sql_list(aggregate_columns)}
    )
    SELECT
      {sql_list(value for value in groups.values())},
      {sql_list(agg.value for agg in aggregates.values())}
    FROM _change AS {SqlId(id)}
    {where}
    GROUP BY {sql_list(SqlNumber(i + 1) for i, _ in enumerate(groups))}
    {order}
    ON CONFLICT ({sql_list(group_columns)}) DO UPDATE
      SET {sql_list(f'{SqlId(col)} = {agg.combine_expression(col)}' for col, agg in aggregates.items())};

    {indent(finalize, 2)}

    RETURN NULL;
  END;
$$
""".strip()
    yield f"""
COMMENT ON FUNCTION {change_function} IS {SqlString(f'Handle changes for {id}')}
""".strip()
    delete_trigger = structure.delete_trigger()
    yield f"""
CREATE TRIGGER {delete_trigger} AFTER DELETE ON {source.sql}
REFERENCING OLD TABLE AS _change
FOR EACH STATEMENT EXECUTE PROCEDURE {change_function}('-1');
""".strip()
    insert_trigger = structure.insert_trigger()
    yield f"""
CREATE TRIGGER {insert_trigger} AFTER INSERT ON {source.sql}
REFERENCING NEW TABLE AS _change
FOR EACH STATEMENT EXECUTE PROCEDURE {change_function}('1');
""".strip()
    update_1_trigger = structure.update_1_trigger()
    yield f"""
CREATE TRIGGER {update_1_trigger} AFTER UPDATE ON {source.sql}
REFERENCING NEW TABLE AS _change
FOR EACH STATEMENT EXECUTE PROCEDURE {change_function}('1');
""".strip()
    update_2_trigger = structure.update_2_trigger()
    yield f"""
CREATE TRIGGER {update_2_trigger} AFTER UPDATE ON {source.sql}
REFERENCING OLD TABLE AS _change
FOR EACH STATEMENT EXECUTE PROCEDURE {change_function}('-1');
""".strip()
def local_column(column: str) -> SqlId:
    # Annotation corrected: the function returns SqlId, not str.
    """Queue-table column for the local table's key *column* ("local_" prefix)."""
    return SqlId(f"local_{column}")
def foreign_column(column: str) -> SqlId:
    # Annotation corrected: the function returns SqlId, not str.
    """Queue-table iterator column for the foreign key *column* ("foreign_" prefix)."""
    return SqlId(f"foreign_{column}")
def tmp_table(self) -> SqlObject:
    """Session-local (pg_temp) staging table for pending aggregate rows."""
    temp_schema = SqlId("pg_temp")
    return SqlObject(temp_schema, self._name("tmp"))