def create_temp_table_from(conn, schema, table):
    """
    Create a temporary table mirroring `schema`.`table` and return the
    temporary table's name.

    The table is created with ON COMMIT DROP, so it disappears at the end of
    the current transaction. The inherited `modified` column is removed,
    because it is not supplied when data is copied into the temporary table.
    """
    tmp_table_name = "tmp_{0}".format(table)
    full_table_name = create_full_table_name(schema, table)

    create_query = (
        'CREATE TEMPORARY TABLE "{0}" (LIKE {1})'
        ' ON COMMIT DROP'.format(tmp_table_name, full_table_name))

    drop_column_query = (
        'ALTER TABLE "{0}" DROP COLUMN modified'.format(tmp_table_name))

    with closing(conn.cursor()) as cursor:
        cursor.execute(create_query)

        try:
            cursor.execute(drop_column_query)
        except psycopg2.ProgrammingError as exc:
            if exc.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE:
                # Might happen after database connection loss
                raise RecoverableError(str(exc), do_nothing)

            raise NonRecoverableError("{0}, {1!s} in query '{2}'".format(
                exc.pgcode, exc, drop_column_query))

    return tmp_table_name
def create_data_table(conn, table_name, column_names, data_types, schema=None):
    """
    Create a trend data table with the standard entity_id / timestamp /
    modified columns plus one column per trend, then add its indexes,
    trigger and ownership/grants.

    :param conn: psycopg2 database connection
    :param table_name: name of table to be created
    :param column_names: names of the trend columns
    :param data_types: SQL data type for each trend column, matched by
        position to `column_names`
    :param schema: name of the database schema to create the table in
    :raises ValueError: when `schema` is not supplied
    :raises RecoverableError: when the table already exists (duplicate
        table / integrity error) and the operation can simply be retried
    :raises NonRecoverableError: on any other programming error
    """
    # NOTE(review): the original body referenced `schema` even though it was
    # not a parameter (the docstring documented it, so the signature was
    # missing it). It is added here as a trailing keyword with a default so
    # existing positional calls keep working; failing fast with ValueError
    # replaces the NameError the original would have raised.
    if schema is None:
        raise ValueError("schema is required to create a data table")

    # One '"name" type, ' fragment per trend column; the trailing comma and
    # space is intentional, the primary-key constraint follows it in `query`.
    columns_part = "".join(
        '"{0}" {1}, '.format(name, data_type)
        for name, data_type in zip(column_names, data_types))

    full_table_name = create_full_table_name(schema, table_name)

    query = (
        "CREATE TABLE {0} ( "
        "entity_id integer NOT NULL, "
        '"timestamp" timestamp with time zone NOT NULL, '
        '"modified" timestamp with time zone NOT NULL, '
        "{1}"
        'CONSTRAINT "{2}_pkey" PRIMARY KEY (entity_id, "timestamp"))'.format(
            full_table_name, columns_part, table_name))

    alter_query = (
        "ALTER TABLE {0} ALTER COLUMN modified "
        "SET DEFAULT CURRENT_TIMESTAMP".format(full_table_name))

    index_query_modified = (
        'CREATE INDEX "idx_{0}_modified" ON {1} '
        'USING btree (modified)'.format(table_name, full_table_name))

    index_query_timestamp = (
        'CREATE INDEX "idx_{0}_timestamp" ON {1} '
        'USING btree (timestamp)'.format(table_name, full_table_name))

    trigger_query = (
        "CREATE TRIGGER update_modified_modtime "
        "BEFORE UPDATE "
        "ON {0} FOR EACH ROW EXECUTE PROCEDURE "
        "directory.update_modified_column()".format(full_table_name))

    owner_query = "ALTER TABLE {} OWNER TO minerva_writer".format(
        full_table_name)

    with closing(conn.cursor()) as cursor:
        try:
            cursor.execute(query)
            cursor.execute(alter_query)
            cursor.execute(index_query_modified)
            cursor.execute(index_query_timestamp)
            cursor.execute(trigger_query)
            cursor.execute(owner_query)
        except psycopg2.IntegrityError as exc:
            # Table (or a dependent object) already exists; retry is safe.
            raise RecoverableError(str(exc), NoOpFix)
        except psycopg2.ProgrammingError as exc:
            if exc.pgcode == psycopg2.errorcodes.DUPLICATE_TABLE:
                # Concurrent creation by another writer; retry is safe.
                raise RecoverableError(str(exc), NoOpFix)
            else:
                raise NonRecoverableError(
                    "ProgrammingError({0}): {1}".format(
                        exc.pgcode, exc.pgerror))
        else:
            grant(conn, "TABLE", "SELECT", full_table_name, "minerva")
            conn.commit()
def store_insert_rows(conn, schema, table, trend_names, timestamp, modified,
        data_rows):
    """
    Insert `data_rows` one at a time, skipping any row whose
    (entity_id, timestamp) pair already exists in the target table.
    """
    column_names = ["entity_id", "timestamp", "modified"]
    column_names.extend(trend_names)

    quoted_columns = ",".join('"{0}"'.format(name) for name in column_names)
    placeholders = ", ".join("%s" for _ in column_names)

    full_table_name = create_full_table_name(schema, table)

    select_query = (
        "SELECT 1 FROM {0} "
        "WHERE entity_id = %s AND timestamp = %s").format(full_table_name)

    insert_query = "INSERT INTO {0} ({1}) VALUES ({2})".format(
        full_table_name, quoted_columns, placeholders)

    with closing(conn.cursor()) as cursor:
        for entity_id, values in data_rows:
            row = [entity_id, timestamp, modified]
            row.extend(values)

            # Existence probe first; insert only when no matching row.
            cursor.execute(select_query, [entity_id, timestamp])

            if cursor.rowcount == 0:
                cursor.execute(insert_query, row)
def create_copy_from_query(schema, table, trend_names):
    """
    Return a COPY ... FROM STDIN statement for the trend table, listing the
    fixed entity_id/timestamp/modified columns followed by the trend columns.
    """
    full_table_name = create_full_table_name(schema, table)

    names = ["entity_id", "timestamp", "modified"]
    names.extend(trend_names)

    quoted_columns = ",".join('"{0}"'.format(name) for name in names)

    return "COPY {0}({1}) FROM STDIN".format(full_table_name, quoted_columns)
def store_using_tmp(conn, schema, table, trend_names, timestamp, modified,
        data_rows):
    """
    Store the data using the PostgreSQL specific COPY FROM command and a
    temporary table. The temporary table is joined against the target table
    to make sure only new records are inserted.
    """
    # Temporary mirror of the target table (ON COMMIT DROP, `modified`
    # column removed — see create_temp_table_from).
    tmp_table_name = create_temp_table_from(conn, schema, table)

    store_insert_tmp(conn, tmp_table_name, trend_names, timestamp, modified,
        data_rows)

    # `modified` is deliberately absent from the column list; presumably the
    # target table's DEFAULT CURRENT_TIMESTAMP fills it in — TODO confirm.
    column_names = ['entity_id', 'timestamp']
    column_names.extend(trend_names)

    tmp_column_names = ",".join('tmp."{0}"'.format(name)
        for name in column_names)
    dest_column_names = ",".join('"{0}"'.format(name)
        for name in column_names)

    full_table_name = create_full_table_name(schema, table)

    # Anti-join: copy only rows from the temporary table that have no
    # matching (timestamp, entity_id) in the target table.
    # NOTE(review): the WHERE clause references the unqualified `table` name
    # while the JOIN uses the schema-qualified name; PostgreSQL resolves the
    # unqualified name to the joined relation — verify if schema handling
    # ever changes.
    insert_query = " ".join([
        "INSERT INTO {0} ({1})".format(full_table_name, dest_column_names),
        "SELECT {0} FROM \"{1}\" AS tmp".format(tmp_column_names,
            tmp_table_name),
        "LEFT JOIN {0} ON tmp.\"timestamp\" = {0}.\"timestamp\" "
        "AND tmp.entity_id = {0}.entity_id".format(full_table_name),
        "WHERE \"{0}\".entity_id IS NULL".format(table)])

    with closing(conn.cursor()) as cursor:
        try:
            cursor.execute(insert_query)
        except psycopg2.Error as exc:
            if exc.pgcode == psycopg2.errorcodes.UNDEFINED_COLUMN:
                # A trend column is missing from the target table; the fix
                # adds it so the caller can retry.
                fix = partial(check_columns_exist, conn, schema, table,
                    trend_names)

                raise RecoverableError(str(exc), fix)
            elif exc.pgcode == psycopg2.errorcodes.UNIQUE_VIOLATION:
                # Concurrent insert beat the anti-join; fall back to the
                # slow row-by-row path that skips existing rows.
                conn.rollback()

                store_insert_rows(conn, schema, table, trend_names,
                    timestamp, modified, data_rows)
            else:
                raise NonRecoverableError(str(exc))
def store_using_update(conn, schema, table, trend_names, timestamp, modified,
        data_rows):
    """
    Update existing rows identified by (entity_id, timestamp), setting the
    trend columns and bumping `modified` to the greatest of its current
    value and the supplied `modified`.
    """
    assignments = ", ".join('"{0}"=%s'.format(name) for name in trend_names)

    full_table_name = create_full_table_name(schema, table)

    update_query = (
        'UPDATE {0} SET modified=greatest(modified, %s), {1} '
        'WHERE entity_id=%s AND "timestamp"=%s').format(
            full_table_name, assignments)

    # Parameter order must match the query: modified, trend values, then
    # the WHERE clause's entity_id and timestamp.
    parameters = []
    for entity_id, values in data_rows:
        row = [modified]
        row.extend(values)
        row.append(entity_id)
        row.append(timestamp)
        parameters.append(row)

    with closing(conn.cursor()) as cursor:
        try:
            cursor.executemany(update_query, parameters)
        except psycopg2.DatabaseError as exc:
            if exc.pgcode != psycopg2.errorcodes.UNDEFINED_COLUMN:
                raise NonRecoverableError(str(exc))

            # Missing trend column; the fix adds it so a retry can succeed.
            fix = partial(check_columns_exist, conn, schema, table,
                trend_names)

            raise RecoverableError(str(exc), fix)
def store_batch_insert(conn, schema, table, trend_names, timestamp, modified,
        data_rows):
    """
    Insert all `data_rows` in a single executemany batch, translating
    database errors into this module's exception types.
    """
    column_names = ["entity_id", "timestamp", "modified"]
    column_names.extend(trend_names)

    dest_column_names = ",".join('"{0}"'.format(name)
        for name in column_names)
    parameters = ", ".join(["%s"] * len(column_names))

    full_table_name = create_full_table_name(schema, table)

    query = "INSERT INTO {0} ({1}) VALUES ({2})".format(
        full_table_name, dest_column_names, parameters)

    rows = [[entity_id, timestamp, modified] + list(values)
        for entity_id, values in data_rows]

    with closing(conn.cursor()) as cursor:
        try:
            cursor.executemany(query, rows)
        except psycopg2.DatabaseError as exc:
            # Map well-known error codes onto the module's exceptions so
            # callers can choose a recovery strategy.
            if exc.pgcode == psycopg2.errorcodes.UNIQUE_VIOLATION:
                raise UniqueViolation()
            elif exc.pgcode == psycopg2.errorcodes.UNDEFINED_COLUMN:
                raise NoSuchColumnError()
            elif exc.pgcode == psycopg2.errorcodes.UNDEFINED_TABLE:
                raise NoSuchTable()
            elif exc.pgcode in DATATYPE_MISMATCH_ERRORS:
                raise DataTypeMismatch(str(exc))
            else:
                raise NonRecoverableError("{0}: {1}".format(exc.pgcode, exc))