def produce_table_info(conn, filter_schemas=None, tables: Optional[List[str]] = None):
    """
    Collect per-column catalog metadata for the relations visible to the connection.

    The catalog query reads pg_attribute joined with pg_type, pg_class,
    pg_namespace and pg_index. It keeps only live user columns (attnum > 0,
    not dropped) of relations whose relkind is 'r', 'v' or 'm', skips the
    pg_toast, pg_catalog and information_schema schemas, and requires
    has_column_privilege(..., 'SELECT') on each column.

    Args:
        conn: connection object; its ``cursor()`` is called with a DictCursor
            factory and the cursor name 'stitch_cursor'.
        filter_schemas: when truthy, handed to
            ``post_db.filter_schemas_sql_clause`` together with the SQL text.
        tables: when truthy, handed to ``post_db.filter_tables_sql_clause``
            together with the SQL text.

    Returns:
        Nested dict ``{schema_name: {table_name: {'is_view', 'row_count',
        'columns'}}}`` where 'columns' maps each column name to a ``Column``
        built from the remaining selected fields (column_name, primary_key,
        data_type, character_maximum_length, numeric_precision, numeric_scale,
        is_array, is_enum). 'row_count' is pg_class.reltuples cast to BIGINT,
        i.e. an approximation.

    NOTE(review): another function with this same name is defined later in
    this file and would rebind the name -- confirm which one is intended.
    """
    # Array detection background, carried over from the original notes:
    #   * typlen == -1 means a variable-length type
    #   * typelem != 0 points at the element type (e.g. 23 for integer arrays,
    #     because the integer type has oid 23)
    #   * the following reportedly identifies all array columns:
    #       select typname from pg_attribute as pga
    #       join pg_type as pgt on pgt.oid = pga.atttypid
    #       where typlen = -1 and typelem != 0 and pga.attndims > 0;
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor, name='stitch_cursor') as cursor:
        cursor.itersize = post_db.CURSOR_ITER_SIZE
        catalog = {}

        # The CASE expressions below follow this pattern to resolve domain
        # types ('d') to their base type:
        #   SELECT CASE WHEN $2.typtype = 'd' THEN $2.typbasetype ELSE $1.atttypid END
        # The statement text is intentionally left exactly as it was.
        query = """ SELECT pg_class.reltuples::BIGINT AS approximate_row_count, (pg_class.relkind = 'v' or pg_class.relkind = 'm') AS is_view, n.nspname AS schema_name, pg_class.relname AS table_name, attname AS column_name, i.indisprimary AS primary_key, format_type(a.atttypid, NULL::integer) AS data_type, information_schema._pg_char_max_length(CASE WHEN COALESCE(subpgt.typtype, pgt.typtype) = 'd' THEN COALESCE(subpgt.typbasetype, pgt.typbasetype) ELSE COALESCE(subpgt.oid, pgt.oid) END, information_schema._pg_truetypmod(a.*, pgt.*))::information_schema.cardinal_number AS character_maximum_length, information_schema._pg_numeric_precision(CASE WHEN COALESCE(subpgt.typtype, pgt.typtype) = 'd' THEN COALESCE(subpgt.typbasetype, pgt.typbasetype) ELSE COALESCE(subpgt.oid, pgt.oid) END, information_schema._pg_truetypmod(a.*, pgt.*))::information_schema.cardinal_number AS numeric_precision, information_schema._pg_numeric_scale(CASE WHEN COALESCE(subpgt.typtype, pgt.typtype) = 'd' THEN COALESCE(subpgt.typbasetype, pgt.typbasetype) ELSE COALESCE(subpgt.oid, pgt.oid) END, information_schema._pg_truetypmod(a.*, pgt.*))::information_schema.cardinal_number AS numeric_scale, pgt.typcategory = 'A' AS is_array, COALESCE(subpgt.typtype, pgt.typtype) = 'e' AS is_enum FROM 
pg_attribute a LEFT JOIN pg_type AS pgt ON a.atttypid = pgt.oid JOIN pg_class ON pg_class.oid = a.attrelid JOIN pg_catalog.pg_namespace n ON n.oid = pg_class.relnamespace LEFT OUTER JOIN pg_index as i ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) AND i.indisprimary = true LEFT OUTER JOIN pg_type AS subpgt ON pgt.typelem = subpgt.oid AND pgt.typelem != 0 WHERE attnum > 0 AND NOT a.attisdropped AND pg_class.relkind IN ('r', 'v', 'm') AND n.nspname NOT in ('pg_toast', 'pg_catalog', 'information_schema') AND has_column_privilege(pg_class.oid, attname, 'SELECT') = true """

        # Optional narrowing; each helper receives the SQL text and returns
        # the text to use from then on.
        if filter_schemas:
            query = post_db.filter_schemas_sql_clause(query, filter_schemas)

        if tables:
            query = post_db.filter_tables_sql_clause(query, tables)

        cursor.execute(query)

        # NOTE(review): fetchall() retrieves the whole result in one go, so the
        # itersize set above presumably has no effect -- confirm whether
        # iterating the cursor directly was intended.
        for record in cursor.fetchall():
            # First four fields describe the relation; the rest describe the
            # column and are passed positionally to Column.
            approx_rows, view_flag, schema, table, *column_fields = record

            schema_entry = catalog.setdefault(schema, {})
            table_entry = schema_entry.setdefault(
                table,
                {
                    'is_view': view_flag,
                    'row_count': approx_rows,
                    'columns': {}
                },
            )

            # column_fields[0] is the column name (attname AS column_name).
            table_entry['columns'][column_fields[0]] = Column(*column_fields)

        return catalog
def produce_table_info(conn, filter_schemas=None, filter_tables=None):
    """
    Build a nested description of the relations and columns readable through ``conn``.

    A single catalog query over pg_attribute (joined with pg_type, pg_class,
    pg_namespace and pg_index) is executed. Only columns with attnum > 0 that
    are not dropped are considered, only relations with relkind 'r', 'v' or
    'm', never the pg_toast, pg_catalog or information_schema schemas, and
    only relations for which has_table_privilege(..., 'SELECT') is true.

    Args:
        conn: connection object; its ``cursor()`` is called with a DictCursor
            factory and the cursor name 'stitch_cursor'.
        filter_schemas: when truthy, passed along with the SQL text to
            ``post_db.filter_schemas_sql_clause``.
        filter_tables: when truthy, passed along with the SQL text to
            ``post_db.filter_tables_sql_clause``.

    Returns:
        ``{schema_name: {table_name: {'is_view': ..., 'row_count': ...,
        'columns': {column_name: Column}}}}``. Each ``Column`` is constructed
        positionally from column_name, primary_key, data_type,
        character_maximum_length, numeric_precision, numeric_scale, is_array
        and is_enum. 'row_count' comes from pg_class.reltuples cast to BIGINT
        and is therefore approximate.

    NOTE(review): an earlier function with this same name exists in this
    file; this definition would replace it -- confirm that is intended.
    """
    with conn.cursor(cursor_factory=psycopg2.extras.DictCursor, name='stitch_cursor') as cursor:
        cursor.itersize = post_db.cursor_iter_size
        catalog = {}

        # Domain types ('d') are resolved to their base type by the CASE
        # expressions, following this pattern:
        #   SELECT CASE WHEN $2.typtype = 'd' THEN $2.typbasetype ELSE $1.atttypid END
        # The statement text is intentionally left exactly as it was.
        query = """ SELECT pg_class.reltuples::BIGINT AS approximate_row_count, (pg_class.relkind = 'v' or pg_class.relkind = 'm') AS is_view, n.nspname AS schema_name, pg_class.relname AS table_name, attname AS column_name, i.indisprimary AS primary_key, format_type(a.atttypid, NULL::integer) AS data_type, information_schema._pg_char_max_length(CASE WHEN COALESCE(subpgt.typtype, pgt.typtype) = 'd' THEN COALESCE(subpgt.typbasetype, pgt.typbasetype) ELSE COALESCE(subpgt.oid, pgt.oid) END, information_schema._pg_truetypmod(a.*, pgt.*))::information_schema.cardinal_number AS character_maximum_length, information_schema._pg_numeric_precision(CASE WHEN COALESCE(subpgt.typtype, pgt.typtype) = 'd' THEN COALESCE(subpgt.typbasetype, pgt.typbasetype) ELSE COALESCE(subpgt.oid, pgt.oid) END, information_schema._pg_truetypmod(a.*, pgt.*))::information_schema.cardinal_number AS numeric_precision, information_schema._pg_numeric_scale(CASE WHEN COALESCE(subpgt.typtype, pgt.typtype) = 'd' THEN COALESCE(subpgt.typbasetype, pgt.typbasetype) ELSE COALESCE(subpgt.oid, pgt.oid) END, information_schema._pg_truetypmod(a.*, pgt.*))::information_schema.cardinal_number AS numeric_scale, pgt.typcategory = 'A' AS is_array, COALESCE(subpgt.typtype, pgt.typtype) = 'e' AS is_enum FROM pg_attribute a LEFT JOIN pg_type AS pgt ON a.atttypid = pgt.oid JOIN pg_class ON pg_class.oid = a.attrelid JOIN pg_catalog.pg_namespace n ON n.oid = pg_class.relnamespace LEFT OUTER JOIN pg_index as i ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) AND i.indisprimary = true LEFT OUTER JOIN pg_type AS subpgt ON pgt.typelem = subpgt.oid AND pgt.typelem != 0 WHERE attnum > 0 AND NOT a.attisdropped AND pg_class.relkind IN ('r', 'v', 
'm') AND n.nspname NOT in ('pg_toast', 'pg_catalog', 'information_schema') AND has_table_privilege(pg_class.oid, 'SELECT') = true """

        # Optional narrowing; each helper receives the SQL text and returns
        # the text to use from then on.
        if filter_schemas:
            query = post_db.filter_schemas_sql_clause(query, filter_schemas)

        if filter_tables:
            query = post_db.filter_tables_sql_clause(query, filter_tables)

        cursor.execute(query)

        # NOTE(review): fetchall() retrieves the whole result in one go, so the
        # itersize set above presumably has no effect -- confirm whether
        # iterating the cursor directly was intended.
        for record in cursor.fetchall():
            # Relation-level fields come first; everything after them belongs
            # to the column and is forwarded positionally to Column.
            approx_rows, view_flag, schema, table, *column_fields = record

            schema_entry = catalog.setdefault(schema, {})
            table_entry = schema_entry.setdefault(
                table,
                {
                    'is_view': view_flag,
                    'row_count': approx_rows,
                    'columns': {}
                },
            )

            # column_fields[0] is the column name (attname AS column_name).
            table_entry['columns'][column_fields[0]] = Column(*column_fields)

        return catalog