def get_manga_artists(self, manga_id: int, *, cur: Cursor = NotImplemented) -> List[MangaArtist]:
    """Return every artist row associated with the given manga id."""
    query = 'SELECT * FROM manga_artists WHERE manga_id=%s'
    cur.execute(query, (manga_id, ))
    # The cursor is iterable; parse each row into a MangaArtist model.
    return [MangaArtist.parse_obj(row) for row in cur]
def sub_species(self, database_cursor: DatabaseCursor) -> List[Taxonomy]:
    """
    Return all sub-taxonomies of this taxonomy having rank
    TaxonomyRank.SPECIES, including this taxonomy itself when it has
    that rank.

    Parameters
    ----------
    database_cursor : DatabaseCursor
        Database cursor

    Returns
    -------
    List[Taxonomy]
        List of taxonomies (may include self)
    """
    # Walk the taxonomy tree downwards from this node, then keep only
    # rows whose rank matches SPECIES.
    query = (
        "WITH RECURSIVE subtaxonomies AS ("
        "SELECT id, parent_id, name, rank "
        f"FROM {self.__class__.TABLE_NAME} "
        "WHERE id = %s "
        "UNION "
        "SELECT t.id, t.parent_id, t.name, t.rank "
        f"FROM {self.__class__.TABLE_NAME} t "
        "INNER JOIN subtaxonomies s ON s.id = t.parent_id "
        f") SELECT id, parent_id, name, rank FROM subtaxonomies WHERE rank = %s;"
    )
    database_cursor.execute(query, (self.id, TaxonomyRank.SPECIES.value))
    # Each row is (id, parent_id, name, rank) in constructor order.
    return [self.__class__(*row) for row in database_cursor.fetchall()]
def execute_operation(cur: cursor, conn: connection, commands: List[str], verbose: bool = True) -> None:
    """
    Execute database operation.

    Parameters
    ----------
    cur : psycopg2.extensions.cursor
        Database cursor.
    conn : psycopg2.extensions.connection
        Database connection.
    commands : list[str]
        Commands or queries to be executed in the database.
    verbose : bool, optional
        Whether to be verbose.
    """
    for command in commands:
        if verbose:
            print(80 * '-')
            print(f'[{datetime.now()}] Running...')
            print(command)
        cur.execute(command)
        # Commit after each statement so earlier work survives a later failure.
        conn.commit()
    if verbose:
        print('Done!')
def get_list_field_type_tuples(schema_table: SchemaTable, cursor: extensions.cursor) -> List[Tuple[str, str]]:
    """
    Return (field_name, sql_type) tuples for a schema table, in ordinal order.

    The result comes from information_schema.columns, so this also works for
    views — but not for materialized views, which are not part of the SQL
    standard and therefore absent from the information schema. The type is
    the raw data_type value; precision/scale for numeric types is not used.

    :param schema_table: the schema table to use (can also be views, but not
        materialized views)
    :param cursor: the cursor for where to execute this query
    :return: a list of tuples of strings, each one containing the field name
        and the sql type in ordinal order
    """
    query = sql.SQL("""
        SELECT column_name, data_type
        FROM information_schema.columns
        WHERE table_schema = %s AND table_name = %s
        ORDER BY ordinal_position
    """)
    cursor.execute(query, (schema_table.schema.string, schema_table.table.string))
    # TODO add precision and scale to a parenthetical for numeric types
    # TODO make this return List[Tuple[Field, SQLType] instead of List[str, str]
    return cursor.fetchall()
def get_movies_by_ids(ids: List[str], cursor: _cursor) -> List[dict]:
    """Retrieves full movies data.

    Args:
        ids: film_work ids to fetch.
        cursor: open database cursor.

    Returns:
        List of aggregated movie rows (genres/persons collected via array_agg).
    """
    logger.debug(f"Looking for {len(ids)} movies")
    if not ids:
        # Fix: an empty id list would render "IN ()", which is invalid SQL.
        return []
    # mogrify safely escapes each id before it is embedded in the IN clause.
    args = ",".join(cursor.mogrify("%s", (_id, )).decode() for _id in ids)
    cursor.execute(f"""
        SELECT
            fw.id as fw_id,
            fw.title,
            fw.description,
            fw.rating,
            fw.created_at,
            fw.updated_at,
            array_agg(g.name) as genres,
            array_agg(p.full_name) as names,
            array_agg(pfw.role) as roles,
            array_agg(p.id) as persons_ids
        FROM content.film_work fw
        LEFT JOIN content.person_film_work pfw ON pfw.film_work_id = fw.id
        LEFT JOIN content.person p ON p.id = pfw.person_id
        LEFT JOIN content.genre_film_work gfw ON gfw.film_work_id = fw.id
        LEFT JOIN content.genre g ON g.id = gfw.genre_id
        WHERE fw.id IN ({args})
        GROUP BY fw_id;
    """)
    movies = cursor.fetchall()
    logger.debug(f"Found {len(movies)} movies by ids")
    return movies
def process_song_file(cur: cursor, filepath: str):
    """Process a song file

    Args:
        cur (cursor): Conected cursor
        filepath (str): Path for a json file
    """
    # Load the single-record JSON-lines song file.
    df = pd.read_json(filepath, lines=True)

    # Insert the song record; all values are stringified for the insert.
    song_cols = ["song_id", "title", "artist_id", "year", "duration"]
    song_data = [str(value) for value in df.loc[0, song_cols].values]
    cur.execute(sql.song_table_insert, song_data)

    # Insert the artist record the same way.
    artist_cols = [
        "artist_id", "artist_name", "artist_location", "artist_latitude",
        "artist_longitude"
    ]
    artist_data = [str(value) for value in df.loc[0, artist_cols].values]
    cur.execute(sql.artist_table_insert, artist_data)
def update_estimated_release(self, manga_id: int, *, cur: Cursor = NotImplemented) -> Optional[DictRow]:
    """Recalculate the estimated release date of a manga.

    Sets estimated_release to the release date of the latest chapter plus
    release_interval. Does nothing when the manga id does not exist or
    release_interval is NULL.

    Args:
        manga_id: id of the manga to update.
        cur: database cursor.

    Returns:
        The row containing the new and old estimated release, or None when
        nothing was updated.
    """
    # Fix: the function returns the updated row, so the annotation was
    # corrected from -> None to -> Optional[DictRow].
    sql = 'WITH tmp AS (SELECT MAX(chapter_number) as chn FROM chapters WHERE manga_id=%(manga)s) ' \
          'UPDATE manga SET estimated_release=(' \
          ' SELECT MIN(release_date) FROM chapters ' \
          ' WHERE manga_id=%(manga)s AND ' \
          ' chapter_number=(SELECT chn FROM tmp) AND ' \
          ' chapter_decimal IS NOT DISTINCT FROM (SELECT MAX(chapter_decimal) FROM chapters WHERE manga_id= %(manga)s AND chapter_number=(SELECT chn FROM tmp))' \
          ') + release_interval ' \
          'WHERE manga_id=%(manga)s AND release_interval IS NOT NULL ' \
          'RETURNING estimated_release, (SELECT estimated_release FROM manga WHERE manga_id=%(manga)s) as estimated_release_old'
    cur.execute(sql, {'manga': manga_id})
    rows = cur.fetchall()
    if not rows:
        maintenance.warning(
            "Nothing updated because manga id doesn't exist or release_interval was NULL"
        )
        return None
    row = rows[0]
    maintenance.info(
        f'Set estimated release from {row["estimated_release_old"]} to {row["estimated_release"]}'
    )
    return row
def update_latest_chapter(self, data: Collection[Tuple[int, int, datetime]], *, cur: Cursor = NotImplemented) -> None:
    """
    Updates the latest chapter and next chapter estimates for the given manga
    that contain new chapters

    Args:
        cur: Optional database cursor
        data: iterable of tuples or lists [manga_id, latest_chapter, release_date]

    Returns:
        None
    """
    if not data:
        return

    format_ids = ','.join(['%s'] * len(data))
    sql = f'SELECT latest_chapter, manga_id FROM manga WHERE manga_id IN ({format_ids})'
    cur.execute(sql, [d[0] for d in data])
    rows = cur.fetchall()
    if not rows:
        return

    # Map manga_id -> current latest_chapter and keep only entries that are
    # actually newer. Fix: guard with `d[0] in rows` so ids that exist in the
    # input but not in the manga table no longer raise KeyError.
    rows = {r[1]: r[0] for r in rows}
    data = [
        d for d in data
        if d[0] in rows and (rows[d[0]] is None or rows[d[0]] < d[1])
    ]
    if not data:
        return

    sql = 'UPDATE manga m SET latest_chapter=c.latest_chapter, estimated_release=c.release_date + release_interval FROM ' \
          ' (VALUES %s) as c(manga_id, latest_chapter, release_date) ' \
          'WHERE c.manga_id=m.manga_id'
    execute_values(cur, sql, data)
def get_only_latest_entries(
        self, service_id: int, entries: Collection[BaseChapter],
        manga_id: Optional[int] = None, limit: int = 400, *,
        cur: Cursor = NotImplemented) -> Collection[BaseChapter]:
    """Return only the entries whose chapter identifiers are not stored yet.

    Args:
        service_id: service the chapters belong to.
        entries: candidate chapters.
        manga_id: optionally restrict the lookup to one manga.
        limit: unused here; kept for interface compatibility.
        cur: database cursor.

    Returns:
        The subset of entries not present in the chapters table, or all
        entries when the lookup fails.
    """
    if not entries:
        # Fix: an empty collection would build an invalid "IN ()" clause.
        return entries

    if len(entries) > 200:
        logger.warning(
            'Over 200 entries passed to get_only_latest_entries')

    args: Tuple = tuple(c.chapter_identifier for c in entries)
    format_args = ','.join(('%s', ) * len(args))

    if manga_id:
        sql = 'SELECT chapter_identifier FROM chapters ' \
              f'WHERE service_id=%s AND manga_id=%s AND chapter_identifier IN ({format_args})'
        args = (service_id, manga_id, *args)
    else:
        sql = 'SELECT chapter_identifier FROM chapters ' \
              f'WHERE service_id=%s AND chapter_identifier IN ({format_args})'
        args = (service_id, *args)

    try:
        cur.execute(sql, args)
        return set(entries).difference(set(r[0] for r in cur))
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Best-effort fallback behavior is preserved.
        logger.exception('Failed to get old chapters')
        return list(entries)
def _execute_file(migration_file: MigrationFile, curs: cursor):
    """Run the SQL in *migration_file*, then record it in the migrations table."""
    with open(migration_file.path) as stream:
        curs.execute(stream.read())
    insert_sql = """INSERT INTO arctic_tern_migrations VALUES (%s, %s, %s, now())"""
    curs.execute(
        insert_sql,
        [migration_file.stamp, migration_file.name, migration_file.hash_])
def find_parent(cursor: extensions.cursor, named_location_id: int, parents: Dict[str, str]):
    """
    Recursively search for the site.

    :param cursor: A database cursor object.
    :param named_location_id: The named location ID.
    :param parents: Collection to append to.
    """
    sql = '''
        select
            prnt_nam_locn_id, nam_locn.nam_locn_name, type.type_name
        from
            nam_locn_tree
        join
            nam_locn on nam_locn.nam_locn_id = nam_locn_tree.prnt_nam_locn_id
        join
            type on type.type_id = nam_locn.type_id
        where
            chld_nam_locn_id = %s
    '''
    cursor.execute(sql, [named_location_id])
    row = cursor.fetchone()
    # Stop recursing once there is no parent left.
    if row is not None:
        parent_id, name, type_name = row
        kind = type_name.lower()
        if kind == 'site':
            parents['site'] = name
        if kind == 'domain':
            parents['domain'] = name
        find_parent(cursor, parent_id, parents)
def primary_key(pg_cur: cursor, schema_name: str, table_name: str) -> str:
    """
    Returns the primary key of a table

    Parameters
    ----------
    pg_cur
        psycopg cursor
    schema_name
        the schema name
    table_name
        the table name

    Raises
    ------
    TableHasNoPrimaryKey
        when the table has no primary key constraint
    """
    # Fix: values are now passed as bound parameters instead of being
    # interpolated into the SQL string (avoids quoting/injection issues).
    sql = "SELECT c.column_name"\
          " FROM information_schema.key_column_usage AS c "\
          " LEFT JOIN information_schema.table_constraints AS t"\
          " ON t.constraint_name = c.constraint_name"\
          " WHERE t.table_name = %s"\
          " AND t.table_schema = %s"\
          " AND t.constraint_type = 'PRIMARY KEY'"
    pg_cur.execute(sql, (table_name, schema_name))
    try:
        pkey = pg_cur.fetchone()[0]
    except Exception:
        # fetchone() returns None when there is no primary key row.
        raise TableHasNoPrimaryKey(sql)
    return pkey
def default_value(pg_cur: cursor, table_schema: str, table_name: str, column: str) -> str:
    """
    Returns the default value of the column

    Parameters
    ----------
    pg_cur
        the psycopg cursor
    table_schema
        the table schema
    table_name
        the table name
    column
        the column name
    """
    # see https://stackoverflow.com/a/8148177/1548052
    # Fix: the relation and column name are now bound parameters instead of
    # string interpolation; the qualified name is cast to regclass server-side.
    sql = "SELECT pg_get_expr(d.adbin, d.adrelid) AS default_value\n" \
          "FROM pg_catalog.pg_attribute a\n" \
          "LEFT JOIN pg_catalog.pg_attrdef d ON (a.attrelid, a.attnum) = (d.adrelid, d.adnum)\n" \
          "WHERE NOT a.attisdropped -- no dropped (dead) columns\n" \
          "AND a.attnum > 0 -- no system columns\n" \
          "AND a.attrelid = %s::regclass\n" \
          "AND a.attname = %s;"
    pg_cur.execute(sql, ('{}.{}'.format(table_schema, table_name), column))
    return pg_cur.fetchone()[0] or 'NULL'
def geometry_type(pg_cur: cursor, table_schema: str, table_name: str, column: str = 'geometry') -> (str, int):
    """
    Returns the geometry type of a column as a tuple (type, srid),
    or None when the column is not a registered geometry column

    Parameters
    ----------
    pg_cur
        the psycopg cursor
    table_schema
        the table schema
    table_name
        the table name
    column:
        the geometry column name, defaults to "geometry"
    """
    # Fix: identifiers are now passed as bound parameters instead of
    # being interpolated into the SQL string.
    sql = "SELECT type, srid " \
          "FROM geometry_columns " \
          "WHERE f_table_schema = %s " \
          "AND f_table_name = %s " \
          "AND f_geometry_column = %s;"
    pg_cur.execute(sql, (table_schema, table_name, column))
    res = pg_cur.fetchone()
    if res:
        return res[0], res[1]
    else:
        return None
def test_tables(cur: psycopg2Ext.cursor, conn: psycopg2Ext.connection) -> None:
    """
    Description: Test table status to make sure tables exists.

    Arguments:
        cur (psycopg2Ext.cursor): cursor object
        conn (psycopg2Ext.connection): connection object

    Returns:
        None
    """
    print("\n==================== TEST -- table status ====================")
    for query in create_table_queries:
        # Extract the table name from "CREATE TABLE IF NOT EXISTS <name>(".
        tbl_name = query[query.find("EXISTS") + len("EXISTS"):query.find("(")].strip()
        # Fix: the table name is now a bound parameter instead of being
        # interpolated into the SQL string.
        query = "select exists(select * from information_schema.tables where table_name=%s)"
        try:
            cur.execute(query, (tbl_name, ))
        except psycopg2.Error as e:
            msg = f"ERROR: Could not retrieve table info with query: {query}"
            logger.warning(msg, e)
            return
        conn.commit()
        try:
            tbl_status = cur.fetchone()[0]
        except psycopg2.Error as e:
            msg = f"ERROR: Could not fetch table status for table: {tbl_name}"
            logger.warning(msg, e)
            return
        print(f"Table '{tbl_name}' exists status: {tbl_status}.")
def create_dist_schema(
    DWH_DB_USER: str,
    schema_name: str,
    cur: psycopg2Ext.cursor,
    conn: psycopg2Ext.connection,
) -> None:
    """
    Description: Create distribution schema and set search path
        to the schema name.

    Arguments:
        DWH_DB_USER (str): db user name to restrict authorization
        schema_name (str): schema
        cur (psycopg2Ext.cursor): cursor object
        conn (psycopg2Ext.connection): connection object

    Returns:
        None
    """
    statements = (
        f"DROP SCHEMA IF EXISTS {schema_name} CASCADE;",
        f"CREATE SCHEMA IF NOT EXISTS {schema_name} authorization {DWH_DB_USER};",
        f"SET search_path TO {schema_name};",
    )
    for statement in statements:
        try:
            cur.execute(statement)
        except psycopg2.Error as e:
            # Bail out on the first failure; nothing is committed.
            msg = "ERROR: Issue dropping/creating schema."
            logger.warning(msg, e)
            return
    conn.commit()
def set_service_disabled_until(self, service_id: int, disabled_until: datetime, *, cur: Cursor = NotImplemented):
    """Disable the given service until the specified time."""
    query = 'UPDATE services SET disabled_until=%s WHERE service_id=%s'
    cur.execute(query, [disabled_until, service_id])
def upload_questions(questions: Questions, cursor: PostgresCursor) -> None:
    """Upsert questions, their tags, and question-tag links in batches."""
    logger = log.get_logger()
    logger.info("Uploading questions")
    items = questions["items"]

    def question_rows() -> Iterable[dict]:
        # Flatten the nested owner object into a top-level owner_id.
        for q in items:
            yield {**q, "owner_id": q["owner"].get("user_id")}

    execute_batch(cursor, db.get_sql("upsert_question"), question_rows(), 1000)

    logger.info("Uploading tags")
    unique_tags = {(tag, ) for q in items for tag in q["tags"]}
    execute_batch(cursor, db.get_sql("upsert_tag"), unique_tags)

    def question_tag_rows() -> Iterable[dict]:
        for q in items:
            qid = q["question_id"]
            for tag in q["tags"]:
                yield {"question_id": qid, "tag_id": tag}

    logger.info("Uploading question tags")
    # The script is a delete-then-insert pair: clear existing links for these
    # questions, then re-insert the current set.
    delete, insert = db.get_sql_script("upsert_question_tag")
    question_ids = tuple({q["question_id"] for q in items})
    cursor.execute(delete, (question_ids, ))
    execute_batch(cursor, insert, question_tag_rows(), 10000)
def manga_has_author(self, manga_id: int, *, cur: Cursor = NotImplemented) -> bool:
    """Return True when at least one author is linked to the manga."""
    query = 'SELECT EXISTS(SELECT 1 FROM manga_authors WHERE manga_id=%s) as "exists"'
    cur.execute(query, (manga_id, ))
    row = cur.fetchone()
    return row['exists']
def columns(pg_cur: cursor, table_schema: str, table_name: str, table_type: str = 'table', remove_pkey: bool = False, skip_columns: list = None) -> list:
    """
    Returns the list of columns of a table

    Parameters
    ----------
    pg_cur
        psycopg cursor
    table_schema
        the table_schema
    table_name
        the table
    table_type
        the type of table, i.e. view or table
    remove_pkey
        if True, the primary key is dropped
    skip_columns
        list of columns to be skipped (defaults to none)

    Raises
    ------
    InvalidSkipColumns
        when a column in skip_columns does not exist in the table
    """
    # Fix: the default was a mutable list literal ([]); use None and create a
    # fresh list per call to avoid the shared-default pitfall.
    if skip_columns is None:
        skip_columns = []
    assert table_type.lower() in ('table', 'view')
    if table_type.lower() == 'table':
        sql = """SELECT attname
                 FROM pg_attribute
                 WHERE attrelid = '{s}.{t}'::regclass
                 AND attisdropped IS NOT TRUE
                 AND attnum > 0
                 ORDER BY attnum ASC""".format(s=table_schema, t=table_name)
    else:
        sql = """
            SELECT c.column_name
            FROM information_schema.tables t
            LEFT JOIN information_schema.columns c
            ON t.table_schema = c.table_schema
            AND t.table_name = c.table_name
            WHERE table_type = 'VIEW'
            AND t.table_schema = '{s}'
            AND t.table_name = '{t}'
            ORDER BY ordinal_position""".format(s=table_schema, t=table_name)
    pg_cur.execute(sql)
    pg_fields = pg_cur.fetchall()
    pg_fields = [field[0] for field in pg_fields if field[0]]
    for col in skip_columns:
        try:
            pg_fields.remove(col)
        except ValueError:
            raise InvalidSkipColumns(
                'Cannot skip unexisting column "{col}" in "{s}.{t}"'.format(
                    col=col, s=table_schema, t=table_name))
    if remove_pkey:
        pkey = primary_key(pg_cur, table_schema, table_name)
        pg_fields.remove(pkey)
    return pg_fields
def update_manga_next_update(self, service_id: int, manga_id: int, next_update: datetime, *, cur: Cursor = NotImplemented) -> None:
    """Set the next update timestamp for a manga on a specific service."""
    query = 'UPDATE manga_service SET next_update=%s WHERE manga_id=%s AND service_id=%s'
    cur.execute(query, [next_update, manga_id, service_id])
def find_service_manga(self, service_id: int, title_id: str, *, cur: Cursor = NotImplemented) -> DictRow:
    """Fetch the manga_service row matching the service and title ids."""
    query = 'SELECT * from manga_service WHERE service_id=%s AND title_id=%s'
    cur.execute(query, (service_id, title_id))
    row = cur.fetchone()
    return row
def set_manga_last_checked(self, service_id: int, manga_id: int, last_checked: Optional[datetime], *, cur: Cursor = NotImplemented):
    """Record when a manga was last checked on a service (None clears it)."""
    query = 'UPDATE manga_service SET last_check=%s WHERE manga_id=%s AND service_id=%s'
    cur.execute(query, (last_checked, manga_id, service_id))
def query(cursor: Cursor, sql: str, *args: Union[str, Tuple[str, ...], Password]) -> None:
    """
    Run a SQL query against a database cursor.
    """
    LOG.debug("Query: %r %% %r", sql, args)
    # Password objects are coerced to plain strings; everything else is
    # passed through untouched.
    params = [str(arg) if isinstance(arg, Password) else arg for arg in args]
    cursor.execute(sql, params)
def get_service_configs(self, *, cur: Cursor = NotImplemented
                        ) -> List[ServiceConfig]:
    """Return every row of the service_config table as ServiceConfig models."""
    query = 'SELECT * FROM service_config'
    cur.execute(query)
    return [ServiceConfig.parse_obj(row) for row in cur]
def find_manga_by_title(self, title: str, *, cur: Cursor = NotImplemented) -> Optional[Manga]:
    """Find a single manga by its exact title, or None when not found."""
    query = 'SELECT * FROM manga WHERE title=%s LIMIT 1'
    cur.execute(query, (title, ))
    row = cur.fetchone()
    if not row:
        return None
    return Manga(**row)
def check_table(db: Cursor, table_name: str) -> bool:
    """Return True when a table with the given name exists."""
    query = """
        SELECT EXISTS (
            SELECT *
            FROM information_schema.tables
            WHERE table_name = %s
        );
    """
    db.execute(query, (table_name, ))
    row = db.fetchone()
    return row[0]
def get_all_scheduled_runs(self, *, cur: Cursor = NotImplemented
                           ) -> List[ScheduledRunResult]:
    """Return all scheduled runs joined with their service title ids."""
    query = 'SELECT sr.manga_id, sr.service_id, ms.title_id FROM scheduled_runs sr ' \
            'LEFT JOIN manga_service ms ON sr.manga_id = ms.manga_id AND sr.service_id = ms.service_id'
    cur.execute(query)
    return [ScheduledRunResult.parse_obj(row) for row in cur]
def create_users_table(db: Cursor):
    """Create the users table (id, name, age) when it does not exist yet."""
    ddl = """
        CREATE TABLE IF NOT EXISTS users(
            id serial PRIMARY KEY,
            name VARCHAR(50),
            age INT
        )
    """
    db.execute(ddl)
def update_latest_release(self, data: List[int], *, cur: Cursor = NotImplemented) -> None:
    """Refresh latest_release for the given manga ids from their chapters.

    Args:
        data: manga ids whose latest release date should be recalculated.
        cur: database cursor.
    """
    format_ids = self.get_format_args(data)
    # Fix: the original string concatenation was missing a space before
    # WHERE ("...manga_id)WHERE m.manga_id..."), producing invalid SQL.
    sql = 'UPDATE manga m SET latest_release=c.release_date FROM ' \
          f'(SELECT MAX(release_date), manga_id FROM chapters WHERE manga_id IN ({format_ids}) GROUP BY manga_id) as c(release_date, manga_id) ' \
          'WHERE m.manga_id=c.manga_id'
    cur.execute(sql, data)