def add_and_update_vids(self, videos: Iterable[BaseVideo], existing_ids: set[str], site: int | Site, cur: Cursor = NotImplemented): """ Adds new videos to database and updates the following properties from duplicate entries: title, description, thumbnail url, publish date also sets the deleted flag to False Do not give deleted videos to this function. It will set the deleted flag to true on them Args: videos: Iterable of :class:BaseVideo that will be added or updated existing_ids: Set of all video ids that are in the database for the given site. site: id of the site being used cur: Optional cursor """ videos_set = set(videos) # We can't use intersection because it wouldn't give our custom object # instead it always would give a set of values used for comparison do_insert = videos_set - existing_ids do_update = videos_set - do_insert if not (do_insert or do_update): return site = int(site) if do_insert: sql = 'INSERT INTO videos AS v (video_id, title, description, published_at, site, thumbnail) ' \ 'VALUES %s' values = tuple( (vid.video_id, vid.title, vid.description, vid.published_at, site, vid.thumbnail) for vid in do_insert ) execute_values(cur, sql, values, page_size=500) if do_update: # https://stackoverflow.com/a/18799497/6046713 sql = 'UPDATE videos AS v SET ' \ 'title=CASE WHEN v.title!=c.title THEN c.title ELSE v.title END, ' \ 'description=CASE WHEN v.description!=c.description THEN c.description ELSE v.description END, ' \ 'deleted=FALSE,' \ 'thumbnail=COALESCE(c.thumbnail, v.thumbnail), ' \ 'published_at=CASE WHEN c.published_at >= v.published_at THEN v.published_at ELSE c.published_at END ' \ 'FROM (VALUES %s) AS c(video_id, title, description, published_at, site, thumbnail) ' \ 'WHERE c.site=v.site AND c.video_id=v.video_id' values = tuple( (vid.video_id, vid.title, vid.description, vid.published_at, site, vid.thumbnail) for vid in do_update ) execute_values(cur, sql, values, page_size=500)
def add_channel_videos(self, videos: Iterable[BaseVideo], channels: Sequence[BaseChannel | str], channel_cache: set[str], db_videos: dict[str, int], site: int | Site): """ Link video ids to channel ids in the channelVideos table This will handle adding missing channels for you. The videos need to have the channel property set to for this to work Args: videos: List of :class:BaseVideo instances channels: List of BaseChannel instances and channel_ids as str channel_cache: List of cached channel ids db_videos: video_id to db id site: id of the site being used """ self.add_channels([c for c in channels if not isinstance(c, str)], channel_cache, site) sql = 'SELECT id, channel_id FROM channels WHERE channel_id=ANY(%s)' channel_ids = {} with self.class_cursor(models.Channel) as cursor: cursor.execute(sql, [[c if isinstance(c, str) else c.channel_id for c in channels]]) for row in cursor: channel_ids[row.channel_id] = row.id data = [] for vid in videos: channel_id = channel_ids.get(vid.channel_id) if not channel_id: logger.warning(f'Channel not found for video {vid}') continue vid_id = db_videos.get(vid.video_id) if not vid_id: continue data.append((channel_id, vid_id)) sql = 'INSERT INTO channelVideos (channel_id, video_id) VALUES %s ON CONFLICT DO NOTHING' with self.conn.cursor() as cursor: execute_values(cursor, sql, data, page_size=2000)
def add_playlist_vids(self, playlist_id: int, video_ids: Iterable[int], cur: Cursor = NotImplemented): """ Add video playlist connection to the playlistVideos table Args: playlist_id: The database id for the playlist video_ids: An iterable of database ids for videos that are added the specified playlist cur: optional cursor """ sql = 'INSERT INTO playlistVideos (playlist_id, video_id) VALUES ' \ '%s ON CONFLICT DO NOTHING' values = tuple((playlist_id, video_id) for video_id in video_ids) execute_values(cur, sql, values, page_size=2000)
def add_channels(self, channels: Iterable[BaseChannel], channel_cache: set[str], site: int | Site, cur: Cursor = NotImplemented): """ Adds channels to db and updates old entries Columns updated are as follows: name and thumbnail aka profile pic Args: channels: iterable of channels to add channel_cache: Set of all channels ids in db site: id of the site cur: Optional cursor """ site = int(site) channels = set(channels) do_insert = channels - cast(set[BaseChannel], channel_cache) do_update = channels - do_insert if not (do_insert or do_update): return if do_insert: sql = 'INSERT INTO channels (channel_id, name, thumbnail, site) VALUES %s' execute_values(cur, sql, [(c.channel_id, c.name, c.thumbnail, site) for c in do_insert], page_size=1000) channel_cache.update([c.channel_id for c in do_insert]) if do_update: sql = 'UPDATE channels AS c SET ' \ 'name=COALESCE(v.name, c.name), ' \ 'thumbnail=COALESCE(v.thumbnail, c.thumbnail) ' \ 'FROM (VALUES %s) AS v(channel_id, name, thumbnail) ' \ 'WHERE v.channel_id=c.channel_id' execute_values(cur, sql, [(c.channel_id, c.name, c.thumbnail) for c in do_update], page_size=1000)
def add_deleted_vids(self, videos: Iterable[BaseVideo], existing_ids: set[str], site: int | Site, cur: Cursor = NotImplemented): """ Sets the deleted flag on the videos provided and also sets the deletion time column if the deleted flag hasn't been set before Args: videos: Iterable of BaseVideo that are deleted site: id of the site being used cur: Optional cursor """ videos = set(videos) do_insert = videos - existing_ids do_update = videos - do_insert if not (do_insert or do_update): return site = int(site) if do_insert: t = datetime.now(timezone.utc) sql = 'INSERT INTO videos (video_id, title, published_at, site, deleted, deleted_at) VALUES %s' values = tuple((vid.video_id, t, t) for vid in do_insert) execute_values(cur, sql, values, page_size=1000, template=f"(%s, 'Deleted video', %s, {site}, True, %s)") if do_update: sql = 'UPDATE videos AS v SET ' \ 'deleted_at=CASE WHEN v.deleted=FALSE THEN CURRENT_TIMESTAMP ELSE v.deleted_at END, ' \ 'deleted=TRUE ' \ 'WHERE site=%s AND video_id=ANY(%s)' cur.execute(sql, [site, [v.video_id for v in do_update]])
def add_vid_tags(self, videos: set[BaseVideo], cached_tags: dict[str, int], all_videos: dict[str, int], default_tags: list[str] = None): """ Adds missing tags to the database based on the provided videos. Also updates cached_tags and all_videos Args: videos: List of videos from which the tags will be added cached_tags: tags that have already been cached to memory all_videos: All videos stored in the db of the specified site. default_tags: An list of tag names to be applied to every video in the videos param """ default_tags = [] if not default_tags else default_tags tag_values: set[str] = set(default_tags) cached_tag_names = set(cached_tags.keys()) # Copy the list of videos since we don't want to edit the original list videos = videos.copy() for vid in videos.copy(): # This copy is probably needed if not vid.data: videos.remove(vid) continue tags = vid.tags if not tags: videos.remove(vid) continue tag_values.update(map(str.lower, tags)) # Get non cached tags and add them to db tobecached = tag_values - cached_tag_names if tobecached: sql = 'INSERT INTO tags (tag) VALUES %s ON CONFLICT DO NOTHING RETURNING tag, id' with self.class_cursor(models.Tag) as cursor: results: list[models.Tag] = execute_values(cursor, sql, [(x,) for x in tobecached], page_size=1000, fetch=True) for tag in results: cached_tags[tag.tag] = tag.id values = [] for vid in videos: video_id = all_videos.get(vid.video_id) if not video_id: logger.warning('Video id not found with %s' % vid) continue # Add video specific tags for tag in vid.tags: tag_id = cached_tags.get(tag.lower()) if not tag_id: logger.warning('Tag %s not found' % tag) continue values.append((tag_id, video_id)) # Add default tags for default_tag in default_tags: tag_id = cached_tags.get(default_tag.lower()) if not tag_id: logger.warning('Tag %s not found' % default_tag) continue values.append((tag_id, video_id)) sql = 'INSERT INTO videoTags (tag_id, video_id) VALUES %s ON CONFLICT DO NOTHING ' with self.conn.cursor() as cursor: execute_values(cursor, sql, values, page_size=2000)