Beispiel #1
0
    def add_and_update_vids(self, videos: Iterable[BaseVideo], existing_ids: set[str], site: int | Site, cur: Cursor = NotImplemented):
        """
        Adds new videos to database and updates the following properties from
        duplicate entries:
            title, description, thumbnail url, publish date
            also sets the deleted flag to False

        Do not give deleted videos to this function. It will set the deleted
        flag to true on them

        Args:
            videos:
                Iterable of :class:BaseVideo that will be added or updated
            existing_ids:
                Set of all video ids that are in the database for the given site.
            site:
                id of the site being used
            cur:
                Optional cursor
        """
        videos_set = set(videos)
        # We can't use intersection because it wouldn't give our custom object
        # instead it always would give a set of values used for comparison
        do_insert = videos_set - existing_ids
        do_update = videos_set - do_insert

        if not (do_insert or do_update):
            return

        site = int(site)

        if do_insert:
            sql = 'INSERT INTO videos AS v (video_id, title, description, published_at, site, thumbnail) ' \
                  'VALUES %s'

            values = tuple(
                (vid.video_id, vid.title, vid.description, vid.published_at, site, vid.thumbnail)
                for vid in do_insert
            )

            execute_values(cur, sql, values, page_size=500)

        if do_update:
            # https://stackoverflow.com/a/18799497/6046713
            sql = 'UPDATE videos AS v SET ' \
                  'title=CASE WHEN v.title!=c.title THEN c.title ELSE v.title END, ' \
                  'description=CASE WHEN v.description!=c.description THEN c.description ELSE v.description END, ' \
                  'deleted=FALSE,' \
                  'thumbnail=COALESCE(c.thumbnail, v.thumbnail), ' \
                  'published_at=CASE WHEN c.published_at >= v.published_at THEN v.published_at ELSE c.published_at END ' \
                  'FROM (VALUES %s) AS c(video_id, title, description, published_at, site, thumbnail) ' \
                  'WHERE c.site=v.site AND c.video_id=v.video_id'

            values = tuple(
                (vid.video_id, vid.title, vid.description, vid.published_at, site, vid.thumbnail)
                for vid in do_update
            )

            execute_values(cur, sql, values, page_size=500)
Beispiel #2
0
    def add_channel_videos(self,
                           videos: Iterable[BaseVideo],
                           channels: Sequence[BaseChannel | str],
                           channel_cache: set[str],
                           db_videos:  dict[str, int],
                           site: int | Site):
        """
        Link video ids to channel ids in the channelVideos table
        This will handle adding missing channels for you. The videos need
        to have the channel property set to for this to work

        Args:
            videos: List of :class:BaseVideo instances
            channels: List of BaseChannel instances and channel_ids as str
            channel_cache: List of cached channel ids
            db_videos: video_id to db id
            site: id of the site being used
        """
        self.add_channels([c for c in channels if not isinstance(c, str)], channel_cache, site)
        sql = 'SELECT id, channel_id FROM channels WHERE channel_id=ANY(%s)'

        channel_ids = {}
        with self.class_cursor(models.Channel) as cursor:
            cursor.execute(sql, [[c if isinstance(c, str) else c.channel_id for c in channels]])

            for row in cursor:
                channel_ids[row.channel_id] = row.id

        data = []

        for vid in videos:
            channel_id = channel_ids.get(vid.channel_id)
            if not channel_id:
                logger.warning(f'Channel not found for video {vid}')
                continue

            vid_id = db_videos.get(vid.video_id)
            if not vid_id:
                continue

            data.append((channel_id, vid_id))

        sql = 'INSERT INTO channelVideos (channel_id, video_id) VALUES %s ON CONFLICT DO NOTHING'
        with self.conn.cursor() as cursor:
            execute_values(cursor, sql, data, page_size=2000)
Beispiel #3
0
    def add_playlist_vids(self, playlist_id: int, video_ids: Iterable[int], cur: Cursor = NotImplemented):
        """
        Add video playlist connection to the playlistVideos table

        Args:
            playlist_id:
                The database id for the playlist
            video_ids:
                An iterable of database ids for videos that are added the
                specified playlist
            cur: optional cursor
        """
        sql = 'INSERT INTO playlistVideos (playlist_id, video_id) VALUES ' \
              '%s ON CONFLICT DO NOTHING'

        values = tuple((playlist_id, video_id) for video_id in video_ids)

        execute_values(cur, sql, values, page_size=2000)
Beispiel #4
0
    def add_channels(self, channels: Iterable[BaseChannel], channel_cache: set[str], site: int | Site, cur: Cursor = NotImplemented):
        """
        Adds channels to db and updates old entries
        Columns updated are as follows:
            name and thumbnail aka profile pic

        Args:
            channels: iterable of channels to add
            channel_cache: Set of all channels ids in db
            site: id of the site
            cur: Optional cursor

        """
        site = int(site)
        channels = set(channels)
        do_insert = channels - cast(set[BaseChannel], channel_cache)
        do_update = channels - do_insert

        if not (do_insert or do_update):
            return

        if do_insert:
            sql = 'INSERT INTO channels (channel_id, name, thumbnail, site) VALUES %s'

            execute_values(cur, sql, [(c.channel_id, c.name, c.thumbnail, site) for c in do_insert], page_size=1000)

            channel_cache.update([c.channel_id for c in do_insert])

        if do_update:
            sql = 'UPDATE channels AS c SET ' \
                  'name=COALESCE(v.name, c.name), ' \
                  'thumbnail=COALESCE(v.thumbnail, c.thumbnail) ' \
                  'FROM (VALUES %s) AS v(channel_id, name, thumbnail) ' \
                  'WHERE v.channel_id=c.channel_id'

            execute_values(cur, sql, [(c.channel_id, c.name, c.thumbnail) for c in do_update], page_size=1000)
Beispiel #5
0
    def add_deleted_vids(self, videos: Iterable[BaseVideo], existing_ids: set[str], site: int | Site, cur: Cursor = NotImplemented):
        """
        Sets the deleted flag on the videos provided and also sets the
        deletion time column if the deleted flag hasn't been set before

        Args:
            videos:
                Iterable of BaseVideo that are deleted
            site:
                id of the site being used
            cur:
                Optional cursor
        """
        videos = set(videos)
        do_insert = videos - existing_ids
        do_update = videos - do_insert

        if not (do_insert or do_update):
            return

        site = int(site)
        if do_insert:
            t = datetime.now(timezone.utc)
            sql = 'INSERT INTO videos (video_id, title, published_at, site, deleted, deleted_at) VALUES %s'

            values = tuple((vid.video_id, t, t) for vid in do_insert)

            execute_values(cur, sql, values, page_size=1000, template=f"(%s, 'Deleted video', %s, {site}, True, %s)")

        if do_update:
            sql = 'UPDATE videos AS v SET ' \
                  'deleted_at=CASE WHEN v.deleted=FALSE THEN CURRENT_TIMESTAMP ELSE v.deleted_at END, ' \
                  'deleted=TRUE ' \
                  'WHERE site=%s AND video_id=ANY(%s)'

            cur.execute(sql, [site, [v.video_id for v in do_update]])
Beispiel #6
0
    def add_vid_tags(self,
                     videos: set[BaseVideo],
                     cached_tags: dict[str, int],
                     all_videos: dict[str, int],
                     default_tags: list[str] = None):
        """
        Adds missing tags to the database based on the provided videos.
        Also updates cached_tags and all_videos

        Args:
            videos:
                List of videos from which the tags will be added
            cached_tags:
                tags that have already been cached to memory
            all_videos:
                All videos stored in the db of the specified site.
            default_tags:
                An list of tag names to be applied to every video in the videos
                param
        """
        default_tags = [] if not default_tags else default_tags
        tag_values: set[str] = set(default_tags)
        cached_tag_names = set(cached_tags.keys())

        # Copy the list of videos since we don't want to edit the original list
        videos = videos.copy()
        for vid in videos.copy():  # This copy is probably needed
            if not vid.data:
                videos.remove(vid)
                continue

            tags = vid.tags
            if not tags:
                videos.remove(vid)
                continue

            tag_values.update(map(str.lower, tags))

        # Get non cached tags and add them to db
        tobecached = tag_values - cached_tag_names

        if tobecached:
            sql = 'INSERT INTO tags (tag) VALUES %s ON CONFLICT DO NOTHING RETURNING tag, id'
            with self.class_cursor(models.Tag) as cursor:
                results: list[models.Tag] = execute_values(cursor, sql,
                                                           [(x,) for x in tobecached],
                                                           page_size=1000, fetch=True)

            for tag in results:
                cached_tags[tag.tag] = tag.id

        values = []
        for vid in videos:
            video_id = all_videos.get(vid.video_id)
            if not video_id:
                logger.warning('Video id not found with %s' % vid)
                continue

            # Add video specific tags
            for tag in vid.tags:
                tag_id = cached_tags.get(tag.lower())
                if not tag_id:
                    logger.warning('Tag %s not found' % tag)
                    continue

                values.append((tag_id, video_id))

            # Add default tags
            for default_tag in default_tags:
                tag_id = cached_tags.get(default_tag.lower())
                if not tag_id:
                    logger.warning('Tag %s not found' % default_tag)
                    continue

                values.append((tag_id, video_id))

        sql = 'INSERT INTO videoTags (tag_id, video_id) VALUES %s ON CONFLICT DO NOTHING '

        with self.conn.cursor() as cursor:
            execute_values(cursor, sql, values, page_size=2000)