Example #1
0
def init_db():
    db = Database(":memory:")
    db.add_channel("publisher1", "id_publisher1")
    db.add_channel("publisher2", "id_publisher2")
    db.add_channel("publisher3", "id_publisher3")
    db.add_videos(insert_list)
    return db
Example #2
0
def init_db():
    insert_list = [
        Video(yt_videoid="0", title="title1", description="description1", publisher="id_publisher1", publish_date=1488286166,
              watched=False),
        Video(yt_videoid="0", title="title1", description="description1", publisher="id_publisher1", publish_date=1488286167,
              watched=False),
        Video(yt_videoid="1", title="title2", description="description1", publisher="id_publisher1", publish_date=1488286168,
              watched=False),
        Video(yt_videoid="1", title="title2", description="description2", publisher="id_publisher2", publish_date=1488286170,
              watched=False),
        Video(yt_videoid="2", title="title3", description="description3", publisher="id_publisher2", publish_date=1488286171,
              watched=False)
    ]
    db = Database(":memory:")
    db.add_channel(Channel(displayname="publisher1", yt_channelid="id_publisher1"))
    db.add_channel(Channel(displayname="publisher2", yt_channelid="id_publisher2"))
    db.add_channel(Channel(displayname="publisher3", yt_channelid="id_publisher3"))
    db.add_videos(insert_list)
    return db
Example #3
0
class Ytcc:
    """The Ytcc class handles updating the YouTube RSS feed and playing and listing/filtering
    videos. Filters can be set with with following methods:
        set_channel_filter
        set_date_begin_filter
        set_date_end_filter
        set_include_watched_filter
    """

    def __init__(self, override_cfg_file: Optional[str] = None) -> None:
        self.config = Config(override_cfg_file)
        self.db = Database(self.config.db_path)
        self.channel_filter: List[str] = []
        self.date_begin_filter = 0.0
        self.date_end_filter = time.mktime(time.gmtime()) + 20
        self.include_watched_filter = False
        self.search_filter = ""

    def __enter__(self) -> "Ytcc":
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
        self.db.__exit__(exc_type, exc_val, exc_tb)

    @staticmethod
    def get_youtube_video_url(yt_videoid: str) -> str:
        """Returns the YouTube URL for the given youtube video ID

        Args:
            yt_videoid (str): the YouTube video ID

        Returns (str):
            the YouTube URL for the given youtube video ID
        """

        return "https://www.youtube.com/watch?v=" + yt_videoid

    def set_channel_filter(self, channel_filter: List[str]) -> None:
        """Sets the channel filter. The results when listing videos will only include videos by
        channels specifide in the filter

        Args:
            channel_filter (list): the list of channel names
        """

        self.channel_filter.clear()
        self.channel_filter.extend(channel_filter)

    def set_date_begin_filter(self, begin: datetime.datetime) -> None:
        """Sets the time filter. The results when listing videos will only include videos newer
        than the given time.

        Args:
            begin (datetime.datetime): the lower bound of the time filter
        """

        self.date_begin_filter = begin.timestamp()

    def set_date_end_filter(self, end: datetime.datetime) -> None:
        """Sets the time filter. The results when listing videos will only include videos older
        than the given time.

        Args:
            end (datetime.datetime): the upper bound of the time filter
        """

        self.date_end_filter = end.timestamp()

    def set_include_watched_filter(self) -> None:
        """Sets "watched video" filter. The results when listing videos will include both watched
        and unwatched videos.
        """

        self.include_watched_filter = True

    def set_search_filter(self, searchterm: str) -> None:
        """Sets a search filter. When this filter is set, all other filters are ignored

        Args:
            searchterm (str): only videos whose title, channel or description match this term will
                be included
        """

        self.search_filter = searchterm

    @staticmethod
    def _update_channel(yt_channel_id: str) -> List[DBVideo]:
        feed = feedparser.parse("https://www.youtube.com/feeds/videos.xml?channel_id="
                                + yt_channel_id)
        return [(str(entry.yt_videoid),
                 str(entry.title),
                 str(entry.description),
                 yt_channel_id,
                 time.mktime(entry.published_parsed),
                 False)
                for entry in feed.entries]

    def update_all(self) -> None:
        """Checks every channel for new videos"""

        channels = map(lambda channel: channel.yt_channelid, self.db.get_channels())
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 3

        with ThreadPoolExecutor(num_workers) as pool:
            videos = chain.from_iterable(pool.map(self._update_channel, channels))

        self.db.add_videos(videos)

    def play_video(self, video_id: int, no_video: bool = False) -> bool:
        """Plays the video identified by the given video ID with the mpv video player and marks the
        video watched, if the player exits with an exit code of zero.

        Args:
            video_id (int): The (local) video ID.
            no_video (bool): If True only the audio is played

        Returns (bool):
            False if the given video_id does not exist or the player closed with a non zero exit
            code. True if the video was played successfully.
        """

        no_video_flag = []
        if no_video:
            no_video_flag.append("--no-video")

        video = self.db.resolve_video_id(video_id)
        if video:
            try:
                mpv_result = subprocess.run(["mpv", *no_video_flag, *self.config.mpv_flags,
                                             self.get_youtube_video_url(video.yt_videoid)])
            except FileNotFoundError:
                raise YtccException("Could not locate the mpv video player!")

            if mpv_result.returncode == 0:
                self.db.mark_watched([video.id])
                return True

        return False

    def download_videos(self, video_ids: Optional[List[int]] = None, path: str = "",
                        no_video: bool = False) -> Iterable[Tuple[int, bool]]:
        """Downloads the videos identified by the given video IDs with youtube-dl.

        Args:
            video_ids ([int]): The (local) video IDs.
            path (str): The directory where the download is saved.
            no_video (bool): If True only the audio is downloaded

        Returns:
            Generator of tuples indicating whether the a download was successful.
        """

        if path:
            download_dir = path
        elif self.config.download_dir:
            download_dir = self.config.download_dir
        else:
            download_dir = ""

        videos = self.get_videos(unpack_optional(video_ids, self._get_filtered_video_ids))
        conf = self.config.youtube_dl

        ydl_opts: Dict[str, Any] = {
            "outtmpl": os.path.join(download_dir, conf.output_template),
            "ratelimit": conf.ratelimit,
            "retries": conf.retries,
            "quiet": conf.loglevel == "quiet",
            "verbose": conf.loglevel == "verbose",
            "ignoreerrors": False
        }

        if no_video:
            ydl_opts["format"] = "bestaudio/best"
            if conf.thumbnail:
                ydl_opts["writethumbnail"] = True
                ydl_opts["postprocessors"] = [
                    {
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': "m4a"
                    },
                    {"key": "EmbedThumbnail"}
                ]
        else:
            ydl_opts["format"] = conf.format
            if conf.subtitles != "off":
                ydl_opts["subtitleslangs"] = list(map(str.strip, conf.subtitles.split(",")))
                ydl_opts["writesubtitles"] = True
                ydl_opts["writeautomaticsub"] = True
                ydl_opts["postprocessors"] = [{"key": "FFmpegEmbedSubtitle"}]

        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            for video in videos:
                url = self.get_youtube_video_url(video.yt_videoid)
                try:
                    # will raise exception on error and not yield video.id
                    ydl.download([url])
                    yield video.id, True
                except youtube_dl.utils.DownloadError:
                    yield video.id, False

    def add_channel(self, displayname: str, channel_url: str) -> None:
        """Subscribes to a channel.

        Args:
            displayname (str): a human readable name of the channel.
            channel_url (str): the url to the channel's home page.

        Raises:
            ChannelDoesNotExistException: when the given URL does not exist.
            DuplicateChannelException: when trying to subscribe to a channel the second (or more)
                                       time.
            BadURLException: when a given URL does not refer to a YouTube channel.
        """

        regex = r"^(https?://)?(www\.)?youtube\.com/(?P<type>user|channel)/(?P<channel>[^/?=]+)$"
        match = re.search(regex, channel_url)

        if match:
            channel = match.group("channel")
            url = "https://www.youtube.com/" + match.group("type") + "/" + channel + "/videos"

            try:
                response = urlopen(url).read().decode('utf-8')
            except URLError:
                raise ChannelDoesNotExistException("Channel does not exist: " + channel)

            parser = etree.HTMLParser()
            root = etree.parse(StringIO(response), parser).getroot()
            result = root.xpath('/html/head/meta[@itemprop="channelId"]')
            yt_channelid = result[0].attrib.get("content")

            try:
                self.db.add_channel(displayname, yt_channelid)
            except sqlite3.IntegrityError:
                raise DuplicateChannelException("Channel already subscribed: " + channel)

        else:
            raise BadURLException("'" + channel_url + "' is not a valid URL")

    def import_channels(self, file: TextIO) -> None:
        """Imports all channels from YouTube's subsciption export file.

        Args:
            file (TextIOWrapper): the opened file
        """

        try:
            root = etree.parse(file)
        except Exception:
            raise InvalidSubscriptionFileError(
                "'" + file.name + "' is not a valid YouTube export file"
            )

        elements = root.xpath('//outline[@type="rss"]')
        channels = [(e.attrib["title"], urlparse(e.attrib["xmlUrl"]).query[11:]) for e in elements]

        for channel in channels:
            try:
                self.db.add_channel(*channel)
            except sqlite3.IntegrityError:
                pass

    def list_videos(self) -> List[Video]:
        """Returns a list of videos that match the filters set by the set_*_filter methods.

        Returns (list):
            A list of ytcc.video.Video objects
        """

        if self.search_filter:
            return self.db.search(self.search_filter)

        return self.db.get_videos(self.channel_filter, self.date_begin_filter,
                                  self.date_end_filter, self.include_watched_filter)

    def _get_filtered_video_ids(self) -> List[int]:
        return list(map(lambda video: video.id, self.list_videos()))

    def mark_watched(self, video_ids: Optional[List[int]] = None) -> List[Video]:
        """Marks the videos of channels specified in the filter as watched without playing them.
        The filters are set by the set_*_filter methods.

        Args:
            video_ids ([int]): The video IDs to mark as watched.

        Returns (list):
            A list of ytcc.video.Video objects. Contains the videos that were marked watched.
        """

        mark_ids = unpack_optional(video_ids, self._get_filtered_video_ids)
        self.db.mark_watched(mark_ids)
        return self.get_videos(mark_ids)

    def delete_channels(self, displaynames: List[str]) -> None:
        """Delete (or unsubscribe) channels.

        Args:
            displaynames (list): A list of channels' displaynames.
        """

        self.db.delete_channels(displaynames)

    def get_channels(self) -> List[Channel]:
        """Returns a list of all subscribed channels.

        Returns ([str]):
            A list of channel names.
        """

        return self.db.get_channels()

    def get_videos(self, video_ids: Iterable[int]) -> List[Video]:
        """Returns the ytcc.video.Video object for the given video IDs.

        Args:
            video_ids ([int]): the video IDs.

        Returns (list)
            A list of ytcc.video.Video objects
        """
        def resolve_ids():
            for video_id in video_ids:
                video = self.db.resolve_video_id(video_id)
                if video:
                    yield video

        return list(resolve_ids())

    def cleanup(self) -> None:
        """Deletes old videos from the database."""

        self.db.cleanup()
Example #4
0
class Ytcc:
    """The Ytcc class handles updating the RSS feeds and playing and listing/filtering videos.

    Filters can be set with with following methods:
    * ``set_channel_filter``
    * ``set_date_begin_filter``
    * ``set_date_end_filter``
    * ``set_include_watched_filter``
    """
    def __init__(self, override_cfg_file: Optional[str] = None) -> None:
        self.config = Config(override_cfg_file)
        self.database = Database(self.config.db_path)
        self.video_id_filter: List[int] = []
        self.channel_filter: List[str] = []
        self.date_begin_filter = 0.0
        self.date_end_filter = (0.0, False)
        self.include_watched_filter = False

    def __enter__(self) -> "Ytcc":
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
        self.database.__exit__(exc_type, exc_val, exc_tb)

    def close(self) -> None:
        """Close open resources like the database connection."""
        self.database.close()

    @staticmethod
    def get_youtube_video_url(yt_videoid: Optional[str]) -> str:
        """Return the YouTube URL for the given youtube video ID.

        :param yt_videoid:  The YouTube video ID.
        :return: The YouTube URL for the given youtube video ID.
        """
        if yt_videoid is None:
            raise YtccException("Video id is none!")

        return f"https://www.youtube.com/watch?v={yt_videoid}"

    def set_channel_filter(self, channel_filter: List[str]) -> None:
        """Set the channel filter.

        The results when listing videos will only include videos by channels specified in the
        filter.

        :param channel_filter: The list of channel names.
        """
        self.channel_filter.clear()
        self.channel_filter.extend(channel_filter)

    def set_date_begin_filter(self, begin: datetime.datetime) -> None:
        """Set the time filter.

        The results when listing videos will only include videos newer than the given time.

        :param begin: The lower bound of the time filter.
        """
        self.date_begin_filter = begin.timestamp()

    def set_date_end_filter(self, end: datetime.datetime) -> None:
        """Set the time filter.

        The results when listing videos will only include videos older than the given time.

        :param end: The upper bound of the time filter.
        """
        self.date_end_filter = (end.timestamp(), True)

    def set_include_watched_filter(self) -> None:
        """Set the "watched video" filter.

        The results when listing videos will include both watched and unwatched videos.
        """
        self.include_watched_filter = True

    def set_video_id_filter(self, ids: Optional[Iterable[int]] = None) -> None:
        """Set the id filter.

        This filter overrides all other filters.
        :param ids: IDs to filter for.
        """
        self.video_id_filter.clear()
        if ids is not None:
            self.video_id_filter.extend(ids)

    @staticmethod
    def _update_channel(channel: Channel) -> List[Video]:
        yt_channel_id = channel.yt_channelid
        url = _get_youtube_rss_url(yt_channel_id)
        feed = feedparser.parse(url)
        return [
            Video(yt_videoid=str(entry.yt_videoid),
                  title=str(entry.title),
                  description=str(entry.description),
                  publisher=yt_channel_id,
                  publish_date=time.mktime(entry.published_parsed),
                  watched=False) for entry in feed.entries
        ]

    def update_all(self) -> None:
        """Check every channel for new videos."""
        channels = self.database.get_channels()
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
            videos = chain.from_iterable(
                pool.map(self._update_channel, channels))

        try:
            self.database.add_videos(videos)
        except sqlalchemy.exc.OperationalError as original:
            raise DatabaseOperationalError() from original

    def play_video(self, video: Video, audio_only: bool = False) -> bool:
        """Play the given video with the mpv video player and mark the the video as watched.

        The video will not be marked as watched, if the player exits unexpectedly (i.e. exits with
        non-zero exit code) or another error occurs.

        :param video: The video to play.
        :param audio_only: If True, only the audio track of the video is played
        :return: False if the given video_id does not exist or the player closed with a non-zero
         exit code. True if the video was played successfully.
        """
        no_video_flag = []
        if audio_only:
            no_video_flag.append("--no-video")

        if video:
            try:
                command = [
                    "mpv", *no_video_flag, *self.config.mpv_flags,
                    self.get_youtube_video_url(video.yt_videoid)
                ]
                subprocess.run(command, check=True)
            except FileNotFoundError:
                raise YtccException("Could not locate the mpv video player!")
            except subprocess.CalledProcessError:
                return False

            video.watched = True
            return True

        return False

    def download_video(self,
                       video: Video,
                       path: str = "",
                       audio_only: bool = False) -> bool:
        """Download the given video with youtube-dl and mark it as watched.

        If the path is not given, the path is read from the config file.

        :param video: The video to download.
        :param path: The directory where the download is saved.
        :param audio_only: If True, only the audio track is downloaded.
        :return: True, if the video was downloaded successfully. False otherwise.
        """
        if path:
            download_dir = path
        elif self.config.download_dir:
            download_dir = self.config.download_dir
        else:
            download_dir = ""

        conf = self.config.youtube_dl

        ydl_opts: Dict[str, Any] = {
            "outtmpl": os.path.join(download_dir, conf.output_template),
            "ratelimit": conf.ratelimit,
            "retries": conf.retries,
            "quiet": conf.loglevel == "quiet",
            "verbose": conf.loglevel == "verbose",
            "merge_output_format": conf.merge_output_format,
            "ignoreerrors": False,
            "postprocessors": [{
                "key": "FFmpegMetadata"
            }]
        }

        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            if conf.thumbnail:
                ydl_opts["writethumbnail"] = True
                extract_audio_pp = {
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': "m4a"
                }
                ydl_opts["postprocessors"].insert(0, extract_audio_pp)
                ydl_opts["postprocessors"].append({"key": "EmbedThumbnail"})
        else:
            ydl_opts["format"] = conf.format
            if conf.subtitles != "off":
                ydl_opts["subtitleslangs"] = list(
                    map(str.strip, conf.subtitles.split(",")))
                ydl_opts["writesubtitles"] = True
                ydl_opts["writeautomaticsub"] = True
                ydl_opts["postprocessors"].append(
                    {"key": "FFmpegEmbedSubtitle"})

        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            url = self.get_youtube_video_url(video.yt_videoid)
            try:
                info = ydl.extract_info(url, download=False, process=False)
                if info.get("is_live", False) and conf.skip_live_stream:
                    return False

                ydl.process_ie_result(info, download=True)
                video.watched = True
                return True
            except youtube_dl.utils.YoutubeDLError:
                return False

    def add_channel(self, displayname: str, channel_url: str) -> None:
        """Subscribe to a channel.

        :param displayname: A human readable name of the channel.
        :param channel_url: The url to a page that can identify the channel.
        :raises ChannelDoesNotExistException: If the given URL does not exist.
        :raises DuplicateChannelException: If the channel already exists in the database.
        :raises BadURLException: If the given URL does not refer to a YouTube channel.
        """
        known_yt_domains = [
            "youtu.be", "youtube.com", "youtubeeducation.com",
            "youtubekids.com", "youtube-nocookie.com", "yt.be", "ytimg.com"
        ]

        url_parts = urlparse(channel_url, scheme="https")
        if not url_parts.netloc:
            url_parts = urlparse("https://" + channel_url)

        domain = url_parts.netloc.split(":")[0]
        domain = ".".join(domain.split(".")[-2:])

        if domain not in known_yt_domains:
            raise BadURLException(f"{channel_url} is not a valid URL")

        url = urlunparse(
            ("https", url_parts.netloc, url_parts.path, url_parts.params,
             url_parts.query, url_parts.fragment))

        try:
            response = urlopen(url).read().decode('utf-8')
        except URLError:
            raise BadURLException(f"{channel_url} is not a valid URL")

        parser = etree.HTMLParser()
        root = etree.parse(StringIO(response), parser).getroot()
        site_name_node = root.xpath(
            '/html/head/meta[@property="og:site_name"]')
        channel_id_node = root.xpath('//meta[@itemprop="channelId"]')

        if not site_name_node or site_name_node[0].attrib.get("content",
                                                              "") != "YouTube":
            raise BadURLException(
                f"{channel_url} does not seem to be a YouTube URL")

        if not channel_id_node:
            raise ChannelDoesNotExistException(
                f"{channel_url} does not seem to be a YouTube URL")

        yt_channelid = channel_id_node[0].attrib.get("content")

        try:
            self.database.add_channel(
                Channel(displayname=displayname, yt_channelid=yt_channelid))
        except sqlalchemy.exc.IntegrityError:
            raise DuplicateChannelException(
                f"Channel already subscribed: {displayname}")

    def import_channels(self, file: TextIO) -> None:
        """Import all channels from YouTube's subscription export file.

        :param file: The file to read from.
        """
        def _create_channel(elem: etree.Element) -> Channel:
            rss_url = urlparse(elem.attrib["xmlUrl"])
            query_dict = parse_qs(rss_url.query, keep_blank_values=False)
            channel_id = query_dict.get("channel_id", [])
            if len(channel_id) != 1:
                message = f"'{file.name}' is not a valid YouTube export file"
                raise InvalidSubscriptionFileError(message)
            return Channel(displayname=elem.attrib["title"],
                           yt_channelid=channel_id[0])

        try:
            root = etree.parse(file)
        except Exception:
            raise InvalidSubscriptionFileError(
                f"'{file.name}' is not a valid YouTube export file")

        elements = root.xpath('//outline[@type="rss"]')
        self.database.add_channels((_create_channel(e) for e in elements))

    def export_channels(self, outstream: BinaryIO) -> None:
        """Export all channels as OPML file.

        :param outstream: The file/stream the OPML file will be written to.
        """
        opml = etree.Element("opml", version="1.1")
        body = etree.SubElement(opml, "body")
        outline = etree.SubElement(body,
                                   "outline",
                                   text="ytcc subscriptions",
                                   title="ytcc subscriptions")
        for channel in self.get_channels():
            outline.append(
                etree.Element("outline",
                              text=channel.displayname,
                              title=channel.displayname,
                              type="rss",
                              xmlUrl=_get_youtube_rss_url(
                                  channel.yt_channelid)))

        outstream.write(etree.tostring(opml, pretty_print=True))

    def list_videos(self) -> List[Video]:
        """Return a list of videos that match the filters set by the set_*_filter methods.

        :return: A list of videos.
        """
        if self.video_id_filter:
            return self.database.session.query(Video) \
                .join(Channel, Channel.yt_channelid == Video.publisher) \
                .filter(Video.id.in_(self.video_id_filter)) \
                .order_by(*self.config.order_by).all()

        if not self.date_end_filter[1]:
            date_end_filter = time.mktime(time.gmtime()) + 20
        else:
            date_end_filter = self.date_end_filter[0]

        query = self.database.session.query(Video) \
            .join(Channel, Channel.yt_channelid == Video.publisher) \
            .filter(Video.publish_date > self.date_begin_filter) \
            .filter(Video.publish_date < date_end_filter)

        if self.channel_filter:
            query = query.filter(Channel.displayname.in_(self.channel_filter))

        if not self.include_watched_filter:
            query = query.filter(~Video.watched)

        query = query.order_by(*self.config.order_by)
        return query.all()

    def delete_channels(self, displaynames: List[str]) -> None:
        """Delete (or unsubscribe) channels.

        :param displaynames: The names of channels to delete.
        """
        self.database.delete_channels(displaynames)

    def rename_channel(self, oldname: str, newname: str) -> None:
        """Rename the given channel.

        :param oldname: The name of the channel.
        :param newname: The new name of the channel.
        :raises ChannelDoesNotExistException: If the given channel does not exist.
        :raises DuplicateChannelException: If new name already exists.
        """
        self.database.rename_channel(oldname, newname)

    def get_channels(self) -> List[Channel]:
        """Get the list of all subscribed channels.

        :return: A list of channel names.
        """
        return self.database.get_channels()

    def cleanup(self) -> None:
        """Delete old videos from the database."""
        self.database.cleanup()