Beispiel #1
0
 def __init__(self, override_cfg_file: Optional[str] = None) -> None:
     """Load the configuration, open the database and reset all listing filters.

     :param override_cfg_file: Handed to ``Config`` unchanged; presumably the path of a
         configuration file that replaces the default one (confirm against ``Config``).
     """
     config = Config(override_cfg_file)
     self.config = config
     self.database = Database(config.db_path)

     # Filters start out in their "nothing selected" state.
     self.video_id_filter: List[int] = []
     self.channel_filter: List[str] = []
     self.include_watched_filter = False
     self.date_begin_filter = 0.0
     # Pair of (timestamp, flag); the flag presumably records whether an end
     # bound has been set explicitly -- confirm against the code that reads it.
     self.date_end_filter = (0.0, False)
Beispiel #2
0
 def __init__(self, override_cfg_file: Optional[str] = None) -> None:
     """Load the configuration, open the database and reset all listing filters.

     :param override_cfg_file: Handed to ``Config`` unchanged; presumably the path of a
         configuration file that replaces the default one (confirm against ``Config``).
     """
     config = Config(override_cfg_file)
     self.config = config
     self.db = Database(config.db_path)

     # Filters start out in their "nothing selected" state.
     self.search_filter = ""
     self.channel_filter: List[str] = []
     self.include_watched_filter = False
     self.date_begin_filter = 0.0
     # Default upper bound: the current time plus 20 seconds of slack.
     # NOTE(review): mktime() interprets the gmtime() struct as local time --
     # confirm that stored publish dates are computed the same way.
     now = time.mktime(time.gmtime())
     self.date_end_filter = now + 20
Beispiel #3
0
def init_db():
    """Build an in-memory database holding three channels and the test videos.

    ``insert_list`` is not defined in this function; it is read from the
    enclosing (module) scope.

    :return: The populated ``Database`` instance.
    """
    channels = (
        ("publisher1", "id_publisher1"),
        ("publisher2", "id_publisher2"),
        ("publisher3", "id_publisher3"),
    )
    database = Database(":memory:")
    for displayname, channel_id in channels:
        database.add_channel(displayname, channel_id)
    database.add_videos(insert_list)
    return database
Beispiel #4
0
    def process_entry(self, e_hash: str,
                      entry: Any) -> Tuple[str, Optional[Video]]:
        """Resolve one raw playlist entry into a ``Video``.

        :param e_hash: The extractor hash that identifies the entry.
        :param entry: The unprocessed youtube-dl result belonging to ``e_hash``.
        :return: ``(e_hash, video)``. ``video`` is ``None`` when the entry already failed at
            least ``self.max_fail`` times, when youtube-dl cannot process it, or when its age
            limit exceeds ``config.ytcc.age_limit``.
        """
        with Database(self.db_path) as database:
            if database.get_extractor_fail_count(e_hash) >= self.max_fail:
                return e_hash, None

        with youtube_dl.YoutubeDL(self.ydl_opts) as ydl:
            try:
                processed = ydl.process_ie_result(entry, False)
            except youtube_dl.DownloadError as download_error:
                # Log through the module logger, like every other message of this method.
                logger.warning("Failed to get a video. Youtube-dl said: '%s'",
                               download_error)
                return e_hash, None

            publish_date = 0.0
            date_str = processed.get("upload_date")
            if date_str:
                publish_date = datetime.datetime.strptime(
                    date_str, "%Y%m%d").timestamp()

            # "age_limit" may be present with the value None; ``or 0`` keeps the comparison
            # from raising a TypeError in that case.
            if (processed.get("age_limit") or 0) > config.ytcc.age_limit:
                logger.warning("Ignoring video '%s' due to age limit",
                               processed.get("title"))
                return e_hash, None

            logger.info("Processed video '%s'", processed.get("title"))

            return e_hash, Video(url=processed["webpage_url"],
                                 title=processed["title"],
                                 description=processed.get(
                                     "description", ""),
                                 publish_date=publish_date,
                                 watch_date=None,
                                 duration=processed.get("duration", -1),
                                 extractor_hash=e_hash)
Beispiel #5
0
    def get_new_entries(self,
                        playlist: Playlist) -> List[Tuple[Any, str, Playlist]]:
        """Collect the entries of ``playlist`` that are not stored in the database yet.

        At most ``self.max_items`` entries of the playlist are inspected.

        :param playlist: The playlist to check.
        :return: A list of ``(entry, extractor_hash, playlist)`` tuples for every inspected
            entry whose hash is unknown. Empty if youtube-dl fails to read the playlist.
        """
        with Database(self.db_path) as database:
            hashes = frozenset(
                x.extractor_hash
                for x in database.list_videos(playlists=[playlist.name]))

        result = []
        with youtube_dl.YoutubeDL(self.ydl_opts) as ydl:
            logger.info("Checking playlist '%s'...", playlist.name)
            try:
                info = ydl.extract_info(playlist.url,
                                        download=False,
                                        process=False)
            except youtube_dl.DownloadError as download_error:
                # Log through the module logger, like every other message of this method.
                logger.error(
                    "Failed to get playlist %s. Youtube-dl said: '%s'",
                    playlist.name, download_error)
            else:
                # extract_info() can return None (e.g. when errors are ignored); treat that
                # like a playlist without entries instead of failing on ``None.get``.
                entries = info.get("entries", []) if info else []
                for entry in take(self.max_items, entries):
                    e_hash = ydl._make_archive_id(entry)  # pylint: disable=protected-access
                    if e_hash is None:
                        logger.warning(
                            "Ignoring malformed playlist entry from %s",
                            playlist.name)
                    elif e_hash not in hashes:
                        result.append((entry, e_hash, playlist))

        return result
Beispiel #6
0
    def __call__(self, *args, **kwargs):
        """Invoke the ytcc command line interface, optionally preparing the database first.

        :param args: Command line arguments appended after ``--conf <conf_file>``.
        :param kwargs: Forwarded to ``self.invoke``, except for two flags consumed here:
            ``subscribe`` (truthy: add ``WEBDRIVER_PLAYLIST`` to the database) and
            ``update`` (truthy: add ``WEBDRIVER_VIDEOS`` to that playlist).
        :return: Whatever ``self.invoke`` returns.
        """
        from ytcc.cli import cli

        # pop() removes the flags even when they are passed as False, so they can never
        # leak into invoke() as unexpected keyword arguments.
        subscribe = kwargs.pop("subscribe", False)
        update = kwargs.pop("update", False)

        if subscribe:
            from ytcc.database import Database
            with Database(self.db_file) as db:
                db.add_playlist(WEBDRIVER_PLAYLIST.name,
                                WEBDRIVER_PLAYLIST.url)

        if update:
            from ytcc.database import Database
            with Database(self.db_file) as db:
                db.add_videos(WEBDRIVER_VIDEOS, WEBDRIVER_PLAYLIST)

        return self.invoke(cli, ["--conf", self.conf_file, *args], **kwargs)
Beispiel #7
0
 def test_add_and_get_channels(self):
     """Channels added to a fresh database come back from ``get_channels`` in order."""
     expected = [
         ("Webdriver Torso", "UCsLiV4WJfkTEHH0b9PmRklw"),
         ("Webdriver YPP", "UCxexYYtOetqikZqriLuTS-g"),
     ]
     db = Database(":memory:")
     for displayname, channel_id in expected:
         db.add_channel(displayname, channel_id)

     channels = db.get_channels()
     self.assertEqual(len(channels), 2)
     for channel, (displayname, channel_id) in zip(channels, expected):
         self.assertEqual(channel.displayname, displayname)
         self.assertEqual(channel.yt_channelid, channel_id)
Beispiel #8
0
def init_db():
    """Build an in-memory database holding three channels and five unwatched videos.

    :return: The populated ``Database`` instance.
    """
    # (yt_videoid, title, description, publisher, publish_date)
    rows = [
        ("0", "title1", "description1", "id_publisher1", 1488286166),
        ("0", "title1", "description1", "id_publisher1", 1488286167),
        ("1", "title2", "description1", "id_publisher1", 1488286168),
        ("1", "title2", "description2", "id_publisher2", 1488286170),
        ("2", "title3", "description3", "id_publisher2", 1488286171),
    ]
    insert_list = [
        Video(yt_videoid=video_id, title=title, description=description,
              publisher=publisher, publish_date=publish_date, watched=False)
        for video_id, title, description, publisher, publish_date in rows
    ]

    db = Database(":memory:")
    for displayname, channel_id in (("publisher1", "id_publisher1"),
                                    ("publisher2", "id_publisher2"),
                                    ("publisher3", "id_publisher3")):
        db.add_channel(Channel(displayname=displayname, yt_channelid=channel_id))
    db.add_videos(insert_list)
    return db
Beispiel #9
0
    def update(self):
        """Fetch the new entries of all playlists and record the outcome in the database.

        New entries are collected per playlist with ``get_new_entries`` and then resolved with
        ``process_entry``, both through the worker pool. A resolved video is added to every
        playlist it was found in; for an entry that could not be resolved, the extractor fail
        count is increased instead (once per playlist the entry appeared in).
        """
        worker_count = unpack_optional(os.cpu_count(), lambda: 1) * 4

        with Pool(worker_count) as pool, Database(self.db_path) as database:
            entries_by_hash = {}
            playlists_by_hash = defaultdict(list)

            new_entries = pool.map(self.get_new_entries, database.list_playlists())
            for entry, e_hash, playlist in itertools.chain.from_iterable(new_entries):
                entries_by_hash[e_hash] = entry
                playlists_by_hash[e_hash].append(playlist)

            # Transpose the (hash, entry) pairs into two parallel argument sequences.
            processed = dict(
                pool.map(self.process_entry, *zip(*entries_by_hash.items())))

            for e_hash in entries_by_hash:
                video = processed[e_hash]
                for playlist in playlists_by_hash[e_hash]:
                    if video is None:
                        database.increase_extractor_fail_count(
                            e_hash, max_fail=self.max_fail)
                    else:
                        database.add_videos([video], playlist)
Beispiel #10
0
 def database(self) -> Database:
     """Return the database, opening it at ``config.ytcc.db_path`` on first use."""
     if self._database is not None:
         return self._database

     self._database = Database(config.ytcc.db_path)
     return self._database
Beispiel #11
0
class Ytcc:
    """The Ytcc class handles updating the RSS feeds and playing and listing/filtering videos.

    Filters can be set with the following methods:
    * ``set_playlist_filter``
    * ``set_date_begin_filter``
    * ``set_date_end_filter``
    * ``set_watched_filter``
    * ``set_video_id_filter``
    * ``set_tags_filter``
    """
    def __init__(self) -> None:
        self._database: Optional[Database] = None
        self.video_id_filter: Optional[List[int]] = None
        self.playlist_filter: Optional[List[str]] = None
        self.tags_filter: Optional[List[str]] = None
        self.date_begin_filter = 0.0
        self.date_end_filter = (0.0, False)
        self.include_watched_filter: Optional[bool] = False
        self.order_by: Optional[List[Tuple[VideoAttr, Direction]]] = None

    def __enter__(self) -> "Ytcc":
        """Return this instance so it can be used as a context manager."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
        if self._database is not None:
            self._database.__exit__(exc_type, exc_val, exc_tb)

    @property
    def database(self) -> Database:
        if self._database is None:
            self._database = Database(config.ytcc.db_path)
        return self._database

    def close(self) -> None:
        """Close open resources like the database connection."""
        if self._database is not None:
            self._database.close()

    def set_playlist_filter(self, playlists: List[str]) -> None:
        """Set the playlist filter.

        The value is passed to the database as ``playlists`` when listing videos, so the
        results only include videos from the playlists given here.

        :param playlists: The list of playlist names.
        """
        self.playlist_filter = playlists

    def set_date_begin_filter(self, begin: datetime.datetime) -> None:
        """Set the lower bound of the time filter.

        The results when listing videos will only include videos newer than the given time.
        The bound is stored as a POSIX timestamp (``begin.timestamp()``).

        :param begin: The lower bound of the time filter.
        """
        self.date_begin_filter = begin.timestamp()

    def set_date_end_filter(self, end: datetime.datetime) -> None:
        """Set the upper bound of the time filter.

        The results when listing videos will only include videos older than the given time.
        The bound is stored as ``(timestamp, True)``; the flag tells ``list_videos()`` to use
        this timestamp instead of the current time.

        :param end: The upper bound of the time filter.
        """
        self.date_end_filter = (end.timestamp(), True)

    def set_watched_filter(self, enabled: Optional[bool] = False) -> None:
        """Set the "watched video" filter.

        The value is passed to the database as ``watched`` when listing videos.

        :param enabled: If None, all videos are listed. If True, only watched videos are listed.
                        If False, only unwatched are listed
        """
        self.include_watched_filter = enabled

    def set_video_id_filter(self, ids: Optional[List[int]] = None) -> None:
        """Set the id filter.

        The results will have the given ids. This filter should in most cases be combined with
        ``set_watched_filter()``.

        :param ids: IDs to filter for.
        """
        self.video_id_filter = ids

    def set_tags_filter(self, tags: Optional[List[str]] = None) -> None:
        """Set the tag filter.

        The value is passed to the database as ``tags`` by ``list_videos()``; the results will
        include only playlists tagged with at least one of the given tags.

        :param tags: The tags of playlists to include in the result
        """
        self.tags_filter = tags

    def set_listing_order(self, order_by: List[Tuple[VideoAttr, Direction]]):
        """Set the ordering that ``list_videos()`` passes to the database as ``order_by``.

        :param order_by: Pairs of video attribute and sort direction.
        """
        self.order_by = order_by

    @staticmethod
    def update(max_fail: Optional[int] = None,
               max_backlog: Optional[int] = None) -> None:
        """Run an ``Updater`` on the configured database.

        :param max_fail: Passed to ``Updater`` as ``max_fail``. Any falsy value (``None`` or
            ``0``) selects ``config.ytcc.max_update_fail`` instead.
        :param max_backlog: Passed to ``Updater`` as ``max_backlog``. Any falsy value selects
            ``config.ytcc.max_update_backlog`` instead.
        """
        updater = Updater(
            db_path=config.ytcc.db_path,
            max_fail=max_fail or config.ytcc.max_update_fail,
            max_backlog=max_backlog or config.ytcc.max_update_backlog,
        )
        updater.update()

    @staticmethod
    def play_video(video: Video, audio_only: bool = False) -> bool:
        """Play the given video with the mpv video player.

        The video will not be marked as watched, if the player exits unexpectedly (i.e. exits with
        non-zero exit code) or another error occurs.

        :param video: The video to play.
        :param audio_only: If True, only the audio track of the video is played
        :return: False if the given video_id does not exist or the player closed with a non-zero
         exit code. True if the video was played successfully.
        """
        no_video_flag = []
        if audio_only:
            no_video_flag.append("--no-video")

        if video:
            mpv_flags = filter(bool,
                               map(str.strip, config.ytcc.mpv_flags.split()))
            try:
                command = ["mpv", *no_video_flag, *mpv_flags, video.url]
                subprocess.run(command, check=True)
            except FileNotFoundError as fnfe:
                raise YtccException(
                    "Could not locate the mpv video player!") from fnfe
            except subprocess.CalledProcessError as cpe:
                logger.debug(
                    "MPV failed! Command: %s; Stdout: %s; Stderr %s; Returncode: %s",
                    cpe.cmd, cpe.stdout, cpe.stderr, cpe.returncode)
                return False

            return True

        return False

    @staticmethod
    def download_video(video: Video,
                       path: str = "",
                       audio_only: bool = False) -> bool:
        """Download the given video with youtube-dl.

        The target directory is ``path`` if given, otherwise ``config.ytcc.download_dir``,
        otherwise the empty string (relative to the current directory).

        :param video: The video to download.
        :param path: The directory where the download is saved.
        :param audio_only: If True, only the audio track is downloaded.
        :return: True, if the video was downloaded successfully. False if youtube-dl failed or
            the video is a live stream and ``skip_live_stream`` is configured.
        """
        download_dir = path or config.ytcc.download_dir or ""
        conf = config.youtube_dl

        # Shared with ydl_opts["postprocessors"]; the branches below extend it in place.
        postprocessors: List[Dict[str, Any]] = [{"key": "FFmpegMetadata"}]
        ydl_opts: Dict[str, Any] = {
            **YTDL_COMMON_OPTS,
            "outtmpl": os.path.join(download_dir, conf.output_template),
            "ratelimit": conf.ratelimit if conf.ratelimit > 0 else None,
            "retries": conf.retries,
            "merge_output_format": conf.merge_output_format,
            "ignoreerrors": False,
            "postprocessors": postprocessors,
        }

        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            if conf.thumbnail:
                ydl_opts["writethumbnail"] = True
                # Audio extraction runs first, thumbnail embedding last.
                postprocessors.insert(0, {
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': "m4a"
                })
                postprocessors.append({"key": "EmbedThumbnail"})
        else:
            ydl_opts["format"] = conf.format
            if conf.subtitles != ["off"]:
                ydl_opts["subtitleslangs"] = conf.subtitles
                ydl_opts["writesubtitles"] = True
                ydl_opts["writeautomaticsub"] = True
                postprocessors.append({"key": "FFmpegEmbedSubtitle"})

        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            try:
                # Fetch the metadata first so live streams can be skipped before downloading.
                info = ydl.extract_info(video.url,
                                        download=False,
                                        process=False)
                if info.get("is_live", False) and conf.skip_live_stream:
                    logger.info("Skipping livestream %s", video.url)
                    return False

                ydl.process_ie_result(info, download=True)
            except youtube_dl.utils.YoutubeDLError as ydl_err:
                logger.debug("youtube-dl failed with '%s'", ydl_err)
                return False

            return True

    def add_playlist(self, name: str, url: str) -> None:
        """Validate ``url`` with youtube-dl and store it as a playlist called ``name``.

        Only the first two playlist items are requested for the check. The URL that is stored
        is the ``webpage_url`` reported by youtube-dl, not necessarily ``url`` itself.

        :param name: The name under which the playlist is stored.
        :param url: The URL of the playlist.
        :raises BadURLException: If youtube-dl cannot extract the URL, the result is not a
            playlist, an entry has no archive id, or no ``webpage_url`` is reported.
        :raises NameConflictError: If the database rejects the playlist with an integrity error.
        """
        ydl_opts = {
            **YTDL_COMMON_OPTS,
            "playliststart": 1,
            "playlistend": 2,
            "noplaylist": False,
            "extract_flat": "in_playlist",
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            try:
                info = ydl.extract_info(url, download=False, process=True)
            except youtube_dl.utils.DownloadError as download_error:
                logger.debug(
                    "'%s' is not supported by youtube-dl. Youtube-dl's error: '%s'",
                    url, download_error)
                raise BadURLException(
                    "URL is not supported by youtube-dl or does not exist"
                ) from download_error

            if info.get("_type") != "playlist":
                logger.debug(
                    "'%s' doesn't seem point to a playlist. Extractor info is: '%s'",
                    url, info)
                raise BadURLException(
                    "Not a playlist or not supported by youtube-dl")

            peek = list(info.get("entries"))
            if not peek:
                logger.warning("The playlist might be empty")

            # Every peeked entry must yield an archive id.
            has_bad_entry = any(
                ydl._make_archive_id(entry) is None  # pylint: disable=protected-access
                for entry in peek)
            if has_bad_entry:
                raise BadURLException(
                    "The given URL is not supported by ytcc, because it "
                    "doesn't point to a playlist")

            real_url = info.get("webpage_url")
            if not real_url:
                raise BadURLException("The playlist URL cannot be found")

        try:
            self.database.add_playlist(name, real_url)
        except sqlite3.IntegrityError as integrity_error:
            logger.debug(
                "Cannot subscribe to playlist due to integrity constraint error: %s",
                integrity_error)
            raise NameConflictError(
                "Playlist already exists") from integrity_error

    def list_videos(self) -> Iterable[MappedVideo]:
        """Return a list of videos that match the filters set by the set_*_filter methods.

        :return: A list of videos.
        """
        if not self.date_end_filter[1]:
            date_end_filter = time.mktime(time.gmtime()) + 20
        else:
            date_end_filter = self.date_end_filter[0]

        return self.database.list_videos(since=self.date_begin_filter,
                                         till=date_end_filter,
                                         watched=self.include_watched_filter,
                                         tags=self.tags_filter,
                                         playlists=self.playlist_filter,
                                         ids=self.video_id_filter,
                                         order_by=self.order_by)

    def mark_watched(self, video: Union[List[int], int, MappedVideo]) -> None:
        """Mark videos as watched by delegating to ``Database.mark_watched``.

        :param video: A video, a single video id or a list of video ids; passed through
            unchanged.
        """
        self.database.mark_watched(video)

    def mark_unwatched(self, video: Union[List[int], int,
                                          MappedVideo]) -> None:
        """Mark videos as unwatched by delegating to ``Database.mark_unwatched``.

        :param video: A video, a single video id or a list of video ids; passed through
            unchanged.
        """
        self.database.mark_unwatched(video)

    def delete_playlist(self, name: str) -> None:
        if not self.database.delete_playlist(name):
            raise PlaylistDoesNotExistException(
                f"Could not remove playlist {name}, because "
                "it does not exist")

    def rename_playlist(self, oldname: str, newname: str) -> None:
        if not self.database.rename_playlist(oldname, newname):
            raise NameConflictError(
                "Renaming failed. Either the old name does not exist or the "
                "new name is already used.")

    def list_playlists(self) -> Iterable[MappedPlaylist]:
        """Return the playlists as reported by ``Database.list_playlists``."""
        return self.database.list_playlists()

    def tag_playlist(self, name: str, tags: List[str]) -> None:
        """Tag the playlist ``name`` by delegating to ``Database.tag_playlist``.

        :param name: The name of the playlist.
        :param tags: The tags for the playlist; passed through unchanged.
        """
        self.database.tag_playlist(name, tags)

    def list_tags(self) -> Iterable[str]:
        """Return the tags as reported by ``Database.list_tags``."""
        return self.database.list_tags()

    def cleanup(self, keep: int) -> None:
        """Delete old videos from the database.

        Delegates to ``Database.cleanup``.

        :param keep: The number of videos to keep (passed through unchanged;
            NOTE(review): whether this counts per playlist or in total is decided by the
            database layer -- confirm there)
        :return: None
        """
        self.database.cleanup(keep)

    def import_yt_opml(self, file: Path):
        """Subscribe to every channel listed in a YouTube OPML subscription export.

        Each ``<outline type="rss">`` element is turned into the channel's ``/videos`` URL and
        added with ``add_playlist`` under the outline's title. Channels that are already
        subscribed or that youtube-dl rejects are skipped with a warning.

        :param file: Path of the OPML file.
        :raises InvalidSubscriptionFileError: If the file cannot be read or parsed, or if an
            outline lacks a title, a feed URL, or exactly one ``channel_id`` in that URL.
        """
        invalid_file_message = f"'{str(file)}' is not a valid YouTube export file"

        def _from_xml_element(elem: ET.Element) -> Tuple[str, str]:
            # Look the attributes up with get(): an outline that lacks one of them is a
            # malformed export and must not surface as a bare KeyError.
            title = elem.attrib.get("title")
            xml_url = elem.attrib.get("xmlUrl")
            if title is None or xml_url is None:
                raise InvalidSubscriptionFileError(invalid_file_message)

            query_dict = parse_qs(urlparse(xml_url).query, keep_blank_values=False)
            channel_id = query_dict.get("channel_id", [])
            if len(channel_id) != 1:
                raise InvalidSubscriptionFileError(invalid_file_message)
            yt_url = f"https://www.youtube.com/channel/{channel_id[0]}/videos"
            return title, yt_url

        try:
            tree = ET.parse(file)
        except ET.ParseError as err:
            raise InvalidSubscriptionFileError(invalid_file_message) from err
        except OSError as err:
            raise InvalidSubscriptionFileError(
                f"{str(file)} cannot be accessed") from err

        root = tree.getroot()
        for element in root.findall('.//outline[@type="rss"]'):
            name, url = _from_xml_element(element)
            try:
                self.add_playlist(name, url)
            except NameConflictError:
                logger.warning(
                    "Ignoring playlist '%s', because it already subscribed",
                    name)
            except BadURLException:
                logger.warning(
                    "Ignoring playlist '%s', "
                    "because it is not supported by youtube-dl", name)
            else:
                logger.info("Added playlist '%s'", name)
Beispiel #12
0
class Ytcc:
    """The Ytcc class handles updating the YouTube RSS feed and playing and listing/filtering
    videos. Filters can be set with the following methods:
        set_channel_filter
        set_date_begin_filter
        set_date_end_filter
        set_include_watched_filter
        set_search_filter
    """

    def __init__(self, override_cfg_file: Optional[str] = None) -> None:
        """Loads the configuration, opens the database and resets all filters.

        Args:
            override_cfg_file (str): handed to ``Config`` unchanged; presumably the path of a
                configuration file that replaces the default one (confirm against ``Config``)
        """
        config = Config(override_cfg_file)
        self.config = config
        self.db = Database(config.db_path)

        # Filters start out in their "nothing selected" state.
        self.search_filter = ""
        self.channel_filter: List[str] = []
        self.include_watched_filter = False
        self.date_begin_filter = 0.0
        # Default upper bound: the current time plus 20 seconds of slack, computed with the
        # same mktime() conversion that _update_channel() applies to publish dates.
        now = time.mktime(time.gmtime())
        self.date_end_filter = now + 20

    def __enter__(self) -> "Ytcc":
        """Returns this instance so it can be used as a context manager."""
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
        """Forwards the context exit to the database.

        Always returns None; the return value of the database's ``__exit__`` is discarded.
        """
        self.db.__exit__(exc_type, exc_val, exc_tb)

    @staticmethod
    def get_youtube_video_url(yt_videoid: str) -> str:
        """Returns the YouTube URL for the given youtube video ID

        Args:
            yt_videoid (str): the YouTube video ID

        Returns (str):
            the YouTube URL for the given youtube video ID
        """

        return "https://www.youtube.com/watch?v=" + yt_videoid

    def set_channel_filter(self, channel_filter: List[str]) -> None:
        """Sets the channel filter. The results when listing videos will only include videos by
        channels specified in the filter.

        The existing filter list is emptied and refilled in place, so the argument itself is
        not stored.

        Args:
            channel_filter (list): the list of channel names
        """

        self.channel_filter.clear()
        self.channel_filter.extend(channel_filter)

    def set_date_begin_filter(self, begin: datetime.datetime) -> None:
        """Sets the lower bound of the time filter. The results when listing videos will only
        include videos newer than the given time. The bound is stored as a POSIX timestamp.

        Args:
            begin (datetime.datetime): the lower bound of the time filter
        """

        self.date_begin_filter = begin.timestamp()

    def set_date_end_filter(self, end: datetime.datetime) -> None:
        """Sets the upper bound of the time filter. The results when listing videos will only
        include videos older than the given time. The bound is stored as a POSIX timestamp.

        Args:
            end (datetime.datetime): the upper bound of the time filter
        """

        self.date_end_filter = end.timestamp()

    def set_include_watched_filter(self) -> None:
        """Sets the "watched video" filter. The results when listing videos will include both
        watched and unwatched videos.

        This method only switches the filter on; it offers no way to switch it off again.
        """

        self.include_watched_filter = True

    def set_search_filter(self, searchterm: str) -> None:
        """Sets a search filter. When this filter is set to a non-empty term, ``list_videos``
        returns the search result and ignores all other filters.

        Args:
            searchterm (str): only videos whose title, channel or description match this term will
                be included
        """

        self.search_filter = searchterm

    @staticmethod
    def _update_channel(yt_channel_id: str) -> List[DBVideo]:
        """Fetches the video feed of one channel.

        Args:
            yt_channel_id (str): the YouTube channel ID

        Returns (list):
            one tuple per feed entry: (video ID, title, description, channel ID, publish
            timestamp, False)
        """
        feed_url = "https://www.youtube.com/feeds/videos.xml?channel_id=" + yt_channel_id
        feed = feedparser.parse(feed_url)

        videos = []
        for entry in feed.entries:
            videos.append((
                str(entry.yt_videoid),
                str(entry.title),
                str(entry.description),
                yt_channel_id,
                time.mktime(entry.published_parsed),
                False,
            ))
        return videos

    def update_all(self) -> None:
        """Checks every channel for new videos and adds the results to the database.

        The channel feeds are fetched by a thread pool with three workers per CPU.
        """
        channel_ids = (channel.yt_channelid for channel in self.db.get_channels())
        worker_count = unpack_optional(os.cpu_count(), lambda: 1) * 3

        with ThreadPoolExecutor(worker_count) as pool:
            per_channel_videos = pool.map(self._update_channel, channel_ids)
            videos = chain.from_iterable(per_channel_videos)

        self.db.add_videos(videos)

    def play_video(self, video_id: int, no_video: bool = False) -> bool:
        """Plays the video identified by the given video ID with the mpv video player and marks the
        video watched, if the player exits with an exit code of zero.

        Args:
            video_id (int): The (local) video ID.
            no_video (bool): If True only the audio is played

        Returns (bool):
            False if the given video_id does not exist or the player closed with a non zero exit
            code. True if the video was played successfully.
        """

        no_video_flag = []
        if no_video:
            no_video_flag.append("--no-video")

        video = self.db.resolve_video_id(video_id)
        if video:
            try:
                mpv_result = subprocess.run(["mpv", *no_video_flag, *self.config.mpv_flags,
                                             self.get_youtube_video_url(video.yt_videoid)])
            except FileNotFoundError:
                raise YtccException("Could not locate the mpv video player!")

            if mpv_result.returncode == 0:
                self.db.mark_watched([video.id])
                return True

        return False

    def download_videos(self, video_ids: Optional[List[int]] = None, path: str = "",
                        no_video: bool = False) -> Iterable[Tuple[int, bool]]:
        """Downloads the videos identified by the given video IDs with youtube-dl.

        This is a generator: nothing is downloaded until it is iterated. If ``video_ids`` is
        None, the videos matching the current filters are downloaded.

        Args:
            video_ids ([int]): The (local) video IDs.
            path (str): The directory where the download is saved. Falls back to the configured
                download directory, then to the empty string.
            no_video (bool): If True only the audio is downloaded

        Returns:
            Generator of (video ID, success) tuples, one per video.
        """
        download_dir = path or self.config.download_dir or ""

        videos = self.get_videos(unpack_optional(video_ids, self._get_filtered_video_ids))
        conf = self.config.youtube_dl

        ydl_opts: Dict[str, Any] = {
            "outtmpl": os.path.join(download_dir, conf.output_template),
            "ratelimit": conf.ratelimit,
            "retries": conf.retries,
            "quiet": conf.loglevel == "quiet",
            "verbose": conf.loglevel == "verbose",
            "ignoreerrors": False,
        }

        if no_video:
            ydl_opts["format"] = "bestaudio/best"
            if conf.thumbnail:
                ydl_opts["writethumbnail"] = True
                ydl_opts["postprocessors"] = [
                    {'key': 'FFmpegExtractAudio', 'preferredcodec': "m4a"},
                    {"key": "EmbedThumbnail"},
                ]
        else:
            ydl_opts["format"] = conf.format
            if conf.subtitles != "off":
                # The configured value is a comma separated list of languages.
                languages = [lang.strip() for lang in conf.subtitles.split(",")]
                ydl_opts["subtitleslangs"] = languages
                ydl_opts["writesubtitles"] = True
                ydl_opts["writeautomaticsub"] = True
                ydl_opts["postprocessors"] = [{"key": "FFmpegEmbedSubtitle"}]

        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            for video in videos:
                url = self.get_youtube_video_url(video.yt_videoid)
                try:
                    # download() raises on error because "ignoreerrors" is False.
                    ydl.download([url])
                except youtube_dl.utils.DownloadError:
                    succeeded = False
                else:
                    succeeded = True
                yield video.id, succeeded

    def add_channel(self, displayname: str, channel_url: str) -> None:
        """Subscribes to a channel.

        The channel's ``/videos`` page is downloaded and the channel ID is read from its
        ``<meta itemprop="channelId">`` tag.

        Args:
            displayname (str): a human readable name of the channel.
            channel_url (str): the url to the channel's home page.

        Raises:
            ChannelDoesNotExistException: when the given URL does not exist.
            DuplicateChannelException: when trying to subscribe to a channel the second (or more)
                                       time.
            BadURLException: when a given URL does not refer to a YouTube channel.
        """

        regex = r"^(https?://)?(www\.)?youtube\.com/(?P<type>user|channel)/(?P<channel>[^/?=]+)$"
        match = re.search(regex, channel_url)
        if not match:
            raise BadURLException("'" + channel_url + "' is not a valid URL")

        channel = match.group("channel")
        url = "https://www.youtube.com/" + match.group("type") + "/" + channel + "/videos"

        try:
            # The context manager closes the HTTP response even if reading fails.
            with urlopen(url) as http_response:
                response = http_response.read().decode('utf-8')
        except URLError as url_error:
            raise ChannelDoesNotExistException(
                "Channel does not exist: " + channel) from url_error

        parser = etree.HTMLParser()
        root = etree.parse(StringIO(response), parser).getroot()
        result = root.xpath('/html/head/meta[@itemprop="channelId"]')
        # A page without the channelId meta tag (or with an empty one) is not a channel page;
        # report that instead of failing with an IndexError on result[0].
        yt_channelid = result[0].attrib.get("content") if result else None
        if not yt_channelid:
            raise BadURLException(
                "'" + channel_url + "' does not refer to a YouTube channel")

        try:
            self.db.add_channel(displayname, yt_channelid)
        except sqlite3.IntegrityError as integrity_error:
            raise DuplicateChannelException(
                "Channel already subscribed: " + channel) from integrity_error

    def import_channels(self, file: TextIO) -> None:
        """Imports all channels from YouTube's subscription export file.

        Every ``<outline type="rss">`` element contributes one channel: its ``title`` is the
        display name and the ``channel_id`` query parameter of its ``xmlUrl`` is the channel
        ID. Channels that are already subscribed are skipped silently. Nothing is added if the
        file turns out to be invalid.

        Args:
            file (TextIOWrapper): the opened file

        Raises:
            InvalidSubscriptionFileError: when the file cannot be parsed, or an outline lacks a
                title or exactly one ``channel_id`` in its feed URL.
        """
        # Local import: only urlparse is known to be imported at module level.
        from urllib.parse import parse_qs

        invalid_file_message = "'" + file.name + "' is not a valid YouTube export file"

        try:
            root = etree.parse(file)
        except Exception as err:
            # Deliberately broad: any parser failure means the file is unusable. The cause is
            # chained so it is not lost.
            raise InvalidSubscriptionFileError(invalid_file_message) from err

        channels = []
        for element in root.xpath('//outline[@type="rss"]'):
            title = element.attrib.get("title")
            # Parse the query string instead of slicing off a fixed-length "channel_id="
            # prefix, which breaks as soon as the query has another shape.
            query = urlparse(element.attrib.get("xmlUrl", "")).query
            channel_ids = parse_qs(query).get("channel_id", [])
            if title is None or len(channel_ids) != 1:
                raise InvalidSubscriptionFileError(invalid_file_message)
            channels.append((title, channel_ids[0]))

        for channel in channels:
            try:
                self.db.add_channel(*channel)
            except sqlite3.IntegrityError:
                # Best effort: the channel is already subscribed.
                pass

    def list_videos(self) -> List[Video]:
        """Returns a list of videos that match the filters set by the set_*_filter methods.

        Returns (list):
            A list of ytcc.video.Video objects
        """

        if self.search_filter:
            return self.db.search(self.search_filter)

        return self.db.get_videos(self.channel_filter, self.date_begin_filter,
                                  self.date_end_filter, self.include_watched_filter)

    def _get_filtered_video_ids(self) -> List[int]:
        return list(map(lambda video: video.id, self.list_videos()))

    def mark_watched(self, video_ids: Optional[List[int]] = None) -> List[Video]:
        """Marks videos as watched without playing them.

        If no IDs are given, the videos matching the current filters (set by the set_*_filter
        methods) are marked.

        Args:
            video_ids ([int]): The video IDs to mark as watched.

        Returns (list):
            A list of ytcc.video.Video objects. Contains the videos that were marked watched.
        """
        ids_to_mark = unpack_optional(video_ids, self._get_filtered_video_ids)
        self.db.mark_watched(ids_to_mark)
        marked_videos = self.get_videos(ids_to_mark)
        return marked_videos

    def delete_channels(self, displaynames: List[str]) -> None:
        """Delete (or unsubscribe) channels by delegating to the database.

        Args:
            displaynames (list): A list of channels' displaynames.
        """

        self.db.delete_channels(displaynames)

    def get_channels(self) -> List[Channel]:
        """Returns a list of all subscribed channels.

        Returns (list):
            The channels as returned by the database; annotated as ``Channel`` objects, not
            plain names.
        """

        return self.db.get_channels()

    def get_videos(self, video_ids: Iterable[int]) -> List[Video]:
        """Returns the ytcc.video.Video object for the given video IDs.

        Args:
            video_ids ([int]): the video IDs.

        Returns (list)
            A list of ytcc.video.Video objects
        """
        def resolve_ids():
            for video_id in video_ids:
                video = self.db.resolve_video_id(video_id)
                if video:
                    yield video

        return list(resolve_ids())

    def cleanup(self) -> None:
        """Deletes old videos from the database.

        Delegates to the database's ``cleanup``; which videos count as old is decided there.
        """

        self.db.cleanup()
Beispiel #13
0
class Ytcc:
    """Handle updating the RSS feeds and playing, downloading and listing/filtering videos.

    Filters can be set with the following methods:
    * ``set_channel_filter``
    * ``set_date_begin_filter``
    * ``set_date_end_filter``
    * ``set_include_watched_filter``
    * ``set_video_id_filter``
    """
    def __init__(self, override_cfg_file: Optional[str] = None) -> None:
        """Load the configuration, open the database and reset all filters.

        :param override_cfg_file: Passed on to ``Config``.
        """
        self.config = Config(override_cfg_file)
        self.database = Database(self.config.db_path)
        self.video_id_filter: List[int] = []
        self.channel_filter: List[str] = []
        self.date_begin_filter = 0.0
        # (timestamp, is_set): the flag tells list_videos whether an explicit upper bound
        # was given via set_date_end_filter.
        self.date_end_filter = (0.0, False)
        self.include_watched_filter = False

    def __enter__(self) -> "Ytcc":
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> Any:
        # Delegate resource handling to the database's own context-manager exit.
        self.database.__exit__(exc_type, exc_val, exc_tb)

    def close(self) -> None:
        """Close open resources like the database connection."""
        self.database.close()

    @staticmethod
    def get_youtube_video_url(yt_videoid: Optional[str]) -> str:
        """Return the YouTube URL for the given youtube video ID.

        :param yt_videoid:  The YouTube video ID.
        :return: The YouTube URL for the given youtube video ID.
        :raises YtccException: If the given ID is None.
        """
        if yt_videoid is None:
            raise YtccException("Video id is none!")

        return f"https://www.youtube.com/watch?v={yt_videoid}"

    def set_channel_filter(self, channel_filter: List[str]) -> None:
        """Set the channel filter.

        The results when listing videos will only include videos by channels specified in the
        filter.

        :param channel_filter: The list of channel names.
        """
        # Mutate in place so the caller's list is copied, not aliased.
        self.channel_filter.clear()
        self.channel_filter.extend(channel_filter)

    def set_date_begin_filter(self, begin: datetime.datetime) -> None:
        """Set the lower bound of the time filter.

        The results when listing videos will only include videos newer than the given time.

        :param begin: The lower bound of the time filter.
        """
        self.date_begin_filter = begin.timestamp()

    def set_date_end_filter(self, end: datetime.datetime) -> None:
        """Set the upper bound of the time filter.

        The results when listing videos will only include videos older than the given time.

        :param end: The upper bound of the time filter.
        """
        self.date_end_filter = (end.timestamp(), True)

    def set_include_watched_filter(self) -> None:
        """Set the "watched video" filter.

        The results when listing videos will include both watched and unwatched videos.
        """
        self.include_watched_filter = True

    def set_video_id_filter(self, ids: Optional[Iterable[int]] = None) -> None:
        """Set the id filter.

        This filter overrides all other filters. Passing ``None`` clears the filter.

        :param ids: IDs to filter for.
        """
        self.video_id_filter.clear()
        if ids is not None:
            self.video_id_filter.extend(ids)

    @staticmethod
    def _update_channel(channel: Channel) -> List[Video]:
        """Fetch the RSS feed of the given channel and turn its entries into videos.

        :param channel: The channel whose feed is fetched.
        :return: One unwatched ``Video`` per feed entry.
        """
        yt_channel_id = channel.yt_channelid
        url = _get_youtube_rss_url(yt_channel_id)
        feed = feedparser.parse(url)
        # NOTE(review): published_parsed is presumably a UTC struct_time while time.mktime
        # interprets its argument as local time; list_videos applies the same conversion to
        # "now", so confirm both places before changing either.
        return [
            Video(yt_videoid=str(entry.yt_videoid),
                  title=str(entry.title),
                  description=str(entry.description),
                  publisher=yt_channel_id,
                  publish_date=time.mktime(entry.published_parsed),
                  watched=False) for entry in feed.entries
        ]

    def update_all(self) -> None:
        """Check every channel for new videos.

        :raises DatabaseOperationalError: If storing the videos fails with an operational
         database error.
        """
        channels = self.database.get_channels()
        # Two workers per reported CPU; the lambda supplies 1 when the CPU count is unknown
        # (presumably unpack_optional falls back to it on None).
        num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2

        with Pool(num_workers) as pool:
            videos = chain.from_iterable(
                pool.map(self._update_channel, channels))

        try:
            self.database.add_videos(videos)
        except sqlalchemy.exc.OperationalError as original:
            raise DatabaseOperationalError() from original

    def play_video(self, video: Video, audio_only: bool = False) -> bool:
        """Play the given video with the mpv video player and mark the video as watched.

        The video will not be marked as watched, if the player exits unexpectedly (i.e. exits with
        non-zero exit code) or another error occurs.

        :param video: The video to play.
        :param audio_only: If True, only the audio track of the video is played
        :return: False if no video was given or the player closed with a non-zero exit code.
         True if the video was played successfully.
        :raises YtccException: If the mpv executable cannot be found.
        """
        no_video_flag = []
        if audio_only:
            no_video_flag.append("--no-video")

        if video:
            try:
                command = [
                    "mpv", *no_video_flag, *self.config.mpv_flags,
                    self.get_youtube_video_url(video.yt_videoid)
                ]
                subprocess.run(command, check=True)
            except FileNotFoundError as original:
                # Keep the original error as explicit cause for easier debugging.
                raise YtccException(
                    "Could not locate the mpv video player!") from original
            except subprocess.CalledProcessError:
                return False

            video.watched = True
            return True

        return False

    def download_video(self,
                       video: Video,
                       path: str = "",
                       audio_only: bool = False) -> bool:
        """Download the given video with youtube-dl and mark it as watched.

        If the path is not given, the path is read from the config file.

        :param video: The video to download.
        :param path: The directory where the download is saved.
        :param audio_only: If True, only the audio track is downloaded.
        :return: True, if the video was downloaded successfully. False otherwise.
        """
        # Precedence: explicit argument, then configured directory, then no directory prefix.
        if path:
            download_dir = path
        elif self.config.download_dir:
            download_dir = self.config.download_dir
        else:
            download_dir = ""

        conf = self.config.youtube_dl

        ydl_opts: Dict[str, Any] = {
            "outtmpl": os.path.join(download_dir, conf.output_template),
            "ratelimit": conf.ratelimit,
            "retries": conf.retries,
            "quiet": conf.loglevel == "quiet",
            "verbose": conf.loglevel == "verbose",
            "merge_output_format": conf.merge_output_format,
            "ignoreerrors": False,
            "postprocessors": [{
                "key": "FFmpegMetadata"
            }]
        }

        if audio_only:
            ydl_opts["format"] = "bestaudio/best"
            # NOTE(review): audio extraction to m4a is only configured together with
            # thumbnail embedding - confirm that this coupling is intentional.
            if conf.thumbnail:
                ydl_opts["writethumbnail"] = True
                extract_audio_pp = {
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': "m4a"
                }
                # The extraction runs first, the thumbnail is embedded last.
                ydl_opts["postprocessors"].insert(0, extract_audio_pp)
                ydl_opts["postprocessors"].append({"key": "EmbedThumbnail"})
        else:
            ydl_opts["format"] = conf.format
            if conf.subtitles != "off":
                # The subtitle setting is a comma separated list of languages.
                ydl_opts["subtitleslangs"] = list(
                    map(str.strip, conf.subtitles.split(",")))
                ydl_opts["writesubtitles"] = True
                ydl_opts["writeautomaticsub"] = True
                ydl_opts["postprocessors"].append(
                    {"key": "FFmpegEmbedSubtitle"})

        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            url = self.get_youtube_video_url(video.yt_videoid)
            try:
                # Fetch the metadata first so live streams can be skipped before downloading.
                info = ydl.extract_info(url, download=False, process=False)
                if info.get("is_live", False) and conf.skip_live_stream:
                    return False

                ydl.process_ie_result(info, download=True)
                video.watched = True
                return True
            except youtube_dl.utils.YoutubeDLError:
                return False

    def add_channel(self, displayname: str, channel_url: str) -> None:
        """Subscribe to a channel.

        :param displayname: A human readable name of the channel.
        :param channel_url: The url to a page that can identify the channel.
        :raises ChannelDoesNotExistException: If the given URL does not exist.
        :raises DuplicateChannelException: If the channel already exists in the database.
        :raises BadURLException: If the given URL does not refer to a YouTube channel.
        """
        known_yt_domains = [
            "youtu.be", "youtube.com", "youtubeeducation.com",
            "youtubekids.com", "youtube-nocookie.com", "yt.be", "ytimg.com"
        ]

        url_parts = urlparse(channel_url, scheme="https")
        if not url_parts.netloc:
            # No network location was recognised; retry with an explicit scheme prefix.
            url_parts = urlparse("https://" + channel_url)

        # Strip the port and reduce the host to its last two labels before comparing.
        domain = url_parts.netloc.split(":")[0]
        domain = ".".join(domain.split(".")[-2:])

        if domain not in known_yt_domains:
            raise BadURLException(f"{channel_url} is not a valid URL")

        url = urlunparse(
            ("https", url_parts.netloc, url_parts.path, url_parts.params,
             url_parts.query, url_parts.fragment))

        try:
            # The context manager makes sure the HTTP response is closed again.
            with urlopen(url) as http_response:
                response = http_response.read().decode('utf-8')
        except URLError as original:
            raise BadURLException(
                f"{channel_url} is not a valid URL") from original

        parser = etree.HTMLParser()
        root = etree.parse(StringIO(response), parser).getroot()
        site_name_node = root.xpath(
            '/html/head/meta[@property="og:site_name"]')
        channel_id_node = root.xpath('//meta[@itemprop="channelId"]')

        if not site_name_node or site_name_node[0].attrib.get("content",
                                                              "") != "YouTube":
            raise BadURLException(
                f"{channel_url} does not seem to be a YouTube URL")

        if not channel_id_node:
            raise ChannelDoesNotExistException(
                f"{channel_url} does not seem to be a YouTube URL")

        yt_channelid = channel_id_node[0].attrib.get("content")

        try:
            self.database.add_channel(
                Channel(displayname=displayname, yt_channelid=yt_channelid))
        except sqlalchemy.exc.IntegrityError as original:
            raise DuplicateChannelException(
                f"Channel already subscribed: {displayname}") from original

    def import_channels(self, file: TextIO) -> None:
        """Import all channels from YouTube's subscription export file.

        :param file: The file to read from.
        :raises InvalidSubscriptionFileError: If the file cannot be parsed or an RSS outline
         does not carry exactly one ``channel_id`` query parameter.
        """
        def _create_channel(elem: etree.Element) -> Channel:
            # The channel ID is taken from the query string of the outline's feed URL.
            rss_url = urlparse(elem.attrib["xmlUrl"])
            query_dict = parse_qs(rss_url.query, keep_blank_values=False)
            channel_id = query_dict.get("channel_id", [])
            if len(channel_id) != 1:
                message = f"'{file.name}' is not a valid YouTube export file"
                raise InvalidSubscriptionFileError(message)
            return Channel(displayname=elem.attrib["title"],
                           yt_channelid=channel_id[0])

        try:
            root = etree.parse(file)
        except Exception as original:
            # Any parse failure is reported as an invalid file; the cause stays attached.
            raise InvalidSubscriptionFileError(
                f"'{file.name}' is not a valid YouTube export file"
            ) from original

        elements = root.xpath('//outline[@type="rss"]')
        self.database.add_channels((_create_channel(e) for e in elements))

    def export_channels(self, outstream: BinaryIO) -> None:
        """Export all channels as OPML file.

        :param outstream: The file/stream the OPML file will be written to.
        """
        opml = etree.Element("opml", version="1.1")
        body = etree.SubElement(opml, "body")
        outline = etree.SubElement(body,
                                   "outline",
                                   text="ytcc subscriptions",
                                   title="ytcc subscriptions")
        # One nested RSS outline per subscribed channel.
        for channel in self.get_channels():
            outline.append(
                etree.Element("outline",
                              text=channel.displayname,
                              title=channel.displayname,
                              type="rss",
                              xmlUrl=_get_youtube_rss_url(
                                  channel.yt_channelid)))

        outstream.write(etree.tostring(opml, pretty_print=True))

    def list_videos(self) -> List[Video]:
        """Return a list of videos that match the filters set by the set_*_filter methods.

        A non-empty video ID filter takes precedence over all other filters.

        :return: A list of videos.
        """
        if self.video_id_filter:
            return self.database.session.query(Video) \
                .join(Channel, Channel.yt_channelid == Video.publisher) \
                .filter(Video.id.in_(self.video_id_filter)) \
                .order_by(*self.config.order_by).all()

        if not self.date_end_filter[1]:
            # No explicit upper bound: use the current time plus a 20 second margin.
            date_end_filter = time.mktime(time.gmtime()) + 20
        else:
            date_end_filter = self.date_end_filter[0]

        query = self.database.session.query(Video) \
            .join(Channel, Channel.yt_channelid == Video.publisher) \
            .filter(Video.publish_date > self.date_begin_filter) \
            .filter(Video.publish_date < date_end_filter)

        if self.channel_filter:
            query = query.filter(Channel.displayname.in_(self.channel_filter))

        if not self.include_watched_filter:
            query = query.filter(~Video.watched)

        query = query.order_by(*self.config.order_by)
        return query.all()

    def delete_channels(self, displaynames: List[str]) -> None:
        """Delete (or unsubscribe) channels.

        :param displaynames: The names of channels to delete.
        """
        self.database.delete_channels(displaynames)

    def rename_channel(self, oldname: str, newname: str) -> None:
        """Rename the given channel.

        :param oldname: The name of the channel.
        :param newname: The new name of the channel.
        :raises ChannelDoesNotExistException: If the given channel does not exist.
        :raises DuplicateChannelException: If new name already exists.
        """
        self.database.rename_channel(oldname, newname)

    def get_channels(self) -> List[Channel]:
        """Get the list of all subscribed channels.

        :return: A list of ``Channel`` objects.
        """
        return self.database.get_channels()

    def cleanup(self) -> None:
        """Delete old videos from the database."""
        self.database.cleanup()
Beispiel #14
0
 def test_add_channel_duplicate(self):
     # Register the same YouTube channel ID twice under two different display names.
     # NOTE(review): no assertion or expected-exception marker is visible here; presumably
     # the second call is expected to fail - confirm against the full test module.
     database = Database(":memory:")
     channel_id = "UCsLiV4WJfkTEHH0b9PmRklw"
     database.add_channel("Webdriver Torso", channel_id)
     database.add_channel("Webdriver Torso2", channel_id)
Beispiel #15
0
 def test_add_channel_duplicate(self):
     # Add two Channel objects that share one YouTube channel ID but differ in display name.
     # NOTE(review): no assertion or expected-exception marker is visible here; presumably
     # the second insert is expected to fail - confirm against the full test module.
     database = Database(":memory:")
     channel_id = "UCsLiV4WJfkTEHH0b9PmRklw"
     for name in ("Webdriver Torso", "Webdriver Torso2"):
         database.add_channel(Channel(displayname=name, yt_channelid=channel_id))