Beispiel #1
0
    def get_info(self, url: str) -> Tuple[int, Optional[List[MovieType]]]:
        """
        Ask the site for metadata about a video without downloading it.

        youtube-dl is run with 'skip_download' and 'forcejson' set. Its
        output is captured by a TfhInfoLogger, from which the result is read
        once the logger reports that it is finished.

        :param url: Address of the video to get information about
        :return: Tuple of (status, info). (429, None) is returned when
                 check_too_many_requests gives a non-zero value for url.
                 Otherwise status is always 0 and info is what the logger
                 collected, or None when self._error was set or an
                 exception was caught.
                 NOTE(review): self._error is not reflected in the returned
                 status -- confirm that callers rely on info being None.
        """
        clz = VideoDownloader

        if clz.check_too_many_requests(url) != 0:
            return 429, None

        trailer_info: Optional[List[MovieType]] = None
        output_collector = TfhInfoLogger(self, url, parse_json_as_youtube=False)
        try:
            options = dict(
                forcejson='true',
                skip_download='true',
                logger=output_collector,
                progress_hooks=[TrailerInfoProgressHook(self).status_hook],
            )
            cookie_file = Settings.get_youtube_dl_cookie_path()
            cookie_file_usable = (len(cookie_file) > 0
                                  and os.path.exists(cookie_file))
            if cookie_file_usable:
                options['cookiefile'] = cookie_file

            Monitor.throw_exception_if_abort_requested()

            # Run youtube-dl; results arrive through the logger and hook.
            with youtube_dl.YoutubeDL(options) as downloader:
                downloader.download([url])

            # Poll until the logger has finished or an error was recorded.
            while True:
                if (output_collector.is_finished is not None
                        or self._error != 0):
                    break
                Monitor.throw_exception_if_abort_requested(timeout=0.5)

            if self._error == 0:
                trailer_info = output_collector.get_trailer_info()

        except AbortException:
            reraise(*sys.exc_info())

        except Exception as e:
            # Only report the exception when no error has been recorded yet.
            if self._error == 0:
                clz._logger.exception(e)
                if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                    clz._logger.debug_verbose(
                        'Failed to download site info for:', url)
            trailer_info = None

        # Error 0 (none) and 99 are not worth dumping the captured output for.
        if self._error not in (0, 99):
            clz._logger.debug('Results for url:', url, 'error:', self._error)
            output_collector.log_debug()
            output_collector.log_warning()
            output_collector.log_error()

        Monitor.throw_exception_if_abort_requested(timeout=DELAY)
        return 0, trailer_info
Beispiel #2
0
    def get_tfh_index(self, url: str, trailers_to_download: str,
                      trailer_handler) -> int:
        """
        Retrieve the entries of the Trailers From Hell playlist.

        Only metadata is requested ('skip_download'); no trailers are
        downloaded here. The playlist holds well over a thousand entries and
        careless fetching can earn a "429" (Too Many Requests) from Youtube,
        after which downloads are refused for an extended period. Requesting
        fewer trailers at a time, caching and throttling help to avoid that.

        :param url: points to playlist
        :param trailers_to_download: Specifies which playlist items to get
                the url of. It is only handed to youtube-dl (as
                'playlist_items') when its length exceeds 10.
        :param trailer_handler: Call back to DiscoverTFHMovies to process each
                returned entry as it occurs; passed to TfhIndexLogger.
        :return: The non-zero result of check_too_many_requests when there is
                one, otherwise self._error as it stands after the run.
        """

        clz = VideoDownloader

        too_many_requests = self.check_too_many_requests(url)
        if too_many_requests != 0:
            return too_many_requests

        # 'playlist_items' would allow the fetch rate to be controlled, but
        # with it youtube-dl does not report the total number of playlist
        # entries, and asking for an out-of-range item fails silently. So,
        # reluctantly, the whole index is normally fetched in one go
        # (without downloading any trailers). 'extract_flat' and
        # 'debug_printtraffic' are deliberately not set.

        index_collector = TfhIndexLogger(self, trailer_handler, url)
        options = dict(
            forcejson=True,
            noplaylist=False,
            skip_download=True,
            logger=index_collector,
            sleep_interval=10,
            max_sleep_interval=240,
            playlistrandom=True,
            progress_hooks=[TFHIndexProgressHook(self).status_hook],
        )

        cookie_file = Settings.get_youtube_dl_cookie_path()
        if len(cookie_file) > 0 and os.path.exists(cookie_file):
            options['cookiefile'] = cookie_file

        cache_folder = Settings.get_youtube_dl_cache_path()
        if len(cache_folder) > 0 and os.path.exists(cache_folder):
            options['cachedir'] = cache_folder

        if len(trailers_to_download) > 10:
            options['playlist_items'] = trailers_to_download

        Monitor.throw_exception_if_abort_requested()
        try:
            with youtube_dl.YoutubeDL(options) as downloader:
                downloader.download([url])

        except AbortException:
            reraise(*sys.exc_info())

        except Exception as e:
            # Only report the exception when no error has been recorded yet.
            if self._error == 0:
                clz._logger.exception(e)

        # Error 0 (none) and 99 are not worth dumping the captured output for.
        if self._error not in (0, 99):
            clz._logger.debug('Results for url:', url, 'error:', self._error)
            index_collector.log_error()
            index_collector.log_debug()
            index_collector.log_warning()

        Monitor.throw_exception_if_abort_requested(timeout=DELAY)
        return self._error
Beispiel #3
0
    def get_video(self, url, folder, movie_id):
        # type: (str, str, Union[int, str]) -> Tuple[int, Optional[MovieType]]
        """
        Download a video from the given url into the given folder.

        youtube-dl is given the output template
        '_rt_<movie_id>_%(title)s.%(ext)s' inside folder. When the download
        fails or is aborted, every file in folder matching '_rt_<movie_id>*'
        is removed (best effort).

        :param url:      To download from
        :param folder:   To download to
        :param movie_id: To pass to youtube-dl to embed in the created file name
        :return: Tuple of (error code, movie). (429, None) is returned when
                 check_too_many_requests gives a non-zero value for url.
                 Otherwise the code is self._error: 0 on success, 3 when an
                 unexpected exception was caught here, 1 when no movie data
                 was produced, or a value recorded elsewhere. movie is None
                 whenever the code is non-zero.
        :raises AbortException: re-raised after recording error 99 and
                 removing any partially downloaded files.
        """
        clz = VideoDownloader

        if clz.check_too_many_requests(url) != 0:
            return 429, None

        def remove_partial_files():
            # type: () -> None
            """Best-effort removal of every file created for movie_id."""
            for path in glob.glob(os.path.join(folder, f'_rt_{movie_id}*')):
                try:
                    os.remove(path)
                except OSError:
                    # Cleanup must not mask the original failure.
                    pass

        # The embedded % fields are for youtube_dl to fill in.

        template = os.path.join(folder, f'_rt_{movie_id}_%(title)s.%(ext)s')
        movie = None

        # Collect and respond to output from youtube-dl
        video_logger = VideoLogger(self, url)
        try:
            ydl_opts = {
                'forcejson': True,
                'outtmpl': template,
                # Must be a real boolean: the string 'false' is truthy and
                # would leave the option switched on.
                'updatetime': False,
                'logger': video_logger,
                'progress_hooks':
                [VideoDownloadProgressHook(self).status_hook]
            }
            # Optional cookie-file, intended to help avoid youtube 429 errors.

            cookie_path = Settings.get_youtube_dl_cookie_path()
            if len(cookie_path) > 0 and os.path.exists(cookie_path):
                ydl_opts['cookiefile'] = cookie_path

            # Start download
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            # Wait until the logger has data or an error was recorded.
            while video_logger.data is None and self._error == 0:
                Monitor.throw_exception_if_abort_requested(timeout=0.5)

            movie = video_logger.data
            if self._error == 0 and movie is not None:
                downloaded = glob.glob(
                    os.path.join(folder, f'_rt_{movie_id}*'))
                # glob returns a (possibly empty) list, never None. Only
                # correct the recorded path when a file was actually found;
                # otherwise leave the reported name untouched.
                if downloaded:
                    trailer_file = downloaded[0]
                    #
                    # Don't know why, but sometimes youtube_dl returns incorrect
                    # file extension

                    if trailer_file != movie[Movie.TRAILER]:
                        if clz._logger.isEnabledFor(
                                LazyLogger.DEBUG_EXTRA_VERBOSE):
                            clz._logger.debug_extra_verbose(
                                'youtube_dl gave incorrect file name:',
                                movie[Movie.TRAILER], 'changing to:',
                                trailer_file)

                        movie[Movie.TRAILER] = trailer_file

        except AbortException:
            self._error = 99
            movie = None
            remove_partial_files()
            reraise(*sys.exc_info())
        except Exception as e:
            # Only report the exception when no error has been recorded yet.
            if self._error == 0:
                self._error = 3
                clz._logger.exception(e)

        if self._error == 0 and movie is None:
            self._error = 1

        if self._error != 0:
            clz._logger.debug('Results for url:', url, 'error:', self._error)
            video_logger.log_debug()
            video_logger.log_warning()
            video_logger.log_error()
            movie = None
            remove_partial_files()

        Monitor.throw_exception_if_abort_requested(timeout=DELAY)
        return self._error, movie
Beispiel #4
0
    def get_tfh_index(self,
                      url: str,
                      trailer_handler,
                      block: bool = False) -> int:
        """
        Retrieve the entries of the Trailers From Hell playlist.

        Only metadata is requested ('skip_download' with 'extract_flat'); no
        trailers are downloaded here. The playlist holds well over a thousand
        entries and careless fetching can earn a "429" (Too Many Requests)
        from Youtube, after which downloads are refused for an extended
        period. Requesting fewer trailers at a time, caching and throttling
        help to avoid that.

        The lock for Movie.TFH_SOURCE is requested before the fetch and
        released in the finally clause, where the class-wide retry counter
        and resume time are also updated.

        :param url: points to playlist
        :param trailer_handler: Call back to DiscoverTFHMovies to process each
                returned entry as it occurs; passed to TfhIndexLogger.
        :param block: If true, then wait until no longer TOO_MANY_REQUESTS;
                otherwise return immediately when check_too_many_requests
                gives a non-zero value.
        :return: The non-zero result of check_too_many_requests when block is
                false and there is one, otherwise self._error as it stands
                after the run.
        """

        clz = VideoDownloader
        clz.delay_between_transactions(Movie.TFH_SOURCE, False)
        index_collector = TfhIndexLogger(self, trailer_handler, url)

        try:
            clz.get_lock(Movie.TFH_SOURCE)
            # Lock is held from here until the finally clause.

            if block:
                clz.wait_if_too_many_requests(Movie.TFH_SOURCE, True)
            else:
                status = clz.check_too_many_requests(url, Movie.TFH_SOURCE)
                if status != 0:
                    if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                        clz._logger.debug_verbose(
                            f'Not getting tfh_index url: {url} Too Many Requests'
                        )
                    return status

            # 'playlist_items' would allow the fetch rate to be controlled,
            # but with it youtube-dl does not report the total number of
            # playlist entries, and asking for an out-of-range item fails
            # silently. So, reluctantly, the whole index is fetched in one
            # go (without downloading any trailers).
            #
            # Example of the JSON produced:
            #   {
            #       "_type": "playlist",
            #       "entries": [
            #           {
            #               "_type": "url_transparent",
            #               "ie_key": "Youtube",
            #               "id": "Sz0FCYJaQUc",
            #               "url": "Sz0FCYJaQUc",
            #               "title": "WATCH LIVE: The Old Path Bible
            #                         Exposition - April 24, 2020, 7 PM PHT",
            #               "description": null,
            #               "duration": 10235.0,
            #               "view_count": null,
            #               "uploader": null
            #           }
            #       ]
            #   }
            #
            # 'playlist_items' and 'debug_printtraffic' are deliberately
            # not set.
            options = dict(
                forcejson=True,
                noplaylist=False,
                extract_flat='in_playlist',
                ignoreerrors=True,
                skip_download=True,
                logger=index_collector,
                sleep_interval=1,
                max_sleep_interval=8,
                playlistrandom=True,
                progress_hooks=[TFHIndexProgressHook(self).status_hook],
            )

            cookie_file = Settings.get_youtube_dl_cookie_path()
            if len(cookie_file) > 0 and os.path.exists(cookie_file):
                options['cookiefile'] = cookie_file

            cache_folder = Settings.get_youtube_dl_cache_path()
            if len(cache_folder) > 0 and os.path.exists(cache_folder):
                options['cachedir'] = cache_folder

            Monitor.throw_exception_if_abort_requested()

            with youtube_dl.YoutubeDL(options) as downloader:
                downloader.download([url])

        except AbortException:
            reraise(*sys.exc_info())

        except Exception:
            # Only report the exception when no error has been recorded yet.
            if self._error == 0:
                clz._logger.exception(f'Error downloading: url: {url}')
        finally:
            clz.release_lock(Movie.TFH_SOURCE)

            if self._error == Constants.HTTP_TOO_MANY_REQUESTS:
                # Push the resume time further out with every failed attempt.
                clz._retry_attempts += 1
                now = datetime.datetime.now()
                clz._too_many_requests_resume_time = (
                    now + (RETRY_DELAY * clz._retry_attempts))
            else:
                clz._retry_attempts = 0

        # Error 0 (none) and 99 are not worth dumping the captured output for.
        if self._error not in (0, 99):
            clz._logger.debug('Results for url:', url, 'error:', self._error)
            index_collector.log_error()
            index_collector.log_debug()
            index_collector.log_warning()

        Monitor.throw_exception_if_abort_requested()
        return self._error
Beispiel #5
0
    def get_info(self,
                 url: str,
                 movie_source: str,
                 block: bool = False) -> Tuple[int, Optional[List[MovieType]]]:
        """
        Ask the site for metadata about a video without downloading it.

        youtube-dl is run with 'skip_download' and 'forcejson' set. Its
        output is captured by a TfhInfoLogger, from which the result is read
        once the logger reports that it is finished. The lock for
        movie_source is requested before the fetch and released in the
        finally clause, where the class-wide retry counter and resume time
        are also updated.

        :param url:          To get information from
        :param movie_source: Used to determine delay between requests and
                             which lock to take
        :param block:        Wait extended period of time for TOO_MANY_REQUESTS,
                             if needed; otherwise return immediately when
                             check_too_many_requests gives a non-zero value.
        :return: Tuple of (status, info). When block is false and
                 check_too_many_requests gives a non-zero value, that value
                 is returned with None. Otherwise status is always 0 and
                 info is what the logger collected, or None when
                 self._error was set or an exception was caught.
                 NOTE(review): self._error is not reflected in the returned
                 status -- confirm that callers rely on info being None.
        """
        clz = VideoDownloader
        trailer_info: Optional[List[MovieType]] = None
        clz.delay_between_transactions(movie_source, False)
        output_collector = TfhInfoLogger(self, url, parse_json_as_youtube=False)

        try:
            clz.get_lock(movie_source)
            # Lock is held from here until the finally clause.

            if block:
                clz.wait_if_too_many_requests(movie_source, True)
            else:
                status = clz.check_too_many_requests(url, movie_source)
                if status != 0:
                    if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                        clz._logger.debug_verbose(
                            f'Not getting info url: {url} Too Many Requests')
                    return status, None

            options = dict(
                forcejson='true',
                skip_download='true',
                logger=output_collector,
                progress_hooks=[TrailerInfoProgressHook(self).status_hook],
            )
            cookie_file = Settings.get_youtube_dl_cookie_path()
            cookie_file_usable = (len(cookie_file) > 0
                                  and os.path.exists(cookie_file))
            if cookie_file_usable:
                options['cookiefile'] = cookie_file

            Monitor.throw_exception_if_abort_requested()

            # Run youtube-dl; results arrive through the logger and hook.
            with youtube_dl.YoutubeDL(options) as downloader:
                downloader.download([url])

            # Poll until the logger has finished or an error was recorded.
            while True:
                if (output_collector.is_finished is not None
                        or self._error != 0):
                    break
                Monitor.throw_exception_if_abort_requested(timeout=0.5)

            if self._error == 0:
                trailer_info = output_collector.get_trailer_info()

        except AbortException:
            reraise(*sys.exc_info())

        except Exception:
            # Only report the exception when no error has been recorded yet.
            if self._error == 0:
                clz._logger.exception(
                    f'Error downloading: {movie_source} url: {url}')
                if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                    clz._logger.debug_verbose(
                        'Failed to download site info for:', url)
            trailer_info = None
        finally:
            clz.release_lock(movie_source)

            if self._error == Constants.HTTP_TOO_MANY_REQUESTS:
                # Push the resume time further out with every failed attempt.
                clz._retry_attempts += 1
                now = datetime.datetime.now()
                clz._too_many_requests_resume_time = (
                    now + (RETRY_DELAY * clz._retry_attempts))
            else:
                clz._retry_attempts = 0

        # Error 0 (none) and 99 are not worth dumping the captured output for.
        if self._error not in (0, 99):
            clz._logger.debug('Results for url:', url, 'error:', self._error)
            output_collector.log_debug()
            output_collector.log_warning()
            output_collector.log_error()

        Monitor.throw_exception_if_abort_requested()
        return 0, trailer_info
Beispiel #6
0
    def get_video(self,
                  url: str,
                  folder: str,
                  movie_id: Union[int, str],
                  title: str,
                  source: str,
                  block: bool = True) -> Tuple[int, Optional[MovieType]]:
        """
        Download a video from the given url into the given folder.

        youtube-dl is given the output template
        '_rt_<movie_id>_%(title)s.%(ext)s' inside folder. When the download
        fails or is aborted, every file in folder matching '_rt_<movie_id>*'
        is removed (best effort). The lock for source is requested before
        the download and released in the finally clause, where the
        class-wide retry counter and resume time are also updated.

        :param url:      To download from
        :param folder:   To download to
        :param movie_id: To pass to youtube-dl to embed in the created file name
        :param title:    For logging
        :param source:   Movie source used to determine delay and which
                         lock to take
        :param block:    Wait extended period of time for TOO_MANY_REQUESTS,
                         if needed; otherwise return immediately when
                         check_too_many_requests gives a non-zero value.
        :return: Tuple of (error code, movie). When block is false and
                 check_too_many_requests gives a non-zero value, that value
                 is returned with None. Otherwise the code is self._error
                 and movie is None whenever that code is non-zero. Error 3
                 is requested via set_error when an unexpected exception is
                 caught here, and error 1 when no movie data was produced.
        :raises AbortException: re-raised after recording error 99 and
                 removing any partially downloaded files.
        """
        clz = VideoDownloader
        movie = None
        video_logger: Optional[VideoLogger] = None

        def remove_partial_files() -> None:
            """Best-effort removal of every file created for movie_id."""
            for path in glob.glob(os.path.join(folder, f'_rt_{movie_id}*')):
                try:
                    os.remove(path)
                except OSError:
                    # Cleanup must not mask the original failure.
                    pass

        if clz._logger.isEnabledFor(LazyLogger.DISABLED):
            clz._logger.debug_extra_verbose(f'title: {title}')
        try:
            clz.get_lock(source)
            # HAVE LOCK

            if not block:
                too_many_requests = clz.check_too_many_requests(url, source)
                if too_many_requests != 0:
                    if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                        clz._logger.debug_verbose(
                            f'Not getting video url: {url} Too Many Requests')
                    return too_many_requests, None
            else:
                clz.wait_if_too_many_requests(source, True)

            clz.delay_between_transactions(source, True)
            # The embedded % fields are for youtube_dl to fill in.

            template = os.path.join(folder,
                                    f'_rt_{movie_id}_%(title)s.%(ext)s')

            # Collect and respond to output from youtube-dl. Everything
            # except iTunes is parsed as youtube json.
            parse_json_as_youtube = source != Movie.ITUNES_SOURCE

            video_logger = VideoLogger(
                self, url, parse_json_as_youtube=parse_json_as_youtube)
            ydl_opts = {
                'forcejson': True,
                'outtmpl': template,
                # Must be a real boolean: the string 'false' is truthy and
                # would leave the option switched on.
                'updatetime': False,
                'logger': video_logger,
                'progress_hooks':
                [VideoDownloadProgressHook(self).status_hook]
            }
            # Optional cookie-file, intended to help avoid youtube 429 errors.

            cookie_path = Settings.get_youtube_dl_cookie_path()
            if len(cookie_path) > 0 and os.path.exists(cookie_path):
                ydl_opts['cookiefile'] = cookie_path

            # Start download
            # Sometimes fail with Nonetype or other errors because of a URL that
            # requires a login, is for an ADULT movie, etc.

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            # Wait until the logger has data, an error was recorded, or the
            # download was flagged as finished.
            while (video_logger.data is None and self._error == 0
                   and not self._download_finished):
                Monitor.throw_exception_if_abort_requested(timeout=0.5)

            movie = video_logger.data
            # movie may still be None here when the wait ended because the
            # download was flagged finished; that case is reported as
            # error 1 below rather than failing on a None dereference.
            if self._error == 0 and movie is not None:
                downloaded = glob.glob(
                    os.path.join(folder, f'_rt_{movie_id}*'))
                # glob returns a (possibly empty) list, never None. Only
                # record or correct the path when a file was actually found.
                if downloaded:
                    trailer_file = downloaded[0]
                    #
                    # Don't know why, but sometimes youtube_dl returns incorrect
                    # file extension

                    movie.setdefault(Movie.TRAILER, trailer_file)
                    if trailer_file != movie[Movie.TRAILER]:
                        if clz._logger.isEnabledFor(LazyLogger.DISABLED):
                            clz._logger.debug_extra_verbose(
                                'youtube_dl gave incorrect file name:',
                                movie[Movie.TRAILER], 'changing to:',
                                trailer_file)

                        movie[Movie.TRAILER] = trailer_file
        except AbortException:
            self.set_error(99, force=True)
            movie = None
            remove_partial_files()
            reraise(*sys.exc_info())
        except Exception:
            self.set_error(3)
            # Only report the exception if error 3 is what ended up recorded.
            if self._error == 3:
                clz._logger.exception(
                    f'Error downloading: {title} {source} url: {url}')
        finally:
            clz.release_lock(source)
            # LOCK RELEASED

            if self._error != Constants.HTTP_TOO_MANY_REQUESTS:
                clz._retry_attempts = 0
            else:
                # Push the resume time further out with every failed attempt.
                VideoDownloader._retry_attempts += 1
                VideoDownloader._too_many_requests_resume_time = (
                    datetime.datetime.now() +
                    (RETRY_DELAY * VideoDownloader._retry_attempts))

        if movie is None:
            self.set_error(1)

        if self._error != 0:
            clz._logger.debug(
                f'Results for {title} url:{url} error: {self._error}')
            # The logger does not exist if the failure happened before it
            # was created (e.g. while taking the lock or waiting).
            if video_logger is not None:
                video_logger.log_debug()
                video_logger.log_warning()
                video_logger.log_error()
            movie = None
            remove_partial_files()

        Monitor.throw_exception_if_abort_requested()
        return self._error, movie