コード例 #1
0
    def get_tfh_index(self, url: str, trailers_to_download: str,
                      trailer_handler) -> int:
        """
        Ask youtube-dl for the entries of the Trailers From Hell playlist.

        Nothing is downloaded ('skip_download' is set); youtube-dl is run
        against the playlist with a TfhIndexLogger as its logger and a
        TFHIndexProgressHook as its progress hook.

        The playlist has well over a thousand entries, and indiscriminate
        requests can earn a "429" (Too Many Requests) from Youtube, after
        which downloads are denied for an extended period and a ban is
        possible. Requesting fewer trailers at a time, caching and throttling
        all help to avoid that.

        :param url: points to playlist
        :param trailers_to_download: passed to youtube-dl as 'playlist_items',
                but only when its length is greater than 10; otherwise the
                whole playlist is requested.
        :param trailer_handler: callback handed to TfhIndexLogger; per the
                original notes, DiscoverTFHMovies uses it to process each
                returned entry as it occurs.
        :return: the non-zero value of check_too_many_requests(url) when that
                check fails, otherwise self._error as it stands after the
                youtube-dl run.
        """

        downloader_cls = VideoDownloader

        throttle_status = self.check_too_many_requests(url)
        if throttle_status != 0:
            return throttle_status

        # Asking for specific playlist_items would allow the fetch rate to be
        # controlled (and hopefully avoid TOO_MANY_REQUESTS). However, with
        # playlist_items the results do not include the total number of items
        # in the playlist, and an out-of-range item produces no error at all.
        #
        # So, reluctantly, everything is requested at once (without
        # downloading any trailers).

        index_logger = TfhIndexLogger(self, trailer_handler, url)
        progress_hook = TFHIndexProgressHook(self).status_hook
        options = {
            'forcejson': True,
            'noplaylist': False,
            'skip_download': True,
            'logger': index_logger,
            'sleep_interval': 10,
            'max_sleep_interval': 240,
            'playlistrandom': True,
            'progress_hooks': [progress_hook],
        }

        # Optional file-system backed options: each is only set when the
        # configured path is non-empty and exists. Getters are invoked lazily
        # so they run in the same order as the checks.
        optional_paths = (
            ('cookiefile', Settings.get_youtube_dl_cookie_path),
            ('cachedir', Settings.get_youtube_dl_cache_path),
        )
        for option_name, path_getter in optional_paths:
            configured_path = path_getter()
            if len(configured_path) > 0 and os.path.exists(configured_path):
                options[option_name] = configured_path

        # NOTE(review): the threshold of 10 is applied to the length of the
        # string, not to a number of items -- confirm this is intended.
        if len(trailers_to_download) > 10:
            options['playlist_items'] = trailers_to_download

        Monitor.throw_exception_if_abort_requested()
        try:
            with youtube_dl.YoutubeDL(options) as downloader:
                downloader.download([url])

        except AbortException:
            # Abort requests must always propagate to the caller.
            reraise(*sys.exc_info())

        except Exception as exc:
            # Only log here when no error code has been recorded already.
            if self._error == 0:
                downloader_cls._logger.exception(exc)

        if self._error not in (0, 99):
            downloader_cls._logger.debug('Results for url:', url, 'error:',
                                         self._error)
            index_logger.log_error()
            index_logger.log_debug()
            index_logger.log_warning()

        Monitor.throw_exception_if_abort_requested(timeout=DELAY)
        return self._error
コード例 #2
0
    def get_tfh_index(self,
                      url: str,
                      trailer_handler,
                      block: bool = False) -> int:
        """
        Fetches all of the urls in the Trailers From Hell playlist. Note that
        the entire list is well over a thousand and that indiscriminate
        downloading can get the dreaded "429" code from Youtube (Too Many
        Requests) which will cause downloads to be denied for an extended
        period of time and potentially banned. To help prevent this,
        reducing how many trailers are requested at a time, caching and
        throttling of requests should be used.

        The work is done while holding the lock obtained through
        get_lock(Movie.TFH_SOURCE). After the attempt, the class-level retry
        counter is reset, or, when self._error equals
        Constants.HTTP_TOO_MANY_REQUESTS, incremented and used to push
        _too_many_requests_resume_time further into the future.

        :param url: points to playlist
        :param trailer_handler: Call back to DiscoverTFHMovies to process each
                returned entry as it occurs.
        :param block: If true, then wait until no longer TOO_MANY_REQUESTS
        :return: the non-zero value of check_too_many_requests when block is
                False and that check fails; otherwise self._error as it
                stands after the youtube-dl run.
        """

        clz = VideoDownloader
        clz.delay_between_transactions(Movie.TFH_SOURCE, False)
        tfh_index_logger = TfhIndexLogger(self, trailer_handler, url)

        # Tracks whether get_lock() returned, so that the 'finally' clause
        # never releases a lock that this call failed to acquire.
        lock_acquired = False
        try:
            clz.get_lock(Movie.TFH_SOURCE)
            lock_acquired = True
            # HAVE LOCK

            if not block:
                too_many_requests = clz.check_too_many_requests(
                    url, Movie.TFH_SOURCE)
                if too_many_requests != 0:
                    if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                        clz._logger.debug_verbose(
                            f'Not getting tfh_index url: {url} Too Many Requests'
                        )
                    # NOTE(review): the 'finally' clause still runs on this
                    # path and resets _retry_attempts unless self._error is
                    # HTTP_TOO_MANY_REQUESTS -- confirm that is intended.
                    return too_many_requests
            else:
                clz.wait_if_too_many_requests(Movie.TFH_SOURCE, True)

            # Would prefer to specify a list of playlist_items in order
            # to control rate of fetches (and hopefully avoid TOO_MANY REQUESTS)
            # But.. when you use playlist_items you do NOT get the total number
            # of items in the playlist as part of the results. Further, if you
            # try to get a playlist item out of range, there is no error, nothing.
            #
            # Therefore, reluctantly not using playlist_items and getting everything
            # at once (although no downloaded trailers).
            #
            # Example of the result, as recorded by the original author:
            #
            #     {
            #         "_type": "playlist",
            #         "entries": [
            #             {
            #                 "_type": "url_transparent",
            #                 "ie_key": "Youtube",
            #                 "id": "Sz0FCYJaQUc",
            #                 "url": "Sz0FCYJaQUc",
            #                 "title": "WATCH LIVE: The Old Path Bible Exposition"
            #                          " - April 24, 2020, 7 PM PHT",
            #                 "description": null,
            #                 "duration": 10235.0,
            #                 "view_count": null,
            #                 "uploader": null
            #             }
            #         ]
            #     }
            ydl_opts = {
                'forcejson': True,
                'noplaylist': False,
                'extract_flat': 'in_playlist',
                'ignoreerrors': True,
                'skip_download': True,
                'logger': tfh_index_logger,
                'sleep_interval': 1,
                'max_sleep_interval': 8,
                #  'playlist_items': trailers_to_download,
                'playlistrandom': True,
                'progress_hooks': [TFHIndexProgressHook(self).status_hook]
                # 'debug_printtraffic': True
            }
            # Cookie file and cache directory are only passed on when they are
            # configured (non-empty) and present on disk.
            cookie_path = Settings.get_youtube_dl_cookie_path()
            if len(cookie_path) > 0 and os.path.exists(cookie_path):
                ydl_opts['cookiefile'] = cookie_path

            cache_dir = Settings.get_youtube_dl_cache_path()
            if len(cache_dir) > 0 and os.path.exists(cache_dir):
                ydl_opts['cachedir'] = cache_dir

            Monitor.throw_exception_if_abort_requested()

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

        except AbortException:
            # Abort requests must always propagate to the caller.
            reraise(*sys.exc_info())

        except Exception:
            # Only log here when no error code has been recorded already.
            if self._error == 0:
                clz._logger.exception(f'Error downloading: url: {url}')
        finally:
            if lock_acquired:
                clz.release_lock(Movie.TFH_SOURCE)

            # Linear back-off: each consecutive TOO_MANY_REQUESTS result
            # moves the resume time RETRY_DELAY further out.
            if self._error != Constants.HTTP_TOO_MANY_REQUESTS:
                clz._retry_attempts = 0
            else:
                clz._retry_attempts += 1
                clz._too_many_requests_resume_time = (
                    datetime.datetime.now() +
                    (RETRY_DELAY * clz._retry_attempts))

        if self._error not in (0, 99):
            clz._logger.debug('Results for url:', url, 'error:', self._error)
            tfh_index_logger.log_error()
            tfh_index_logger.log_debug()
            tfh_index_logger.log_warning()

        Monitor.throw_exception_if_abort_requested()
        return self._error