def get_tfh_index(self, url: str, trailers_to_download: str,
                  trailer_handler) -> int:
    """
    Retrieve the entries of the Trailers From Hell playlist.

    Only playlist information is requested ('skip_download' is set); no
    trailers are downloaded. The whole playlist is well over a thousand
    entries, and careless fetching can earn the dreaded "429" response
    from Youtube (Too Many Requests), after which downloads are refused
    for an extended period and the client may be banned. Requesting
    fewer trailers at a time, caching and throttling all help avoid this.

    :param url: points to playlist
    :param trailers_to_download: Specifies the index of which trailers to
           get the url of. It is only passed to youtube-dl as
           'playlist_items' when its length exceeds 10; otherwise the
           whole playlist is requested.
    :param trailer_handler: Call back to DiscoverTFHMovies to process each
           returned entry as it occurs.
    :return: the non-zero result of check_too_many_requests when requests
             are currently refused, otherwise self._error
    """
    clz = VideoDownloader

    # Bail out right away while requests are still being refused.
    throttle_status = self.check_too_many_requests(url)
    if throttle_status != 0:
        return throttle_status

    # Would prefer to specify a list of playlist_items in order
    # to control rate of fetches (and hopefully avoid TOO_MANY REQUESTS)
    # But.. when you use playlist_items you do NOT get the total number
    # of items in the playlist as part of the results. Further, if you
    # try to get a playlist item out of range, there is no error, nothing.
    #
    # Therefore, reluctantly not using playlist_items and getting everything
    # at once (although no downloaded trailers).

    index_logger = TfhIndexLogger(self, trailer_handler, url)
    progress_hook = TFHIndexProgressHook(self).status_hook
    options = {
        'forcejson': True,
        'noplaylist': False,
        'skip_download': True,
        'logger': index_logger,
        'sleep_interval': 10,
        'max_sleep_interval': 240,
        'playlistrandom': True,
        'progress_hooks': [progress_hook],
    }

    # Optional cookie file and cache directory: only handed to youtube-dl
    # when configured and present on disk.
    optional_paths = (
        ('cookiefile', Settings.get_youtube_dl_cookie_path),
        ('cachedir', Settings.get_youtube_dl_cache_path),
    )
    for option_name, path_getter in optional_paths:
        configured_path = path_getter()
        if len(configured_path) > 0 and os.path.exists(configured_path):
            options[option_name] = configured_path

    if len(trailers_to_download) > 10:
        options['playlist_items'] = trailers_to_download

    Monitor.throw_exception_if_abort_requested()
    try:
        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])
    except AbortException:
        reraise(*sys.exc_info())
    except Exception as exc:
        # Only log the exception when no error code has been recorded.
        if self._error == 0:
            clz._logger.exception(exc)

    # Error codes 0 and 99 do not trigger a dump of the collected output.
    if self._error not in (0, 99):
        clz._logger.debug('Results for url:', url, 'error:', self._error)
        for dump_messages in (index_logger.log_error,
                              index_logger.log_debug,
                              index_logger.log_warning):
            dump_messages()

    Monitor.throw_exception_if_abort_requested(timeout=DELAY)
    return self._error
def get_tfh_index(self, url: str, trailer_handler,
                  block: bool = False) -> int:
    """
    Fetch the index of the Trailers From Hell playlist.

    Only playlist metadata is requested ('skip_download' together with
    'extract_flat'); no trailers are downloaded. Note that the entire
    list is well over a thousand entries and that indiscriminate
    requests can get the dreaded "429" code from Youtube (Too Many
    Requests), which will cause downloads to be denied for an extended
    period of time and potentially banned. To limit that risk the call
    is delayed between transactions, runs under the TFH source lock and
    pushes the resume time further out after every TOO_MANY_REQUESTS
    result.

    :param url: points to playlist
    :param trailer_handler: Call back to DiscoverTFHMovies to process each
           returned entry as it occurs (handed to TfhIndexLogger).
    :param block: If True, wait until no longer TOO_MANY_REQUESTS. If
           False, return immediately while requests are still refused.
    :return: the non-zero result of check_too_many_requests when not
             blocking and requests are currently refused; otherwise
             self._error (presumably updated by the logger / progress
             hook while youtube-dl runs -- not visible in this method).
    :raises AbortException: propagated unchanged when raised during the
            fetch.
    """
    clz = VideoDownloader
    clz.delay_between_transactions(Movie.TFH_SOURCE, False)
    tfh_index_logger = TfhIndexLogger(self, trailer_handler, url)

    # Acquire the lock BEFORE entering the try block: the finally clause
    # below must only release the lock (and update the retry bookkeeping)
    # when the lock was actually obtained.
    clz.get_lock(Movie.TFH_SOURCE)
    try:
        # HAVE LOCK

        if not block:
            too_many_requests = clz.check_too_many_requests(
                url, Movie.TFH_SOURCE)
            if too_many_requests != 0:
                if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                    clz._logger.debug_verbose(
                        f'Not getting tfh_index url: {url} Too Many Requests'
                    )
                # The finally clause still runs on this early return.
                return too_many_requests
        else:
            clz.wait_if_too_many_requests(Movie.TFH_SOURCE, True)

        # Would prefer to specify a list of playlist_items in order
        # to control rate of fetches (and hopefully avoid TOO_MANY REQUESTS)
        # But.. when you use playlist_items you do NOT get the total number
        # of items in the playlist as part of the results. Further, if you
        # try to get a playlist item out of range, there is no error,
        # nothing.
        #
        # Therefore, reluctantly not using playlist_items and getting
        # everything at once (although no downloaded trailers).
        #
        # Example of what is returned (as recorded by the original author):
        #
        # {
        #     "_type": "playlist",
        #     "entries": [
        #         {
        #             "_type": "url_transparent",
        #             "ie_key": "Youtube",
        #             "id": "Sz0FCYJaQUc",
        #             "url": "Sz0FCYJaQUc",
        #             "title": "WATCH LIVE: The Old Path Bible Exposition"
        #                      " - April 24, 2020, 7 PM PHT",
        #             "description": null,
        #             "duration": 10235.0,
        #             "view_count": null,
        #             "uploader": null
        #         }
        #     ]
        # }

        ydl_opts = {
            'forcejson': True,
            'noplaylist': False,
            'extract_flat': 'in_playlist',
            'ignoreerrors': True,
            'skip_download': True,
            'logger': tfh_index_logger,
            'sleep_interval': 1,
            'max_sleep_interval': 8,
            'playlistrandom': True,
            'progress_hooks': [TFHIndexProgressHook(self).status_hook],
            # 'debug_printtraffic': True
        }

        # Cookie file and cache directory are optional: only pass them on
        # when they are configured and exist on disk.
        cookie_path = Settings.get_youtube_dl_cookie_path()
        if cookie_path and os.path.exists(cookie_path):
            ydl_opts['cookiefile'] = cookie_path

        cache_dir = Settings.get_youtube_dl_cache_path()
        if cache_dir and os.path.exists(cache_dir):
            ydl_opts['cachedir'] = cache_dir

        Monitor.throw_exception_if_abort_requested()
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

    except AbortException:
        reraise(*sys.exc_info())
    except Exception:
        # Deliberately best-effort: other failures are reported through
        # self._error. Only log the traceback when no error code was
        # recorded, so the failure is not lost.
        if self._error == 0:
            clz._logger.exception(f'Error downloading: url: {url}')
    finally:
        clz.release_lock(Movie.TFH_SOURCE)
        if self._error != Constants.HTTP_TOO_MANY_REQUESTS:
            clz._retry_attempts = 0
        else:
            # Each consecutive TOO_MANY_REQUESTS pushes the resume time
            # out by another RETRY_DELAY.
            clz._retry_attempts += 1
            clz._too_many_requests_resume_time = (
                datetime.datetime.now()
                + (RETRY_DELAY * clz._retry_attempts))

    # Error codes 0 and 99 do not trigger a dump of the collected output.
    if self._error not in (0, 99):
        clz._logger.debug('Results for url:', url, 'error:', self._error)
        tfh_index_logger.log_error()
        tfh_index_logger.log_debug()
        tfh_index_logger.log_warning()

    Monitor.throw_exception_if_abort_requested()
    return self._error