def get_info(self, url: str) -> Tuple[int, Optional[List[MovieType]]]:
    """
    Query a site for a video's metadata without downloading the video.

    youtube_dl is run with 'skip_download' and 'forcejson' set and its
    output is collected by a TfhInfoLogger.

    :param url: Address of the video to get information about
    :return: Tuple of (return code, trailer information).
             The return code is 429 when check_too_many_requests reports
             a non-zero value, otherwise it is always 0. The trailer
             information is the value of info_logger.get_trailer_info(),
             or None when an error was recorded or an exception occurred.
    :raises AbortException: re-raised when an abort is requested
    """
    clz = VideoDownloader

    # Don't even try while requests are being refused.
    if clz.check_too_many_requests(url) != 0:
        return 429, None

    trailer_info: Optional[List[MovieType]] = None
    info_logger = TfhInfoLogger(self, url, parse_json_as_youtube=False)

    try:
        progress_hook = TrailerInfoProgressHook(self).status_hook
        options = {
            'forcejson': 'true',
            'skip_download': 'true',
            'logger': info_logger,
            'progress_hooks': [progress_hook]
        }

        # The cookie file is optional; only pass it along when it exists.
        cookie_file = Settings.get_youtube_dl_cookie_path()
        if len(cookie_file) > 0 and os.path.exists(cookie_file):
            options['cookiefile'] = cookie_file

        Monitor.throw_exception_if_abort_requested()

        # Ask youtube_dl for the information
        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])

        # Poll until the logger reports completion or an error is recorded
        while info_logger.is_finished is None and self._error == 0:
            Monitor.throw_exception_if_abort_requested(timeout=0.5)

        if self._error == 0:
            trailer_info = info_logger.get_trailer_info()
    except AbortException:
        reraise(*sys.exc_info())
    except Exception as e:
        if self._error == 0:
            clz._logger.exception(e)
            if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                clz._logger.debug_verbose(
                    'Failed to download site info for:', url)
        trailer_info = None

    # 0 is "no error"; 99 is skipped here as well.
    if self._error not in (0, 99):
        clz._logger.debug('Results for url:', url, 'error:', self._error)
        for emit in (info_logger.log_debug,
                     info_logger.log_warning,
                     info_logger.log_error):
            emit()

    Monitor.throw_exception_if_abort_requested(timeout=DELAY)

    # NOTE(review): the return code is 0 even when self._error is set;
    # callers can only detect the failure through trailer_info being
    # None. Confirm that this is intended.
    return 0, trailer_info
def get_tfh_index(self, url: str, trailers_to_download: str,
                  trailer_handler) -> int:
    """
    Fetch the entries of the Trailers From Hell playlist.

    The playlist holds well over a thousand entries. Indiscriminate
    downloading can trigger the dreaded "429" (Too Many Requests) response
    from Youtube, which causes downloads to be denied for an extended
    period of time and can potentially lead to a ban. Limiting how many
    trailers are requested at a time, caching and throttling of requests
    all help to prevent this.

    :param url: Points to the playlist
    :param trailers_to_download: Specifies the index of which trailers to
           get the url of. An empty value means get all urls. It is only
           handed to youtube_dl (as 'playlist_items') when it is longer
           than 10 characters.
    :param trailer_handler: Call back to DiscoverTFHMovies to process each
           returned entry as it occurs. It is given to the TfhIndexLogger.
    :return: The non-zero value of check_too_many_requests when there is
             one, otherwise the value of self._error after the run.
    :raises AbortException: re-raised when an abort is requested
    """
    clz = VideoDownloader

    too_many_requests = self.check_too_many_requests(url)
    if too_many_requests != 0:
        return too_many_requests

    # Passing a list of playlist_items would be the preferred way to
    # control the rate of fetches (and hopefully avoid TOO_MANY_REQUESTS).
    # However, with playlist_items the results do NOT include the total
    # number of items in the playlist, and asking for an item that is out
    # of range produces no error at all.
    #
    # So, reluctantly, playlist_items is not part of the default options
    # and everything is fetched at once (without downloading trailers).

    tfh_index_logger = TfhIndexLogger(self, trailer_handler, url)
    progress_hook = TFHIndexProgressHook(self).status_hook

    # Options left out on purpose: 'extract_flat' ('in_playlist'),
    # 'playlist_items' (see above) and 'debug_printtraffic'.
    options = {
        'forcejson': True,
        'noplaylist': False,
        'skip_download': True,
        'logger': tfh_index_logger,
        'sleep_interval': 10,
        'max_sleep_interval': 240,
        'playlistrandom': True,
        'progress_hooks': [progress_hook],
    }

    # Cookie file and cache directory are optional; use them when present.
    cookie_file = Settings.get_youtube_dl_cookie_path()
    if len(cookie_file) > 0 and os.path.exists(cookie_file):
        options['cookiefile'] = cookie_file

    cache_path = Settings.get_youtube_dl_cache_path()
    if len(cache_path) > 0 and os.path.exists(cache_path):
        options['cachedir'] = cache_path

    # NOTE(review): this compares the length of the string, not a number
    # of playlist entries, against 10 -- confirm the intent.
    if len(trailers_to_download) > 10:
        options['playlist_items'] = trailers_to_download

    Monitor.throw_exception_if_abort_requested()

    try:
        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])
    except AbortException:
        reraise(*sys.exc_info())
    except Exception as e:
        # Only log the exception when no error has been recorded already.
        if self._error == 0:
            clz._logger.exception(e)

    if self._error not in (0, 99):
        clz._logger.debug('Results for url:', url, 'error:', self._error)
        for emit in (tfh_index_logger.log_error,
                     tfh_index_logger.log_debug,
                     tfh_index_logger.log_warning):
            emit()

    Monitor.throw_exception_if_abort_requested(timeout=DELAY)
    return self._error
def get_video(self, url: str, folder: str,
              movie_id: Union[int, str]) -> Tuple[int, Optional[MovieType]]:
    """
    Downloads a video from the given url into the given folder.

    Files are created by youtube_dl with the prefix '_rt_<movie_id>'. When
    the download fails, or is aborted, every file in the folder carrying
    that prefix is deleted again (best effort).

    :param url: To download from
    :param folder: To download to
    :param movie_id: To pass to youtube-dl to embed in the created file name
    :return: Tuple of (error code, movie).
             (429, None) when check_too_many_requests reports a non-zero
             value. Otherwise the error code is self._error: it is set
             to 3 here when an unexpected exception occurs and to 1 when
             no movie information was produced (both only when no other
             error was recorded before). movie is the data collected by
             the VideoLogger, or None whenever the error code is not 0.
    :raises AbortException: re-raised (after self._error is set to 99 and
            the downloaded files are deleted) when an abort is requested
    """
    clz = VideoDownloader

    if clz.check_too_many_requests(url) != 0:
        return 429, None

    def downloaded_files() -> List[str]:
        # Every file which youtube_dl created for this movie_id.
        return glob.glob(os.path.join(folder, f'_rt_{movie_id}*'))

    def discard_downloaded_files() -> None:
        # Best-effort cleanup; a file that can not be deleted must not
        # mask the error which is being reported.
        for stale_path in downloaded_files():
            try:
                os.remove(stale_path)
            except OSError as e:
                clz._logger.debug('Unable to delete:', stale_path,
                                  'reason:', str(e))

    # The embedded % fields are for youtube_dl to fill in.
    template = os.path.join(folder, f'_rt_{movie_id}_%(title)s.%(ext)s')
    movie = None

    # Collect and respond to output from youtube-dl
    video_logger = VideoLogger(self, url)
    try:
        ydl_opts = {
            # A non-empty string, so youtube_dl treats it as enabled.
            'forcejson': 'true',
            'outtmpl': template,
            # Must be a real boolean: the string 'false' is truthy and
            # would leave the Last-modified time stamping switched on.
            'updatetime': False,
            'logger': video_logger,
            'progress_hooks': [VideoDownloadProgressHook(self).status_hook]
        }

        # Optional cookie-file used to avoid youtube 429 errors.
        cookie_path = Settings.get_youtube_dl_cookie_path()
        if len(cookie_path) > 0 and os.path.exists(cookie_path):
            ydl_opts['cookiefile'] = cookie_path

        # Start download
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Wait until the logger has the movie data or an error is recorded
        while video_logger.data is None and self._error == 0:
            Monitor.throw_exception_if_abort_requested(timeout=0.5)

        movie = video_logger.data
        if self._error == 0:
            candidates = downloaded_files()
            # Only correct the path when a file was actually found.
            if candidates:
                trailer_file = candidates[0]
                reported_file = movie.get(Movie.TRAILER)

                # Don't know why, but sometimes youtube_dl returns
                # incorrect file extension
                if trailer_file != reported_file:
                    if clz._logger.isEnabledFor(
                            LazyLogger.DEBUG_EXTRA_VERBOSE):
                        clz._logger.debug_extra_verbose(
                            'youtube_dl gave incorrect file name:',
                            reported_file, 'changing to:',
                            trailer_file)
                    movie[Movie.TRAILER] = trailer_file
    except AbortException:
        self._error = 99
        discard_downloaded_files()
        reraise(*sys.exc_info())
    except Exception as e:
        # Keep an error that was recorded earlier; otherwise flag and log.
        if self._error == 0:
            self._error = 3
            clz._logger.exception(e)

    if self._error == 0 and movie is None:
        self._error = 1

    if self._error != 0:
        clz._logger.debug('Results for url:', url, 'error:', self._error)
        video_logger.log_debug()
        video_logger.log_warning()
        video_logger.log_error()
        movie = None
        discard_downloaded_files()

    Monitor.throw_exception_if_abort_requested(timeout=DELAY)
    return self._error, movie
def get_tfh_index(self, url: str, trailer_handler,
                  block: bool = False) -> int:
    """
    Fetch the entries of the Trailers From Hell playlist.

    The playlist holds well over a thousand entries. Indiscriminate
    downloading can trigger the dreaded "429" (Too Many Requests) response
    from Youtube, which causes downloads to be denied for an extended
    period of time and can potentially lead to a ban. Limiting how many
    trailers are requested at a time, caching and throttling of requests
    all help to prevent this.

    :param url: Points to the playlist
    :param trailer_handler: Call back to DiscoverTFHMovies to process each
           returned entry as it occurs. It is given to the TfhIndexLogger.
    :param block: If true, then wait until no longer TOO_MANY_REQUESTS.
           Otherwise return at once when check_too_many_requests reports
           a non-zero value.
    :return: The non-zero value of check_too_many_requests when not
             blocking and there is one, otherwise the value of
             self._error after the run.
    :raises AbortException: re-raised when an abort is requested
    """
    clz = VideoDownloader
    clz.delay_between_transactions(Movie.TFH_SOURCE, False)
    tfh_index_logger = TfhIndexLogger(self, trailer_handler, url)

    try:
        # NOTE(review): the lock is acquired inside the try whose finally
        # releases it, so a failing get_lock is followed by release_lock.
        # Confirm that release_lock tolerates this.
        clz.get_lock(Movie.TFH_SOURCE)
        # HAVE LOCK

        if block:
            clz.wait_if_too_many_requests(Movie.TFH_SOURCE, True)
        else:
            refusal_code = clz.check_too_many_requests(
                url, Movie.TFH_SOURCE)
            if refusal_code != 0:
                if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                    clz._logger.debug_verbose(
                        f'Not getting tfh_index url: {url} Too Many Requests'
                    )
                return refusal_code

        # Passing a list of playlist_items would be the preferred way to
        # control the rate of fetches (and hopefully avoid
        # TOO_MANY_REQUESTS). However, with playlist_items the results do
        # NOT include the total number of items in the playlist, and
        # asking for an item that is out of range produces no error at
        # all.
        #
        # So, reluctantly, playlist_items is not used and everything is
        # fetched at once (without downloading trailers).
        #
        # Example of what is returned:
        #
        # {
        #     "_type": "playlist",
        #     "entries": [
        #         {
        #             "_type": "url_transparent",
        #             "ie_key": "Youtube",
        #             "id": "Sz0FCYJaQUc",
        #             "url": "Sz0FCYJaQUc",
        #             "title": "WATCH LIVE: The Old Path Bible Exposition -
        #                       April 24, 2020, 7 PM PHT",
        #             "description": null,
        #             "duration": 10235.0,
        #             "view_count": null,
        #             "uploader": null
        #         }
        #     ]
        # }

        progress_hook = TFHIndexProgressHook(self).status_hook

        # Options left out on purpose: 'playlist_items' (see above) and
        # 'debug_printtraffic'.
        options = {
            'forcejson': True,
            'noplaylist': False,
            'extract_flat': 'in_playlist',
            'ignoreerrors': True,
            'skip_download': True,
            'logger': tfh_index_logger,
            'sleep_interval': 1,
            'max_sleep_interval': 8,
            'playlistrandom': True,
            'progress_hooks': [progress_hook]
        }

        # Cookie file and cache directory are optional; use when present.
        cookie_file = Settings.get_youtube_dl_cookie_path()
        if len(cookie_file) > 0 and os.path.exists(cookie_file):
            options['cookiefile'] = cookie_file

        cache_path = Settings.get_youtube_dl_cache_path()
        if len(cache_path) > 0 and os.path.exists(cache_path):
            options['cachedir'] = cache_path

        Monitor.throw_exception_if_abort_requested()

        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])
    except AbortException:
        reraise(*sys.exc_info())
    except Exception:
        # Only log the exception when no error has been recorded already.
        if self._error == 0:
            clz._logger.exception(f'Error downloading: url: {url}')
    finally:
        clz.release_lock(Movie.TFH_SOURCE)

    # Each consecutive TOO_MANY_REQUESTS pushes the resume time further
    # out; any other outcome resets the retry count.
    if self._error == Constants.HTTP_TOO_MANY_REQUESTS:
        clz._retry_attempts += 1
        clz._too_many_requests_resume_time = (
            datetime.datetime.now()
            + (RETRY_DELAY * clz._retry_attempts))
    else:
        clz._retry_attempts = 0

    if self._error not in (0, 99):
        clz._logger.debug('Results for url:', url, 'error:', self._error)
        for emit in (tfh_index_logger.log_error,
                     tfh_index_logger.log_debug,
                     tfh_index_logger.log_warning):
            emit()

    Monitor.throw_exception_if_abort_requested()
    return self._error
def get_info(self, url: str, movie_source: str,
             block: bool = False) -> Tuple[int, Optional[List[MovieType]]]:
    """
    Query a site for a video's metadata without downloading the video.

    youtube_dl is run with 'skip_download' and 'forcejson' set and its
    output is collected by a TfhInfoLogger.

    :param url: Address of the video to get information about
    :param movie_source: Used to determine delay between requests. Also
           names the lock which is held during the request.
    :param block: Wait extended period of time for TOO_MANY_REQUESTS,
           if needed. Otherwise return at once when
           check_too_many_requests reports a non-zero value.
    :return: Tuple of (return code, trailer information).
             When not blocking and check_too_many_requests reports a
             non-zero value, that value is the return code. In every
             other case the return code is 0. The trailer information is
             the value of info_logger.get_trailer_info(), or None when an
             error was recorded or an exception occurred.
    :raises AbortException: re-raised when an abort is requested
    """
    clz = VideoDownloader
    trailer_info: Optional[List[MovieType]] = None

    clz.delay_between_transactions(movie_source, False)
    info_logger = TfhInfoLogger(self, url, parse_json_as_youtube=False)

    try:
        # NOTE(review): the lock is acquired inside the try whose finally
        # releases it, so a failing get_lock is followed by release_lock.
        # Confirm that release_lock tolerates this.
        clz.get_lock(movie_source)
        # HAVE LOCK

        if block:
            clz.wait_if_too_many_requests(movie_source, True)
        else:
            refusal_code = clz.check_too_many_requests(url, movie_source)
            if refusal_code != 0:
                if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                    clz._logger.debug_verbose(
                        f'Not getting info url: {url} Too Many Requests')
                return refusal_code, None

        progress_hook = TrailerInfoProgressHook(self).status_hook
        options = {
            'forcejson': 'true',
            'skip_download': 'true',
            'logger': info_logger,
            'progress_hooks': [progress_hook]
        }

        # The cookie file is optional; only pass it along when it exists.
        cookie_file = Settings.get_youtube_dl_cookie_path()
        if len(cookie_file) > 0 and os.path.exists(cookie_file):
            options['cookiefile'] = cookie_file

        Monitor.throw_exception_if_abort_requested()

        # Ask youtube_dl for the information
        with youtube_dl.YoutubeDL(options) as downloader:
            downloader.download([url])

        # Poll until the logger reports completion or an error is recorded
        while info_logger.is_finished is None and self._error == 0:
            Monitor.throw_exception_if_abort_requested(timeout=0.5)

        if self._error == 0:
            trailer_info = info_logger.get_trailer_info()
    except AbortException:
        reraise(*sys.exc_info())
    except Exception:
        if self._error == 0:
            clz._logger.exception(
                f'Error downloading: {movie_source} url: {url}')
            if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                clz._logger.debug_verbose(
                    'Failed to download site info for:', url)
        trailer_info = None
    finally:
        clz.release_lock(movie_source)
        # LOCK RELEASED

    # Each consecutive TOO_MANY_REQUESTS pushes the resume time further
    # out; any other outcome resets the retry count.
    if self._error == Constants.HTTP_TOO_MANY_REQUESTS:
        clz._retry_attempts += 1
        clz._too_many_requests_resume_time = (
            datetime.datetime.now()
            + (RETRY_DELAY * clz._retry_attempts))
    else:
        clz._retry_attempts = 0

    if self._error not in (0, 99):
        clz._logger.debug('Results for url:', url, 'error:', self._error)
        for emit in (info_logger.log_debug,
                     info_logger.log_warning,
                     info_logger.log_error):
            emit()

    Monitor.throw_exception_if_abort_requested()

    # NOTE(review): the return code is 0 even when self._error is set;
    # callers can only detect the failure through trailer_info being
    # None. Confirm that this is intended.
    return 0, trailer_info
def get_video(self, url: str, folder: str, movie_id: Union[int, str],
              title: str, source: str,
              block: bool = True) -> Tuple[int, Optional[MovieType]]:
    """
    Downloads a video from the given url into the given folder.

    The lock for the given source is held while downloading. Files are
    created by youtube_dl with the prefix '_rt_<movie_id>'. When the
    download fails, or is aborted, every file in the folder carrying that
    prefix is deleted again (best effort).

    :param url: To download from
    :param folder: To download to
    :param movie_id: To pass to youtube-dl to embed in the created file name
    :param title: For logging
    :param source: Movie source used to determine delay. Also names the
           lock which is held during the download.
    :param block: Wait extended period of time for TOO_MANY_REQUESTS,
           if needed. Otherwise return at once when
           check_too_many_requests reports a non-zero value.
    :return: Tuple of (error code, movie).
             When not blocking and check_too_many_requests reports a
             non-zero value: (that value, None). Otherwise the error code
             is self._error; set_error(3) is called when an unexpected
             exception occurs and set_error(1) when no movie information
             was produced. movie is the data collected by the
             VideoLogger, or None whenever the error code is not 0.
    :raises AbortException: re-raised (after set_error(99, force=True)
            and deletion of the downloaded files) when an abort is
            requested
    """
    clz = VideoDownloader
    movie = None
    video_logger: Optional[VideoLogger] = None

    def downloaded_files() -> List[str]:
        # Every file which youtube_dl created for this movie_id.
        return glob.glob(os.path.join(folder, f'_rt_{movie_id}*'))

    def discard_downloaded_files() -> None:
        # Best-effort cleanup; a file that can not be deleted must not
        # mask the error which is being reported.
        for stale_path in downloaded_files():
            try:
                os.remove(stale_path)
            except OSError as e:
                clz._logger.debug(
                    f'Unable to delete: {stale_path} reason: {e}')

    if clz._logger.isEnabledFor(LazyLogger.DISABLED):
        clz._logger.debug_extra_verbose(f'title: {title}')

    try:
        clz.get_lock(source)
        # HAVE LOCK

        if not block:
            too_many_requests = clz.check_too_many_requests(url, source)
            if too_many_requests != 0:
                if clz._logger.isEnabledFor(LazyLogger.DEBUG_VERBOSE):
                    clz._logger.debug_verbose(
                        f'Not getting video url: {url} Too Many Requests')
                return too_many_requests, None
        else:
            clz.wait_if_too_many_requests(source, True)

        clz.delay_between_transactions(source, True)

        # The embedded % fields are for youtube_dl to fill in.
        template = os.path.join(folder, f'_rt_{movie_id}_%(title)s.%(ext)s')

        # Collect and respond to output from youtube-dl. Output for
        # iTunes is not parsed as youtube json.
        parse_json_as_youtube = source != Movie.ITUNES_SOURCE
        video_logger = VideoLogger(
            self, url, parse_json_as_youtube=parse_json_as_youtube)
        ydl_opts = {
            # A non-empty string, so youtube_dl treats it as enabled.
            'forcejson': 'true',
            'outtmpl': template,
            # Must be a real boolean: the string 'false' is truthy and
            # would leave the Last-modified time stamping switched on.
            'updatetime': False,
            'logger': video_logger,
            'progress_hooks': [VideoDownloadProgressHook(self).status_hook]
        }

        # Optional cookie-file used to avoid youtube 429 errors.
        cookie_path = Settings.get_youtube_dl_cookie_path()
        if len(cookie_path) > 0 and os.path.exists(cookie_path):
            ydl_opts['cookiefile'] = cookie_path

        # Start download
        # Sometimes fail with Nonetype or other errors because of a URL
        # that requires a login, is for an ADULT movie, etc.
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])

        # Wait until the logger has the movie data, an error is recorded
        # or the download is flagged as finished.
        while (video_logger.data is None and self._error == 0
               and not self._download_finished):
            Monitor.throw_exception_if_abort_requested(timeout=0.5)

        movie = video_logger.data

        # movie is still None when the wait ended because the download
        # finished without any data; that case is reported further down
        # through set_error(1) instead of failing here on None.
        if self._error == 0 and movie is not None:
            candidates = downloaded_files()
            # Only touch the path when a file was actually found.
            if candidates:
                trailer_file = candidates[0]

                # Don't know why, but sometimes youtube_dl returns
                # incorrect file extension
                reported_file = movie.setdefault(Movie.TRAILER,
                                                 trailer_file)
                if trailer_file != reported_file:
                    if clz._logger.isEnabledFor(LazyLogger.DISABLED):
                        clz._logger.debug_extra_verbose(
                            'youtube_dl gave incorrect file name:',
                            reported_file, 'changing to:',
                            trailer_file)
                    movie[Movie.TRAILER] = trailer_file
    except AbortException:
        self.set_error(99, force=True)
        discard_downloaded_files()
        reraise(*sys.exc_info())
    except Exception:
        self.set_error(3)
        # Only log when this failure is the error which got recorded.
        if self._error == 3:
            clz._logger.exception(
                f'Error downloading: {title} {source} url: {url}')
    finally:
        clz.release_lock(source)
        # LOCK RELEASED

    # Each consecutive TOO_MANY_REQUESTS pushes the resume time further
    # out; any other outcome resets the retry count.
    if self._error != Constants.HTTP_TOO_MANY_REQUESTS:
        clz._retry_attempts = 0
    else:
        clz._retry_attempts += 1
        clz._too_many_requests_resume_time = (
            datetime.datetime.now()
            + (RETRY_DELAY * clz._retry_attempts))

    if movie is None:
        self.set_error(1)

    if self._error != 0:
        clz._logger.debug(
            f'Results for {title} url:{url} error: {self._error}')
        # The logger does not exist when the failure happened before it
        # was created (for example while waiting for the lock).
        if video_logger is not None:
            video_logger.log_debug()
            video_logger.log_warning()
            video_logger.log_error()
        movie = None
        discard_downloaded_files()

    Monitor.throw_exception_if_abort_requested()
    return self._error, movie