def test_proxy_playlist_download(tmp_path):
    """
    Download the YOUTUBE_TEST_PLAYLIST resource into `tmp_path` and check that
    the 'Playlist' subdirectory holds exactly the expected .mp4 and .jpg files.
    """
    resource = YouTubeResource(YOUTUBE_TEST_PLAYLIST)
    resource.download(tmp_path)

    playlist_dir = os.path.join(tmp_path, 'Playlist')
    downloaded = set(os.listdir(playlist_dir))

    # Order on disk is irrelevant, so compare as sets
    wanted = {
        'zbkizy-Y3qw.jpg',
        'oXnzstpBEOg.mp4',
        'oXnzstpBEOg.jpg',
        'zbkizy-Y3qw.mp4',
    }
    assert downloaded == wanted
def test_proxy_download(tmp_path):
    """
    Refresh the proxy list, download the YOUTUBE_TEST_VIDEO resource into
    `tmp_path`, and check that the 'Watch' subdirectory contains an .mp4 file.
    """
    # The refreshed proxy list must offer more than one proxy
    proxy.get_proxies(refresh=True)
    assert len(proxy.PROXY_LIST) > 1

    resource = YouTubeResource(YOUTUBE_TEST_VIDEO)
    resource.download(tmp_path)

    watch_dir = os.path.join(tmp_path, 'Watch')
    found_mp4 = any(name.endswith('.mp4') for name in os.listdir(watch_dir))
    assert found_mp4, 'Video file not found'
def test_bad_proxies_get_banned(tmp_path):
    """
    Seed proxy.PROXY_LIST with made-up proxy addresses, run a download, and
    check that every one of them ends up in proxy.BROKEN_PROXIES.
    """
    # Made-up addresses that are not expected to work as proxies
    bogus_proxies = [
        '123.123.123.123:1234',
        '142.123.1.234:123345',
        '156.245.233.211:12323',
        '11.22.33.44:123',
    ]

    # Hand the proxy module its own list so ours stays intact for the final check
    proxy.PROXY_LIST = list(bogus_proxies)

    resource = YouTubeResource(YOUTUBE_TEST_VIDEO)
    resource.download(tmp_path)

    # Each bogus proxy should have been recorded as broken
    assert set(bogus_proxies) <= set(proxy.BROKEN_PROXIES)
def download_from_web(web_url, download_settings, file_format=file_formats.MP4, ext="", download_ext=""):
    """
    Download `web_url` using YoutubeDL with the `download_settings` options.

    Args:
        web_url (str): URL of the video or subtitles to download
        download_settings (dict): options to pass onto YoutubeDL; the dict is
            copied internally and is not modified by this function
        file_format (str): one of "mp4" or "vtt"
        ext (str): extension to use as part of the `outtmpl` given to YoutubeDL
        download_ext (str): extension to append to `outtmpl` after downloading

    This function operates differently when downloading videos and subtitles.
    For videos we set the `outtmpl` to the actual filename that will be
    downloaded, and the function must be called with ext=".mp4" and
    download_ext="". For subtitles we set the `outtmpl` to an extension-less
    string, and YoutubeDL automatically appends the language code and vtt
    extension, so the function must be called with ext="" and
    download_ext=".{youtube_lang}.vtt"

    Raises:
        youtube_dl.utils.DownloadError: if the resource info cannot be
            obtained (proxy mode) or the expected file is missing after
            the download attempt.

    :return: the cached filename if one exists for this request, otherwise a
             filename of the form {get_hash(file)}.{file_format}
    """
    key = generate_key("DOWNLOADED", web_url, settings=download_settings)
    cache_file = get_cache_filename(key)
    if cache_file:
        return cache_file

    # Work on a private copy: the caller's dict was used to build the cache
    # key above, so adding "outtmpl"/"writethumbnail" to it in place would
    # leak into whatever the caller does with that dict afterwards.
    download_settings = dict(download_settings)

    # Get hash of web_url to act as temporary storage name
    url_hash = hashlib.md5()
    url_hash.update(web_url.encode('utf-8'))
    tempfilename = "{}{ext}".format(url_hash.hexdigest(), ext=ext)
    outtmpl_path = os.path.join(tempfile.gettempdir(), tempfilename)
    download_settings["outtmpl"] = outtmpl_path
    destination_path = outtmpl_path + download_ext  # file dest. after download

    # Delete files in case previously downloaded
    if os.path.exists(outtmpl_path):
        os.remove(outtmpl_path)
    if os.path.exists(destination_path):
        os.remove(destination_path)

    # Download the web_url which can be either a video or subtitles
    if not config.USEPROXY:
        # Connect to YouTube directly
        with youtube_dl.YoutubeDL(download_settings) as ydl:
            ydl.download([web_url])
            if not os.path.exists(destination_path):
                raise youtube_dl.utils.DownloadError('Failed to download ' + web_url)
    else:
        # Connect to YouTube via an HTTP proxy
        yt_resource = YouTubeResource(web_url, useproxy=True, options=download_settings)
        result1 = yt_resource.get_resource_info()
        if result1 is None:
            raise youtube_dl.utils.DownloadError('Failed to get resource info')
        download_settings["writethumbnail"] = False  # overwrite default behaviour
        if file_format == file_formats.VTT:
            # We need to use the proxy when downloading subtitles
            result2 = yt_resource.download(options=download_settings, useproxy=True)
        else:
            # For video files we can skip the proxy for faster download speed
            result2 = yt_resource.download(options=download_settings)
        if result2 is None or not os.path.exists(destination_path):
            raise youtube_dl.utils.DownloadError(
                'Failed to download resource ' + web_url)

    # Write file to local storage
    filename = "{}.{}".format(get_hash(destination_path), file_format)
    with open(destination_path, "rb") as dlf, open(config.get_storage_path(filename), 'wb') as destf:
        shutil.copyfileobj(dlf, destf)

    # Record the stored filename so later calls with the same key hit the cache
    FILECACHE.set(key, bytes(filename, "utf-8"))
    return filename