Ejemplo n.º 1
0
def test_proxy_playlist_download(tmp_path):
    """Download the test playlist and check the exact set of files written."""
    resource = YouTubeResource(YOUTUBE_TEST_PLAYLIST)
    resource.download(tmp_path)

    # The files are looked up in the 'Playlist' subfolder of the target dir.
    playlist_dir = os.path.join(tmp_path, 'Playlist')
    downloaded = set(os.listdir(playlist_dir))

    # An .mp4 and a .jpg are expected for each of the two IDs below.
    wanted = {
        'oXnzstpBEOg.mp4',
        'oXnzstpBEOg.jpg',
        'zbkizy-Y3qw.mp4',
        'zbkizy-Y3qw.jpg',
    }

    assert downloaded == wanted
Ejemplo n.º 2
0
def test_proxy_download(tmp_path):
    """Refresh the proxy list, download the test video, expect an .mp4."""
    proxy.get_proxies(refresh=True)
    assert len(proxy.PROXY_LIST) > 1

    resource = YouTubeResource(YOUTUBE_TEST_VIDEO)
    resource.download(tmp_path)

    # The files are looked up in the 'Watch' subfolder of the target dir.
    watch_dir = os.path.join(tmp_path, 'Watch')
    found_mp4 = any(name.endswith('.mp4') for name in os.listdir(watch_dir))

    assert found_mp4, 'Video file not found'
Ejemplo n.º 3
0
def test_bad_proxies_get_banned(tmp_path):
    """Seed the proxy list with fake proxies and check they end up banned."""
    # Made-up addresses that are not expected to work as proxies.
    bogus_proxies = [
        '123.123.123.123:1234',
        '142.123.1.234:123345',
        '156.245.233.211:12323',
        '11.22.33.44:123',
    ]
    # Hand the proxy module its own list object so that any in-place changes
    # it makes cannot alter the reference list used in the assertion below.
    proxy.PROXY_LIST = list(bogus_proxies)

    resource = YouTubeResource(YOUTUBE_TEST_VIDEO)
    resource.download(tmp_path)

    # Every fake proxy must have been recorded in BROKEN_PROXIES.
    assert set(bogus_proxies) <= set(proxy.BROKEN_PROXIES)
Ejemplo n.º 4
0
def download_from_web(web_url,
                      download_settings,
                      file_format=file_formats.MP4,
                      ext="",
                      download_ext=""):
    """
    Download `web_url` with YoutubeDL and copy the result into local storage.

    Args:
        web_url (str): URL of the video or subtitles to download
        download_settings (dict): options to pass onto YoutubeDL; the function
            works on a private copy, so the caller's dict is not modified
        file_format (str): one of "mp4" or "vtt"
        ext (str): extension to use as part of `outtmpl` given to YoutubeDL
        download_ext (str): extension appended to `outtmpl` after downloading

    This function operates differently when downloading videos and subtitles.
    For videos we set the `outtmpl` to the actual filename that will be
    downloaded, and the function must be called with ext=".mp4" and
    download_ext="".
    For subtitles we set the `outtmpl` to an extension-less string, and
    YoutubeDL automatically appends the language code and vtt extension, so
    the function must be called with ext="" and
    download_ext=".{youtube_lang}.vtt".

    Returns:
        str: the cached filename if this download was done before, otherwise
        a new filename of the form "{get_hash(downloaded file)}.{file_format}"

    Raises:
        youtube_dl.utils.DownloadError: if the resource info cannot be
            obtained or the expected file does not exist after downloading
    """
    # The cache key is computed from the settings exactly as the caller
    # supplied them, before any of the internal options below are added.
    key = generate_key("DOWNLOADED", web_url, settings=download_settings)
    cache_file = get_cache_filename(key)
    if cache_file:
        return cache_file

    # Work on a private copy: the options added below ("outtmpl" and
    # "writethumbnail") are internal details of this download and must not
    # leak into the dict owned by the caller.
    download_settings = dict(download_settings or {})

    # Use the hash of web_url as the temporary storage name
    url_hash = hashlib.md5(web_url.encode('utf-8')).hexdigest()
    tempfilename = "{}{ext}".format(url_hash, ext=ext)
    outtmpl_path = os.path.join(tempfile.gettempdir(), tempfilename)
    download_settings["outtmpl"] = outtmpl_path
    destination_path = outtmpl_path + download_ext  # file dest. after download

    # Delete leftovers from a previous download so that the existence checks
    # below only succeed for a file produced by this call
    for stale_path in (outtmpl_path, destination_path):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    # Download the web_url which can be either a video or subtitles
    if not config.USEPROXY:
        # Connect to YouTube directly
        with youtube_dl.YoutubeDL(download_settings) as ydl:
            ydl.download([web_url])
            if not os.path.exists(destination_path):
                raise youtube_dl.utils.DownloadError('Failed to download ' +
                                                     web_url)
    else:
        # Connect to YouTube via an HTTP proxy
        yt_resource = YouTubeResource(web_url,
                                      useproxy=True,
                                      options=download_settings)
        resource_info = yt_resource.get_resource_info()
        if resource_info is None:
            raise youtube_dl.utils.DownloadError('Failed to get resource info')
        # overwrite default behaviour
        download_settings["writethumbnail"] = False
        if file_format == file_formats.VTT:
            # We need to use the proxy when downloading subtitles
            download_result = yt_resource.download(options=download_settings,
                                                   useproxy=True)
        else:
            # For video files we can skip the proxy for faster download speed
            download_result = yt_resource.download(options=download_settings)
        if download_result is None or not os.path.exists(destination_path):
            raise youtube_dl.utils.DownloadError(
                'Failed to download resource ' + web_url)

    # Copy the downloaded file to local storage under a name derived from
    # get_hash() of the file
    filename = "{}.{}".format(get_hash(destination_path), file_format)
    storage_path = config.get_storage_path(filename)
    with open(destination_path, "rb") as dlf, open(storage_path, 'wb') as destf:
        shutil.copyfileobj(dlf, destf)

    # Remember the result so the next call with the same key returns early
    FILECACHE.set(key, bytes(filename, "utf-8"))
    return filename