def test_construct():
    """Check the playlist URL built from the ``short_test_pl`` fixture."""
    playlist = Playlist(short_test_pl)
    canonical_url = (
        'https://www.youtube.com/'
        'playlist?list='
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw'
    )

    assert playlist.construct_playlist_url() == canonical_url
Exemple #2
0
def parse_all_links(Playlist):
    """Collect the video links of a playlist, following "load more" pages.

    The playlist page is fetched and scanned line by line for video tile
    anchors.  The "load more" continuation is then requested repeatedly
    until no further continuation URL is found.

    :param Playlist: playlist object providing ``construct_playlist_url()``
        and ``_load_more_url(html)``.  The parameter name shadows the
        ``Playlist`` class inside this function; it is kept unchanged for
        caller compatibility.
    :returns: list of link strings in the order they were found.  Links
        from the first page are the ``href`` value cut at the first ``&``;
        links from continuation pages match ``/watch?v=...``.
    """
    url = Playlist.construct_playlist_url()
    req = request.get(url)

    tile_marker = 'yt-uix-sessionlink yt-uix-tile-link'
    href_marker = 'href="'

    # Split the page source by line and keep the href of each video tile.
    # Lines that carry the tile class but no href are skipped instead of
    # raising IndexError.
    link_list = [
        line.split(href_marker, 1)[1].split('&', 1)[0]
        for line in req.split('\n')
        if tile_marker in line and href_marker in line
    ]

    # The above only returns 100 or fewer links, so simulate the browser
    # request behind the "load more" link until there is none left.
    load_more_url = Playlist._load_more_url(req)
    # Truthiness (rather than len()) also ends the loop cleanly if the
    # helper ever returns None instead of an empty string.
    while load_more_url:
        load_more = json.loads(request.get(load_more_url))
        videos = re.findall(
            r'href=\"(/watch\?v=[\w-]*)',
            load_more['content_html'],
        )
        # Remove duplicates within this batch while preserving order.
        link_list.extend(OrderedDict.fromkeys(videos))
        load_more_url = Playlist._load_more_url(
            load_more['load_more_widget_html'],
        )

    return link_list
Exemple #3
0
def test_construct():
    """A watch URL with a ``list=`` parameter yields the playlist URL."""
    watch_url = (
        'https://www.youtube.com/watch?v=m5q2GCsteQs&list='
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw'
    )
    playlist = Playlist(watch_url)
    wanted = (
        'https://www.youtube.com/'
        'playlist?list='
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw'
    )

    assert playlist.construct_playlist_url() == wanted
Exemple #4
0
def downloadList(url, maxCount=None, start=None, end=None):
    """Download the videos of a YouTube playlist in several task buckets.

    The playlist's video URLs are optionally narrowed, distributed
    round-robin over ``DOWNLOAD_TASK_CUNT`` buckets and handed to
    ``downloadListMultipleThread``.  The same buckets are submitted again
    for as long as ``hasToDownloadTask()`` reports outstanding work.

    :param url: playlist URL passed to ``Playlist``.
    :param maxCount: if truthy, only the first ``maxCount`` videos are
        used and ``start``/``end`` are ignored.
    :param start: 1-based position of the first video to download.
    :param end: 1-based position of the last video to download (inclusive).
    :returns: ``None``.  Every selected link is registered in the
        module-level ``s_linkStatusDic`` with status ``False``.
    """
    print("download Youtube playlist:%s, maxCount:%s" % (url, str(maxCount)))

    taskCount = DOWNLOAD_TASK_CUNT

    pl = Playlist(url)
    pl.populate_video_urls()
    videoUrls = pl.video_urls
    if maxCount:
        videoUrls = videoUrls[:maxCount]
    elif start and end:
        videoUrls = videoUrls[start - 1:end]
    elif start and end is None:
        videoUrls = videoUrls[start - 1:]
    elif start is None and end:
        videoUrls = videoUrls[:end]

    # NOTE(review): the prefix generator is consumed from its beginning even
    # when ``start`` skips leading videos, so prefixes may not match the
    # videos' positions in the playlist -- confirm this is intended.
    prefix_gen = pl._path_num_prefix_generator()

    playlistTitle = getPlaylistTitle(pl.construct_playlist_url())

    # One bucket of (link, prefix, title) tuples per task; links are dealt
    # out round-robin.  A single-threaded alternative would instead call
    # downloadSingle(link, filename_prefix=prefix, subFolder=playlistTitle)
    # for each link.
    argsArrayList = [[] for _ in range(taskCount)]
    for i, link in enumerate(videoUrls):
        idx = i % taskCount
        prefix = next(prefix_gen)
        argsArrayList[idx].append((link, prefix, playlistTitle))
        # Presumably flipped to True elsewhere once the link is downloaded.
        s_linkStatusDic[link] = False

    downloadListMultipleThread(argsArrayList)
    times = 1
    # NOTE(review): there is no retry limit here; this loops for as long as
    # hasToDownloadTask() keeps returning a truthy value.
    while hasToDownloadTask():
        times += 1
        toDownloadFileDic = {
            k: v
            for k, v in s_linkStatusDic.items() if not v
        }
        print("=>try %d times, file to download count: %d" %
              (times, len(toDownloadFileDic)))
        # Use %-formatting: the original passed the format string and the
        # value as two print arguments, emitting a literal " %s".
        print(" %s" % str(toDownloadFileDic))
        downloadListMultipleThread(argsArrayList)

    print("all download task done.")
def test_construct():
    """Verify the playlist URL derived from a watch URL with ``list=``."""
    source_url = ''.join([
        'https://www.youtube.com/watch?v=m5q2GCsteQs&list=',
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw',
    ])
    subject = Playlist(source_url)
    expected_url = ''.join([
        'https://www.youtube.com/',
        'playlist?list=',
        'PL525f8ds9RvsXDl44X6Wwh9t3fCzFNApw',
    ])

    actual_url = subject.construct_playlist_url()
    assert actual_url == expected_url
def from_playlist_url(url):
    """Build a summary dict describing the playlist at ``url``.

    :param url: URL passed to ``Playlist``.
    :returns: dict with the keys ``type`` (always ``'playlist'``),
        ``title``, ``url`` (as returned by ``construct_playlist_url()``),
        ``playlist_id``, ``video_urls`` and ``video_ids``.
    """
    pli = Playlist(url)
    # Parse the links once and reuse the result; the original called
    # parse_links() twice and threw the first result away.
    video_urls = pli.parse_links()
    pli.populate_video_urls()

    title = pli.title()
    playlist_url = pli.construct_playlist_url()

    return {
        'type': 'playlist',
        'title': title,
        'url': playlist_url,
        'playlist_id': _get_playlist_id(playlist_url),
        'video_urls': video_urls,
        # Assumes each link looks like ".../watch?v=<id>" with no further
        # "=" before the id -- TODO confirm against parse_links().
        'video_ids': [v.split('=')[1] for v in video_urls],
    }