Exemple #1
0
 def get_vid_from_url(url):
     """Pull the video ID out of a YouTube URL.

     Path-style forms are tried first, in this order: ``youtu.be/<id>``,
     ``youtube.com/embed/<id>``, ``youtube.com/v/<id>`` and
     ``youtube.com/watch/<id>``. If none of them yields a value, the ``v``
     query parameter of the URL is used, and finally the ``v`` query
     parameter of the URL carried in the ``u`` query parameter.

     Returns the first truthy result; when every lookup fails, the result
     of the last lookup is returned as-is.
     """
     path_patterns = (
         r'youtu\.be/([^?/]+)',
         r'youtube\.com/embed/([^/?]+)',
         r'youtube\.com/v/([^/?]+)',
         r'youtube\.com/watch/([^/?]+)',
     )
     for pattern in path_patterns:
         found = match1(url, pattern)
         if found:
             return found

     direct = parse_query_param(url, 'v')
     if direct:
         return direct

     # Some URLs wrap the real watch URL inside the ``u`` parameter.
     wrapped_url = parse_query_param(url, 'u')
     return parse_query_param(wrapped_url, 'v')
Exemple #2
0
    def download_playlist_by_url(self, url, **kwargs):
        """Download every indexed video of the playlist referenced by `url`.

        The playlist page is fetched and scanned for ``/watch?...`` links that
        carry an ``index`` query parameter. Further batches are pulled by
        following the ``data-uix-load-more-href`` attribute until no such
        attribute is found. Each collected video is then downloaded through a
        fresh instance of this class.

        Args:
            url: Playlist URL; stored on ``self.url`` and passed to
                ``get_playlist_id_from_url`` to obtain the playlist ID.
            **kwargs: Forwarded unchanged to ``download_by_url`` for each
                video, together with ``index``.

        Raises:
            AttributeError: If the playlist page has no
                ``<meta name="title" ...>`` tag to take the title from.
        """
        # html.unescape replaces HTMLParser().unescape, which was removed in
        # Python 3.9.
        from html import unescape

        def position(link):
            # Order numerically so that index 10 follows 9 rather than 1.
            # Links whose index is not an integer are placed last.
            try:
                return (0, int(parse_query_param(link, 'index')), link)
            except (TypeError, ValueError):
                return (1, 0, link)

        def indexed_links(hrefs):
            # Unescape BEFORE filtering: in the raw HTML the parameter is
            # written as '&amp;index=', which a query parser may not
            # recognise as 'index'.
            links = (unescape(href) for href in hrefs)
            return sorted(
                (link for link in links if parse_query_param(link, 'index')),
                key=position)

        self.url = url

        playlist_id = self.__class__.get_playlist_id_from_url(self.url)
        if playlist_id is None:
            log.wtf('[Failed] Unsupported URL pattern.')
            # Guard in case log.wtf returns: never request '...?list=None'.
            return

        video_page = get_content(
            'https://www.youtube.com/playlist?list={}'.format(playlist_id))
        videos = indexed_links(
            re.findall(r'<a href="(/watch\?[^"]+)"', video_page))

        # Parse browse_ajax page for more videos to load
        load_more_href = match1(video_page,
                                r'data-uix-load-more-href="([^"]+)"')
        while load_more_href:
            browse_ajax = get_content(
                'https://www.youtube.com/{}'.format(load_more_href))
            browse_data = json.loads(browse_ajax)
            load_more_widget_html = browse_data['load_more_widget_html']
            content_html = browse_data['content_html']
            # A set drops links repeated within the same batch.
            batch = set(re.findall(r'href="(/watch\?[^"]+)"', content_html))
            videos += indexed_links(batch)
            load_more_href = match1(load_more_widget_html,
                                    r'data-uix-load-more-href="([^"]+)"')

        self.title = re.search(r'<meta name="title" content="([^"]+)"',
                               video_page).group(1)
        self.p_playlist()
        for video in videos:
            vid = parse_query_param(video, 'v')
            index = parse_query_param(video, 'index')
            self.__class__().download_by_url(
                self.__class__.get_url_from_vid(vid), index=index, **kwargs)
Exemple #3
0
 def get_playlist_id_from_url(url):
     """Pull the playlist ID out of a URL's query string.

     The ``list`` query parameter is preferred; when it yields nothing
     truthy, the result of looking up the ``p`` parameter is returned.
     """
     playlist_id = parse_query_param(url, 'list')
     if playlist_id:
         return playlist_id
     return parse_query_param(url, 'p')