def get_real_download_url(url, preferred_fmt_ids=None):
    """Resolve a YouTube URL to a direct media URL.

    url -- the URL to resolve; returned unchanged when get_youtube_id()
           does not recognise it (returns None)
    preferred_fmt_ids -- iterable of format ids (anything int() accepts),
           tried in order; when empty or None, the id list stored as the
           first element of formats_dict[22] is used

    Returns the URL of the first preferred format that is available, or
    the URL of the highest-numbered available format when none matches.

    Raises YouTubeError when the response carries no stream map or when
    the stream map contains no usable entry.
    """
    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    vid = get_youtube_id(url)
    if vid is None:
        return url

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow "location" headers by hand until a body is delivered
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    page = page.decode()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (fmt_id, url) for every usable entry of the stream map."""
        match = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
        if match is None:
            # Without a stream map the response normally carries a
            # "reason"; do not let a missing one surface as KeyError
            reasons = parse_qs(page).get('reason')
            if reasons:
                error_message = util.remove_html_tags(reasons[0])
            else:
                error_message = 'no stream map and no reason in response'
            raise YouTubeError('Cannot download video: %s' % error_message)

        for fmt_url_encoded in urllib.parse.unquote(match.group(1)).split(','):
            video_info = parse_qs(fmt_url_encoded)
            # Entries lacking an itag or url cannot be downloaded; skip
            # them instead of failing on the whole video
            if 'itag' not in video_info or 'url' not in video_info:
                continue
            yield int(video_info['itag'][0]), video_info['url'][0]

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    for fmt_id in preferred_fmt_ids:
        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)', description, fmt_id)
        url = fmt_id_url_map[fmt_id]
        break

    return url
def get_real_download_url(url, preferred_fmt_id=18):
    """Resolve a YouTube URL to a get_video download link.

    The watch page of the video is fetched and scanned for the "fmt_map"
    and "t" values. The format id defaults to 5 on gpodder.ui.diablo and
    18 otherwise; it is replaced by the first entry of supported_formats
    that is at or after preferred_fmt_id and listed in the page's fmt_map.

    Returns the get_video link when the "t" token is found. Otherwise the
    last requested watch URL is returned, or the unmodified input when
    get_youtube_id() returns None.
    """
    vid = get_youtube_id(url)
    if vid is None:
        return url

    url = 'http://www.youtube.com/watch?v=' + vid
    page = None

    # Follow "location" headers by hand until a body is delivered
    while page is None:
        response = util.http_request(url, method='GET')
        if 'location' not in response.msg:
            page = response.read()
        else:
            url = response.msg['location']

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    fmt_map_match = re.compile(r'.*"fmt_map"\:\s+"([^"]+)".*').search(page)
    if fmt_map_match:
        formats_available = urllib.unquote(fmt_map_match.group(1)).split(',')
    else:
        formats_available = []

    if gpodder.ui.diablo:
        # Hardcode fmt_id 5 for Maemo (for performance reasons) - we could
        # also use 13 and 17 here, but the quality is very low then. There
        # seems to also be a 6, but I could not find a video with that yet.
        fmt_id = 5
    elif gpodder.ui.fremantle:
        # This provides good quality video, seems to be always available
        # and is playable fluently in Media Player
        fmt_id = 18
    else:
        # As a fallback, use fmt_id 18 (seems to be always available)
        fmt_id = 18

    # Becomes True once the walk over supported_formats has passed the
    # preferred format; from then on any available format is accepted.
    reached_preferred = False

    for candidate_id, fmt_token, label in supported_formats:
        if candidate_id == preferred_fmt_id:
            reached_preferred = True

        if reached_preferred and fmt_token in formats_available:
            log('Found available YouTube format: %s (fmt_id=%d)',
                    label, candidate_id)
            fmt_id = candidate_id
            break

    token_match = re.compile(r'.*"t"\:\s+"([^"]+)".*').search(page)
    if not token_match:
        return url

    resolved = ('http://www.youtube.com/get_video?video_id=' + vid
            + '&t=' + token_match.group(1)
            + '&fmt=%d' % fmt_id)
    log('YouTube link resolved: %s => %s', url, resolved)
    return resolved
def youtube_resolve_download_url(episode, config):
    """Resolve an episode's YouTube URL to a direct media URL.

    episode -- object whose "url" attribute is the URL to resolve
    config -- configuration object; config.plugins.youtube is passed to
              get_fmt_ids() to obtain the preferred format ids

    Returns None when get_youtube_id() does not recognise the episode URL.
    Otherwise returns the URL of the first preferred format that is
    available, or the URL of the highest-numbered available format when
    no preferred format matches.

    Raises YouTubeError when the response carries no stream map or when
    the stream map contains no usable entry.
    """
    url = episode.url
    preferred_fmt_ids = get_fmt_ids(config.plugins.youtube)

    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    vid = get_youtube_id(url)
    if vid is None:
        return None

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow "location" headers by hand until a body is delivered
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read().decode('utf-8')

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (fmt_id, url) for every usable entry of the stream map."""
        # Accept the key at the very start or end of the response too,
        # not only when it is surrounded by "&" on both sides
        match = re.search(r'(?:^|&)url_encoded_fmt_stream_map=([^&]+)', page)
        if match is None:
            # Do not let a response without "reason" surface as KeyError
            reasons = urllib.parse.parse_qs(page).get('reason')
            if reasons:
                error_message = util.remove_html_tags(reasons[0])
            else:
                error_message = 'no stream map and no reason in response'
            raise YouTubeError('Cannot download video: %s' % error_message)

        for fmt_url_encoded in urllib.parse.unquote(match.group(1)).split(','):
            video_info = urllib.parse.parse_qs(fmt_url_encoded)
            # Entries lacking an itag or url cannot be downloaded
            if 'itag' not in video_info or 'url' not in video_info:
                continue
            yield (int(video_info['itag'][0]), video_info['url'][0])

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, default_url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    for fmt_id in preferred_fmt_ids:
        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)', description, fmt_id)
        return fmt_id_url_map[fmt_id]

    # No preferred format is offered: hand back the default computed
    # above rather than falling off the end and returning None
    return default_url
def get_real_download_url(url, preferred_fmt_ids=None):
    """Resolve a YouTube URL to a direct media URL.

    url -- the URL to resolve; returned unchanged when get_youtube_id()
           does not recognise it (returns None)
    preferred_fmt_ids -- iterable of format ids (anything int() accepts),
           tried in order; when empty or None, the id list stored as the
           first element of formats_dict[22] is used. When
           gpodder.ui.harmattan is set the preference is forced to [18].

    Returns the URL of the first preferred format that is available, or
    the URL of the highest-numbered available format when none matches.

    Raises YouTubeError when the response carries no stream map or when
    the stream map contains no usable entry.
    """
    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    vid = get_youtube_id(url)
    if vid is None:
        return url

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow "location" headers by hand until a body is delivered
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (fmt_id, url) for every usable entry of the stream map."""
        match = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
        if match is None:
            # Do not let a response without "reason" surface as KeyError
            reasons = parse_qs(page).get('reason')
            if reasons:
                error_message = util.remove_html_tags(reasons[0])
            else:
                error_message = 'no stream map and no reason in response'
            raise YouTubeError('Cannot download video: %s' % error_message)

        for fmt_url_encoded in urllib.unquote(match.group(1)).split(','):
            video_info = parse_qs(fmt_url_encoded)
            # Entries lacking an itag or url cannot be downloaded
            if 'itag' not in video_info or 'url' not in video_info:
                continue
            yield int(video_info['itag'][0]), video_info['url'][0]

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    # This provides good quality video, seems to be always available
    # and is playable fluently in Media Player
    if gpodder.ui.harmattan:
        preferred_fmt_ids = [18]

    for fmt_id in preferred_fmt_ids:
        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)', description, fmt_id)
        url = fmt_id_url_map[fmt_id]
        break

    return url
def get_real_download_url(url, preferred_fmt_id=18):
    """Resolve a YouTube URL to a direct media URL via the watch page.

    The watch page is fetched and its "url_encoded_fmt_stream_map" value
    is split into (fmt_id, url) pairs. The format id starts out as 5 on
    gpodder.ui.diablo, as 5 or 18 on gpodder.ui.fremantle (5 only when
    preferred_fmt_id is 5) and as 18 otherwise. It is then replaced by the
    first entry of supported_formats that is at or after preferred_fmt_id
    and offered by the page.

    Returns the URL for the chosen format id, or the URL of the
    highest-numbered offered format when the chosen id is not offered.
    The input is returned unchanged when get_youtube_id() returns None.

    Raises YouTubeError when the page yields no stream entries.
    """
    vid = get_youtube_id(url)
    if vid is None:
        return url

    url = 'http://www.youtube.com/watch?v=' + vid
    page = None

    # Follow "location" headers by hand until a body is delivered
    while page is None:
        response = util.http_request(url, method='GET')
        if 'location' not in response.msg:
            page = response.read()
        else:
            url = response.msg['location']

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (fmt_id, url) for each entry of the page's stream map."""
        found = re.search(r'.*"url_encoded_fmt_stream_map"\:\s+"([^"]+)".*', page)
        if found is None:
            return

        for entry in found.group(1).split(','):
            # Fields are joined by the literal text "\u0026" in the page
            pairs = (field.split('=', 1) for field in entry.split('\\u0026'))
            video_info = dict(map(urllib.unquote, pair) for pair in pairs)
            yield int(video_info['itag']), video_info['url']

    ranked = sorted(find_urls(page), reverse=True)

    if not ranked:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    default_fmt_id, default_url = ranked[0]

    formats_available = set(entry_id for entry_id, entry_url in ranked)
    url_by_fmt_id = dict(ranked)

    if gpodder.ui.diablo:
        # Hardcode fmt_id 5 for Maemo (for performance reasons) - we could
        # also use 13 and 17 here, but the quality is very low then. There
        # seems to also be a 6, but I could not find a video with that yet.
        fmt_id = 5
    elif gpodder.ui.fremantle:
        # This provides good quality video, seems to be always available
        # and is playable fluently in Media Player
        fmt_id = 5 if preferred_fmt_id == 5 else 18
    else:
        # As a fallback, use fmt_id 18 (seems to be always available)
        fmt_id = 18

    # Becomes True once the walk over supported_formats has passed the
    # preferred format; from then on any offered format is accepted.
    reached_preferred = False

    for candidate_id, wanted, label in supported_formats:
        if candidate_id == preferred_fmt_id:
            reached_preferred = True

        if reached_preferred and candidate_id in formats_available:
            log('Found available YouTube format: %s (fmt_id=%d)',
                    label, candidate_id)
            fmt_id = candidate_id
            break

    url = url_by_fmt_id.get(fmt_id, None)
    if url is None:
        url = default_url

    return url
def find_urls(page):
    """Yield (itag, [url, duration]) pairs found in a video info response.

    page -- the response body as a query-string encoded str

    Three sources are tried, in this order:
    * for a live stream (when allow_partial is true and an
      "hlsManifestUrl" is present): the variant URLs of the HLS manifest,
      with duration None
    * player_response.streamingData "formats" and "adaptiveFormats"
      entries that carry a plain "url", with their "approxDurationMs"
    * the legacy "url_encoded_fmt_stream_map" field, with duration None

    Raises YouTubeError when the response carries a "reason", or when the
    video is a live stream that cannot be fetched through its manifest.
    """
    # streamingData is preferable to url_encoded_fmt_stream_map
    # streamingData.formats are the same as url_encoded_fmt_stream_map
    # streamingData.adaptiveFormats are audio-only and video-only formats
    params = parse_qs(page)
    error_message = None

    if 'reason' in params:
        error_message = util.remove_html_tags(params['reason'][0])
    elif 'player_response' in params:
        player_response = json.loads(params['player_response'][0])
        playabilityStatus = player_response['playabilityStatus']

        if 'reason' in playabilityStatus:
            error_message = util.remove_html_tags(
                playabilityStatus['reason'])
        elif 'liveStreamability' in playabilityStatus \
                and not playabilityStatus['liveStreamability'].get('liveStreamabilityRenderer', {}).get('displayEndscreen', False):
            # playabilityStatus.liveStreamability -- video is or was a live stream
            # playabilityStatus.liveStreamability.liveStreamabilityRenderer.displayEndscreen -- video has ended if present
            if allow_partial and 'streamingData' in player_response \
                    and 'hlsManifestUrl' in player_response['streamingData']:
                manifest = None
                manifest_url = player_response['streamingData']['hlsManifestUrl']
                # Follow "location" headers by hand until a body arrives
                while manifest is None:
                    req = util.http_request(manifest_url, method='GET')
                    if 'location' in req.msg:
                        manifest_url = req.msg['location']
                    else:
                        manifest = req.read()

                itag_re = re.compile(r'/itag/([0-9]+)/')
                for line in manifest.decode().splitlines():
                    # Blank lines are legal in a playlist and lines
                    # starting with "#" are tags or comments, not URLs
                    if not line.strip() or line.startswith('#'):
                        continue
                    itag_match = itag_re.search(line)
                    if itag_match is None:
                        # No itag in the URL, so it cannot be mapped
                        # to a format id
                        continue
                    yield int(itag_match.group(1)), [line, None]
                return

            error_message = 'live stream'
        elif 'streamingData' in player_response:
            streaming_data = player_response['streamingData']
            # DRM videos store url inside a cipher key - not supported
            for section in ('formats', 'adaptiveFormats'):
                for f in streaming_data.get(section, ()):
                    if 'url' in f:
                        yield int(f['itag']), [
                            f['url'], f.get('approxDurationMs')]
            return

    if error_message is not None:
        raise YouTubeError('Cannot download video: %s' % error_message)

    r4 = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
    if r4 is not None:
        fmt_url_map = urllib.parse.unquote(r4.group(1))
        for fmt_url_encoded in fmt_url_map.split(','):
            video_info = parse_qs(fmt_url_encoded)
            # Entries lacking an itag or url cannot be downloaded
            if 'itag' not in video_info or 'url' not in video_info:
                continue
            yield int(video_info['itag'][0]), [video_info['url'][0], None]
def get_real_download_url(url, allow_partial, preferred_fmt_ids=None):
    """Resolve a YouTube URL to a direct media URL and its duration.

    url -- the URL to resolve
    allow_partial -- when true, a live stream is resolved through its HLS
           manifest instead of being rejected
    preferred_fmt_ids -- iterable of format ids, tried in order; ids
           containing "+" (e.g. 136+140) are skipped. When empty or None,
           the id list stored as the first element of formats_dict[22]
           is used.

    Returns a (url, duration) tuple. duration is the "approxDurationMs"
    value of the chosen format, or None when it is not known. When
    get_youtube_id() returns None the result is (url, None) with the
    input URL unchanged.

    Raises YouTubeError when the video cannot be downloaded, when no
    format is found, or when none of the preferred formats is available.
    """
    if not preferred_fmt_ids:
        preferred_fmt_ids, _, _ = formats_dict[22]  # MP4 720p

    duration = None

    vid = get_youtube_id(url)
    if vid is None:
        return url, duration

    page = None
    url = 'https://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow "location" headers by hand until a body is delivered
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    page = page.decode()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (itag, [url, duration]) pairs found in the response."""
        # streamingData is preferable to url_encoded_fmt_stream_map
        # streamingData.formats are the same as url_encoded_fmt_stream_map
        # streamingData.adaptiveFormats are audio-only and video-only formats
        params = parse_qs(page)
        error_message = None

        if 'reason' in params:
            error_message = util.remove_html_tags(params['reason'][0])
        elif 'player_response' in params:
            player_response = json.loads(params['player_response'][0])
            playabilityStatus = player_response['playabilityStatus']

            if 'reason' in playabilityStatus:
                error_message = util.remove_html_tags(
                    playabilityStatus['reason'])
            elif 'liveStreamability' in playabilityStatus \
                    and not playabilityStatus['liveStreamability'].get('liveStreamabilityRenderer', {}).get('displayEndscreen', False):
                # playabilityStatus.liveStreamability -- video is or was a live stream
                # playabilityStatus.liveStreamability.liveStreamabilityRenderer.displayEndscreen -- video has ended if present
                if allow_partial and 'streamingData' in player_response \
                        and 'hlsManifestUrl' in player_response['streamingData']:
                    manifest = None
                    manifest_url = player_response['streamingData']['hlsManifestUrl']
                    while manifest is None:
                        req = util.http_request(manifest_url, method='GET')
                        if 'location' in req.msg:
                            manifest_url = req.msg['location']
                        else:
                            manifest = req.read()

                    itag_re = re.compile(r'/itag/([0-9]+)/')
                    for line in manifest.decode().splitlines():
                        # Blank lines are legal in a playlist and lines
                        # starting with "#" are tags or comments
                        if not line.strip() or line.startswith('#'):
                            continue
                        itag_match = itag_re.search(line)
                        if itag_match is None:
                            # No itag in the URL, so it cannot be
                            # mapped to a format id
                            continue
                        yield int(itag_match.group(1)), [line, None]
                    return

                error_message = 'live stream'
            elif 'streamingData' in player_response:
                streaming_data = player_response['streamingData']
                # DRM videos store url inside a cipher key - not supported
                for section in ('formats', 'adaptiveFormats'):
                    for f in streaming_data.get(section, ()):
                        if 'url' in f:
                            yield int(f['itag']), [
                                f['url'], f.get('approxDurationMs')]
                return

        if error_message is not None:
            raise YouTubeError('Cannot download video: %s' % error_message)

        r4 = re.search(r'url_encoded_fmt_stream_map=([^&]+)', page)
        if r4 is not None:
            fmt_url_map = urllib.parse.unquote(r4.group(1))
            for fmt_url_encoded in fmt_url_map.split(','):
                video_info = parse_qs(fmt_url_encoded)
                # Entries lacking an itag or url cannot be downloaded
                if 'itag' not in video_info or 'url' not in video_info:
                    continue
                yield int(video_info['itag'][0]), [video_info['url'][0], None]

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        # A cipher key in the raw response means only protected streams
        # were offered
        drm = re.search(r'%22(cipher|signatureCipher)%22%3A', page)
        if drm is not None:
            raise YouTubeError(
                'Unsupported DRM content found for video ID "%s"' % vid)
        raise YouTubeError('No formats found for video ID "%s"' % vid)

    fmt_id_url_map = dict(fmt_id_url_map)

    for fmt_id in preferred_fmt_ids:
        if '+' in str(fmt_id):
            # skip formats that contain a + (136+140)
            continue

        fmt_id = int(fmt_id)
        if fmt_id not in fmt_id_url_map:
            continue

        fmt_info = formats_dict.get(fmt_id) or hls_formats_dict.get(fmt_id)
        if fmt_info is not None:
            _, _, description = fmt_info
        else:
            description = 'Unknown'

        logger.info('Found YouTube format: %s (fmt_id=%d)', description, fmt_id)
        url, duration = fmt_id_url_map[fmt_id]
        break
    else:
        raise YouTubeError('No preferred formats found for video ID "%s"' % vid)

    return url, duration
def get_real_download_url(url, preferred_fmt_id=None):
    """Resolve a YouTube URL to a direct media URL.

    url -- the URL to resolve; returned unchanged when get_youtube_id()
           does not recognise it (returns None)
    preferred_fmt_id -- the format id to start from in supported_formats;
           None means 18

    The format id starts out as 18 (or 5 on gpodder.ui.harmattan when 5
    is preferred). It is replaced by the first entry of supported_formats
    that is at or after preferred_fmt_id and offered for the video.

    Returns the URL for the chosen format id, or the URL of the
    highest-numbered offered format when the chosen id is not offered.

    Raises YouTubeError when the response carries no stream map or when
    the stream map contains no usable entry.
    """
    # Default fmt_id when none preferred
    if preferred_fmt_id is None:
        preferred_fmt_id = 18

    vid = get_youtube_id(url)
    if vid is None:
        return url

    page = None
    url = 'http://www.youtube.com/get_video_info?&el=detailpage&video_id=' + vid

    # Follow "location" headers by hand until a body is delivered
    while page is None:
        req = util.http_request(url, method='GET')
        if 'location' in req.msg:
            url = req.msg['location']
        else:
            page = req.read()

    # Try to find the best video format available for this video
    # (http://forum.videohelp.com/topic336882-1800.html#1912972)
    def find_urls(page):
        """Yield (fmt_id, url) for every usable entry of the stream map."""
        # Accept the key at the very start or end of the response too,
        # not only when it is surrounded by "&" on both sides
        match = re.search(r'(?:^|&)url_encoded_fmt_stream_map=([^&]+)', page)
        if match is None:
            # Do not let a response without "reason" surface as KeyError
            reasons = parse_qs(page).get('reason')
            if reasons:
                error_message = util.remove_html_tags(reasons[0])
            else:
                error_message = 'no stream map and no reason in response'
            raise YouTubeError('Cannot download video: %s' % error_message)

        for fmt_url_encoded in urllib.unquote(match.group(1)).split(','):
            video_info = parse_qs(fmt_url_encoded)
            # Entries lacking an itag or url cannot be downloaded
            if 'itag' not in video_info or 'url' not in video_info:
                continue

            stream_url = video_info['url'][0]
            # Append the signature only when the entry carries one;
            # otherwise pass the URL through untouched
            if 'sig' in video_info:
                stream_url = stream_url + "&signature=" + video_info['sig'][0]
            yield int(video_info['itag'][0]), stream_url

    fmt_id_url_map = sorted(find_urls(page), reverse=True)

    if not fmt_id_url_map:
        raise YouTubeError('fmt_url_map not found for video ID "%s"' % vid)

    # Default to the highest fmt_id if we don't find a match below
    _, default_url = fmt_id_url_map[0]

    fmt_id_url_map = dict(fmt_id_url_map)

    if gpodder.ui.harmattan:
        # This provides good quality video, seems to be always available
        # and is playable fluently in Media Player
        fmt_id = 5 if preferred_fmt_id == 5 else 18
    else:
        # As a fallback, use fmt_id 18 (seems to be always available)
        fmt_id = 18

    # Becomes True once the walk over supported_formats has passed the
    # preferred format; from then on any offered format is accepted.
    seen_preferred = False

    for candidate_id, _, description in supported_formats:
        if candidate_id == preferred_fmt_id:
            seen_preferred = True

        if seen_preferred and candidate_id in fmt_id_url_map:
            logger.info('Found YouTube format: %s (fmt_id=%d)',
                    description, candidate_id)
            fmt_id = candidate_id
            break

    url = fmt_id_url_map.get(fmt_id, None)
    if url is None:
        url = default_url

    return url