def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    """Fetch the mobile watch page for *video_id* and extract video info.

    Optionally scopes the request to a playlist (playlist_id/index). Applies
    an age-restriction bypass, decrypts signatures, computes url readiness
    for former livestreams, fetches HLS formats when needed, and — when
    routing through Tor — falls back to Invidious urls on a 403.

    Returns the info dict from yt_data_extract, or ``{'error': ...}`` when
    the watch-page JSON cannot be parsed.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    url = ('https://m.youtube.com/watch?v=' + video_id
           + '&pbj=1&bpctr=9999999999')
    if playlist_id:
        url += '&list=' + playlist_id
    if index:
        # str() so integer indices don't raise TypeError on concatenation
        url += '&index=' + str(index)
    polymer_json = util.fetch_url(url, headers=headers, debug_name='watch')
    polymer_json = polymer_json.decode('utf-8')
    # TODO: Decide whether this should be done in
    # yt_data_extract.extract_watch_info
    try:
        polymer_json = json.loads(polymer_json)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        return {'error': 'Failed to parse json response'}

    info = yt_data_extract.extract_watch_info(polymer_json)

    # age restriction bypass
    if info['age_restricted']:
        print('Fetching age restriction bypass page')
        data = {
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        }
        url = ('https://www.youtube.com/get_video_info?'
               + urllib.parse.urlencode(data))
        video_info_page = util.fetch_url(
            url, debug_name='get_video_info',
            report_text='Fetched age restriction bypass page').decode('utf-8')
        yt_data_extract.update_with_age_restricted_info(info, video_info_page)

    # signature decryption
    decryption_error = decrypt_signatures(info)
    if decryption_error:
        decryption_error = ('Error decrypting url signatures: '
                            + decryption_error)
        info['playability_error'] = decryption_error

    # check if urls ready (non-live format) in former livestream
    # urls not ready if all of them have no filesize
    if info['was_live']:
        info['urls_ready'] = False
        for fmt in info['formats']:
            if fmt['file_size'] is not None:
                info['urls_ready'] = True
    else:
        info['urls_ready'] = True

    # livestream urls
    # sometimes only the livestream urls work soon after the livestream is
    # over
    if (info['hls_manifest_url']
            and (info['live'] or not info['formats']
                 or not info['urls_ready'])):
        manifest = util.fetch_url(
            info['hls_manifest_url'],
            debug_name='hls_manifest.m3u8',
            report_text='Fetched hls manifest').decode('utf-8')
        info['hls_formats'], err = yt_data_extract.extract_hls_formats(
            manifest)
        if not err:
            info['playability_error'] = None
        for fmt in info['hls_formats']:
            fmt['video_quality'] = video_quality_string(fmt)
    else:
        info['hls_formats'] = []

    # check for 403
    info['invidious_used'] = False
    info['invidious_reload_button'] = False
    if settings.route_tor and info['formats'] and info['formats'][0]['url']:
        try:
            response = util.head(info['formats'][0]['url'],
                                 report_text='Checked for URL access')
        except urllib3.exceptions.HTTPError:
            print('Error while checking for URL access:\n')
            traceback.print_exc()
            return info

        if response.status == 403:
            print('Access denied (403) for video urls.')
            if use_invidious:
                print(' Retrieving urls from Invidious...')
                info['invidious_used'] = True
                try:
                    video_info = util.fetch_url(
                        'https://invidio.us/api/v1/videos/'
                        + video_id
                        + '?fields=adaptiveFormats,formatStreams',
                        report_text='Retrieved urls from Invidious',
                        debug_name='invidious_urls')
                except (util.FetchError,
                        urllib3.exceptions.HTTPError) as e:
                    traceback.print_exc()
                    playability_error = (
                        'Access denied (403) for video urls.'
                        + ' Failed to use Invidious to get the urls: '
                        + str(e))
                    if info['playability_error']:
                        info['playability_error'] += '\n' + playability_error
                    else:
                        info['playability_error'] = playability_error
                    # include button to reload without invidious
                    info['invidious_reload_button'] = True
                    return info

                video_info = json.loads(video_info.decode('utf-8'))

                # collect invidious urls for each itag
                itag_to_url = {}
                for invidious_fmt in (video_info['adaptiveFormats']
                                      + video_info['formatStreams']):
                    itag_to_url[invidious_fmt['itag']] = invidious_fmt['url']

                # replace urls with urls from invidious
                for fmt in info['formats']:
                    itag = str(fmt['itag'])
                    if itag not in itag_to_url:
                        print(('Warning: itag ' + itag
                               + ' not found in invidious urls'))
                        continue
                    fmt['url'] = itag_to_url[itag]
            else:
                info['playability_error'] = ('Access denied (403) for video '
                                             'urls')
        elif 300 <= response.status < 400:
            print('Error: exceeded max redirects while checking video URL')
    return info
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    """Fetch the embed page and the youtubei player page for *video_id*.

    Uses the ANDROID innertube client so returned urls do not need
    signature decryption with the web base.js (see linked yt-dlp issue).
    Handles age restriction / missing player via the EMBED client screen,
    computes url readiness for former livestreams, fetches HLS formats
    when needed, and routes video urls through Tor on a 403 when
    ``settings.route_tor == 1``.

    Returns the info dict from yt_data_extract.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    url = 'https://m.youtube.com/embed/' + video_id + '?bpctr=9999999999'
    if playlist_id:
        url += '&list=' + playlist_id
    if index:
        # str() so integer indices don't raise TypeError on concatenation
        url += '&index=' + str(index)
    watch_page = util.fetch_url(url, headers=watch_headers,
                                debug_name='watch')
    watch_page = watch_page.decode('utf-8')
    info = yt_data_extract.extract_watch_info_from_html(watch_page)

    context = {
        'client': {
            'clientName': 'ANDROID',
            'clientVersion': '16.20',
            'gl': 'US',
            'hl': 'en',
        },
        # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
        'thirdParty': {
            'embedUrl': 'https://google.com',  # Can be any valid URL
        }
    }

    if info['age_restricted'] or info['player_urls_missing']:
        if info['age_restricted']:
            print('Age restricted video. Fetching /youtubei/v1/player page')
        else:
            print('Missing player. Fetching /youtubei/v1/player page')
        context['client']['clientScreen'] = 'EMBED'
    else:
        print('Fetching /youtubei/v1/player page')

    # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136
    # ANDROID is used instead because its urls don't require decryption
    # The URLs returned with WEB for videos requiring decryption
    # couldn't be decrypted with the base.js from the web page for some
    # reason
    url = 'https://youtubei.googleapis.com/youtubei/v1/player'
    url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    data = {
        'videoId': video_id,
        'context': context,
    }
    data = json.dumps(data)
    content_header = (('Content-Type', 'application/json'), )
    player_response = util.fetch_url(
        url, data=data, headers=util.mobile_ua + content_header,
        debug_name='youtubei_player',
        report_text='Fetched youtubei player page').decode('utf-8')
    yt_data_extract.update_with_age_restricted_info(info, player_response)

    # signature decryption
    decryption_error = decrypt_signatures(info, video_id)
    if decryption_error:
        decryption_error = ('Error decrypting url signatures: '
                            + decryption_error)
        info['playability_error'] = decryption_error

    # check if urls ready (non-live format) in former livestream
    # urls not ready if all of them have no filesize
    if info['was_live']:
        info['urls_ready'] = False
        for fmt in info['formats']:
            if fmt['file_size'] is not None:
                info['urls_ready'] = True
    else:
        info['urls_ready'] = True

    # livestream urls
    # sometimes only the livestream urls work soon after the livestream is
    # over
    if (info['hls_manifest_url']
            and (info['live'] or not info['formats']
                 or not info['urls_ready'])):
        manifest = util.fetch_url(
            info['hls_manifest_url'],
            debug_name='hls_manifest.m3u8',
            report_text='Fetched hls manifest').decode('utf-8')
        info['hls_formats'], err = yt_data_extract.extract_hls_formats(
            manifest)
        if not err:
            info['playability_error'] = None
        for fmt in info['hls_formats']:
            fmt['video_quality'] = video_quality_string(fmt)
    else:
        info['hls_formats'] = []

    # check for 403. Unnecessary for tor video routing b/c ip address is same
    info['invidious_used'] = False
    info['invidious_reload_button'] = False
    info['tor_bypass_used'] = False
    if (settings.route_tor == 1
            and info['formats'] and info['formats'][0]['url']):
        try:
            response = util.head(info['formats'][0]['url'],
                                 report_text='Checked for URL access')
        except urllib3.exceptions.HTTPError:
            print('Error while checking for URL access:\n')
            traceback.print_exc()
            return info

        if response.status == 403:
            print('Access denied (403) for video urls.')
            print('Routing video through Tor')
            info['tor_bypass_used'] = True
            for fmt in info['formats']:
                fmt['url'] += '&use_tor=1'
        elif 300 <= response.status < 400:
            print('Error: exceeded max redirects while checking video URL')
    return info
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    """Fetch the mobile watch page for *video_id* and extract video info.

    Optionally scopes the request to a playlist (playlist_id/index).
    Requests get_video_info when the player urls are missing or the video
    is age restricted, decrypts signatures, computes url readiness for
    former livestreams, fetches HLS formats when needed, and routes video
    urls through Tor on a 403 when ``settings.route_tor == 1``.

    Returns the info dict from yt_data_extract, or ``{'error': ...}`` when
    the watch-page JSON cannot be parsed.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    url = ('https://m.youtube.com/watch?v=' + video_id
           + '&pbj=1&bpctr=9999999999')
    if playlist_id:
        url += '&list=' + playlist_id
    if index:
        # str() so integer indices don't raise TypeError on concatenation
        url += '&index=' + str(index)
    polymer_json = util.fetch_url(url, headers=headers, debug_name='watch')
    polymer_json = polymer_json.decode('utf-8')
    # TODO: Decide whether this should be done in
    # yt_data_extract.extract_watch_info
    try:
        polymer_json = json.loads(polymer_json)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        return {'error': 'Failed to parse json response'}

    info = yt_data_extract.extract_watch_info(polymer_json)

    # request player urls if it's missing
    # see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160
    if info['age_restricted'] or info['player_urls_missing']:
        if info['age_restricted']:
            print('Age restricted video. Fetching get_video_info page')
        else:
            print('Missing player. Fetching get_video_info page')
        data = {
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        }
        # single parenthesized expression: the query string must be
        # appended to the base url, not left as a dangling statement
        url = ('https://www.youtube.com/get_video_info?'
               + urllib.parse.urlencode(data))
        video_info_page = util.fetch_url(
            url, debug_name='get_video_info',
            report_text='Fetched get_video_info page').decode('utf-8')
        yt_data_extract.update_with_age_restricted_info(info, video_info_page)

    # signature decryption
    decryption_error = decrypt_signatures(info)
    if decryption_error:
        decryption_error = ('Error decrypting url signatures: '
                            + decryption_error)
        info['playability_error'] = decryption_error

    # check if urls ready (non-live format) in former livestream
    # urls not ready if all of them have no filesize
    if info['was_live']:
        info['urls_ready'] = False
        for fmt in info['formats']:
            if fmt['file_size'] is not None:
                info['urls_ready'] = True
    else:
        info['urls_ready'] = True

    # livestream urls
    # sometimes only the livestream urls work soon after the livestream is
    # over
    if (info['hls_manifest_url']
            and (info['live'] or not info['formats']
                 or not info['urls_ready'])):
        manifest = util.fetch_url(
            info['hls_manifest_url'],
            debug_name='hls_manifest.m3u8',
            report_text='Fetched hls manifest').decode('utf-8')
        info['hls_formats'], err = yt_data_extract.extract_hls_formats(
            manifest)
        if not err:
            info['playability_error'] = None
        for fmt in info['hls_formats']:
            fmt['video_quality'] = video_quality_string(fmt)
    else:
        info['hls_formats'] = []

    # check for 403. Unnecessary for tor video routing b/c ip address is same
    info['invidious_used'] = False
    info['invidious_reload_button'] = False
    info['tor_bypass_used'] = False
    if (settings.route_tor == 1
            and info['formats'] and info['formats'][0]['url']):
        try:
            response = util.head(info['formats'][0]['url'],
                                 report_text='Checked for URL access')
        except urllib3.exceptions.HTTPError:
            print('Error while checking for URL access:\n')
            traceback.print_exc()
            return info

        if response.status == 403:
            print('Access denied (403) for video urls.')
            print('Routing video through Tor')
            info['tor_bypass_used'] = True
            for fmt in info['formats']:
                fmt['url'] += '&use_tor=1'
        elif 300 <= response.status < 400:
            print('Error: exceeded max redirects while checking video URL')
    return info
def extract_info(video_id):
    """Fetch the mobile watch page for *video_id* and return extracted info.

    Returns ``{'error': ...}`` when the watch-page JSON cannot be parsed.
    Otherwise returns the info dict from yt_data_extract, with signatures
    decrypted and — when routing through Tor — urls replaced by Invidious
    urls if the direct urls answer with a 403.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    raw_page = util.fetch_url(
        'https://m.youtube.com/watch?v=' + video_id
        + '&pbj=1&bpctr=9999999999',
        headers=headers, debug_name='watch').decode('utf-8')
    # TODO: Decide whether this should be done in
    # yt_data_extract.extract_watch_info
    try:
        watch_json = json.loads(raw_page)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        return {'error': 'Failed to parse json response'}

    info = yt_data_extract.extract_watch_info(watch_json)

    # age restriction bypass
    if info['age_restricted']:
        print('Fetching age restriction bypass page')
        params = urllib.parse.urlencode({
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        })
        bypass_page = util.fetch_url(
            'https://www.youtube.com/get_video_info?' + params,
            debug_name='get_video_info',
            report_text='Fetched age restriction bypass page').decode('utf-8')
        yt_data_extract.update_with_age_restricted_info(info, bypass_page)

    # signature decryption
    sig_error = decrypt_signatures(info)
    if sig_error:
        info['playability_error'] = ('Error decrypting url signatures: '
                                     + sig_error)

    # check for 403
    info['invidious_used'] = False
    if settings.route_tor and info['formats'] and info['formats'][0]['url']:
        try:
            response = util.head(info['formats'][0]['url'],
                                 report_text='Checked for URL access')
        except urllib3.exceptions.HTTPError:
            print('Error while checking for URL access:\n')
            traceback.print_exc()
            return info

        if response.status == 403:
            print('Access denied (403) for video urls.'
                  ' Retrieving urls from Invidious...')
            info['invidious_used'] = True
            try:
                invidious_response = util.fetch_url(
                    'https://invidio.us/api/v1/videos/' + video_id
                    + '?fields=adaptiveFormats,formatStreams',
                    report_text='Retrieved urls from Invidious',
                    debug_name='invidious_urls')
            except urllib3.exceptions.HTTPError as e:
                traceback.print_exc()
                message = ('Access denied (403) for video urls.'
                           + ' Failed to use Invidious to get the urls: '
                           + str(e))
                if info['playability_error']:
                    info['playability_error'] += '\n' + message
                else:
                    info['playability_error'] = message
                return info

            invidious_info = json.loads(invidious_response.decode('utf-8'))

            # collect invidious urls for each itag
            url_by_itag = {
                ivfmt['itag']: ivfmt['url']
                for ivfmt in (invidious_info['adaptiveFormats']
                              + invidious_info['formatStreams'])
            }

            # replace urls with urls from invidious
            for fmt in info['formats']:
                itag = str(fmt['itag'])
                if itag in url_by_itag:
                    fmt['url'] = url_by_itag[itag]
                else:
                    print('Warning: itag ' + itag
                          + ' not found in invidious urls')
        elif 300 <= response.status < 400:
            print('Error: exceeded max redirects while checking video URL')
    return info