Exemple #1
0
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    """Fetch the mobile watch page for a video and build its info dict.

    The work is done in this order:

    1. Fetch the ``pbj=1`` watch page and parse it as JSON, then hand it to
       ``yt_data_extract.extract_watch_info``.
    2. If the video is age restricted, fetch the ``get_video_info`` page and
       merge it in with ``update_with_age_restricted_info``.
    3. Run ``decrypt_signatures`` and record any error it reports in
       ``info['playability_error']``.
    4. Set ``info['urls_ready']`` and, when an hls manifest url is present
       and needed, fill ``info['hls_formats']``.
    5. When ``settings.route_tor`` is set, send a HEAD request to the first
       format url; on a 403 either replace the format urls with the ones
       Invidious returns (``use_invidious`` true) or record a playability
       error.

    Args:
        video_id: Id of the video; concatenated into the request urls.
        use_invidious: Whether to ask Invidious for replacement urls when
            the HEAD check returns 403.
        playlist_id: Optional value for the ``list`` query parameter.
        index: Optional value for the ``index`` query parameter. Converted
            with ``str`` so an integer is accepted as well as a string.

    Returns:
        The info dict, or ``{'error': 'Failed to parse json response'}`` if
        the watch page is not valid JSON. Besides what the extraction
        helpers put in it, this function sets ``urls_ready``,
        ``hls_formats``, ``invidious_used`` and ``invidious_reload_button``,
        and may set ``playability_error``.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
    if playlist_id:
        url += '&list=' + playlist_id
    if index:
        # str() keeps an integer index from raising TypeError here
        url += '&index=' + str(index)
    polymer_json = util.fetch_url(url, headers=headers, debug_name='watch')
    polymer_json = polymer_json.decode('utf-8')
    # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
    try:
        polymer_json = json.loads(polymer_json)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        return {'error': 'Failed to parse json response'}
    info = yt_data_extract.extract_watch_info(polymer_json)

    # age restriction bypass
    if info['age_restricted']:
        print('Fetching age restriction bypass page')
        data = {
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        }
        url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(
            data)
        video_info_page = util.fetch_url(
            url,
            debug_name='get_video_info',
            report_text='Fetched age restriction bypass page').decode('utf-8')
        yt_data_extract.update_with_age_restricted_info(info, video_info_page)

    # signature decryption
    decryption_error = decrypt_signatures(info)
    if decryption_error:
        decryption_error = 'Error decrypting url signatures: ' + decryption_error
        info['playability_error'] = decryption_error

    # check if urls ready (non-live format) in former livestream
    # urls not ready if all of them have no filesize
    if info['was_live']:
        info['urls_ready'] = any(
            fmt['file_size'] is not None for fmt in info['formats'])
    else:
        info['urls_ready'] = True

    # livestream urls
    # sometimes only the livestream urls work soon after the livestream is over
    if (info['hls_manifest_url'] and
        (info['live'] or not info['formats'] or not info['urls_ready'])):
        manifest = util.fetch_url(
            info['hls_manifest_url'],
            debug_name='hls_manifest.m3u8',
            report_text='Fetched hls manifest').decode('utf-8')

        info['hls_formats'], err = yt_data_extract.extract_hls_formats(
            manifest)
        # usable hls formats supersede any earlier playability error
        if not err:
            info['playability_error'] = None
        for fmt in info['hls_formats']:
            fmt['video_quality'] = video_quality_string(fmt)
    else:
        info['hls_formats'] = []

    # check for 403
    info['invidious_used'] = False
    info['invidious_reload_button'] = False
    if not (settings.route_tor and info['formats']
            and info['formats'][0]['url']):
        return info

    try:
        response = util.head(info['formats'][0]['url'],
                             report_text='Checked for URL access')
    except urllib3.exceptions.HTTPError:
        print('Error while checking for URL access:\n')
        traceback.print_exc()
        return info

    if 300 <= response.status < 400:
        print('Error: exceeded max redirects while checking video URL')
        return info
    if response.status != 403:
        return info

    print('Access denied (403) for video urls.')
    if not use_invidious:
        info['playability_error'] = 'Access denied (403) for video urls'
        return info

    print(' Retrieving urls from Invidious...')
    info['invidious_used'] = True

    # collect invidious urls for each itag; invidious_error stays None only
    # if both the fetch and the parsing of the response succeed
    itag_to_url = {}
    invidious_error = None
    try:
        video_info = util.fetch_url(
            'https://invidio.us/api/v1/videos/' + video_id +
            '?fields=adaptiveFormats,formatStreams',
            report_text='Retrieved urls from Invidious',
            debug_name='invidious_urls')
    except (util.FetchError, urllib3.exceptions.HTTPError) as e:
        traceback.print_exc()
        invidious_error = str(e)
    else:
        try:
            video_info = json.loads(video_info.decode('utf-8'))
            for invidious_fmt in (video_info['adaptiveFormats'] +
                                  video_info['formatStreams']):
                itag_to_url[invidious_fmt['itag']] = invidious_fmt['url']
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError;
            # KeyError/TypeError cover a response without the expected
            # fields. Report it like a fetch failure instead of crashing.
            traceback.print_exc()
            invidious_error = 'malformed response: ' + repr(e)

    if invidious_error is not None:
        playability_error = (
            'Access denied (403) for video urls.' +
            ' Failed to use Invidious to get the urls: ' + invidious_error)
        if info['playability_error']:
            info['playability_error'] += '\n' + playability_error
        else:
            info['playability_error'] = playability_error
        # include button to reload without invidious
        info['invidious_reload_button'] = True
        return info

    # replace urls with urls from invidious
    # (itags are compared as strings, hence the str() on our own itag)
    for fmt in info['formats']:
        itag = str(fmt['itag'])
        if itag not in itag_to_url:
            print(('Warning: itag ' + itag +
                   ' not found in invidious urls'))
            continue
        fmt['url'] = itag_to_url[itag]
    return info
Exemple #2
0
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    """Build the info dict for a video from the embed page and player API.

    The work is done in this order:

    1. Fetch the ``/embed/`` page and pass its HTML to
       ``yt_data_extract.extract_watch_info_from_html``.
    2. Request ``/youtubei/v1/player`` with an ANDROID client context and
       merge the response in with ``update_with_age_restricted_info``.
    3. Run ``decrypt_signatures`` and record any error it reports in
       ``info['playability_error']``.
    4. Set ``info['urls_ready']`` and, when an hls manifest url is present
       and needed, fill ``info['hls_formats']``.
    5. When ``settings.route_tor == 1``, send a HEAD request to the first
       format url; on a 403 append ``&use_tor=1`` to the format urls.

    Args:
        video_id: Id of the video; used in the embed url and sent as
            ``videoId`` to the player endpoint.
        use_invidious: Not referenced by this function; kept so the
            signature stays the same for callers.
        playlist_id: Optional value for the ``list`` query parameter.
        index: Optional value for the ``index`` query parameter. Converted
            with ``str`` so an integer is accepted as well as a string.

    Returns:
        The info dict. Besides what the extraction helpers put in it, this
        function sets ``urls_ready``, ``hls_formats``, ``invidious_used``,
        ``invidious_reload_button`` and ``tor_bypass_used``, and may set
        ``playability_error``.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    url = 'https://m.youtube.com/embed/' + video_id + '?bpctr=9999999999'
    if playlist_id:
        url += '&list=' + playlist_id
    if index:
        # str() keeps an integer index from raising TypeError here
        url += '&index=' + str(index)
    watch_page = util.fetch_url(url, headers=watch_headers, debug_name='watch')
    watch_page = watch_page.decode('utf-8')
    info = yt_data_extract.extract_watch_info_from_html(watch_page)

    context = {
        'client': {
            'clientName': 'ANDROID',
            'clientVersion': '16.20',
            'gl': 'US',
            'hl': 'en',
        },
        # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
        'thirdParty': {
            'embedUrl': 'https://google.com',  # Can be any valid URL
        }
    }
    # the EMBED client screen is only requested for age restricted videos
    # or when the embed page did not yield player urls
    if info['age_restricted'] or info['player_urls_missing']:
        if info['age_restricted']:
            print('Age restricted video. Fetching /youtubei/v1/player page')
        else:
            print('Missing player. Fetching /youtubei/v1/player page')
        context['client']['clientScreen'] = 'EMBED'
    else:
        print('Fetching /youtubei/v1/player page')

    # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136
    # ANDROID is used instead because its urls don't require decryption
    # The URLs returned with WEB for videos requiring decryption
    # couldn't be decrypted with the base.js from the web page for some
    # reason
    url = 'https://youtubei.googleapis.com/youtubei/v1/player'
    url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
    data = {
        'videoId': video_id,
        'context': context,
    }
    data = json.dumps(data)
    content_header = (('Content-Type', 'application/json'), )
    # NOTE(review): assumes util.mobile_ua is a tuple of header pairs so
    # that it can be concatenated with content_header - confirm in util
    player_response = util.fetch_url(
        url,
        data=data,
        headers=util.mobile_ua + content_header,
        debug_name='youtubei_player',
        report_text='Fetched youtubei player page').decode('utf-8')
    yt_data_extract.update_with_age_restricted_info(info, player_response)

    # signature decryption
    decryption_error = decrypt_signatures(info, video_id)
    if decryption_error:
        decryption_error = 'Error decrypting url signatures: ' + decryption_error
        info['playability_error'] = decryption_error

    # check if urls ready (non-live format) in former livestream
    # urls not ready if all of them have no filesize
    if info['was_live']:
        info['urls_ready'] = any(
            fmt['file_size'] is not None for fmt in info['formats'])
    else:
        info['urls_ready'] = True

    # livestream urls
    # sometimes only the livestream urls work soon after the livestream is over
    if (info['hls_manifest_url'] and
        (info['live'] or not info['formats'] or not info['urls_ready'])):
        manifest = util.fetch_url(
            info['hls_manifest_url'],
            debug_name='hls_manifest.m3u8',
            report_text='Fetched hls manifest').decode('utf-8')

        info['hls_formats'], err = yt_data_extract.extract_hls_formats(
            manifest)
        # usable hls formats supersede any earlier playability error
        if not err:
            info['playability_error'] = None
        for fmt in info['hls_formats']:
            fmt['video_quality'] = video_quality_string(fmt)
    else:
        info['hls_formats'] = []

    # check for 403. Unnecessary for tor video routing b/c ip address is same
    info['invidious_used'] = False
    info['invidious_reload_button'] = False
    info['tor_bypass_used'] = False
    if (settings.route_tor == 1 and info['formats']
            and info['formats'][0]['url']):
        try:
            response = util.head(info['formats'][0]['url'],
                                 report_text='Checked for URL access')
        except urllib3.exceptions.HTTPError:
            print('Error while checking for URL access:\n')
            traceback.print_exc()
            return info

        if response.status == 403:
            print('Access denied (403) for video urls.')
            print('Routing video through Tor')
            info['tor_bypass_used'] = True
            for fmt in info['formats']:
                # only the first format's url was checked above; skip any
                # format without a url instead of failing on None += str
                if fmt['url']:
                    fmt['url'] += '&use_tor=1'
        elif 300 <= response.status < 400:
            print('Error: exceeded max redirects while checking video URL')
    return info
Exemple #3
0
def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    """Fetch the mobile watch page for a video and build its info dict.

    The work is done in this order:

    1. Fetch the ``pbj=1`` watch page and parse it as JSON, then hand it to
       ``yt_data_extract.extract_watch_info``.
    2. If the video is age restricted or its player urls are missing, fetch
       the ``get_video_info`` page and merge it in with
       ``update_with_age_restricted_info``.
    3. Run ``decrypt_signatures`` and record any error it reports in
       ``info['playability_error']``.
    4. Set ``info['urls_ready']`` and, when an hls manifest url is present
       and needed, fill ``info['hls_formats']``.
    5. When ``settings.route_tor == 1``, send a HEAD request to the first
       format url; on a 403 append ``&use_tor=1`` to the format urls.

    Args:
        video_id: Id of the video; concatenated into the request urls.
        use_invidious: Not referenced by this function; kept so the
            signature stays the same for callers.
        playlist_id: Optional value for the ``list`` query parameter.
        index: Optional value for the ``index`` query parameter. Converted
            with ``str`` so an integer is accepted as well as a string.

    Returns:
        The info dict, or ``{'error': 'Failed to parse json response'}`` if
        the watch page is not valid JSON. Besides what the extraction
        helpers put in it, this function sets ``urls_ready``,
        ``hls_formats``, ``invidious_used``, ``invidious_reload_button``
        and ``tor_bypass_used``, and may set ``playability_error``.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    url = 'https://m.youtube.com/watch?v=' + video_id + '&pbj=1&bpctr=9999999999'
    if playlist_id:
        url += '&list=' + playlist_id
    if index:
        # str() keeps an integer index from raising TypeError here
        url += '&index=' + str(index)
    polymer_json = util.fetch_url(url, headers=headers, debug_name='watch')
    polymer_json = polymer_json.decode('utf-8')
    # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
    try:
        polymer_json = json.loads(polymer_json)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        return {'error': 'Failed to parse json response'}
    info = yt_data_extract.extract_watch_info(polymer_json)

    # request player urls if it's missing
    # see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160
    if info['age_restricted'] or info['player_urls_missing']:
        if info['age_restricted']:
            print('Age restricted video. Fetching get_video_info page')
        else:
            print('Missing player. Fetching get_video_info page')
        data = {
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        }
        url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(
            data)
        video_info_page = util.fetch_url(
            url,
            debug_name='get_video_info',
            report_text='Fetched get_video_info page').decode('utf-8')
        yt_data_extract.update_with_age_restricted_info(info, video_info_page)

    # signature decryption
    decryption_error = decrypt_signatures(info)
    if decryption_error:
        decryption_error = 'Error decrypting url signatures: ' + decryption_error
        info['playability_error'] = decryption_error

    # check if urls ready (non-live format) in former livestream
    # urls not ready if all of them have no filesize
    if info['was_live']:
        info['urls_ready'] = any(
            fmt['file_size'] is not None for fmt in info['formats'])
    else:
        info['urls_ready'] = True

    # livestream urls
    # sometimes only the livestream urls work soon after the livestream is over
    if (info['hls_manifest_url'] and
        (info['live'] or not info['formats'] or not info['urls_ready'])):
        manifest = util.fetch_url(
            info['hls_manifest_url'],
            debug_name='hls_manifest.m3u8',
            report_text='Fetched hls manifest').decode('utf-8')

        info['hls_formats'], err = yt_data_extract.extract_hls_formats(
            manifest)
        # usable hls formats supersede any earlier playability error
        if not err:
            info['playability_error'] = None
        for fmt in info['hls_formats']:
            fmt['video_quality'] = video_quality_string(fmt)
    else:
        info['hls_formats'] = []

    # check for 403. Unnecessary for tor video routing b/c ip address is same
    info['invidious_used'] = False
    info['invidious_reload_button'] = False
    info['tor_bypass_used'] = False
    if (settings.route_tor == 1 and info['formats']
            and info['formats'][0]['url']):
        try:
            response = util.head(info['formats'][0]['url'],
                                 report_text='Checked for URL access')
        except urllib3.exceptions.HTTPError:
            print('Error while checking for URL access:\n')
            traceback.print_exc()
            return info

        if response.status == 403:
            print('Access denied (403) for video urls.')
            print('Routing video through Tor')
            info['tor_bypass_used'] = True
            for fmt in info['formats']:
                # only the first format's url was checked above; skip any
                # format without a url instead of failing on None += str
                if fmt['url']:
                    fmt['url'] += '&use_tor=1'
        elif 300 <= response.status < 400:
            print('Error: exceeded max redirects while checking video URL')
    return info
Exemple #4
0
def extract_info(video_id):
    """Fetch the mobile watch page for a video and build its info dict.

    The work is done in this order:

    1. Fetch the ``pbj=1`` watch page and parse it as JSON, then hand it to
       ``yt_data_extract.extract_watch_info``.
    2. If the video is age restricted, fetch the ``get_video_info`` page and
       merge it in with ``update_with_age_restricted_info``.
    3. Run ``decrypt_signatures`` and record any error it reports in
       ``info['playability_error']``.
    4. When ``settings.route_tor`` is set, send a HEAD request to the first
       format url; on a 403 replace the format urls with the ones returned
       by Invidious, or record a playability error if that fails.

    Args:
        video_id: Id of the video; concatenated into the request urls.

    Returns:
        The info dict, or ``{'error': 'Failed to parse json response'}`` if
        the watch page is not valid JSON. Besides what the extraction
        helpers put in it, this function sets ``invidious_used`` and may
        set ``playability_error``.
    """
    # bpctr=9999999999 will bypass are-you-sure dialogs for controversial
    # videos
    polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id +
                                  '&pbj=1&bpctr=9999999999',
                                  headers=headers,
                                  debug_name='watch').decode('utf-8')
    # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info
    try:
        polymer_json = json.loads(polymer_json)
    except json.decoder.JSONDecodeError:
        traceback.print_exc()
        return {'error': 'Failed to parse json response'}
    info = yt_data_extract.extract_watch_info(polymer_json)

    # age restriction bypass
    if info['age_restricted']:
        print('Fetching age restriction bypass page')
        data = {
            'video_id': video_id,
            'eurl': 'https://youtube.googleapis.com/v/' + video_id,
        }
        url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(
            data)
        video_info_page = util.fetch_url(
            url,
            debug_name='get_video_info',
            report_text='Fetched age restriction bypass page').decode('utf-8')
        yt_data_extract.update_with_age_restricted_info(info, video_info_page)

    # signature decryption
    decryption_error = decrypt_signatures(info)
    if decryption_error:
        decryption_error = 'Error decrypting url signatures: ' + decryption_error
        info['playability_error'] = decryption_error

    # check for 403
    info['invidious_used'] = False
    if not (settings.route_tor and info['formats']
            and info['formats'][0]['url']):
        return info

    try:
        response = util.head(info['formats'][0]['url'],
                             report_text='Checked for URL access')
    except urllib3.exceptions.HTTPError:
        print('Error while checking for URL access:\n')
        traceback.print_exc()
        return info

    if 300 <= response.status < 400:
        print('Error: exceeded max redirects while checking video URL')
        return info
    if response.status != 403:
        return info

    print(('Access denied (403) for video urls.'
           ' Retrieving urls from Invidious...'))
    info['invidious_used'] = True

    # collect invidious urls for each itag; invidious_error stays None only
    # if both the fetch and the parsing of the response succeed
    itag_to_url = {}
    invidious_error = None
    try:
        video_info = util.fetch_url(
            'https://invidio.us/api/v1/videos/' + video_id +
            '?fields=adaptiveFormats,formatStreams',
            report_text='Retrieved urls from Invidious',
            debug_name='invidious_urls')
    except urllib3.exceptions.HTTPError as e:
        traceback.print_exc()
        invidious_error = str(e)
    else:
        try:
            video_info = json.loads(video_info.decode('utf-8'))
            for invidious_fmt in (video_info['adaptiveFormats'] +
                                  video_info['formatStreams']):
                itag_to_url[invidious_fmt['itag']] = invidious_fmt['url']
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError;
            # KeyError/TypeError cover a response without the expected
            # fields. Report it like a fetch failure instead of crashing.
            traceback.print_exc()
            invidious_error = 'malformed response: ' + repr(e)

    if invidious_error is not None:
        playability_error = (
            'Access denied (403) for video urls.' +
            ' Failed to use Invidious to get the urls: ' + invidious_error)
        if info['playability_error']:
            info['playability_error'] += '\n' + playability_error
        else:
            info['playability_error'] = playability_error
        return info

    # replace urls with urls from invidious
    # (itags are compared as strings, hence the str() on our own itag)
    for fmt in info['formats']:
        itag = str(fmt['itag'])
        if itag not in itag_to_url:
            print(('Warning: itag ' + itag +
                   ' not found in invidious urls'))
            continue
        fmt['url'] = itag_to_url[itag]
    return info