Ejemplo n.º 1
0
def get_channel_page_general_url(base_url, tab, request):
    """Render a channel page reached via a non-ID url (/user/... or /c/...).

    base_url -- youtube url prefix for the channel, no trailing slash
    tab      -- one of 'videos', 'about', 'playlists', 'search'
    request  -- flask-style request; args supply page/sort/view/query
    """

    page_number = int(request.args.get('page', 1))
    sort = request.args.get('sort', '3')
    view = request.args.get('view', '1')
    query = request.args.get('query', '')

    if tab == 'videos':
        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1, debug_name='gen_channel_videos')
    elif tab == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='gen_channel_about')
    elif tab == 'playlists':
        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1, debug_name='gen_channel_playlists')
    elif tab == 'search':
        raise NotImplementedError()
    else:
        flask.abort(404, 'Unknown channel tab: ' + tab)


    info = extract_info(json.loads(polymer_json), tab)
    post_process_channel_info(info)
    if tab in ('videos', 'search'):
        # No channel id is available through these urls, so the real video
        # count is unknown; 1000 serves as a placeholder upper bound
        # (30 results per page).
        info['number_of_videos'] = 1000
        info['number_of_pages'] = math.ceil(1000/30)
        info['header_playlist_names'] = local_playlist.get_playlist_names()
    if tab in ('videos', 'playlists'):
        info['current_sort'] = sort
    elif tab == 'search':
        info['search_box_value'] = query


    return flask.render_template('channel.html',
        parameters_dictionary = request.args,
        **info
    )
Ejemplo n.º 2
0
def get_channel_tab(channel_id,
                    page="1",
                    sort=3,
                    tab='videos',
                    view=1,
                    print_status=True):
    """Fetch the raw response for one page of a channel tab.

    Oldest-first sorting (sort=2) past the first page only works with the
    old v1 continuation scheme on the channel page endpoint; every other
    combination goes through browse_ajax with a v3 continuation token.
    """
    message = 'Got channel tab' if print_status else None

    needs_v1 = int(sort) == 2 and int(page) > 1
    if needs_v1:
        token = channel_ctoken_v1(channel_id, page, sort, tab, view)
        token = token.replace('=', '%3D')
        url = ('https://www.youtube.com/channel/' + channel_id + '/' + tab +
               '?action_continuation=1&continuation=' + token + '&pbj=1')
        cookie = real_cookie
    else:
        token = channel_ctoken_v3(channel_id, page, sort, tab, view)
        token = token.replace('=', '%3D')
        url = 'https://www.youtube.com/browse_ajax?ctoken=' + token
        cookie = generic_cookie

    return util.fetch_url(url,
                          headers_desktop + cookie,
                          debug_name='channel_tab',
                          report_text=message)
Ejemplo n.º 3
0
def get_channel_page_general_url(base_url, tab, request, channel_id=None):
    """Render a channel page for a general (non /channel/<id>) url.

    base_url   -- youtube url prefix for the channel, no trailing slash
    tab        -- 'videos', 'about', 'playlists', or 'search'
    request    -- flask-style request; args supply page/sort/view/query
    channel_id -- if known, enables the faster id-based endpoints
    """

    page_number = int(request.args.get('page', 1))
    sort = request.args.get('sort', '3')
    view = request.args.get('view', '1')
    query = request.args.get('query', '')

    if tab == 'videos' and channel_id:
        # Fetch the video count and tab contents concurrently
        tasks = (
            gevent.spawn(get_number_of_videos_channel, channel_id), 
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'videos':
        tasks = (
            gevent.spawn(get_number_of_videos_general, base_url), 
            gevent.spawn(util.fetch_url, base_url + '/videos?pbj=1&view=0', headers_desktop, debug_name='gen_channel_videos')
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value
    elif tab == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', headers_desktop, debug_name='gen_channel_about')
    elif tab == 'playlists':
        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], headers_desktop, debug_name='gen_channel_playlists')
    elif tab == 'search' and channel_id:
        polymer_json = get_channel_search_json(channel_id, query, page_number)
    elif tab == 'search':
        url = base_url + '/search?pbj=1&query=' + urllib.parse.quote(query, safe='')
        polymer_json = util.fetch_url(url, headers_desktop, debug_name='gen_channel_search')
    else:
        flask.abort(404, 'Unknown channel tab: ' + tab)


    info = yt_data_extract.extract_channel_info(json.loads(polymer_json), tab)
    if info['error'] is not None:
        return flask.render_template('error.html', error_message = info['error'])

    post_process_channel_info(info)
    if tab == 'videos':
        # 30 results per page
        info['number_of_videos'] = number_of_videos
        info['number_of_pages'] = math.ceil(number_of_videos/30)
        info['header_playlist_names'] = local_playlist.get_playlist_names()
    if tab in ('videos', 'playlists'):
        info['current_sort'] = sort
    elif tab == 'search':
        info['search_box_value'] = query
        info['header_playlist_names'] = local_playlist.get_playlist_names()
        info['page_number'] = page_number
    info['subscribed'] = subscriptions.is_subscribed(info['channel_id'])

    return flask.render_template('channel.html',
        parameters_dictionary = request.args,
        **info
    )
Ejemplo n.º 4
0
def test_exit_node_retry(monkeypatch, new_identities_till_success):
    """Check that fetch_url keeps requesting new tor identities until one
    succeeds, and raises FetchError with code 429 when more identities than
    MAX_TRIES would be needed."""
    new_identity_state = NewIdentityState(new_identities_till_success)
    # https://docs.pytest.org/en/stable/monkeypatch.html
    monkeypatch.setattr(settings, 'route_tor', 1)
    monkeypatch.setattr(util, 'tor_manager', util.TorManager()) # fresh one
    # Route both the stem controller (NEWNYM signals) and the network fetch
    # through the fake that counts identity changes
    MockController.signal = new_identity_state.new_identity
    monkeypatch.setattr(stem.control, 'Controller', MockController)
    monkeypatch.setattr(util, 'fetch_url_response',
                        new_identity_state.fetch_url_response)
    if new_identities_till_success <= NewIdentityState.MAX_TRIES:
        assert util.fetch_url('url') == b'success'
    else:
        with pytest.raises(util.FetchError) as excinfo:
            util.fetch_url('url')
        assert int(excinfo.value.code) == 429
Ejemplo n.º 5
0
def get_channel_page(env, start_response):
    """WSGI handler for /channel/<id>[/<tab>] pages.

    Renders the videos, about, playlists, or search tab as html, or
    responds 404 for an unknown tab.
    """
    path_parts = env['path_parts']
    channel_id = path_parts[1]
    try:
        tab = path_parts[2]
    except IndexError:
        # no tab segment in the url; default to the videos tab
        tab = 'videos'
    
    parameters = env['parameters']
    page_number = int(util.default_multi_get(parameters, 'page', 0, default='1'))
    sort = util.default_multi_get(parameters, 'sort', 0, default='3')
    view = util.default_multi_get(parameters, 'view', 0, default='1')
    query = util.default_multi_get(parameters, 'query', 0, default='')

    if tab == 'videos':
        # fetch the video count and the tab contents concurrently
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id ), 
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

        result = channel_videos_html(polymer_json, page_number, sort, number_of_videos, env['QUERY_STRING'])
    elif tab == 'about':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        result = channel_about_page(polymer_json)
    elif tab == 'playlists':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1)
        '''with open('debug/channel_playlists_debug', 'wb') as f:
            f.write(polymer_json)'''
        polymer_json = json.loads(polymer_json)
        result = channel_playlists_html(polymer_json, sort)
    elif tab == 'search':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id ), 
            gevent.spawn(get_channel_search_json, channel_id, query, page_number)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

        result = channel_search_page(polymer_json, query, page_number, number_of_videos, env['QUERY_STRING'])
    else:
        start_response('404 Not Found', [('Content-type', 'text/plain'),])
        return b'Unknown channel tab: ' + tab.encode('utf-8')

    start_response('200 OK', [('Content-type','text/html'),])
    return result.encode('utf-8')
Ejemplo n.º 6
0
def get_channel_page(channel_id, tab='videos'):
    """Render one tab of a channel page, given the channel id.

    Reads page/sort/view/query from the flask request args; aborts 404
    for an unknown tab.
    """

    page_number = int(request.args.get('page', 1))
    sort = request.args.get('sort', '3')
    view = request.args.get('view', '1')
    query = request.args.get('query', '')


    if tab == 'videos':
        # fetch the video count and tab contents concurrently
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id ), 
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

    elif tab == 'about':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', util.desktop_ua + headers_1, debug_name='channel_about')
    elif tab == 'playlists':
        polymer_json = util.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1&sort=' + playlist_sort_codes[sort], util.desktop_ua + headers_1, debug_name='channel_playlists')
    elif tab == 'search':
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id ), 
            gevent.spawn(get_channel_search_json, channel_id, query, page_number)
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

    else:
        flask.abort(404, 'Unknown channel tab: ' + tab)


    info = extract_info(json.loads(polymer_json), tab)
    post_process_channel_info(info)
    if tab in ('videos', 'search'):
        # 30 results per page
        info['number_of_videos'] = number_of_videos
        info['number_of_pages'] = math.ceil(number_of_videos/30)
        info['header_playlist_names'] = local_playlist.get_playlist_names()
    if tab in ('videos', 'playlists'):
        info['current_sort'] = sort
    elif tab == 'search':
        info['search_box_value'] = query


    return flask.render_template('channel.html',
        parameters_dictionary = request.args,
        **info
    )
Ejemplo n.º 7
0
def serve_subscription_thumbnail(thumbnail):
    '''Serves thumbnail from disk if it's been saved already. If not, downloads the thumbnail, saves to disk, and serves it.'''
    assert thumbnail[-4:] == '.jpg'
    video_id = thumbnail[0:-4]
    thumbnail_path = os.path.join(thumbnails_directory, thumbnail)

    if video_id in existing_thumbnails:
        try:
            # with-statement so the handle is closed even if read() raises
            with open(thumbnail_path, 'rb') as f:
                image = f.read()
        except FileNotFoundError:
            # cache entry is stale; drop it and fall through to re-download
            existing_thumbnails.remove(video_id)
        else:
            return flask.Response(image, mimetype='image/jpeg')

    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    try:
        image = util.fetch_url(url, report_text="Saved thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        abort(e.code)
    try:
        with open(thumbnail_path, 'wb') as f:
            f.write(image)
    except FileNotFoundError:
        # thumbnails directory doesn't exist yet; create it and retry
        os.makedirs(thumbnails_directory, exist_ok=True)
        with open(thumbnail_path, 'wb') as f:
            f.write(image)
    existing_thumbnails.add(video_id)

    return flask.Response(image, mimetype='image/jpeg')
Ejemplo n.º 8
0
def decrypt_signatures(info):
    '''return error string, or False if no errors'''
    # Nothing to do when the formats aren't signature-scrambled
    if not yt_data_extract.requires_decryption(info):
        return False
    if not info['player_name']:
        return 'Could not find player name'
    if not info['base_js']:
        return 'Failed to find base.js'

    player_name = info['player_name']
    if player_name in decrypt_cache:
        print('Using cached decryption function for: ' + player_name)
        info['decryption_function'] = decrypt_cache[player_name]
    else:
        # Download the player script, extract its descrambling routine,
        # and cache it keyed by player version for future calls
        base_js = util.fetch_url(info['base_js'],
                                 debug_name='base.js',
                                 report_text='Fetched player ' + player_name)
        base_js = base_js.decode('utf-8')
        err = yt_data_extract.extract_decryption_function(info, base_js)
        if err:
            return err
        decrypt_cache[player_name] = info['decryption_function']
        save_decrypt_cache()
    err = yt_data_extract.decrypt_signatures(info)
    return err
Ejemplo n.º 9
0
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
                    ctoken=None, print_status=True):
    """POST the innertube browse endpoint for one channel tab page.

    A continuation token is generated from the paging arguments unless an
    explicit ctoken is supplied.
    """
    message = 'Got channel tab' if print_status else None

    if not ctoken:
        ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
        ctoken = ctoken.replace('=', '%3D')

    # Not sure what the purpose of the key is or whether it will change
    # For now it seems to be constant for the API endpoint, not dependent
    # on the browsing session or channel
    key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'

    request_body = json.dumps({
        'context': {
            'client': {
                'hl': 'en',
                'gl': 'US',
                'clientName': 'WEB',
                'clientVersion': '2.20180830',
            },
        },
        'continuation': ctoken,
    })

    return util.fetch_url(
        'https://www.youtube.com/youtubei/v1/browse?key=' + key,
        headers_desktop + (('Content-Type', 'application/json'),),
        data=request_body, debug_name='channel_tab', report_text=message)
Ejemplo n.º 10
0
def get_channel_search_json(channel_id, query, page):
    """Search within a channel via the innertube browse endpoint.

    Builds a protobuf continuation token encoding the channel, the query,
    and a 30-results-per-page offset, then POSTs it as JSON.
    """
    result_offset = proto.unpadded_b64encode(proto.uint(3, (page-1)*30))
    search_params = proto.percent_b64encode(
        proto.string(2, 'search') + proto.string(15, result_offset))
    inner = (proto.string(2, channel_id)
             + proto.string(3, search_params)
             + proto.string(11, query))
    ctoken = base64.urlsafe_b64encode(
        proto.nested(80226972, inner)).decode('ascii')

    key = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'

    request_body = json.dumps({
        'context': {
            'client': {
                'hl': 'en',
                'gl': 'US',
                'clientName': 'WEB',
                'clientVersion': '2.20180830',
            },
        },
        'continuation': ctoken,
    })

    return util.fetch_url(
        'https://www.youtube.com/youtubei/v1/browse?key=' + key,
        headers_desktop + (('Content-Type', 'application/json'),),
        data=request_body, debug_name='channel_search')
Ejemplo n.º 11
0
def get_number_of_videos(channel_id):
    """Return a channel's video count, read from its uploads playlist page.

    Falls back to 1000 when the playlist request 403s, and to 0 when the
    count can't be located in the response.
    """
    # The uploads playlist id is the channel id with its 'UC' prefix
    # swapped for 'UU'
    uploads_playlist_id = 'UU' + channel_id[2:]
    url = 'https://m.youtube.com/playlist?list=' + uploads_playlist_id + '&pbj=1'
    print("Getting number of videos")

    # Sometimes retrieving playlist info fails with 403 for no discernable reason
    try:
        response = util.fetch_url(url, util.mobile_ua + headers_pbj)
    except urllib.error.HTTPError as e:
        if e.code != 403:
            raise
        print("Couldn't retrieve number of videos")
        return 1000

    text = response.decode('utf-8')
    print("Got response for number of videos")

    match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', text)
    return int(match.group(1).replace(',', '')) if match else 0
Ejemplo n.º 12
0
 def req(url, f_req, note, errnote):
     """POST one step of the Google accounts sign-in flow and return the
     parsed JSON reply.

     f_req is serialized into the form body alongside the constant sign-in
     metadata fields. errnote is accepted for interface compatibility;
     it is not used in this body.
     """
     data = login_form.copy()
     data.update({
         'pstMsg': 1,
         'checkConnection': 'youtube',
         'checkedDomains': 'youtube',
         'hl': 'en',
         'deviceinfo':
         '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
         'f.req': json.dumps(f_req),
         'flowName': 'GlifWebSignIn',
         'flowEntry': 'ServiceLogin',
         'bgRequest': '["identifier",""]',
     })
     headers = {
         'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
         'Google-Accounts-XSRF': 1,
     }
     headers.update(yt_dl_headers)
     result = util.fetch_url(url,
                             headers,
                             report_text=note,
                             data=data,
                             cookiejar_send=cookiejar,
                             cookiejar_receive=cookiejar,
                             use_tor=use_tor,
                             debug_name=note).decode('utf-8')
     # The response is prefixed with junk before the first '['; strip it
     # so json.loads sees a clean array
     result = re.sub(r'^[^\[]*', '', result)
     return json.loads(result)
Ejemplo n.º 13
0
def request_comments(ctoken, replies=False):
    """Fetch raw comment (or reply) data for a continuation token.

    YouTube occasionally answers with an html page instead of json, so
    the request is retried up to 8 times in that case. A ")]}'" junk
    prefix on valid responses is stripped.
    """
    if replies:  # replies use a different url despite identical encoding
        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
    else:
        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    for _ in range(8):  # retry cap
        content = util.fetch_url(url,
                                 headers=mobile_headers,
                                 report_text="Retrieved comments")
        if content.startswith(b")]}'"):
            # random closing characters included at beginning of response
            content = content[4:]
        elif content.startswith(b'\n<!DOCTYPE'):
            content = b''
            print("got <!DOCTYPE>, retrying")
            continue
        break

    return content
Ejemplo n.º 14
0
def get_number_of_videos_channel(channel_id):
    """Return a channel's video count via its uploads playlist page.

    Returns 1000 when the id is unknown or the request fails, and 0 when
    the count can't be found in the response.
    """
    if channel_id is None:
        return 1000

    # Uploads playlist id is the channel id with 'UC' swapped for 'UU'
    uploads_id = 'UU' + channel_id[2:]
    url = 'https://m.youtube.com/playlist?list=' + uploads_id + '&pbj=1'

    try:
        response = util.fetch_url(url,
                                  headers_mobile,
                                  debug_name='number_of_videos',
                                  report_text='Got number of videos')
    except urllib.error.HTTPError:
        traceback.print_exc()
        print("Couldn't retrieve number of videos")
        return 1000

    text = response.decode('utf-8')
    match = re.search(
        r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"',
        text)
    return int(match.group(1).replace(',', '')) if match else 0
Ejemplo n.º 15
0
def get_transcript(caption_path):
    """Fetch a WebVTT caption file and render it as a plain-text transcript.

    Each output line is 'begin-timestamp text'. For untranslated automatic
    captions, only the bottom display row of each cue is kept to avoid the
    repeated content in ASR output.
    """
    try:
        captions = util.fetch_url(
            'https://www.youtube.com/' + caption_path + '?' +
            request.environ['QUERY_STRING']).decode('utf-8')
    except util.FetchError as e:
        msg = ('Error retrieving captions: ' + str(e) + '\n\n' +
               'The caption url may have expired.')
        print(msg)
        return flask.Response(msg,
                              status=e.code,
                              mimetype='text/plain;charset=UTF-8')

    lines = captions.splitlines()
    segments = []

    # skip captions file header
    i = 0
    while lines[i] != '':
        i += 1

    # Parse cue blocks: a blank line starts a new segment, a timing line
    # ('begin --> end') fills its bounds, anything else is caption text.
    current_segment = None
    while i < len(lines):
        line = lines[i]
        if line == '':
            if ((current_segment is not None)
                    and (current_segment['begin'] is not None)):
                segments.append(current_segment)
            current_segment = {
                'begin': None,
                'end': None,
                'lines': [],
            }
        elif times_reg.fullmatch(line.rstrip()):
            current_segment['begin'], current_segment['end'] = line.split(
                ' --> ')
        else:
            # strip inline word-level <timestamp> markup from text lines
            current_segment['lines'].append(
                inner_timestamp_removal_reg.sub('', line))
        i += 1

    # if automatic captions, but not translated
    if request.args.get('kind') == 'asr' and not request.args.get('tlang'):
        # Automatic captions repeat content. The new segment is displayed
        # on the bottom row; the old one is displayed on the top row.
        # So grab the bottom row only
        for seg in segments:
            seg['text'] = seg['lines'][1]
    else:
        for seg in segments:
            seg['text'] = ' '.join(map(str.rstrip, seg['lines']))

    result = ''
    for seg in segments:
        if seg['text'] != ' ':
            result += seg['begin'] + ' ' + seg['text'] + '\r\n'

    return flask.Response(result.encode('utf-8'),
                          mimetype='text/plain;charset=UTF-8')
Ejemplo n.º 16
0
def _get_atoma_feed(channel_id):
    """Return the channel's Atom feed xml, or '' when the channel is gone
    or rate-limited."""
    url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + channel_id
    try:
        return util.fetch_url(url).decode('utf-8')
    except util.FetchError as e:
        if e.code not in ('404', '429'):
            raise
        # 404 is expected for terminated channels
        return ''
Ejemplo n.º 17
0
def get_channel_search_json(channel_id, query, page):
    """Request channel search results through the browse_ajax endpoint,
    encoding channel id, page, and query into a protobuf ctoken."""
    search_params = proto.percent_b64encode(
        proto.string(2, 'search') + proto.string(15, str(page)))
    inner = (proto.string(2, channel_id)
             + proto.string(3, search_params)
             + proto.string(11, query))
    ctoken = base64.urlsafe_b64encode(
        proto.nested(80226972, inner)).decode('ascii')

    return util.fetch_url(
        "https://www.youtube.com/browse_ajax?ctoken=" + ctoken,
        util.desktop_ua + headers_1, debug_name='channel_search')
Ejemplo n.º 18
0
def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
    """Fetch and decode the first page of a playlist's mobile JSON."""
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    raw = util.fetch_url(url,
                         util.mobile_ua + headers_1,
                         report_text=report_text,
                         debug_name='playlist_first_page')
    return json.loads(util.uppercase_escape(raw.decode('utf-8')))
Ejemplo n.º 19
0
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
    """Fetch the raw browse_ajax response for one page of a channel tab.

    The paging arguments are packed into a continuation token; '=' is
    percent-escaped because it appears inside a query-string value.
    """
    ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
    url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken

    print("Sending channel tab ajax request")
    content = util.fetch_url(url, util.desktop_ua + headers_1, debug_name='channel_tab')
    print("Finished receiving channel tab response")  # fixed 'recieving' typo

    return content
Ejemplo n.º 20
0
def get_channel_id(base_url):
    """Extract the channel id from a channel's mobile about page.

    The mobile about page is the smallest possible response (~4 kb) and
    rewriting the host up front avoids a redirect round-trip; this path
    needs to be as fast as possible. Returns None when no id is found.
    """
    mobile_url = base_url.replace('https://www', 'https://m')  # avoid redirect
    response = util.fetch_url(
        mobile_url + '/about?pbj=1', headers_mobile,
        debug_name='get_channel_id',
        report_text='Got channel id').decode('utf-8')
    match = channel_id_re.search(response)
    return match.group(1) if match else None
Ejemplo n.º 21
0
def playlist_first_page(playlist_id, report_text="Retrieved playlist"):
    """Fetch the first page of a playlist and return its decoded JSON."""
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1'
    raw = util.fetch_url(url,
                         util.mobile_ua + headers_1,
                         report_text=report_text)
    return json.loads(util.uppercase_escape(raw.decode('utf-8')))
Ejemplo n.º 22
0
def get_channel_search_json(channel_id, query, page):
    """Search within a channel via browse_ajax; returns the parsed JSON."""
    search_params = proto.percent_b64encode(
        proto.string(2, 'search') + proto.string(15, str(page)))
    inner = (proto.string(2, channel_id)
             + proto.string(3, search_params)
             + proto.string(11, query))
    ctoken = base64.urlsafe_b64encode(
        proto.nested(80226972, inner)).decode('ascii')

    raw = util.fetch_url(
        "https://www.youtube.com/browse_ajax?ctoken=" + ctoken,
        util.desktop_ua + headers_1)
    return json.loads(raw)
Ejemplo n.º 23
0
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
    """Fetch one page of a channel tab via browse_ajax and return the
    parsed JSON.

    The paging arguments are packed into a continuation token; '=' is
    percent-escaped because the token is placed in a query-string value.
    """
    ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
    url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken

    print("Sending channel tab ajax request")
    content = util.fetch_url(url, util.desktop_ua + headers_1)
    print("Finished receiving channel tab response")  # fixed 'recieving' typo

    info = json.loads(content)
    return info
Ejemplo n.º 24
0
def get_channel_page_general_url(env, start_response):
    """WSGI handler for channel pages reached via /user/, /c/, or a
    top-level vanity url, with an optional trailing /[page] segment."""
    path_parts = env['path_parts']

    # top-level vanity urls have no 'user'/'c' prefix segment
    is_toplevel = not path_parts[0] in ('user', 'c')

    if len(path_parts) + int(is_toplevel) == 3:       # has /[page] after it
        page = path_parts[2]
        base_url = 'https://www.youtube.com/' + '/'.join(path_parts[0:-1])
    elif len(path_parts) + int(is_toplevel) == 2:     # does not have /[page] after it, use /videos by default
        page = 'videos'
        base_url = 'https://www.youtube.com/' + '/'.join(path_parts)
    else:
        start_response('404 Not Found', [('Content-type', 'text/plain'),])
        return b'Invalid channel url'

    if page == 'videos':
        polymer_json = util.fetch_url(base_url + '/videos?pbj=1&view=0', util.desktop_ua + headers_1)
        '''with open('debug/user_page_videos', 'wb') as f:
            f.write(polymer_json)'''
        polymer_json = json.loads(polymer_json)
        result = channel_videos_html(polymer_json)
    elif page == 'about':
        polymer_json = util.fetch_url(base_url + '/about?pbj=1', util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        result = channel_about_page(polymer_json)
    elif page == 'playlists':
        polymer_json = util.fetch_url(base_url+ '/playlists?pbj=1&view=1', util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        result = channel_playlists_html(polymer_json)
    elif page == 'search':
        # search on general urls is not supported yet
        raise NotImplementedError()
        '''polymer_json = util.fetch_url('https://www.youtube.com/user' + username +  '/search?pbj=1&' + query_string, util.desktop_ua + headers_1)
        polymer_json = json.loads(polymer_json)
        return channel_search_page('''
    else:
        start_response('404 Not Found', [('Content-type', 'text/plain'),])
        return b'Unknown channel page: ' + page.encode('utf-8')

    start_response('200 OK', [('Content-type','text/html'),])
    return result.encode('utf-8')
Ejemplo n.º 25
0
def get_videos(playlist_id, page):
    """Fetch one page of a playlist's videos (20 per page) as parsed JSON,
    using the mobile site's continuation endpoint."""

    url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1"
    # Mobile client headers; client name/version select the JSON response
    # format this parser expects
    headers = {
        'User-Agent': '  Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '2.20180508',
    }

    content = util.fetch_url(url, headers, report_text="Retrieved playlist", debug_name='playlist_videos')

    info = json.loads(util.uppercase_escape(content.decode('utf-8')))
    return info
Ejemplo n.º 26
0
def download_thumbnail(playlist_name, video_id):
    """Download video_id's mqdefault thumbnail into the playlist's
    thumbnail directory; silently gives up on HTTP errors."""
    url = "https://i.ytimg.com/vi/" + video_id + "/mqdefault.jpg"
    save_location = os.path.join(thumbnails_directory, playlist_name, video_id + ".jpg")
    try:
        thumbnail = util.fetch_url(url, report_text="Saved local playlist thumbnail: " + video_id)
    except urllib.error.HTTPError as e:
        print("Failed to download thumbnail for " + video_id + ": " + str(e))
        return
    # exist_ok avoids a FileExistsError race when two downloads create the
    # directory concurrently; with-statement closes the file on any error
    os.makedirs(os.path.join(thumbnails_directory, playlist_name), exist_ok=True)
    with open(save_location, 'wb') as f:
        f.write(thumbnail)
Ejemplo n.º 27
0
def request_comments(ctoken, replies=False):
    """Fetch and parse the mobile comments endpoint for a continuation
    token, optionally requesting replies instead of top-level comments."""
    action = ('action_get_comment_replies=1&ctoken=' if replies
              else 'action_get_comments=1&ctoken=')
    url = ('https://m.youtube.com/watch_comment?' + action
           + ctoken.replace("=", "%3D") + "&pbj=1")

    content = util.fetch_url(url,
                             headers=mobile_headers,
                             report_text='Retrieved comments',
                             debug_name='request_comments')
    return json.loads(content.decode('utf-8'))
Ejemplo n.º 28
0
def _post_comment_reply(text, video_id, parent_comment_id, session_token,
                        cookiejar):
    """Post a reply to a comment via the mobile service_ajax endpoint.

    Returns the status code string from the JSON response. The reply
    target and flags are packed into a percent-b64 protobuf blob embedded
    in the serialized endpoint ('sej') form field.
    """
    headers = {
        'User-Agent':
        'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '2.20180823',
        'Content-Type': 'application/x-www-form-urlencoded',
    }

    comment_params = proto.string(2, video_id) + proto.string(
        4, parent_comment_id) + proto.nested(5, proto.uint(1, 0)) + proto.uint(
            6, 0) + proto.uint(10, 1)
    comment_params = proto.percent_b64encode(comment_params).decode('ascii')

    sej = json.dumps({
        "clickTrackingParams": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=",
        "commandMetadata": {
            "webCommandMetadata": {
                "url": "/service_ajax",
                "sendPost": True
            }
        },
        "createCommentReplyEndpoint": {
            "createReplyParams": comment_params
        }
    })

    data_dict = {
        'comment_text': text,
        'sej': sej,
        'session_token': session_token,
    }
    data = urllib.parse.urlencode(data_dict).encode()

    content = util.fetch_url(
        "https://m.youtube.com/service_ajax?name=createCommentReplyEndpoint",
        headers=headers,
        data=data,
        cookiejar_send=cookiejar)

    code = json.loads(content)['code']
    print("Comment posting code: " + code)
    return code
    # Removed unreachable (and unterminated) debug-dump string literal
    # that followed the return statement.
Ejemplo n.º 29
0
def get_session_token(video_id, cookiejar):
    '''Get session token for a video, required to post/edit/delete comments.

    Modifies cookiejar with the youtube cookies needed for commenting.
    Tokens only work with the request format and cookies they were fetched
    with (youtube-dl's disable_polymer tokens work only for that format,
    mobile tokens only for mobile requests), so this must fetch the mobile
    watch page while sending and receiving the same cookiejar.
    '''
    headers = {'User-Agent': util.mobile_user_agent}
    mobile_page = util.fetch_url(
        'https://m.youtube.com/watch?v=' + video_id,
        headers,
        report_text="Retrieved session token for comment",
        cookiejar_send=cookiejar,
        cookiejar_receive=cookiejar).decode()
    match = xsrf_token_regex.search(mobile_page)
    if not match:
        raise Exception("Couldn't find xsrf_token")
    return match.group(1).replace("%3D", "=")
Ejemplo n.º 30
0
def proxy_site(env, start_response):
    """WSGI app that proxies the incoming request to the upstream https
    site named by SERVER_NAME, forwarding the upstream response headers."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
    }
    url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
    if env['QUERY_STRING']:
        url += '?' + env['QUERY_STRING']

    content, response = util.fetch_url(url, headers, return_response=True)

    headers = response.getheaders()
    if isinstance(headers, urllib3._collections.HTTPHeaderDict):
        # WSGI needs a list of (name, value) pairs, not urllib3's dict
        headers = headers.items()

    # NOTE(review): status is hardcoded to 200 regardless of the upstream
    # code — presumably util.fetch_url raises on non-2xx; confirm.
    start_response('200 OK', headers)
    return content