Example #1
0
def get_user_page(url, query_string=''):
    """Render a page of a channel addressed by its legacy /user/<name> URL.

    Args:
        url: Path remainder after "/user/", e.g. "someuser" or
            "someuser/about". Leading and trailing slashes are ignored.
            The first component is the username, the optional second one
            selects the page and defaults to 'videos'.
        query_string: Raw query string of the request. Currently unused
            because the only page that would need it ('search') is not
            implemented.

    Returns:
        Whatever the matching renderer (channel_videos_html,
        channel_about_page or channel_playlists_html) returns for the
        decoded polymer JSON.

    Raises:
        NotImplementedError: if the 'search' page is requested.
        ValueError: if the page component is not a known page name.
    """
    path_components = url.strip('/').split('/')
    username = path_components[0]
    page = path_components[1] if len(path_components) > 1 else 'videos'

    # Searching inside a /user/ channel has not been implemented yet.
    if page == 'search':
        raise NotImplementedError()

    # Each supported page differs only in the URL suffix and the renderer.
    if page == 'videos':
        suffix, render = '/videos?pbj=1', channel_videos_html
    elif page == 'about':
        suffix, render = '/about?pbj=1', channel_about_page
    elif page == 'playlists':
        suffix, render = '/playlists?pbj=1&view=1', channel_playlists_html
    else:
        raise ValueError('Unknown channel page: ' + page)

    polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + suffix, headers_1)
    return render(json.loads(polymer_json))
Example #2
0
def playlist_first_page(playlist_id):
    """Fetch the first page of a playlist from the mobile site as parsed JSON.

    The response body may start with the four bytes ``)]}'``; when present
    they are dropped before decoding. The UTF-8 text is passed through
    common.uppercase_escape before being handed to json.loads.

    Args:
        playlist_id: Playlist identifier, inserted verbatim into the URL.

    Returns:
        The object produced by json.loads for the response.
    """
    request_url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
    raw = common.fetch_url(request_url, common.mobile_ua + headers_1)

    # Drop the non-JSON guard prefix when the server sends it.
    if raw.startswith(b")]}'"):
        raw = raw[4:]

    text = common.uppercase_escape(raw.decode('utf-8'))
    return json.loads(text)
Example #3
0
def get_channel_page(url, query_string=''):
    """Render one tab of a channel addressed by /channel/<channel_id>.

    Args:
        url: Path remainder after "/channel/". The first component is the
            channel id; the optional second one is the tab name and
            defaults to 'videos'. Leading/trailing slashes are ignored.
        query_string: Raw query string. The 'page', 'sort', 'view' and
            'query' parameters are read from it (defaults '1', '3', '1'
            and '' respectively); 'page' is converted with int().

    Returns:
        The result of the renderer for the selected tab.

    Raises:
        ValueError: if the tab name is not one of 'videos', 'about',
            'playlists' or 'search' (int() may also raise it for a
            non-numeric 'page' parameter).
    """
    segments = url.strip('/').split('/')
    channel_id = segments[0]
    tab = segments[1] if len(segments) > 1 else 'videos'

    parameters = urllib.parse.parse_qs(query_string)

    def param(name, fallback):
        # First value of a query parameter, or the fallback when absent.
        return common.default_multi_get(parameters, name, 0, default=fallback)

    page_number = int(param('page', '1'))
    sort = param('sort', '3')
    view = param('view', '1')
    query = param('query', '')

    def fetch_with_video_count(fetcher, *fetcher_args):
        # Run the video-count lookup and the tab request as two greenlets
        # and wait for both before reading their results.
        jobs = [
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(fetcher, *fetcher_args),
        ]
        gevent.joinall(jobs)
        return jobs[0].value, jobs[1].value

    if tab == 'videos':
        number_of_videos, polymer_json = fetch_with_video_count(
            get_channel_tab, channel_id, page_number, sort, 'videos', view)
        return channel_videos_html(polymer_json, page_number, number_of_videos, query_string)

    if tab == 'about':
        about_body = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', headers_1)
        return channel_about_page(json.loads(about_body))

    if tab == 'playlists':
        playlists_body = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1', headers_1)
        return channel_playlists_html(json.loads(playlists_body))

    if tab == 'search':
        number_of_videos, polymer_json = fetch_with_video_count(
            get_channel_search_json, channel_id, query, page_number)
        return channel_search_page(polymer_json, query, page_number, number_of_videos, query_string)

    raise ValueError('Unknown channel tab: ' + tab)
Example #4
0
def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
    """Request one page of a channel tab through the browse_ajax endpoint.

    A continuation token is built by channel_ctoken from the arguments;
    its '=' characters are percent-encoded before being placed in the URL.
    Progress messages are printed before and after the request.

    Args:
        channel_id: Channel identifier passed to channel_ctoken.
        page: Page number passed to channel_ctoken.
        sort: Sort option passed to channel_ctoken.
        tab: Tab name passed to channel_ctoken.
        view: View option passed to channel_ctoken.

    Returns:
        The response body decoded with json.loads.
    """
    token = channel_ctoken(channel_id, page, sort, tab, view)
    request_url = "https://www.youtube.com/browse_ajax?ctoken=" + token.replace('=', '%3D')

    print("Sending channel tab ajax request")
    response_body = common.fetch_url(request_url, headers_1)
    print("Finished recieving channel tab response")

    return json.loads(response_body)
Example #5
0
def get_channel_search_json(channel_id, query, page):
    """Search within a channel via browse_ajax and return the parsed JSON.

    The continuation token is assembled with the proto helpers: an inner
    message (field 2 = 'search', field 15 = the page as a string) is
    percent-b64-encoded and embedded as field 3 of an outer message that
    also carries the channel id (field 2) and the query (field 11). The
    outer message is nested under field 80226972 and URL-safe
    base64-encoded.

    Args:
        channel_id: Channel to search in.
        query: Search terms.
        page: Result page; converted with str().

    Returns:
        The response body decoded with json.loads.
    """
    search_params = proto.percent_b64encode(
        proto.string(2, 'search') + proto.string(15, str(page))
    )
    message = proto.string(2, channel_id) + proto.string(3, search_params) + proto.string(11, query)
    ctoken = base64.urlsafe_b64encode(proto.nested(80226972, message)).decode('ascii')

    response_body = common.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, headers_1)
    return json.loads(response_body)
Example #6
0
def get_search_json(query, page, autocorrect, sort):
    """Run a site-wide search and return the parsed JSON response.

    Args:
        query: Search terms; encoded with urllib.parse.quote_plus.
        page: Passed to page_number_to_sp_parameter.
        autocorrect: Passed to page_number_to_sp_parameter.
        sort: Passed to page_number_to_sp_parameter.

    Returns:
        The response body decoded with json.loads.
    """
    request_headers = {
        'Host': 'www.youtube.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20180418',
    }

    search_url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
    # The 'sp' value has its '=' characters percent-encoded before it is
    # appended to the URL.
    sp_value = page_number_to_sp_parameter(page, autocorrect, sort).replace("=", "%3D")
    search_url = search_url + "&pbj=1&sp=" + sp_value

    return json.loads(common.fetch_url(search_url, headers=request_headers))
Example #7
0
def get_number_of_videos(channel_id):
    """Return the number of videos reported for a channel's uploads playlist.

    The uploads playlist id is formed by replacing the first two
    characters of the channel id with 'UU'. The mobile playlist page is
    fetched and the count is extracted from its "num_videos_text" entry
    with a regular expression; thousands separators are removed.

    Args:
        channel_id: Channel identifier.

    Returns:
        The video count as an int, or 0 when the pattern is not found.
    """
    uploads_playlist = 'UU' + channel_id[2:]
    request_url = 'https://m.youtube.com/playlist?list=' + uploads_playlist + '&ajax=1&disable_polymer=true'

    print("Getting number of videos")
    body = common.fetch_url(request_url, common.mobile_ua + headers_1).decode('utf-8')
    print("Got response for number of videos")

    found = re.search(r'"num_videos_text":\s*{(?:"item_type":\s*"formatted_string",)?\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', body)
    if not found:
        return 0
    return int(found.group(1).replace(',', ''))
Example #8
0
def get_videos_ajax(playlist_id, page):
    """Fetch one page of a playlist's videos from the mobile ajax endpoint.

    Args:
        playlist_id: Playlist identifier passed to playlist_ctoken.
        page: 1-based page number (int or numeric string). The offset
            ``(int(page) - 1) * 20`` is passed to playlist_ctoken.

    Returns:
        The response decoded with json.loads after being passed through
        common.uppercase_escape.
    """
    url = "https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20)
    headers = {
        'User-Agent': '  Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '1.20180508',
    }
    print("Sending playlist ajax request")
    content = common.fetch_url(url, headers)
    # Only strip the ")]}'" guard prefix when it is actually present, the
    # same way the other mobile-endpoint fetchers do; blindly dropping four
    # bytes would corrupt a response that arrives without the prefix.
    if content[0:4] == b")]}'":
        content = content[4:]
    print("Finished recieving playlist response")

    return json.loads(common.uppercase_escape(content.decode('utf-8')))
Example #9
0
def request_comments(ctoken, replies=False):
    """Fetch raw comment data for a continuation token from the mobile site.

    The request is attempted up to 8 times. A response that starts with
    a newline followed by ``<!DOCTYPE`` (HTML instead of JSON) triggers
    a retry; any other response ends the loop, with a leading ``)]}'``
    prefix removed when present.

    Args:
        ctoken: Continuation token; its '=' characters are percent-encoded.
        replies: When true, the replies endpoint is used instead of the
            top-level comments endpoint.

    Returns:
        The response body as bytes, or b'' if every attempt returned HTML.
    """
    action = "action_get_comment_replies=1" if replies else "action_get_comments=1"
    url = "https://m.youtube.com/watch_comment?" + action + "&ctoken=" + ctoken.replace("=", "%3D") + "&pbj=1"

    print("Sending comments ajax request")
    for _ in range(8):  # give up after 8 attempts
        content = fetch_url(url, headers=mobile_headers)
        if content.startswith(b'\n<!DOCTYPE'):
            # HTML came back instead of JSON: discard it and try again.
            content = b''
            print("got <!DOCTYPE>, retrying")
            continue
        if content.startswith(b")]}'"):
            # Drop the guard characters that precede the JSON payload.
            content = content[4:]
        break
    return content
Example #10
0
def get_channel_id(username):
    """Look up the channel id that belongs to a legacy username.

    The mobile "about" page is requested (the original author notes it is
    the smallest response available, about 10 kB, chosen for speed) and
    the first "channel_id" value is extracted from it.

    Args:
        username: Username inserted verbatim into the URL.

    Returns:
        The channel id string.

    Raises:
        AttributeError: if no "channel_id" entry is found, because
            re.search then returns None.
    """
    about_url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
    page_text = common.fetch_url(about_url, common.mobile_ua + headers_1).decode('utf-8')
    id_match = re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', page_text)
    return id_match.group(1)
Example #11
0
def youtube(env, start_response):
    """WSGI application that routes requests to the local page handlers.

    GET requests serve static files listed in YOUTUBE_FILES, the
    comments/watch/search/playlist/channel/user/local-playlist pages, and
    proxy anything under /api/ to www.youtube.com. POST requests handle
    adding to and removing from local playlists. Any other method is
    answered with 501.

    Args:
        env: WSGI environ dict; PATH_INFO, REQUEST_METHOD, QUERY_STRING
            and (for POST) wsgi.input are read.
        start_response: WSGI start_response callable.

    Returns:
        The response body as bytes (empty for the 204 response).
    """
    path, method, query_string = env['PATH_INFO'], env['REQUEST_METHOD'], env['QUERY_STRING']
    html_headers = (('Content-type', 'text/html'),)

    if method == "GET":
        if path in YOUTUBE_FILES:
            with open("youtube" + path, 'rb') as f:
                mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream'
                start_response('200 OK', (('Content-type', mime_type),))
                return f.read()

        elif path == "/comments":
            start_response('200 OK', html_headers)
            return comments.get_comments_page(query_string).encode()

        elif path == "/watch":
            start_response('200 OK', html_headers)
            return watch.get_watch_page(query_string).encode()

        elif path == "/search":
            start_response('200 OK', html_headers)
            return search.get_search_page(query_string).encode()

        elif path == "/playlist":
            start_response('200 OK', html_headers)
            return playlist.get_playlist_page(query_string).encode()

        elif path.startswith("/channel/"):
            start_response('200 OK', html_headers)
            return channel.get_channel_page(path[9:], query_string=query_string).encode()

        elif path.startswith("/user/"):
            start_response('200 OK', html_headers)
            return channel.get_user_page(path[6:], query_string=query_string).encode()

        elif path.startswith("/playlists"):
            start_response('200 OK', html_headers)
            return local_playlist.get_playlist_page(path[10:], query_string=query_string).encode()

        elif path.startswith("/api/"):
            # Proxy the request upstream and remove the caption alignment
            # settings from the returned body.
            start_response('200 OK', ())
            result = common.fetch_url('https://www.youtube.com' + path + ('?' + query_string if query_string else ''))
            result = result.replace(b"align:start position:0%", b"")
            return result

        else:
            start_response('404 Not Found', ())
            return b'404 Not Found'

    elif method == "POST":
        fields = urllib.parse.parse_qs(env['wsgi.input'].read().decode())
        # A form without an 'action' field is answered with 400 below
        # instead of failing with a KeyError.
        action = fields.get('action', [None])[0]

        if path == "/edit_playlist":
            if action == 'add':
                local_playlist.add_to_playlist(fields['playlist_name'][0], fields['video_info_list'])
                start_response('204 No Content', ())
                # A WSGI application must always return a body iterable,
                # even when the response has no content.
                return b''
            else:
                start_response('400 Bad Request', ())
                return b'400 Bad Request'

        elif path.startswith("/playlists"):
            if action == 'remove':
                # Skip "/playlists/" (11 characters) to get the playlist name.
                playlist_name = path[11:]
                local_playlist.remove_from_playlist(playlist_name, fields['video_info_list'])
                start_response('200 OK', ())
                return local_playlist.get_playlist_page(playlist_name).encode()
            else:
                start_response('400 Bad Request', ())
                return b'400 Bad Request'

        else:
            start_response('404 Not Found', ())
            return b'404 Not Found'

    else:
        start_response('501 Not Implemented', ())
        return b'501 Not Implemented'