def get_user_page(url, query_string=''):
    path_components = url.rstrip('/').lstrip('/').split('/')
    username = path_components[0]
    try:
        page = path_components[1]
    except IndexError:
        page = 'videos'

    if page == 'videos':
        polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/videos?pbj=1', headers_1)
        polymer_json = json.loads(polymer_json)
        return channel_videos_html(polymer_json)
    elif page == 'about':
        polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/about?pbj=1', headers_1)
        polymer_json = json.loads(polymer_json)
        return channel_about_page(polymer_json)
    elif page == 'playlists':
        polymer_json = common.fetch_url('https://www.youtube.com/user/' + username + '/playlists?pbj=1&view=1', headers_1)
        polymer_json = json.loads(polymer_json)
        return channel_playlists_html(polymer_json)
    elif page == 'search':
        raise NotImplementedError()
        '''polymer_json = common.fetch_url('https://www.youtube.com/user' + username + '/search?pbj=1&' + query_string, headers_1)
        polymer_json = json.loads(polymer_json)
        return channel_search_page('''
    else:
        raise ValueError('Unknown channel page: ' + page)

def playlist_first_page(playlist_id):
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
    content = common.fetch_url(url, common.mobile_ua + headers_1)
    if content[0:4] == b")]}'":     # anti-hijacking prefix prepended to some ajax responses
        content = content[4:]
    content = json.loads(common.uppercase_escape(content.decode('utf-8')))

    return content

def get_channel_page(url, query_string=''):
    path_components = url.rstrip('/').lstrip('/').split('/')
    channel_id = path_components[0]
    try:
        tab = path_components[1]
    except IndexError:
        tab = 'videos'

    parameters = urllib.parse.parse_qs(query_string)
    page_number = int(common.default_multi_get(parameters, 'page', 0, default='1'))
    sort = common.default_multi_get(parameters, 'sort', 0, default='3')
    view = common.default_multi_get(parameters, 'view', 0, default='1')
    query = common.default_multi_get(parameters, 'query', 0, default='')

    if tab == 'videos':
        # fetch the video count and the videos tab concurrently
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_tab, channel_id, page_number, sort, 'videos', view),
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

        return channel_videos_html(polymer_json, page_number, number_of_videos, query_string)
    elif tab == 'about':
        polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/about?pbj=1', headers_1)
        polymer_json = json.loads(polymer_json)
        return channel_about_page(polymer_json)
    elif tab == 'playlists':
        polymer_json = common.fetch_url('https://www.youtube.com/channel/' + channel_id + '/playlists?pbj=1&view=1', headers_1)
        '''with open('debug/channel_playlists_debug', 'wb') as f:
            f.write(polymer_json)'''
        polymer_json = json.loads(polymer_json)
        return channel_playlists_html(polymer_json)
    elif tab == 'search':
        # fetch the video count and the search results concurrently
        tasks = (
            gevent.spawn(get_number_of_videos, channel_id),
            gevent.spawn(get_channel_search_json, channel_id, query, page_number),
        )
        gevent.joinall(tasks)
        number_of_videos, polymer_json = tasks[0].value, tasks[1].value

        return channel_search_page(polymer_json, query, page_number, number_of_videos, query_string)
    else:
        raise ValueError('Unknown channel tab: ' + tab)

def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1):
    # '=' must be percent-encoded so the base64 padding survives as a URL parameter
    ctoken = channel_ctoken(channel_id, page, sort, tab, view).replace('=', '%3D')
    url = "https://www.youtube.com/browse_ajax?ctoken=" + ctoken

    print("Sending channel tab ajax request")
    content = common.fetch_url(url, headers_1)
    print("Finished receiving channel tab response")

    '''with open('debug/channel_debug', 'wb') as f:
        f.write(content)'''
    info = json.loads(content)
    return info

def get_channel_search_json(channel_id, query, page):
    # build the continuation token out of protobuf fields: the search parameters are
    # base64-encoded separately, then wrapped together with the channel id and query
    params = proto.string(2, 'search') + proto.string(15, str(page))
    params = proto.percent_b64encode(params)
    ctoken = proto.string(2, channel_id) + proto.string(3, params) + proto.string(11, query)
    ctoken = base64.urlsafe_b64encode(proto.nested(80226972, ctoken)).decode('ascii')

    polymer_json = common.fetch_url("https://www.youtube.com/browse_ajax?ctoken=" + ctoken, headers_1)
    '''with open('debug/channel_search_debug', 'wb') as f:
        f.write(polymer_json)'''
    polymer_json = json.loads(polymer_json)

    return polymer_json

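# The proto helpers used above are not defined in this file. For reference, a minimal
# sketch of what they might look like, assuming they implement the standard protobuf
# wire format (varint-encoded field keys, wire type 2 for length-delimited fields).
# The names below are illustrative and deliberately distinct from the real module,
# whose implementation may differ.
def _sketch_varint(n):
    # base-128 varint encoding of a non-negative integer
    data = b''
    while True:
        byte = n & 0x7f
        n >>= 7
        if n:
            data += bytes((byte | 0x80,))
        else:
            return data + bytes((byte,))

def _sketch_string(field_number, data):
    # length-delimited field (wire type 2); accepts str or bytes
    if isinstance(data, str):
        data = data.encode('utf-8')
    return _sketch_varint((field_number << 3) | 2) + _sketch_varint(len(data)) + data

# an embedded message is encoded the same way as a string field
_sketch_nested = _sketch_string

def _sketch_percent_b64encode(data):
    # urlsafe base64 with the '=' padding percent-encoded, so the result can be
    # embedded directly in a URL query parameter (reuses the base64 import above)
    return base64.urlsafe_b64encode(data).replace(b'=', b'%3D')
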
def get_search_json(query, page, autocorrect, sort):
    url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
    headers = {
        'Host': 'www.youtube.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '1',
        'X-YouTube-Client-Version': '2.20180418',
    }
    url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort).replace("=", "%3D")

    content = common.fetch_url(url, headers=headers)
    info = json.loads(content)
    return info

def get_number_of_videos(channel_id):
    # the uploads playlist of channel 'UC<suffix>' has the id 'UU<suffix>'
    playlist_id = 'UU' + channel_id[2:]
    url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true'
    print("Getting number of videos")
    response = common.fetch_url(url, common.mobile_ua + headers_1)
    '''with open('debug/playlist_debug_metadata', 'wb') as f:
        f.write(response)'''
    response = response.decode('utf-8')
    print("Got response for number of videos")

    match = re.search(r'"num_videos_text":\s*{(?:"item_type":\s*"formatted_string",)?\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response)
    if match:
        return int(match.group(1).replace(',', ''))
    else:
        return 0

def get_videos_ajax(playlist_id, page):
    url = "https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20)
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'X-YouTube-Client-Name': '2',
        'X-YouTube-Client-Version': '1.20180508',
    }
    print("Sending playlist ajax request")
    content = common.fetch_url(url, headers)
    '''with open('debug/playlist_debug', 'wb') as f:
        f.write(content)'''
    content = content[4:]   # strip the ")]}'" prefix prepended to the response
    print("Finished receiving playlist response")

    info = json.loads(common.uppercase_escape(content.decode('utf-8')))
    return info

def request_comments(ctoken, replies=False):
    if replies:  # let's make it use different urls for no reason despite all the data being encoded
        base_url = "https://m.youtube.com/watch_comment?action_get_comment_replies=1&ctoken="
    else:
        base_url = "https://m.youtube.com/watch_comment?action_get_comments=1&ctoken="
    url = base_url + ctoken.replace("=", "%3D") + "&pbj=1"

    print("Sending comments ajax request")
    for i in range(0, 8):   # don't retry more than 8 times
        content = fetch_url(url, headers=mobile_headers)
        if content[0:4] == b")]}'":  # random closing characters included at beginning of response for some reason
            content = content[4:]
        elif content[0:10] == b'\n<!DOCTYPE':  # occasionally returns html instead of json for no reason
            content = b''
            print("got <!DOCTYPE>, retrying")
            continue
        break
    '''with open('debug/comments_debug', 'wb') as f:
        f.write(content)'''
    return content

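# request_comments returns raw bytes (or b'' if every attempt came back as an HTML error
# page). A guess at how a caller might decode the result, mirroring the handling of the
# other mobile ajax endpoints above; the variable names are illustrative, not from the
# project:
#
#     content = request_comments(ctoken)
#     comments_json = json.loads(common.uppercase_escape(content.decode('utf-8'))) if content else {}
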
def get_channel_id(username):
    # this method (the about page with ajax=1) gives the smallest possible response, at ~10 kb;
    # it needs to be as fast as possible
    url = 'https://m.youtube.com/user/' + username + '/about?ajax=1&disable_polymer=true'
    response = common.fetch_url(url, common.mobile_ua + headers_1).decode('utf-8')
    return re.search(r'"channel_id":\s*"([a-zA-Z0-9_-]*)"', response).group(1)

def youtube(env, start_response):
    path, method, query_string = env['PATH_INFO'], env['REQUEST_METHOD'], env['QUERY_STRING']

    if method == "GET":
        if path in YOUTUBE_FILES:
            with open("youtube" + path, 'rb') as f:
                mime_type = mimetypes.guess_type(path)[0] or 'application/octet-stream'
                start_response('200 OK', (('Content-type', mime_type),))
                return f.read()
        elif path == "/comments":
            start_response('200 OK', (('Content-type', 'text/html'),))
            return comments.get_comments_page(query_string).encode()
        elif path == "/watch":
            start_response('200 OK', (('Content-type', 'text/html'),))
            return watch.get_watch_page(query_string).encode()
        elif path == "/search":
            start_response('200 OK', (('Content-type', 'text/html'),))
            return search.get_search_page(query_string).encode()
        elif path == "/playlist":
            start_response('200 OK', (('Content-type', 'text/html'),))
            return playlist.get_playlist_page(query_string).encode()
        elif path.startswith("/channel/"):
            start_response('200 OK', (('Content-type', 'text/html'),))
            return channel.get_channel_page(path[9:], query_string=query_string).encode()
        elif path.startswith("/user/"):
            start_response('200 OK', (('Content-type', 'text/html'),))
            return channel.get_user_page(path[6:], query_string=query_string).encode()
        elif path.startswith("/playlists"):
            start_response('200 OK', (('Content-type', 'text/html'),))
            return local_playlist.get_playlist_page(path[10:], query_string=query_string).encode()
        elif path.startswith("/api/"):
            # proxy the request to youtube.com and strip the positioning cue from caption files
            start_response('200 OK', ())
            result = common.fetch_url('https://www.youtube.com' + path + ('?' + query_string if query_string else ''))
            result = result.replace(b"align:start position:0%", b"")
            return result
        else:
            start_response('404 Not Found', ())
            return b'404 Not Found'

    elif method == "POST":
        fields = urllib.parse.parse_qs(env['wsgi.input'].read().decode())
        if path == "/edit_playlist":
            if fields['action'][0] == 'add':
                local_playlist.add_to_playlist(fields['playlist_name'][0], fields['video_info_list'])
                start_response('204 No Content', ())
                return b''      # an empty body must still be returned for WSGI
            else:
                start_response('400 Bad Request', ())
                return b'400 Bad Request'
        elif path.startswith("/playlists"):
            if fields['action'][0] == 'remove':
                playlist_name = path[11:]
                local_playlist.remove_from_playlist(playlist_name, fields['video_info_list'])
                start_response('200 OK', ())
                return local_playlist.get_playlist_page(playlist_name).encode()

            start_response('200 OK', (('Content-type', 'text/html'),))
            return local_playlist.get_playlist_page(path[10:], query_string=query_string).encode()
        else:
            start_response('400 Bad Request', ())
            return b'400 Bad Request'
    else:
        start_response('501 Not Implemented', ())
        return b'501 Not Implemented'

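if __name__ == '__main__':
    # For local testing, the WSGI app above can be served with gevent's built-in server,
    # which the channel code already depends on. This entry point is only a sketch: the
    # bind address and port are arbitrary choices, not taken from the project's configuration.
    from gevent.pywsgi import WSGIServer
    WSGIServer(('127.0.0.1', 8080), youtube).serve_forever()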