def resolve_playlist(url):
    """Resolve every video in a YouTube playlist to a Track.

    Pages through the playlistItems API (50 ids per page), then resolves
    all collected video ids in parallel on a thread pool.  Videos that
    fail to resolve (private/deleted) are filtered from the result.

    :param url: playlist id to resolve
    :returns: list of resolved tracks (no ``None`` entries)
    """
    resolve_pool = ThreadPool(processes=16)
    logger.info("Resolving YouTube-Playlist '%s'", url)
    playlist = []
    try:
        page = 'first'
        while page:
            params = {
                'playlistId': url,
                'maxResults': 50,
                'key': yt_key,
                'part': 'contentDetails'
            }
            if page and page != "first":
                logger.debug("Get YouTube-Playlist '%s' page %s", url, page)
                params['pageToken'] = page
            result = session.get(yt_api_endpoint + 'playlistItems',
                                 params=params)
            data = result.json()
            page = data.get('nextPageToken')
            # API error responses carry no 'items'; don't KeyError on them
            for item in data.get('items', []):
                playlist.append(item['contentDetails']['videoId'])
        playlist = resolve_pool.map(resolve_url, playlist)
    finally:
        # always release the pool, even if the HTTP/JSON handling raises
        resolve_pool.close()
    return [item for item in playlist if item]
def on_start(self):
    """Configure proxy and headers, verify the API key, and select the
    concrete API implementation (Data API or bs4 scraping fallback)."""
    proxy = httpclient.format_proxy(self.config["proxy"])
    youtube.Video.proxy = proxy
    headers = {
        "user-agent": httpclient.format_user_agent(self.user_agent),
        "Cookie": "PREF=hl=en;",
        "Accept-Language": "en;q=0.8",
    }
    if youtube.api_enabled is True:
        if youtube_api.youtube_api_key is None:
            logger.error("No YouTube API key provided, disabling API")
            youtube.api_enabled = False
        else:
            youtube.Entry.api = youtube_api.API(proxy, headers)
            key_ok = youtube.Entry.search(q="test") is not None
            if key_ok:
                logger.info("YouTube API key verified")
            else:
                logger.error(
                    "Failed to verify YouTube API key, disabling API"
                )
                youtube.api_enabled = False
    if youtube.api_enabled is False:
        # fall back to the BeautifulSoup-4 based scraping API
        logger.info("using bs4API")
        youtube.Entry.api = youtube_bs4api.bs4API(proxy, headers)
def list_videos(cls, ids):
    """Scrape title, duration and channel for each video id.

    Fetches the public /watch page per id and regex-extracts the
    schema.org meta tags, returning a plain dict shaped like the
    YouTube Data API 'videos' response: ``{'items': [...]}``.
    """
    # NOTE(review): this depends on the exact markup of the watch page;
    # any upstream HTML change silently yields zero matches.
    regex = (r'<div id="watch7-content"(?:.|\n)*?'
             r'<meta itemprop="name" content="'
             r'(?P<title>.*?)(?:">)(?:.|\n)*?'
             r'<meta itemprop="duration" content="'
             r'(?P<duration>.*?)(?:">)(?:.|\n)*?'
             r'<link itemprop="url" href="http://www.youtube.com/'
             r'(?:user|channel)/(?P<channelTitle>.*?)(?:">)(?:.|\n)*?'
             r'</div>')
    items = []
    for id in ids:
        query = {'v': id}
        logger.info('session.get triggered: list_videos')
        result = cls.session.get(scrAPI.endpoint + 'watch', params=query)
        for match in re.finditer(regex, result.text):
            item = {
                'id': id,
                'snippet': {
                    'title': match.group('title'),
                    'channelTitle': match.group('channelTitle'),
                },
                'contentDetails': {
                    'duration': match.group('duration'),
                }
            }
            items.append(item)
    # round-trip through json to mimic the API's plain-dict response
    return json.loads(
        json.dumps({'items': items}, sort_keys=False, indent=1))
def search(self, query=None, uris=None):
    """Mopidy search entry point.

    A 'uri' query pointing at youtube.com is resolved directly (playlist
    or single track); any other query is treated as free-text search.
    Returns a SearchResult, or None for an empty query.
    """
    if not query:
        return
    if 'uri' in query:
        search_query = ''.join(query['uri'])
        url = urlparse(search_query)
        if 'youtube.com' in url.netloc:
            req = parse_qs(url.query)
            if 'list' in req:
                return SearchResult(
                    uri='youtube:search',
                    tracks=resolve_playlist(req.get('list')[0])
                )
            else:
                logger.info(
                    "Resolving Youtube for track '%s'", search_query)
                return SearchResult(
                    uri='youtube:search',
                    tracks=[resolve_url(search_query)]
                )
    else:
        # dict.values() is a view on Python 3 and cannot be indexed;
        # materialize it before taking the first entry
        search_query = ''.join(list(query.values())[0])
        logger.info("Searching Youtube for query '%s'", search_query)
        return SearchResult(
            uri='youtube:search',
            tracks=search_youtube(search_query)
        )
def search_youtube(q):
    """Search YouTube for *q* and return up to 15 Tracks.

    Tracks are built from the search snippet only (length 0, no album
    art); entries that fail to parse are skipped.
    """
    query = {
        'part': 'id,snippet',
        'maxResults': 15,
        'type': 'video',
        'q': q,
        'key': yt_key
    }
    pl = requests.get(yt_api_endpoint + 'search', params=query)
    playlist = []
    # 'items' is absent on API error responses; fall back to empty
    resultlist = pl.json().get('items') or []
    for yt_id in resultlist:
        try:
            videoid = yt_id.get('id').get('videoId')
            videotitle = yt_id.get('snippet').get('title')
            uri = 'youtube:video/%s.%s' % (safe_url(videotitle), videoid)
            track = Track(
                name=videotitle,
                comment=videoid,
                length=0,
                album=Album(
                    name='Youtube',
                    images=[]
                ),
                uri=uri)
            playlist.append(track)
        except Exception as e:
            # Exception.message was removed in Python 3; log the exception
            logger.info(e)
    return playlist
def search(cls, q):
    """Search for videos and playlists, paging until enough results
    are collected, then truncate to Video.search_results items."""
    # 20 results come back per page; round up to cover search_results
    page_count = (int(Video.search_results / 20)
                  + (Video.search_results % 20 > 0))
    logger.info("session.get triggered: search")
    page_queries = []
    for page_index in range(page_count):
        page_queries.append({
            "search_query": q.replace(" ", "+"),
            "page": page_index + 1,
            "app": "desktop",
            "persist_app": 1,
        })
    collected = []
    for batch in [cls.run_search(pq) for pq in page_queries]:
        collected.extend(batch)
    trimmed = [x for _, x in zip(range(Video.search_results), collected)]
    return json.loads(
        json.dumps({"items": trimmed}, sort_keys=False, indent=1))
def list_videos(cls, ids):
    """Scrape title, duration and channel for each video id.

    Fetches the public /watch page per id and regex-extracts the
    schema.org meta tags, returning a plain dict shaped like the
    YouTube Data API 'videos' response: ``{"items": [...]}``.
    """
    # NOTE(review): tied to the exact watch-page markup; an upstream
    # HTML change silently yields zero matches.
    regex = (r'<div id="watch7-content"(?:.|\n)*?'
             r'<meta itemprop="name" content="'
             r'(?P<title>.*?)(?:">)(?:.|\n)*?'
             r'<meta itemprop="duration" content="'
             r'(?P<duration>.*?)(?:">)(?:.|\n)*?'
             r'<link itemprop="url" href="http://www.youtube.com/'
             r'(?:user|channel)/(?P<channelTitle>.*?)(?:">)(?:.|\n)*?'
             r"</div>")
    items = []
    for id in ids:
        query = {"v": id}
        logger.info("session.get triggered: list_videos")
        result = cls.session.get(cls.endpoint + "watch", params=query)
        for match in re.finditer(regex, result.text):
            item = {
                "id": id,
                "snippet": {
                    "title": match.group("title"),
                    "channelTitle": match.group("channelTitle"),
                },
                "contentDetails": {
                    "duration": match.group("duration")
                },
            }
            items.append(item)
    # round-trip through json to mimic the API's plain-dict response
    return json.loads(
        json.dumps({"items": items}, sort_keys=False, indent=1))
def on_start(self):
    """Configure proxy and headers, verify the API key, and fall back
    to the scraping API when the Data API is unavailable."""
    proxy = httpclient.format_proxy(self.config['proxy'])
    youtube.Video.proxy = proxy
    headers = {
        'user-agent': httpclient.format_user_agent(self.user_agent),
        'Cookie': 'PREF=hl=en;',
        'Accept-Language': 'en;q=0.8'
    }
    if youtube.api_enabled is True:
        if youtube.API.youtube_api_key is None:
            logger.error('No YouTube API key provided, disabling API')
            youtube.api_enabled = False
        else:
            youtube.Entry.api = youtube.API(proxy, headers)
            key_ok = youtube.Entry.search(q='test') is not None
            if key_ok:
                logger.info('YouTube API key verified')
            else:
                logger.error(
                    'Failed to verify YouTube API key, disabling API')
                youtube.api_enabled = False
    if youtube.api_enabled is False:
        # regex-scraping fallback
        logger.info('Using scrAPI')
        youtube.Entry.api = youtube.scrAPI(proxy, headers)
def search(cls, q):
    """Search YouTube page by page until Video.search_results hits are
    gathered, then truncate and wrap as an API-style dict."""
    # each scraped page yields 20 hits; round up to cover search_results
    page_total = (int(Video.search_results / 20)
                  + (Video.search_results % 20 > 0))
    logger.info("session.get triggered: search")
    page_queries = [
        {"search_query": q.replace(" ", "+"), "page": n + 1}
        for n in range(page_total)
    ]
    hits = []
    for batch in [cls.run_search(pq) for pq in page_queries]:
        hits.extend(batch)
    capped = [x for _, x in zip(range(Video.search_results), hits)]
    return json.loads(
        json.dumps({"items": capped}, sort_keys=False, indent=1))
def resolve_url(url, stream=False):
    """Resolve a YouTube url/id to a Track.

    With ``stream=True`` the Track's uri is the direct audio (or, as a
    fallback, video) stream url instead of a youtube:video/ uri.
    Returns None when the video is private, deleted, or has no uri.
    """
    try:
        video = pafy.new(url)
        if not stream:
            uri = 'youtube:video/%s.%s' % (
                safe_url(video.title), video.videoid
            )
        else:
            uri = video.getbestaudio()
            if not uri:  # get video url
                uri = video.getbest()
            logger.debug('%s - %s %s %s' % (
                video.title, uri.bitrate, uri.mediatype, uri.extension))
            uri = uri.url
        if not uri:
            return
    except Exception as e:
        # Video is private or doesn't exist; Exception.message was
        # removed in Python 3, so log the exception object itself
        logger.info(e)
        return
    track = Track(
        name=video.title,
        comment=video.videoid,
        length=video.length * 1000,
        album=Album(
            name='YouTube',
            images=[video.bigthumb, video.bigthumbhd]
        ),
        uri=uri
    )
    return track
def resolve_playlist(url):
    """Resolve every video in a YouTube playlist to a Track.

    Pages through the playlistItems API (50 ids per page).  Videos that
    fail to resolve (private/deleted) are dropped instead of yielding
    None entries, matching the ThreadPool-based variant's behavior.
    """
    logger.info("Resolving Youtube-Playlist '%s'", url)
    playlist = []
    page = 'first'
    while page:
        params = {
            'playlistId': url,
            'maxResults': 50,
            'key': yt_key,
            'part': 'contentDetails'
        }
        if page and page != "first":
            logger.debug("Get Youtube-Playlist '%s' page %s", url, page)
            params['pageToken'] = page
        result = requests.get(yt_api_endpoint + 'playlistItems',
                              params=params)
        data = result.json()
        page = data.get('nextPageToken')
        # API error responses carry no 'items'; don't KeyError on them
        for item in data.get('items', []):
            playlist.append(item['contentDetails']['videoId'])
    # resolve_url returns None for unavailable videos; filter those out
    return [t for t in (resolve_url(item) for item in playlist) if t]
def search(self, query=None, uris=None, exact=False):
    """Mopidy search entry point.

    A 'uri' query pointing at youtube.com is resolved directly (playlist
    or single track, with failed resolves filtered out); any other query
    is treated as free-text search.  Returns a SearchResult, or None for
    an empty query.
    """
    # TODO Support exact search
    if not query:
        return
    if 'uri' in query:
        search_query = ''.join(query['uri'])
        url = urlparse(search_query)
        if 'youtube.com' in url.netloc:
            req = parse_qs(url.query)
            if 'list' in req:
                return SearchResult(uri='youtube:search',
                                    tracks=resolve_playlist(
                                        req.get('list')[0]))
            else:
                logger.info(
                    "Resolving YouTube for track '%s'", search_query)
                return SearchResult(
                    uri='youtube:search',
                    tracks=[t for t in [resolve_url(search_query)] if t])
    else:
        # dict.values() is a view on Python 3 and cannot be indexed;
        # materialize it before taking the first entry
        search_query = ' '.join(list(query.values())[0])
        logger.info("Searching YouTube for query '%s'", search_query)
        return SearchResult(uri='youtube:search',
                            tracks=search_youtube(search_query))
def list_playlists(cls, ids):
    """list playlists - EXPERIMENTAL, using exact search for ids"""
    searches = [
        {
            "search_query": '"' + playlist_id + '"',
            "sp": "EgIQAw%3D%3D",
            "app": "desktop",
            "persist_app": 1,
        }
        for playlist_id in ids
    ]
    # run all searches first, keeping only the top hit of each
    top_hits = [cls.run_search(request)[0] for request in searches]
    found = []
    for hit in top_hits:
        logger.info("session.get triggered: list_playlists (experimental)")
        # flatten the nested id dict to the bare playlistId string
        hit.update({"id": hit["id"]["playlistId"]})
        found.extend([hit])
    return json.loads(
        json.dumps({"items": found}, sort_keys=False, indent=1)
    )
def resolve_url(url, stream=False):
    """Resolve a YouTube url/id to a Track.

    With ``stream=True`` the Track's uri is the direct audio (or, as a
    fallback, video) stream url instead of a youtube:video/ uri.
    Returns None when the video is private, deleted, or has no uri.
    """
    try:
        video = pafy.new(url)
        if not stream:
            uri = 'youtube:video/%s.%s' % (safe_url(
                video.title), video.videoid)
        else:
            uri = video.getbestaudio()
            if not uri:  # get video url
                uri = video.getbest()
            logger.debug(
                '%s - %s %s %s' % (video.title, uri.bitrate,
                                   uri.mediatype, uri.extension))
            uri = uri.url
        if not uri:
            return
    except Exception as e:
        # Video is private or doesn't exist; Exception.message was
        # removed in Python 3, so log the exception object itself
        logger.info(e)
        return
    track = Track(name=video.title,
                  comment=video.videoid,
                  length=video.length * 1000,
                  album=Album(name='YouTube',
                              images=[video.bigthumb, video.bigthumbhd]),
                  uri=uri)
    return track
def resolve_playlist(url):
    """Resolve every video in a YouTube playlist to a Track.

    Pages through the playlistItems API (50 ids per page), then resolves
    all collected video ids in parallel on a thread pool.  Videos that
    fail to resolve (private/deleted) are filtered from the result.
    """
    resolve_pool = ThreadPool(processes=16)
    logger.info("Resolving YouTube-Playlist '%s'", url)
    playlist = []
    try:
        page = 'first'
        while page:
            params = {
                'playlistId': url,
                'maxResults': 50,
                'key': yt_key,
                'part': 'contentDetails'
            }
            if page and page != "first":
                logger.debug("Get YouTube-Playlist '%s' page %s", url, page)
                params['pageToken'] = page
            result = session.get(yt_api_endpoint + 'playlistItems',
                                 params=params)
            data = result.json()
            page = data.get('nextPageToken')
            # API error responses carry no 'items'; don't KeyError on them
            for item in data.get('items', []):
                playlist.append(item['contentDetails']['videoId'])
        playlist = resolve_pool.map(resolve_url, playlist)
    finally:
        # always release the pool, even if the HTTP/JSON handling raises
        resolve_pool.close()
    return [item for item in playlist if item]
def list_playlistitems(cls, id, page, max_results):
    """list playlist items

    Scrapes the public playlist page for playlist *id* and regex-extracts
    each row's video id, title, channel, thumbnail and timestamp,
    returning a dict shaped like the Data API playlistItems response.
    The *page* argument is currently ignored (nextPageToken is None).
    """
    query = {"list": id, "app": "desktop", "persist_app": 1}
    logger.info("session.get triggered: list_playlist_items")
    result = cls.session.get(cls.endpoint + "playlist", params=query)
    # TODO: implement pagination to get playlist entries > 100
    # NOTE(review): tied to the exact playlist-page markup; an upstream
    # HTML change silently yields zero matches.
    regex = (r'<tr class\=\"pl-video.*\" data-title\=\"(?P<title>.+?)".*?'
             r"<a href\=\"\/watch\?v\=(?P<id>.{11})\&(?:.|\n)*?"
             r"(?P<thumbnail>https://i\.ytimg\.com\/vi\/.{11}/).*?\.jpg"
             r'(?:.|\n)*?<div class="pl-video-owner">(?:.|\n)*?'
             r"/(?:user|channel)/(?:.|\n)*? >(?P<channelTitle>.*?)</a>"
             r'(?:.|\n)*?<div class="timestamp">.*?">(?:(?:'
             r"(?P<durationHours>[0-9]+)\:)?"
             r"(?P<durationMinutes>[0-9]+)\:"
             r"(?P<durationSeconds>[0-9]{2}))"
             r"(?:.|\n)*?</div></td></tr>")
    items = []
    for match in re.finditer(regex, result.text):
        duration = cls.format_duration(match)
        item = {
            "id": match.group("id"),
            "snippet": {
                "resourceId": {
                    "videoId": match.group("id")
                },
                "title": match.group("title"),
                "channelTitle": match.group("channelTitle"),
                "thumbnails": {
                    "default": {
                        "url": match.group("thumbnail") + "default.jpg",
                        "width": 120,
                        "height": 90,
                    },
                },
            },
        }
        # only attach a duration when the timestamp regex matched
        if duration != "":
            item.update({"contentDetails": {"duration": "PT" + duration}})
        items.append(item)
    # truncate in place to the caller's page size
    del items[max_results:]
    # round-trip through json to mimic the API's plain-dict response
    result = json.loads(
        json.dumps(
            {
                "nextPageToken": None,
                "items": items
            },  # noqa: E501
            sort_keys=False,
            indent=1,
        ))
    return result
def list_playlists(cls, ids):
    """Fetch title, thumbnails, channel and item count for the given
    playlist ids via the YouTube Data API."""
    params = {
        "part": "id,snippet,contentDetails",
        "fields": ("items(id,snippet(title,thumbnails,channelTitle),"
                   "contentDetails(itemCount))"),
        "id": ",".join(ids),
        "key": youtube_api_key,
    }
    logger.info("session.get triggered: list_playlists")
    response = cls.session.get(API.endpoint + "playlists", params=params)
    return response.json()
def list_playlists(cls, ids):
    """Fetch title, thumbnails, channel and item count for the given
    playlist ids via the YouTube Data API."""
    params = {
        'part': 'id,snippet,contentDetails',
        'fields': ('items(id,snippet(title,thumbnails,channelTitle),'
                   'contentDetails(itemCount))'),
        'id': ','.join(ids),
        'key': API.youtube_api_key
    }
    logger.info('session.get triggered: list_playlists')
    response = cls.session.get(API.endpoint + 'playlists', params=params)
    return response.json()
def list_playlistitems(cls, id, page, max_results):
    """Scrape the items of playlist *id* and wrap them as an API-style
    response; pagination is unsupported (nextPageToken is always None)."""
    request = {'list': id}
    logger.info('session.get triggered: list_playlist_items')
    entries = cls.run_list_playlistitems(request)
    payload = {
        'nextPageToken': None,
        'items': entries
    }
    return json.loads(
        json.dumps(payload, sort_keys=False, indent=1))
def list_videos(cls, ids):
    """Fetch title, channel and duration for the given video ids via
    the YouTube Data API."""
    params = {
        'part': 'id,snippet,contentDetails',
        'fields': ('items(id,snippet(title,channelTitle),'
                   'contentDetails(duration))'),
        'id': ','.join(ids),
        'key': API.youtube_api_key
    }
    logger.info('session.get triggered: list_videos')
    response = cls.session.get(API.endpoint + 'videos', params=params)
    return response.json()
def translate_uri(self, uri):
    """Return the playable audio stream url for a youtube:video uri,
    or None for non-video uris and on any resolution failure."""
    logger.info('youtube PlaybackProvider.translate_uri "%s"', uri)
    if "youtube:video/" not in uri:
        return None
    try:
        return youtube.Video.get(extract_id(uri)).audio_url.get()
    except Exception as e:
        logger.error('translate_uri error "%s"', e)
        return None
def search(self, query=None, uris=None, exact=False):
    """Handle a Mopidy search (queries carrying 'any' only).

    Searches YouTube for videos and playlists, builds a Track per
    entry (playlists get length 0 and a video-count album name), and
    kicks off background loading of video info so results are ready
    when queued.  Returns a SearchResult or None.
    """
    # TODO Support exact search
    logger.info('youtube LibraryProvider.search "%s"', query)
    # handle only searching (queries with 'any') not browsing!
    if not (query and "any" in query):
        return None
    search_query = " ".join(query["any"])
    logger.info('Searching YouTube for query "%s"', search_query)
    try:
        entries = youtube.Entry.search(search_query)
    except Exception as e:
        logger.error('search error "%s"', e)
        return None
    # load playlist info (to get video_count) of all playlists together
    playlists = [entry for entry in entries if not entry.is_video]
    youtube.Playlist.load_info(playlists)
    tracks = []
    for entry in entries:
        if entry.is_video:
            uri_base = "youtube:video"
            album = "YouTube Video"
            length = int(entry.length.get()) * 1000
        else:
            uri_base = "youtube:playlist"
            album = "YouTube Playlist (%s videos)" % entry.video_count.get()
            length = 0
        name = entry.title.get()
        tracks.append(
            Track(
                # ';' is stripped from names -- presumably it breaks
                # downstream uri/track handling; TODO confirm
                name=name.replace(";", ""),
                comment=entry.id,
                length=length,
                artists=[Artist(name=entry.channel.get())],
                album=Album(name=album),
                uri="%s/%s.%s" % (uri_base, safe_url(name), entry.id),
            )
        )
    # load video info and playlist videos in the background. they should be
    # ready by the time the user adds search results to the playing queue
    for pl in playlists:
        pl.videos  # start loading
    return SearchResult(uri="youtube:search", tracks=tracks)
def list_playlistitems(cls, id, page, max_results):
    """Fetch one page of playlist items (title + video id per item)
    via the YouTube Data API."""
    params = {
        'part': 'id,snippet',
        'fields': ('nextPageToken,'
                   'items(snippet(title, resourceId(videoId)))'),
        'maxResults': max_results,
        'playlistId': id,
        'key': API.youtube_api_key,
        'pageToken': page,
    }
    logger.info('session.get triggered: list_playlistitems')
    response = cls.session.get(API.endpoint + 'playlistItems',
                               params=params)
    return response.json()
def list_playlistitems(cls, id, page, max_results):
    """Fetch one page of playlist items (title, video id, channel per
    item) via the YouTube Data API."""
    params = {
        "part": "id,snippet",
        "fields": ("nextPageToken,"
                   "items(snippet(title, resourceId(videoId), channelTitle))"),
        "maxResults": max_results,
        "playlistId": id,
        "key": youtube_api_key,
        "pageToken": page,
    }
    logger.info("session.get triggered: list_playlistitems")
    response = cls.session.get(API.endpoint + "playlistItems",
                               params=params)
    return response.json()
def search(cls, q):
    """Search videos and playlists with a single Data API call, capped
    at Video.search_results hits."""
    params = {
        "part": "id, snippet",
        "fields": "items(id, snippet(title, thumbnails(default), channelTitle))",  # noqa: E501
        "maxResults": Video.search_results,
        "type": "video,playlist",
        "q": q,
        "key": youtube_api_key,
    }
    logger.info("session.get triggered: search")
    response = cls.session.get(API.endpoint + "search", params=params)
    return response.json()
def search(cls, q):
    """Search videos and playlists with a single Data API call, capped
    at Video.search_results hits."""
    params = {
        'part': 'id, snippet',
        'fields': 'items(id, snippet(title, thumbnails(default), channelTitle))',  # noqa: E501
        'maxResults': Video.search_results,
        'type': 'video,playlist',
        'q': q,
        'key': API.youtube_api_key
    }
    logger.info('session.get triggered: search')
    response = cls.session.get(API.endpoint + 'search', params=params)
    return response.json()
def list_playlistitems(cls, id, page, max_results):
    """Scrape playlist items for *id*, truncated to max_results;
    pagination is unsupported (nextPageToken is always None)."""
    request = {"list": id}
    logger.info("session.get triggered: list_playlist_items")
    entries = cls.run_list_playlistitems(request)
    limited = [x for _, x in zip(range(max_results), entries)]
    payload = {
        "nextPageToken": None,
        "items": limited,
    }
    return json.loads(
        json.dumps(payload, sort_keys=False, indent=1))
def list_playlists(cls, ids):
    """list playlists

    see https://developers.google.com/youtube/v3/docs/playlists/list
    """
    params = {
        "part": "id,snippet,contentDetails",
        "fields": ("items(id,snippet(title,thumbnails,channelTitle),"
                   "contentDetails(itemCount))"),
        "id": ",".join(ids),
        "key": youtube_api_key,
    }
    logger.info("session.get triggered: list_playlists")
    response = cls.session.get(API.endpoint + "playlists", params=params)
    return response.json()
def search(cls, q):
    """search for both videos and playlists using a single API call

    see https://developers.google.com/youtube/v3/docs/search
    """
    params = {
        "part": "id, snippet",
        "fields": "items(id, snippet(title, thumbnails(default), channelTitle))",
        "maxResults": Video.search_results,
        "type": "video,playlist",
        "q": q,
        "key": youtube_api_key,
    }
    logger.info("session.get triggered: search")
    response = cls.session.get(API.endpoint + "search", params=params)
    return response.json()
def search_youtube(q):
    """Search YouTube for *q* and resolve the top 15 video hits to
    Tracks; hits that fail to resolve are skipped."""
    query = {
        'part': 'id',
        'maxResults': 15,
        'type': 'video',
        'q': q,
        'key': yt_key
    }
    pl = requests.get(yt_api_endpoint + 'search', params=query)
    playlist = []
    # 'items' is absent on API error responses; fall back to empty
    for yt_id in pl.json().get('items') or []:
        try:
            track = resolve_url(yt_id.get('id').get('videoId'))
            playlist.append(track)
        except Exception as e:
            # Exception.message was removed in Python 3; log the exception
            logger.info(e)
    return playlist
def search_youtube(q):
    """Search YouTube for *q* and resolve the top 15 video hits to
    Tracks; hits that fail to resolve are skipped."""
    query = {
        'part': 'id',
        'maxResults': 15,
        'type': 'video',
        'q': q,
        'key': yt_key
    }
    pl = requests.get(yt_api_endpoint + 'search', params=query)
    playlist = []
    # 'items' is absent on API error responses; fall back to empty
    for yt_id in pl.json().get('items') or []:
        try:
            track = resolve_url(yt_id.get('id').get('videoId'))
            playlist.append(track)
        except Exception as e:
            # Exception.message was removed in Python 3; log the exception
            logger.info(e)
    return playlist
def list_playlists(cls, ids):
    """list playlists

    Scrapes each playlist's public page and regex-extracts thumbnail,
    title, channel and video count, returning a dict shaped like the
    YouTube Data API 'playlists' response: ``{"items": [...]}``.
    """
    # NOTE(review): tied to the exact playlist-page markup; an upstream
    # HTML change silently yields zero matches.
    regex = (r'<div id="pl-header"(?:.|\n)*?"'
             r"(?P<thumbnail>https://i\.ytimg\.com\/vi\/.{11}/).*?\.jpg"
             r'(?:(.|\n))*?(?:.|\n)*?class="pl-header-title"'
             r'(?:.|\n)*?\>\s*(?P<title>.*)(?:.|\n)*?<a href="/'
             r"(user|channel)/(?:.|\n)*? >"
             r"(?P<channelTitle>.*?)</a>(?:.|\n)*?"
             r"(?P<itemCount>\d*) videos</li>")
    items = []
    for id in ids:
        query = {
            "list": id,
            "app": "desktop",
            "persist_app": 1,
        }
        logger.info("session.get triggered: list_playlists")
        result = cls.session.get(cls.endpoint + "playlist", params=query)
        for match in re.finditer(regex, result.text):
            item = {
                "id": id,
                "snippet": {
                    "title": match.group("title"),
                    "channelTitle": match.group("channelTitle"),
                    "thumbnails": {
                        "default": {
                            "url": match.group("thumbnail") + "default.jpg",
                            "width": 120,
                            "height": 90,
                        },
                    },
                },
                "contentDetails": {
                    "itemCount": match.group("itemCount")
                },
            }
            items.append(item)
    # round-trip through json to mimic the API's plain-dict response
    return json.loads(
        json.dumps({"items": items}, sort_keys=False, indent=1))
def resolve_playlist(url):
    """Resolve the first 50 videos of a YouTube playlist to Tracks.

    Single API page only; entries that fail to parse are skipped.
    """
    logger.info("Resolving Youtube for playlist '%s'", url)
    query = {
        'part': 'snippet',
        'maxResults': 50,
        'playlistId': url,
        'fields': 'items/snippet/resourceId',
        'key': yt_key
    }
    # the Data API v3 resource is 'playlistItems' (plural); the singular
    # endpoint does not exist and returns an error payload
    pl = requests.get(yt_api_endpoint + 'playlistItems', params=query)
    playlist = []
    # 'items' is absent on API error responses; fall back to empty
    for yt_id in pl.json().get('items') or []:
        try:
            yt_id = yt_id.get('snippet').get('resourceId').get('videoId')
            playlist.append(resolve_url(yt_id))
        except Exception as e:
            # Exception.message was removed in Python 3; log the exception
            logger.info(e)
    return playlist
def translate_uri(self, uri):
    """Resolve a youtube:video/<title>.<id> uri to its audio stream url.

    Invoked when a track is about to play.  Only videos are playable
    (playlists are expanded into tracks by YouTubeLibraryProvider.lookup);
    any other uri, and any resolution failure, yields None.
    """
    logger.info('youtube PlaybackProvider.translate_uri "%s"', uri)
    if "youtube:video/" not in uri:
        return None
    try:
        video_id = extract_id(uri)
        video = youtube.Video.get(video_id)
        return video.audio_url.get()
    except Exception as e:
        logger.error('translate_uri error "%s"', e)
        return None
def list_playlists(cls, ids):
    """Scrape thumbnail, title, channel and video count for each
    playlist id from its public page, returning a dict shaped like the
    YouTube Data API 'playlists' response: ``{'items': [...]}``.
    """
    # NOTE(review): tied to the exact playlist-page markup; an upstream
    # HTML change silently yields zero matches.
    regex = (r'<div id="pl-header"(?:.|\n)*?"'
             r'(?P<thumbnail>https://i\.ytimg\.com\/vi\/.{11}/).*?\.jpg'
             r'(?:(.|\n))*?(?:.|\n)*?class="pl-header-title"'
             r'(?:.|\n)*?\>\s*(?P<title>.*)(?:.|\n)*?<a href="/'
             r'(user|channel)/(?:.|\n)*? >'
             r'(?P<channelTitle>.*?)</a>(?:.|\n)*?'
             r'(?P<itemCount>\d*) videos</li>')
    items = []
    for id in ids:
        query = {
            'list': id,
        }
        logger.info('session.get triggered: list_playlists')
        result = cls.session.get(scrAPI.endpoint + 'playlist',
                                 params=query)
        for match in re.finditer(regex, result.text):
            item = {
                'id': id,
                'snippet': {
                    'title': match.group('title'),
                    'channelTitle': match.group('channelTitle'),
                    'thumbnails': {
                        'default': {
                            'url': match.group('thumbnail') + 'default.jpg',
                            'width': 120,
                            'height': 90,
                        },
                    },
                },
                'contentDetails': {
                    'itemCount': match.group('itemCount'),
                }
            }
            items.append(item)
    # round-trip through json to mimic the API's plain-dict response
    return json.loads(
        json.dumps({'items': items}, sort_keys=False, indent=1))