def _get_playlist_videos_html(playlist):
    """
    Get the list of videos of a playlist using HTML website content

    :param playlist: playlist identifier, used as value of the `list` URL parameter
    :return: list of video dictionary objects (keys: thumb, id, url, title); the list
        is empty when the page cannot be fetched or no entry could be parsed
    """
    result = []
    url = "https://www.youtube.com/playlist?list=%s" % playlist
    try:
        response = requests.get(url)
        if response is not None and response.status_code == 200:
            formatter = BeautifulSoup(response.text, "html.parser")
            for item in formatter.find_all(attrs={"class": "pl-video"}):
                video = dict()
                try:
                    data = item.find(attrs={"class": "yt-thumb-clip"}).find("img")
                    # Prefer the `data-thumb` attribute when present, otherwise use `src`
                    if data.get("data-thumb") is not None:
                        video["thumb"] = data["data-thumb"]
                    else:
                        video["thumb"] = data["src"]
                    data = item.find(attrs={"class": "pl-video-title"}).find("a")
                    video["id"] = _getvid(data["href"])[0]
                    video["url"] = 'https://www.youtube.com%s' % data["href"]
                    video["title"] = data.text.strip()
                    result.append(video)
                except Exception as ex:
                    # Best effort: a malformed entry is skipped, not fatal. Only
                    # `Exception` is caught so KeyboardInterrupt/SystemExit still
                    # propagate.
                    commons.warn("Error extracting video from playlist: %s" % str(ex))
    except (ConnectionError, requests.exceptions.ConnectionError) as err:
        # requests raises its own ConnectionError class, which is not a subclass of
        # the Python 3 builtin with the same name, so both are listed explicitly.
        commons.error("Error connecting to playlist video list: %s" % str(err))
    return result
def _get_channels_html(sources): """ Get list of dictionary channels (user and standard) in order to display them and to list the content. Channel details are collected from HTML website content :param sources: list of dictionary channels that contain only basic details for identification :return: list of improved dictionary channels that contain details for displaying and querying """ result = [] if sources is not None and isinstance(sources, list): for item in sources: url = "https://www.youtube.com/%s/%s/videos" % (item['type'], item['id']) channel = dict() try: response = requests.get(url) if response is not None and response.status_code == 200: formatter = BeautifulSoup(response.text, "html.parser") data = formatter.find( attrs={ "class": "qualified-channel-title-text" }).find("a") channel['title'] = data.text channel['id'] = item['id'] if item['type'] == "user": data = formatter.find( attrs={ "class": "channel-header-subscription-button-container" }).find("button") channel['id'] = data["data-channel-external-id"] result.append(channel) except BaseException as ex: commons.warn("Error preparing channel dictionary: %s" % str(ex)) return result
def _get_search_html(query):
    """
    Search for videos, channels and playlists using HTML website content

    :param query: search text; it is converted to string and stripped before use
    :return: list of dictionary objects (keys: thumb, title, url and, when the link
        could be classified, type and id; `time` is added when the entry shows one);
        the list is empty when the page cannot be fetched
    """
    result = []
    try:
        # Let requests build the query string so that reserved characters of the
        # search text (&, #, +, %, ...) are URL-encoded; spaces are still sent as '+'.
        response = requests.get(
            "https://www.youtube.com/results",
            params={"search_query": str(query).strip()})
        if response is not None and response.status_code == 200:
            formatter = BeautifulSoup(response.text, "html.parser")
            for item in formatter.find_all(attrs={"class": "yt-lockup"}):
                video = dict()
                try:
                    data = item.find(attrs={"class": "video-thumb"}).find("img")
                    # Prefer the `data-thumb` attribute when present, otherwise use `src`
                    if data.get("data-thumb") is not None:
                        video["thumb"] = data["data-thumb"]
                    else:
                        video["thumb"] = data["src"]
                    data = item.find(attrs={"class": "yt-lockup-title"}).find("a")
                    href = data["href"]
                    video["title"] = data.text.strip()
                    video["url"] = 'https://www.youtube.com%s' % href
                    # Classify the link; a watch link carrying `list=` is handled
                    # as a playlist. Unclassified links keep neither type nor id.
                    tmpobj = None
                    if href.startswith("/watch") and "list=" not in href:
                        tmpobj = _getvid(href)
                    elif href.startswith("/user") or href.startswith("/channel"):
                        tmpobj = _getcid(href)
                    elif href.startswith("/playlist") or "list=" in href:
                        tmpobj = _getpid(href)
                    if tmpobj is not None:
                        video["type"] = tmpobj[1]
                        video["id"] = tmpobj[0]
                    data = item.find(attrs={"class": "video-time"})
                    if data:
                        video["time"] = data.text
                    if video.get('type') == "user":
                        # Look the subscription button up inside the current entry:
                        # a page-wide lookup would give every user result the id of
                        # the first button found in the document.
                        data = item.find(
                            attrs={"class": "yt-uix-subscription-button"})
                        video['id'] = data["data-channel-external-id"]
                        video["type"] = "channel"
                    result.append(video)
                except Exception as ex:
                    # Best effort: a malformed entry is skipped, not fatal. Only
                    # `Exception` is caught so KeyboardInterrupt/SystemExit still
                    # propagate.
                    commons.warn(
                        "Error extracting video from search list: %s" % str(ex))
    except (ConnectionError, requests.exceptions.ConnectionError) as err:
        # requests raises its own ConnectionError class, which is not a subclass of
        # the Python 3 builtin with the same name, so both are listed explicitly.
        commons.error("Error connecting to search video list: %s" % str(err))
    return result
def _get_trending_html(locale='US'):
    """
    Get the list of most popular (trending) videos using HTML website content

    :param locale: value sent as the `gl` URL parameter of the trending feed
        (default 'US')
    :return: list of video dictionary objects providing many properties for
        identification and also for playing (keys: thumb, id, url, title, time and,
        when the entry exposes them, date and views)
    """
    result = []
    try:
        response = requests.get("https://www.youtube.com/feed/trending?gl=%s" % locale)
        if response is not None and response.status_code == 200:
            formatter = BeautifulSoup(response.text, "html.parser")
            listing = formatter.find_all(
                attrs={"class": "expanded-shelf-content-item"})
            for item in listing:
                video = dict()
                try:
                    data = item.find(
                        attrs={"class": ["yt-thumb", "video-thumb"]}
                    ).find(attrs={"class": "yt-thumb-simple"}).find("img")
                    # Prefer the `data-thumb` attribute when present, otherwise use `src`
                    if data.get("data-thumb") is not None:
                        video["thumb"] = data["data-thumb"]
                    else:
                        video["thumb"] = data["src"]
                    data = item.find(attrs={"class": "yt-lockup-title"}).find("a")
                    video["id"] = _getvid(data["href"])[0]
                    video["url"] = 'https://www.youtube.com%s' % data["href"]
                    video["title"] = data.text
                    # `time` is always set; it stays empty when the title link has
                    # no accessible description
                    data = data.select("span.accessible-description")
                    video["time"] = data[0].text if len(data) != 0 else ""
                    data = item.find(attrs={"class": "yt-lockup-meta-info"})
                    if data and len(data.contents) > 1:
                        video["date"] = data.contents[0].string
                        # Only the first space-separated token of the second entry is kept
                        video["views"] = data.contents[1].string.split(" ")[0]
                    result.append(video)
                except Exception as ex:
                    # Best effort: a malformed entry is skipped, not fatal. Only
                    # `Exception` is caught so KeyboardInterrupt/SystemExit still
                    # propagate.
                    commons.warn(
                        "Error extracting video from trending list: %s" % str(ex))
    except (ConnectionError, requests.exceptions.ConnectionError) as err:
        # requests raises its own ConnectionError class, which is not a subclass of
        # the Python 3 builtin with the same name, so both are listed explicitly.
        commons.error("Error connecting to trending video list: %s" % str(err))
    return result
def _api_item_to_video(item):
    """Convert one API item (decoded JSON object) into a video dictionary."""

    def identify(target, ident, kind):
        # Store the id, the type and the matching website address of the entry
        if kind == "video":
            pattern = 'https://www.youtube.com/watch?v=%s'
        elif kind == "channel":
            pattern = 'https://www.youtube.com/channel/%s'
        else:
            pattern = 'https://www.youtube.com/playlist?list=%s'
        target["id"] = ident
        target["type"] = kind
        target["url"] = pattern % ident

    video = dict()
    if item['kind'] == "youtube#video":
        identify(video, item['id'], "video")
    elif item['kind'] == "youtube#searchResult":
        # Search results wrap the real identifier in a nested `id` object
        ref = item['id']
        if ref['kind'] == 'youtube#video':
            identify(video, ref['videoId'], "video")
        elif ref['kind'] == 'youtube#channel':
            identify(video, ref['channelId'], "channel")
        elif ref['kind'] == 'youtube#playlist':
            identify(video, ref['playlistId'], "playlist")
    elif item['kind'] == "youtube#channel":
        identify(video, item['id'], "channel")
    snippet = item["snippet"]
    video["title"] = snippet['title']
    if item['kind'] == "youtube#playlistItem" and snippet.get('resourceId') is not None:
        # Playlist items reference the video through `snippet.resourceId`
        resource = snippet['resourceId']
        if resource['kind'] == 'youtube#video':
            identify(video, resource['videoId'], "video")
    thumbnails = snippet.get('thumbnails')
    if thumbnails is not None:
        # The first size available in this order of preference is used
        for size in ('standard', 'high', 'medium', 'default'):
            if thumbnails.get(size) is not None:
                video["thumb"] = thumbnails[size]['url']
                break
    if snippet.get('publishedAt') is not None:
        video["date"] = snippet['publishedAt']
    return video


def _get_datalist_api(base):
    """
    Get a list of videos, channels or playlists from a paginated API listing

    Pages are requested one after another by appending `&pageToken=<token>` to the
    given address, until the answer has no `nextPageToken`, the status code is not
    200, or 21 pages have been read.

    :param base: API address of the first page; it must already contain a query
        string because the page token is appended with `&`
    :return: list of dictionary objects (keys: title and, depending on the item,
        id, type, url, thumb and date); the entries collected so far are returned
        when a connection error interrupts the paging
    """
    result = []
    try:
        pageno = 0
        token = ""
        while token is not None and pageno <= 20:
            url = "%s&pageToken=%s" % (base, token) if token else base
            response = requests.get(url)
            if response is None or response.status_code != 200:
                break
            data = json.loads(response.text)
            # An empty token is treated like a missing one, otherwise the first
            # page would be requested again and its items duplicated.
            token = data.get('nextPageToken') or None
            if token is not None:
                pageno += 1
            for item in data.get('items') or []:
                try:
                    result.append(_api_item_to_video(item))
                except Exception as ex:
                    # Best effort: a malformed item is skipped, not fatal. Only
                    # `Exception` is caught so KeyboardInterrupt/SystemExit still
                    # propagate.
                    commons.warn(
                        "Error extracting video from API list: %s" % str(ex))
    except (ConnectionError, requests.exceptions.ConnectionError) as err:
        # requests raises its own ConnectionError class, which is not a subclass of
        # the Python 3 builtin with the same name, so both are listed explicitly.
        commons.error("Error connecting to API video list: %s" % str(err))
    return result
# Forwards `text` unchanged to `commons.warn`; `self` is not used.
# NOTE(review): the enclosing class is not visible in this part of the file -
# presumably a logger-style adapter object; confirm against its definition.
def warning(self, text): commons.warn(text)