def _webscrape_api_data(self, video_url):
    """Scrape api data from VRT NU html page.

    The player attributes are obtained by passing *video_url* to
    ``webscraper.get_video_attributes``.

    :param video_url: value handed unchanged to ``get_video_attributes``
    :return: an ``ApiData`` built from the scraped attributes, or ``None``
        when nothing was scraped or a required attribute is missing
        (an error is logged in both failure cases)
    """
    from webscraper import get_video_attributes
    video_data = get_video_attributes(video_url)

    # Web scraping failed, log error
    if not video_data:
        log_error('Web scraping api data failed, empty video_data')
        return None

    # Store required html data attributes
    client = video_data.get('client') or self._CLIENT
    media_api_url = video_data.get('mediaapiurl')
    video_id = video_data.get('videoid')
    # Normalise a missing or None attribute to '' so the '+=' below cannot
    # raise a TypeError
    publication_id = video_data.get('publicationid') or ''

    # Live stream or on demand: without a 'videoid' attribute the page is
    # treated as a live stream and the id is read from 'livestream' instead
    if video_id is None:
        is_live_stream = True
        video_id = video_data.get('livestream')
    else:
        is_live_stream = False
        publication_id += quote('$')

    # publication_id is always a string at this point, so test for emptiness
    # rather than None; otherwise a page without any identifier slips through
    if client is None or media_api_url is None or (not video_id and not publication_id):
        log_error('Web scraping api data failed, required attributes missing')
        return None

    return ApiData(client, media_api_url, video_id, publication_id, is_live_stream)
def _get_api_data(self, video):
    """Create api data object from video dictionary.

    Reads the ``video_url``, ``video_id`` and ``publication_id`` keys of
    *video* and picks one of three strategies:

    * ``video_id`` and ``publication_id`` present: on demand ``ApiData``
    * ``video_id`` without ``video_url``: live stream ``ApiData``
    * ``video_url`` present: web scrape the page, falling back to a live
      stream ``ApiData`` built from ``video_id`` when scraping returns ``None``

    :param video: mapping that may hold the three keys above
    :return: the resulting ``ApiData``, or ``None`` when no strategy applies
        or scraping fails without a ``video_id`` to fall back on
    """
    # Default result, so a video without any usable key yields None instead
    # of an UnboundLocalError at the return statement
    api_data = None
    video_url = video.get('video_url')
    video_id = video.get('video_id')
    publication_id = video.get('publication_id')

    # Prepare api_data for on demand streams by video_id and publication_id
    if video_id and publication_id:
        api_data = ApiData(self._CLIENT, self._VUALTO_API_URL, video_id, publication_id + quote('$'), False)
    # Prepare api_data for livestreams by video_id, e.g. vualto_strubru, vualto_mnm, ketnet_jr
    elif video_id and not video_url:
        api_data = ApiData(self._CLIENT, self._VUALTO_API_URL, video_id, '', True)
    # Webscrape api_data with video_id fallback
    elif video_url:
        api_data = self._webscrape_api_data(video_url)
        # Only fall back when scraping failed; a successful scrape must not
        # be thrown away
        if api_data is None and video_id:
            api_data = ApiData(self._CLIENT, self._VUALTO_API_URL, video_id, '', True)

    return api_data
def _webscrape_api_data(self, video_url):
    """Scrape api data from VRT NU html page.

    Downloads *video_url*, parses only the ``section``/``div`` elements with
    class ``video-player`` or ``livestream__player`` and reads the attributes
    of the first ``nui-media`` tag found in them.

    :param video_url: URL of the page to download
    :return: an ``ApiData`` built from the tag attributes, or ``None`` when
        the tag cannot be read, has no attributes, or a required attribute
        is missing (an error is logged in each failure case)
    """
    from bs4 import BeautifulSoup, SoupStrainer
    log(2, 'URL get: {url}', url=unquote(video_url))

    # Always release the connection; try/finally is used so this does not
    # rely on the response object being a context manager
    response = urlopen(video_url)
    try:
        html_page = response.read()
    finally:
        response.close()

    strainer = SoupStrainer(['section', 'div'], {'class': ['video-player', 'livestream__player']})
    soup = BeautifulSoup(html_page, 'html.parser', parse_only=strainer)
    try:
        # find() returns None when there is no such tag; the resulting
        # AttributeError is reported through the handler below
        video_data = soup.find(lambda tag: tag.name == 'nui-media').attrs
    except Exception as exc:  # pylint: disable=broad-except
        # Web scraping failed, log error
        log_error('Web scraping api data failed: {error}', error=exc)
        return None

    # Web scraping failed, log error
    if not video_data:
        log_error('Web scraping api data failed, empty video_data')
        return None

    # Store required html data attributes
    client = video_data.get('client') or self._CLIENT
    media_api_url = video_data.get('mediaapiurl')
    video_id = video_data.get('videoid')
    # Normalise a missing or None attribute to '' so the '+=' below cannot
    # raise a TypeError
    publication_id = video_data.get('publicationid') or ''

    # Live stream or on demand: without a 'videoid' attribute the page is
    # treated as a live stream and the id is read from 'livestream' instead
    if video_id is None:
        is_live_stream = True
        video_id = video_data.get('livestream')
    else:
        is_live_stream = False
        publication_id += quote('$')

    # publication_id is always a string at this point, so test for emptiness
    # rather than None; otherwise a page without any identifier slips through
    if client is None or media_api_url is None or (not video_id and not publication_id):
        log_error('Web scraping api data failed, required attributes missing')
        return None

    return ApiData(client, media_api_url, video_id, publication_id, is_live_stream)