コード例 #1
0
    def _webscrape_api_data(self, video_url):
        """Scrape api data from VRT NU html page.

        :param video_url: URL handed to ``get_video_attributes``.
        :return: an ``ApiData`` built from the scraped attributes, or ``None``
            when nothing was scraped or required attributes are missing
            (an error is logged in both cases).
        """
        from webscraper import get_video_attributes
        video_data = get_video_attributes(video_url)

        # Web scraping failed, log error
        if not video_data:
            log_error('Web scraping api data failed, empty video_data')
            return None

        # Store required html data attributes
        client = video_data.get('client') or self._CLIENT
        media_api_url = video_data.get('mediaapiurl')
        video_id = video_data.get('videoid')
        # Normalise a missing or None value to '' so the concatenation below
        # cannot fail on None
        publication_id = video_data.get('publicationid') or ''
        # Live stream or on demand
        if video_id is None:
            is_live_stream = True
            video_id = video_data.get('livestream')
        else:
            is_live_stream = False
            publication_id += quote('$')

        # publication_id is always a string here, so test for emptiness:
        # comparing it against None could never flag a missing identifier
        if client is None or media_api_url is None or (video_id is None and
                                                       not publication_id):
            log_error(
                'Web scraping api data failed, required attributes missing')
            return None

        return ApiData(client, media_api_url, video_id, publication_id,
                       is_live_stream)
コード例 #2
0
 def _get_api_data(self, video):
     """Create api data object from video dictionary.

     :param video: mapping that may hold 'video_url', 'video_id' and
         'publication_id' entries.
     :return: an ``ApiData`` object, or ``None`` when the dictionary holds
         neither a usable video_id nor a video_url, or when web scraping
         fails and no video_id is available as fallback.
     """
     video_url = video.get('video_url')
     video_id = video.get('video_id')
     publication_id = video.get('publication_id')
     # Default result, so the final return never hits an unbound local when
     # none of the branches below applies
     api_data = None
     # Prepare api_data for on demand streams by video_id and publication_id
     if video_id and publication_id:
         api_data = ApiData(self._CLIENT, self._VUALTO_API_URL, video_id, publication_id + quote('$'), False)
     # Prepare api_data for livestreams by video_id, e.g. vualto_strubru, vualto_mnm, ketnet_jr
     elif video_id and not video_url:
         api_data = ApiData(self._CLIENT, self._VUALTO_API_URL, video_id, '', True)
     # Webscrape api_data with video_id fallback
     elif video_url:
         api_data = self._webscrape_api_data(video_url)
         # Only fall back to the video_id when scraping produced nothing;
         # a successfully scraped result must not be overwritten
         if api_data is None and video_id:
             api_data = ApiData(self._CLIENT, self._VUALTO_API_URL, video_id, '', True)
     return api_data
コード例 #3
0
    def _webscrape_api_data(self, video_url):
        ''' Scrape api data from VRT NU html page

        :param video_url: URL of the html page to download and parse.
        :return: an ``ApiData`` built from the attributes of the page's
            ``nui-media`` tag, or ``None`` when the tag cannot be read or
            required attributes are missing (an error is logged in both
            cases).
        '''
        from contextlib import closing
        from bs4 import BeautifulSoup, SoupStrainer
        log(2, 'URL get: {url}', url=unquote(video_url))
        # closing() releases the HTTP response even when read() raises
        with closing(urlopen(video_url)) as response:
            html_page = response.read()
        # Restrict parsing to the player containers
        strainer = SoupStrainer(
            ['section', 'div'],
            {'class': ['video-player', 'livestream__player']})
        soup = BeautifulSoup(html_page, 'html.parser', parse_only=strainer)
        try:
            # find() returns None when there is no nui-media tag; the
            # resulting AttributeError is reported by the handler below
            video_data = soup.find(lambda tag: tag.name == 'nui-media').attrs
        except Exception as exc:  # pylint: disable=broad-except
            # Web scraping failed, log error
            log_error('Web scraping api data failed: {error}', error=exc)
            return None

        # Web scraping failed, log error
        if not video_data:
            log_error('Web scraping api data failed, empty video_data')
            return None

        # Store required html data attributes
        client = video_data.get('client') or self._CLIENT
        media_api_url = video_data.get('mediaapiurl')
        video_id = video_data.get('videoid')
        # Normalise a missing or None value to '' so the concatenation below
        # cannot fail on None
        publication_id = video_data.get('publicationid') or ''
        # Live stream or on demand
        if video_id is None:
            is_live_stream = True
            video_id = video_data.get('livestream')
        else:
            is_live_stream = False
            publication_id += quote('$')

        # publication_id is always a string here, so test for emptiness:
        # comparing it against None could never flag a missing identifier
        if client is None or media_api_url is None or (video_id is None and
                                                       not publication_id):
            log_error(
                'Web scraping api data failed, required attributes missing')
            return None

        return ApiData(client, media_api_url, video_id, publication_id,
                       is_live_stream)