def UpdateVideoItemLive(self, item):
    """Updates a live MediaItem with its actual stream URLs.

    Arguments:
    item : MediaItem - the live MediaItem that needs to be updated

    Returns:
    The same item, with its MediaItemParts replaced by a single new part.

    The item URL is first handed to NpoStream.GetLiveStreamsFromNpo. If that
    yields nothing, the page behind the URL is downloaded and searched for
    either an MP3 stream (added with bitrate 192) or a `data-prid` value
    from which a metadata URL is built and resolved through NpoStream again.

    The item is always flagged as complete before it is returned.

    """

    Logger.Debug('Starting UpdateVideoItem: %s', item.name)

    item.MediaItemParts = []
    mediaPart = item.CreateNewEmptyMediaPart()
    refererHeader = {"referer": self.baseUrlLive}

    directStreams = NpoStream.GetLiveStreamsFromNpo(
        item.url, Config.cacheDir, proxy=self.proxy, headers=refererHeader)

    if directStreams:
        Logger.Debug("Found live stream urls from item url")
        for streamUrl, bitrate in directStreams:
            item.complete = True
            mediaPart.AppendMediaStream(streamUrl, bitrate)
    else:
        # we need to determine radio or live tv
        Logger.Debug("Fetching live stream data from item url")
        pageHtml = UriHandler.Open(item.url, proxy=self.proxy)

        audioUrls = Regexer.DoRegex("""data-streams='{"url":"([^"]+)","codec":"[^"]+"}'""", pageHtml)
        if not audioUrls:
            if item.url.startswith("http://e.omroep.nl/metadata/"):
                metadataUrl = item.url
            else:
                Logger.Debug("Finding the actual metadata url from %s", item.url)
                prids = Regexer.DoRegex(
                    '<div class="video-player-container"[^>]+data-prid="([^"]+)"', pageHtml)
                # The last prid on the page wins; no prid at all leaves the url None.
                metadataUrl = None
                if prids:
                    metadataUrl = "http://e.omroep.nl/metadata/%s" % (prids[-1],)

            metadataStreams = NpoStream.GetLiveStreamsFromNpo(
                metadataUrl, Config.cacheDir, proxy=self.proxy, headers=refererHeader)
            for streamUrl, bitrate in metadataStreams:
                item.complete = True
                mediaPart.AppendMediaStream(streamUrl, bitrate)
        else:
            Logger.Debug("Found MP3 URL")
            mediaPart.AppendMediaStream(audioUrls[0], 192)

    item.complete = True
    # Logger.Trace(item)
    return item
def __init__(self, channelInfo):
    """Initialisation of the class.

    Arguments:
    channelInfo: ChannelInfo - The channel info object to base this channel on.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    Depending on self.channelCode ("ketnet" or "cobra") the image and URLs
    are configured, after which the episode, video and folder data parsers
    are registered.

    """

    chn_class.Channel.__init__(self, channelInfo)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    if self.channelCode == "ketnet":
        self.noImage = "ketnetimage.png"
        self.mainListUri = "https://www.ketnet.be/kijken"
        self.baseUrl = "https://www.ketnet.be"
        # Raw string: \W, \{ and \w are regex escapes, not Python string escapes.
        self.mediaUrlRegex = r'playerConfig\W*=\W*(\{[\w\W]{0,2000}?);.vamp'

    elif self.channelCode == "cobra":
        self.noImage = "cobraimage.png"
        self.mainListUri = "http://www.cobra.be/cm/cobra/cobra-mediaplayer"
        self.baseUrl = "http://www.cobra.be"

    self.swfUrl = "%s/html/flash/common/player.swf" % (self.baseUrl, )

    episodeRegex = r'<a[^>]+href="(?<url>/kijken[^"]+)"[^>]*>\W*<img[^>]+src="(?<thumburl>[^"]+)"[^>]+alt="(?<title>[^"]+)"'
    episodeRegex = Regexer.FromExpresso(episodeRegex)
    self._AddDataParser(self.mainListUri, matchType=ParserData.MatchExact,
                        parser=episodeRegex, creator=self.CreateEpisodeItem)

    self._AddDataParser("*", preprocessor=self.SelectVideoSection)

    videoRegex = Regexer.FromExpresso(
        r'<a title="(?<title>[^"]+)" href="(?<url>[^"]+)"[^>]*>'
        r'\W+<img src="(?<thumburl>[^"]+)"[^<]+<span[^<]+[^<]+'
        r'[^>]+></span>\W+(?<description>[^<]+)')
    self._AddDataParser("*", parser=videoRegex, creator=self.CreateVideoItem,
                        updater=self.UpdateVideoItem)

    folderRegex = Regexer.FromExpresso(
        '<span class="more-of-program" rel="/(?<url>[^"]+)">')
    self._AddDataParser("*", parser=folderRegex, creator=self.CreateFolderItem)

    #===============================================================================================================
    # non standard items

    #===============================================================================================================
    # Test cases:

    # ====================================== Actual channel setup STOPS here =======================================
    return
def AddSearchAndGenres(self, data):
    """Pre-processes the main list by adding a search item and a genres folder.

    Arguments:
    data : string - the retrieve data that was loaded for the current item and URL.

    Returns:
    A tuple of the (unchanged) data and a list of MediaItems that were generated.

    When the parent item carries a "genre" in its metaData, that genre is
    remembered on the channel and no extra items are generated. Otherwise a
    search item and a genres folder are returned; the folder is filled with
    one item per genre found in the data, skipping the "all" entry.

    """

    Logger.Info("Performing Pre-Processing")
    items = []

    parent = self.parentItem
    if parent is not None and "genre" in parent.metaData:
        self.__genre = parent.metaData["genre"]
        Logger.Debug("Parsing a specific genre: %s", self.__genre)
        return data, items

    def createItem(title, url, ungrouped):
        # All generated items share the channel thumb and fanart.
        newItem = mediaitem.MediaItem(title, url)
        newItem.complete = True
        newItem.thumb = self.noImage
        if ungrouped:
            newItem.dontGroup = True
        newItem.fanart = self.fanart
        return newItem

    # No date is set on the search item: no items have dates, so adding one
    # would force a date sort in Retrospect.
    items.append(createItem("\a.: Sök :.", "searchSite", True))

    genresFolder = createItem("\a.: Genrer :.", "", True)
    items.append(genresFolder)

    # find the actual genres
    genrePattern = Regexer.FromExpresso(
        '<li[^>]+genre[^>]*><button[^>]+data-value="(?<genre>[^"]+)"[^>]*>(?<title>[^>]+)</button></li>')
    for match in Regexer.DoRegex(genrePattern, data):
        if match["genre"] != "all":
            genreEntry = createItem(match["title"], self.mainListUri, False)
            genreEntry.metaData = {"genre": match["genre"]}
            genresFolder.items.append(genreEntry)

    Logger.Debug("Pre-Processing finished")
    return data, items
def UpdateVideoItem(self, item):
    """Updates an existing MediaItem with its RTMP streams.

    Arguments:
    item : MediaItem - the MediaItem that needs to be updated

    Returns:
    The original item with a new MediaItemPart holding the found streams.
    When the playlist id or the media metadata url cannot be found, an
    error is logged and the item is returned without being updated.

    The page behind item.url is searched for a playlist GUID, the matching
    mrss playlist is fetched from api.mtvnn.com, and the renditions listed in
    the referenced metadata document are added as streams.

    """

    Logger.Debug('Starting UpdateVideoItem for %s (%s)', item.name, self.channelName)

    data = UriHandler.Open(item.url, proxy=self.proxy)

    # get the playlist GUID
    playlistGuids = Regexer.DoRegex("<div[^>]+data-playlist-id='([^']+)'[^>]+></div>", data)
    if not playlistGuids:
        # let's try the alternative then (for the new channels)
        playlistGuids = Regexer.DoRegex('local_playlist[", -]+([a-f0-9]{20})"', data)
    if not playlistGuids:
        # Neither pattern matched: report it instead of failing with an IndexError.
        Logger.Error("Cannot find playlist GUID in url: %s", item.url)
        return item
    playlistGuid = playlistGuids[0]
    # Logger.Trace(playlistGuid)

    # now we can get the playlist meta data
    # http://api.mtvnn.com/v2/mrss.xml?uri=mgid%3Asensei%3Avideo%3Amtvnn.com%3Alocal_playlist-39ce0652b0b3c09258d9-SE-uma_site--ad_site-nickelodeon.se-ad_site_referer-video/9764-barjakt&adSite=nickelodeon.se&umaSite={umaSite}&show_images=true&url=http%3A//www.nickelodeon.se/video/9764-barjakt
    # but this seems to work.
    # http://api.mtvnn.com/v2/mrss.xml?uri=mgid%3Asensei%3Avideo%3Amtvnn.com%3Alocal_playlist-39ce0652b0b3c09258d9
    playListUrl = "http://api.mtvnn.com/v2/mrss.xml?uri=mgid%3Asensei%3Avideo%3Amtvnn.com%3Alocal_playlist-" + playlistGuid
    playListData = UriHandler.Open(playListUrl, proxy=self.proxy)

    # now get the real RTMP data
    rtmpMetaUrls = Regexer.DoRegex("<media:content [^>]+url='([^']+)'", playListData)
    if not rtmpMetaUrls:
        Logger.Error("Cannot find media content url in playlist: %s", playListUrl)
        return item
    rtmpData = UriHandler.Open(rtmpMetaUrls[0], proxy=self.proxy)

    # Raw string: \d and \W are regex escapes, not Python string escapes.
    rtmpUrls = Regexer.DoRegex(
        r'<rendition[^>]+bitrate="(\d+)"[^>]*>\W+<src>([^<]+ondemand)/([^<]+)</src>', rtmpData)

    part = item.CreateNewEmptyMediaPart()
    for bitrate, server, path in rtmpUrls:
        url = "%s/%s" % (server, path)
        # convertedUrl = url.replace("ondemand/","ondemand?slist=")
        convertedUrl = self.GetVerifiableVideoUrl(url)
        part.AppendMediaStream(convertedUrl, bitrate)

    item.complete = True
    Logger.Trace("Media url: %s", item)
    return item
def __init__(self, channel_info):
    """ Initialisation of the class.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    Registers the JSON based show list parser for the main list and three
    regex based video parsers (card teasers, image teasers and single video
    pages) for all other URLs.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.noImage = "eenimage.png"

    # setup the urls
    self.mainListUri = "https://www.een.be/programmas"
    self.baseUrl = "http://www.een.be"

    # setup the main parsing data
    self._add_data_parser(self.mainListUri, preprocessor=self.extract_json, json=True,
                          parser=["data", ], creator=self.create_show_item)

    card_teaser_regex = Regexer.from_expresso(
        r'<a class="card-teaser"[^>][^>]*href="(?<url>[^"]+)"[^>]*>\W+<div[^>]+'
        r'style="background-image: url\(\'(?<thumburl>[^\']+/(?<year>\d{4})/'
        r'(?<month>\d{2})/(?<day>\d{2})/[^\']+)\'[^>]*>\W+<div[^>]+_play[\w\W+]'
        r'{0,2000}?<div[^>]*>(?<_title>[^>]*)</div>\W*<h3[^>]*>(?<title>[^<]+)'
        r'</h3>\W+<div[^>]*>\W+(?:<span[^>]*>[^<]*</span>)?(?<description>[^<]+)'
    )
    self._add_data_parser("*", name="Links to teasers of videos (Card teaser)",
                          parser=card_teaser_regex, creator=self.create_video_item,
                          updater=self.update_video_item)

    image_teaser_regex = Regexer.from_expresso(
        r'<a[^>]*class="[^"]+-teaser"[^>]*background-image: url\(\'(?<thumburl>'
        r'[^\']+/(?<year>\d{4})/(?<month>\d{2})/(?<day>\d{2})/[^\']+)\'[^>]*href="'
        r'(?<url>[^"]+)"[^>]*>\W+<div[^>]+_play[\w\W+]{0,2000}?<div[^>]*>'
        r'(?<_title>[^>]*)</div>\W*<h3[^>]*>(?<title>[^<]+)</h3>\W+<div[^>]*>\W+'
        r'(?:<span[^>]*>[^<]*</span>)?(?<description>[^<]+)'
    )
    self._add_data_parser("*", name="Links to teasers of videos (Image Teaser)",
                          parser=image_teaser_regex, creator=self.create_video_item,
                          updater=self.update_video_item)

    # NOTE(review): "(?:<h2>?<description>[^<]+)?" looks like it was meant to be
    # "(?:<h2>(?<description>[^<]+))?" - confirm before changing the pattern.
    single_video_regex = Regexer.from_expresso(
        r'>(?<title>[^<]+)</h1>[\w\W]{0,2000}?(?:<h2>?<description>[^<]+)?'
        r'[\w\W]{0,1000}?data-video="(?<url>[^"]+)"[\w\W]{0,500}data-analytics'
        r'=\'{"date":"(?<year>\d+)-(?<month>\d+)-(?<day>\d+)'
    )
    self._add_data_parser("*", name="Pages that contain only a single video",
                          parser=single_video_regex, creator=self.create_video_item)

    #===============================================================================================================
    # non standard items

    #===============================================================================================================
    # Test cases:

    # ====================================== Actual channel setup STOPS here =======================================
    return
def __init__(self, channelInfo):
    """Initialisation of the class.

    Arguments:
    channelInfo: ChannelInfo - The channel info object to base this channel on.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    Raises:
    NotImplementedError - when self.channelCode is not one of "nickelodeon",
                          "nickno" or "nickse".

    """

    chn_class.Channel.__init__(self, channelInfo)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    # setup the main parsing data: channel code -> (mainListUri, baseUrl)
    channelUrls = {
        "nickelodeon": ("http://www.nickelodeon.nl/shows", "http://www.nickelodeon.nl"),
        "nickno": ("http://www.nickelodeon.no/program/", "http://www.nickelodeon.no"),
        "nickse": ("http://www.nickelodeon.se/serier/", "http://www.nickelodeon.se"),
    }
    if self.channelCode not in channelUrls:
        raise NotImplementedError("Unknown channel code")

    # All supported channels share the same image.
    self.noImage = "nickelodeonimage.png"
    self.mainListUri, self.baseUrl = channelUrls[self.channelCode]

    # Raw strings below: \W and \d are regex escapes, not Python string escapes.
    episodeItemRegex = r"""<a[^>]+href="(?<url>/[^"]+)"[^>]*>\W*<img[^>]+src='(?<thumburl>[^']+)'[^>]*>\W*<div class='info'>\W+<h2 class='title'>(?<title>[^<]+)</h2>\W+<p class='sub_title'>(?<description>[^<]+)</p>"""
    episodeItemRegex = Regexer.FromExpresso(episodeItemRegex)
    self._AddDataParser(self.mainListUri, matchType=ParserData.MatchExact,
                        parser=episodeItemRegex, creator=self.CreateEpisodeItem)

    videoItemRegex = r"""<li[^>]+data-item-id='\d+'>\W+<a href='(?<url>[^']+)'>\W+<img[^>]+src="(?<thumburl>[^"]+)"[^>]*>\W+<p class='title'>(?<title>[^<]+)</p>\W+<p[^>]+class='subtitle'[^>]*>(?<subtitle>[^>]+)</p>"""
    videoItemRegex = Regexer.FromExpresso(videoItemRegex)
    self._AddDataParser("*", parser=videoItemRegex, creator=self.CreateVideoItem,
                        updater=self.UpdateVideoItem)

    self.pageNavigationRegex = r'href="(/video[^?"]+\?page_\d*=)(\d+)"'
    self.pageNavigationRegexIndex = 1
    self._AddDataParser("*", parser=self.pageNavigationRegex, creator=self.CreatePageItem)

    self.mediaUrlRegex = '<param name="src" value="([^"]+)" />'    # used for the UpdateVideoItem
    self.swfUrl = "http://origin-player.mtvnn.com/g2/g2player_2.1.7.swf"

    #===============================================================================================================
    # Test cases:
    #  NO: Avator -> Other items
    #  SE: Hotel 13 -> Other items
    #  NL: Sam & Cat -> Other items

    # ====================================== Actual channel setup STOPS here =======================================
    return
def add_search_and_genres(self, data):
    """ Pre-processes the main list by adding a search item and a genres folder.

    When the parent item carries a "genre" in its metaData, that genre is
    remembered on the channel and no extra items are generated. Otherwise a
    search item and a genres folder are returned; the folder is filled with
    one item per genre found in the data, skipping the "all" entry.

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the (unchanged) data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")
    items = []

    parent = self.parentItem
    if parent is not None and "genre" in parent.metaData:
        self.__genre = parent.metaData["genre"]
        Logger.debug("Parsing a specific genre: %s", self.__genre)
        return data, items

    def build_item(title, url, ungrouped):
        # All generated items share the channel thumb and fanart.
        new_item = MediaItem(title, url)
        new_item.complete = True
        new_item.thumb = self.noImage
        if ungrouped:
            new_item.dontGroup = True
        new_item.fanart = self.fanart
        return new_item

    # No date is set on the search item: no items have dates, so adding one
    # would force a date sort in Retrospect.
    items.append(build_item("\a.: Sök :.", "searchSite", True))

    genres_folder = build_item("\a.: Genrer :.", "", True)
    items.append(genres_folder)

    # find the actual genres
    genre_pattern = Regexer.from_expresso(
        '<li[^>]+genre[^>]*><button[^>]+data-value="(?<genre>[^"]+)"[^>]*>'
        '(?<title>[^>]+)</button></li>'
    )
    for match in Regexer.do_regex(genre_pattern, data):
        if match["genre"] != "all":
            genre_entry = build_item(match["title"], self.mainListUri, False)
            genre_entry.metaData = {"genre": match["genre"]}
            genres_folder.items.append(genre_entry)

    Logger.debug("Pre-Processing finished")
    return data, items
def __init__(self, channel_info):
    """ Initialisation of the class.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    Registers the episode parser for the main list, a generic video parser
    for all URLs and a dedicated parser for the "stadion" section.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.noImage = "vtmimage.jpg"

    # setup the urls
    self.mainListUri = "http://nieuws.vtm.be/herbekijk"
    self.baseUrl = "http://nieuws.vtm.be"

    # setup the main parsing data
    self.episodeItemRegex = '<li><a[^>]+href="/([^"]+)" class="level-1[^>]+>([^<]+)</a>'
    self._add_data_parser(self.mainListUri,
                          creator=self.create_episode_item,
                          parser=self.episodeItemRegex)

    article_video_regex = Regexer.from_expresso(
        r'<article[^<]+has-video"[^>]*>\W*<a href="(?<Url>[^<"]+)"[^>]*>\W+'
        r'<div[^<]+<img[^>]+src="(?<Thumb>[^"]+)"[^>]*>[\w\W]{0,500}?<h3[^>]*>'
        r'(?:\W+<span[^>]*>[^>]*>)?(?<Title>[^<]+)</h3>\W+<div[^<]+<time[^>]+'
        r'datetime="(?<DateTime>[^"]+)"[^<]+</time>\W*</div>\W*<p[^>]+>*'
        r'(?<Description>[^<]+)'
    )
    self._add_data_parser("*",
                          creator=self.create_video_item,
                          parser=article_video_regex,
                          updater=self.update_video_item)

    stadion_video_regex = Regexer.from_expresso(
        r'<article[^>]*>\W*<div class="image is-video">\W*<a href="(?<Url>[^"]+)'
        r'[^>]*>\W*<img[^>]+src="(?<Thumb>[^"]+)"[\w\W]{0,1000}?<h3 class='
        r'"pagemanager-item-title">\W*<span>\W*<a[^>]*>(?<Title>[^<]+)[\w\W]'
        r'{0,1000}?<div class="teaser">\W*<a[^>]+>(?<Description>[^<]+)'
    )
    self._add_data_parser("http://nieuws.vtm.be/stadion",
                          parser=stadion_video_regex,
                          creator=self.create_video_item,
                          updater=self.update_video_item)

    # No page navigation for this channel.
    self.pageNavigationRegex = ''
    self.pageNavigationRegexIndex = 0

    #===============================================================================================================
    # non standard items

    #===============================================================================================================
    # Test cases:

    # ====================================== Actual channel setup STOPS here =======================================
    return
def __init__(self, channelInfo):
    """Initialisation of the class.

    Arguments:
    channelInfo: ChannelInfo - The channel info object to base this channel on.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    Registers the episode parser for the main list, a generic video parser
    for all URLs and a dedicated parser for the "stadion" section.

    """

    chn_class.Channel.__init__(self, channelInfo)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.noImage = "vtmimage.png"

    # setup the urls
    self.mainListUri = "http://nieuws.vtm.be/herbekijk"
    self.baseUrl = "http://nieuws.vtm.be"

    # setup the main parsing data
    self.episodeItemRegex = '<li><a[^>]+href="/([^"]+)" class="level-1[^>]+>([^<]+)</a>'
    self._AddDataParser(self.mainListUri, creator=self.CreateEpisodeItem, parser=self.episodeItemRegex)

    # Raw strings below: \W and \w are regex escapes, not Python string escapes.
    self.videoItemRegex = r'<article[^<]+has-video"[^>]*>\W*<a href="(?<Url>[^<"]+)"[^>]*>\W+<div[^<]+<img[^>]+' \
                          r'src="(?<Thumb>[^"]+)"[^>]*>[\w\W]{0,500}?<h3[^>]*>(?:\W+<span[^>]*>[^>]*>)?' \
                          r'(?<Title>[^<]+)</h3>\W+<div[^<]+<time[^>]+datetime="(?<DateTime>[^"]+)"[^<]+</time>\W*' \
                          r'</div>\W*<p[^>]+>*(?<Description>[^<]+)'
    self.videoItemRegex = Regexer.FromExpresso(self.videoItemRegex)
    self._AddDataParser("*", creator=self.CreateVideoItem, parser=self.videoItemRegex,
                        updater=self.UpdateVideoItem)

    stadionRegex = r'<article[^>]*>\W*<div class="image is-video">\W*<a href="(?<Url>[^"]+)[^>]*>\W*' \
                   r'<img[^>]+src="(?<Thumb>[^"]+)"[\w\W]{0,1000}?<h3 class="pagemanager-item-title">' \
                   r'\W*<span>\W*<a[^>]*>(?<Title>[^<]+)[\w\W]{0,1000}?<div class="teaser">\W*' \
                   r'<a[^>]+>(?<Description>[^<]+)'
    stadionRegex = Regexer.FromExpresso(stadionRegex)
    self._AddDataParser("http://nieuws.vtm.be/stadion", parser=stadionRegex,
                        creator=self.CreateVideoItem, updater=self.UpdateVideoItem)

    self.mediaUrlRegex = '<source[^>]+src="([^"]+)"[^>]+type="video/mp4"[^>]*/>'
    self.pageNavigationRegex = ''
    self.pageNavigationRegexIndex = 0

    #===============================================================================================================
    # non standard items

    #===============================================================================================================
    # Test cases:

    # ====================================== Actual channel setup STOPS here =======================================
    return
def UpdateVideoItem(self, item):
    """Updates an existing MediaItem with its media streams.

    Arguments:
    item : MediaItem - the MediaItem that needs to be updated

    Returns:
    The original item, updated with the streams that were found.

    The page behind item.url is first searched for a player GUID. When one
    is found, the SMIL playlist for it is fetched and each listed video is
    added (YouTube entries are resolved through YouTube.GetStreamsFromYouTube).
    Otherwise the page is searched for BrightCove player data and its
    "IOSRenditions" streams are added.

    """

    Logger.Debug('Starting UpdateVideoItem for %s (%s)', item.name, self.channelName)

    # get additional info
    data = UriHandler.Open(item.url, proxy=self.proxy)
    # Raw string: \? is a regex escape, not a Python string escape.
    guid = Regexer.DoRegex(
        r'<meta property="og:video" content="http://player.extreme.com/FCPlayer.swf\?id=([^&]+)&[^"]+" />', data)

    #<param name="flashvars" value="id=dj0xMDEzNzQyJmM9MTAwMDAwNA&tags=source%253Dfreecaster&autoplay=1" />
    # http://freecaster.tv/player/smil/dj0xMDEzNzQyJmM9MTAwMDAwNA -> playlist with bitrate
    # http://freecaster.tv/player/smil/dj0xMDEzNzQyJmM9MTAwMDAwNA -> info (not needed, get description from main page.

    if guid:
        smilUrl = '%s/player/smil/%s' % (self.baseUrl, guid[0],)
        # NOTE(review): unlike the other requests here this one does not pass
        # self.proxy - confirm whether that is intentional.
        data = UriHandler.Open(smilUrl)

        smiller = Smil(data)
        baseUrl = smiller.GetBaseUrl()
        videos = smiller.GetVideosAndBitrates()

        part = item.CreateNewEmptyMediaPart()
        for video in videos:
            if "youtube" in video[0]:
                for s, b in YouTube.GetStreamsFromYouTube(video[0], self.proxy):
                    item.complete = True
                    part.AppendMediaStream(s, b)
            else:
                part.AppendMediaStream("%s%s" % (baseUrl, video[0]), bitrate=int(int(video[1]) / 1000))
                item.complete = True

        Logger.Trace("UpdateVideoItem complete: %s", item)
        return item

    # Try the brightcove
    brightCoveRegex = r'<object id="myExperience[\w\W]+?videoPlayer" value="(\d+)"[\w\W]{0,1000}?playerKey" value="([^"]+)'
    brightCoveData = Regexer.DoRegex(brightCoveRegex, data)
    Logger.Trace(brightCoveData)
    if brightCoveData:
        seed = "c5f9ae8729f7054d43187989ef3421531ee8678d"
        objectData = brightCoveData[0]
        # from proxyinfo import ProxyInfo
        playerKey = str(objectData[1])
        videoId = int(objectData[0])

        part = item.CreateNewEmptyMediaPart()
        # But we need the IOS streams!
        amfHelper = BrightCove(Logger.Instance(), playerKey, videoId, str(item.url), seed, proxy=self.proxy)
        for stream, bitrate in amfHelper.GetStreamInfo(renditions="IOSRenditions"):
            part.AppendMediaStream(stream, bitrate)

    # Logger.Error("Cannot find GUID in url: %s", item.url)
    return item
def __init__(self, channel_info):
    """ Initialisation of the class.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    Registers the episode parser for the main list, the live stream updater
    for "#livetv" and "#liveradio", and the video and page parsers for all
    other URLs.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.noImage = "l1image.png"

    # setup the urls
    self.mainListUri = "https://l1.nl/gemist/"
    self.baseUrl = "https://l1.nl"

    # setup the main parsing data
    show_pattern = Regexer.from_expresso(
        r'<li>\W*<a[^>]*href="(?<url>/[^"]+)"[^>]*>(?<title>[^<]+)</a>\W*</li>')
    self._add_data_parser(self.mainListUri,
                          preprocessor=self.pre_process_folder_list,
                          parser=show_pattern,
                          creator=self.create_episode_item)

    # live stuff
    self._add_data_parsers(["#livetv", "#liveradio"], updater=self.update_live_stream)

    media_item_pattern = Regexer.from_expresso(
        r'<a[^>]*class="mediaItem"[^>]*href="(?<url>[^"]+)"[^>]*title="(?<title>'
        r'[^"]+)"[^>]*>[\w\W]{0,500}?<img[^>]+src="/(?<thumburl>[^"]+)'
    )
    self._add_data_parser("*",
                          parser=media_item_pattern,
                          creator=self.create_video_item,
                          updater=self.update_video_item)

    paging_pattern = Regexer.from_expresso(
        r'<a[^>]+href="https?://l1.nl/([^"]+?pagina=)(\d+)"')
    self.pageNavigationRegexIndex = 1
    self._add_data_parser("*", parser=paging_pattern, creator=self.create_page_item)

    #===============================================================================================================
    # non standard items

    #===============================================================================================================
    # Test cases:

    # ====================================== Actual channel setup STOPS here =======================================
    return
def __init__(self, channelInfo):
    """Initialisation of the class.

    Arguments:
    channelInfo: ChannelInfo - The channel info object to base this channel on.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    Registers two folder parsers for the main list, a video parser (with the
    AddPages preprocessor) for the fragments filter URL and the generic video
    updater for all URLs.

    """

    chn_class.Channel.__init__(self, channelInfo)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.videoType = None
    self.noImage = "eredivisieimage.jpg"

    # setup the urls
    self.baseUrl = "http://www.foxsports.nl"
    self.mainListUri = "http://www.foxsports.nl/videos/"
    self.swfUrl = "http://static.eredivisielive.nl/static/swf/edPlayer-1.6.2.plus.swf"

    # setup the main parsing data
    # self.episodeItemRegex = '<option[^>]+value="([^"]+)"[^=>]+(?:data-season="([^"]+)")?[^=>]*>([^<]+)</option>'
    # self.videoItemJson = ("item",)

    self._AddDataParser(
        self.mainListUri,
        parser=Regexer.FromExpresso('<a [hd][^>]*ata-(?<Type>area|sport)="(?<Url>[^"]+)[^>]*>'
                                    '(?<Title>[^<]+)</a>'),
        creator=self.CreateFolderItem
    )

    # The last fragment contains a single quote, so it is written as a
    # double-quoted literal; inside single quotes it ended the string early.
    # NOTE(review): the literal may originally have been the HTML entity for
    # an apostrophe rather than a plain "'" - confirm against the live page.
    self._AddDataParser(
        self.mainListUri,
        parser=Regexer.FromExpresso(r'<a[^>]+href="/video/(?<Type>filter|meest_bekeken)/?'
                                    r'(?<Url>[^"]*)">[^<]*</a>\W+<h1[^>]*>(?<Title>[^<;]+)'
                                    r"(?:'s){0,1}</h1>"),
        creator=self.CreateFolderItem
    )

    # Raw strings: \W and \w are regex escapes, not Python string escapes.
    self._AddDataParser(
        "http://www.foxsports.nl/video/filter/fragments/",
        preprocessor=self.AddPages,
        parser=Regexer.FromExpresso(r"<img[^>]+src='(?<Thumb>[^']+)'[^>]*>\W+</picture>\W+"
                                    r'<span class="[^"]+video[\w\W]{0,500}?<h1[^>]*>\W+<a href="'
                                    r'(?<Url>[^"]+)"[^>]*>(?<Title>[^<]+)</a>\W+</h1>\W+<span'
                                    r'[^>]*>(?<Date>[^>]+)</span>'),
        creator=self.CreateVideoItem
    )

    self._AddDataParser("*", updater=self.UpdateVideoItem)

    # ====================================== Actual channel setup STOPS here =======================================
    return
def UpdateJsonVideoItem(self, item):
    """Updates a MediaItem whose URL points to a JSON description.

    Arguments:
    item : MediaItem - the MediaItem that needs to be updated

    Returns:
    The original item, updated with the streams that were found.

    When the JSON holds a usable "playlist" M3u8 url, its streams are added
    (skipping those with "_enc_" in the url). Otherwise the embed page for
    the "vpakey" is fetched and its BrightCove playback data is used to
    locate an M2TS source, of which the M3u8 streams are added.

    """

    data = UriHandler.Open(item.url, proxy=self.proxy)
    jsonData = JsonHelper(data)
    m3u8Url = jsonData.GetValue("playlist")

    if m3u8Url != "https://embed.kijk.nl/api/playlist/.m3u8":
        part = item.CreateNewEmptyMediaPart()
        for s, b in M3u8.GetStreamsFromM3u8(m3u8Url, self.proxy, appendQueryString=True):
            if "_enc_" in s:
                Logger.Warning("Found encrypted stream. Skipping %s", s)
                continue

            item.complete = True
            # s = self.GetVerifiableVideoUrl(s)
            part.AppendMediaStream(s, b)
        return item

    Logger.Warning("No M3u8 data found. Falling back to BrightCove")
    videoId = jsonData.GetValue("vpakey")
    # videoId = json.GetValue("videoId") -> Not all items have a videoId
    url = "https://embed.kijk.nl/video/%s?width=868&height=491" % (videoId, )
    referer = "https://embed.kijk.nl/video/%s" % (videoId, )
    part = item.CreateNewEmptyMediaPart()

    # First try the new BrightCove JSON
    data = UriHandler.Open(url, proxy=self.proxy, referer=referer)
    brightCoveRegex = '<video[^>]+data-video-id="(?<videoId>[^"]+)[^>]+data-account="(?<videoAccount>[^"]+)'
    brightCoveData = Regexer.DoRegex(Regexer.FromExpresso(brightCoveRegex), data)
    if brightCoveData:
        Logger.Info("Found new BrightCove JSON data")
        brightCoveUrl = 'https://edge.api.brightcove.com/playback/v1/accounts/%(videoAccount)s/videos/%(videoId)s' % \
                        brightCoveData[0]
        headers = {
            "Accept": "application/json;pk=BCpkADawqM3ve1c3k3HcmzaxBvD8lXCl89K7XEHiKutxZArg2c5RhwJHJANOwPwS_4o7UsC4RhIzXG8Y69mrwKCPlRkIxNgPQVY9qG78SJ1TJop4JoDDcgdsNrg"
        }
        brightCoveData = UriHandler.Open(brightCoveUrl, proxy=self.proxy, additionalHeaders=headers)
        brightCoveJson = JsonHelper(brightCoveData)

        # A real list is needed: on Python 3 filter() returns an iterator that
        # is always truthy and cannot be indexed.
        sources = brightCoveJson.GetValue("sources") or []
        streams = [source for source in sources if source["container"] == "M2TS"]
        if streams:
            streamUrl = streams[0]["src"]
            for s, b in M3u8.GetStreamsFromM3u8(streamUrl, self.proxy):
                item.complete = True
                part.AppendMediaStream(s, b)

    return item
def UpdateVideoItem(self, item):
    """Updates an existing MediaItem with its media streams.

    Arguments:
    item : MediaItem - the MediaItem that needs to be updated

    Returns:
    The original item with its MediaItemParts replaced by a single part that
    holds the found renditions. When the mgid or the media content url
    cannot be found, an error is logged and the item is returned without
    being updated.

    If self.mainListUri contains "json", the data behind item.url is used as
    metadata directly. Otherwise the mgid found in that data is used to fetch
    the mrss metadata from api.mtvnn.com.

    """

    Logger.Debug('Starting UpdateVideoItem for %s (%s)', item.name, self.channelName)

    url = item.url
    data = UriHandler.Open(url)

    if "json" in self.mainListUri:
        metaData = data
    else:
        mgids = Regexer.DoRegex("mgid:[^ ]+playlist-[abcdef0-9]+", data)
        if not mgids:
            # Report the missing id instead of failing with an IndexError.
            Logger.Error("Cannot find mgid in url: %s", url)
            return item
        mgidUrlEncoded = htmlentityhelper.HtmlEntityHelper.UrlEncode(mgids[0])
        metaData = UriHandler.Open(
            "http://api.mtvnn.com/v2/mrss.xml?uri=%s" % (mgidUrlEncoded, ))

    videoUrls = Regexer.DoRegex("<media:content[^>]+url='([^']+)'>", metaData)
    if not videoUrls:
        Logger.Error("Cannot find media content url for: %s", url)
        return item
    videoUrl = videoUrls[0]
    Logger.Trace(videoUrl)
    videoData = UriHandler.Open(videoUrl)

    # Raw string: \d and \W are regex escapes, not Python string escapes.
    videoItems = Regexer.DoRegex(
        r'<rendition[^>]+bitrate="(\d+)"[^>]*>\W+<src>([^<]+)<', videoData)

    item.MediaItemParts = []
    part = item.CreateNewEmptyMediaPart()
    for bitrate, source in videoItems:
        mediaUrl = self.GetVerifiableVideoUrl(source)
        part.AppendMediaStream(mediaUrl, bitrate)

    item.complete = True
    return item
def UpdateVideoItemLive(self, item):
    """Updates a live MediaItem with its actual stream.

    Arguments:
    item : MediaItem - the live MediaItem that needs to be updated

    Returns:
    The updated item. For an MP3 (radio) page this is the item itself,
    flagged complete, with a single 192 bitrate stream. For a page with an
    npo-player the result of the regular video update for the first
    media-id is returned. When neither is found, a warning is logged and
    the item is returned without being flagged complete.

    """

    Logger.Debug('Starting UpdateVideoItem: %s', item.name)

    item.MediaItemParts = []
    mediaPart = item.CreateNewEmptyMediaPart()

    # we need to determine radio or live tv
    Logger.Debug("Fetching live stream data from item url: %s", item.url)
    pageHtml = UriHandler.Open(item.url, proxy=self.proxy)

    audioUrls = Regexer.DoRegex("""data-streams='{"url":"([^"]+)","codec":"[^"]+"}'""", pageHtml)
    if audioUrls:
        Logger.Debug("Found MP3 URL")
        mediaPart.AppendMediaStream(audioUrls[0], 192)
        item.complete = True
        return item

    Logger.Debug("Finding the actual metadata url from %s", item.url)
    # NPO3 normal stream had wrong subs
    # NPO3 has apparently switched the normal and hearing impaired streams?
    # The alternative prid lookup is therefore switched off by the "and False".
    useAlternativePrid = "npo-3" in item.url and False
    if useAlternativePrid:
        episodeIds = Regexer.DoRegex(
            '<div class="video-player-container"[^>]+data-alt-prid="([^"]+)"', pageHtml)
    else:
        episodeIds = Regexer.DoRegex('<npo-player media-id="([^"]+)"', pageHtml)

    # Only the first media id on the page is used.
    if episodeIds:
        return self.__UpdateVideoItem(item, episodeIds[0])

    Logger.Warning("Cannot update live item: %s", item)
    return item
def __update_video_from_mpd(self, item, mpd_info, use_adaptive_with_encryption):
    """ Updates an existing MediaItem with more data based on an MPD stream.

    The MPD manifest is downloaded from mpd_info["mediaLocator"]. A `.vtt`
    BaseURL in the manifest is downloaded as subtitle. With the adaptive
    add-on enabled, the manifest url is added as stream together with a
    license key built from the manifest's licenseUrl and
    mpd_info["playToken"]; otherwise a DRM dialog is shown and no stream is
    added.

    :param dict[str,str] mpd_info:              Stream info retrieved from the stream json.
    :param bool use_adaptive_with_encryption:   Do we use the Adaptive InputStream add-on?
    :param MediaItem item:                      The original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    Logger.debug("Updating streams using BrightCove data.")
    media_part = item.create_new_empty_media_part()

    manifest_url = "https:{0}".format(mpd_info["mediaLocator"])
    manifest = UriHandler.open(manifest_url, proxy=self.proxy)

    subtitle_urls = Regexer.do_regex(r'<BaseURL>([^<]+\.vtt)</BaseURL>', manifest)
    if subtitle_urls:
        Logger.debug("Found subtitle: %s", subtitle_urls[0])
        media_part.Subtitle = SubtitleHelper.download_subtitle(
            subtitle_urls[0], proxy=self.proxy, format="webvtt")

    if not use_adaptive_with_encryption:
        XbmcWrapper.show_dialog(
            LanguageHelper.get_localized_string(LanguageHelper.DrmTitle),
            LanguageHelper.get_localized_string(LanguageHelper.WidevineLeiaRequired))
        return item

    # We can use the adaptive add-on with encryption
    Logger.info("Using MPD InputStreamAddon")
    license_url = Regexer.do_regex('licenseUrl="([^"]+)"', manifest)[0]
    auth_headers = {"Authorization": "Bearer {0}".format(mpd_info["playToken"])}
    license_key = Mpd.get_license_key(license_url, key_headers=auth_headers)

    mpd_stream = media_part.append_media_stream(manifest_url, 0)
    Mpd.set_input_stream_addon_input(mpd_stream, self.proxy, license_key=license_key)
    item.complete = True
    return item
def get_nodes_content(self, node_tag, *args):
    """Retrieves the content of all nodes with node_tag as name.

    Arguments:
    node_tag : string     - Name of the node to retrieve
    args     : dictionary - Zero or more dictionaries, each holding one
                            attribute of the node. They should be passed in
                            their order of appearance in the node.

    Returns:
    A list of all the content of the found nodes.

    Example: passing {"size": "380x285"}, {"ratio": "4:3"} will find a node
    like <nodename size="380x285" name="test" ratio="4:3">.

    """

    pattern_parts = ["<%s" % (node_tag,)]
    for attribute in args:
        # only the first key of each dictionary is used
        name = list(attribute.keys())[0]
        pattern_parts.append(r'[^>]*%s\W*=\W*"%s"' % (name, attribute[name]))
    pattern_parts.append(r"[^>]*>([\w\W]+?)</%s>" % (node_tag,))

    regex = "".join(pattern_parts)
    Logger.trace("XmlRegex = %s", regex)

    results = Regexer.do_regex(regex, self.data)
    Logger.trace(results)
    return results
def extract_json(self, data):
    """ Extracts the embedded JSON from the page and flattens it into a list.

    The first 'type="application/json"' block in the data is parsed. Each
    top-level key of that JSON object is stored as the "title" of its value,
    and the values are collected into a list that replaces the parsed JSON.
    If no JSON block is found, the data is returned untouched.

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")
    items = []

    matches = Regexer.do_regex('type="application/json">([^<]+)<', data)
    if not matches:
        Logger.warning("No JSON data found.")
        return data, items

    json_helper = JsonHelper(matches[0])
    entries = []
    for title, entry in json_helper.json.items():
        entries.append(entry)
        entry["title"] = title

    # hand back the helper with the flattened list as its content
    json_helper.json = entries
    return json_helper, items
def UpdateVideoItem(self, item):
    """
    Accepts an item and returns it with its HLS streams added.

    When the item URL is not an m3u8 playlist itself, the page is opened and
    the JSON matched by self.mediaUrlRegex is parsed; its "source" -> "hls"
    value is used as the playlist. The item is marked complete as soon as a
    stream is found in the playlist.
    """

    Logger.Debug('Starting UpdateVideoItem for %s (%s)', item.name, self.channelName)

    if item.url.endswith("m3u8"):
        playlistUrl = item.url
    else:
        pageData = UriHandler.Open(item.url, proxy=self.proxy)
        configMatches = Regexer.DoRegex(self.mediaUrlRegex, pageData)
        if not configMatches:
            Logger.Error("Cannot find JSON stream info.")
            return item

        # Sample of the page's config, taken from the original comment:
        # var playerConfig = {"id":"mediaplayer", ..., "source":{"hls":"http:\/\/...\/playlist.m3u8"}, ...}
        playerConfig = JsonHelper(configMatches[0])
        Logger.Trace(playerConfig.json)
        playlistUrl = playerConfig.GetValue("source", "hls")
        Logger.Debug("Found HLS: %s", playlistUrl)

    part = item.CreateNewEmptyMediaPart()
    for streamUrl, bitrate in M3u8.GetStreamsFromM3u8(playlistUrl, self.proxy):
        item.complete = True
        part.AppendMediaStream(streamUrl, bitrate)

    return item
def get_movie_id(self, data):
    """ Finds the movie ID in the data and loads the JSON feed for that movie.

    The last "movietrailers://movie/detail/<id>" reference in the data
    determines the ID. The matching feed URL is opened, stored on
    self.parentItem.url and its content is returned instead of the original
    data.

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")
    items = []

    id_matches = Regexer.do_regex(r"movietrailers://movie/detail/(\d+)", data)
    movie_id = id_matches[-1]
    Logger.debug("Found Movie ID: %s", movie_id)

    feed_url = "%s/trailers/feeds/data/%s.json" % (self.baseUrl, movie_id)
    data = UriHandler.open(feed_url, proxy=self.proxy)

    # set it for logging purposes
    self.parentItem.url = feed_url

    Logger.debug("Pre-Processing finished")
    return data, items
def AddClips(self, data):
    """Adds a "Clips" item to the list and makes sure the data is JSON.

    Arguments:
    data : string - the retrieve data that was loaded for the current item
                    and URL.

    Returns:
    A tuple of the data and a list of MediaItems that were generated.

    If the data already is JSON, the clip URL is derived from the parent
    item's URL (only when that URL ends with "type=program"). Otherwise the
    data is treated as HTML: the last "data-format-id" in it is used to load
    the program JSON (which replaces the data) and to build the clip URL.

    """

    Logger.Info("Adding Clips Pre-Processing")
    items = []

    # if the main list was retrieved using json, or the current data is json, just
    # determine the clip URL
    clipUrl = None
    if data.lstrip().startswith("{"):
        if self.parentItem.url.endswith("type=program"):
            # http://playapi.mtgx.tv/v3/videos?format=6723&order=-airdate&type=program
            # http://playapi.mtgx.tv/v3/videos?format=6723&order=-updated&type=clip
            clipUrl = self.parentItem.url.replace("type=program", "type=clip")
    else:
        # now we determine the ID and load the json data. The pattern is a raw
        # string so that "\d" is not treated as an (invalid) string escape.
        dataId = Regexer.DoRegex(r'data-format-id="(\d+)"', data)[-1]
        Logger.Debug("Found FormatId = %s", dataId)
        programUrl = "http://playapi.mtgx.tv/v3/videos?format=%s&order=-airdate&type=program" % (dataId, )
        data = UriHandler.Open(programUrl, proxy=self.proxy)
        clipUrl = "http://playapi.mtgx.tv/v3/videos?format=%s&order=-updated&type=clip" % (dataId, )

    if clipUrl is not None:
        clipTitle = LanguageHelper.GetLocalizedString(LanguageHelper.Clips)
        clipItem = mediaitem.MediaItem("\a.: %s :." % (clipTitle, ), clipUrl)
        clipItem.thumb = self.noImage
        items.append(clipItem)

    Logger.Debug("Pre-Processing finished")
    return data, items
def UpdateVideoItem(self, item):
    """
    Accepts an item and returns it with the streams found on its page.

    Every "file: '...'" entry on the page is treated as a stream, except for
    entries containing "anifest". Each stream is added twice: once as found
    and once with "_medium.mp4" replaced by "_low.mp4" at a lower bitrate.
    RTMP streams are first passed through self.GetVerifiableVideoUrl.
    """

    Logger.Debug('Starting UpdateVideoItem for %s (%s)', item.name, self.channelName)

    data = UriHandler.Open(item.url, proxy=self.proxy).decode('unicode_escape')
    # raw string: "\W" must reach the regex engine and is not a valid string escape
    streams = Regexer.DoRegex(r"file:\W+'([^']+)'", data)

    part = item.CreateNewEmptyMediaPart()
    for s in streams:
        if "anifest" in s:
            # skip manifest entries
            continue

        s = JsonHelper.ConvertSpecialChars(s)
        if s.startswith("rtmp"):
            s = self.GetVerifiableVideoUrl(s)
            part.AppendMediaStream(s, 1001)
            part.AppendMediaStream(s.replace("_medium.mp4", "_low.mp4"), 301)
        else:
            part.AppendMediaStream(s, 1002)
            part.AppendMediaStream(s.replace("_medium.mp4", "_low.mp4"), 302)

    item.complete = True
    return item
def UpdateVideoItem(self, item):
    """
    Updates the item with the streams from the base64 encoded JSON on its page.

    The last match of self.mediaUrlRegex on the page is base64 decoded and
    parsed as JSON. The "flv", "tablet" and "mobile" entries are added as
    streams; any other key is only logged.
    """

    pageData = UriHandler.Open(item.url, proxy=self.proxy)
    encodedStreams = Regexer.DoRegex(self.mediaUrlRegex, pageData)[-1]
    decodedStreams = EncodingHelper().DecodeBase64(encodedStreams)
    json = JsonHelper(decodedStreams, logger=Logger.Instance())
    Logger.Trace(json)

    # Sample of the decoded JSON:
    # "flv": "http://media.dumpert.nl/flv/e2a926ff_10307954_804223649588516_151552487_n.mp4.flv",
    # "tablet": "http://media.dumpert.nl/tablet/e2a926ff_10307954_804223649588516_151552487_n.mp4.mp4",
    # "mobile": "http://media.dumpert.nl/mobile/e2a926ff_10307954_804223649588516_151552487_n.mp4.mp4",
    bitrates = {"flv": 1000, "tablet": 800, "mobile": 450}

    item.MediaItemParts = []
    part = item.CreateNewEmptyMediaPart()

    streams = json.GetValue()
    for key in streams:
        if key in bitrates:
            part.AppendMediaStream(streams[key], bitrates[key])
        else:
            Logger.Debug("Key '%s' was not used", key)

    item.complete = True
    Logger.Trace("VideoItem updated: %s", item)
    return item
def add_live_channel_and_extract_data(self, data):
    """ Adds the live channel folder and extracts the JSON that needs further processing.

    A folder item pointing to self.liveUrl is always generated. For empty
    data an empty JSON list ("[]") is returned. Otherwise the argument of the
    "setupBroadcastArchive('Tv', ...)" call in the data is returned, or the
    unchanged data if that call cannot be found.

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")

    live_title = LanguageHelper.get_localized_string(LanguageHelper.LiveStreamTitleId)
    live_item = MediaItem("\a.: {} :.".format(live_title), self.liveUrl)
    live_item.type = "folder"
    items = [live_item]

    if not data:
        return "[]", items

    matches = Regexer.do_regex(r"setupBroadcastArchive\('Tv',\s*([^;]+)\);", data)
    has_match = isinstance(matches, (tuple, list)) and len(matches) > 0
    if not has_match:
        Logger.info("Cannot extract JSON data from HTML.")
        return data, items

    Logger.debug("Pre-Processing finished")
    return matches[0], items
def update_video_item_json_player(self, item):
    """ Updates a MediaItem with the streams from the player setup on its page.

    The page is scanned for 'label: "...", file: "..."' pairs. Each file is
    added as a stream; the bitrate is looked up by label and defaults to 0
    for labels that are not known.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    page_data = UriHandler.open(item.url, proxy=self.proxy)
    labelled_files = Regexer.do_regex(r'label:\s*"([^"]+)",\W*file:\s*"([^"]+)"', page_data)

    bitrates = {"720p SD": 1200}
    part = item.create_new_empty_media_part()
    for labelled_file in labelled_files:
        label = labelled_file[0]
        stream_url = labelled_file[1]
        part.append_media_stream(stream_url, bitrates.get(label, 0))
        item.complete = True

    return item
def ExtractJsonData(self, data):
    """Extracts the "programEpisodes" JSON object from the page data.

    Arguments:
    data : string - the retrieve data that was loaded for the current item
                    and URL.

    Returns:
    A tuple of the data and a list of MediaItems that were generated. The
    data is the last "var programEpisodes = {...}" object found in the
    original data; the list of items is always empty.

    """

    Logger.Info("Performing Pre-Processing")

    episodeObjects = Regexer.DoRegex("<script>var programEpisodes = ({[^<]+})", data)
    data = episodeObjects[-1]
    items = []

    Logger.Debug("Pre-Processing finished")
    return data, items
def __convert_ttml_to_srt(ttml):
    """Converts TTML format into SRT format:

    Arguments:
    ttml : string - TTML (Timed Text Markup Language) subtitle format

    Returns:
    SRT formatted subtitle:

    Example:
        1
        00:00:20,000 --> 00:00:24,400
        text

    Only <p> elements with "begin" and "end" values that contain a fractional
    part (hh:mm:ss.fraction) are converted. Paragraphs that fail to convert
    are logged and skipped; they do not consume a sequence number.

    """

    pars_regex = r'<p[^>]+begin="([^"]+)\.(\d+)"[^>]+end="([^"]+)\.(\d+)"[^>]*>([\w\W]+?)</p>'
    subs = Regexer.do_regex(pars_regex, ttml)

    def to_milliseconds(fraction):
        # The digits are a decimal fraction of a second, so ".5" is 500 ms:
        # pad on the right and cut anything beyond millisecond precision.
        return fraction[:3].ljust(3, "0")

    # Collect the entries and join once instead of rebuilding the whole string
    # for every subtitle.
    entries = []
    for sub in subs:
        try:
            start = "%s,%s" % (sub[0], to_milliseconds(sub[1]))
            end = "%s,%s" % (sub[2], to_milliseconds(sub[3]))
            text = sub[4].replace("<br />", "\n")
            text = HtmlEntityHelper.convert_html_entities(text)
            text = text.replace("\r\n", "")
            entries.append("\n%s\n%s --> %s\n%s\n" % (len(entries) + 1, start, end, text.strip()))
        except Exception:
            # best effort: a single broken paragraph should not lose the whole subtitle
            Logger.error("Error parsing subtitle: %s", sub[1], exc_info=True)

    return "".join(entries)
def UpdateVideoItem(self, item):
    """
    Accepts an item and returns it with the download links of its page as streams.

    Every wmv/mp4 link labelled High, Medium, Mid, Low or MP4 is added to a
    new media part; the label determines the bitrate. The item is always
    marked complete.
    """

    Logger.Debug('Starting UpdateVideoItem for %s (%s)', item.name, self.channelName)

    # bitrate for each (lower cased) link label
    bitrates = {
        "high": 2000,
        "medium": 1200,
        "mid": 1200,
        "low": 200,
        "mp4": 200,
    }

    pageData = UriHandler.Open(item.url)
    links = Regexer.DoRegex(
        '<a href="([^"]+.(?:wmv|mp4))">(High|Medium|Mid|Low|MP4)', pageData)

    mediaPart = mediaitem.MediaItemPart(item.name)
    for link in links:
        bitrate = bitrates.get(link[1].lower(), 0)
        streamUrl = HtmlEntityHelper.ConvertHTMLEntities(link[0])
        mediaPart.AppendMediaStream(streamUrl, bitrate)

    item.MediaItemParts.append(mediaPart)
    item.complete = True
    return item
def update_video_item(self, item):
    """ Updates a MediaItem with the sources from the "data-media" attribute on its page.

    The first data-media attribute is HTML-decoded and parsed as JSON. Each
    entry in its "sources" is added as a stream, with a bitrate based on the
    quality name. The item is geo locked unless "geoLocRestriction" is
    "world" (which is also assumed when the value is missing).

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    page_data = UriHandler.open(item.url, proxy=self.proxy, additional_headers=item.HttpHeaders)

    raw_media_info = Regexer.do_regex('data-media="([^"]+)"', page_data)[0]
    media_info = JsonHelper(HtmlEntityHelper.convert_html_entities(raw_media_info))
    Logger.trace(media_info)

    # known qualities: high, web, mobile, url
    bitrates = {"high": 2000, "web": 800, "mobile": 400}

    part = item.create_new_empty_media_part()
    for quality, url in media_info.json.get("sources", {}).items():
        part.append_media_stream(url, bitrates.get(quality, 0))

    restriction = media_info.get_value("geoLocRestriction", fallback="world")
    item.isGeoLocked = restriction != "world"

    item.complete = True
    return item
def update_video_item(self, item):
    """
    Accepts an item and returns it with the HLS streams of its asset added.

    Unless the item URL already points to mediazone.vrt.be, the video ID is
    taken from the page's "data-video" attribute and the asset info is loaded
    from the mediazone API. Only the "hls" entries of "targetUrls" are used.
    The item is always marked complete.
    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    asset_data = UriHandler.open(item.url, proxy=self.proxy)
    part = item.create_new_empty_media_part()

    if "mediazone.vrt.be" not in item.url:
        # The page only references the video: fetch the actual media data
        video_id = Regexer.do_regex('data-video=[\'"]([^"\']+)[\'"]', asset_data)[0]
        asset_url = "https://mediazone.vrt.be/api/v1/een/assets/%s" % (video_id, )
        asset_data = UriHandler.open(asset_url, proxy=self.proxy)

    asset_info = JsonHelper(asset_data)
    for target in asset_info.get_value("targetUrls"):
        Logger.trace(target)
        if target["type"].lower() == "hls":
            for stream_url, bitrate in M3u8.get_streams_from_m3u8(target["url"], self.proxy):
                part.append_media_stream(stream_url, bitrate)

    item.complete = True
    return item