def __init__(self, channel_info):
    """ Configures the Ketnet/Cobra channel: images, urls and data parsers.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    channel_code = self.channelCode
    if channel_code == "ketnet":
        self.noImage = "ketnetimage.jpg"
        self.mainListUri = "https://www.ketnet.be/kijken"
        self.baseUrl = "https://www.ketnet.be"
        # Captures the player configuration object that is embedded in the page.
        self.mediaUrlRegex = r'playerConfig\W*=\W*(\{[\w\W]{0,2000}?);(?:.vamp|playerConfig)'

    elif channel_code == "cobra":
        self.noImage = "cobraimage.png"
        self.mainListUri = "http://www.cobra.be/cm/cobra/cobra-mediaplayer"
        self.baseUrl = "http://www.cobra.be"

    # The player location is derived from whichever base url was selected above.
    self.swfUrl = "%s/html/flash/common/player.swf" % (self.baseUrl, )

    # Main list: the shows (only applied to an exact match of the main url).
    show_pattern = Regexer.from_expresso(
        r'<a[^>]+href="(?<url>/kijken[^"]+)"[^>]*>\W*<img[^>]+src="'
        r'(?<thumburl>[^"]+)"[^>]+alt="(?<title>[^"]+)"')
    self._add_data_parser(
        self.mainListUri,
        match_type=ParserData.MatchExact,
        parser=show_pattern,
        creator=self.create_episode_item)

    # Every other page is first handed to the video section pre-processor.
    self._add_data_parser("*", preprocessor=self.select_video_section)

    # Videos within a page.
    video_pattern = Regexer.from_expresso(
        r'<a title="(?<title>[^"]+)" href="(?<url>[^"]+)"[^>]*>'
        r'\W+<img src="(?<thumburl>[^"]+)"[^<]+<span[^<]+[^<]+'
        r'[^>]+></span>\W+(?<description>[^<]+)')
    self._add_data_parser(
        "*",
        parser=video_pattern,
        creator=self.create_video_item,
        updater=self.update_video_item)

    # "More of this program" links become folders.
    more_pattern = Regexer.from_expresso(
        r'<span class="more-of-program" rel="/(?<url>[^"]+)">')
    self._add_data_parser("*", parser=more_pattern, creator=self.create_folder_item)

    # ====================================== Actual channel setup STOPS here =======================================
def __update_video_from_brightcove(self, item, data, use_adaptive_with_encryption):
    """ Updates an existing MediaItem with stream data retrieved from BrightCove.

    The video and account ids are extracted from the <video> tag in the page data,
    after which the BrightCove playback API is queried for the available sources.
    Only sources with an "M2TS" container are used.

    :param MediaItem item:                      The original MediaItem that needs updating.
    :param str data:                            The page data containing the BrightCove
                                                <video> tag.
    :param bool use_adaptive_with_encryption:   Do we use the Adaptive InputStream add-on?

    :return: The original item with more data added to it's properties. If no usable
             stream was found, the item is returned untouched.
    :rtype: MediaItem

    """

    bright_cove_regex = '<video[^>]+data-video-id="(?<videoId>[^"]+)[^>]+data-account="(?<videoAccount>[^"]+)'
    bright_cove_data = Regexer.do_regex(
        Regexer.from_expresso(bright_cove_regex), data)
    if not bright_cove_data:
        Logger.warning("Error updating using BrightCove data: %s", item)
        return item

    Logger.info("Found new BrightCove JSON data")
    bright_cove_url = 'https://edge.api.brightcove.com/playback/v1/accounts/' \
                      '%(videoAccount)s/videos/%(videoId)s' % bright_cove_data[0]
    headers = {
        "Accept": "application/json;pk=BCpkADawqM3ve1c3k3HcmzaxBvD8lXCl89K7XEHiKutxZArg2c5RhwJHJANOwPwS_4o7UsC4RhIzXG8Y69mrwKCPlRkIxNgPQVY9qG78SJ1TJop4JoDDcgdsNrg"
    }

    bright_cove_data = UriHandler.open(bright_cove_url, additional_headers=headers)
    bright_cove_json = JsonHelper(bright_cove_data)

    # Not every source has to carry a "container" key and the "sources" value might be
    # missing altogether, so do not index blindly.
    sources = bright_cove_json.get_value("sources") or []
    streams = [d for d in sources if d.get("container") == "M2TS"]
    if not streams:
        Logger.warning("Error extracting streams from BrightCove data: %s", item)
        return item

    # noinspection PyTypeChecker
    stream_url = streams[0]["src"]

    # Only create the part once we know there is something to add, so a failed lookup
    # does not leave an empty media part on the item.
    part = item.create_new_empty_media_part()

    # Original author's note: these streams work better with the InputStreamAddon
    # because it removes the "range" http header.
    if use_adaptive_with_encryption:
        Logger.info("Using InputStreamAddon for playback of HLS stream")
        strm = part.append_media_stream(stream_url, 0)
        M3u8.set_input_stream_addon_input(strm)
        item.complete = True
        return item

    for s, b in M3u8.get_streams_from_m3u8(stream_url):
        item.complete = True
        part.append_media_stream(s, b)
    return item
def add_search_and_genres(self, data):
    """ Pre-processes the data and adds a search item and a genres folder.

    When the parent item carries a "genre" in its metaData, that genre is stored on the
    channel and no extra items are generated. Otherwise a search item and a genres item
    are created, where the genres item is filled with one child per genre found in the
    data (the "all" genre is skipped).

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")
    items = []

    parent = self.parentItem
    if parent is not None and "genre" in parent.metaData:
        self.__genre = parent.metaData["genre"]
        Logger.debug("Parsing a specific genre: %s", self.__genre)
        return data, items

    def create_menu_item(title, url):
        # Both top level menu items share the exact same set of properties.
        menu_item = MediaItem(title, url)
        menu_item.complete = True
        menu_item.thumb = self.noImage
        menu_item.dontGroup = True
        menu_item.fanart = self.fanart
        return menu_item

    # No date is set on these items: no items have dates, so adding one would force a
    # date sort in Retrospect.
    items.append(create_menu_item("\a.: Sök :.", "searchSite"))

    genres_folder = create_menu_item("\a.: Genrer :.", "")
    items.append(genres_folder)

    # Find the actual genres in the page data.
    genre_pattern = Regexer.from_expresso(
        '<li[^>]+genre[^>]*><button[^>]+data-value="(?<genre>[^"]+)"[^>]*>'
        '(?<title>[^>]+)</button></li>')
    for match in Regexer.do_regex(genre_pattern, data):
        genre_id = match["genre"]
        if genre_id == "all":
            continue

        child = MediaItem(match["title"], self.mainListUri)
        child.complete = True
        child.thumb = self.noImage
        child.fanart = self.fanart
        child.metaData = {"genre": genre_id}
        genres_folder.items.append(child)

    Logger.debug("Pre-Processing finished")
    return data, items
def __init__(self, channel_info):
    """ Configures the Een channel: image, urls and data parsers.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.noImage = "eenimage.png"

    # The urls
    self.mainListUri = "https://www.een.be/programmas"
    self.baseUrl = "http://www.een.be"

    # Main list: shows are taken from the JSON produced by the pre-processor.
    self._add_data_parser(
        self.mainListUri,
        preprocessor=self.extract_json,
        json=True,
        parser=["data", ],
        creator=self.create_show_item)

    # Videos presented as "card teasers" (href precedes the background image).
    card_teaser_pattern = Regexer.from_expresso(
        r'<a class="card-teaser"[^>][^>]*href="(?<url>[^"]+)"[^>]*>\W+<div[^>]+'
        r'style="background-image: url\(\'(?<thumburl>[^\']+/(?<year>\d{4})/'
        r'(?<month>\d{2})/(?<day>\d{2})/[^\']+)\'[^>]*>\W+<div[^>]+_play[\w\W+]'
        r'{0,2000}?<div[^>]*>(?<_title>[^>]*)</div>\W*<h3[^>]*>(?<title>[^<]+)'
        r'</h3>\W+<div[^>]*>\W+(?:<span[^>]*>[^<]*</span>)?(?<description>[^<]+)')
    self._add_data_parser(
        "*",
        name="Links to teasers of videos (Card teaser)",
        parser=card_teaser_pattern,
        creator=self.create_video_item,
        updater=self.update_video_item)

    # Videos presented as "image teasers" (background image precedes the href).
    image_teaser_pattern = Regexer.from_expresso(
        r'<a[^>]*class="[^"]+-teaser"[^>]*background-image: url\(\'(?<thumburl>'
        r'[^\']+/(?<year>\d{4})/(?<month>\d{2})/(?<day>\d{2})/[^\']+)\'[^>]*href="'
        r'(?<url>[^"]+)"[^>]*>\W+<div[^>]+_play[\w\W+]{0,2000}?<div[^>]*>'
        r'(?<_title>[^>]*)</div>\W*<h3[^>]*>(?<title>[^<]+)</h3>\W+<div[^>]*>\W+'
        r'(?:<span[^>]*>[^<]*</span>)?(?<description>[^<]+)')
    self._add_data_parser(
        "*",
        name="Links to teasers of videos (Image Teaser)",
        parser=image_teaser_pattern,
        creator=self.create_video_item,
        updater=self.update_video_item)

    # Pages that only hold a single video.
    # NOTE(review): "(?:<h2>?<description>[^<]+)?" looks like a malformed named group
    # (probably meant "(?:<h2>(?<description>[^<]+))?") - confirm before changing.
    single_video_pattern = Regexer.from_expresso(
        r'>(?<title>[^<]+)</h1>[\w\W]{0,2000}?(?:<h2>?<description>[^<]+)?'
        r'[\w\W]{0,1000}?data-video="(?<url>[^"]+)"[\w\W]{0,500}data-analytics'
        r'=\'{"date":"(?<year>\d+)-(?<month>\d+)-(?<day>\d+)')
    self._add_data_parser(
        "*",
        name="Pages that contain only a single video",
        parser=single_video_pattern,
        creator=self.create_video_item)

    # ====================================== Actual channel setup STOPS here =======================================
def __init__(self, channel_info):
    """ Configures the VTM news channel: image, urls and data parsers.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.noImage = "vtmimage.jpg"

    # The urls
    self.mainListUri = "http://nieuws.vtm.be/herbekijk"
    self.baseUrl = "http://nieuws.vtm.be"

    # Main list: the first level menu entries.
    self.episodeItemRegex = '<li><a[^>]+href="/([^"]+)" class="level-1[^>]+>([^<]+)</a>'
    self._add_data_parser(
        self.mainListUri,
        creator=self.create_episode_item,
        parser=self.episodeItemRegex)

    # Generic video articles.
    article_pattern = Regexer.from_expresso(
        r'<article[^<]+has-video"[^>]*>\W*<a href="(?<Url>[^<"]+)"[^>]*>\W+'
        r'<div[^<]+<img[^>]+src="(?<Thumb>[^"]+)"[^>]*>[\w\W]{0,500}?<h3[^>]*>'
        r'(?:\W+<span[^>]*>[^>]*>)?(?<Title>[^<]+)</h3>\W+<div[^<]+<time[^>]+'
        r'datetime="(?<DateTime>[^"]+)"[^<]+</time>\W*</div>\W*<p[^>]+>*'
        r'(?<Description>[^<]+)')
    self._add_data_parser(
        "*",
        creator=self.create_video_item,
        parser=article_pattern,
        updater=self.update_video_item)

    # The "stadion" section uses a different page layout.
    stadion_pattern = Regexer.from_expresso(
        r'<article[^>]*>\W*<div class="image is-video">\W*<a href="(?<Url>[^"]+)'
        r'[^>]*>\W*<img[^>]+src="(?<Thumb>[^"]+)"[\w\W]{0,1000}?<h3 class='
        r'"pagemanager-item-title">\W*<span>\W*<a[^>]*>(?<Title>[^<]+)[\w\W]'
        r'{0,1000}?<div class="teaser">\W*<a[^>]+>(?<Description>[^<]+)')
    self._add_data_parser(
        "http://nieuws.vtm.be/stadion",
        parser=stadion_pattern,
        creator=self.create_video_item,
        updater=self.update_video_item)

    # No paging is configured for this channel.
    self.pageNavigationRegex = ''
    self.pageNavigationRegexIndex = 0

    # ====================================== Actual channel setup STOPS here =======================================
def update_video_item(self, item):
    """ Updates an existing MediaItem with the streams from the mediagen feeds.

    The item's page is opened and all GUIDs in it are collected. For each GUID the
    mediagen feed is retrieved and every stream found in that feed is added to a
    MediaItemPart (one part per GUID, created when its first stream is found).

    If the returned item does not have a MediaItemPart then the self.complete flag
    will automatically be set back to False.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    # 1 - get the overall config file
    guid_pattern = 'http://[^:]+/mgid:[^"]+:([0-9a-f-]+)"'
    stream_pattern = r'type="video/([^"]+)" bitrate="(\d+)">\W+<src>([^<]+)</src>'

    page_data = UriHandler.open(item.url, proxy=self.proxy)
    item.MediaItemParts = []

    for raw_guid in Regexer.do_regex(guid_pattern, page_data):
        Logger.debug("Processing part with GUID: %s", raw_guid)

        # Example of the resulting url:
        # http://www.southpark.nl/feeds/video-player/mediagen?uri=mgid%3Aarc%3Aepisode%3Acomedycentral.com%3Aeb2a53f7-e370-4049-a6a9-57c195367a92&suppressRegisterBeacon=true
        mgid = HtmlEntityHelper.url_encode("mgid:arc:episode:comedycentral.com:%s" % (raw_guid,))
        feed_url = "%s/feeds/video-player/mediagen?uri=%s&suppressRegisterBeacon=true" % (self.baseUrl, mgid)

        # 2 - get the streams for this GUID
        feed_data = UriHandler.open(feed_url, proxy=self.proxy)

        # Each GUID gets its own part, which is only created for the first stream.
        current_part = None
        for stream_info in Regexer.do_regex(stream_pattern, feed_data):
            if current_part is None:
                current_part = item.create_new_empty_media_part()

            stream_url = self.get_verifiable_video_url(stream_info[2])
            current_part.append_media_stream(stream_url, stream_info[1])

    item.complete = True
    Logger.trace("Media item updated: %s", item)
    return item
def __init__(self, channel_info):
    """ Configures the L1 channel: image, urls and data parsers.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.noImage = "l1image.png"

    # The urls
    self.mainListUri = "https://l1.nl/gemist/"
    self.baseUrl = "https://l1.nl"

    # Main list: the shows.
    show_pattern = Regexer.from_expresso(
        r'<li>\W*<a[^>]*href="(?<url>/[^"]+)"[^>]*>(?<title>[^<]+)</a>\W*</li>')
    self._add_data_parser(
        self.mainListUri,
        preprocessor=self.pre_process_folder_list,
        parser=show_pattern,
        creator=self.create_episode_item)

    # Live TV and radio only need an updater.
    self._add_data_parsers(["#livetv", "#liveradio"], updater=self.update_live_stream)

    # Videos
    video_pattern = Regexer.from_expresso(
        r'<a[^>]*class="mediaItem"[^>]*href="(?<url>[^"]+)"[^>]*title="(?<title>'
        r'[^"]+)"[^>]*>[\w\W]{0,500}?<img[^>]+src="/(?<thumburl>[^"]+)')
    self._add_data_parser(
        "*",
        parser=video_pattern,
        creator=self.create_video_item,
        updater=self.update_video_item)

    # Paging: the page number is the regex group at index 1.
    paging_pattern = Regexer.from_expresso(
        r'<a[^>]+href="https?://l1.nl/([^"]+?pagina=)(\d+)"')
    self.pageNavigationRegexIndex = 1
    self._add_data_parser("*", parser=paging_pattern, creator=self.create_page_item)

    # ====================================== Actual channel setup STOPS here =======================================
def update_video_item(self, item):
    """ Updates an existing MediaItem with the M3u8 streams of its playlist.

    The item's page is searched for a playlist GUID (two page layouts are supported),
    the corresponding playlist JSON is retrieved and the url of the first video in
    that playlist is used as the M3u8 source.

    If the returned item does not have a MediaItemPart then the self.complete flag
    will automatically be set back to False.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties. When no
             playlist GUID or playlist url could be found, the item is returned as is.
    :rtype: MediaItem

    """

    from resources.lib.helpers.jsonhelper import JsonHelper
    from resources.lib.streams.m3u8 import M3u8

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    data = UriHandler.open(item.url, proxy=self.proxy)

    # Get the playlist GUID
    playlist_guids = Regexer.do_regex(
        "<div[^>]+data-playlist-id='([^']+)'[^>]+></div>", data)
    if not playlist_guids:
        # Let's try the alternative then (for the new channels)
        playlist_guids = Regexer.do_regex(
            'local_playlist[", -]+([a-f0-9]{20})"', data)

    if not playlist_guids:
        # Neither layout matched: report it instead of failing with an IndexError.
        Logger.warning("Unable to find a playlist GUID for: %s", item)
        return item

    play_list_url = "http://api.mtvnn.com/v2/nl/NL/local_playlists/{}.json?video_format=m3u8".format(
        playlist_guids[0])
    data = UriHandler.open(play_list_url, proxy=self.proxy)

    json_data = JsonHelper(data)
    m3u8_url = json_data.get_value("local_playlist_videos", 0, "url")
    if not m3u8_url:
        Logger.warning("Unable to find a playlist url for: %s", item)
        return item

    part = item.create_new_empty_media_part()
    item.complete = M3u8.update_part_with_m3u8_streams(
        part, m3u8_url, proxy=self.proxy, channel=self, encrypted=True)
    return item
def __init__(self, channel_info):
    """ Configures the FOX Sports channel: image, urls and data parsers.

    All class variables should be instantiated here and this method should not
    be overridden by any derived classes.

    :param ChannelInfo channel_info: The channel info object to base this channel on.

    """

    chn_class.Channel.__init__(self, channel_info)

    # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
    self.videoType = None
    self.noImage = "eredivisieimage.jpg"

    # The urls
    self.baseUrl = "https://www.foxsports.nl"
    self.mainListUri = "https://www.foxsports.nl/videos/"
    self.swfUrl = "https://static.eredivisielive.nl/static/swf/edPlayer-1.6.2.plus.swf"

    # Main list: folders based on the data-area/data-sport links.
    self._add_data_parser(
        self.mainListUri,
        parser=Regexer.from_expresso(
            '<a [hd][^>]*ata-(?<Type>area|sport)="(?<Url>[^"]+)[^>]*>'
            '(?<Title>[^<]+)</a>'),
        creator=self.create_folder_item)

    # Main list: folders based on the filter/most-viewed links. The optional suffix
    # after the title accepts both an HTML-encoded (&#39;) and a literal apostrophe.
    # The previous single-quoted literal was terminated by that apostrophe, which made
    # the statement a syntax error.
    self._add_data_parser(
        self.mainListUri,
        parser=Regexer.from_expresso(
            r'<a[^>]+href="/video/(?<Type>filter|meest_bekeken)/?'
            r'(?<Url>[^"]*)">[^<]*</a>\W+<h1[^>]*>(?<Title>[^<;]+)'
            r"(?:(?:&#39;|')s){0,1}</h1>"),
        creator=self.create_folder_item)

    # Video fragments; the pre-processor adds the paging.
    self._add_data_parser(
        "https://www.foxsports.nl/video/filter/fragments/",
        preprocessor=self.add_pages,
        parser=Regexer.from_expresso(
            r'<img[^>]+src=\'(?<Thumb>[^\']+)\'[^>]*>\W+</picture>\W+'
            r'<span class="[^"]+play[\w\W]{0,500}?<h1[^>]*>\W+<a href="'
            r'(?<Url>[^"]+)"[^>]*>(?<Title>[^<]+)</a>\W+</h1>\W+<span'
            r'[^>]*>(?<Date>[^>]+)</span>'),
        creator=self.create_video_item)

    # All videos share a single updater.
    self._add_data_parser("*", updater=self.update_video_item)

    # ====================================== Actual channel setup STOPS here =======================================
    return
def __update_video_from_mpd(self, item, mpd_info, use_adaptive_with_encryption):
    """ Updates an existing MediaItem with more data based on an MPD stream.

    The MPD manifest is retrieved and checked for a WebVTT subtitle. If the adaptive
    add-on can be used, the manifest is added as a stream together with a license key
    based on the license url in the manifest and the play token from the stream info.
    If not, a dialog is shown and the item is not marked as complete.

    :param MediaItem item:                      The original MediaItem that needs updating.
    :param dict[str,str] mpd_info:              Stream info retrieved from the stream json;
                                                "mediaLocator" and "playToken" are read.
    :param bool use_adaptive_with_encryption:   Do we use the Adaptive InputStream add-on?

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    # NOTE(review): the message mentions BrightCove while this method handles MPD
    # streams; kept as is, but it looks like a copy/paste left-over.
    Logger.debug("Updating streams using BrightCove data.")

    part = item.create_new_empty_media_part()
    manifest_url = "https:{0}".format(mpd_info["mediaLocator"])
    manifest = UriHandler.open(manifest_url, proxy=self.proxy)

    vtt_urls = Regexer.do_regex(r'<BaseURL>([^<]+\.vtt)</BaseURL>', manifest)
    if vtt_urls:
        Logger.debug("Found subtitle: %s", vtt_urls[0])
        part.Subtitle = SubtitleHelper.download_subtitle(
            vtt_urls[0], proxy=self.proxy, format="webvtt")

    if not use_adaptive_with_encryption:
        # Without the adaptive add-on these streams cannot be played: tell the user.
        XbmcWrapper.show_dialog(
            LanguageHelper.get_localized_string(LanguageHelper.DrmTitle),
            LanguageHelper.get_localized_string(LanguageHelper.WidevineLeiaRequired))
        return item

    # We can use the adaptive add-on with encryption
    Logger.info("Using MPD InputStreamAddon")
    license_url = Regexer.do_regex('licenseUrl="([^"]+)"', manifest)[0]
    key_headers = {"Authorization": "Bearer {0}".format(mpd_info["playToken"])}
    license_key = Mpd.get_license_key(license_url, key_headers=key_headers)

    stream = part.append_media_stream(manifest_url, 0)
    Mpd.set_input_stream_addon_input(stream, self.proxy, license_key=license_key)
    item.complete = True
    return item
def extract_json(self, data):
    """ Extracts the embedded JSON from the page and flattens it into a list.

    The JSON object found in the "application/json" script is converted into a list of
    its values, where each value gets its original key stored under "title". If no
    JSON is found, the original data is returned.

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")
    items = []

    matches = Regexer.do_regex('type="application/json">([^<]+)<', data)
    if not matches:
        Logger.warning("No JSON data found.")
        return data, items

    json_helper = JsonHelper(matches[0])

    # Turn the {title: info} mapping into a list of info objects that know their title.
    flattened = []
    for title, info in json_helper.json.items():
        info["title"] = title
        flattened.append(info)

    # Replace the content of the helper and hand the helper itself back.
    json_helper.json = flattened
    return json_helper, items
def __update_video(self, item, data):
    """ Updates the item with the M3u8 streams found for it.

    The video reference is either the last part of an API url or taken from the
    "data-video-id" attribute in the page data. If that reference is not an M3u8 url,
    the content API is queried (logging in first if needed) to resolve the actual url.

    :param MediaItem item: The original MediaItem that needs updating.
    :param str data:       The page data of the item.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    api_prefix = "https://api.viervijfzes.be/content/"
    if item.url.startswith(api_prefix):
        video_ref = item.url.rsplit("/", 1)[-1]
    else:
        # We either get an URL or an uuid here
        video_ref = Regexer.do_regex('data-video-*id="([^"]+)', data)[-1]

    if ".m3u8" not in video_ref:
        Logger.info("Not a direct M3u8 file. Need to log in")
        content_url = "https://api.viervijfzes.be/content/%s" % (video_ref, )

        # We need to log in
        if not self.loggedOn:
            self.log_on()

        # The content API requires the id token as authorization header
        auth_headers = {
            "authorization": self.__idToken,
            "content-type": "application/json"
        }
        content = UriHandler.open(content_url, additional_headers=auth_headers)
        video_ref = JsonHelper(content).get_value("video", "S")

    # Geo Locked? Set it for the error statistics
    if "/geo/" in video_ref.lower():
        item.isGeoLocked = True

    media_part = item.create_new_empty_media_part()
    item.complete = M3u8.update_part_with_m3u8_streams(
        media_part, video_ref, channel=self, encrypted=False)
    return item
def update_video_html_item(self, item):
    """ Updates a MediaItem that points to an HTML page.

    The page is opened, the first "data-video-id" value is used to point the item's
    url to the video API and the actual update is delegated to update_video_api_item.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    html = UriHandler.open(item.url, proxy=self.proxy)
    video_ids = Regexer.do_regex(r'data-video-id="([^"]+)"', html)

    # From here on the item is a regular API item.
    item.url = "https://api.svt.se/video/{}".format(video_ids[0])
    return self.update_video_api_item(item)
def update_video_item(self, item):
    """ Updates an existing MediaItem using the MZID found in its page.

    The last "application/ld+json" script in the page is parsed and the publication
    and video ids in it are combined into the MZID, which is then passed on to
    update_video_for_mzid.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    # Get the MZID
    page = UriHandler.open(item.url, proxy=self.proxy, additional_headers=item.HttpHeaders)
    ld_json_blocks = Regexer.do_regex(r'<script type="application/ld\+json">(.*?)</script>', page)
    video_info = JsonHelper(ld_json_blocks[-1])

    video_id = video_info.get_value("video", "@id")
    publication_id = video_info.get_value("publication", -1, "@id")
    return self.update_video_for_mzid(item, "{}${}".format(publication_id, video_id))
def get_movie_id(self, data):
    """ Replaces the page data with the JSON trailer feed of the movie in that page.

    The last movie id found in the data is used to build the feed url. That url is
    also stored on the parent item for logging purposes.

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")
    items = []

    movie_ids = Regexer.do_regex(r"movietrailers://movie/detail/(\d+)", data)
    movie_id = movie_ids[-1]
    Logger.debug("Found Movie ID: %s", movie_id)

    feed_url = "%s/trailers/feeds/data/%s.json" % (self.baseUrl, movie_id)
    feed_data = UriHandler.open(feed_url)

    # Set it for logging purposes
    self.parentItem.url = feed_url

    Logger.debug("Pre-Processing finished")
    return feed_data, items
def __convert_ttml_to_srt(ttml):
    """ Converts a TTML (Timed Text Markup Language) subtitle into the SRT format.

    Each <p> element with "begin" and "end" attributes becomes a numbered SRT entry:

        1
        00:00:20,000 --> 00:00:24,400
        text

    :param str ttml: The TTML formatted subtitle.

    :return: The SRT formatted subtitle (empty if no entries were found).
    :rtype: str

    """

    pars_regex = r'<p[^>]+begin="([^"]+)\.(\d+)"[^>]+end="([^"]+)\.(\d+)"[^>]*>([\w\W]+?)</p>'
    subs = Regexer.do_regex(pars_regex, ttml)

    def to_millis(fraction):
        # The digits are a decimal fraction of a second: ".5" means 500 ms, not 5 ms.
        # SRT wants exactly 3 digits, so pad on the right and drop any extra precision.
        return fraction.ljust(3, "0")[:3]

    entries = []
    index = 1
    for sub in subs:
        try:
            start = "%s,%s" % (sub[0], to_millis(sub[1]))
            end = "%s,%s" % (sub[2], to_millis(sub[3]))
            text = sub[4].replace("<br />", "\n")
            text = HtmlEntityHelper.convert_html_entities(text)
            text = text.replace("\r\n", "")
            entries.append("\n%s\n%s --> %s\n%s\n" % (index, start, end, text.strip()))
            index += 1
        except Exception:
            # A single broken entry should not prevent the others from being converted.
            Logger.error("Error parsing subtitle: %s", sub, exc_info=True)

    return "".join(entries)
def update_video_item(self, item):
    """ Updates an existing MediaItem with the HLS streams of the video.

    If the item does not point to the mediazone API, the video id is taken from the
    page and the API is queried for it. All "hls" target urls in the resulting JSON
    are then added as streams.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    content = UriHandler.open(item.url)
    media_part = item.create_new_empty_media_part()

    if "mediazone.vrt.be" not in item.url:
        # The page only holds a reference: fetch the actual media data
        video_id = Regexer.do_regex('data-video=[\'"]([^"\']+)[\'"]', content)[0]
        asset_url = "https://mediazone.vrt.be/api/v1/een/assets/%s" % (video_id, )
        content = UriHandler.open(asset_url)

    asset_info = JsonHelper(content)
    for target in asset_info.get_value("targetUrls"):
        Logger.trace(target)
        if target["type"].lower() == "hls":
            for stream_url, bitrate in M3u8.get_streams_from_m3u8(target["url"]):
                media_part.append_media_stream(stream_url, bitrate)

    item.complete = True
    return item
def update_video_item(self, item):
    """ Updates an existing MediaItem with the streams found in its page.

    All existing parts are removed and every match of self.mediaUrlRegex in the page
    is added to a single new part, using the first group as url and the second group
    as bitrate.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    page = UriHandler.open(item.url, proxy=self.proxy)
    matches = Regexer.do_regex(self.mediaUrlRegex, page)

    # Start with a clean slate: a single new part holds all streams.
    item.MediaItemParts = []
    media_part = item.create_new_empty_media_part()
    for match in matches:
        Logger.trace(match)
        stream_url, bitrate = match[0], match[1]
        media_part.append_media_stream(stream_url, bitrate)

    item.complete = True
    return item
def create_folder_item(self, result_set):
    """ Creates a MediaItem of type 'folder' using the result_set from the regex.

    Two kinds of results are handled:

    * Sub category results (a non-empty value at index 3): the url comes from index 3
      and the name from index 4.
    * Regular results: url at index 0, name at index 1 and an HTML fragment at index 2
      from which the description and (optional) date are taken.

    :param list[str]|dict[str,str] result_set: The result_set of the self.episodeItemRegex

    :return: A new MediaItem of type 'folder'.
    :rtype: MediaItem|None

    """

    if len(result_set) > 3 and result_set[3] != "":
        Logger.debug("Sub category folder found.")
        url = parse.urljoin(
            self.baseUrl, HtmlEntityHelper.convert_html_entities(result_set[3]))
        name = "\a.: %s :." % (result_set[4], )
        item = MediaItem(name, url)
        item.complete = True
        item.type = "folder"
        return item

    url = parse.urljoin(
        self.baseUrl, HtmlEntityHelper.convert_html_entities(result_set[0]))
    name = HtmlEntityHelper.convert_html_entities(result_set[1])

    helper = HtmlHelper(result_set[2])
    description = helper.get_tag_content("div", {'class': 'description'})

    # The actual content of a regular folder is retrieved from its RSS feed.
    item = MediaItem(name, "%s/RSS" % (url, ))
    item.type = 'folder'
    item.description = description.strip()

    # The date is either in a "date" div or in a "lastPublishedDate" span.
    date = helper.get_tag_content("div", {'class': 'date'})
    if date == "":
        date = helper.get_tag_content("span", {'class': 'lastPublishedDate'})

    if date != "":
        date_parts = Regexer.do_regex(r"(\w+) (\d+)[^<]+, (\d+)", date)
        if date_parts:
            month_part, day_part, year_part = date_parts[0][0:3]
            month_part = month_part.lower()
            try:
                month = DateHelper.get_month_from_name(month_part, "en")
                item.set_date(year_part, month, day_part)
            except Exception:
                # A date that cannot be parsed should not prevent the item from showing,
                # but do not swallow KeyboardInterrupt/SystemExit like a bare except does.
                Logger.error("Error matching month: %s", month_part, exc_info=True)

    item.complete = True
    return item
def update_video_item(self, item):
    """ Updates an existing MediaItem with the streams from its "data-media" info.

    The first "data-media" attribute in the page holds HTML-encoded JSON. Each entry
    in its "sources" is added as a stream, with a bitrate based on the quality name
    ("high", "web" or "mobile"; anything else gets bitrate 0). The geo-lock flag is set
    when "geoLocRestriction" is anything other than "world".

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    page = UriHandler.open(item.url, proxy=self.proxy, additional_headers=item.HttpHeaders)

    encoded_info = Regexer.do_regex('data-media="([^"]+)"', page)[0]
    media_info = JsonHelper(HtmlEntityHelper.convert_html_entities(encoded_info))
    Logger.trace(media_info)

    media_part = item.create_new_empty_media_part()

    # Known qualities and their bitrates; other keys (such as "url") default to 0.
    quality_bitrates = {"high": 2000, "web": 800, "mobile": 400}
    sources = media_info.json.get("sources", {})
    for quality in sources:
        media_part.append_media_stream(sources[quality], quality_bitrates.get(quality, 0))

    # geoLocRestriction
    restriction = media_info.get_value("geoLocRestriction", fallback="world")
    item.isGeoLocked = restriction != "world"
    item.complete = True
    return item
def update_video_item_json_player(self, item):
    """ Updates an existing MediaItem with the streams from the player configuration.

    Every label/file pair found in the page is added as a stream. The bitrate is
    looked up via the label; unknown labels get bitrate 0. The item is only marked
    as complete if at least one stream was found.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to it's properties.
    :rtype: MediaItem

    """

    page = UriHandler.open(item.url, proxy=self.proxy)
    label_file_pairs = Regexer.do_regex(r'label:\s*"([^"]+)",\W*file:\s*"([^"]+)"', page)

    media_part = item.create_new_empty_media_part()
    known_bitrates = {"720p SD": 1200}
    for label, stream_url in ((pair[0], pair[1]) for pair in label_file_pairs):
        bitrate = known_bitrates.get(label, 0)
        media_part.append_media_stream(stream_url, bitrate)
        item.complete = True

    return item
def extract_json_episodes(self, data):
    """ Pre-processes the page data and narrows it down to the episode line list.

    The JSON assigned to window.__DATA__ is cut out of the HTML, and the "props" of the
    first "LineList" child of the first "MainContainer" child become the new JSON root.

    The return values should always be instantiated in at least ("", []).

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")
    items = []

    state_matches = Regexer.do_regex(
        r'window.__DATA__ = ([\w\W]+?});\s*window.__PUSH_STATE__', data)
    json_data = JsonHelper(state_matches[0])

    containers = []
    for child in json_data.get_value("children"):
        if child["type"] == "MainContainer":
            containers.append(child)

    line_lists = []
    for child in containers[0]["children"]:
        if child["type"] == "LineList":
            line_lists.append(child)

    # Continue with just the properties of the first line list.
    json_data.json = line_lists[0]["props"]
    return json_data, items
def add_live_channel_and_extract_data(self, data):
    """ Adds the live folder item and extracts the JSON part of the data.

    A folder item pointing to self.liveUrl is always generated. The JSON argument of
    the setupBroadcastArchive('Tv', ...) call is then cut out of the data; if it cannot
    be found the data is passed on unchanged.

    The return values should always be instantiated in at least ("", []).

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    Logger.info("Performing Pre-Processing")

    live_title = LanguageHelper.get_localized_string(LanguageHelper.LiveStreamTitleId)
    live_item = MediaItem("\a.: {} :.".format(live_title), self.liveUrl)
    live_item.type = "folder"
    items = [live_item]

    if not data:
        # Nothing to parse: hand back an empty JSON list together with the live item.
        return "[]", items

    matches = Regexer.do_regex(r"setupBroadcastArchive\('Tv',\s*([^;]+)\);", data)
    if not (isinstance(matches, (tuple, list)) and len(matches) > 0):
        Logger.info("Cannot extract JSON data from HTML.")
        return data, items

    Logger.debug("Pre-Processing finished")
    return matches[0], items
def update_live_item(self, item):
    """ Updates a live MediaItem with the streams of its F4m manifest.

    The data at item.url is opened and must contain a <media href="....isml"> entry
    (its absence results in an IndexError). All streams that F4m reports for item.url
    are then added to a new media part, with ".f4m" replaced by ".m3u8" in their urls.
    The item is only marked complete when at least one stream was found.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to its properties.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_live_item for %s (%s)', item.name, self.channelName)

    manifest = UriHandler.open(item.url, proxy=self.proxy, additional_headers=self.httpHeaders)
    isml_matches = Regexer.do_regex(r'<media href="([^"]+\.isml)', manifest)
    # NOTE(review): the stream root is only logged; the streams below are resolved from
    # item.url, not from this value - confirm that this is intended.
    stream_root = isml_matches[0]
    Logger.debug("Found Live stream root: %s", stream_root)

    media_part = item.create_new_empty_media_part()
    for stream_url, bitrate in F4m.get_streams_from_f4m(item.url, self.proxy):
        item.complete = True
        media_part.append_media_stream(stream_url.replace(".f4m", ".m3u8"), bitrate)

    return item
def get_nodes_content(self, node_tag, *args):
    """ Retrieves the content of all nodes with node_tag as name.

    A regular expression is built from the tag name and the given attributes and is
    matched against self.data. The attribute values are inserted into the expression
    as-is (they are not escaped).

    Each argument should be a dictionary; only its first key/value pair is used and
    the dictionaries should be passed in the order in which the attributes appear in
    the node. For example: {"size": "380x285"}, {"ratio": "4:3"} will find a node like
    <nodename size="380x285" name="test" ratio="4:3">.

    :param str node_tag:    Name of the node to retrieve.
    :param dict[str,str] args: Dictionaries holding the node's attributes.

    :return: A list of all the content of the found nodes.

    """

    pattern_parts = ["<%s" % (node_tag, )]
    for attribute in args:
        attribute_name = list(attribute.keys())[0]
        pattern_parts.append(
            r'[^>]*%s\W*=\W*"%s"' % (attribute_name, attribute[attribute_name]))

    # Close the opening tag and lazily capture everything up to the closing tag.
    pattern_parts.append(r"[^>]*>([\w\W]+?)</%s>" % (node_tag, ))
    regex = "".join(pattern_parts)

    Logger.trace("XmlRegex = %s", regex)
    results = Regexer.do_regex(regex, self.data)
    Logger.trace(results)
    return results
def get_subtitle(url, proxy=None, play_list_data=None, append_query_string=True, language=None):  # NOSONAR
    """ Retrieves a subtitle url from a M3u8 playlist.

    The playlist is either the given play_list_data or, if that is empty, the content
    retrieved from the url. All subtitle entries in the playlist are visited and the
    last one that matches the requested language (or just the last one if no language
    was given) is returned. Relative subtitle urls are resolved against the folder of
    the M3u8 url.

    :param str url:                     The M3u8 url that contains the subtitle information.
    :param ProxyInfo proxy:             An optional proxy to use.
    :param str play_list_data:          The data (in case the URL was already retrieved).
    :param bool append_query_string:    Should we re-append the query string?
    :param str language:                The language to select (if multiple are present).

    :return: The subtitle url for the M3u8 file, or an empty string if none was found.
    :rtype: str

    """

    playlist = play_list_data or UriHandler.open(url, proxy)
    subtitle_regex = r'(#\w[^:]+)[^\n]+TYPE=SUBTITLES[^\n]*LANGUAGE="(\w+)"[^\n]*\W+URI="([^"]+.m3u8[^"\n\r]*)'

    # Split off the query string; it is only kept when it should be re-appended.
    if "?" in url:
        base, query = url.split("?", 1)
        if append_query_string:
            Logger.info("Going to append QS: %s", query)
            qs = query
        else:
            Logger.info("Ignoring QS: %s", query)
            qs = None
    else:
        base, qs = url, None

    matches = Regexer.do_regex(subtitle_regex, playlist)
    language_index, url_index = 1, 2
    base_url = base[:base.rindex("/")]

    sub = ""
    source_logged = False
    for match in matches:
        if language is not None and match[language_index] != language:
            Logger.debug("Found incorrect language: %s", match[language_index])
            continue

        is_relative = "://" not in match[url_index]
        if not source_logged:
            # Only mention the kind of url once, for the first accepted entry.
            if is_relative:
                Logger.debug("Using base_url %s for M3u8", base_url)
            else:
                Logger.debug("Full url found in M3u8")
            source_logged = True

        if is_relative:
            sub = "%s/%s" % (base_url, match[url_index])
        else:
            sub = match[url_index]

        if qs is None:
            continue

        if sub.endswith("?null="):
            sub = sub.replace("?null=", "?%s" % (qs, ))
        elif "?" in sub:
            sub = "%s&%s" % (sub, qs)
        else:
            sub = "%s?%s" % (sub, qs)

    return sub
def update_video_item(self, item):
    """ Updates an existing MediaItem with the streams from its bbvms JSON data.

    If item.url does not end with ".js", the page is opened first to find the data-id
    of the player, from which the url of the JSON data is built. The assets listed in
    the JSON "clipData" are added as streams to a new media part and, when the item has
    no thumb yet, the first thumbnail from the JSON is used.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to its properties. When no data-id
             can be found the item is returned without being completed.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    if item.url.endswith(".js"):
        data_url = item.url
    else:
        data = UriHandler.open(item.url)
        data_id = Regexer.do_regex(r'data-id="(\d+)"[^>]+data-playout', data)
        # A failed match gives an empty result, so test for "nothing found" and not
        # only for None; otherwise the index below raises an IndexError.
        if not data_id:
            Logger.warning("Cannot find stream-id for L1 stream.")
            return item

        data_url = "https://limburg.bbvms.com/p/L1_video/c/{}.json".format(data_id[0])

    data = UriHandler.open(data_url)
    json_data = JsonHelper(data, logger=Logger.instance())
    Logger.trace(json_data)

    base_url = json_data.get_value("publicationData", "defaultMediaAssetPath")
    streams = json_data.get_value("clipData", "assets")

    item.MediaItemParts = []
    part = item.create_new_empty_media_part()
    for stream in streams:
        url = stream.get("src", None)
        if not url:
            # Assets without a source cannot be played (and cannot be inspected below).
            continue

        if "://" not in url:
            url = "{}{}".format(base_url, url)
        bitrate = stream.get("bandwidth", None)
        part.append_media_stream(url, bitrate)

    if not item.thumb:
        thumbnails = json_data.get_value("thumbnails")
        if thumbnails:
            url = thumbnails[0].get("src", None)
            # Only relative urls need the base url; absolute http and https urls are
            # used as they are.
            if url and "://" not in url:
                url = "%s%s" % (self.baseUrl, url)
            item.thumb = url

    item.complete = True
    return item
def update_video_item(self, item):
    """ Updates an existing MediaItem with the renditions listed in its media feed.

    The data at item.url must contain a <media:content> element (its absence results in
    an IndexError) whose url points to the renditions document. Every rendition in that
    document is added as a stream to a new media part, with "rtmpe" replaced by "rtmp"
    and the url passed through self.get_verifiable_video_url.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to its properties.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    feed = UriHandler.open(item.url, proxy=self.proxy)
    content_urls = Regexer.do_regex(r'<media:content[^>]+url=\W([^\'"]+)\W', feed)
    renditions_url = HtmlEntityHelper.strip_amp(content_urls[0])

    renditions_data = UriHandler.open(renditions_url, proxy=self.proxy)
    renditions = Regexer.do_regex(
        r'<rendition[^>]+bitrate="(\d+)"[^>]*>\W+<src>([^<]+)<', renditions_data)

    # Start with a clean slate: any previously added parts are dropped.
    item.MediaItemParts = []
    media_part = item.create_new_empty_media_part()
    for rendition in renditions:
        source = rendition[1].replace("rtmpe", "rtmp")
        media_url = self.get_verifiable_video_url(source)
        media_part.append_media_stream(media_url, rendition[0])

    item.complete = True
    return item
def update_video_item(self, item):
    """ Updates an existing MediaItem with the HLS streams of its video config.

    The uri of the video config is read from the page at item.url (its absence results
    in an IndexError) and used to fetch the feed from media.mtvnservices.com. For each
    item in that feed a new media part is created and filled with the M3u8 streams of
    every rendition. The item is always marked as complete afterwards.

    :param MediaItem item: the original MediaItem that needs updating.

    :return: The original item with more data added to its properties.
    :rtype: MediaItem

    """

    Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

    from resources.lib.streams.m3u8 import M3u8

    page = UriHandler.open(item.url, proxy=self.proxy)
    video_uri = Regexer.do_regex(r'{"video":{"config":{"uri":"([^"]+)', page)[0]

    config_url = "http://media.mtvnservices.com/pmt/e1/access/index.html?uri={}&configtype=edge".format(
        video_uri)
    config_data = UriHandler.open(config_url, proxy=self.proxy, referer=self.baseUrl)
    config = JsonHelper(config_data)

    for feed_item in config.get_value("feed", "items"):
        # The device placeholder is dropped and JSON + HLS output is requested instead.
        package_url = feed_item["group"]["content"].replace("&device={device}", "")
        package_url = "%s&format=json&acceptMethods=hls" % (package_url, )
        package = JsonHelper(UriHandler.open(package_url, proxy=self.proxy))

        # subUrls = stream.get_value("package", "video", "item", 0, "transcript", 0, "typographic")  # NOSONAR
        media_part = item.create_new_empty_media_part()
        renditions = package.get_value("package", "video", "item", 0, "rendition")
        for rendition in renditions:
            item.complete |= M3u8.update_part_with_m3u8_streams(
                media_part, rendition["src"], proxy=self.proxy)

    # NOTE(review): the item is marked complete regardless of the results above, which
    # makes the accumulated value irrelevant - confirm that this is intended.
    item.complete = True
    Logger.trace("Media url: %s", item)
    return item
def extract_json_video(self, data):
    """ Pre-processes the page data and replaces it with the full episode list.

    The JSON assigned to window.__DATA__ is cut out of the HTML. Unless the parent item
    is flagged as a season (metaData "is_season"), the items of the first
    "SeasonSelector" that have a url are collected as seasons. For the first "LineList"
    of type "video-guide", the url of its first filter item is opened and the JSON of
    that response is returned, with the seasons (if any) stored under the "seasons" key.

    The return values should always be instantiated in at least ("", []).

    :param str data: The retrieve data that was loaded for the current item and URL.

    :return: A tuple of the data and a list of MediaItems that were generated.
    :rtype: tuple[str|JsonHelper,list[MediaItem]]

    """

    # NOTE(review): the nesting below (season injection, early return inside the loop)
    # is how this method is understood to be structured - confirm against the channel.

    Logger.info("Performing Pre-Processing")
    items = []

    # An IndexError is raised here if the page does not contain the window.__DATA__ blob.
    data = Regexer.do_regex(r'window.__DATA__ = ([\w\W]+?});\s*window.__PUSH_STATE__', data)[0]
    json_data = JsonHelper(data)

    # Get the main content container
    main_container = [m for m in json_data.get_value("children") if m["type"] == "MainContainer"]

    # Extract seasons (skipped when the parent item is already a season)
    seasons = []
    if not self.parentItem.metaData.get("is_season", False):
        seasons = [
            lst["props"]["items"]
            for lst in main_container[0]["children"]
            if lst["type"] == "SeasonSelector"
        ]
        if seasons:
            # Only the first selector is used, and only its entries that have a url.
            seasons = [s for s in seasons[0] if s["url"]]
            # Inject them
            json_data.json["seasons"] = seasons

    # Find the actual video list: the LineList of type "video-guide"
    line_lists = [lst for lst in main_container[0]["children"] if lst["type"] == "LineList"]
    for line_list in line_lists:
        if line_list.get("props", {}).get("type") == "video-guide":
            json_data.json = line_list["props"]

            # Get the actual full episode list
            all_episodes = json_data.get_value("filters", "items", 0, "url")
            url_all_episodes = "{}{}".format(self.baseUrl, all_episodes)
            data = UriHandler.open(url_all_episodes)
            json_data = JsonHelper(data)

            # And append seasons again, as json_data was replaced by the new response
            if seasons:
                json_data.json["seasons"] = seasons
            return json_data, items

    # No video-guide list was found: the page JSON is returned as it is.
    Logger.warning("Cannot extract video items")
    return json_data, items