Python Regexer Examples, resources.lib.regexer.Regexer Python Examples

Example #1

0

Show file

    def __init__(self, channel_info):
        """ Initialisation of the class.

        All class variables should be instantiated here and this method should not
        be overridden by any derived classes.

        :param ChannelInfo channel_info: The channel info object to base this channel on.

        """

        chn_class.Channel.__init__(self, channel_info)

        # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============

        if self.channelCode == "ketnet":
            self.noImage = "ketnetimage.jpg"
            self.mainListUri = "https://www.ketnet.be/kijken"
            self.baseUrl = "https://www.ketnet.be"
            self.mediaUrlRegex = r'playerConfig\W*=\W*(\{[\w\W]{0,2000}?);(?:.vamp|playerConfig)'

        elif self.channelCode == "cobra":
            self.noImage = "cobraimage.png"
            self.mainListUri = "http://www.cobra.be/cm/cobra/cobra-mediaplayer"
            self.baseUrl = "http://www.cobra.be"

        self.swfUrl = "%s/html/flash/common/player.swf" % (self.baseUrl, )

        episode_regex = r'<a[^>]+href="(?<url>/kijken[^"]+)"[^>]*>\W*<img[^>]+src="' \
                        r'(?<thumburl>[^"]+)"[^>]+alt="(?<title>[^"]+)"'
        episode_regex = Regexer.from_expresso(episode_regex)
        self._add_data_parser(self.mainListUri,
                              match_type=ParserData.MatchExact,
                              parser=episode_regex,
                              creator=self.create_episode_item)

        self._add_data_parser("*", preprocessor=self.select_video_section)

        video_regex = Regexer.from_expresso(
            r'<a title="(?<title>[^"]+)" href="(?<url>[^"]+)"[^>]*>'
            r'\W+<img src="(?<thumburl>[^"]+)"[^<]+<span[^<]+[^<]+'
            r'[^>]+></span>\W+(?<description>[^<]+)')
        self._add_data_parser("*",
                              parser=video_regex,
                              creator=self.create_video_item,
                              updater=self.update_video_item)

        folder_regex = Regexer.from_expresso(
            r'<span class="more-of-program" rel="/(?<url>[^"]+)">')
        self._add_data_parser("*",
                              parser=folder_regex,
                              creator=self.create_folder_item)

        #===============================================================================================================
        # non standard items

        #===============================================================================================================
        # Test cases:

        # ====================================== Actual channel setup STOPS here =======================================
        return

Example #2

0

Show file

File: chn_kijknl.py Project: Bartolomeo1/plugin.video.retrospect

    def __update_video_from_brightcove(self, item, data,
                                       use_adaptive_with_encryption):
        """ Updates an existing MediaItem with more data based on an MPD stream.

        :param str data:                            Stream info retrieved from BrightCove.
        :param bool use_adaptive_with_encryption:   Do we use the Adaptive InputStream add-on?
        :param MediaItem item:                      The original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        part = item.create_new_empty_media_part()
        # Then try the new BrightCove JSON
        bright_cove_regex = '<video[^>]+data-video-id="(?<videoId>[^"]+)[^>]+data-account="(?<videoAccount>[^"]+)'
        bright_cove_data = Regexer.do_regex(
            Regexer.from_expresso(bright_cove_regex), data)
        if not bright_cove_data:
            Logger.warning("Error updating using BrightCove data: %s", item)
            return item

        Logger.info("Found new BrightCove JSON data")
        bright_cove_url = 'https://edge.api.brightcove.com/playback/v1/accounts/' \
                          '%(videoAccount)s/videos/%(videoId)s' % bright_cove_data[0]
        headers = {
            "Accept":
            "application/json;pk=BCpkADawqM3ve1c3k3HcmzaxBvD8lXCl89K7XEHiKutxZArg2c5RhwJHJANOwPwS_4o7UsC4RhIzXG8Y69mrwKCPlRkIxNgPQVY9qG78SJ1TJop4JoDDcgdsNrg"
        }

        bright_cove_data = UriHandler.open(bright_cove_url,
                                           additional_headers=headers)
        bright_cove_json = JsonHelper(bright_cove_data)
        streams = [
            d for d in bright_cove_json.get_value("sources")
            if d["container"] == "M2TS"
        ]
        # Old filter
        # streams = filter(lambda d: d["container"] == "M2TS", bright_cove_json.get_value("sources"))
        if not streams:
            Logger.warning("Error extracting streams from BrightCove data: %s",
                           item)
            return item

        # noinspection PyTypeChecker
        stream_url = streams[0]["src"]

        # these streams work better with the the InputStreamAddon because it removes the
        # "range" http header
        if use_adaptive_with_encryption:
            Logger.info("Using InputStreamAddon for playback of HLS stream")
            strm = part.append_media_stream(stream_url, 0)
            M3u8.set_input_stream_addon_input(strm)
            item.complete = True
            return item

        for s, b in M3u8.get_streams_from_m3u8(stream_url):
            item.complete = True
            part.append_media_stream(s, b)
        return item

Example #3

0

Show file

File: chn_oppetarkiv.py Project: Sopor/plugin.video.retrospect

    def add_search_and_genres(self, data):
        """ Performs pre-process actions for data processing and adds a search option and genres.

        Accepts an data from the process_folder_list method, BEFORE the items are
        processed. Allows setting of parameters (like title etc) for the channel.
        Inside this method the <data> could be changed and additional items can
        be created.

        The return values should always be instantiated in at least ("", []).

        :param str data: The retrieve data that was loaded for the current item and URL.

        :return: A tuple of the data and a list of MediaItems that were generated.
        :rtype: tuple[str|JsonHelper,list[MediaItem]]

        """

        Logger.info("Performing Pre-Processing")
        items = []

        if self.parentItem is not None and "genre" in self.parentItem.metaData:
            self.__genre = self.parentItem.metaData["genre"]
            Logger.debug("Parsing a specific genre: %s", self.__genre)
            return data, items

        search_item = MediaItem("\a.: S&ouml;k :.", "searchSite")
        search_item.complete = True
        search_item.thumb = self.noImage
        search_item.dontGroup = True
        search_item.fanart = self.fanart
        # search_item.set_date(2099, 1, 1, text="")
        # -> No items have dates, so adding this will force a date sort in Retrospect
        items.append(search_item)

        genres_item = MediaItem("\a.: Genrer :.", "")
        genres_item.complete = True
        genres_item.thumb = self.noImage
        genres_item.dontGroup = True
        genres_item.fanart = self.fanart
        items.append(genres_item)

        # find the actual genres
        genre_regex = '<li[^>]+genre[^>]*><button[^>]+data-value="(?<genre>[^"]+)"[^>]*>' \
                      '(?<title>[^>]+)</button></li>'
        genre_regex = Regexer.from_expresso(genre_regex)
        genres = Regexer.do_regex(genre_regex, data)
        for genre in genres:
            if genre["genre"] == "all":
                continue
            genre_item = MediaItem(genre["title"], self.mainListUri)
            genre_item.complete = True
            genre_item.thumb = self.noImage
            genre_item.fanart = self.fanart
            genre_item.metaData = {"genre": genre["genre"]}
            genres_item.items.append(genre_item)

        Logger.debug("Pre-Processing finished")
        return data, items

Example #4

0

Show file

File: chn_een.py Project: Bartolomeo1/plugin.video.retrospect

    def __init__(self, channel_info):
        """ Initialisation of the class.

        All class variables should be instantiated here and this method should not
        be overridden by any derived classes.

        :param ChannelInfo channel_info: The channel info object to base this channel on.

        """

        chn_class.Channel.__init__(self, channel_info)

        # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
        self.noImage = "eenimage.png"

        # setup the urls
        self.mainListUri = "https://www.een.be/programmas"
        self.baseUrl = "http://www.een.be"

        # setup the main parsing data
        self._add_data_parser(self.mainListUri, preprocessor=self.extract_json, json=True,
                              parser=["data", ], creator=self.create_show_item)

        video_parser = r'<a class="card-teaser"[^>][^>]*href="(?<url>[^"]+)"[^>]*>\W+<div[^>]+' \
                       r'style="background-image: url\(\'(?<thumburl>[^\']+/(?<year>\d{4})/' \
                       r'(?<month>\d{2})/(?<day>\d{2})/[^\']+)\'[^>]*>\W+<div[^>]+_play[\w\W+]' \
                       r'{0,2000}?<div[^>]*>(?<_title>[^>]*)</div>\W*<h3[^>]*>(?<title>[^<]+)' \
                       r'</h3>\W+<div[^>]*>\W+(?:<span[^>]*>[^<]*</span>)?(?<description>[^<]+)'
        video_parser = Regexer.from_expresso(video_parser)
        self._add_data_parser("*", name="Links to teasers of videos (Card teaser)",
                              parser=video_parser, creator=self.create_video_item,
                              updater=self.update_video_item)

        video_parser = r'<a[^>]*class="[^"]+-teaser"[^>]*background-image: url\(\'(?<thumburl>' \
                       r'[^\']+/(?<year>\d{4})/(?<month>\d{2})/(?<day>\d{2})/[^\']+)\'[^>]*href="' \
                       r'(?<url>[^"]+)"[^>]*>\W+<div[^>]+_play[\w\W+]{0,2000}?<div[^>]*>' \
                       r'(?<_title>[^>]*)</div>\W*<h3[^>]*>(?<title>[^<]+)</h3>\W+<div[^>]*>\W+' \
                       r'(?:<span[^>]*>[^<]*</span>)?(?<description>[^<]+)'
        video_parser = Regexer.from_expresso(video_parser)
        self._add_data_parser("*", name="Links to teasers of videos (Image Teaser)",
                              parser=video_parser, creator=self.create_video_item,
                              updater=self.update_video_item)

        single_video_parser = r'>(?<title>[^<]+)</h1>[\w\W]{0,2000}?(?:<h2>?<description>[^<]+)?' \
                              r'[\w\W]{0,1000}?data-video="(?<url>[^"]+)"[\w\W]{0,500}data-analytics' \
                              r'=\'{&quot;date&quot;:&quot;(?<year>\d+)-(?<month>\d+)-(?<day>\d+)'
        single_video_parser = Regexer.from_expresso(single_video_parser)
        self._add_data_parser("*", name="Pages that contain only a single video",
                              parser=single_video_parser, creator=self.create_video_item)

        #===============================================================================================================
        # non standard items

        #===============================================================================================================
        # Test cases:

        # ====================================== Actual channel setup STOPS here =======================================
        return

Example #5

0

Show file

    def __init__(self, channel_info):
        """ Initialisation of the class.

        All class variables should be instantiated here and this method should not
        be overridden by any derived classes.

        :param ChannelInfo channel_info: The channel info object to base this channel on.

        """

        chn_class.Channel.__init__(self, channel_info)

        # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
        self.noImage = "vtmimage.jpg"

        # setup the urls
        self.mainListUri = "http://nieuws.vtm.be/herbekijk"
        self.baseUrl = "http://nieuws.vtm.be"

        # setup the main parsing data
        self.episodeItemRegex = '<li><a[^>]+href="/([^"]+)" class="level-1[^>]+>([^<]+)</a>'
        self._add_data_parser(self.mainListUri,
                              creator=self.create_episode_item,
                              parser=self.episodeItemRegex)

        video_item_regex = r'<article[^<]+has-video"[^>]*>\W*<a href="(?<Url>[^<"]+)"[^>]*>\W+' \
                           r'<div[^<]+<img[^>]+src="(?<Thumb>[^"]+)"[^>]*>[\w\W]{0,500}?<h3[^>]*>' \
                           r'(?:\W+<span[^>]*>[^>]*>)?(?<Title>[^<]+)</h3>\W+<div[^<]+<time[^>]+' \
                           r'datetime="(?<DateTime>[^"]+)"[^<]+</time>\W*</div>\W*<p[^>]+>*' \
                           r'(?<Description>[^<]+)'
        video_item_regex = Regexer.from_expresso(video_item_regex)
        self._add_data_parser("*",
                              creator=self.create_video_item,
                              parser=video_item_regex,
                              updater=self.update_video_item)

        stadion_regex = r'<article[^>]*>\W*<div class="image is-video">\W*<a href="(?<Url>[^"]+)' \
                        r'[^>]*>\W*<img[^>]+src="(?<Thumb>[^"]+)"[\w\W]{0,1000}?<h3 class=' \
                        r'"pagemanager-item-title">\W*<span>\W*<a[^>]*>(?<Title>[^<]+)[\w\W]' \
                        r'{0,1000}?<div class="teaser">\W*<a[^>]+>(?<Description>[^<]+)'
        stadion_regex = Regexer.from_expresso(stadion_regex)
        self._add_data_parser("http://nieuws.vtm.be/stadion",
                              parser=stadion_regex,
                              creator=self.create_video_item,
                              updater=self.update_video_item)

        self.pageNavigationRegex = ''
        self.pageNavigationRegexIndex = 0

        #===============================================================================================================
        # non standard items

        #===============================================================================================================
        # Test cases:

        # ====================================== Actual channel setup STOPS here =======================================
        return

Example #6

0

Show file

    def update_video_item(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

        # 1 - get the overal config file
        guid_regex = 'http://[^:]+/mgid:[^"]+:([0-9a-f-]+)"'
        rtmp_regex = r'type="video/([^"]+)" bitrate="(\d+)">\W+<src>([^<]+)</src>'

        data = UriHandler.open(item.url, proxy=self.proxy)
        guids = Regexer.do_regex(guid_regex, data)

        item.MediaItemParts = []
        for guid in guids:
            # get the info for this part
            Logger.debug("Processing part with GUID: %s", guid)

            # reset stuff
            part = None

            # http://www.southpark.nl/feeds/video-player/mediagen?uri=mgid%3Aarc%3Aepisode%3Acomedycentral.com%3Aeb2a53f7-e370-4049-a6a9-57c195367a92&suppressRegisterBeacon=true
            guid = HtmlEntityHelper.url_encode("mgid:arc:episode:comedycentral.com:%s" % (guid,))
            info_url = "%s/feeds/video-player/mediagen?uri=%s&suppressRegisterBeacon=true" % (self.baseUrl, guid)

            # 2- Get the GUIDS for the different ACTS
            info_data = UriHandler.open(info_url, proxy=self.proxy)
            rtmp_streams = Regexer.do_regex(rtmp_regex, info_data)

            for rtmp_stream in rtmp_streams:
                # if this is the first stream for the part, create an new part
                if part is None:
                    part = item.create_new_empty_media_part()

                part.append_media_stream(self.get_verifiable_video_url(rtmp_stream[2]), rtmp_stream[1])

        item.complete = True
        Logger.trace("Media item updated: %s", item)
        return item

Example #7

0

Show file

File: chn_l1.py Project: Sopor/plugin.video.retrospect

    def __init__(self, channel_info):
        """ Initialisation of the class.

        All class variables should be instantiated here and this method should not
        be overridden by any derived classes.

        :param ChannelInfo channel_info: The channel info object to base this channel on.

        """

        chn_class.Channel.__init__(self, channel_info)

        # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
        self.noImage = "l1image.png"

        # setup the urls
        self.mainListUri = "https://l1.nl/gemist/"
        self.baseUrl = "https://l1.nl"

        # setup the main parsing data
        episode_regex = r'<li>\W*<a[^>]*href="(?<url>/[^"]+)"[^>]*>(?<title>[^<]+)</a>\W*</li>'
        episode_regex = Regexer.from_expresso(episode_regex)
        self._add_data_parser(self.mainListUri,
                              preprocessor=self.pre_process_folder_list,
                              parser=episode_regex,
                              creator=self.create_episode_item)

        # live stuff
        self._add_data_parsers(["#livetv", "#liveradio"],
                               updater=self.update_live_stream)

        video_regex = r'<a[^>]*class="mediaItem"[^>]*href="(?<url>[^"]+)"[^>]*title="(?<title>' \
                      r'[^"]+)"[^>]*>[\w\W]{0,500}?<img[^>]+src="/(?<thumburl>[^"]+)'
        video_regex = Regexer.from_expresso(video_regex)
        self._add_data_parser("*",
                              parser=video_regex,
                              creator=self.create_video_item,
                              updater=self.update_video_item)

        page_regex = r'<a[^>]+href="https?://l1.nl/([^"]+?pagina=)(\d+)"'
        page_regex = Regexer.from_expresso(page_regex)
        self.pageNavigationRegexIndex = 1
        self._add_data_parser("*",
                              parser=page_regex,
                              creator=self.create_page_item)

        #===============================================================================================================
        # non standard items

        #===============================================================================================================
        # Test cases:

        # ====================================== Actual channel setup STOPS here =======================================
        return

Example #8

0

Show file

    def update_video_item(self, item):
        """Updates an existing MediaItem with more data.

        Arguments:
        item : MediaItem - the MediaItem that needs to be updated

        Returns:
        The original item with more data added to it's properties.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        """

        Logger.debug('Starting update_video_item for %s (%s)', item.name,
                     self.channelName)

        data = UriHandler.open(item.url, proxy=self.proxy)

        # get the playlist GUID
        playlist_guids = Regexer.do_regex(
            "<div[^>]+data-playlist-id='([^']+)'[^>]+></div>", data)
        if not playlist_guids:
            # let's try the alternative then (for the new channels)
            playlist_guids = Regexer.do_regex(
                'local_playlist[", -]+([a-f0-9]{20})"', data)
        playlist_guid = playlist_guids[0]
        play_list_url = "http://api.mtvnn.com/v2/nl/NL/local_playlists/{}.json?video_format=m3u8".format(
            playlist_guid)

        data = UriHandler.open(play_list_url, proxy=self.proxy)

        from resources.lib.helpers.jsonhelper import JsonHelper
        from resources.lib.streams.m3u8 import M3u8

        json_data = JsonHelper(data)
        m3u8_url = json_data.get_value("local_playlist_videos", 0, "url")
        part = item.create_new_empty_media_part()
        item.complete = M3u8.update_part_with_m3u8_streams(part,
                                                           m3u8_url,
                                                           proxy=self.proxy,
                                                           channel=self,
                                                           encrypted=True)
        return item

Example #9

0

Show file

File: chn_eredivisie.py Project: Sopor/plugin.video.retrospect

    def __init__(self, channel_info):
        """ Initialisation of the class.

        All class variables should be instantiated here and this method should not
        be overridden by any derived classes.

        :param ChannelInfo channel_info: The channel info object to base this channel on.

        """

        chn_class.Channel.__init__(self, channel_info)

        # ============== Actual channel setup STARTS here and should be overwritten from derived classes ===============
        self.videoType = None
        self.noImage = "eredivisieimage.jpg"

        # setup the urls
        self.baseUrl = "https://www.foxsports.nl"
        self.mainListUri = "https://www.foxsports.nl/videos/"
        self.swfUrl = "https://static.eredivisielive.nl/static/swf/edPlayer-1.6.2.plus.swf"

        # setup the main parsing data
        # self.episodeItemRegex = '<option[^>]+value="([^"]+)"[^=>]+(?:data-season="([^"]+)")?[^=>]*>([^<]+)</option>'
        # self.videoItemJson = ("item",)
        self._add_data_parser(
            self.mainListUri,
            parser=Regexer.from_expresso(
                '<a [hd][^>]*ata-(?<Type>area|sport)="(?<Url>[^"]+)[^>]*>'
                '(?<Title>[^<]+)</a>'),
            creator=self.create_folder_item)

        self._add_data_parser(
            self.mainListUri,
            parser=Regexer.from_expresso(
                r'<a[^>]+href="/video/(?<Type>filter|meest_bekeken)/?'
                r'(?<Url>[^"]*)">[^<]*</a>\W+<h1[^>]*>(?<Title>[^<;]+)'
                r'(?:&#39;s){0,1}</h1>'),
            creator=self.create_folder_item)

        self._add_data_parser(
            "https://www.foxsports.nl/video/filter/fragments/",
            preprocessor=self.add_pages,
            parser=Regexer.from_expresso(
                r'<img[^>]+src=\'(?<Thumb>[^\']+)\'[^>]*>\W+</picture>\W+'
                r'<span class="[^"]+play[\w\W]{0,500}?<h1[^>]*>\W+<a href="'
                r'(?<Url>[^"]+)"[^>]*>(?<Title>[^<]+)</a>\W+</h1>\W+<span'
                r'[^>]*>(?<Date>[^>]+)</span>'),
            creator=self.create_video_item)

        self._add_data_parser("*", updater=self.update_video_item)

        # ====================================== Actual channel setup STOPS here =======================================
        return

Example #10

0

Show file

File: chn_kijknl.py Project: weblate/plugin.video.retrospect

    def __update_video_from_mpd(self, item, mpd_info,
                                use_adaptive_with_encryption):
        """ Updates an existing MediaItem with more data based on an MPD stream.

        :param dict[str,str] mpd_info:              Stream info retrieved from the stream json.
        :param bool use_adaptive_with_encryption:   Do we use the Adaptive InputStream add-on?
        :param MediaItem item:                      The original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        Logger.debug("Updating streams using BrightCove data.")

        part = item.create_new_empty_media_part()
        mpd_manifest_url = "https:{0}".format(mpd_info["mediaLocator"])
        mpd_data = UriHandler.open(mpd_manifest_url, proxy=self.proxy)
        subtitles = Regexer.do_regex(r'<BaseURL>([^<]+\.vtt)</BaseURL>',
                                     mpd_data)

        if subtitles:
            Logger.debug("Found subtitle: %s", subtitles[0])
            subtitle = SubtitleHelper.download_subtitle(subtitles[0],
                                                        proxy=self.proxy,
                                                        format="webvtt")
            part.Subtitle = subtitle

        if use_adaptive_with_encryption:
            # We can use the adaptive add-on with encryption
            Logger.info("Using MPD InputStreamAddon")
            license_url = Regexer.do_regex('licenseUrl="([^"]+)"', mpd_data)[0]
            token = "Bearer {0}".format(mpd_info["playToken"])
            key_headers = {"Authorization": token}
            license_key = Mpd.get_license_key(license_url,
                                              key_headers=key_headers)

            stream = part.append_media_stream(mpd_manifest_url, 0)
            Mpd.set_input_stream_addon_input(stream,
                                             self.proxy,
                                             license_key=license_key)
            item.complete = True
        else:
            XbmcWrapper.show_dialog(
                LanguageHelper.get_localized_string(LanguageHelper.DrmTitle),
                LanguageHelper.get_localized_string(
                    LanguageHelper.WidevineLeiaRequired))

        return item

Example #11

0

Show file

File: chn_nickjr.py Project: Bartolomeo1/plugin.video.retrospect

    def extract_json(self, data):
        """ Performs pre-process actions for data processing.

        Accepts an data from the process_folder_list method, BEFORE the items are
        processed. Allows setting of parameters (like title etc) for the channel.
        Inside this method the <data> could be changed and additional items can
        be created.

        The return values should always be instantiated in at least ("", []).

        :param str data: The retrieve data that was loaded for the current item and URL.

        :return: A tuple of the data and a list of MediaItems that were generated.
        :rtype: tuple[str|JsonHelper,list[MediaItem]]

        """

        Logger.info("Performing Pre-Processing")
        items = []

        json_data = Regexer.do_regex('type="application/json">([^<]+)<', data)
        if not json_data:
            Logger.warning("No JSON data found.")
            return data, items

        json = JsonHelper(json_data[0])
        result = []
        for key, value in json.json.items():
            result.append(value)
            value["title"] = key

        # set new json and return JsonHelper object
        json.json = result
        return json, items

Example #12

0

Show file

    def __update_video(self, item, data):
        if not item.url.startswith("https://api.viervijfzes.be/content/"):
            regex = 'data-video-*id="([^"]+)'
            m3u8_url = Regexer.do_regex(regex, data)[-1]
            # we either have an URL now or an uuid
        else:
            m3u8_url = item.url.rsplit("/", 1)[-1]

        if ".m3u8" not in m3u8_url:
            Logger.info("Not a direct M3u8 file. Need to log in")
            url = "https://api.viervijfzes.be/content/%s" % (m3u8_url, )

            # We need to log in
            if not self.loggedOn:
                self.log_on()

            # add authorization header
            authentication_header = {
                "authorization": self.__idToken,
                "content-type": "application/json"
            }
            data = UriHandler.open(url, additional_headers=authentication_header)
            json_data = JsonHelper(data)
            m3u8_url = json_data.get_value("video", "S")

        # Geo Locked?
        if "/geo/" in m3u8_url.lower():
            # set it for the error statistics
            item.isGeoLocked = True

        part = item.create_new_empty_media_part()
        item.complete = M3u8.update_part_with_m3u8_streams(
            part, m3u8_url, channel=self, encrypted=False)

        return item

Example #13

0

Show file

File: chn_svt.py Project: meekm/plugin.video.retrospect

    def update_video_html_item(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        data = UriHandler.open(item.url, proxy=self.proxy)
        video_id = Regexer.do_regex(r'data-video-id="([^"]+)"', data)[0]
        item.url = "https://api.svt.se/video/{}".format(video_id)
        return self.update_video_api_item(item)

Example #14

0

Show file

File: chn_vrtnu.py Project: weblate/plugin.video.retrospect

    def update_video_item(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

        # Get the MZID
        data = UriHandler.open(item.url, proxy=self.proxy, additional_headers=item.HttpHeaders)
        json_data = Regexer.do_regex(r'<script type="application/ld\+json">(.*?)</script>', data)
        json_info = JsonHelper(json_data[-1])
        video_id = json_info.get_value("video", "@id")
        publication_id = json_info.get_value("publication", -1, "@id")

        mzid = "{}${}".format(publication_id, video_id)
        return self.update_video_for_mzid(item, mzid)

Example #15

0

Show file

File: chn_amt.py Project: Bartolomeo1/plugin.video.retrospect

    def get_movie_id(self, data):
        """ Performs pre-process actions for data processing.

        Accepts an data from the process_folder_list method, BEFORE the items are
        processed. Allows setting of parameters (like title etc) for the channel.
        Inside this method the <data> could be changed and additional items can
        be created.

        The return values should always be instantiated in at least ("", []).

        :param str data: The retrieve data that was loaded for the current item and URL.

        :return: A tuple of the data and a list of MediaItems that were generated.
        :rtype: tuple[str|JsonHelper,list[MediaItem]]

        """

        Logger.info("Performing Pre-Processing")
        items = []

        movie_id = Regexer.do_regex(r"movietrailers://movie/detail/(\d+)", data)[-1]
        Logger.debug("Found Movie ID: %s", movie_id)
        url = "%s/trailers/feeds/data/%s.json" % (self.baseUrl, movie_id)
        data = UriHandler.open(url)

        # set it for logging purposes
        self.parentItem.url = url

        Logger.debug("Pre-Processing finished")
        return data, items

Example #16

0

Show file

    def __convert_ttml_to_srt(ttml):
        """Converts sami format into SRT format:

        Arguments:
        ttml : string - TTML (Timed Text Markup Language) subtitle format

        Returns:
        SRT formatted subtitle:

        Example:
            1
            00:00:20,000 --> 00:00:24,400
            text

        """

        pars_regex = r'<p[^>]+begin="([^"]+)\.(\d+)"[^>]+end="([^"]+)\.(\d+)"[^>]*>([\w\W]+?)</p>'
        subs = Regexer.do_regex(pars_regex, ttml)

        srt = ""
        i = 1

        for sub in subs:
            try:
                start = "%s,%03d" % (sub[0], int(sub[1]))
                end = "%s,%03d" % (sub[2], int(sub[3]))
                text = sub[4].replace("<br />", "\n")
                text = HtmlEntityHelper.convert_html_entities(text)
                text = text.replace("\r\n", "")
                srt = "%s\n%s\n%s --> %s\n%s\n" % (srt, i, start, end, text.strip())
                i += 1
            except:
                Logger.error("Error parsing subtitle: %s", sub[1], exc_info=True)

        return srt

Example #17

0

Show file

File: chn_een.py Project: Bartolomeo1/plugin.video.retrospect

    def update_video_item(self, item):
        """
        Accepts an item. It returns an updated item. Usually retrieves the MediaURL
        and the Thumb! It should return a completed item.
        """
        Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

        # rtmpt://vrt.flash.streampower.be/een//2011/07/1000_110723_getipt_neefs_wiels_Website_EEN.flv
        # http://www.een.be/sites/een.be/modules/custom/vrt_video/player/player_4.3.swf

        # now the mediaurl is derived. First we try WMV
        data = UriHandler.open(item.url)

        part = item.create_new_empty_media_part()
        if "mediazone.vrt.be" not in item.url:
            # Extract actual media data
            video_id = Regexer.do_regex('data-video=[\'"]([^"\']+)[\'"]', data)[0]
            url = "https://mediazone.vrt.be/api/v1/een/assets/%s" % (video_id, )
            data = UriHandler.open(url)

        json = JsonHelper(data)
        urls = json.get_value("targetUrls")

        for url_info in urls:
            Logger.trace(url_info)
            if url_info["type"].lower() != "hls":
                continue

            hls_url = url_info["url"]
            for s, b in M3u8.get_streams_from_m3u8(hls_url):
                part.append_media_stream(s, b)

        item.complete = True
        return item

Example #18

0

Show file

    def update_video_item(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

        data = UriHandler.open(item.url, proxy=self.proxy)
        streams = Regexer.do_regex(self.mediaUrlRegex, data)

        item.MediaItemParts = []
        part = item.create_new_empty_media_part()
        for stream in streams:
            Logger.trace(stream)
            part.append_media_stream(stream[0], stream[1])

        item.complete = True
        return item

Example #19

0

Show file

File: chn_channel9.py Project: weblate/plugin.video.retrospect

    def create_folder_item(self, result_set):
        """ Creates a MediaItem of type 'folder' using the result_set from the regex.

        This method creates a new MediaItem from the Regular Expression or Json
        results <result_set>. The method should be implemented by derived classes
        and are specific to the channel.

        :param list[str]|dict[str,str] result_set: The result_set of the self.episodeItemRegex

        :return: A new MediaItem of type 'folder'.
        :rtype: MediaItem|None

        """

        if len(result_set) > 3 and result_set[3] != "":
            Logger.debug("Sub category folder found.")
            url = parse.urljoin(
                self.baseUrl,
                HtmlEntityHelper.convert_html_entities(result_set[3]))
            name = "\a.: %s :." % (result_set[4], )
            item = MediaItem(name, url)
            item.complete = True
            item.type = "folder"
            return item

        url = parse.urljoin(
            self.baseUrl,
            HtmlEntityHelper.convert_html_entities(result_set[0]))
        name = HtmlEntityHelper.convert_html_entities(result_set[1])

        helper = HtmlHelper(result_set[2])
        description = helper.get_tag_content("div", {'class': 'description'})

        item = MediaItem(name, "%s/RSS" % (url, ))
        item.type = 'folder'
        item.description = description.strip()

        date = helper.get_tag_content("div", {'class': 'date'})
        if date == "":
            date = helper.get_tag_content("span",
                                          {'class': 'lastPublishedDate'})

        if not date == "":
            date_parts = Regexer.do_regex(r"(\w+) (\d+)[^<]+, (\d+)", date)
            if len(date_parts) > 0:
                date_parts = date_parts[0]
                month_part = date_parts[0].lower()
                day_part = date_parts[1]
                year_part = date_parts[2]

                try:
                    month = DateHelper.get_month_from_name(month_part, "en")
                    item.set_date(year_part, month, day_part)
                except:
                    Logger.error("Error matching month: %s",
                                 month_part,
                                 exc_info=True)

        item.complete = True
        return item

Example #20

0

Show file

    def update_video_item(self, item):
        data = UriHandler.open(item.url,
                               proxy=self.proxy,
                               additional_headers=item.HttpHeaders)
        media_regex = 'data-media="([^"]+)"'
        media_info = Regexer.do_regex(media_regex, data)[0]
        media_info = HtmlEntityHelper.convert_html_entities(media_info)
        media_info = JsonHelper(media_info)
        Logger.trace(media_info)

        # sources
        part = item.create_new_empty_media_part()
        # high, web, mobile, url
        media_sources = media_info.json.get("sources", {})
        for quality in media_sources:
            url = media_sources[quality]
            if quality == "high":
                bitrate = 2000
            elif quality == "web":
                bitrate = 800
            elif quality == "mobile":
                bitrate = 400
            else:
                bitrate = 0
            part.append_media_stream(url, bitrate)

        # geoLocRestriction
        item.isGeoLocked = not media_info.get_value(
            "geoLocRestriction", fallback="world") == "world"
        item.complete = True
        return item

Example #21

0

Show file

    def update_video_item_json_player(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        data = UriHandler.open(item.url, proxy=self.proxy)
        streams = Regexer.do_regex(r'label:\s*"([^"]+)",\W*file:\s*"([^"]+)"',
                                   data)

        part = item.create_new_empty_media_part()
        bitrates = {"720p SD": 1200}
        for stream in streams:
            part.append_media_stream(stream[1], bitrates.get(stream[0], 0))
            item.complete = True

        return item

Example #22

0

Show file

File: chn_nickelodeon.py Project: Snorchs/plugin.video.retrospect

    def extract_json_episodes(self, data):
        """ Performs pre-process actions for data processing.

        Accepts an data from the process_folder_list method, BEFORE the items are
        processed. Allows setting of parameters (like title etc) for the channel.
        Inside this method the <data> could be changed and additional items can
        be created.

        The return values should always be instantiated in at least ("", []).

        :param str data: The retrieve data that was loaded for the current item and URL.

        :return: A tuple of the data and a list of MediaItems that were generated.
        :rtype: tuple[str|JsonHelper,list[MediaItem]]

        """

        Logger.info("Performing Pre-Processing")
        items = []

        data = Regexer.do_regex(
            r'window.__DATA__ = ([\w\W]+?});\s*window.__PUSH_STATE__', data)[0]
        json_data = JsonHelper(data)
        main_container = [
            m for m in json_data.get_value("children")
            if m["type"] == "MainContainer"
        ]
        line_list = [
            item for item in main_container[0]["children"]
            if item["type"] == "LineList"
        ]
        line_list = line_list[0]["props"]
        json_data.json = line_list
        return json_data, items

Example #23

0

Show file

    def add_live_channel_and_extract_data(self, data):
        """ Add the live channel and extract the correct data to process further.

        The return values should always be instantiated in at least ("", []).

        :param str data: The retrieve data that was loaded for the current item and URL.

        :return: A tuple of the data and a list of MediaItems that were generated.
        :rtype: tuple[str|JsonHelper,list[MediaItem]]

        """
        Logger.info("Performing Pre-Processing")
        items = []

        title = LanguageHelper.get_localized_string(
            LanguageHelper.LiveStreamTitleId)
        item = MediaItem("\a.: {} :.".format(title), self.liveUrl)
        item.type = "folder"
        items.append(item)

        if not data:
            return "[]", items

        json_data = Regexer.do_regex(
            r"setupBroadcastArchive\('Tv',\s*([^;]+)\);", data)
        if isinstance(json_data, (tuple, list)) and len(json_data) > 0:
            Logger.debug("Pre-Processing finished")
            return json_data[0], items

        Logger.info("Cannot extract JSON data from HTML.")
        return data, items

Example #24

0

Show file

    def update_live_item(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        Logger.debug('Starting update_live_item for %s (%s)', item.name, self.channelName)
        data = UriHandler.open(item.url, proxy=self.proxy, additional_headers=self.httpHeaders)
        stream_root = Regexer.do_regex(r'<media href="([^"]+\.isml)', data)[0]
        Logger.debug("Found Live stream root: %s", stream_root)

        part = item.create_new_empty_media_part()
        for s, b in F4m.get_streams_from_f4m(item.url, self.proxy):
            item.complete = True
            s = s.replace(".f4m", ".m3u8")
            part.append_media_stream(s, b)

        return item

Example #25

0

Show file

File: xmlhelper.py Project: orbit86/plugin.video.retrospect

    def get_nodes_content(self, node_tag, *args):
        """Retreives all nodes with nodeTag as name 
        
        Arguments:
        nodeTag : string     - Name of the node to retrieve 
        args    : dictionary - Dictionary holding the node's attributes. Should
                               occur in order of appearance.
        
        Returns:
        A list of all the content of the found nodes.
        
        The args should be a dictionary: {"size": "380x285"}, {"ratio":"4:3"} 
        will find a node with <nodename size="380x285" name="test" ratio="4:3">
        
        """

        regex = "<%s" % (node_tag, )

        for arg in args:
            regex += r'[^>]*%s\W*=\W*"%s"' % (list(arg.keys())[0], arg[list(
                arg.keys())[0]])
            # just do one pass

        regex += r"[^>]*>([\w\W]+?)</%s>" % (node_tag, )
        Logger.trace("XmlRegex = %s", regex)

        results = Regexer.do_regex(regex, self.data)
        Logger.trace(results)
        return results

Example #26

0

Show file

File: m3u8.py Project: orbit86/plugin.video.retrospect

    def get_subtitle(url, proxy=None, play_list_data=None, append_query_string=True, language=None):  # NOSONAR
        """ Retrieves a subtitle url either from a M3u8 file via HTTP or alternatively from a
        M3u8 playlist string value (in case it was already retrieved).

        :param str url:                     The M3u8 url that contains the subtitle information.
        :param ProxyInfo proxy:             An optional proxy to use.
        :param str play_list_data:          The data (in case the URL was already retrieved).
        :param bool append_query_string:    Should we re-append the query string?
        :param str language:                The language to select (if multiple are present).

        :return: The subtitle url for the M3u8 file.
        :rtype: str

        """

        data = play_list_data or UriHandler.open(url, proxy)
        regex = r'(#\w[^:]+)[^\n]+TYPE=SUBTITLES[^\n]*LANGUAGE="(\w+)"[^\n]*\W+URI="([^"]+.m3u8[^"\n\r]*)'
        sub = ""

        qs = None
        if append_query_string and "?" in url:
            base, qs = url.split("?", 1)
            Logger.info("Going to append QS: %s", qs)
        elif "?" in url:
            base, qs = url.split("?", 1)
            Logger.info("Ignoring QS: %s", qs)
            qs = None
        else:
            base = url

        needles = Regexer.do_regex(regex, data)
        url_index = 2
        language_index = 1
        base_url_logged = False
        base_url = base[:base.rindex("/")]
        for n in needles:
            if language is not None and n[language_index] != language:
                Logger.debug("Found incorrect language: %s", n[language_index])
                continue

            if "://" not in n[url_index]:
                if not base_url_logged:
                    Logger.debug("Using base_url %s for M3u8", base_url)
                    base_url_logged = True
                sub = "%s/%s" % (base_url, n[url_index])
            else:
                if not base_url_logged:
                    Logger.debug("Full url found in M3u8")
                    base_url_logged = True
                sub = n[url_index]

            if qs is not None and sub.endswith("?null="):
                sub = sub.replace("?null=", "?%s" % (qs, ))
            elif qs is not None and "?" in sub:
                sub = "%s&%s" % (sub, qs)
            elif qs is not None:
                sub = "%s?%s" % (sub, qs)

        return sub

Example #27

0

Show file

File: chn_l1.py Project: Bartolomeo1/plugin.video.retrospect

    def update_video_item(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        Logger.debug('Starting update_video_item for %s (%s)', item.name, self.channelName)

        if not item.url.endswith(".js"):
            data = UriHandler.open(item.url)
            data_id = Regexer.do_regex(r'data-id="(\d+)"[^>]+data-playout', data)
            if data_id is None:
                Logger.warning("Cannot find stream-id for L1 stream.")
                return item

            data_url = "https://limburg.bbvms.com/p/L1_video/c/{}.json".format(data_id[0])
        else:
            data_url = item.url

        data = UriHandler.open(data_url)
        json = JsonHelper(data, logger=Logger.instance())
        Logger.trace(json)

        base_url = json.get_value("publicationData", "defaultMediaAssetPath")
        streams = json.get_value("clipData", "assets")
        item.MediaItemParts = []
        part = item.create_new_empty_media_part()
        for stream in streams:
            url = stream.get("src", None)
            if "://" not in url:
                url = "{}{}".format(base_url, url)
            bitrate = stream.get("bandwidth", None)
            if url:
                part.append_media_stream(url, bitrate)

        if not item.thumb and json.get_value("thumbnails"):
            url = json.get_value("thumbnails")[0].get("src", None)
            if url and "http:/" not in url:
                url = "%s%s" % (self.baseUrl, url)
            item.thumb = url
        item.complete = True
        return item

Example #28

0

Show file

    def update_video_item(self, item):
        """ Updates an existing MediaItem with more data.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        :param MediaItem item: the original MediaItem that needs updating.

        :return: The original item with more data added to it's properties.
        :rtype: MediaItem

        """

        Logger.debug('Starting update_video_item for %s (%s)', item.name,
                     self.channelName)

        url = item.url
        data = UriHandler.open(url, proxy=self.proxy)

        renditions_url = Regexer.do_regex(
            r'<media:content[^>]+url=\W([^\'"]+)\W', data)[0]
        renditions_url = HtmlEntityHelper.strip_amp(renditions_url)
        rendition_data = UriHandler.open(renditions_url, proxy=self.proxy)
        video_items = Regexer.do_regex(
            r'<rendition[^>]+bitrate="(\d+)"[^>]*>\W+<src>([^<]+)<',
            rendition_data)

        item.MediaItemParts = []
        part = item.create_new_empty_media_part()
        for video_item in video_items:
            media_url = self.get_verifiable_video_url(video_item[1].replace(
                "rtmpe", "rtmp"))
            part.append_media_stream(media_url, video_item[0])

        item.complete = True
        return item

Example #29

0

Show file

File: chn_nickelodeon.py Project: Snorchs/plugin.video.retrospect

    def update_video_item(self, item):
        """Updates an existing MediaItem with more data.

        Arguments:
        item : MediaItem - the MediaItem that needs to be updated

        Returns:
        The original item with more data added to it's properties.

        Used to update none complete MediaItems (self.complete = False). This
        could include opening the item's URL to fetch more data and then process that
        data or retrieve it's real media-URL.

        The method should at least:
        * cache the thumbnail to disk (use self.noImage if no thumb is available).
        * set at least one MediaItemPart with a single MediaStream.
        * set self.complete = True.

        if the returned item does not have a MediaItemPart then the self.complete flag
        will automatically be set back to False.

        """

        Logger.debug('Starting update_video_item for %s (%s)', item.name,
                     self.channelName)
        from resources.lib.streams.m3u8 import M3u8

        data = UriHandler.open(item.url, proxy=self.proxy)
        video_id = Regexer.do_regex(r'{"video":{"config":{"uri":"([^"]+)',
                                    data)[0]
        url = "http://media.mtvnservices.com/pmt/e1/access/index.html?uri={}&configtype=edge".format(
            video_id)
        meta_data = UriHandler.open(url,
                                    proxy=self.proxy,
                                    referer=self.baseUrl)
        meta = JsonHelper(meta_data)
        stream_parts = meta.get_value("feed", "items")
        for stream_part in stream_parts:
            stream_url = stream_part["group"]["content"]
            stream_url = stream_url.replace("&device={device}", "")
            stream_url = "%s&format=json&acceptMethods=hls" % (stream_url, )
            stream_data = UriHandler.open(stream_url, proxy=self.proxy)
            stream = JsonHelper(stream_data)

            # subUrls = stream.get_value("package", "video", "item", 0, "transcript", 0, "typographic")  # NOSONAR
            part = item.create_new_empty_media_part()

            hls_streams = stream.get_value("package", "video", "item", 0,
                                           "rendition")
            for hls_stream in hls_streams:
                hls_url = hls_stream["src"]
                item.complete |= M3u8.update_part_with_m3u8_streams(
                    part, hls_url, proxy=self.proxy)

        item.complete = True
        Logger.trace("Media url: %s", item)
        return item

Example #30

0

Show file

File: chn_nickelodeon.py Project: weblate/plugin.video.retrospect

    def extract_json_video(self, data):
        """ Performs pre-process actions for data processing.

        Accepts an data from the process_folder_list method, BEFORE the items are
        processed. Allows setting of parameters (like title etc) for the channel.
        Inside this method the <data> could be changed and additional items can
        be created.

        The return values should always be instantiated in at least ("", []).

        :param str data: The retrieve data that was loaded for the current item and URL.

        :return: A tuple of the data and a list of MediaItems that were generated.
        :rtype: tuple[str|JsonHelper,list[MediaItem]]

        """

        Logger.info("Performing Pre-Processing")
        items = []

        data = Regexer.do_regex(r'window.__DATA__ = ([\w\W]+?});\s*window.__PUSH_STATE__', data)[0]
        json_data = JsonHelper(data)
        # Get the main content container
        main_container = [m for m in json_data.get_value("children") if m["type"] == "MainContainer"]

        # Extract seasons
        seasons = []
        if not self.parentItem.metaData.get("is_season", False):
            seasons = [
                lst["props"]["items"]
                for lst in main_container[0]["children"]
                if lst["type"] == "SeasonSelector"
            ]
            if seasons:
                seasons = [s for s in seasons[0] if s["url"]]
                # Inject them
                json_data.json["seasons"] = seasons

        # Find the actual
        line_lists = [lst for lst in main_container[0]["children"] if lst["type"] == "LineList"]
        for line_list in line_lists:
            if line_list.get("props", {}).get("type") == "video-guide":
                json_data.json = line_list["props"]

                # Get the actual full episode list
                all_episodes = json_data.get_value("filters", "items", 0, "url")
                url_all_episodes = "{}{}".format(self.baseUrl, all_episodes)
                data = UriHandler.open(url_all_episodes)
                json_data = JsonHelper(data)

                # And append seasons again
                if seasons:
                    json_data.json["seasons"] = seasons
                return json_data, items

        Logger.warning("Cannot extract video items")
        return json_data, items