Example #1
def PlayStream(name, url, iconimage, description, subtitles_url):
    html = OpenURL(url)
    check_geo = re.search('<H1>Access Denied</H1>', html)
    if check_geo or not html:
        # print "Geoblock detected, raising error message"
        dialog = xbmcgui.Dialog()
        dialog.ok(translation(30400), translation(30401))
        raise
    liz = xbmcgui.ListItem(name,
                           iconImage='DefaultVideo.png',
                           thumbnailImage=iconimage)
    liz.setInfo(type='Video', infoLabels={'Title': name})
    liz.setProperty("IsPlayable", "true")
    liz.setPath(url)
    if subtitles_url and ADDON.getSetting('subtitles') == 'true':
        subtitles_file = download_subtitles(subtitles_url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, liz)
    if subtitles_url and ADDON.getSetting('subtitles') == 'true':
        # Successfully started playing something?
        while True:
            if xbmc.Player().isPlaying():
                break
            else:
                xbmc.sleep(500)
        xbmc.Player().setSubtitles(subtitles_file)
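Every example on this page calls an OpenURL helper that is not shown. As a rough orientation only, here is a minimal sketch of what such a helper might look like; the function name comes from the examples, everything else (headers, no cookie handling, no error handling) is an assumption, and the real add-on almost certainly does more:

# Hypothetical minimal sketch of the assumed OpenURL helper.
# The real add-on likely sets its own headers, reuses a cookie jar and
# handles HTTP errors; this only fetches a URL and returns the body as text.
import urllib.request

def OpenURL(url):
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')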
Example #2
def ListAtoZ():
    """List programmes based on alphabetical order.

    Only creates the corresponding directories for each character.
    """
    characters = [('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'),
                  ('F', 'f'), ('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'),
                  ('K', 'k'), ('L', 'l'), ('M', 'm'), ('N', 'n'), ('O', 'o'),
                  ('P', 'p'), ('Q', 'q'), ('R', 'r'), ('S', 's'), ('T', 't'),
                  ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'), ('Y', 'y'),
                  ('Z', 'z'), ('0-9', '0-9')]

    if int(ADDON.getSetting('scrape_atoz')) == 1:
        pDialog = xbmcgui.DialogProgressBG()
        pDialog.create(translation(30319))
        page = 1
        total_pages = len(characters)
        for name, url in characters:
            GetAtoZPage(url)
            percent = int(100 * page / total_pages)
            pDialog.update(percent, translation(30319), name)
            page += 1
        pDialog.close()
    else:
        for name, url in characters:
            AddMenuEntry(name, url, 124, '', '', '')
def ListAtoZ():
    """List programmes based on alphabetical order.

    Only creates the corresponding directories for each character.
    """
    characters = [
        ('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'), ('F', 'f'),
        ('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'), ('K', 'k'), ('L', 'l'),
        ('M', 'm'), ('N', 'n'), ('O', 'o'), ('P', 'p'), ('Q', 'q'), ('R', 'r'),
        ('S', 's'), ('T', 't'), ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'),
        ('Y', 'y'), ('Z', 'z'), ('0-9', '0-9')]

    if int(ADDON.getSetting('scrape_atoz')) == 1:
        pDialog = xbmcgui.DialogProgressBG()
        pDialog.create(translation(30319))
        page = 1
        total_pages = len(characters)
        for name, url in characters:
            GetAtoZPage(url)
            percent = int(100 * page / total_pages)
            pDialog.update(percent, translation(30319), name)
            page += 1
        pDialog.close()
    else:
        for name, url in characters:
            AddMenuEntry(name, url, 124, '', '', '')
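As a side note, the hard-coded characters list above could just as well be generated; a small, functionally equivalent sketch (assuming only A-Z plus the 0-9 bucket are wanted):

# Hypothetical alternative to the hard-coded list: build the (label, url) pairs.
import string
characters = [(c, c.lower()) for c in string.ascii_uppercase] + [('0-9', '0-9')]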
def PlayStream(name, url, iconimage, description, subtitles_url):
    if iconimage == '':
        iconimage = 'DefaultVideo.png'
    html = OpenURL(url)
    check_geo = re.search(
        '<H1>Access Denied</H1>', html)
    if check_geo or not html:
        # print "Geoblock detected, raising error message"
        dialog = xbmcgui.Dialog()
        dialog.ok(translation(30400), translation(30401))
        raise
    liz = xbmcgui.ListItem(name, iconImage='DefaultVideo.png', thumbnailImage=iconimage)
    liz.setInfo(type='Video', infoLabels={'Title': name})
    liz.setProperty("IsPlayable", "true")
    liz.setPath(url)
    if subtitles_url and ADDON.getSetting('subtitles') == 'true':
        subtitles_file = download_subtitles(subtitles_url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, liz)
    if subtitles_url and ADDON.getSetting('subtitles') == 'true':
        # Successfully started playing something?
        while True:
            if xbmc.Player().isPlaying():
                break
            else:
                xbmc.sleep(500)
        xbmc.Player().setSubtitles(subtitles_file)
def RedButtonDialog():
    if ADDON.getSetting('redbutton_warning') == 'true':
        dialog = xbmcgui.Dialog()
        ret = dialog.yesno(translation(30405), translation(30406), '',
                           translation(30407), translation(30409), translation(30408))
        if ret:
            ListRedButton()
    else:
        ListRedButton()
Example #6
def ParseMediaselector(stream_id):
    streams = []
    subtitles = []
    # print "Parsing streams for PID: %s"%stream_id
    # Open the page with the actual stream information and display the various available streams.
    NEW_URL = "https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/pc/vpid/%s/format/json/jsfunc/JS_callbacks0" % stream_id
    html = OpenURL(NEW_URL)
    match = re.search(r'JS_callbacks0.*?\((.*?)\);', html, re.DOTALL)
    if match:
        json_data = json.loads(match.group(1))
        if json_data:
            # print(json.dumps(json_data, sort_keys=True, indent=2))
            if 'media' in json_data:
                for media in json_data['media']:
                    if 'kind' in media:
                        if media['kind'] == 'captions':
                            if 'connection' in media:
                                for connection in media['connection']:
                                    href = ''
                                    protocol = ''
                                    supplier = ''
                                    if 'href' in connection:
                                        href = connection['href']
                                    if 'protocol' in connection:
                                        protocol = connection['protocol']
                                    if 'supplier' in connection:
                                        supplier = connection['supplier']
                                    subtitles.append(
                                        (href, protocol, supplier))
                        elif media['kind'].startswith('video'):
                            if 'connection' in media:
                                for connection in media['connection']:
                                    href = ''
                                    protocol = ''
                                    supplier = ''
                                    transfer_format = ''
                                    if 'href' in connection:
                                        href = connection['href']
                                    if 'protocol' in connection:
                                        protocol = connection['protocol']
                                    if 'supplier' in connection:
                                        supplier = connection['supplier']
                                    if 'transferFormat' in connection:
                                        transfer_format = connection[
                                            'transferFormat']
                                    streams.append((href, protocol, supplier,
                                                    transfer_format))
            elif 'result' in json_data:
                if json_data['result'] == 'geolocation':
                    # print "Geoblock detected, raising error message"
                    dialog = xbmcgui.Dialog()
                    dialog.ok(translation(30400), translation(30401))
                    raise
    # print "Found streams:"
    # print streams
    return streams, subtitles
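ParseMediaselector returns plain tuples, so a caller has to know their layout: (href, protocol, supplier, transfer_format) for streams and (href, protocol, supplier) for subtitles. A hedged usage sketch with a placeholder PID and a made-up selection policy:

# Hypothetical caller: pick the first HTTPS HLS video connection and any subtitle URL.
streams, subtitles = ParseMediaselector('p0000000')  # placeholder PID
hls_streams = [s for s in streams
               if s[1] in ('http', 'https') and s[3] == 'hls']
if hls_streams:
    href, protocol, supplier, transfer_format = hls_streams[0]
subtitle_url = subtitles[0][0] if subtitles else None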
Example #7
def RedButtonDialog():
    if ADDON.getSetting('redbutton_warning') == 'true':
        dialog = xbmcgui.Dialog()
        ret = dialog.yesno(translation(30405), translation(30406), '',
                           translation(30407), translation(30409),
                           translation(30408))
        if ret:
            ListRedButton()
    else:
        ListRedButton()
def PlayStream(name, url, iconimage, description, subtitles_url):
    html = OpenURL(url)

    check_geo = re.search("<H1>Access Denied</H1>", html)
    if check_geo or not html:
        # print "Geoblock detected, raising error message"
        dialog = xbmcgui.Dialog()
        dialog.ok(translation(30400), translation(30401))
        raise
    liz = xbmcgui.ListItem(name, iconImage="DefaultVideo.png", thumbnailImage=iconimage)
    liz.setInfo(type="Audio", infoLabels={"Title": name})
    liz.setProperty("IsPlayable", "true")
    liz.setPath(url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, liz)
Example #9
def PlayStream(name, url, iconimage, description, subtitles_url):
    html = OpenURL(url)

    check_geo = re.search(
        '<H1>Access Denied</H1>', html)
    if check_geo or not html:
        # print "Geoblock detected, raising error message"
        dialog = xbmcgui.Dialog()
        dialog.ok(translation(30400), translation(30401))
        raise
    liz = xbmcgui.ListItem(name, iconImage='DefaultVideo.png', thumbnailImage=iconimage)
    liz.setInfo(type='Audio', infoLabels={'Title': name})
    liz.setProperty("IsPlayable", "true")
    liz.setPath(url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, liz)
def AddAvailableStreamItem(name, url, iconimage, description):
    """Play a streamm based on settings for preferred catchup source and bitrate."""
    stream_ids = ScrapeAvailableStreams(url)
    if len(stream_ids) < 1:
        # TODO check CBeebies for special cases
        xbmcgui.Dialog().ok(translation(30403), translation(30404))
        return
    streams_all = ParseStreams(stream_ids)
    streams = streams_all[0]
    source = int(ADDON.getSetting("radio_source"))
    if source > 0:
        # Case 1: Selected source
        match = [x for x in streams if (x[0] == source)]
        if len(match) == 0:
            # Fallback: Use any source and any bitrate
            match = streams
        match.sort(key=lambda x: x[1], reverse=True)
    else:
        # Case 3: Any source
        # Play highest available bitrate
        match = streams
        match.sort(key=lambda x: x[1], reverse=True)
    PlayStream(name, match[0][2], iconimage, description, "")
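The selection logic above only works if ParseStreams returns tuples whose first three fields are (source, bitrate, url); the real tuples may carry extra fields such as resolution. A small illustration with synthetic values:

# Synthetic stream tuples illustrating the assumed (source, bitrate, url) layout.
streams = [(1, 2, 'http://example.invalid/low.m3u8'),
           (2, 6, 'http://example.invalid/high.m3u8'),
           (1, 4, 'http://example.invalid/mid.m3u8')]
source = 1
match = [x for x in streams if x[0] == source] or streams  # fall back to any source
match.sort(key=lambda x: x[1], reverse=True)               # highest bitrate bucket first
best_url = match[0][2]                                      # 'http://example.invalid/mid.m3u8'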
Example #11
def AddAvailableStreamItem(name, url, iconimage, description):
    """Play a streamm based on settings for preferred catchup source and bitrate."""
    stream_ids = ScrapeAvailableStreams(url)
    if len(stream_ids) < 1:
        # TODO check CBeebies for special cases
        xbmcgui.Dialog().ok(translation(30403), translation(30404))
        return
    streams_all = ParseStreams(stream_ids)
    streams = streams_all[0]
    source = int(ADDON.getSetting('radio_source'))
    if source > 0:
        # Case 1: Selected source
        match = [x for x in streams if (x[0] == source)]
        if len(match) == 0:
            # Fallback: Use any source and any bitrate
            match = streams
        match.sort(key=lambda x: x[1], reverse=True)
    else:
        # Case 3: Any source
        # Play highest available bitrate
        match = streams
        match.sort(key=lambda x: x[1], reverse=True)
    PlayStream(name, match[0][2], iconimage, description, '')
def ListFavourites(logged_in):
    """Scrapes all episodes of the favourites page."""
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('video')
        return

    identity_cookie = None
    cookie_jar = None
    cookie_jar = GetCookieJar()
    for cookie in cookie_jar:
        if (cookie.name == 'IDENTITY'):
            identity_cookie = cookie.value
            break
    url = "https://ibl.api.bbci.co.uk/ibl/v1/user/added?identity_cookie=%s" % identity_cookie
    html = OpenURL(url)
    json_data = json.loads(html)
    favourites_list = json_data.get('added').get('elements')
    for favourite in favourites_list:
        programme = favourite.get('programme')
        id = programme.get('id')
        url = "http://www.bbc.co.uk/iplayer/brand/%s" % (id)
        title = programme.get('title')
        initial_child = programme.get('initial_children')[0]
        subtitle = initial_child.get('subtitle')
        episode_title = title
        if subtitle:
            episode_title = title + ' - ' + subtitle
        image = initial_child.get('images')
        image_url = ParseImageUrl(image.get('standard'))
        synopses = initial_child.get('synopses')
        plot = synopses.get('small')
        try:
            aired = FirstShownToAired(initial_child.get('release_date'))
        except:
            aired = ''
        CheckAutoplay(episode_title, url, image_url, plot, aired)
        more = programme.get('count')
        if more:
            episodes_url = "http://www.bbc.co.uk/iplayer/episodes/" + id
            AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                         episodes_url, 128, image_url, '', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
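For readability, this variant assumes a response shape roughly like the following synthetic sketch; the field names are mirrored from the .get() calls above, all values are placeholders, and the real ibl API response may differ:

# Synthetic sketch of the assumed JSON structure (placeholder values only).
json_data = {
    'added': {
        'elements': [
            {
                'programme': {
                    'id': 'b0000000',
                    'title': 'Example Programme',
                    'count': 3,
                    'initial_children': [
                        {
                            'subtitle': 'Episode 1',
                            'images': {'standard': 'https://ichef.bbci.co.uk/images/ic/{recipe}/p0000000.jpg'},
                            'synopses': {'small': 'A short synopsis.'},
                            'release_date': '2016-01-01',
                        }
                    ],
                }
            }
        ]
    }
}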
Example #13
def ListFavourites(logged_in):
    """Scrapes all episodes of the favourites page."""
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('video')
        return

    html = OpenURL(
        'http://www.bbc.co.uk/iplayer/usercomponents/favourites/programmes.json'
    )
    json_data = json.loads(html)
    # favourites = json_data.get('favourites')
    programmes = json_data.get('programmes')
    for programme in programmes:
        id = programme.get('id')
        url = "http://www.bbc.co.uk/iplayer/brand/%s" % (id)
        title = programme.get('title')
        initial_child = programme.get('initial_children')[0]
        subtitle = initial_child.get('subtitle')
        episode_title = title
        if subtitle:
            episode_title = title + ' - ' + subtitle
        image = initial_child.get('images')
        image_url = ParseImageUrl(image.get('standard'))
        synopses = initial_child.get('synopses')
        plot = synopses.get('small')
        try:
            aired = FirstShownToAired(initial_child.get('release_date'))
        except:
            aired = ''
        CheckAutoplay(episode_title, url, image_url, plot, aired)
        more = programme.get('count')
        if more:
            episodes_url = "http://www.bbc.co.uk/iplayer/episodes/" + id
            AddMenuEntry(
                '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                episodes_url, 128, image_url, '', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]),
                             xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
def ListFavourites(logged_in):
    """Scrapes all episodes of the favourites page."""
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('video')
        return

    html = OpenURL('http://www.bbc.co.uk/iplayer/usercomponents/favourites/programmes.json')
    json_data = json.loads(html)
    # favourites = json_data.get('favourites')
    programmes = json_data.get('programmes')
    for programme in programmes:
        id = programme.get('id')
        url = "http://www.bbc.co.uk/iplayer/brand/%s" % (id)
        title = programme.get('title')
        initial_child = programme.get('initial_children')[0]
        subtitle = initial_child.get('subtitle')
        episode_title = title
        if subtitle:
            episode_title = title + ' - ' + subtitle
        image = initial_child.get('images')
        image_url = ParseImageUrl(image.get('standard'))
        synopses = initial_child.get('synopses')
        plot = synopses.get('small')
        try:
            aired = FirstShownToAired(initial_child.get('release_date'))
        except:
            aired = ''
        CheckAutoplay(episode_title, url, image_url, plot, aired)
        more = programme.get('count')
        if more:
            episodes_url = "http://www.bbc.co.uk/iplayer/episodes/" + id
            AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                         episodes_url, 128, image_url, '', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
def RedButtonDialog():
    dialog = xbmcgui.Dialog()
    ret = dialog.yesno(translation(30405), translation(30406), '',
                       translation(30407), translation(30409), translation(30408))
    if ret:
        ListRedButton()
def ScrapeEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    ScrapeEpisodes contains a number of special treatments which are only needed for
    specific pages, e.g. Search, but it allows a single function to be used for all
    kinds of pages.
    """

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = range(1)
    paginate = re.search(r'<div class="paginate.*?</div>', html, re.DOTALL)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = range(current_page, current_page+1)
            next_page_match = re.search(r'<span class="next txt">.+?href="(.*?page=)(.*?)"',
                                        paginate.group(0),
                                        re.DOTALL)
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = range(current_page, current_page+1)
        else:
            pages = re.findall(r'<li class="page.*?</li>', paginate.group(0), re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a href="(.*?page=)(.*?)"',last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = range(1, total_pages+1)

    for page in page_range:

        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        # NOTE remove inner li to match outer li

        # <li data-version-type="hd">
        html = re.compile(r'<li data-version-type.*?</li>',
                          flags=(re.DOTALL | re.MULTILINE)).sub('', html)

        # <li class="list-item programme"  data-ip-id="p026f2t4">
        list_items = re.findall(r'<li class="list-item.*?</li>', html, flags=(re.DOTALL | re.MULTILINE))

        list_item_num = 1

        for li in list_items:

            # <li class="list-item unavailable"  data-ip-id="b06sq9xj">
            unavailable_match = re.search(
                '<li class="list-item.*?unavailable.*?"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if unavailable_match:
                continue

            # <li class="list-item search-group"  data-ip-id="b06rdtx0">
            search_group = False
            search_group_match = re.search(
                '<li class="list-item.*?search-group.*?"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if search_group_match:
                search_group = True

            main_url = None
            # <a href="/iplayer/episode/p026gmw9/world-of-difference-the-models"
            # title="World of Difference, The Models" class="list-item-link stat"
            url_match = re.search(
                r'<a.*?href="(.*?)".*?list-item-link.*?>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if url_match:
                url = url_match.group(1)
                if url:
                    main_url = 'http://www.bbc.co.uk' + url

            name = ''
            title = ''
            #<div class="title top-title">World of Difference</div>
            title_match = re.search(
                r'<div class="title top-title">\s*(.*?)\s*</div>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if title_match:
                title = title_match.group(1)
                name = title

            subtitle = None
            #<div class="subtitle">The Models</div>
            subtitle_match = re.search(
                r'<div class="subtitle">\s*(.*?)\s*</div>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if subtitle_match:
                subtitle = subtitle_match.group(1)
                if subtitle:
                    name = name + " - " + subtitle

            icon = ''
            type = None
            # <div class="r-image"  data-ip-type="episode"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p026vl1q.jpg">
            # <div class="r-image"  data-ip-type="group"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p037ty9z.jpg">
            image_match = re.search(
                r'<div class="r-image".+?data-ip-type="(.*?)".+?data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/(.*?)\.jpg"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if image_match:
                type = image_match.group(1)
                image = image_match.group(2)
                if image:
                    icon = "http://ichef.bbci.co.uk/images/ic/832x468/" + image + ".jpg"

            synopsis = ''
            # <p class="synopsis">What was it like to be a top fashion model 30 years ago? (1978)</p>
            synopsis_match = re.search(
                r'<p class="synopsis">\s*(.*?)\s*</p>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if synopsis_match:
                synopsis = synopsis_match.group(1)

            aired = ''
            # <span class="release">\nFirst shown: 8 Jun 1967\n</span>
            release_match = re.search(
                r'<span class="release">.*?First shown:\s*(.*?)\n.*?</span>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if release_match:
                release = release_match.group(1)
                if release:
                    aired = FirstShownToAired(release)

            episodes = None
            # <a class="view-more-container avail stat" href="/iplayer/episodes/p00db1jf" data-progress-state="">
            # <a class="view-more-container sibling stat"
            #  href="/iplayer/search?q=doctor&amp;search_group_id=urn:bbc:programmes:b06qbs4n">
            episodes_match = re.search(
                r'<a class="view-more-container.+?stat".+?href="(.*?)"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if episodes_match:
                episodes = episodes_match.group(1)

            more = None
            # <em class="view-more-heading">27</em>
            more_match = re.search(
                r'<em class="view-more-heading">(.*?)</em>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if more_match:
                more = more_match.group(1)

            if episodes:
                episodes_url = 'http://www.bbc.co.uk' + episodes
                if search_group:
                    AddMenuEntry('[B]%s[/B] - %s' % (title, translation(30318)),
                                 episodes_url, 128, icon, '', '')
                else:
                    AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                                 episodes_url, 128, icon, '', '')
            elif more:
                AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                             main_url, 128, icon, '', '')

            if type != "group":
                CheckAutoplay(name , main_url, icon, synopsis, aired)

            percent = int(100 * (page + list_item_num / len(list_items)) / total_pages)
            pDialog.update(percent, translation(30319), name)

            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320), page_url, 128, '', '', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)

    pDialog.close()
def ParseStreams(stream_id):
    retlist = []
    # print "Parsing streams for PID: %s"%stream_id[0]
    # Open the page with the actual stream information and display the various available streams.
    NEW_URL = "http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s" % stream_id[0]
    html = OpenURL(NEW_URL)
    # Parse the different streams and add them as new directory entries.
    match = re.compile(
        'connection authExpires=".+?href="(.+?)".+?supplier="mf_(.+?)".+?transferFormat="(.+?)"'
        ).findall(html)
    for m3u8_url, supplier, transfer_format in match:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier == 'akamai_uk_hls':
                tmp_sup = 1
            elif supplier == 'limelight_uk_hls':
                tmp_sup = 2
            m3u8_breakdown = re.compile('(.+?)iptv.+?m3u8(.+?)$').findall(m3u8_url)
            #print m3u8_breakdown
            # print m3u8_url
            m3u8_html = OpenURL(m3u8_url)
            m3u8_match = re.compile('BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)\n(.+?)\n').findall(m3u8_html)
            for bandwidth, resolution, stream in m3u8_match:
                # print bandwidth
                # print resolution
                #print stream
                url = "%s%s%s" % (m3u8_breakdown[0][0], stream, m3u8_breakdown[0][1])
                #print url
                if int(bandwidth) == 1012300:
                    tmp_br = 2
                elif int(bandwidth) == 1799880:
                    tmp_br = 4
                elif int(bandwidth) == 3116400:
                    tmp_br = 6
                elif int(bandwidth) == 5509880:
                    tmp_br = 7
                retlist.append((tmp_sup, tmp_br, url, resolution))
    # It may be useful to parse these additional streams as a default as they offer additional bandwidths.
    match = re.compile(
        'kind="video".+?connection href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"'
        ).findall(html)
    # print match
    unique = []
    for item in match:
        if item not in unique:
            unique.append(item)
    # print unique
    for m3u8_url, supplier, transfer_format in unique:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier == 'akamai_hls_open':
                tmp_sup = 1
            elif supplier == 'limelight_hls_open':
                tmp_sup = 2
            m3u8_breakdown = re.compile('.+?master.m3u8(.+?)$').findall(m3u8_url)
        # print m3u8_url
        # print m3u8_breakdown
        m3u8_html = OpenURL(m3u8_url)
        # print m3u8_html
        m3u8_match = re.compile('BANDWIDTH=(.+?),RESOLUTION=(.+?),.+?\n(.+?)\n').findall(m3u8_html)
        # print m3u8_match
        for bandwidth, resolution, stream in m3u8_match:
            # print bandwidth
            # print resolution
            # print stream
            url = "%s%s" % (stream, m3u8_breakdown[0][0])
            # This is not entirely correct, displayed bandwidth may be higher or lower than actual bandwidth.
            if int(bandwidth) <= 801000:
                tmp_br = 1
            elif int(bandwidth) <= 1510000:
                tmp_br = 3
            elif int(bandwidth) <= 2410000:
                tmp_br = 5
            retlist.append((tmp_sup, tmp_br, url, resolution))
    match = re.compile('service="captions".+?connection href="(.+?)"').findall(html)
    # print "Subtitle URL: %s"%match
    # print retlist
    if not match:
        # print "No streams found"
        check_geo = re.search(
            '<error id="geolocation"/>', html)
        if check_geo:
            # print "Geoblock detected, raising error message"
            dialog = xbmcgui.Dialog()
            dialog.ok(translation(30400), translation(30401))
            raise
    return retlist, match
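The BANDWIDTH/RESOLUTION regex used above is easiest to follow against a sample master-playlist entry. A synthetic example (not real BBC data) showing what the pattern captures:

# Synthetic master-playlist excerpt to illustrate what the regex extracts.
import re

m3u8_html = ('#EXT-X-STREAM-INF:BANDWIDTH=1799880,'
             'CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=960x540\n'
             'iptv_hd_abr_v1.m3u8\n')
m3u8_match = re.compile('BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)\n(.+?)\n').findall(m3u8_html)
print(m3u8_match)  # [('1799880', '960x540', 'iptv_hd_abr_v1.m3u8')]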
Example #18
def GetCategoryPage(page_url, just_episodes=False):

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'pgn__list', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
                main_base_url = re.search(r'(.+?)\?.+?', page_url).group(1)
            else:
                main_base_url = page_url
            page_range = list(range(current_page, current_page + 1))
            next_page_match = re.search(
                r'pgn__page--next.*?href="(.*?page=)(.*?)"', html)
            if next_page_match:
                page_base_url = main_base_url + next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            pages = re.findall(r'<li class="pgn__page.*?</li>',
                               html,
                               flags=(re.DOTALL | re.MULTILINE))
            if pages:
                last = pages[-2]
                last_page = re.search(r'href=".*?page=(.*?)"', last)
                page_base_url = page_url + '?page='
                total_pages = int(last_page.group(1))
            page_range = list(range(1, total_pages + 1))

    for page in page_range:

        if page > current_page:
            page_url = page_base_url + str(page)
            html = OpenURL(page_url)

        list_item_num = 1

        programmes = html.split('<div class="programme-item')
        for programme in programmes:

            series_id = ''
            series_id_match = re.search(
                r'<a class="category-episodes" href="/programmes/(.+?)/episodes"',
                programme)
            if series_id_match:
                series_id = series_id_match.group(1)

            programme_id = ''
            programme_id_match = re.search(r'href="/programmes/(.+?)"',
                                           programme)
            if programme_id_match:
                programme_id = programme_id_match.group(1)

            name = ''
            name_match = re.search(
                r'<span class="programme-item-title.+?>(.+?)</span>',
                programme)
            if name_match:
                name = name_match.group(1)

            subtitle = ''
            subtitle_match = re.search(
                r'<p class="programme-item-subtitle.+?>(.+?)</p>', programme)
            if subtitle_match:
                subtitle = subtitle_match.group(1)

            image = ''
            image_match = re.search(r'class="media__image" src="(.+?)"',
                                    programme)
            if image_match:
                image = 'http://' + image_match.group(1)

            synopsis = ''
            synopsis_match = re.search(
                r'<p class="programme-item-synopsis.+?>(.+?)</p>', programme)
            if synopsis_match:
                synopsis = synopsis_match.group(1)

            station = ''
            station_match = re.search(
                r'class="programme-item-network.+?>\s*(.+?)\s*</a>', programme)
            if station_match:
                station = station_match.group(1).strip()

            series_title = "[B]%s - %s[/B]" % (station, name)
            title = "[B]%s[/B] - %s %s" % (station, name, subtitle)

            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, '')
            elif programme_id:  #TODO maybe they are not always mutually exclusive

                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, ' ', '')

            percent = int(100 * (page + list_item_num / len(programmes)) /
                          total_pages)
            pDialog.update(percent, translation(30319), name)

            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 137, '', '', '')

    #BUG: this should sort by original order but it doesn't (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]),
                             xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
Example #19
def ParseJSON(programme_data, current_url):
    """Parses the JSON data containing programme information of a page. Contains a lot of fallbacks
    """

    added_playables = []
    added_directories = []

    if programme_data:
        name = ''
        if 'header' in programme_data:
            if 'title' in programme_data['header']:
                name = programme_data['header']['title']
            url_split = current_url.replace('&', '?').split('?')
            is_paginated = False
            """ Avoid duplicate entries by checking if we are on page >1
            """
            for part in url_split:
                if part.startswith('page'):
                    is_paginated = True
            if not is_paginated:
                if 'availableSlices' in programme_data['header']:
                    current_series = programme_data['header']['currentSliceId']
                    slices = programme_data['header']['availableSlices']
                    if slices is not None:
                        for series in slices:
                            if series['id'] == current_series:
                                continue
                            base_url = url_split[0]
                            series_url = base_url + '?seriesId=' + series['id']
                            AddMenuEntry(
                                '[B]%s: %s[/B]' % (name, series['title']),
                                series_url, 128, '', '', '')

        programmes = None
        if 'currentLetter' in programme_data:
            # This must be an A-Z page.
            current_letter = programme_data['currentLetter']
            programmes = programme_data['programmes'][current_letter][
                'entities']
        elif 'entities' in programme_data:
            # This must be a category or most popular.
            programmes = programme_data['entities']
        elif 'items' in programme_data:
            # This must be Added or Watching.
            programmes = programme_data['items']

        if programmes:
            for item in programmes:
                meta = None
                if 'props' in item:
                    meta = item.get('meta')
                    item = item.get('props')
                ParseSingleJSON(meta, item, name, added_playables,
                                added_directories)

        # The next section is for global and channel highlights. They are a bit tricky.
        groups = None
        highlights = None
        bundles = None
        if 'groups' in programme_data:
            groups = programme_data.get('groups')
            for entity in groups:
                for item in entity['entities']:
                    item = item.get("props")
                    if not item:
                        continue
                    ParseSingleJSON(None, item, None, added_playables,
                                    added_directories)

                title = ''
                id = ''
                title = entity.get('title')
                id = entity.get('id')
                if (title and id):
                    episodes_url = 'https://www.bbc.co.uk/iplayer/group/%s' % id
                    if not episodes_url in added_directories:
                        AddMenuEntry(
                            '[B]%s: %s[/B]' % (translation(30314), title),
                            episodes_url, 128, '', '', '')

        if 'highlights' in programme_data:
            highlights = programme_data.get('highlights')
            entity = highlights.get("items")
            if entity:
                for item in entity:
                    item = item.get("props")
                    if not item:
                        continue
                    ParseSingleJSON(None, item, None, added_playables,
                                    added_directories)

        if 'bundles' in programme_data:
            bundles = programme_data.get('bundles')
            for bundle in bundles:
                entity = ''
                entity = bundle.get('entities')
                if entity:
                    for item in entity:
                        ParseSingleJSON(None, item, None, added_playables,
                                        added_directories)
                journey = ''
                journey = bundle.get('journey')
                if journey:
                    id = ''
                    id = journey.get('id')
                    type = ''
                    type = journey.get('type')
                    title = ''
                    title = bundle.get('title').get('default')
                    if title:
                        if (id and (type == 'group')):
                            if (id == 'popular'):
                                AddMenuEntry(
                                    '[B]%s: %s[/B]' %
                                    (translation(30314), title), 'url', 105,
                                    '', '', '')
                            else:
                                episodes_url = 'https://www.bbc.co.uk/iplayer/group/%s' % id
                                if not episodes_url in added_directories:
                                    AddMenuEntry(
                                        '[B]%s: %s[/B]' %
                                        (translation(30314), title),
                                        episodes_url, 128, '', '', '')
                        if (id and (type == 'category')):
                            AddMenuEntry(
                                '[B]%s: %s[/B]' % (translation(30314), title),
                                id, 126, '', '', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]),
                             xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
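A hedged sketch of how ParseJSON is presumably fed, following the pattern in ScrapeEpisodes further below; ScrapeJSON is assumed to extract the embedded JSON from the HTML and return it as a dict, but it is not shown in these examples:

# Hypothetical call path: fetch a page, pull the embedded JSON out of the HTML,
# then let ParseJSON create the directory entries.
page_url = 'https://www.bbc.co.uk/iplayer/categories/films/a-z'  # example URL
html = OpenURL(page_url)
json_data = ScrapeJSON(html)  # assumed helper, not shown in these examples
if json_data:
    ParseJSON(json_data, page_url)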
def ParseStreams(stream_id):
    retlist = []
    # print "Parsing streams for PID: %s"%stream_id
    # Open the page with the actual stream information and display the various available streams.
    NEW_URL = "https://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s" % stream_id
    html = OpenURL(NEW_URL)
    # Parse the different streams and add them as new directory entries.
    match = re.compile(
        'connection authExpires=".+?href="(.+?)".+?supplier="mf_(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    source = int(ADDON.getSetting('catchup_source'))
    for m3u8_url, supplier, transfer_format in match:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier.startswith('akamai') and source in [0, 1]:
                tmp_sup = 1
            elif supplier.startswith('limelight') and source in [0, 2]:
                tmp_sup = 2
            elif supplier.startswith('bidi') and source in [0, 3]:
                tmp_sup = 3
            else:
                continue
            m3u8_breakdown = re.compile('(.+?)iptv.+?m3u8(.+?)$').findall(
                m3u8_url)
            m3u8_html = OpenURL(m3u8_url)
            m3u8_match = re.compile(
                'BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)(?:,.+?\n|\n)(.+?)\n'
            ).findall(m3u8_html)
            for bandwidth, resolution, stream in m3u8_match:
                url = "%s%s%s" % (m3u8_breakdown[0][0], stream,
                                  m3u8_breakdown[0][1])
                if 1000000 <= int(bandwidth) <= 1100000:
                    tmp_br = 2
                elif 1790000 <= int(bandwidth) <= 1800000:
                    tmp_br = 4
                elif 3100000 <= int(bandwidth) <= 3120000:
                    tmp_br = 6
                elif int(bandwidth) >= 5500000:
                    tmp_br = 7
                retlist.append((tmp_sup, tmp_br, url, resolution))
    # It may be useful to parse these additional streams as a default as they offer additional bandwidths.
    match = re.compile(
        'kind="video".+?connection href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    unique = []
    for item in match:
        if item not in unique:
            unique.append(item)
    for m3u8_url, supplier, transfer_format in unique:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier.startswith('akamai_hls_open') and source in [0, 1]:
                tmp_sup = 1
            elif supplier.startswith('limelight_hls_open') and source in [
                    0, 2
            ]:
                tmp_sup = 2
            else:
                continue
            m3u8_breakdown = re.compile('.+?master.m3u8(.+?)$').findall(
                m3u8_url)
        m3u8_html = OpenURL(m3u8_url)
        m3u8_match = re.compile(
            'BANDWIDTH=(.+?),RESOLUTION=(.+?),.+?\n(.+?)\n').findall(m3u8_html)
        for bandwidth, resolution, stream in m3u8_match:
            url = "%s%s" % (stream, m3u8_breakdown[0][0])
            # This is not entirely correct, displayed bandwidth may be higher or lower than actual bandwidth.
            if int(bandwidth) <= 801000:
                tmp_br = 1
            elif int(bandwidth) <= 1510000:
                tmp_br = 3
            elif int(bandwidth) <= 2410000:
                tmp_br = 5
            retlist.append((tmp_sup, tmp_br, url, resolution))
    # Some events have special live streams which show up as normal programmes.
    # They need to be parsed separately.
    match = re.compile(
        'connection.+?href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    unique = []
    for item in match:
        if item not in unique:
            unique.append(item)
    for m3u8_url, supplier, transfer_format in unique:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier == 'akamai_hls_live':
                tmp_sup = 1
            elif supplier == 'll_hls_live':
                tmp_sup = 2
            else:
                # This is not a live stream, skip code to avoid unnecessary loading of playlists.
                continue
            html = OpenURL(m3u8_url)
            match = re.compile(
                '#EXT-X-STREAM-INF:PROGRAM-ID=(.+?),BANDWIDTH=(.+?),CODECS="(.*?)",RESOLUTION=(.+?)\s*(.+?.m3u8)'
            ).findall(html)
            for stream_id, bandwidth, codecs, resolution, url in match:
                # Note: This is not entirely correct as these bandwidths relate to live programmes,
                # not catchup.
                if int(bandwidth) <= 1000000:
                    tmp_br = 1
                elif int(bandwidth) <= 1100000:
                    tmp_br = 2
                elif 1700000 <= int(bandwidth) <= 1900000:
                    tmp_br = 4
                elif 3100000 <= int(bandwidth) <= 3120000:
                    tmp_br = 6
                elif int(bandwidth) >= 5500000:
                    tmp_br = 7
                retlist.append((tmp_sup, tmp_br, url, resolution))
    match = re.compile('service="captions".+?connection href="(.+?)"').findall(
        html)
    # print "Subtitle URL: %s"%match
    # print retlist
    if not match:
        # print "No streams found"
        check_geo = re.search('<error id="geolocation"/>', html)
        if check_geo:
            # print "Geoblock detected, raising error message"
            dialog = xbmcgui.Dialog()
            dialog.ok(translation(30400), translation(30401))
            raise
    return retlist, match
Example #21
def ScrapeEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    ScrapeEpisodes contains a number of special treatments which are only needed for
    specific pages, e.g. Search, but it allows a single function to be used for all
    kinds of pages.
    """

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'<ol class="paginat.*?</ol>', html, re.DOTALL)
    if not paginate:
        paginate = re.search(r'<div class="paginate.*?</div>', html, re.DOTALL)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            pages = re.findall(r'<li class="pag.*?</li>', paginate.group(0),
                               re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'page=(\d*)', last)
                if last_page:
                    total_pages = int(last_page.group(1))
                else:
                    total_pages = current_page
            if current_page < total_pages:
                split_page_url = page_url.replace('&', '?').split('?')
                page_base_url = split_page_url[0]
                for part in split_page_url[1:len(split_page_url)]:
                    if not part.startswith('page'):
                        page_base_url = page_base_url + '?' + part
                if '?' in page_base_url:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '&page='
                else:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '?page='
                next_page = current_page + 1
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            pages = re.findall(r'<li class="pag.*?</li>', paginate.group(0),
                               re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'page=(\d*)', last)
                split_page_url = page_url.replace('&', '?').split('?')
                page_base_url = split_page_url[0]
                for part in split_page_url[1:len(split_page_url)]:
                    if not part.startswith('page'):
                        page_base_url = page_base_url + '?' + part
                if '?' in page_base_url:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '&page='
                else:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '?page='
                total_pages = int(last_page.group(1))
            page_range = list(range(1, total_pages + 1))

    for page in page_range:

        if page > current_page:
            page_url = 'https://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        json_data = ScrapeJSON(html)
        if json_data:
            ParseJSON(json_data, page_url)

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'https://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 128, '', '', '')

    pDialog.close()
Example #22
def ScrapeAtoZEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    ScrapeAtoZEpisodes contains a number of special treatments which are only needed
    for specific pages, e.g. Search, but it allows a single function to be used for
    all kinds of pages.
    """

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = list(range(1))

    json_data = ScrapeJSON(html)
    if json_data:

        last_page = 1
        current_page = 1
        if 'pagination' in json_data:
            page_base_url_match = re.search(r'(.+?)page=', page_url)
            if page_base_url_match:
                page_base_url = page_base_url_match.group(0)
            else:
                page_base_url = page_url + "?page="
            current_page = json_data['pagination'].get('currentPage')
            last_page = json_data['pagination'].get('totalPages')
            if int(ADDON.getSetting('paginate_episodes')) == 0:
                current_page_match = re.search(r'page=(\d*)', page_url)
                if current_page_match:
                    current_page = int(current_page_match.group(1))
                page_base_url_match = re.search(r'(.+?)page=', page_url)
                if page_base_url_match:
                    page_base_url = page_base_url_match.group(0)
                else:
                    page_base_url = page_url + "?page="
                if current_page < last_page:
                    next_page = current_page + 1
                else:
                    next_page = current_page
                page_range = list(range(current_page, current_page + 1))
            else:
                page_range = list(range(1, last_page + 1))

        for page in page_range:

            if page > current_page:
                page_url = page_base_url + str(page)
                html = OpenURL(page_url)

            json_data = ScrapeJSON(html)
            if json_data:
                ParseJSON(json_data, page_url)

            percent = int(100 * page / last_page)
            pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 134, '', '', '')

    pDialog.close()
Example #23
def ScrapeEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    ScrapeEpisodes contains a number of special treatments which are only needed for
    specific pages, e.g. Search, but it allows a single function to be used for all
    kinds of pages.
    """

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = range(1)
    paginate = re.search(r'<div class="paginate.*?</div>', html, re.DOTALL)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = range(current_page, current_page + 1)
            next_page_match = re.search(
                r'<span class="next txt">.+?href="(.*?page=)(.*?)"',
                paginate.group(0), re.DOTALL)
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = range(current_page, current_page + 1)
        else:
            pages = re.findall(r'<li class="page.*?</li>', paginate.group(0),
                               re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a href="(.*?page=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = range(1, total_pages + 1)

    for page in page_range:

        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        # NOTE remove inner li to match outer li

        # <li data-version-type="hd">
        html = re.compile(r'<li data-version-type.*?</li>',
                          flags=(re.DOTALL | re.MULTILINE)).sub('', html)

        # <li class="list-item programme"  data-ip-id="p026f2t4">
        list_items = re.findall(r'<li class="list-item.*?</li>',
                                html,
                                flags=(re.DOTALL | re.MULTILINE))

        list_item_num = 1

        for li in list_items:

            # <li class="list-item unavailable"  data-ip-id="b06sq9xj">
            unavailable_match = re.search(
                '<li class="list-item.*?unavailable.*?"',
                li,
                flags=(re.DOTALL | re.MULTILINE))
            if unavailable_match:
                continue

            # <li class="list-item search-group"  data-ip-id="b06rdtx0">
            search_group = False
            search_group_match = re.search(
                '<li class="list-item.*?search-group.*?"',
                li,
                flags=(re.DOTALL | re.MULTILINE))
            if search_group_match:
                search_group = True

            main_url = None
            # <a href="/iplayer/episode/p026gmw9/world-of-difference-the-models"
            # title="World of Difference, The Models" class="list-item-link stat"
            url_match = re.search(r'<a.*?href="(.*?)".*?list-item-link.*?>',
                                  li,
                                  flags=(re.DOTALL | re.MULTILINE))
            if url_match:
                url = url_match.group(1)
                if url:
                    main_url = 'http://www.bbc.co.uk' + url

            name = ''
            title = ''
            #<div class="title top-title">World of Difference</div>
            title_match = re.search(
                r'<div class="title top-title">\s*(.*?)\s*</div>',
                li,
                flags=(re.DOTALL | re.MULTILINE))
            if title_match:
                title = title_match.group(1)
                name = title

            subtitle = None
            #<div class="subtitle">The Models</div>
            subtitle_match = re.search(
                r'<div class="subtitle">\s*(.*?)\s*</div>',
                li,
                flags=(re.DOTALL | re.MULTILINE))
            if subtitle_match:
                subtitle = subtitle_match.group(1)
                if subtitle:
                    name = name + " - " + subtitle

            icon = ''
            type = None
            # <div class="r-image"  data-ip-type="episode"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p026vl1q.jpg">
            # <div class="r-image"  data-ip-type="group"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p037ty9z.jpg">
            image_match = re.search(
                r'<div class="r-image".+?data-ip-type="(.*?)".+?data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/(.*?)\.jpg"',
                li,
                flags=(re.DOTALL | re.MULTILINE))
            if image_match:
                type = image_match.group(1)
                image = image_match.group(2)
                if image:
                    icon = "http://ichef.bbci.co.uk/images/ic/832x468/" + image + ".jpg"

            synopsis = ''
            # <p class="synopsis">What was it like to be a top fashion model 30 years ago? (1978)</p>
            synopsis_match = re.search(r'<p class="synopsis">\s*(.*?)\s*</p>',
                                       li,
                                       flags=(re.DOTALL | re.MULTILINE))
            if synopsis_match:
                synopsis = synopsis_match.group(1)

            aired = ''
            # <span class="release">\nFirst shown: 8 Jun 1967\n</span>
            release_match = re.search(
                r'<span class="release">.*?First shown:\s*(.*?)\n.*?</span>',
                li,
                flags=(re.DOTALL | re.MULTILINE))
            if release_match:
                release = release_match.group(1)
                if release:
                    aired = FirstShownToAired(release)

            episodes = None
            # <a class="view-more-container avail stat" href="/iplayer/episodes/p00db1jf" data-progress-state="">
            # <a class="view-more-container sibling stat"
            #  href="/iplayer/search?q=doctor&amp;search_group_id=urn:bbc:programmes:b06qbs4n">
            episodes_match = re.search(
                r'<a class="view-more-container.+?stat".+?href="(.*?)"',
                li,
                flags=(re.DOTALL | re.MULTILINE))
            if episodes_match:
                episodes = episodes_match.group(1)

            more = None
            # <em class="view-more-heading">27</em>
            more_match = re.search(r'<em class="view-more-heading">(.*?)</em>',
                                   li,
                                   flags=(re.DOTALL | re.MULTILINE))
            if more_match:
                more = more_match.group(1)

            if episodes:
                episodes_url = 'http://www.bbc.co.uk' + episodes
                if search_group:
                    AddMenuEntry(
                        '[B]%s[/B] - %s' % (title, translation(30318)),
                        episodes_url, 128, icon, '', '')
                else:
                    AddMenuEntry(
                        '[B]%s[/B] - %s %s' %
                        (title, more, translation(30313)), episodes_url, 128,
                        icon, '', '')
            elif more:
                AddMenuEntry(
                    '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                    main_url, 128, icon, '', '')

            if item_type != "group":
                CheckAutoplay(name, main_url, icon, synopsis, aired)

            percent = int(100 * (page + float(list_item_num) / len(list_items))
                          / total_pages)
            pDialog.update(percent, translation(30319), name)

            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 128, '', '', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]),
                             xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)

    pDialog.close()
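
The "First shown" dates scraped above are converted by FirstShownToAired, which is defined elsewhere in the add-on. A minimal sketch of a plausible implementation, assuming the add-on wants a dd.mm.yyyy date string for the aired field (the real helper may parse and format differently):

# Illustrative sketch only; the real FirstShownToAired lives elsewhere in the
# add-on and its output format is an assumption here.
import datetime

def FirstShownToAired(release):
    """Convert a 'First shown' string such as '8 Jun 1967' to dd.mm.yyyy."""
    try:
        parsed = datetime.datetime.strptime(release, '%d %b %Y')
        return parsed.strftime('%d.%m.%Y')
    except ValueError:
        # Strings such as bare years or relative dates are passed through.
        return release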
Beispiel #24
0
def ParseStreams(stream_id):
    retlist = []
    # print "Parsing streams for PID: %s"%stream_id[0]
    # Open the page with the actual stream information and parse the available streams.
    NEW_URL = ("http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/"
               "mediaset/iptv-all/vpid/%s" % stream_id[0])
    html = OpenURL(NEW_URL)
    # Parse the available streams and collect them in the result list.
    match = re.compile(
        'connection authExpires=".+?href="(.+?)".+?supplier="mf_(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    for m3u8_url, supplier, transfer_format in match:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier == 'akamai_uk_hls':
                tmp_sup = 1
            elif supplier == 'limelight_uk_hls':
                tmp_sup = 2
            m3u8_breakdown = re.compile('(.+?)iptv.+?m3u8(.+?)$').findall(
                m3u8_url)
            #print m3u8_breakdown
            # print m3u8_url
            m3u8_html = OpenURL(m3u8_url)
            m3u8_match = re.compile(
                'BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)\n(.+?)\n').findall(
                    m3u8_html)
            for bandwidth, resolution, stream in m3u8_match:
                # print bandwidth
                # print resolution
                #print stream
                url = "%s%s%s" % (m3u8_breakdown[0][0], stream,
                                  m3u8_breakdown[0][1])
                #print url
                if int(bandwidth) == 1012300:
                    tmp_br = 2
                elif int(bandwidth) == 1799880:
                    tmp_br = 4
                elif int(bandwidth) == 3116400:
                    tmp_br = 6
                elif int(bandwidth) == 5509880:
                    tmp_br = 7
                retlist.append((tmp_sup, tmp_br, url, resolution))
    # It may be useful to parse these additional streams as a default as they offer additional bandwidths.
    match = re.compile(
        'kind="video".+?connection href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    # print match
    unique = []
    for item in match:
        if item not in unique:
            unique.append(item)
    # print unique
    for m3u8_url, supplier, transfer_format in unique:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format != 'hls':
            # Only HLS playlists are parsed below; skip other transfer formats.
            continue
        if supplier == 'akamai_hls_open':
            tmp_sup = 1
        elif supplier == 'limelight_hls_open':
            tmp_sup = 2
        m3u8_breakdown = re.compile('.+?master.m3u8(.+?)$').findall(m3u8_url)
        # print m3u8_url
        # print m3u8_breakdown
        m3u8_html = OpenURL(m3u8_url)
        # print m3u8_html
        m3u8_match = re.compile(
            'BANDWIDTH=(.+?),RESOLUTION=(.+?),.+?\n(.+?)\n').findall(m3u8_html)
        # print m3u8_match
        for bandwidth, resolution, stream in m3u8_match:
            # print bandwidth
            # print resolution
            # print stream
            url = "%s%s" % (stream, m3u8_breakdown[0][0])
            # This is not entirely correct, displayed bandwidth may be higher or lower than actual bandwidth.
            if int(bandwidth) <= 801000:
                tmp_br = 1
            elif int(bandwidth) <= 1510000:
                tmp_br = 3
            elif int(bandwidth) <= 2410000:
                tmp_br = 5
            retlist.append((tmp_sup, tmp_br, url, resolution))
    match = re.compile('service="captions".+?connection href="(.+?)"').findall(
        html)
    # print "Subtitle URL: %s"%match
    # print retlist
    if not match:
        # print "No streams found"
        check_geo = re.search('<error id="geolocation"/>', html)
        if check_geo:
            # print "Geoblock detected, raising error message"
            dialog = xbmcgui.Dialog()
            dialog.ok(translation(30400), translation(30401))
            raise
    return retlist, match
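
ParseStreams returns (retlist, match): a list of (supplier_rank, bitrate_rank, url, resolution) tuples plus any captions connection URLs found in the media selector XML. A hedged usage sketch with a placeholder vpid (the add-on's real caller and its supplier/bitrate preference logic live elsewhere):

# Usage sketch (illustrative): pick the stream with the highest bitrate
# ranking from the tuples ParseStreams returns. 'b0000000' is a placeholder.
streams, subtitle_urls = ParseStreams(['b0000000'])
if streams:
    supplier, bitrate, url, resolution = max(streams, key=lambda s: s[1])
    # 'url' is the m3u8 variant to hand to the player; subtitle_urls holds
    # any captions hrefs for optional subtitle download.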
def ListHighlights(highlights_url):
    """Creates a list of the programmes in the highlights section.
    """

    html = OpenURL('http://www.bbc.co.uk/%s' % highlights_url)

    inner_anchors = re.findall(r'<a.*?(?!<a).*?</a>',
                               html,
                               flags=(re.DOTALL | re.MULTILINE))

    # First find all groups as we need to store some properties of groups for later reuse.
    group_properties = []

    # NOTE find episode count first
    episode_count = dict()
    groups = [a for a in inner_anchors if re.match(
        r'<a[^<]*?class="grouped-items__cta.*?data-object-type="group-list-link".*?',
        a, flags=(re.DOTALL | re.MULTILINE))]
    for group in groups:

        href = ''
        href_match = re.match(
            r'<a[^<]*?href="(.*?)"',
            group, flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)

        count_match = re.search(
            r'>View all ([0-9]*).*?</a>',
            group, flags=(re.DOTALL | re.MULTILINE))
        if count_match:
            count = count_match.group(1)
            episode_count[href] = count

    groups = [a for a in inner_anchors if re.match(
        r'<a[^<]*?class="grouped-items__title.*?data-object-type="group-list-link".*?',
        a, flags=(re.DOTALL | re.MULTILINE))]
    for group in groups:

        href = ''
        href_match = re.match(
            r'<a[^<]*?href="(.*?)"',
            group, flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)

        name = ''
        name_match = re.search(
            r'<strong>(.*?)</strong>',
            group, flags=(re.DOTALL | re.MULTILINE))
        if name_match:
            name = name_match.group(1)

        count = ''
        if href in episode_count:
            count = episode_count[href]

        url = 'http://www.bbc.co.uk' + href

        # Unfortunately, the group type is not inside the links, so we need to search the whole HTML.
        group_type = ''
        group_type_match = re.search(
            r'data-group-name="' + re.escape(name) + r'".+?data-group-type="(.+?)"',
            html, flags=(re.DOTALL | re.MULTILINE))
        if group_type_match:
            group_type = group_type_match.group(1)

        position = ''
        position_match = re.search(
            r'data-object-position="(.+?)-ALL"',
            group, flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            group_properties.append(
                [position_match.group(1), name, group_type])

        AddMenuEntry('[B]%s: %s[/B] - %s %s' % (translation(30314), name, count, translation(30315)),
                     url, 128, '', '', '')

    # Some programmes show up twice in HTML, once inside the groups, once outside.
    # We need to parse both to avoid duplicates and to make sure we get all of them.
    episodelist = []

    # <a\n    href="/iplayer/episode/b06tr74y/eastenders-24122015"\n    class="grouped-items__list-link
    listeds = [a for a in inner_anchors if re.search(
        r'class="grouped-items__list-link',
        a, flags=(re.DOTALL | re.MULTILINE))]

    for listed in listeds:

        episode_id = ''
        # <a\n    href="/iplayer/episode/b06tr74y/eastenders-24122015"
        id_match = re.match(
            r'<a.*?href="/iplayer/episode/(.*?)/',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)

        name = ''
        # <p class="grouped-items__title grouped-items__title--item typo typo--skylark">
        # <strong>EastEnders</strong></p>
        title_match = re.search(
            r'<.*?class="grouped-items__title.*?<strong>(.*?)</strong>',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            name = re.compile(r'<.*?>', flags=(re.DOTALL | re.MULTILINE)).sub('', name)

        # <p class="grouped-items__subtitle typo typo--canary">24/12/2015</p>
        subtitle_match = re.search(
            r'<.*?class="grouped-items__subtitle.*?>(.*?)<',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)

        # Assign correct group based on the position of the episode
        position = ''
        position_match = re.search(
            r'data-object-position="(.+?)"',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            for n,i in enumerate(group_properties):
                if re.match(i[0], position_match.group(1), flags=(re.DOTALL | re.MULTILINE)):
                    position = i[1]
                    # For series-catchup groups, we need to modify the title.
                    if i[2] == 'series-catchup':
                        name = i[1]+': '+name

        episodelist.append([
            episode_id,
            name,
            "%s %s" % (translation(30316), position),
            'DefaultVideo.png',
            ''])

    # < a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"\n
    # class="single-item stat"
    singles = [a for a in inner_anchors if re.search(
        r'class="single-item',
        a, flags=(re.DOTALL | re.MULTILINE))]

    for single in singles:

        object_type = ''
        # data-object-type="episode-backfill"
        data_object_type = re.search(
            r'data-object-type="(.*?)"',
            single, flags=(re.DOTALL | re.MULTILINE))
        if data_object_type:
            object_type = data_object_type.group(1)
            if object_type == "episode-backfill":
                if (highlights_url not in ['tv/bbcnews', 'tv/bbcparliament', 'tv/s4c']):
                    continue

        episode_id = ''
        url = ''
        # <a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"
        if object_type == "editorial-promo":
            id_match = re.match(
                r'<a.*?href="(.*?)"',
                single, flags=(re.DOTALL | re.MULTILINE))
        else:
            id_match = re.match(
                r'<a.*?href="/iplayer/episode/(.*?)/',
                single, flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)
            url = 'http://www.bbc.co.uk/iplayer/episode/' + episode_id

        name = ''
        # <h3 class="single-item__title typo typo--skylark"><strong>BBC Music Introducing</strong></h3>
        title_match = re.search(
            r'<.*?class="single-item__title.*?<strong>(.*?)</strong>',
            single, flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            name = re.compile(r'<.*?>', flags=(re.DOTALL | re.MULTILINE)).sub('', name)

        # <p class="single-item__subtitle typo typo--canary">From Buddhist Monk to Rock Star</p>
        subtitle_match = re.search(
            r'<.*?class="single-item__subtitle.*?>(.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)

        icon = ''
        # <div class="r-image"  data-ip-type="episode"
        # data-ip-src="http://ichef.bbci.co.uk/images/ic/406x228/p036gtc5.jpg">
        image_match = re.search(
            r'<.*?class="r-image.*?data-ip-src="(.*?)"',
            single, flags=(re.DOTALL | re.MULTILINE))
        if image_match:
            icon = image_match.group(1)

        desc = ''
        # <p class="single-item__overlay__desc">
        # The remarkable rise of Ngawang Lodup - from BBC Introducing to performing at the O2 Arena</p>
        desc_match = re.search(
            r'<.*?class="single-item__overlay__desc.*?>(.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if desc_match:
            desc = desc_match.group(1)

        aired = ''
        # <p class="single-item__overlay__subtitle">First shown: 4 Nov 2015</p>
        release_match = re.search(
            r'<.*?class="single-item__overlay__subtitle">First shown: (.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if release_match:
            release = release_match.group(1)
            if release:
                aired = FirstShownToAired(release)

        add_entry = True
        for n, i in enumerate(episodelist):
            if i[0] == episode_id:
                episodelist[n][2] = desc
                episodelist[n][3] = icon
                episodelist[n][4] = aired
                add_entry = False
        if add_entry:
            if object_type == "editorial-promo":
                if episode_id:
                    AddMenuEntry('[B]%s[/B]' % (name), episode_id, 128, icon, '', '')
            else:
                if url:
                    CheckAutoplay(name, url, icon, desc, aired)

    # Finally add all programmes which have been identified as part of a group before.
    for episode in episodelist:
        episode_url = "http://www.bbc.co.uk/iplayer/episode/%s" % episode[0]
        if ((ADDON.getSetting('suppress_incomplete') == 'false') or (not episode[4] == '')):
            if episode[0]:
                CheckAutoplay(episode[1], episode_url, episode[3], episode[2], episode[4])

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
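
The group assignment above hinges on data-object-position values: group anchors carry a position ending in "-ALL", individual episodes carry the same prefix plus an index, and re.match on the stored prefix links an episode to its group. A small illustration with invented sample values:

# Invented sample values showing how an episode is matched to its group via
# the data-object-position prefix stored in group_properties.
import re

group_properties = [['GROUP1', 'Drama Box Sets', 'series-catchup']]
episode_position = 'GROUP1-2'
episode_name = 'EastEnders - 24/12/2015'
for prefix, group_name, group_type in group_properties:
    if re.match(prefix, episode_position):
        position = group_name
        if group_type == 'series-catchup':
            # Series-catchup groups prefix the group name to the episode title.
            episode_name = group_name + ': ' + episode_name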
Beispiel #26
0
def GetPage(page_url, just_episodes=False):
    """   Generic Radio page scraper.   """

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = list(range(current_page, current_page + 1))
            next_page_match = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                paginate.group(0))
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            pages = re.findall(r'<li.+?class="pagination__page.*?</li>',
                               paginate.group(0))
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = list(range(1, total_pages + 1))

    for page in page_range:

        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        masthead_title = ''
        masthead_title_match = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>',
            html)
        if masthead_title_match:
            masthead_title = masthead_title_match.group(1)
        else:
            alternative_masthead_title_match = re.search(
                r'<div class="br-masthead__title">.*?<a href="[^"]+">([^<]+?)</a>',
                html, re.M | re.S)
            if alternative_masthead_title_match:
                masthead_title = alternative_masthead_title_match.group(1)

        list_item_num = 1
        data_match = re.findall(
            r'<script type="application\/ld\+json">(.*?)<\/script>', html,
            re.S)
        if data_match:
            json_data = json.loads(data_match[0])

            for episode in json_data['episode']:
                programme_id = ''
                programme_id = episode['identifier']

                name = ''
                name = episode['name']
                title = "[B]%s[/B] - %s" % (masthead_title, name)

                image = ''
                image = episode['image']

                synopsis = ''
                synopsis = episode['description']

                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, synopsis, '')

                percent = int(
                    100 * (page + float(list_item_num) / len(json_data['episode']))
                    / total_pages)
                pDialog.update(percent, translation(30319), name)

                list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 136, '', '', '')

    #BUG: this should sort by original order but it doesn't (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]),
                             xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
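
This variant reads the page's application/ld+json block instead of scraping list items; the keys accessed above imply a payload with a top-level "episode" list whose items carry identifier, name, image and description. An invented sample of that shape (all values are placeholders):

# Invented sample of the JSON-LD payload shape the loop above expects.
json_data = {
    "episode": [
        {
            "identifier": "b0000000",
            "name": "Example Episode",
            "image": "http://ichef.bbci.co.uk/images/ic/480x270/p0000000.jpg",
            "description": "Placeholder synopsis."
        }
    ]
}
for episode in json_data["episode"]:
    url = "http://www.bbc.co.uk/radio/play/%s" % episode["identifier"]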
Beispiel #27
0
def GetAtoZPage(page_url, just_episodes=False):
    """   Generic Radio page scraper.   """

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = list(range(current_page, current_page + 1))
            next_page_match = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                paginate.group(0))
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            pages = re.findall(r'<li.+?class="pagination__page.*?</li>',
                               paginate.group(0))
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = list(range(1, total_pages + 1))

    for page in page_range:

        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        masthead_title = ''
        masthead_title_match = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>',
            html)
        if masthead_title_match:
            masthead_title = masthead_title_match.group(1)
        else:
            alternative_masthead_title_match = re.search(
                r'<div class="br-masthead__title">.*?<a href="[^"]+">([^<]+?)</a>',
                html, re.M | re.S)
            if alternative_masthead_title_match:
                masthead_title = alternative_masthead_title_match.group(1)

        list_item_num = 1

        programmes = html.split('<li class="grid one-whole">')
        for programme in programmes:

            if not re.search(r'programme--radio', programme):
                continue

            series_id = ''
            series_id_match = re.search(
                r'data-lazylink-inc="/programmes/(.+?)/episodes/player.inc"',
                programme)
            if series_id_match:
                series_id = series_id_match.group(1)

            programme_id = ''
            programme_id_match = re.search(r'data-pid="(.+?)"', programme)
            if programme_id_match:
                programme_id = programme_id_match.group(1)

            name = ''
            name_match = re.search(r'<span property="name">(.+?)</span>',
                                   programme)
            if name_match:
                name = name_match.group(1)
            else:
                alternative_name_match = re.search(
                    r'<meta property="name" content="([^"]+?)"', programme)
                if alternative_name_match:
                    name = alternative_name_match.group(1)

            image = ''
            image_match = re.search(
                r'<meta property="image" content="(.+?)" />', programme)
            if image_match:
                image = image_match.group(1)

            synopsis = ''
            synopsis_match = re.search(
                r'<span property="description">(.+?)<\/span>', programme)
            if synopsis_match:
                synopsis = synopsis_match.group(1)

            station = ''
            station_match = re.search(
                r'<p class="programme__service.+?<strong>(.+?)<\/strong>.*?<\/p>',
                programme)
            if station_match:
                station = station_match.group(1).strip()

            series_title = "[B]%s - %s[/B]" % (station, name)
            if just_episodes:
                title = "[B]%s[/B] - %s" % (masthead_title, name)
            else:
                title = "[B]%s[/B] - %s" % (station, name)

            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, '')
            elif programme_id:  # TODO: these may not always be mutually exclusive
                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, ' ', '')

            percent = int(100 * (page + float(list_item_num) / len(programmes))
                          / total_pages)
            pDialog.update(percent, translation(30319), name)

            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 138, '', '', '')

    #BUG: this should sort by original order but it doesn't (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]),
                             xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
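
Both radio scrapers derive page_base_url and total_pages from the last <li class="pagination__page"> entry of the pagination list. A standalone sketch of that step on an invented pagination fragment:

# Invented pagination fragment illustrating how the scrapers above extract
# the page base URL and the total page count.
import re

html = ('<ol class="pagination">'
        '<li class="pagination__page"><a href="/programmes/a-z/by/a?page=1">1</a></li>'
        '<li class="pagination__page"><a href="/programmes/a-z/by/a?page=12">12</a></li>'
        '</ol>')
paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
pages = re.findall(r'<li.+?class="pagination__page.*?</li>', paginate.group(0))
last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', pages[-1])
page_base_url = last_page.group(1)    # '/programmes/a-z/by/a?page='
total_pages = int(last_page.group(2))  # 12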
Beispiel #28
0
def ListHighlights(highlights_url):
    """Creates a list of the programmes in the highlights section.
    """

    html = OpenURL('http://www.bbc.co.uk/%s' % highlights_url)

    inner_anchors = re.findall(r'<a.*?(?!<a).*?</a>',
                               html,
                               flags=(re.DOTALL | re.MULTILINE))

    # First find all groups as we need to store some properties of groups for later reuse.
    group_properties = []

    # NOTE find episode count first
    episode_count = dict()
    groups = [
        a for a in inner_anchors if re.match(
            r'<a[^<]*?class="grouped-items__cta.*?data-object-type="group-list-link".*?',
            a,
            flags=(re.DOTALL | re.MULTILINE))
    ]
    for group in groups:

        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"',
                              group,
                              flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)

        count_match = re.search(r'>View all ([0-9]*).*?</a>',
                                group,
                                flags=(re.DOTALL | re.MULTILINE))
        if count_match:
            count = count_match.group(1)
            episode_count[href] = count

    groups = [
        a for a in inner_anchors if re.match(
            r'<a[^<]*?class="grouped-items__title.*?data-object-type="group-list-link".*?',
            a,
            flags=(re.DOTALL | re.MULTILINE))
    ]
    for group in groups:

        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"',
                              group,
                              flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)

        name = ''
        name_match = re.search(r'<strong>(.*?)</strong>',
                               group,
                               flags=(re.DOTALL | re.MULTILINE))
        if name_match:
            name = name_match.group(1)

        count = ''
        if href in episode_count:
            count = episode_count[href]

        url = 'http://www.bbc.co.uk' + href

        # Unfortunately, the group type is not inside the links, so we need to search the whole HTML.
        group_type = ''
        group_type_match = re.search(r'data-group-name="' + re.escape(name) +
                                     r'".+?data-group-type="(.+?)"',
                                     html,
                                     flags=(re.DOTALL | re.MULTILINE))
        if group_type_match:
            group_type = group_type_match.group(1)

        position = ''
        position_match = re.search(r'data-object-position="(.+?)-ALL"',
                                   group,
                                   flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            group_properties.append(
                [position_match.group(1), name, group_type])

        AddMenuEntry(
            '[B]%s: %s[/B] - %s %s' %
            (translation(30314), name, count, translation(30315)), url, 128,
            '', '', '')

    # Some programmes show up twice in HTML, once inside the groups, once outside.
    # We need to parse both to avoid duplicates and to make sure we get all of them.
    episodelist = []

    # <a\n    href="/iplayer/episode/b06tr74y/eastenders-24122015"\n    class="grouped-items__list-link
    listeds = [
        a for a in inner_anchors
        if re.search(r'class="grouped-items__list-link',
                     a,
                     flags=(re.DOTALL | re.MULTILINE))
    ]

    for listed in listeds:

        episode_id = ''
        # <a\n    href="/iplayer/episode/b06tr74y/eastenders-24122015"
        id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/',
                            listed,
                            flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)

        name = ''
        # <p class="grouped-items__title grouped-items__title--item typo typo--skylark">
        # <strong>EastEnders</strong></p>
        title_match = re.search(
            r'<.*?class="grouped-items__title.*?<strong>(.*?)</strong>',
            listed,
            flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            name = re.compile(r'<.*?>',
                              flags=(re.DOTALL | re.MULTILINE)).sub('', name)

        # <p class="grouped-items__subtitle typo typo--canary">24/12/2015</p>
        subtitle_match = re.search(
            r'<.*?class="grouped-items__subtitle.*?>(.*?)<',
            listed,
            flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)

        # Assign correct group based on the position of the episode
        position = ''
        position_match = re.search(r'data-object-position="(.+?)"',
                                   listed,
                                   flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            for n, i in enumerate(group_properties):
                if re.match(i[0],
                            position_match.group(1),
                            flags=(re.DOTALL | re.MULTILINE)):
                    position = i[1]
                    # For series-catchup groups, we need to modify the title.
                    if i[2] == 'series-catchup':
                        name = i[1] + ': ' + name

        episodelist.append([
            episode_id, name,
            "%s %s" % (translation(30316), position), 'DefaultVideo.png', ''
        ])

    # < a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"\n
    # class="single-item stat"
    singles = [
        a for a in inner_anchors if re.search(
            r'class="single-item', a, flags=(re.DOTALL | re.MULTILINE))
    ]

    for single in singles:

        object_type = ''
        # data-object-type="episode-backfill"
        data_object_type = re.search(r'data-object-type="(.*?)"',
                                     single,
                                     flags=(re.DOTALL | re.MULTILINE))
        if data_object_type:
            object_type = data_object_type.group(1)
            if object_type == "episode-backfill":
                if (highlights_url
                        not in ['tv/bbcnews', 'tv/bbcparliament', 'tv/s4c']):
                    continue

        episode_id = ''
        url = ''
        # <a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"
        if object_type == "editorial-promo":
            id_match = re.match(r'<a.*?href="(.*?)"',
                                single,
                                flags=(re.DOTALL | re.MULTILINE))
        else:
            id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/',
                                single,
                                flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)
            url = 'http://www.bbc.co.uk/iplayer/episode/' + episode_id

        name = ''
        # <h3 class="single-item__title typo typo--skylark"><strong>BBC Music Introducing</strong></h3>
        title_match = re.search(
            r'<.*?class="single-item__title.*?<strong>(.*?)</strong>',
            single,
            flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            name = re.compile(r'<.*?>',
                              flags=(re.DOTALL | re.MULTILINE)).sub('', name)

        # <p class="single-item__subtitle typo typo--canary">From Buddhist Monk to Rock Star</p>
        subtitle_match = re.search(
            r'<.*?class="single-item__subtitle.*?>(.*?)<',
            single,
            flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)

        icon = ''
        # <div class="r-image"  data-ip-type="episode"
        # data-ip-src="http://ichef.bbci.co.uk/images/ic/406x228/p036gtc5.jpg">
        image_match = re.search(r'<.*?class="r-image.*?data-ip-src="(.*?)"',
                                single,
                                flags=(re.DOTALL | re.MULTILINE))
        if image_match:
            icon = image_match.group(1)

        desc = ''
        # <p class="single-item__overlay__desc">
        # The remarkable rise of Ngawang Lodup - from BBC Introducing to performing at the O2 Arena</p>
        desc_match = re.search(
            r'<.*?class="single-item__overlay__desc.*?>(.*?)<',
            single,
            flags=(re.DOTALL | re.MULTILINE))
        if desc_match:
            desc = desc_match.group(1)

        aired = ''
        # <p class="single-item__overlay__subtitle">First shown: 4 Nov 2015</p>
        release_match = re.search(
            r'<.*?class="single-item__overlay__subtitle">First shown: (.*?)<',
            single,
            flags=(re.DOTALL | re.MULTILINE))
        if release_match:
            release = release_match.group(1)
            if release:
                aired = FirstShownToAired(release)

        add_entry = True
        for n, i in enumerate(episodelist):
            if i[0] == episode_id:
                episodelist[n][2] = desc
                episodelist[n][3] = icon
                episodelist[n][4] = aired
                add_entry = False
        if add_entry:
            if object_type == "editorial-promo":
                AddMenuEntry('[B]%s[/B]' % (name), episode_id, 128, icon, '',
                             '')
            else:
                CheckAutoplay(name, url, icon, desc, aired)

    # Finally add all programmes which have been identified as part of a group before.
    for episode in episodelist:
        episode_url = "http://www.bbc.co.uk/iplayer/episode/%s" % episode[0]
        if ((ADDON.getSetting('suppress_incomplete') == 'false')
                or (not episode[4] == '')):
            CheckAutoplay(episode[1], episode_url, episode[3], episode[2],
                          episode[4])

    xbmcplugin.addSortMethod(int(sys.argv[1]),
                             xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
def GetPage(page_url, just_episodes=False):
    """   Generic Radio page scraper.   """

    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = range(1)
    paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting("radio_paginate_episodes")) == 0:
            current_page_match = re.search(r"page=(\d*)", page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = range(current_page, current_page + 1)
            next_page_match = re.search(r'<li class="pagination__next"><a href="(.*?page=)(.*?)">', paginate.group(0))
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = range(current_page, current_page + 1)
        else:
            pages = re.findall(r'<li.+?class="pagination__page.*?</li>', paginate.group(0))
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = range(1, total_pages + 1)

    for page in page_range:

        if page > current_page:
            page_url = "http://www.bbc.co.uk" + page_base_url + str(page)
            html = OpenURL(page_url)

        masthead_title = ""
        masthead_title_match = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>', html
        )
        if masthead_title_match:
            masthead_title = masthead_title_match.group(1)

        list_item_num = 1

        programmes = html.split('<div class="programme ')
        for programme in programmes:

            if not programme.startswith("programme--radio"):
                continue

            if "available" not in programme:  # TODO find a more robust test
                continue

            series_id = ""
            series_id_match = re.search(
                r'<a class="iplayer-text js-lazylink__link" href="/programmes/(.+?)/episodes/player"', programme
            )
            if series_id_match:
                series_id = series_id_match.group(1)

            programme_id = ""
            programme_id_match = re.search(r'data-pid="(.+?)"', programme)
            if programme_id_match:
                programme_id = programme_id_match.group(1)

            name = ""
            name_match = re.search(r'<span property="name">(.+?)</span>', programme)
            if name_match:
                name = name_match.group(1)

            subtitle = ""
            subtitle_match = re.search(
                r'<span class="programme__subtitle.+?property="name">(.*?)</span>(.*?property="name">(.*?)</span>)?',
                programme,
            )
            if subtitle_match:
                series = subtitle_match.group(1)
                episode = subtitle_match.group(3)
                if episode:
                    subtitle = "(%s, %s)" % (series, episode)
                else:
                    if series.strip():
                        subtitle = "(%s)" % series

            image = ""
            image_match = re.search(r'<meta property="image" content="(.+?)" />', programme)
            if image_match:
                image = image_match.group(1)

            synopsis = ""
            synopsis_match = re.search(r'<span property="description">(.+?)</span>', programme)
            if synopsis_match:
                synopsis = synopsis_match.group(1)

            station = ""
            station_match = re.search(r'<p class="programme__service.+?<strong>(.+?)</strong>.*?</p>', programme)
            if station_match:
                station = station_match.group(1).strip()

            series_title = "[B]%s - %s[/B]" % (station, name)
            if just_episodes:
                title = "[B]%s[/B] - %s" % (masthead_title, name)
            else:
                title = "[B]%s[/B] - %s %s" % (station, name, subtitle)

            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, "")
            elif programme_id:  # TODO maybe they are not always mutually exclusive

                url = "http://www.bbc.co.uk/programmes/%s" % programme_id
                CheckAutoplay(title, url, image, " ", "")

            percent = int(100 * (page + float(list_item_num) / len(programmes)) / total_pages)
            pDialog.update(percent, translation(30319), name)

            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting("radio_paginate_episodes")) == 0:
        if current_page < next_page:
            page_url = "http://www.bbc.co.uk" + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320), page_url, 136, "", "", "")

    # BUG: this should sort by original order but it doesn't (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
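
The subtitle regex in this last variant captures up to two nested <span property="name"> values, the series and the episode; with both present the title gains "(Series, Episode)", with only a series it gains "(Series)". A quick check on an invented fragment:

# Invented fragment showing how the series/episode subtitle is assembled.
import re

programme = ('<span class="programme__subtitle centi"><span property="name">'
             'Series 3</span>: <span property="name">Episode 4</span></span>')
subtitle_match = re.search(
    r'<span class="programme__subtitle.+?property="name">(.*?)</span>'
    r'(.*?property="name">(.*?)</span>)?',
    programme)
series, episode = subtitle_match.group(1), subtitle_match.group(3)
subtitle = "(%s, %s)" % (series, episode) if episode else "(%s)" % series
# -> '(Series 3, Episode 4)'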