def PlayStream(name, url, iconimage, description, subtitles_url):
    """Resolve *url* as a playable video item and optionally attach subtitles.

    Shows an error dialog and aborts (bare raise, deliberate hard stop used
    throughout this add-on) if the stream page is empty or geoblocked.
    """
    html = OpenURL(url)
    # Geoblocked requests come back as an "Access Denied" HTML page.
    check_geo = re.search('<H1>Access Denied</H1>', html)
    if check_geo or not html:
        # print "Geoblock detected, raising error message"
        dialog = xbmcgui.Dialog()
        dialog.ok(translation(30400), translation(30401))
        raise
    liz = xbmcgui.ListItem(name, iconImage='DefaultVideo.png',
                           thumbnailImage=iconimage)
    liz.setInfo(type='Video', infoLabels={'Title': name})
    liz.setProperty("IsPlayable", "true")
    liz.setPath(url)
    # Evaluate the subtitle condition once instead of re-reading the setting.
    subtitles_wanted = bool(subtitles_url) and ADDON.getSetting('subtitles') == 'true'
    if subtitles_wanted:
        subtitles_file = download_subtitles(subtitles_url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, liz)
    if subtitles_wanted:
        # Wait (bounded, ~30s) until playback has actually started before
        # attaching the subtitles. The previous unbounded while-loop could
        # spin forever if playback never started.
        for _ in range(60):
            if xbmc.Player().isPlaying():
                xbmc.Player().setSubtitles(subtitles_file)
                break
            xbmc.sleep(500)
def ListAtoZ():
    """List programmes based on alphabetical order.

    Only creates the corresponding directories for each character.
    """
    # A-Z as (display label, URL fragment) pairs, plus the shared 0-9 bucket.
    characters = [(chr(c), chr(c).lower()) for c in range(ord('A'), ord('Z') + 1)]
    characters.append(('0-9', '0-9'))
    if int(ADDON.getSetting('scrape_atoz')) == 1:
        # Setting 1: scrape every letter page right away, with progress dialog.
        pDialog = xbmcgui.DialogProgressBG()
        pDialog.create(translation(30319))
        total_pages = len(characters)
        # enumerate replaces the previous manual page counter.
        for page, (name, url) in enumerate(characters, 1):
            GetAtoZPage(url)
            percent = int(100 * page / total_pages)
            pDialog.update(percent, translation(30319), name)
        pDialog.close()
    else:
        # Otherwise just add one directory entry (mode 124) per character.
        for name, url in characters:
            AddMenuEntry(name, url, 124, '', '', '')
def ListAtoZ():
    """List programmes based on alphabetical order.

    Only creates the corresponding directories for each character.
    """
    characters = [
        ('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'),
        ('F', 'f'), ('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'),
        ('K', 'k'), ('L', 'l'), ('M', 'm'), ('N', 'n'), ('O', 'o'),
        ('P', 'p'), ('Q', 'q'), ('R', 'r'), ('S', 's'), ('T', 't'),
        ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'), ('Y', 'y'),
        ('Z', 'z'), ('0-9', '0-9')]
    if int(ADDON.getSetting('scrape_atoz')) != 1:
        # Just add one directory entry (mode 124) per character.
        for label, fragment in characters:
            AddMenuEntry(label, fragment, 124, '', '', '')
        return
    # Scrape every letter page immediately, reporting progress as we go.
    progress = xbmcgui.DialogProgressBG()
    progress.create(translation(30319))
    letter_count = len(characters)
    for position, (label, fragment) in enumerate(characters, 1):
        GetAtoZPage(fragment)
        progress.update(int(100 * position / letter_count),
                        translation(30319), label)
    progress.close()
def PlayStream(name, url, iconimage, description, subtitles_url):
    """Resolve *url* as a playable video item; optionally attach subtitles."""
    if iconimage == '':
        iconimage = 'DefaultVideo.png'
    page = OpenURL(url)
    # A geoblocked request yields an "Access Denied" page instead of a stream.
    denied = re.search('<H1>Access Denied</H1>', page)
    if denied or not page:
        # print "Geoblock detected, raising error message"
        xbmcgui.Dialog().ok(translation(30400), translation(30401))
        raise
    item = xbmcgui.ListItem(name, iconImage='DefaultVideo.png',
                            thumbnailImage=iconimage)
    item.setInfo(type='Video', infoLabels={'Title': name})
    item.setProperty("IsPlayable", "true")
    item.setPath(url)
    use_subtitles = subtitles_url and ADDON.getSetting('subtitles') == 'true'
    if use_subtitles:
        subtitles_file = download_subtitles(subtitles_url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, item)
    if use_subtitles:
        # Successfully started playing something?
        while not xbmc.Player().isPlaying():
            xbmc.sleep(500)
        xbmc.Player().setSubtitles(subtitles_file)
def RedButtonDialog():
    """List Red Button streams, optionally showing a warning dialog first."""
    if ADDON.getSetting('redbutton_warning') != 'true':
        # Warning disabled in settings: go straight to the list.
        ListRedButton()
        return
    confirmed = xbmcgui.Dialog().yesno(translation(30405), translation(30406),
                                       '', translation(30407),
                                       translation(30409), translation(30408))
    if confirmed:
        ListRedButton()
def ParseMediaselector(stream_id):
    """Query the BBC mediaselector (v6, JSONP) for the given vpid.

    Returns a tuple (streams, subtitles): streams is a list of
    (href, protocol, supplier, transfer_format) tuples, subtitles a list of
    (href, protocol, supplier) tuples. Shows an error dialog and aborts
    (bare raise) if the mediaselector reports a geolocation block.
    """
    streams = []
    subtitles = []
    # print "Parsing streams for PID: %s"%stream_id
    # Open the page with the actual strem information and display the various available streams.
    NEW_URL = "https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/pc/vpid/%s/format/json/jsfunc/JS_callbacks0" % stream_id
    html = OpenURL(NEW_URL)
    # The response is JSONP: strip the JS_callbacks0(...) wrapper to get JSON.
    match = re.search(r'JS_callbacks0.*?\((.*?)\);', html, re.DOTALL)
    if match:
        json_data = json.loads(match.group(1))
        if json_data:
            # print(json.dumps(json_data, sort_keys=True, indent=2))
            if 'media' in json_data:
                for media in json_data['media']:
                    if 'kind' in media:
                        if media['kind'] == 'captions':
                            # Subtitle (caption) connections.
                            if 'connection' in media:
                                for connection in media['connection']:
                                    href = ''
                                    protocol = ''
                                    supplier = ''
                                    if 'href' in connection:
                                        href = connection['href']
                                    if 'protocol' in connection:
                                        protocol = connection['protocol']
                                    if 'supplier' in connection:
                                        supplier = connection['supplier']
                                    subtitles.append(
                                        (href, protocol, supplier))
                        elif media['kind'].startswith('video'):
                            # Video connections; kind may be 'video' or a
                            # 'video/...' MIME-style string.
                            if 'connection' in media:
                                for connection in media['connection']:
                                    href = ''
                                    protocol = ''
                                    supplier = ''
                                    transfer_format = ''
                                    if 'href' in connection:
                                        href = connection['href']
                                    if 'protocol' in connection:
                                        protocol = connection['protocol']
                                    if 'supplier' in connection:
                                        supplier = connection['supplier']
                                    if 'transferFormat' in connection:
                                        transfer_format = connection[
                                            'transferFormat']
                                    streams.append((href, protocol, supplier,
                                                    transfer_format))
            elif 'result' in json_data:
                if json_data['result'] == 'geolocation':
                    # print "Geoblock detected, raising error message"
                    dialog = xbmcgui.Dialog()
                    dialog.ok(translation(30400), translation(30401))
                    raise
    # print "Found streams:"
    # print streams
    return streams, subtitles
def PlayStream(name, url, iconimage, description, subtitles_url):
    """Resolve *url* as a playable audio item."""
    page = OpenURL(url)
    # Geoblocked requests come back as an "Access Denied" HTML page.
    denied = re.search("<H1>Access Denied</H1>", page)
    if denied or not page:
        # print "Geoblock detected, raising error message"
        xbmcgui.Dialog().ok(translation(30400), translation(30401))
        raise
    item = xbmcgui.ListItem(name, iconImage="DefaultVideo.png",
                            thumbnailImage=iconimage)
    item.setInfo(type="Audio", infoLabels={"Title": name})
    item.setProperty("IsPlayable", "true")
    item.setPath(url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, item)
def PlayStream(name, url, iconimage, description, subtitles_url):
    """Hand an audio stream URL back to Kodi as the resolved item."""
    html = OpenURL(url)
    # A geoblocked stream yields an "Access Denied" page instead of content.
    geo_blocked = re.search('<H1>Access Denied</H1>', html)
    if geo_blocked or not html:
        # print "Geoblock detected, raising error message"
        warning = xbmcgui.Dialog()
        warning.ok(translation(30400), translation(30401))
        raise
    listitem = xbmcgui.ListItem(name, iconImage='DefaultVideo.png',
                                thumbnailImage=iconimage)
    listitem.setPath(url)
    listitem.setProperty("IsPlayable", "true")
    listitem.setInfo(type='Audio', infoLabels={'Title': name})
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem)
def AddAvailableStreamItem(name, url, iconimage, description):
    """Play a stream based on settings for preferred catchup source and bitrate."""
    stream_ids = ScrapeAvailableStreams(url)
    if len(stream_ids) < 1:
        # TODO check CBeeBies for special cases
        xbmcgui.Dialog().ok(translation(30403), translation(30404))
        return
    streams_all = ParseStreams(stream_ids)
    streams = streams_all[0]
    source = int(ADDON.getSetting("radio_source"))
    if source > 0:
        # Case 1: Selected source
        match = [x for x in streams if (x[0] == source)]
        if len(match) == 0:
            # Fallback: Use any source and any bitrate
            match = streams
            match.sort(key=lambda x: x[1], reverse=True)
    else:
        # Case 3: Any source
        # Play highest available bitrate
        match = streams
        match.sort(key=lambda x: x[1], reverse=True)
    if not match:
        # No playable stream at all: report the same error as above instead
        # of crashing with an IndexError on match[0].
        xbmcgui.Dialog().ok(translation(30403), translation(30404))
        return
    PlayStream(name, match[0][2], iconimage, description, "")
def AddAvailableStreamItem(name, url, iconimage, description):
    """Play a stream chosen by the preferred catchup source/bitrate settings."""
    stream_ids = ScrapeAvailableStreams(url)
    if len(stream_ids) < 1:
        #TODO check CBeeBies for special cases
        xbmcgui.Dialog().ok(translation(30403), translation(30404))
        return
    streams = ParseStreams(stream_ids)[0]
    preferred = int(ADDON.getSetting('radio_source'))
    candidates = []
    if preferred > 0:
        # A specific source is configured: keep only streams from it.
        candidates = [entry for entry in streams if entry[0] == preferred]
    if not candidates:
        # No preferred source configured (or no stream matched it): fall back
        # to every stream, highest bitrate first.
        candidates = streams
        candidates.sort(key=lambda entry: entry[1], reverse=True)
    PlayStream(name, candidates[0][2], iconimage, description, '')
def ListFavourites(logged_in):
    """Scrapes all episodes of the favourites page.

    Requires a logged-in session; otherwise falls back to the base directory.
    """
    # Docstring was previously a dead string statement after this guard.
    if not CheckLogin(logged_in):
        CreateBaseDirectory('video')
        return
    # The favourites API needs the IDENTITY cookie of the logged-in session.
    identity_cookie = None
    cookie_jar = GetCookieJar()
    for cookie in cookie_jar:
        if cookie.name == 'IDENTITY':
            identity_cookie = cookie.value
            break
    url = "https://ibl.api.bbci.co.uk/ibl/v1/user/added?identity_cookie=%s" % identity_cookie
    html = OpenURL(url)
    json_data = json.loads(html)
    favourites_list = json_data.get('added').get('elements')
    for favourite in favourites_list:
        programme = favourite.get('programme')
        # Renamed from 'id' to avoid shadowing the builtin.
        programme_id = programme.get('id')
        url = "http://www.bbc.co.uk/iplayer/brand/%s" % (programme_id)
        title = programme.get('title')
        initial_child = programme.get('initial_children')[0]
        subtitle = initial_child.get('subtitle')
        episode_title = title
        if subtitle:
            episode_title = title + ' - ' + subtitle
        image = initial_child.get('images')
        image_url = ParseImageUrl(image.get('standard'))
        synopses = initial_child.get('synopses')
        plot = synopses.get('small')
        try:
            aired = FirstShownToAired(initial_child.get('release_date'))
        except Exception:
            # Missing or unparsable release date: just omit the aired date.
            aired = ''
        CheckAutoplay(episode_title, url, image_url, plot, aired)
        more = programme.get('count')
        if more:
            # More episodes exist beyond the initial child: add a folder entry.
            episodes_url = "http://www.bbc.co.uk/iplayer/episodes/" + programme_id
            AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                         episodes_url, 128, image_url, '', '')
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
def ListFavourites(logged_in):
    """Scrapes all episodes of the favourites page.

    Requires a logged-in session; otherwise falls back to the base directory.
    """
    # Docstring was previously a dead string statement after this guard.
    if not CheckLogin(logged_in):
        CreateBaseDirectory('video')
        return
    html = OpenURL(
        'http://www.bbc.co.uk/iplayer/usercomponents/favourites/programmes.json'
    )
    json_data = json.loads(html)
    # favourites = json_data.get('favourites')
    programmes = json_data.get('programmes')
    for programme in programmes:
        # Renamed from 'id' to avoid shadowing the builtin.
        programme_id = programme.get('id')
        url = "http://www.bbc.co.uk/iplayer/brand/%s" % (programme_id)
        title = programme.get('title')
        initial_child = programme.get('initial_children')[0]
        subtitle = initial_child.get('subtitle')
        episode_title = title
        if subtitle:
            episode_title = title + ' - ' + subtitle
        image = initial_child.get('images')
        image_url = ParseImageUrl(image.get('standard'))
        synopses = initial_child.get('synopses')
        plot = synopses.get('small')
        try:
            aired = FirstShownToAired(initial_child.get('release_date'))
        except Exception:
            # Missing or unparsable release date: just omit the aired date.
            aired = ''
        CheckAutoplay(episode_title, url, image_url, plot, aired)
        more = programme.get('count')
        if more:
            # More episodes exist beyond the initial child: add a folder entry.
            episodes_url = "http://www.bbc.co.uk/iplayer/episodes/" + programme_id
            AddMenuEntry(
                '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                episodes_url, 128, image_url, '', '')
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
def ListFavourites(logged_in):
    """Scrapes all episodes of the favourites page.

    Requires a logged-in session; otherwise falls back to the base directory.
    """
    # Docstring was previously a dead string statement after this guard.
    if not CheckLogin(logged_in):
        CreateBaseDirectory('video')
        return
    html = OpenURL('http://www.bbc.co.uk/iplayer/usercomponents/favourites/programmes.json')
    json_data = json.loads(html)
    # favourites = json_data.get('favourites')
    programmes = json_data.get('programmes')
    for programme in programmes:
        # Renamed from 'id' to avoid shadowing the builtin.
        programme_id = programme.get('id')
        url = "http://www.bbc.co.uk/iplayer/brand/%s" % (programme_id)
        title = programme.get('title')
        initial_child = programme.get('initial_children')[0]
        subtitle = initial_child.get('subtitle')
        episode_title = title
        if subtitle:
            episode_title = title + ' - ' + subtitle
        image = initial_child.get('images')
        image_url = ParseImageUrl(image.get('standard'))
        synopses = initial_child.get('synopses')
        plot = synopses.get('small')
        try:
            aired = FirstShownToAired(initial_child.get('release_date'))
        except Exception:
            # Missing or unparsable release date: just omit the aired date.
            aired = ''
        CheckAutoplay(episode_title, url, image_url, plot, aired)
        more = programme.get('count')
        if more:
            # More episodes exist beyond the initial child: add a folder entry.
            episodes_url = "http://www.bbc.co.uk/iplayer/episodes/" + programme_id
            AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                         episodes_url, 128, image_url, '', '')
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
def RedButtonDialog():
    """Ask for confirmation, then list the Red Button streams."""
    confirmed = xbmcgui.Dialog().yesno(translation(30405), translation(30406),
                                       '', translation(30407),
                                       translation(30409), translation(30408))
    if confirmed:
        ListRedButton()
def ScrapeEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    ScrapeEpisodes contains a number of special treatments, which are only
    needed for specific pages, e.g. Search, but allows to use a single
    function for all kinds of pages.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    html = OpenURL(page_url)
    total_pages = 1
    current_page = 1
    page_range = range(1)
    # Detect the pagination widget to work out how many pages to scrape.
    paginate = re.search(r'<div class="paginate.*?</div>', html, re.DOTALL)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('paginate_episodes')) == 0:
            # Setting 0: only scrape the current page and add a ">>" entry.
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = range(current_page, current_page+1)
            next_page_match = re.search(
                r'<span class="next txt">.+?href="(.*?page=)(.*?)"',
                paginate.group(0), re.DOTALL)
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = range(current_page, current_page+1)
        else:
            # Otherwise: scrape all pages listed in the pagination widget.
            pages = re.findall(r'<li class="page.*?</li>',
                               paginate.group(0), re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a href="(.*?page=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = range(1, total_pages+1)
    for page in page_range:
        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)
        # NOTE remove inner li to match outer li
        # <li data-version-type="hd">
        html = re.compile(r'<li data-version-type.*?</li>',
                          flags=(re.DOTALL | re.MULTILINE)).sub('', html)
        # <li class="list-item programme" data-ip-id="p026f2t4">
        list_items = re.findall(r'<li class="list-item.*?</li>', html,
                                flags=(re.DOTALL | re.MULTILINE))
        list_item_num = 1
        for li in list_items:
            # <li class="list-item unavailable" data-ip-id="b06sq9xj">
            unavailable_match = re.search(
                '<li class="list-item.*?unavailable.*?"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if unavailable_match:
                continue
            # <li class="list-item search-group" data-ip-id="b06rdtx0">
            search_group = False
            search_group_match = re.search(
                '<li class="list-item.*?search-group.*?"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if search_group_match:
                search_group = True
            main_url = None
            # <a href="/iplayer/episode/p026gmw9/world-of-difference-the-models"
            # title="World of Difference, The Models" class="list-item-link stat"
            url_match = re.search(
                r'<a.*?href="(.*?)".*?list-item-link.*?>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if url_match:
                url = url_match.group(1)
                if url:
                    main_url = 'http://www.bbc.co.uk' + url
            name = ''
            title = ''
            #<div class="title top-title">World of Difference</div>
            title_match = re.search(
                r'<div class="title top-title">\s*(.*?)\s*</div>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if title_match:
                title = title_match.group(1)
                name = title
            subtitle = None
            #<div class="subtitle">The Models</div>
            subtitle_match = re.search(
                r'<div class="subtitle">\s*(.*?)\s*</div>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if subtitle_match:
                subtitle = subtitle_match.group(1)
                if subtitle:
                    name = name + " - " + subtitle
            icon = ''
            type = None
            # <div class="r-image" data-ip-type="episode"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p026vl1q.jpg">
            # <div class="r-image" data-ip-type="group"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p037ty9z.jpg">
            image_match = re.search(
                r'<div class="r-image".+?data-ip-type="(.*?)".+?data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/(.*?)\.jpg"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if image_match:
                type = image_match.group(1)
                image = image_match.group(2)
                if image:
                    # Swap the thumbnail size for a higher-resolution variant.
                    icon = "http://ichef.bbci.co.uk/images/ic/832x468/" + image + ".jpg"
            synopsis = ''
            # <p class="synopsis">What was it like to be a top fashion model 30 years ago? (1978)</p>
            synopsis_match = re.search(
                r'<p class="synopsis">\s*(.*?)\s*</p>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if synopsis_match:
                synopsis = synopsis_match.group(1)
            aired = ''
            # <span class="release">\nFirst shown: 8 Jun 1967\n</span>
            release_match = re.search(
                r'<span class="release">.*?First shown:\s*(.*?)\n.*?</span>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if release_match:
                release = release_match.group(1)
                if release:
                    aired = FirstShownToAired(release)
            episodes = None
            # <a class="view-more-container avail stat" href="/iplayer/episodes/p00db1jf" data-progress-state="">
            # <a class="view-more-container sibling stat"
            # href="/iplayer/search?q=doctor&search_group_id=urn:bbc:programmes:b06qbs4n">
            episodes_match = re.search(
                r'<a class="view-more-container.+?stat".+?href="(.*?)"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if episodes_match:
                episodes = episodes_match.group(1)
            more = None
            # <em class="view-more-heading">27</em>
            more_match = re.search(
                r'<em class="view-more-heading">(.*?)</em>',
                li, flags=(re.DOTALL | re.MULTILINE))
            if more_match:
                more = more_match.group(1)
            # Entries with further episodes become folders (mode 128).
            if episodes:
                episodes_url = 'http://www.bbc.co.uk' + episodes
                if search_group:
                    AddMenuEntry('[B]%s[/B] - %s' % (title, translation(30318)),
                                 episodes_url, 128, icon, '', '')
                else:
                    AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                                 episodes_url, 128, icon, '', '')
            elif more:
                AddMenuEntry('[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                             main_url, 128, icon, '', '')
            # Groups are folders only; everything else is a playable episode.
            if type != "group":
                CheckAutoplay(name , main_url, icon, synopsis, aired)
            percent = int(100*(page+list_item_num/len(list_items))/total_pages)
            pDialog.update(percent,translation(30319),name)
            list_item_num += 1
        percent = int(100*page/total_pages)
        pDialog.update(percent,translation(30319))
    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            # Add a ">>" entry leading to the next page.
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 128, '', '', '')
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    pDialog.close()
def ParseStreams(stream_id): retlist = [] # print "Parsing streams for PID: %s"%stream_id[0] # Open the page with the actual strem information and display the various available streams. NEW_URL = "http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s" % stream_id[0] html = OpenURL(NEW_URL) # Parse the different streams and add them as new directory entries. match = re.compile( 'connection authExpires=".+?href="(.+?)".+?supplier="mf_(.+?)".+?transferFormat="(.+?)"' ).findall(html) for m3u8_url, supplier, transfer_format in match: tmp_sup = 0 tmp_br = 0 if transfer_format == 'hls': if supplier == 'akamai_uk_hls': tmp_sup = 1 elif supplier == 'limelight_uk_hls': tmp_sup = 2 m3u8_breakdown = re.compile('(.+?)iptv.+?m3u8(.+?)$').findall(m3u8_url) #print m3u8_breakdown # print m3u8_url m3u8_html = OpenURL(m3u8_url) m3u8_match = re.compile('BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)\n(.+?)\n').findall(m3u8_html) for bandwidth, resolution, stream in m3u8_match: # print bandwidth # print resolution #print stream url = "%s%s%s" % (m3u8_breakdown[0][0], stream, m3u8_breakdown[0][1]) #print url if int(bandwidth) == 1012300: tmp_br = 2 elif int(bandwidth) == 1799880: tmp_br = 4 elif int(bandwidth) == 3116400: tmp_br = 6 elif int(bandwidth) == 5509880: tmp_br = 7 retlist.append((tmp_sup, tmp_br, url, resolution)) # It may be useful to parse these additional streams as a default as they offer additional bandwidths. 
match = re.compile( 'kind="video".+?connection href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"' ).findall(html) # print match unique = [] [unique.append(item) for item in match if item not in unique] # print unique for m3u8_url, supplier, transfer_format in unique: tmp_sup = 0 tmp_br = 0 if transfer_format == 'hls': if supplier == 'akamai_hls_open': tmp_sup = 1 elif supplier == 'limelight_hls_open': tmp_sup = 2 m3u8_breakdown = re.compile('.+?master.m3u8(.+?)$').findall(m3u8_url) # print m3u8_url # print m3u8_breakdown m3u8_html = OpenURL(m3u8_url) # print m3u8_html m3u8_match = re.compile('BANDWIDTH=(.+?),RESOLUTION=(.+?),.+?\n(.+?)\n').findall(m3u8_html) # print m3u8_match for bandwidth, resolution, stream in m3u8_match: # print bandwidth # print resolution # print stream url = "%s%s" % (stream, m3u8_breakdown[0][0]) # This is not entirely correct, displayed bandwidth may be higher or lower than actual bandwidth. if int(bandwidth) <= 801000: tmp_br = 1 elif int(bandwidth) <= 1510000: tmp_br = 3 elif int(bandwidth) <= 2410000: tmp_br = 5 retlist.append((tmp_sup, tmp_br, url, resolution)) match = re.compile('service="captions".+?connection href="(.+?)"').findall(html) # print "Subtitle URL: %s"%match # print retlist if not match: # print "No streams found" check_geo = re.search( '<error id="geolocation"/>', html) if check_geo: # print "Geoblock detected, raising error message" dialog = xbmcgui.Dialog() dialog.ok(translation(30400), translation(30401)) raise return retlist, match
def GetCategoryPage(page_url, just_episodes=False):
    """Scrape one (radio) category listing page and add its entries.

    Handles the pgn__ pagination widget; depending on the
    'radio_paginate_episodes' setting either scrapes all pages or only the
    current one plus a ">>" entry. NOTE(review): just_episodes is unused in
    the visible body — presumably kept for signature compatibility.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    html = OpenURL(page_url)
    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    # Presence of the pagination list marks a multi-page category.
    paginate = re.search(r'pgn__list', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            # Setting 0: scrape only the current page, remember the next one.
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
                main_base_url = re.search(r'(.+?)\?.+?', page_url).group(1)
            else:
                main_base_url = page_url
            page_range = list(range(current_page, current_page + 1))
            next_page_match = re.search(
                r'pgn__page--next.*?href="(.*?page=)(.*?)"', html)
            if next_page_match:
                page_base_url = main_base_url + next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            # Otherwise: derive the total page count from the widget.
            pages = re.findall(r'<li class="pgn__page.*?</li>', html,
                               flags=(re.DOTALL | re.MULTILINE))
            if pages:
                # The last element is the "next" arrow; the page number
                # lives in the second-to-last element.
                last = pages[-2]
                last_page = re.search(r'href=".*?page=(.*?)"', last)
                page_base_url = page_url + '?page='
                total_pages = int(last_page.group(1))
            page_range = list(range(1, total_pages + 1))
    for page in page_range:
        if page > current_page:
            page_url = page_base_url + str(page)
            html = OpenURL(page_url)
        list_item_num = 1
        # Each programme block starts with this div; split rather than regex.
        programmes = html.split('<div class="programme-item')
        for programme in programmes:
            series_id = ''
            series_id_match = re.search(
                r'<a class="category-episodes" href="/programmes/(.+?)/episodes"',
                programme)
            if series_id_match:
                series_id = series_id_match.group(1)
            programme_id = ''
            programme_id_match = re.search(r'href="/programmes/(.+?)"', programme)
            if programme_id_match:
                programme_id = programme_id_match.group(1)
            name = ''
            name_match = re.search(
                r'<span class="programme-item-title.+?>(.+?)</span>', programme)
            if name_match:
                name = name_match.group(1)
            subtitle = ''
            subtitle_match = re.search(
                r'<p class="programme-item-subtitle.+?>(.+?)</p>', programme)
            if subtitle_match:
                subtitle = subtitle_match.group(1)
            image = ''
            image_match = re.search(r'class="media__image" src="(.+?)"', programme)
            if image_match:
                image = 'http://' + image_match.group(1)
            synopsis = ''
            synopsis_match = re.search(
                r'<p class="programme-item-synopsis.+?>(.+?)</p>', programme)
            if synopsis_match:
                synopsis = synopsis_match.group(1)
            station = ''
            station_match = re.search(
                r'class="programme-item-network.+?>\s*(.+?)\s*</a>', programme)
            if station_match:
                station = station_match.group(1).strip()
            series_title = "[B]%s - %s[/B]" % (station, name)
            title = "[B]%s[/B] - %s %s" % (station, name, subtitle)
            # A series becomes a folder (mode 131); a single programme is
            # offered for (auto)play.
            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, '')
            elif programme_id:
                #TODO maybe they are not always mutually exclusive
                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, ' ', '')
            percent = int(100 * (page + list_item_num / len(programmes)) / total_pages)
            pDialog.update(percent, translation(30319), name)
            list_item_num += 1
        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))
    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            # Add a ">>" entry (mode 137) leading to the next page.
            page_url = page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 137, '', '', '')
    #BUG: this should sort by original order but it doesn't (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    pDialog.close()
def ParseJSON(programme_data, current_url):
    """Parses the JSON data containing programme information of a page.

    Contains a lot of fallbacks: handles A-Z pages, category/most-popular
    pages, Added/Watching lists, plus the "groups", "highlights" and
    "bundles" sections used by channel and global highlight pages.
    """
    added_playables = []
    added_directories = []
    if programme_data:
        name = ''
        if 'header' in programme_data:
            if 'title' in programme_data['header']:
                name = programme_data['header']['title']
        url_split = current_url.replace('&', '?').split('?')
        is_paginated = False
        """ Avoid duplicate entries by checking if we are on page >1 """
        for part in url_split:
            if part.startswith('page'):
                is_paginated = True
        if not is_paginated:
            # Add one folder per additional series (slice) of this programme.
            if 'availableSlices' in programme_data['header']:
                current_series = programme_data['header']['currentSliceId']
                slices = programme_data['header']['availableSlices']
                if slices is not None:
                    for series in slices:
                        if series['id'] == current_series:
                            continue
                        base_url = url_split[0]
                        series_url = base_url + '?seriesId=' + series['id']
                        AddMenuEntry(
                            '[B]%s: %s[/B]' % (name, series['title']),
                            series_url, 128, '', '', '')
        programmes = None
        if 'currentLetter' in programme_data:
            # This must be an A-Z page.
            current_letter = programme_data['currentLetter']
            programmes = programme_data['programmes'][current_letter][
                'entities']
        elif 'entities' in programme_data:
            # This must be a category or most popular.
            programmes = programme_data['entities']
        elif 'items' in programme_data:
            # This must be Added or Watching.
            programmes = programme_data['items']
        if programmes:
            for item in programmes:
                meta = None
                # Newer payloads wrap the entry in 'props' with a 'meta' sibling.
                if 'props' in item:
                    meta = item.get('meta')
                    item = item.get('props')
                ParseSingleJSON(meta, item, name, added_playables, added_directories)
        # The next section is for global and channel highlights. They are a bit tricky.
        groups = None
        highlights = None
        bundles = None
        if 'groups' in programme_data:
            groups = programme_data.get('groups')
            for entity in groups:
                for item in entity['entities']:
                    item = item.get("props")
                    if not item:
                        continue
                    ParseSingleJSON(None, item, None, added_playables, added_directories)
                title = ''
                id = ''
                title = entity.get('title')
                id = entity.get('id')
                if (title and id):
                    # Link to the full group unless it was already added.
                    episodes_url = 'https://www.bbc.co.uk/iplayer/group/%s' % id
                    if not episodes_url in added_directories:
                        AddMenuEntry(
                            '[B]%s: %s[/B]' % (translation(30314), title),
                            episodes_url, 128, '', '', '')
        if 'highlights' in programme_data:
            highlights = programme_data.get('highlights')
            entity = highlights.get("items")
            if entity:
                for item in entity:
                    item = item.get("props")
                    if not item:
                        continue
                    ParseSingleJSON(None, item, None, added_playables, added_directories)
        if 'bundles' in programme_data:
            bundles = programme_data.get('bundles')
            for bundle in bundles:
                entity = ''
                entity = bundle.get('entities')
                if entity:
                    for item in entity:
                        ParseSingleJSON(None, item, None, added_playables, added_directories)
                journey = ''
                journey = bundle.get('journey')
                if journey:
                    id = ''
                    id = journey.get('id')
                    type = ''
                    type = journey.get('type')
                    title = ''
                    title = bundle.get('title').get('default')
                    if title:
                        if (id and (type == 'group')):
                            if (id == 'popular'):
                                # 'popular' has a dedicated mode (105).
                                AddMenuEntry(
                                    '[B]%s: %s[/B]' % (translation(30314), title),
                                    'url', 105, '', '', '')
                            else:
                                episodes_url = 'https://www.bbc.co.uk/iplayer/group/%s' % id
                                if not episodes_url in added_directories:
                                    AddMenuEntry(
                                        '[B]%s: %s[/B]' % (translation(30314), title),
                                        episodes_url, 128, '', '', '')
                        if (id and (type == 'category')):
                            AddMenuEntry(
                                '[B]%s: %s[/B]' % (translation(30314), title),
                                id, 126, '', '', '')
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
def ParseStreams(stream_id):
    """Parse the mediaselector (v5, https) response for the given vpid.

    Returns (retlist, match): retlist is a list of
    (supplier, bitrate_class, url, resolution) tuples, match the list of
    caption URLs. Filters suppliers by the 'catchup_source' setting
    (0 = any, 1 = akamai, 2 = limelight, 3 = bidi). Aborts with an error
    dialog when geoblocked and nothing was found.
    """
    retlist = []
    # print "Parsing streams for PID: %s"%stream_id
    # Open the page with the actual strem information and display the various available streams.
    NEW_URL = "https://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s" % stream_id
    html = OpenURL(NEW_URL)
    # Parse the different streams and add them as new directory entries.
    match = re.compile(
        'connection authExpires=".+?href="(.+?)".+?supplier="mf_(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    source = int(ADDON.getSetting('catchup_source'))
    for m3u8_url, supplier, transfer_format in match:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier.startswith('akamai') and source in [0, 1]:
                tmp_sup = 1
            elif supplier.startswith('limelight') and source in [0, 2]:
                tmp_sup = 2
            elif supplier.startswith('bidi') and source in [0, 3]:
                tmp_sup = 3
            else:
                continue
            m3u8_breakdown = re.compile('(.+?)iptv.+?m3u8(.+?)$').findall(
                m3u8_url)
            # Fetch the variant playlist to enumerate available bitrates.
            m3u8_html = OpenURL(m3u8_url)
            m3u8_match = re.compile(
                'BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)(?:,.+?\n|\n)(.+?)\n'
            ).findall(m3u8_html)
            for bandwidth, resolution, stream in m3u8_match:
                url = "%s%s%s" % (m3u8_breakdown[0][0], stream,
                                  m3u8_breakdown[0][1])
                # Map bandwidth ranges onto the add-on's bitrate classes.
                if 1000000 <= int(bandwidth) <= 1100000:
                    tmp_br = 2
                elif 1790000 <= int(bandwidth) <= 1800000:
                    tmp_br = 4
                elif 3100000 <= int(bandwidth) <= 3120000:
                    tmp_br = 6
                elif int(bandwidth) >= 5500000:
                    tmp_br = 7
                retlist.append((tmp_sup, tmp_br, url, resolution))
    # It may be useful to parse these additional streams as a default as they offer additional bandwidths.
    match = re.compile(
        'kind="video".+?connection href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    # Deduplicate while preserving order.
    unique = []
    [unique.append(item) for item in match if item not in unique]
    for m3u8_url, supplier, transfer_format in unique:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier.startswith('akamai_hls_open') and source in [0, 1]:
                tmp_sup = 1
            elif supplier.startswith('limelight_hls_open') and source in [
                    0, 2
            ]:
                tmp_sup = 2
            else:
                continue
            m3u8_breakdown = re.compile('.+?master.m3u8(.+?)$').findall(
                m3u8_url)
            m3u8_html = OpenURL(m3u8_url)
            m3u8_match = re.compile(
                'BANDWIDTH=(.+?),RESOLUTION=(.+?),.+?\n(.+?)\n').findall(m3u8_html)
            for bandwidth, resolution, stream in m3u8_match:
                url = "%s%s" % (stream, m3u8_breakdown[0][0])
                # This is not entirely correct, displayed bandwidth may be higher or lower than actual bandwidth.
                if int(bandwidth) <= 801000:
                    tmp_br = 1
                elif int(bandwidth) <= 1510000:
                    tmp_br = 3
                elif int(bandwidth) <= 2410000:
                    tmp_br = 5
                retlist.append((tmp_sup, tmp_br, url, resolution))
    # Some events have special live streams which show up as normal programmes.
    # They need to be parsed separately.
    match = re.compile(
        'connection.+?href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"'
    ).findall(html)
    unique = []
    [unique.append(item) for item in match if item not in unique]
    for m3u8_url, supplier, transfer_format in unique:
        tmp_sup = 0
        tmp_br = 0
        if transfer_format == 'hls':
            if supplier == 'akamai_hls_live':
                tmp_sup = 1
            elif supplier == 'll_hls_live':
                tmp_sup = 2
            else:
                # This is not a live stream, skip code to avoid unnecessary loading of playlists.
                continue
            # NOTE(review): this reassigns 'html' and 'match' — the captions
            # lookup below then runs against the last playlist fetched here,
            # not against the mediaselector response. Looks like a latent
            # bug; confirm before changing.
            html = OpenURL(m3u8_url)
            match = re.compile(
                '#EXT-X-STREAM-INF:PROGRAM-ID=(.+?),BANDWIDTH=(.+?),CODECS="(.*?)",RESOLUTION=(.+?)\s*(.+?.m3u8)'
            ).findall(html)
            for stream_id, bandwidth, codecs, resolution, url in match:
                # Note: This is not entirely correct as these bandwidths relate to live programmes,
                # not catchup.
                if int(bandwidth) <= 1000000:
                    tmp_br = 1
                elif int(bandwidth) <= 1100000:
                    tmp_br = 2
                elif 1700000 <= int(bandwidth) <= 1900000:
                    tmp_br = 4
                elif 3100000 <= int(bandwidth) <= 3120000:
                    tmp_br = 6
                elif int(bandwidth) >= 5500000:
                    tmp_br = 7
                retlist.append((tmp_sup, tmp_br, url, resolution))
    match = re.compile('service="captions".+?connection href="(.+?)"').findall(
        html)
    # print "Subtitle URL: %s"%match
    # print retlist
    if not match:
        # print "No streams found"
        check_geo = re.search('<error id="geolocation"/>', html)
        if check_geo:
            # print "Geoblock detected, raising error message"
            dialog = xbmcgui.Dialog()
            dialog.ok(translation(30400), translation(30401))
            raise
    return retlist, match
def ScrapeEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    ScrapeEpisodes contains a number of special treatments, which are only needed for
    specific pages, e.g. Search, but allows to use a single function for all kinds of pages.

    page_url: full URL of the iPlayer page to scrape.
    Side effects: adds Kodi directory entries via ParseJSON/AddMenuEntry and
    drives a background progress dialog.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    html = OpenURL(page_url)
    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    # Two generations of pagination markup exist; try the newer <ol> first.
    paginate = re.search(r'<ol class="paginat.*?</ol>', html, re.DOTALL)
    if not paginate:
        paginate = re.search(r'<div class="paginate.*?</div>', html, re.DOTALL)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('paginate_episodes')) == 0:
            # Pagination disabled: only list the current page and compute the
            # URL of the next page for a ">>" menu entry at the end.
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            pages = re.findall(r'<li class="pag.*?</li>', paginate.group(0), re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'page=(\d*)', last)
                if last_page:
                    total_pages = int(last_page.group(1))
                else:
                    total_pages = current_page
            if current_page < total_pages:
                # Rebuild the page URL without any existing "page=" query
                # parameter, then append a fresh one.
                split_page_url = page_url.replace('&', '?').split('?')
                page_base_url = split_page_url[0]
                for part in split_page_url[1:len(split_page_url)]:
                    if not part.startswith('page'):
                        page_base_url = page_base_url + '?' + part
                if '?' in page_base_url:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '&page='
                else:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '?page='
                next_page = current_page + 1
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            # Pagination enabled: scrape every page from 1 to the last.
            pages = re.findall(r'<li class="pag.*?</li>', paginate.group(0), re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'page=(\d*)', last)
                split_page_url = page_url.replace('&', '?').split('?')
                page_base_url = split_page_url[0]
                for part in split_page_url[1:len(split_page_url)]:
                    if not part.startswith('page'):
                        page_base_url = page_base_url + '?' + part
                if '?' in page_base_url:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '&page='
                else:
                    page_base_url = page_base_url.replace(
                        'https://www.bbc.co.uk', '') + '?page='
                total_pages = int(last_page.group(1))
                page_range = list(range(1, total_pages + 1))
    for page in page_range:
        if page > current_page:
            # The first page's HTML was already fetched above; only fetch
            # subsequent pages here.
            page_url = 'https://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)
        json_data = ScrapeJSON(html)
        if json_data:
            ParseJSON(json_data, page_url)
        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))
    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'https://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 128, '', '', '')
    pDialog.close()
def ScrapeAtoZEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    Like ScrapeEpisodes, but pagination information is taken from the
    embedded JSON ('pagination' key) instead of the HTML markup.

    page_url: full URL of the A-Z page to scrape.
    Side effects: adds Kodi directory entries via ParseJSON/AddMenuEntry and
    drives a background progress dialog.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    # FIX: initialise these up front; previously 'last_page' (used in the
    # percent calculation) and 'next_page' (used in the final paginate block)
    # were unbound when the page had no JSON or no 'pagination' key.  The
    # sibling scrapers (GetPage, ScrapeEpisodes) initialise next_page the
    # same way.
    last_page = 1
    next_page = 1
    html = OpenURL(page_url)
    json_data = ScrapeJSON(html)
    if json_data:
        if 'pagination' in json_data:
            page_base_url_match = re.search(r'(.+?)page=', page_url)
            if page_base_url_match:
                page_base_url = page_base_url_match.group(0)
            else:
                page_base_url = page_url + "?page="
            current_page = json_data['pagination'].get('currentPage')
            last_page = json_data['pagination'].get('totalPages')
            if int(ADDON.getSetting('paginate_episodes')) == 0:
                # Pagination disabled: only the current page is listed and a
                # ">>" entry pointing at the next page is appended below.
                current_page_match = re.search(r'page=(\d*)', page_url)
                if current_page_match:
                    current_page = int(current_page_match.group(1))
                page_base_url_match = re.search(r'(.+?)page=', page_url)
                if page_base_url_match:
                    page_base_url = page_base_url_match.group(0)
                else:
                    page_base_url = page_url + "?page="
                if current_page < last_page:
                    # FIX: was "curent_page" — a typo that raised NameError
                    # whenever more pages were available.
                    next_page = current_page + 1
                else:
                    next_page = current_page
                page_range = list(range(current_page, current_page + 1))
            else:
                page_range = list(range(1, last_page + 1))
    for page in page_range:
        if page > current_page:
            # The first page's JSON was already scraped above; only fetch and
            # re-scrape for subsequent pages.
            page_url = page_base_url + str(page)
            html = OpenURL(page_url)
            json_data = ScrapeJSON(html)
        if json_data:
            ParseJSON(json_data, page_url)
        percent = int(100 * page / last_page)
        pDialog.update(percent, translation(30319))
    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 134, '', '', '')
    pDialog.close()
def ScrapeEpisodes(page_url):
    """Creates a list of programmes on one standard HTML page.

    ScrapeEpisodes contains a number of special treatments, which are only needed for
    specific pages, e.g. Search, but allows to use a single function for all kinds of pages.

    This variant parses the legacy HTML list markup (list-item <li> elements)
    rather than embedded JSON.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    html = OpenURL(page_url)
    total_pages = 1
    current_page = 1
    page_range = range(1)
    paginate = re.search(r'<div class="paginate.*?</div>', html, re.DOTALL)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('paginate_episodes')) == 0:
            # Pagination disabled: only list the current page; a ">>" entry
            # for the next page is added at the end.
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = range(current_page, current_page + 1)
            next_page_match = re.search(
                r'<span class="next txt">.+?href="(.*?page=)(.*?)"',
                paginate.group(0), re.DOTALL)
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = range(current_page, current_page + 1)
        else:
            # Pagination enabled: derive the last page number from the final
            # pager <li> and scrape every page.
            pages = re.findall(r'<li class="page.*?</li>', paginate.group(0), re.DOTALL)
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a href="(.*?page=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = range(1, total_pages + 1)
    for page in page_range:
        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)
        # NOTE remove inner li to match outer li
        # <li data-version-type="hd">
        html = re.compile(r'<li data-version-type.*?</li>',
                          flags=(re.DOTALL | re.MULTILINE)).sub('', html)
        # <li class="list-item programme" data-ip-id="p026f2t4">
        list_items = re.findall(r'<li class="list-item.*?</li>', html,
                                flags=(re.DOTALL | re.MULTILINE))
        list_item_num = 1
        for li in list_items:
            # <li class="list-item unavailable" data-ip-id="b06sq9xj">
            unavailable_match = re.search(
                '<li class="list-item.*?unavailable.*?"', li,
                flags=(re.DOTALL | re.MULTILINE))
            if unavailable_match:
                continue
            # <li class="list-item search-group" data-ip-id="b06rdtx0">
            search_group = False
            search_group_match = re.search(
                '<li class="list-item.*?search-group.*?"', li,
                flags=(re.DOTALL | re.MULTILINE))
            if search_group_match:
                search_group = True
            main_url = None
            # <a href="/iplayer/episode/p026gmw9/world-of-difference-the-models"
            # title="World of Difference, The Models" class="list-item-link stat"
            url_match = re.search(r'<a.*?href="(.*?)".*?list-item-link.*?>', li,
                                  flags=(re.DOTALL | re.MULTILINE))
            if url_match:
                url = url_match.group(1)
                if url:
                    main_url = 'http://www.bbc.co.uk' + url
            name = ''
            title = ''
            # <div class="title top-title">World of Difference</div>
            title_match = re.search(
                r'<div class="title top-title">\s*(.*?)\s*</div>', li,
                flags=(re.DOTALL | re.MULTILINE))
            if title_match:
                title = title_match.group(1)
                name = title
            subtitle = None
            # <div class="subtitle">The Models</div>
            subtitle_match = re.search(
                r'<div class="subtitle">\s*(.*?)\s*</div>', li,
                flags=(re.DOTALL | re.MULTILINE))
            if subtitle_match:
                subtitle = subtitle_match.group(1)
                if subtitle:
                    name = name + " - " + subtitle
            icon = ''
            type = None
            # <div class="r-image" data-ip-type="episode"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p026vl1q.jpg">
            # <div class="r-image" data-ip-type="group"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p037ty9z.jpg">
            image_match = re.search(
                r'<div class="r-image".+?data-ip-type="(.*?)".+?data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/(.*?)\.jpg"',
                li, flags=(re.DOTALL | re.MULTILINE))
            if image_match:
                type = image_match.group(1)
                image = image_match.group(2)
                if image:
                    # Request a larger rendition of the same image.
                    icon = "http://ichef.bbci.co.uk/images/ic/832x468/" + image + ".jpg"
            synopsis = ''
            # <p class="synopsis">What was it like to be a top fashion model 30 years ago? (1978)</p>
            synopsis_match = re.search(r'<p class="synopsis">\s*(.*?)\s*</p>', li,
                                       flags=(re.DOTALL | re.MULTILINE))
            if synopsis_match:
                synopsis = synopsis_match.group(1)
            aired = ''
            # <span class="release">\nFirst shown: 8 Jun 1967\n</span>
            release_match = re.search(
                r'<span class="release">.*?First shown:\s*(.*?)\n.*?</span>', li,
                flags=(re.DOTALL | re.MULTILINE))
            if release_match:
                release = release_match.group(1)
                if release:
                    aired = FirstShownToAired(release)
            episodes = None
            # <a class="view-more-container avail stat" href="/iplayer/episodes/p00db1jf" data-progress-state="">
            # <a class="view-more-container sibling stat"
            # href="/iplayer/search?q=doctor&search_group_id=urn:bbc:programmes:b06qbs4n">
            episodes_match = re.search(
                r'<a class="view-more-container.+?stat".+?href="(.*?)"', li,
                flags=(re.DOTALL | re.MULTILINE))
            if episodes_match:
                episodes = episodes_match.group(1)
            more = None
            # <em class="view-more-heading">27</em>
            more_match = re.search(r'<em class="view-more-heading">(.*?)</em>', li,
                                   flags=(re.DOTALL | re.MULTILINE))
            if more_match:
                more = more_match.group(1)
            if episodes:
                episodes_url = 'http://www.bbc.co.uk' + episodes
                if search_group:
                    AddMenuEntry(
                        '[B]%s[/B] - %s' % (title, translation(30318)),
                        episodes_url, 128, icon, '', '')
                else:
                    AddMenuEntry(
                        '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                        episodes_url, 128, icon, '', '')
            elif more:
                AddMenuEntry(
                    '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                    main_url, 128, icon, '', '')
            # Groups are folders, not playable items, so skip autoplay for them.
            if type != "group":
                CheckAutoplay(name, main_url, icon, synopsis, aired)
            # NOTE(review): under Python 2 "list_item_num / len(list_items)" is
            # integer division, so the per-item fraction is usually 0 — confirm
            # intended before changing.
            percent = int(100 * (page + list_item_num / len(list_items)) / total_pages)
            pDialog.update(percent, translation(30319), name)
            list_item_num += 1
        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))
    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 128, '', '', '')
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    pDialog.close()
def ParseStreams(stream_id): retlist = [] # print "Parsing streams for PID: %s"%stream_id[0] # Open the page with the actual strem information and display the various available streams. NEW_URL = "http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s" % stream_id[ 0] html = OpenURL(NEW_URL) # Parse the different streams and add them as new directory entries. match = re.compile( 'connection authExpires=".+?href="(.+?)".+?supplier="mf_(.+?)".+?transferFormat="(.+?)"' ).findall(html) for m3u8_url, supplier, transfer_format in match: tmp_sup = 0 tmp_br = 0 if transfer_format == 'hls': if supplier == 'akamai_uk_hls': tmp_sup = 1 elif supplier == 'limelight_uk_hls': tmp_sup = 2 m3u8_breakdown = re.compile('(.+?)iptv.+?m3u8(.+?)$').findall( m3u8_url) #print m3u8_breakdown # print m3u8_url m3u8_html = OpenURL(m3u8_url) m3u8_match = re.compile( 'BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)\n(.+?)\n').findall( m3u8_html) for bandwidth, resolution, stream in m3u8_match: # print bandwidth # print resolution #print stream url = "%s%s%s" % (m3u8_breakdown[0][0], stream, m3u8_breakdown[0][1]) #print url if int(bandwidth) == 1012300: tmp_br = 2 elif int(bandwidth) == 1799880: tmp_br = 4 elif int(bandwidth) == 3116400: tmp_br = 6 elif int(bandwidth) == 5509880: tmp_br = 7 retlist.append((tmp_sup, tmp_br, url, resolution)) # It may be useful to parse these additional streams as a default as they offer additional bandwidths. 
match = re.compile( 'kind="video".+?connection href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"' ).findall(html) # print match unique = [] [unique.append(item) for item in match if item not in unique] # print unique for m3u8_url, supplier, transfer_format in unique: tmp_sup = 0 tmp_br = 0 if transfer_format == 'hls': if supplier == 'akamai_hls_open': tmp_sup = 1 elif supplier == 'limelight_hls_open': tmp_sup = 2 m3u8_breakdown = re.compile('.+?master.m3u8(.+?)$').findall( m3u8_url) # print m3u8_url # print m3u8_breakdown m3u8_html = OpenURL(m3u8_url) # print m3u8_html m3u8_match = re.compile( 'BANDWIDTH=(.+?),RESOLUTION=(.+?),.+?\n(.+?)\n').findall(m3u8_html) # print m3u8_match for bandwidth, resolution, stream in m3u8_match: # print bandwidth # print resolution # print stream url = "%s%s" % (stream, m3u8_breakdown[0][0]) # This is not entirely correct, displayed bandwidth may be higher or lower than actual bandwidth. if int(bandwidth) <= 801000: tmp_br = 1 elif int(bandwidth) <= 1510000: tmp_br = 3 elif int(bandwidth) <= 2410000: tmp_br = 5 retlist.append((tmp_sup, tmp_br, url, resolution)) match = re.compile('service="captions".+?connection href="(.+?)"').findall( html) # print "Subtitle URL: %s"%match # print retlist if not match: # print "No streams found" check_geo = re.search('<error id="geolocation"/>', html) if check_geo: # print "Geoblock detected, raising error message" dialog = xbmcgui.Dialog() dialog.ok(translation(30400), translation(30401)) raise return retlist, match
def ListHighlights(highlights_url):
    """Creates a list of the programmes in the highlights section.

    highlights_url: path fragment appended to http://www.bbc.co.uk/
    Side effects: adds Kodi directory entries and sort methods.
    """
    html = OpenURL('http://www.bbc.co.uk/%s' % highlights_url)
    # Collect every innermost anchor; all further parsing works on these.
    inner_anchors = re.findall(r'<a.*?(?!<a).*?</a>', html,
                               flags=(re.DOTALL | re.MULTILINE))
    # First find all groups as we need to store some properties of groups for later reuse.
    group_properties = []
    # NOTE find episode count first
    episode_count = dict()
    groups = [a for a in inner_anchors if re.match(
        r'<a[^<]*?class="grouped-items__cta.*?data-object-type="group-list-link".*?',
        a, flags=(re.DOTALL | re.MULTILINE))]
    for group in groups:
        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"', group,
                              flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)
        count_match = re.search(r'>View all ([0-9]*).*?</a>', group,
                                flags=(re.DOTALL | re.MULTILINE))
        if count_match:
            count = count_match.group(1)
            episode_count[href] = count
    groups = [a for a in inner_anchors if re.match(
        r'<a[^<]*?class="grouped-items__title.*?data-object-type="group-list-link".*?',
        a, flags=(re.DOTALL | re.MULTILINE))]
    for group in groups:
        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"', group,
                              flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)
        name = ''
        name_match = re.search(r'<strong>(.*?)</strong>', group,
                               flags=(re.DOTALL | re.MULTILINE))
        if name_match:
            name = name_match.group(1)
        count = ''
        if href in episode_count:
            count = episode_count[href]
        url = 'http://www.bbc.co.uk' + href
        # Unfortunately, the group type is not inside the links, so we need to search the whole HTML.
        group_type = ''
        group_type_match = re.search(
            r'data-group-name="'+name+'".+?data-group-type="(.+?)"',
            html, flags=(re.DOTALL | re.MULTILINE))
        if group_type_match:
            group_type = group_type_match.group(1)
        position = ''
        position_match = re.search(r'data-object-position="(.+?)-ALL"', group,
                                   flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            group_properties.append(
                [position_match.group(1), name, group_type])
        AddMenuEntry('[B]%s: %s[/B] - %s %s' % (translation(30314), name,
                                                count, translation(30315)),
                     url, 128, '', '', '')
    # Some programmes show up twice in HTML, once inside the groups, once outside.
    # We need to parse both to avoid duplicates and to make sure we get all of them.
    episodelist = []
    # <a\n href="/iplayer/episode/b06tr74y/eastenders-24122015"\n class="grouped-items__list-link
    listeds = [a for a in inner_anchors if re.search(
        r'class="grouped-items__list-link', a, flags=(re.DOTALL | re.MULTILINE))]
    for listed in listeds:
        episode_id = ''
        # <a\n href="/iplayer/episode/b06tr74y/eastenders-24122015"
        id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/', listed,
                            flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)
        name = ''
        # <p class="grouped-items__title grouped-items__title--item typo typo--skylark">
        # <strong>EastEnders</strong></p>
        title_match = re.search(
            r'<.*?class="grouped-items__title.*?<strong>(.*?)</strong>',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            # Strip any markup left inside the captured title.
            name = re.compile(r'<.*?>',
                              flags=(re.DOTALL | re.MULTILINE)).sub('', name)
        # <p class="grouped-items__subtitle typo typo--canary">24/12/2015</p>
        subtitle_match = re.search(
            r'<.*?class="grouped-items__subtitle.*?>(.*?)<',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)
        # Assign correct group based on the position of the episode
        position = ''
        position_match = re.search(r'data-object-position="(.+?)"', listed,
                                   flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            for n, i in enumerate(group_properties):
                if re.match(i[0], position_match.group(1),
                            flags=(re.DOTALL | re.MULTILINE)):
                    position = i[1]
                    # For series-catchup groups, we need to modify the title.
                    if i[2] == 'series-catchup':
                        name = i[1]+': '+name
        episodelist.append(
            [episode_id, name, "%s %s" % (translation(30316), position),
             'DefaultVideo.png', '']
            )
    # < a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"\n
    # class="single-item stat"
    singles = [a for a in inner_anchors if re.search(
        r'class="single-item', a, flags=(re.DOTALL | re.MULTILINE))]
    for single in singles:
        object_type = ''
        # data-object-type="episode-backfill"
        data_object_type = re.search(r'data-object-type="(.*?)"', single,
                                     flags=(re.DOTALL | re.MULTILINE))
        if data_object_type:
            object_type = data_object_type.group(1)
        # Backfill entries are only shown for a few specific channels.
        if object_type == "episode-backfill":
            if (highlights_url not in ['tv/bbcnews', 'tv/bbcparliament', 'tv/s4c']):
                continue
        episode_id = ''
        url = ''
        # <a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"
        if object_type == "editorial-promo":
            id_match = re.match(r'<a.*?href="(.*?)"', single,
                                flags=(re.DOTALL | re.MULTILINE))
        else:
            id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/', single,
                                flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)
            url = 'http://www.bbc.co.uk/iplayer/episode/' + episode_id
        name = ''
        # <h3 class="single-item__title typo typo--skylark"><strong>BBC Music Introducing</strong></h3>
        title_match = re.search(
            r'<.*?class="single-item__title.*?<strong>(.*?)</strong>',
            single, flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            name = re.compile(r'<.*?>',
                              flags=(re.DOTALL | re.MULTILINE)).sub('', name)
        # <p class="single-item__subtitle typo typo--canary">From Buddhist Monk to Rock Star</p>
        subtitle_match = re.search(
            r'<.*?class="single-item__subtitle.*?>(.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)
        icon = ''
        # <div class="r-image" data-ip-type="episode"
        # data-ip-src="http://ichef.bbci.co.uk/images/ic/406x228/p036gtc5.jpg">
        image_match = re.search(r'<.*?class="r-image.*?data-ip-src="(.*?)"',
                                single, flags=(re.DOTALL | re.MULTILINE))
        if image_match:
            icon = image_match.group(1)
        desc = ''
        # <p class="single-item__overlay__desc">
        # The remarkable rise of Ngawang Lodup - from BBC Introducing to performing at the O2 Arena</p>
        desc_match = re.search(
            r'<.*?class="single-item__overlay__desc.*?>(.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if desc_match:
            desc = desc_match.group(1)
        aired = ''
        # <p class="single-item__overlay__subtitle">First shown: 4 Nov 2015</p>
        release_match = re.search(
            r'<.*?class="single-item__overlay__subtitle">First shown: (.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if release_match:
            release = release_match.group(1)
            if release:
                aired = FirstShownToAired(release)
        # If this single was already seen inside a group, enrich the group
        # entry instead of adding a duplicate.
        add_entry = True
        for n, i in enumerate(episodelist):
            if i[0] == episode_id:
                episodelist[n][2] = desc
                episodelist[n][3] = icon
                episodelist[n][4] = aired
                add_entry = False
        if add_entry:
            if object_type == "editorial-promo":
                if episode_id:
                    AddMenuEntry('[B]%s[/B]' % (name), episode_id, 128,
                                 icon, '', '')
            else:
                if url:
                    CheckAutoplay(name, url, icon, desc, aired)
    # Finally add all programmes which have been identified as part of a group before.
    for episode in episodelist:
        episode_url = "http://www.bbc.co.uk/iplayer/episode/%s" % episode[0]
        if ((ADDON.getSetting('suppress_incomplete') == 'false')
                or (not episode[4] == '')):
            if episode[0]:
                CheckAutoplay(episode[1], episode_url, episode[3],
                              episode[2], episode[4])
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
def GetPage(page_url, just_episodes=False):
    """Generic Radio page scraper.

    Scrapes the JSON-LD metadata embedded in a BBC radio page and creates a
    playable menu entry for each episode found.  Honours the
    'radio_paginate_episodes' setting: when 0 only the current page is listed
    plus a ">>" entry for the next page, otherwise all pages are scraped.

    page_url: full URL of the radio page to scrape.
    just_episodes: kept for interface compatibility; unused in this
        implementation (the title always uses the masthead title).
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    html = OpenURL(page_url)
    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            # Pagination disabled: list only the current page; the next page
            # URL feeds the ">>" entry added below.
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            next_page_match = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                paginate.group(0))
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            # Pagination enabled: derive the last page number from the final
            # pager item and scrape every page.
            pages = re.findall(r'<li.+?class="pagination__page.*?</li>',
                               paginate.group(0))
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
                page_range = list(range(1, total_pages + 1))
    for page in page_range:
        if page > current_page:
            # Page one was fetched above; only fetch subsequent pages here.
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)
        masthead_title = ''
        masthead_title_match = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>',
            html)
        if masthead_title_match:
            masthead_title = masthead_title_match.group(1)
        else:
            # Some pages use the newer br-masthead markup instead.
            alternative_masthead_title_match = re.search(
                r'<div class="br-masthead__title">.*?<a href="[^"]+">([^<]+?)</a>',
                html, re.M | re.S)
            if alternative_masthead_title_match:
                masthead_title = alternative_masthead_title_match.group(1)
        list_item_num = 1
        data_match = re.findall(
            r'<script type="application\/ld\+json">(.*?)<\/script>', html, re.S)
        if data_match:
            json_data = json.loads(data_match[0])
            for episode in json_data['episode']:
                programme_id = episode['identifier']
                name = episode['name']
                title = "[B]%s[/B] - %s" % (masthead_title, name)
                # FIX: was "imafe = ''" — a dead misspelled variable; the
                # value actually used is episode['image'].
                image = episode['image']
                synopsis = episode['description']
                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, synopsis, '')
                percent = int(100 * (page + list_item_num /
                                     len(json_data['episode'])) / total_pages)
                pDialog.update(percent, translation(30319), name)
                list_item_num += 1
        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))
    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 136, '', '', '')
    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    pDialog.close()
def GetAtoZPage(page_url, just_episodes=False):
    """ Generic Radio page scraper.

    Parses a radio A-Z listing page: one <li class="grid one-whole"> block per
    programme.  Creates a folder entry for series (data-lazylink-inc) or a
    playable entry for single programmes (data-pid).

    page_url: full URL of the A-Z page to scrape.
    just_episodes: when True the masthead title is used as entry prefix
        instead of the station name.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    html = OpenURL(page_url)
    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            # Pagination disabled: only list the current page; the next page
            # feeds the ">>" entry added below.
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = list(range(current_page, current_page + 1))
            next_page_match = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                paginate.group(0))
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            # Pagination enabled: scrape every page up to the last one.
            pages = re.findall(r'<li.+?class="pagination__page.*?</li>',
                               paginate.group(0))
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
                page_range = list(range(1, total_pages + 1))
    for page in page_range:
        if page > current_page:
            # Page one was fetched above; only fetch subsequent pages here.
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)
        masthead_title = ''
        masthead_title_match = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>',
            html)
        if masthead_title_match:
            masthead_title = masthead_title_match.group(1)
        else:
            # Some pages use the newer br-masthead markup instead.
            alternative_masthead_title_match = re.search(
                r'<div class="br-masthead__title">.*?<a href="[^"]+">([^<]+?)</a>',
                html, re.M | re.S)
            if alternative_masthead_title_match:
                masthead_title = alternative_masthead_title_match.group(1)
        list_item_num = 1
        programmes = html.split('<li class="grid one-whole">')
        for programme in programmes:
            # Only radio programmes are of interest on this page.
            if not re.search(r'programme--radio', programme):
                continue
            series_id = ''
            series_id_match = re.search(
                r'data-lazylink-inc="/programmes/(.+?)/episodes/player.inc"',
                programme)
            if series_id_match:
                series_id = series_id_match.group(1)
            programme_id = ''
            programme_id_match = re.search(r'data-pid="(.+?)"', programme)
            if programme_id_match:
                programme_id = programme_id_match.group(1)
            name = ''
            name_match = re.search(r'<span property="name">(.+?)</span>',
                                   programme)
            if name_match:
                name = name_match.group(1)
            else:
                alternative_name_match = re.search(
                    r'<meta property="name" content="([^"]+?)"', programme)
                if alternative_name_match:
                    name = alternative_name_match.group(1)
            image = ''
            image_match = re.search(
                r'<meta property="image" content="(.+?)" />', programme)
            if image_match:
                image = image_match.group(1)
            synopsis = ''
            synopsis_match = re.search(
                r'<span property="description">(.+?)<\/span>', programme)
            if synopsis_match:
                synopsis = synopsis_match.group(1)
            station = ''
            station_match = re.search(
                r'<p class="programme__service.+?<strong>(.+?)<\/strong>.*?<\/p>',
                programme)
            if station_match:
                station = station_match.group(1).strip()
            series_title = "[B]%s - %s[/B]" % (station, name)
            if just_episodes:
                title = "[B]%s[/B] - %s" % (masthead_title, name)
            else:
                title = "[B]%s[/B] - %s" % (station, name)
            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, '')
            elif programme_id:
                #TODO maybe they are not always mutually exclusive
                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, ' ', '')
            # NOTE(review): under Python 2 "list_item_num / len(programmes)"
            # is integer division — the per-item fraction is usually 0.
            percent = int(100 * (page + list_item_num / len(programmes)) / total_pages)
            pDialog.update(percent, translation(30319), name)
            list_item_num += 1
        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))
    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 138, '', '', '')
    #BUG: this should sort by original order but it doesn't (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    pDialog.close()
def ListHighlights(highlights_url):
    """Creates a list of the programmes in the highlights section.

    highlights_url: path fragment appended to http://www.bbc.co.uk/
    Side effects: adds Kodi directory entries and sort methods.

    NOTE(review): this is a reformatted duplicate of an earlier ListHighlights
    in this file; the later definition wins at import time.  It differs from
    the other copy in the trailing guards (no episode_id/url checks before
    adding entries) — confirm which version is intended and remove the other.
    """
    html = OpenURL('http://www.bbc.co.uk/%s' % highlights_url)
    # Collect every innermost anchor; all further parsing works on these.
    inner_anchors = re.findall(r'<a.*?(?!<a).*?</a>', html,
                               flags=(re.DOTALL | re.MULTILINE))
    # First find all groups as we need to store some properties of groups for later reuse.
    group_properties = []
    # NOTE find episode count first
    episode_count = dict()
    groups = [
        a for a in inner_anchors if re.match(
            r'<a[^<]*?class="grouped-items__cta.*?data-object-type="group-list-link".*?',
            a, flags=(re.DOTALL | re.MULTILINE))
    ]
    for group in groups:
        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"', group,
                              flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)
        count_match = re.search(r'>View all ([0-9]*).*?</a>', group,
                                flags=(re.DOTALL | re.MULTILINE))
        if count_match:
            count = count_match.group(1)
            episode_count[href] = count
    groups = [
        a for a in inner_anchors if re.match(
            r'<a[^<]*?class="grouped-items__title.*?data-object-type="group-list-link".*?',
            a, flags=(re.DOTALL | re.MULTILINE))
    ]
    for group in groups:
        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"', group,
                              flags=(re.DOTALL | re.MULTILINE))
        if href_match:
            href = href_match.group(1)
        name = ''
        name_match = re.search(r'<strong>(.*?)</strong>', group,
                               flags=(re.DOTALL | re.MULTILINE))
        if name_match:
            name = name_match.group(1)
        count = ''
        if href in episode_count:
            count = episode_count[href]
        url = 'http://www.bbc.co.uk' + href
        # Unfortunately, the group type is not inside the links, so we need to search the whole HTML.
        group_type = ''
        group_type_match = re.search(r'data-group-name="' + name +
                                     '".+?data-group-type="(.+?)"',
                                     html, flags=(re.DOTALL | re.MULTILINE))
        if group_type_match:
            group_type = group_type_match.group(1)
        position = ''
        position_match = re.search(r'data-object-position="(.+?)-ALL"', group,
                                   flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            group_properties.append(
                [position_match.group(1), name, group_type])
        AddMenuEntry(
            '[B]%s: %s[/B] - %s %s' % (translation(30314), name, count,
                                       translation(30315)),
            url, 128, '', '', '')
    # Some programmes show up twice in HTML, once inside the groups, once outside.
    # We need to parse both to avoid duplicates and to make sure we get all of them.
    episodelist = []
    # <a\n href="/iplayer/episode/b06tr74y/eastenders-24122015"\n class="grouped-items__list-link
    listeds = [
        a for a in inner_anchors
        if re.search(r'class="grouped-items__list-link', a,
                     flags=(re.DOTALL | re.MULTILINE))
    ]
    for listed in listeds:
        episode_id = ''
        # <a\n href="/iplayer/episode/b06tr74y/eastenders-24122015"
        id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/', listed,
                            flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)
        name = ''
        # <p class="grouped-items__title grouped-items__title--item typo typo--skylark">
        # <strong>EastEnders</strong></p>
        title_match = re.search(
            r'<.*?class="grouped-items__title.*?<strong>(.*?)</strong>',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            # Strip any markup left inside the captured title.
            name = re.compile(r'<.*?>',
                              flags=(re.DOTALL | re.MULTILINE)).sub('', name)
        # <p class="grouped-items__subtitle typo typo--canary">24/12/2015</p>
        subtitle_match = re.search(
            r'<.*?class="grouped-items__subtitle.*?>(.*?)<',
            listed, flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)
        # Assign correct group based on the position of the episode
        position = ''
        position_match = re.search(r'data-object-position="(.+?)"', listed,
                                   flags=(re.DOTALL | re.MULTILINE))
        if position_match:
            for n, i in enumerate(group_properties):
                if re.match(i[0], position_match.group(1),
                            flags=(re.DOTALL | re.MULTILINE)):
                    position = i[1]
                    # For series-catchup groups, we need to modify the title.
                    if i[2] == 'series-catchup':
                        name = i[1] + ': ' + name
        episodelist.append([
            episode_id, name,
            "%s %s" % (translation(30316), position),
            'DefaultVideo.png', ''
        ])
    # < a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"\n
    # class="single-item stat"
    singles = [
        a for a in inner_anchors if re.search(
            r'class="single-item', a, flags=(re.DOTALL | re.MULTILINE))
    ]
    for single in singles:
        object_type = ''
        # data-object-type="episode-backfill"
        data_object_type = re.search(r'data-object-type="(.*?)"', single,
                                     flags=(re.DOTALL | re.MULTILINE))
        if data_object_type:
            object_type = data_object_type.group(1)
        # Backfill entries are only shown for a few specific channels.
        if object_type == "episode-backfill":
            if (highlights_url not in ['tv/bbcnews', 'tv/bbcparliament', 'tv/s4c']):
                continue
        episode_id = ''
        url = ''
        # <a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"
        if object_type == "editorial-promo":
            id_match = re.match(r'<a.*?href="(.*?)"', single,
                                flags=(re.DOTALL | re.MULTILINE))
        else:
            id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/', single,
                                flags=(re.DOTALL | re.MULTILINE))
        if id_match:
            episode_id = id_match.group(1)
            url = 'http://www.bbc.co.uk/iplayer/episode/' + episode_id
        name = ''
        # <h3 class="single-item__title typo typo--skylark"><strong>BBC Music Introducing</strong></h3>
        title_match = re.search(
            r'<.*?class="single-item__title.*?<strong>(.*?)</strong>',
            single, flags=(re.DOTALL | re.MULTILINE))
        if title_match:
            name = title_match.group(1)
            name = re.compile(r'<.*?>',
                              flags=(re.DOTALL | re.MULTILINE)).sub('', name)
        # <p class="single-item__subtitle typo typo--canary">From Buddhist Monk to Rock Star</p>
        subtitle_match = re.search(
            r'<.*?class="single-item__subtitle.*?>(.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)
        icon = ''
        # <div class="r-image" data-ip-type="episode"
        # data-ip-src="http://ichef.bbci.co.uk/images/ic/406x228/p036gtc5.jpg">
        image_match = re.search(r'<.*?class="r-image.*?data-ip-src="(.*?)"',
                                single, flags=(re.DOTALL | re.MULTILINE))
        if image_match:
            icon = image_match.group(1)
        desc = ''
        # <p class="single-item__overlay__desc">
        # The remarkable rise of Ngawang Lodup - from BBC Introducing to performing at the O2 Arena</p>
        desc_match = re.search(
            r'<.*?class="single-item__overlay__desc.*?>(.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if desc_match:
            desc = desc_match.group(1)
        aired = ''
        # <p class="single-item__overlay__subtitle">First shown: 4 Nov 2015</p>
        release_match = re.search(
            r'<.*?class="single-item__overlay__subtitle">First shown: (.*?)<',
            single, flags=(re.DOTALL | re.MULTILINE))
        if release_match:
            release = release_match.group(1)
            if release:
                aired = FirstShownToAired(release)
        # If this single was already seen inside a group, enrich the group
        # entry instead of adding a duplicate.
        add_entry = True
        for n, i in enumerate(episodelist):
            if i[0] == episode_id:
                episodelist[n][2] = desc
                episodelist[n][3] = icon
                episodelist[n][4] = aired
                add_entry = False
        if add_entry:
            if object_type == "editorial-promo":
                AddMenuEntry('[B]%s[/B]' % (name), episode_id, 128,
                             icon, '', '')
            else:
                CheckAutoplay(name, url, icon, desc, aired)
    # Finally add all programmes which have been identified as part of a group before.
    for episode in episodelist:
        episode_url = "http://www.bbc.co.uk/iplayer/episode/%s" % episode[0]
        if ((ADDON.getSetting('suppress_incomplete') == 'false')
                or (not episode[4] == '')):
            CheckAutoplay(episode[1], episode_url, episode[3],
                          episode[2], episode[4])
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
def GetPage(page_url, just_episodes=False):
    """Generic BBC Radio programmes-page scraper.

    Fetches *page_url*, splits the HTML into per-programme fragments, and adds
    a Kodi directory entry for each radio programme found: series entries go
    through AddMenuEntry (mode 131), single playable programmes through
    CheckAutoplay. Handles the page's pagination either by walking every page
    (scrape-all mode) or by adding a ">>" next-page entry, depending on the
    'radio_paginate_episodes' addon setting. A background progress dialog is
    shown while scraping.

    :param page_url: Absolute URL of the bbc.co.uk programmes listing page.
    :param just_episodes: When True, titles are prefixed with the page's
        masthead title instead of the station name + subtitle.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))
    html = OpenURL(page_url)
    # Defaults cover the "no pagination markup found" case: one page only.
    total_pages = 1
    current_page = 1
    page_range = range(1)
    paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting("radio_paginate_episodes")) == 0:
            # Setting 0: do NOT scrape all pages; show only the current page
            # and (below) append a ">>" entry linking to the next one.
            current_page_match = re.search(r"page=(\d*)", page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            page_range = range(current_page, current_page + 1)
            next_page_match = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                paginate.group(0))
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                # No "next" link: we are on the last page.
                next_page = current_page
            page_range = range(current_page, current_page + 1)
        else:
            # Scrape-all mode: derive the page count from the last
            # pagination entry and iterate every page.
            pages = re.findall(r'<li.+?class="pagination__page.*?</li>',
                               paginate.group(0))
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            # NOTE(review): if the findall above matches nothing,
            # page_base_url stays unbound yet may be used below — relies on
            # the site markup always providing page links; confirm.
            page_range = range(1, total_pages + 1)
    for page in page_range:
        if page > current_page:
            # Pages after the first must be fetched; page 1's HTML was
            # already loaded above.
            page_url = "http://www.bbc.co.uk" + page_base_url + str(page)
            html = OpenURL(page_url)
        masthead_title = ""
        masthead_title_match = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>',
            html
        )
        if masthead_title_match:
            masthead_title = masthead_title_match.group(1)
        list_item_num = 1
        # Each fragment after the split corresponds to one programme div;
        # only radio programmes that are currently available are listed.
        programmes = html.split('<div class="programme ')
        for programme in programmes:
            if not programme.startswith("programme--radio"):
                continue
            if "available" not in programme:
                # TODO find a more robust test
                continue
            series_id = ""
            series_id_match = re.search(
                r'<a class="iplayer-text js-lazylink__link" href="/programmes/(.+?)/episodes/player"',
                programme
            )
            if series_id_match:
                series_id = series_id_match.group(1)
            programme_id = ""
            programme_id_match = re.search(r'data-pid="(.+?)"', programme)
            if programme_id_match:
                programme_id = programme_id_match.group(1)
            name = ""
            name_match = re.search(r'<span property="name">(.+?)</span>',
                                   programme)
            if name_match:
                name = name_match.group(1)
            # Subtitle may carry one part (series) or two (series, episode).
            subtitle = ""
            subtitle_match = re.search(
                r'<span class="programme__subtitle.+?property="name">(.*?)</span>(.*?property="name">(.*?)</span>)?',
                programme,
            )
            if subtitle_match:
                series = subtitle_match.group(1)
                episode = subtitle_match.group(3)
                if episode:
                    subtitle = "(%s, %s)" % (series, episode)
                else:
                    if series.strip():
                        subtitle = "(%s)" % series
            image = ""
            image_match = re.search(
                r'<meta property="image" content="(.+?)" />', programme)
            if image_match:
                image = image_match.group(1)
            synopsis = ""
            synopsis_match = re.search(
                r'<span property="description">(.+?)</span>', programme)
            if synopsis_match:
                synopsis = synopsis_match.group(1)
            station = ""
            station_match = re.search(
                r'<p class="programme__service.+?<strong>(.+?)</strong>.*?</p>',
                programme)
            if station_match:
                station = station_match.group(1).strip()
            series_title = "[B]%s - %s[/B]" % (station, name)
            if just_episodes:
                title = "[B]%s[/B] - %s" % (masthead_title, name)
            else:
                title = "[B]%s[/B] - %s %s" % (station, name, subtitle)
            if series_id:
                # Whole series: open its episode list (mode 131).
                AddMenuEntry(series_title, series_id, 131, image, synopsis, "")
            elif programme_id:
                # TODO maybe they are not always mutually exclusive
                url = "http://www.bbc.co.uk/programmes/%s" % programme_id
                CheckAutoplay(title, url, image, " ", "")
            # NOTE(review): under Python 2, list_item_num / len(programmes)
            # is integer division and is 0 until the last item, so the
            # per-item progress barely moves — presumably a fractional
            # contribution was intended; confirm before changing.
            percent = int(100 * (page + list_item_num / len(programmes)) / total_pages)
            pDialog.update(percent, translation(30319), name)
            list_item_num += 1
        # Page-level progress after finishing this page's programmes.
        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))
    if int(ADDON.getSetting("radio_paginate_episodes")) == 0:
        if current_page < next_page:
            # Add a ">>" entry that re-enters this scraper (mode 136) for
            # the next page instead of scraping it now.
            page_url = "http://www.bbc.co.uk" + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 136, "", "", "")
    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    pDialog.close()