def ListGenres():
    """List the BBC radio genres as menu entries.

    The genre index page is cut into one chunk per top-level category.
    Each category is added in bold, followed by its sub-genres labelled
    "<category> - <sub-genre>". Every entry points at the genre's
    "/player/episodes" page.
    """
    page = OpenURL("http://www.bbc.co.uk/radio/programmes/genres")
    main_link = r'<a.+?class="beta box-link".+?href="(.+?)">(.+?)</a>'
    sub_link = r'<a.+?class="box-link".+?href="(.+?)">(.+?)</a>'

    # Collected as (relative url, label, is_top_level).
    entries = []
    for chunk in page.split('<li class="category br-keyline highlight-box--list">'):
        top = re.search(main_link, chunk)
        if not top:
            # Chunk without a category heading (e.g. the page preamble).
            continue
        top_name = top.group(2)
        entries.append((top.group(1), top_name, True))
        entries.extend(
            (sub_url, top_name + " - " + sub_name, False)
            for sub_url, sub_name in re.findall(sub_link, chunk))

    for path, label, is_top_level in entries:
        target = "http://www.bbc.co.uk%s/player/episodes" % path
        caption = ("[B]%s[/B]" if is_top_level else "%s") % label
        AddMenuEntry(caption, target, 136, "", "", "")

    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    handle = int(sys.argv[1])
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_VIDEO_TITLE)
def ListGenres():
    """Build the radio genre menu from the BBC genre index page.

    Top-level categories are shown in bold; their sub-genres follow as
    "<category> - <sub-genre>". Entries link straight to the genre URL.
    """
    category_re = r'<a.+?class="gel-double-pica-bold".+?href="(.+?)">(.+?)</a>'
    subgenre_re = r'<a.+?class="gel-long-primer-bold".+?href="(.+?)">(.+?)</a>'

    html = OpenURL('http://www.bbc.co.uk/radio/programmes/genres')
    boxes = html.split('<div class="category__box island--vertical">')

    # Each item is (relative url, label, is_category).
    found = []
    for box in boxes:
        heading = re.search(category_re, box)
        if heading is None:
            continue
        category_url, category_name = heading.group(1), heading.group(2)
        found.append((category_url, category_name, True))
        for sub_url, sub_name in re.findall(subgenre_re, box):
            found.append((sub_url, category_name + ' - ' + sub_name, False))

    for rel_url, label, is_category in found:
        full_url = 'http://www.bbc.co.uk%s' % rel_url
        if is_category:
            label_text = "[B]%s[/B]" % label
        else:
            label_text = "%s" % label
        AddMenuEntry(label_text, full_url, 137, '', '', '')

    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    plugin_handle = int(sys.argv[1])
    for method in (xbmcplugin.SORT_METHOD_UNSORTED,
                   xbmcplugin.SORT_METHOD_VIDEO_TITLE):
        xbmcplugin.addSortMethod(plugin_handle, method)
def ScrapeAvailableStreams(url):
    """Collect the stream IDs (vpids) advertised on a programme page.

    Args:
        url: Address of the programme page to scrape.

    Returns:
        A dict with the keys 'stream_id_st' (standard), 'stream_id_sl'
        (signed) and 'stream_id_ad' (audio described). Each value is the
        list of IDs found, which may be empty. Signed and audio-described
        IDs are only looked up when the matching add-on setting is 'true'.
    """
    vpid_re = re.compile('"vpid":"(.+?)"')

    # Open page and retrieve the standard stream ID.
    html = OpenURL(url)
    stream_id_st = vpid_re.findall(html)

    # Optionally, signed programmes can be searched for. These have a different ID.
    stream_id_sl = []
    if ADDON.getSetting('search_signed') == 'true':
        stream_id_sl = re.compile(
            'data-download-sl="bbc-ipd:download/.+?/(.+?)/sd/').findall(html)

    # Optionally, audio-described programmes can be searched for. Their ID
    # lives on a separate page linked from this one.
    stream_id_ad = []
    if ADDON.getSetting('search_ad') == 'true':
        url_ad = re.compile(
            '<a href="(.+?)" class="version link watch-ad-on"').findall(html)
        # Not every programme links an audio-described version; indexing the
        # empty result used to raise IndexError.
        if url_ad:
            ad_html = OpenURL("http://www.bbc.co.uk%s" % url_ad[0])
            stream_id_ad = vpid_re.findall(ad_html)

    return {
        'stream_id_st': stream_id_st,
        'stream_id_sl': stream_id_sl,
        'stream_id_ad': stream_id_ad,
    }
def ListFollowing(logged_in):
    """Scrape the followed-programmes page and list its series and episodes.

    When the login check fails, the audio base directory is shown instead.
    Each followed item yields a series entry (if a series link is found) and
    a playable episode entry (if an episode link is found).
    """
    # Explicit comparison kept: a None result does not take this branch.
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('audio')
        return

    series_re = r'<a aria-label="(.*?)" class="follows__image-link" href="http://www.bbc.co.uk/programmes/(.*?)">'
    episode_re = r'<a aria-label="(.*?)" class="size-e clr-white" href="http://www.bbc.co.uk/programmes/(.*?)#play"'
    image_re = r'<img class="media__image" src="(.*?)"'
    station_re = r'<a href="(.*?)" class="clr-light-grey">\s*(.*?)\s*</a>'

    page = OpenURL('https://www.bbc.co.uk/radio/favourites/programmes')
    for item in page.split('<div class="favourites follow '):
        if not item.startswith('media'):
            continue

        series_hit = re.search(series_re, item)
        series_name, series_id = (
            (series_hit.group(1), series_hit.group(2)) if series_hit else ('', ''))

        episode_hit = re.search(episode_re, item)
        episode_name, episode_id = (
            (episode_hit.group(1), episode_hit.group(2)) if episode_hit else ('', ''))

        # The single image found is used for both the series and the episode.
        image_hit = re.search(image_re, item)
        series_image = "https:%s" % image_hit.group(1) if image_hit else ''
        episode_image = series_image

        station_hit = re.search(station_re, item, flags=(re.DOTALL | re.MULTILINE))
        station = station_hit.group(2).strip() if station_hit else ''

        if series_id:
            AddMenuEntry("%s - %s" % (station, series_name), series_id, 131,
                         series_image, '', '')

        if episode_id:
            if series_name:
                episode_title = "%s - %s - %s" % (station, series_name, episode_name)
            else:
                episode_title = "%s - %s" % (station, episode_name)
            CheckAutoplay(episode_title,
                          "http://www.bbc.co.uk/programmes/%s" % episode_id,
                          episode_image, ' ', '')

    handle = int(sys.argv[1])
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_UNSORTED)
def ListFollowing(logged_in):
    """List followed radio programmes, with the station name in bold.

    Falls back to the audio base directory when the login check fails.
    For every followed item a series entry and/or a playable episode entry
    is added, depending on which links the page fragment contains.
    """
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('audio')
        return

    def _groups(pattern, text, count, flags=0):
        # Return the first `count` groups of the match, or empty strings.
        hit = re.search(pattern, text, flags)
        if hit is None:
            return ('',) * count
        return tuple(hit.group(i) for i in range(1, count + 1))

    html = OpenURL('https://www.bbc.co.uk/radio/favourites/programmes')
    fragments = html.split('<div class="favourites follow ')

    for fragment in fragments:
        if not fragment.startswith('media'):
            continue

        series_name, series_id = _groups(
            r'<a aria-label="(.*?)" class="follows__image-link" href="http://www.bbc.co.uk/programmes/(.*?)">',
            fragment, 2)
        episode_name, episode_id = _groups(
            r'<a aria-label="(.*?)" class="size-e clr-white" href="http://www.bbc.co.uk/programmes/(.*?)#play"',
            fragment, 2)

        series_image = ''
        image_src = re.search(r'<img class="media__image" src="(.*?)"', fragment)
        if image_src:
            series_image = "https:%s" % image_src.group(1)
        # Episodes reuse the series artwork.
        episode_image = series_image

        station = _groups(
            r'<a href="(.*?)" class="clr-light-grey">\s*(.*?)\s*</a>',
            fragment, 2, re.DOTALL | re.MULTILINE)[1].strip()

        description = ''
        if series_id:
            series_title = "[B]%s - %s[/B]" % (station, series_name)
            AddMenuEntry(series_title, series_id, 131, series_image, description, '')

        if not episode_id:
            continue
        if series_name:
            episode_title = "[B]%s[/B] - %s - %s" % (station, series_name, episode_name)
        else:
            episode_title = "[B]%s[/B] - %s" % (station, episode_name)
        episode_url = "http://www.bbc.co.uk/programmes/%s" % episode_id
        CheckAutoplay(episode_title, episode_url, episode_image, ' ', '')

    plugin_handle = int(sys.argv[1])
    xbmcplugin.addSortMethod(plugin_handle, xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(plugin_handle, xbmcplugin.SORT_METHOD_UNSORTED)
def ListWatching(logged_in):
    """List the episodes returned by the iBL "watching" endpoint.

    Shows the video base directory instead when the login check fails.
    The value of the 'IDENTITY' cookie from the cookie jar is sent as the
    `identity_cookie` query parameter (None if no such cookie exists).
    """
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('video')
        return

    # First cookie named IDENTITY wins.
    identity_cookie = next(
        (cookie.value for cookie in GetCookieJar() if cookie.name == 'IDENTITY'),
        None)

    api_url = "https://ibl.api.bbci.co.uk/ibl/v1/user/watching?identity_cookie=%s" % identity_cookie
    payload = json.loads(OpenURL(api_url))

    for entry in payload.get('watching').get('elements'):
        episode = entry.get('episode')

        label = episode.get('title')
        subtitle = episode.get('subtitle')
        if subtitle:
            label += ", " + subtitle

        plot = episode.get('synopses').get('large') or " "
        image_url = ParseImageUrl(episode.get('images').get('standard'))
        aired = ParseAired(episode.get('release_date'))
        episode_url = "http://www.bbc.co.uk/iplayer/episode/%s" % (episode.get('id'))
        CheckAutoplay(label, episode_url, image_url, plot, aired)
def GetMultipleEpisodes(url):
    """List all episodes of the programme that the given episode belongs to.

    The episode page embeds JSON; its `episode.tleoId` value is handed to
    GetEpisodes. Nothing happens when the page carries no usable JSON or no
    `tleoId`.

    Args:
        url: Address of an episode page.
    """
    html = OpenURL(url)
    # There is a new layout for episodes, scrape it from the JSON received
    # as part of the page.
    json_data = ScrapeJSON(html)
    # Same guard as the other ScrapeJSON callers in this file: without it a
    # falsy result raised TypeError/KeyError on the subscripts below.
    if not json_data:
        return
    tleo_id = json_data.get('episode', {}).get('tleoId')
    if tleo_id:
        GetEpisodes(tleo_id)
def PlayStream(name, url, iconimage, description, subtitles_url):
    """Resolve a video stream for playback and optionally attach subtitles.

    The URL is fetched first; an "Access Denied" page or an empty response
    shows the geoblock dialog and aborts. Subtitles are downloaded and set
    on the player only when a subtitles URL is given and the 'subtitles'
    setting is 'true'.
    """
    html = OpenURL(url)
    blocked = re.search('<H1>Access Denied</H1>', html)
    if blocked or not html:
        xbmcgui.Dialog().ok(translation(30400), translation(30401))
        # Bare raise kept from the original: with no active exception this
        # itself raises and aborts the call.
        raise

    item = xbmcgui.ListItem(name, iconImage='DefaultVideo.png', thumbnailImage=iconimage)
    item.setInfo(type='Video', infoLabels={'Title': name})
    item.setProperty("IsPlayable", "true")
    item.setPath(url)

    use_subtitles = subtitles_url and ADDON.getSetting('subtitles') == 'true'
    if use_subtitles:
        subtitles_file = download_subtitles(subtitles_url)

    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, item)

    if use_subtitles:
        # Poll until playback has actually started before attaching subtitles.
        while not xbmc.Player().isPlaying():
            xbmc.sleep(500)
        xbmc.Player().setSubtitles(subtitles_file)
def ParseLiveStreams(channelname, providers):
    """Return the HLS variants of a live channel, highest bitrate first.

    The connections come from ParseMediaselector(channelname); only those
    with transfer format 'hls' and a recognised supplier are used. Each
    result is a list of the five playlist fields (program id, bitrate in
    Mbps, codecs, resolution, playlist URL) plus the supplier display name.
    `providers` is not read by this implementation.
    """
    # Checked in order; the first key contained in the supplier name wins.
    supplier_labels = (
        ('akamai', 'Akamai'),
        ('ll', 'Limelight'),
        ('limelight', 'Limelight'),
        ('bidi', 'Bidi'),
        ('cloudfront', 'Cloudfront'),
    )
    variant_re = re.compile(
        '#EXT-X-STREAM-INF:PROGRAM-ID=(.+?),BANDWIDTH=(.+?),CODECS="(.*?)",RESOLUTION=(.+?)\s*(.+?.m3u8)'
    )

    streams = []
    connections = ParseMediaselector(channelname)[0]
    for provider_url, protocol, provider_name, transfer_format in connections:
        if transfer_format != 'hls':
            continue
        variants = variant_re.findall(OpenURL(provider_url))
        label = next(
            (shown for key, shown in supplier_labels if key in provider_name),
            None)
        if label is None:
            # Unknown supplier: skip its variants.
            continue
        streams.extend(list(variant) + [label] for variant in variants)

    # Convert bitrate to Mbps for further processing.
    for stream in streams:
        stream[1] = round(int(stream[1]) / 1000000.0, 1)

    streams_by_bitrate = sorted(streams, key=lambda entry: entry[1], reverse=True)
    return streams_by_bitrate
def ParseStreams(stream_id):
    """Query the mediaselector for a vpid and list its stream connections.

    Args:
        stream_id: Sequence whose first element is the vpid to look up.

    Returns:
        A pair (retlist, match). `retlist` holds one tuple
        (supplier, bitrate, url, encoding) per connection, where supplier is
        1 for Akamai, 2 for Limelight, or the raw supplier string otherwise.
        `match` is the raw list of (bitrate, encoding, connections-markup)
        tuples found in the response.
    """
    # The random `cb` value is magic taken from get_iplayer.
    NEW_URL = "http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/apple-ipad-hls/vpid/%s/proto/http?cb=%d" % (
        stream_id[0], random.randrange(10000, 99999))
    html = OpenURL(NEW_URL)

    media_re = re.compile(
        'media.+?bitrate="(.+?)".+?encoding="(.+?)"(.+?)<\/media>')
    connection_re = re.compile('<connection.+?href="(.+?)".+?supplier="(.+?)"')

    match = media_re.findall(html)
    retlist = []
    for bitrate, encoding, connections in match:
        for url, supplier in connection_re.findall(connections):
            if 'akamai' in supplier:
                supplier_code = 1
            elif 'limelight' in supplier:
                supplier_code = 2
            else:
                supplier_code = supplier
            retlist.append((supplier_code, bitrate, url, encoding))
    return retlist, match
def ParseLiveStreams(channelname, providers):
    """Fetch the live HLS manifests of a channel from each provider.

    Args:
        channelname: Channel identifier used in the manifest URL.
        providers: Iterable of (url_part, display_name) pairs; an empty
            string selects the built-in Akamai and Limelight pair.

    Returns:
        Lists of [program id, bitrate in Mbps, codecs, resolution, playlist
        URL, provider name], sorted by bitrate, highest first.
    """
    if providers == '':
        providers = [('ak', 'Akamai'), ('llnw', 'Limelight')]

    # These channels are requested with the tablet profile, all others with
    # the HD TV profile.
    tablet_channels = (
        'bbc_parliament', 'bbc_alba', 's4cpbs', 'bbc_one_london',
        'bbc_two_wales_digital', 'bbc_two_northern_ireland_digital',
        'bbc_two_scotland',
    )
    device = 'hls_tablet' if channelname in tablet_channels else 'abr_hdtv'

    variant_re = re.compile(
        '#EXT-X-STREAM-INF:PROGRAM-ID=(.+?),BANDWIDTH=(.+?),CODECS="(.*?)",RESOLUTION=(.+?)\s*(.+?.m3u8)'
    )

    streams = []
    for provider_url, provider_name in providers:
        manifest_url = 'http://a.files.bbci.co.uk/media/live/manifesto/audio_video/simulcast/hls/uk/%s/%s/%s.m3u8' \
            % (device, provider_url, channelname)
        for variant in variant_re.findall(OpenURL(manifest_url)):
            # Add provider name to the stream entry.
            streams.append(list(variant) + [provider_name])

    # Convert bitrate to Mbps for further processing.
    for entry in streams:
        entry[1] = round(int(entry[1]) / 1000000.0, 1)

    streams.sort(key=lambda entry: entry[1], reverse=True)
    return streams
def ListMostPopular():
    """List the episodes of the iPlayer "most popular" group page.

    The page's embedded JSON is extracted with ScrapeJSON and, when present,
    handed to ParseJSON together with the page URL.
    """
    page_url = 'https://www.bbc.co.uk/iplayer/group/most-popular'
    scraped = ScrapeJSON(OpenURL(page_url))
    if not scraped:
        return
    ParseJSON(scraped, page_url)
def ParseMediaselector(stream_id):
    """Ask the mediaselector (v6, JSONP) for the connections of a vpid.

    Returns:
        A pair (streams, subtitles). `streams` holds
        (href, protocol, supplier, transfer_format) tuples for media whose
        kind starts with 'video'; `subtitles` holds
        (href, protocol, supplier) tuples for media of kind 'captions'.
        Missing connection fields are reported as empty strings.

    A 'geolocation' result shows the geoblock dialog and aborts via a bare
    raise.
    """
    streams = []
    subtitles = []

    # Open the page with the actual stream information.
    NEW_URL = "https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/pc/vpid/%s/format/json/jsfunc/JS_callbacks0" % stream_id
    html = OpenURL(NEW_URL)

    # The JSON payload is wrapped in a JS_callbacks0(...) call.
    wrapped = re.search(r'JS_callbacks0.*?\((.*?)\);', html, re.DOTALL)
    json_data = json.loads(wrapped.group(1)) if wrapped else None
    if not json_data:
        return streams, subtitles

    if 'media' in json_data:
        for media in json_data['media']:
            if 'kind' not in media:
                continue
            kind = media['kind']
            if kind == 'captions':
                is_captions = True
            elif kind.startswith('video'):
                is_captions = False
            else:
                continue

            for connection in media.get('connection', ()):
                href = connection.get('href', '')
                protocol = connection.get('protocol', '')
                supplier = connection.get('supplier', '')
                if is_captions:
                    subtitles.append((href, protocol, supplier))
                else:
                    transfer_format = connection.get('transferFormat', '')
                    streams.append((href, protocol, supplier, transfer_format))
    elif 'result' in json_data and json_data['result'] == 'geolocation':
        # Geoblock detected: tell the user, then abort.
        xbmcgui.Dialog().ok(translation(30400), translation(30401))
        raise

    return streams, subtitles
def ListCategories():
    """Create one directory entry per category linked on the iPlayer home page.

    Category ids and names are scraped from the page's category links.
    """
    category_link = '<a href="/iplayer/categories/(.+?)" class="stat">(.+?)</a>'
    page = OpenURL('http://www.bbc.co.uk/iplayer')
    for category_id, title in re.findall(category_link, page):
        AddMenuEntry(title, category_id, 125, '', '', '')
def ListHighlights(highlights_url):
    """List the programmes of a highlights section.

    Args:
        highlights_url: Path appended to "https://www.bbc.co.uk/".

    The page's embedded JSON, when present, is passed to ParseJSON along
    with the full page URL.
    """
    page_url = 'https://www.bbc.co.uk/%s' % highlights_url
    scraped = ScrapeJSON(OpenURL(page_url))
    if not scraped:
        return
    ParseJSON(scraped, page_url)
def GetAtoZPage(url):
    """List the programmes of one A-Z page.

    Args:
        url: Page suffix appended to "https://www.bbc.co.uk/iplayer/a-z/".

    The page's embedded JSON, when present, is passed to ParseJSON along
    with the full page URL.
    """
    page_url = 'https://www.bbc.co.uk/iplayer/a-z/%s' % url
    page = OpenURL(page_url)
    scraped = ScrapeJSON(page)
    if not scraped:
        return
    ParseJSON(scraped, page_url)
def GetAtoZPage(url):
    """List the programmes of one A-Z page by scraping its brand links.

    Args:
        url: Page suffix appended to "http://www.bbc.co.uk/iplayer/a-z/".
    """
    brand_link = '<a href="/iplayer/brand/(.+?)".+?<span class="title">(.+?)</span>'
    page = OpenURL('http://www.bbc.co.uk/iplayer/a-z/%s' % url)
    # DOTALL: the title span may sit on a later line than the link.
    for brand_id, title in re.findall(brand_link, page, re.DOTALL):
        AddMenuEntry(title, brand_id, 121, '', '', '')
def ListCategories():
    """Create one directory entry per category linked on the iPlayer home page.

    The "View all" and "A-Z" links match the same pattern and are skipped.
    """
    category_link = '<a href="/iplayer/categories/(.+?)/featured".*?><span class="lnk__label">(.+?)</span>'
    not_categories = ("View all", "A-Z")

    page = OpenURL('https://www.bbc.co.uk/iplayer')
    for category_id, title in re.findall(category_link, page):
        if title in not_categories:
            continue
        AddMenuEntry(title, category_id, 126, '', '', '')
def ScrapeAvailableStreams(url):
    """Read episode metadata and version IDs from an episode page's JSON.

    Returns:
        A dict with 'stream_id_st', 'stream_id_sl', 'stream_id_ad' (the ID
        of the last standard / signed / audio-described version seen, or an
        empty list when none was found) and 'name', 'image', 'description'
        (None when unavailable).
    """
    # Version kinds that count as the standard stream.
    standard_kinds = ('original', 'iplayer-version', 'technical-replacement',
                      'editorial', 'shortened', 'webcast')

    result = {
        'stream_id_st': [],
        'stream_id_sl': [],
        'stream_id_ad': [],
        'name': None,
        'image': None,
        'description': None,
    }

    json_data = ScrapeJSON(OpenURL(url))
    if not json_data:
        return result

    episode = json_data['episode']
    if 'title' in episode:
        result['name'] = episode['title']

    if 'synopses' in episode:
        synopses = episode['synopses']
        # Prefer the longest synopsis available.
        for size in ('large', 'medium', 'small', 'editorial'):
            if size in synopses:
                result['description'] = synopses[size]
                break

    images = episode['images']
    if 'standard' in images:
        result['image'] = images['standard'].replace('{recipe}', '832x468')

    for version in json_data['versions']:
        kind = version['kind']
        if kind == 'signed':
            result['stream_id_sl'] = version['id']
        elif kind == 'audio-described':
            result['stream_id_ad'] = version['id']
        else:
            if kind not in standard_kinds:
                # Unknown kinds are logged and treated as standard.
                xbmc.log("iPlayer WWW warning: New stream kind: %s" % kind)
            result['stream_id_st'] = version['id']

    return result
def ListWatching(logged_in):
    """List the items of the iPlayer "watching" page.

    Shows the video base directory instead when the login check fails.
    The page's embedded JSON, when present, is passed to ParseJSON.
    """
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('video')
        return

    # The result is not used here; the call is kept as in the original.
    GetCookieJar()

    page_url = "https://www.bbc.co.uk/iplayer/watching"
    scraped = ScrapeJSON(OpenURL(page_url))
    if not scraped:
        return
    ParseJSON(scraped, page_url)
def PlayStream(name, url, iconimage, description, subtitles_url):
    """Resolve an audio stream for playback.

    The URL is fetched first; an "Access Denied" page or an empty response
    shows the geoblock dialog and aborts. `description` and `subtitles_url`
    are accepted but not used here.
    """
    html = OpenURL(url)
    blocked = re.search('<H1>Access Denied</H1>', html)
    if blocked or not html:
        xbmcgui.Dialog().ok(translation(30400), translation(30401))
        # Bare raise kept from the original: with no active exception this
        # itself raises and aborts the call.
        raise

    item = xbmcgui.ListItem(name, iconImage='DefaultVideo.png',
                            thumbnailImage=iconimage)
    item.setInfo(type='Audio', infoLabels={'Title': name})
    item.setProperty("IsPlayable", "true")
    item.setPath(url)
    xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, item)
def ListMostPopular():
    """List the episodes and clips of the radio "popular" page.

    Each list item becomes a playable entry titled
    "[B]<station>[/B] - <name> <(subtitle)>"; items lacking a programme id
    or an image are skipped.
    """
    def _first_group(pattern, text):
        # Group 1 of the first match, or '' when the pattern is absent.
        hit = re.search(pattern, text)
        return hit.group(1) if hit else ''

    html = OpenURL('http://www.bbc.co.uk/radio/popular')
    # The split pattern has a capturing group, so the pieces also contain
    # the bare "episode"/"clip" words; the prefix test below drops them.
    pieces = re.split(r'<li class="(episode|clip) typical-list-item', html)

    for piece in pieces:
        if not piece.startswith(" item-idx-"):
            continue

        programme_id = _first_group(r'<a href="/programmes/(.*?)"', piece)
        name = _first_group(r'<img src=".*?" alt="(.*?)"', piece)

        raw_subtitle = _first_group(r'<span class="subtitle">\s*(.+?)\s*</span>', piece)
        subtitle = "(%s)" % raw_subtitle if raw_subtitle.strip() else ''

        image = _first_group(r'<img src="(.*?)"', piece)
        station = _first_group(r'<span class="service_title">\s*(.+?)\s*</span>', piece)

        title = "[B]%s[/B] - %s %s" % (station, name, subtitle)
        if programme_id and title and image:
            CheckAutoplay(title,
                          "http://www.bbc.co.uk/radio/play/%s" % programme_id,
                          image, ' ', '')

    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    handle = int(sys.argv[1])
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_UNSORTED)
def ScrapeAvailableStreams(url):
    """Return the list of stream IDs found on a programme page.

    All "vpid" values in the page are returned when there are any.
    Otherwise the page's window.__PRELOADED_STATE__ JSON is consulted and
    the current programme's id is returned as a one-element list. The list
    is empty when neither source is present.
    """
    html = OpenURL(url)

    # Search for standard programmes.
    vpids = re.findall('"vpid":"(.+?)"', html)
    if vpids:
        return vpids

    state = re.search(r'window.__PRELOADED_STATE__ = (.*?);\s*</script>',
                      html, re.DOTALL)
    if state:
        preloaded = json.loads(state.group(1))
        # Note: Need to create list for backwards compatibility.
        vpids = [preloaded['programmes']['current']['id']]
    return vpids
def ListCategoryFilters(url):
    """Offer the filters of a category, or list the category directly.

    When the category page exposes filters, an "All" entry plus one entry
    per filter is created. Without filters, the programmes are listed via
    GetFilteredCategory.
    """
    filter_link = '<li class="filter"> <a class="name" href="/iplayer/categories/(.+?)"> (.+?)</a>'

    # Read selected category's page.
    page = OpenURL('http://www.bbc.co.uk/iplayer/categories/%s/all?sort=atoz' % url)
    filters = re.findall(filter_link, page, re.DOTALL)

    if not filters:
        GetFilteredCategory(url)
        return

    AddMenuEntry('All', url, 126, '', '', '')
    for filter_path, filter_name in filters:
        AddMenuEntry(filter_name, filter_path, 126, '', '', '')
def ListFavourites(logged_in):
    """List the user's iPlayer favourites from the favourites JSON feed.

    Shows the video base directory instead when the login check fails.
    For every favourite programme the initial episode is added as a playable
    entry; when the feed reports a non-zero `count`, a bold "more episodes"
    entry pointing at the programme's episode list follows.

    Args:
        logged_in: Login state forwarded to CheckLogin.
    """
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('video')
        return

    html = OpenURL(
        'http://www.bbc.co.uk/iplayer/usercomponents/favourites/programmes.json')
    json_data = json.loads(html)

    # A feed without a 'programmes' key yields an empty listing, not a crash.
    programmes = json_data.get('programmes') or []
    for programme in programmes:
        # Renamed from `id`, which shadowed the builtin.
        programme_id = programme.get('id')
        url = "http://www.bbc.co.uk/iplayer/brand/%s" % (programme_id)
        title = programme.get('title')

        initial_children = programme.get('initial_children')
        if not initial_children:
            # Nothing playable to show for this favourite.
            continue
        initial_child = initial_children[0]

        subtitle = initial_child.get('subtitle')
        episode_title = title
        if subtitle:
            episode_title = title + ' - ' + subtitle

        image_url = ParseImageUrl(initial_child.get('images').get('standard'))
        plot = initial_child.get('synopses').get('small')

        # Best effort: an unparsable date leaves the field empty. Narrowed
        # from a bare except so KeyboardInterrupt/SystemExit still propagate.
        try:
            aired = FirstShownToAired(initial_child.get('release_date'))
        except Exception:
            aired = ''

        CheckAutoplay(episode_title, url, image_url, plot, aired)

        more = programme.get('count')
        if more:
            episodes_url = "http://www.bbc.co.uk/iplayer/episodes/" + programme_id
            AddMenuEntry(
                '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                episodes_url, 128, image_url, '', '')

    handle = int(sys.argv[1])
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_DATE)
def ListFavourites(logged_in):
    """List the user's radio favourites scraped from the favourites page.

    Shows the audio base directory instead when the login check fails.
    For every favourite of type "tlec" a series entry is added when a
    series id is found, and a playable entry when a programme link is found.

    Args:
        logged_in: Login state forwarded to CheckLogin.
    """
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('audio')
        return

    def _first_group(pattern, text):
        # Group 1 of the first match, or '' when the pattern is absent.
        hit = re.search(pattern, text)
        return hit.group(1) if hit else ''

    html = OpenURL('http://www.bbc.co.uk/radio/favourites')
    programmes = html.split('<li class="my-item" data-appid="radio" ')

    for programme in programmes:
        if not programme.startswith('data-type="tlec"'):
            continue

        # Fix: the match used to be stored in `series` while `series_id` was
        # the variable being reset, so an item without a data-id raised
        # NameError or reused the previous item's id.
        series_id = _first_group(r'data-id="(.*?)"', programme)
        programme_id = _first_group(
            r'<a href="http://www.bbc.co.uk/programmes/(.*?)"', programme)
        name = _first_group(
            r'<span class="my-episode-brand" itemprop="name">(.*?)</span>', programme)

        episode = ''
        episode_match = re.search(
            r'<span class="my-episode" itemprop="name">(.*?)</span>', programme)
        if episode_match:
            episode = "(%s)" % episode_match.group(1)

        image = _first_group(r'itemprop="image" src="(.*?)"', programme)
        synopsis = _first_group(r'<span class="my-item-info">(.*?)</span>', programme)
        station = _first_group(
            r'<span class="my-episode-broadcaster" itemprop="name">(.*?)\.</span>',
            programme).strip()

        title = "[B]%s - %s[/B]" % (station, name)
        episode_title = "[B]%s[/B] - %s %s" % (station, name, episode)

        if series_id:
            AddMenuEntry(title, series_id, 131, image, synopsis, '')

        if programme_id:
            url = "http://www.bbc.co.uk/programmes/%s" % programme_id
            CheckAutoplay(episode_title, url, image, ' ', '')

    handle = int(sys.argv[1])
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_UNSORTED)
def ParseStreams(stream_id): retlist = [] # print "Parsing streams for PID: %s"%stream_id[0] # Open the page with the actual strem information and display the various available streams. NEW_URL = "http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s" % stream_id[ 0] html = OpenURL(NEW_URL) # Parse the different streams and add them as new directory entries. match = re.compile( 'connection authExpires=".+?href="(.+?)".+?supplier="mf_(.+?)".+?transferFormat="(.+?)"' ).findall(html) for m3u8_url, supplier, transfer_format in match: tmp_sup = 0 tmp_br = 0 if transfer_format == 'hls': if supplier == 'akamai_uk_hls': tmp_sup = 1 elif supplier == 'limelight_uk_hls': tmp_sup = 2 m3u8_breakdown = re.compile('(.+?)iptv.+?m3u8(.+?)$').findall( m3u8_url) #print m3u8_breakdown # print m3u8_url m3u8_html = OpenURL(m3u8_url) m3u8_match = re.compile( 'BANDWIDTH=(.+?),.+?RESOLUTION=(.+?)\n(.+?)\n').findall( m3u8_html) for bandwidth, resolution, stream in m3u8_match: # print bandwidth # print resolution #print stream url = "%s%s%s" % (m3u8_breakdown[0][0], stream, m3u8_breakdown[0][1]) #print url if int(bandwidth) == 1012300: tmp_br = 2 elif int(bandwidth) == 1799880: tmp_br = 4 elif int(bandwidth) == 3116400: tmp_br = 6 elif int(bandwidth) == 5509880: tmp_br = 7 retlist.append((tmp_sup, tmp_br, url, resolution)) # It may be useful to parse these additional streams as a default as they offer additional bandwidths. 
match = re.compile( 'kind="video".+?connection href="(.+?)".+?supplier="(.+?)".+?transferFormat="(.+?)"' ).findall(html) # print match unique = [] [unique.append(item) for item in match if item not in unique] # print unique for m3u8_url, supplier, transfer_format in unique: tmp_sup = 0 tmp_br = 0 if transfer_format == 'hls': if supplier == 'akamai_hls_open': tmp_sup = 1 elif supplier == 'limelight_hls_open': tmp_sup = 2 m3u8_breakdown = re.compile('.+?master.m3u8(.+?)$').findall( m3u8_url) # print m3u8_url # print m3u8_breakdown m3u8_html = OpenURL(m3u8_url) # print m3u8_html m3u8_match = re.compile( 'BANDWIDTH=(.+?),RESOLUTION=(.+?),.+?\n(.+?)\n').findall(m3u8_html) # print m3u8_match for bandwidth, resolution, stream in m3u8_match: # print bandwidth # print resolution # print stream url = "%s%s" % (stream, m3u8_breakdown[0][0]) # This is not entirely correct, displayed bandwidth may be higher or lower than actual bandwidth. if int(bandwidth) <= 801000: tmp_br = 1 elif int(bandwidth) <= 1510000: tmp_br = 3 elif int(bandwidth) <= 2410000: tmp_br = 5 retlist.append((tmp_sup, tmp_br, url, resolution)) match = re.compile('service="captions".+?connection href="(.+?)"').findall( html) # print "Subtitle URL: %s"%match # print retlist if not match: # print "No streams found" check_geo = re.search('<error id="geolocation"/>', html) if check_geo: # print "Geoblock detected, raising error message" dialog = xbmcgui.Dialog() dialog.ok(translation(30400), translation(30401)) raise return retlist, match
def ListListenList(logged_in):
    """List the available items of the user's radio favourites page.

    Shows the audio base directory instead when the login check fails.
    Items whose data-is-available attribute is missing or empty are skipped.
    Each remaining item yields a series entry and/or a playable episode
    entry, depending on which links it contains.
    """
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('audio')
        return

    multiline = re.DOTALL | re.MULTILINE
    page = OpenURL('http://www.bbc.co.uk/radio/favourites')

    for item in page.split('<div class="favourites box-link favourite '):
        if not item.startswith('media'):
            continue

        available = re.search(r'data-is-available="(.*?)"', item)
        if not available or available.group(1) == '':
            continue

        series_hit = re.search(
            r'<a href="/programmes/(.*?)" class="media__meta-row size-f clr-light-grey text--single-line">\s*(.*?)\s*</a>',
            item)
        series_id, series_name = (
            (series_hit.group(1), series_hit.group(2)) if series_hit else ('', ''))

        episode_hit = re.search(
            r'<a aria-label="(.*?) Duration: (.*?)" class="favourites__brand-link(.*?)" href="/programmes/(.*?)#play">',
            item)
        episode_name, episode_id = (
            (episode_hit.group(1), episode_hit.group(4)) if episode_hit else ('', ''))

        episode_image_hit = re.search(
            r'<img alt="" class="favourites__brand-image media__image " src="(.*?)"',
            item)
        episode_image = (
            "http:%s" % episode_image_hit.group(1) if episode_image_hit else '')

        series_image = ''
        series_image_hit = re.search(
            r'<img class="media__image avatar-image--small" src="(.*?)">', item)
        if series_image_hit:
            # Ask for a larger rendition than the avatar thumbnail.
            series_image = re.sub(
                r'96x96', '640x360', "http:%s" % series_image_hit.group(1))

        station_hit = re.search(
            r'<span class="favourites__network-name.*?<a href="(.*?)" class="clr-light-grey">\s+?(.*?)\s+?<',
            item, flags=multiline)
        station = station_hit.group(2).strip() if station_hit else ''

        description_hit = re.search(
            r'<p class="favourites__description media__meta-row size-f clr-white.*?">\s+?(.*?)\s+?</p>',
            item, flags=multiline)
        description = description_hit.group(1).strip() if description_hit else ''

        if series_id:
            AddMenuEntry("[B]%s - %s[/B]" % (station, series_name), series_id,
                         131, series_image, description, '')

        if not episode_id:
            continue
        # Items with a series play from the programme page, others from the
        # radio player page.
        if series_name:
            episode_title = "[B]%s[/B] - %s - %s" % (station, series_name, episode_name)
            episode_url = "http://www.bbc.co.uk/programmes/%s" % episode_id
        else:
            episode_title = "[B]%s[/B] - %s" % (station, episode_name)
            episode_url = "http://www.bbc.co.uk/radio/play/%s" % episode_id
        CheckAutoplay(episode_title, episode_url, episode_image, ' ', '')

    handle = int(sys.argv[1])
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(handle, xbmcplugin.SORT_METHOD_UNSORTED)
def ListListenList(logged_in):
    """Scrape all available episodes and clips of the radio favourites page.

    When ``CheckLogin(logged_in)`` compares equal to False, the "audio" base
    directory is created through ``CreateBaseDirectory`` and nothing is
    scraped. Otherwise the favourites page is fetched, split into one chunk
    per favourite, and for every chunk marked as available a series entry
    (``AddMenuEntry`` with 131) and/or an episode entry (``CheckAutoplay``)
    is added. Two sort methods are registered at the end.

    Args:
        logged_in: Login state, forwarded unchanged to ``CheckLogin``.
    """
    # Explicit comparison kept on purpose: only a result equal to False
    # aborts. NOTE(review): confirm what CheckLogin can return.
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory("audio")
        return

    html = OpenURL("http://www.bbc.co.uk/radio/favourites/episodesandclips")

    programmes = html.split('<div class="favourites box-link favourite ')
    for programme in programmes:
        # Only chunks that directly follow the split marker are favourites.
        if not programme.startswith("media"):
            continue

        # Skip entries without a non-empty availability attribute.
        data_available_match = re.search(r'data-is-available="(.*?)"', programme)
        if (not data_available_match) or (data_available_match.group(1) == ""):
            continue

        series_id = ""
        series_name = ""
        series_id_match = re.search(
            r'<a href="http://www.bbc.co.uk/programmes/(.*?)" class="media__meta-row size-f clr-light-grey text--single-line">\s*(.*?)\s*</a>',
            programme,
        )
        if series_id_match:
            series_name = series_id_match.group(2)
            series_id = series_id_match.group(1)

        episode_name = ""
        episode_id = ""
        episode_id_match = re.search(
            r'<a aria-label="(.*?) Duration: (.*?)" class="favourites__brand-link(.*?)" href="http://www.bbc.co.uk/programmes/(.*?)#play">',
            programme,
        )
        if episode_id_match:
            episode_name = episode_id_match.group(1)
            episode_id = episode_id_match.group(4)

        episode_image = ""
        episode_image_match = re.search(
            r'<img alt="" class="favourites__brand-image media__image " src="(.*?)"', programme
        )
        if episode_image_match:
            # The scraped src has no scheme, so one is prepended.
            episode_image = "http:%s" % episode_image_match.group(1)

        series_image = ""
        series_image_match = re.search(
            r'<img class="media__image avatar-image--small" src="(.*?)">', programme
        )
        if series_image_match:
            series_image = "http:%s" % series_image_match.group(1)
            # Request a larger rendition than the small avatar.
            series_image = re.sub(r"96x96", "640x360", series_image)

        station = ""
        station_match = re.search(
            r'<span class="favourites__network-name.*?<a href="(.*?)" class="clr-light-grey">\s+?(.*?)\s+?<',
            programme,
            flags=(re.DOTALL | re.MULTILINE),
        )
        if station_match:
            station = station_match.group(2).strip()

        description = ""
        description_match = re.search(
            r'<p class="favourites__description media__meta-row size-f clr-white.*?">\s+?(.*?)\s+?</p>',
            programme,
            flags=(re.DOTALL | re.MULTILINE),
        )
        if description_match:
            description = description_match.group(1).strip()

        if series_id:
            series_title = "[B]%s - %s[/B]" % (station, series_name)
            AddMenuEntry(series_title, series_id, 131, series_image, description, "")

        if episode_id:
            if series_name:
                episode_title = "[B]%s[/B] - %s - %s" % (station, series_name, episode_name)
            else:
                episode_title = "[B]%s[/B] - %s" % (station, episode_name)
            episode_url = "http://www.bbc.co.uk/programmes/%s" % episode_id
            CheckAutoplay(episode_title, episode_url, episode_image, " ", "")

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
def GetCategoryPage(page_url, just_episodes=False):
    """Scrape a radio category page and add its programmes to the listing.

    The page at ``page_url`` is fetched and, if it contains a ``pgn__list``
    pagination marker, handled in one of two ways depending on the
    ``radio_paginate_episodes`` add-on setting:

    * setting == 0: only the current page is listed and, when a next page
      exists, a ">>" menu entry (137) pointing to it is appended;
    * otherwise: every page from 1 to the last page number found in the
      pagination list is fetched and listed.

    Each programme chunk yields either a series entry (``AddMenuEntry`` with
    131) or a playable entry (``CheckAutoplay``). Progress is reported through
    a background progress dialog.

    Args:
        page_url: URL of the category page to fetch first.
        just_episodes: Accepted for interface compatibility; not used here.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'pgn__list', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
                # Strip the query string so the next-page href can be appended.
                main_base_url = re.search(r'(.+?)\?.+?', page_url).group(1)
            else:
                main_base_url = page_url
            next_page_match = re.search(
                r'pgn__page--next.*?href="(.*?page=)(.*?)"', html)
            if next_page_match:
                page_base_url = main_base_url + next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            pages = re.findall(r'<li class="pgn__page.*?</li>', html,
                               flags=(re.DOTALL | re.MULTILINE))
            # The last page number is read from the second-to-last item
            # (presumably the final item is the "next" control - verify
            # against the page markup). Guard against short lists and items
            # without a page link instead of raising IndexError or
            # AttributeError; in that case a single page is assumed.
            if len(pages) >= 2:
                last_page = re.search(r'href=".*?page=(.*?)"', pages[-2])
                if last_page:
                    page_base_url = page_url + '?page='
                    total_pages = int(last_page.group(1))
            page_range = list(range(1, total_pages + 1))

    for page in page_range:
        # The first page was already fetched above.
        if page > current_page:
            page_url = page_base_url + str(page)
            html = OpenURL(page_url)

        list_item_num = 1

        programmes = html.split('<div class="programme-item')
        for programme in programmes:
            series_id = ''
            series_id_match = re.search(
                r'<a class="category-episodes" href="/programmes/(.+?)/episodes"',
                programme)
            if series_id_match:
                series_id = series_id_match.group(1)

            programme_id = ''
            programme_id_match = re.search(r'href="/programmes/(.+?)"', programme)
            if programme_id_match:
                programme_id = programme_id_match.group(1)

            name = ''
            name_match = re.search(
                r'<span class="programme-item-title.+?>(.+?)</span>', programme)
            if name_match:
                name = name_match.group(1)

            subtitle = ''
            subtitle_match = re.search(
                r'<p class="programme-item-subtitle.+?>(.+?)</p>', programme)
            if subtitle_match:
                subtitle = subtitle_match.group(1)

            image = ''
            image_match = re.search(r'class="media__image" src="(.+?)"', programme)
            if image_match:
                image = 'http://' + image_match.group(1)

            synopsis = ''
            synopsis_match = re.search(
                r'<p class="programme-item-synopsis.+?>(.+?)</p>', programme)
            if synopsis_match:
                synopsis = synopsis_match.group(1)

            station = ''
            station_match = re.search(
                r'class="programme-item-network.+?>\s*(.+?)\s*</a>', programme)
            if station_match:
                station = station_match.group(1).strip()

            series_title = "[B]%s - %s[/B]" % (station, name)
            title = "[B]%s[/B] - %s %s" % (station, name, subtitle)

            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, '')
            elif programme_id:  # TODO maybe they are not always mutually exclusive
                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, ' ', '')

            percent = int(100 * (page + list_item_num / len(programmes)) / total_pages)
            pDialog.update(percent, translation(30319), name)

            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 137, '', '', '')

    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
def ListFavourites(logged_in):
    """Scrape all episodes of the radio favourites page.

    When ``CheckLogin(logged_in)`` compares equal to False, the 'audio' base
    directory is created through ``CreateBaseDirectory`` and nothing is
    scraped. Otherwise the favourites page is fetched, split into one chunk
    per favourite, and each chunk yields a series entry (``AddMenuEntry``
    with 131) and/or an episode entry (``CheckAutoplay``). Two sort methods
    are registered at the end.

    Args:
        logged_in: Login state, forwarded unchanged to ``CheckLogin``.
    """
    # Explicit comparison kept on purpose: only a result equal to False
    # aborts. NOTE(review): confirm what CheckLogin can return.
    if CheckLogin(logged_in) == False:
        CreateBaseDirectory('audio')
        return

    html = OpenURL('http://www.bbc.co.uk/radio/favourites')

    programmes = html.split('<div class="favourites favourite ')
    for programme in programmes:
        # Only chunks that directly follow the split marker are favourites.
        if not programme.startswith('media'):
            continue

        series_name = ''
        series_id = ''
        series_id_match = re.search(
            r'<a aria-label="(.*?)" class="favourites__brand-image-link" href="/programmes/(.*?)">',
            programme)
        if series_id_match:
            series_name = series_id_match.group(1)
            series_id = series_id_match.group(2)

        episode_name = ''
        episode_id = ''
        episode_id_match = re.search(
            r'<a aria-label="(.*?) Duration: (.*?)" class="favourites__brand-link(.*?)" href="/programmes/(.*?)#play">',
            programme)
        if episode_id_match:
            episode_name = episode_id_match.group(1)
            episode_id = episode_id_match.group(4)

        episode_image = ''
        episode_image_match = re.search(
            r'<img class="favourites__brand-image media__image " src="(.*?)"',
            programme)
        if episode_image_match:
            # The scraped src has no scheme, so one is prepended.
            episode_image = "http:%s" % episode_image_match.group(1)

        series_image = ''
        series_image_match = re.search(
            r'<img class="media__image avatar-image--small" src="(.*?)">',
            programme)
        if series_image_match:
            series_image = "http:%s" % series_image_match.group(1)
            # Request a larger rendition than the small avatar.
            series_image = re.sub(r'96x96', '640x360', series_image)

        station = ''
        station_match = re.search(
            r'<span class="favourites__network-name.*?<a href="(.*?)" class="clr-light-grey">\s+?(.*?)\s+?<',
            programme, flags=(re.DOTALL | re.MULTILINE))
        if station_match:
            station = station_match.group(2).strip()

        description = ''
        description_match = re.search(
            r'<p class="favourites__description media__meta-row size-f clr-white">\s+?(.*?)\s+?</p>',
            programme, flags=(re.DOTALL | re.MULTILINE))
        if description_match:
            description = description_match.group(1).strip()

        if series_id:
            series_title = "[B]%s - %s[/B]" % (station, series_name)
            AddMenuEntry(series_title, series_id, 131, series_image, description, '')

        if episode_id:
            if series_name:
                episode_title = "[B]%s[/B] - %s - %s" % (station, series_name, episode_name)
            else:
                episode_title = "[B]%s[/B] - %s" % (station, episode_name)
            episode_url = "http://www.bbc.co.uk/programmes/%s" % episode_id
            xbmc.log(episode_url)
            CheckAutoplay(episode_title, episode_url, episode_image, ' ', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
def GetAtoZPage(page_url, just_episodes=False):
    """Scrape a radio A-Z listing page and add its programmes to the listing.

    The page is fetched and, if it contains a pagination ``<ol>``, handled
    according to the ``radio_paginate_episodes`` add-on setting: with 0 only
    the current page is listed and a ">>" entry (138) links to the next page;
    otherwise all pages up to the last one found are fetched and listed.

    Every radio programme chunk becomes either a series entry
    (``AddMenuEntry`` with 131) or a playable entry (``CheckAutoplay``).

    Args:
        page_url: URL of the page to fetch first.
        just_episodes: When true, playable entries are titled with the page's
            masthead title instead of the station name.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    next_page = 1
    page_range = list(range(1))

    pagination = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    if pagination:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            # Single-page mode: work out where we are and where "next" leads.
            page_number = re.search(r'page=(\d*)', page_url)
            if page_number:
                current_page = int(page_number.group(1))
            next_link = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                pagination.group(0))
            if next_link:
                page_base_url = next_link.group(1)
                next_page = int(next_link.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            # All-pages mode: the last pagination item carries the page count.
            page_items = re.findall(
                r'<li.+?class="pagination__page.*?</li>', pagination.group(0))
            if page_items:
                final_link = re.search(r'<a.+?href="(.*?=)(.*?)"', page_items[-1])
                page_base_url = final_link.group(1)
                total_pages = int(final_link.group(2))
            page_range = list(range(1, total_pages + 1))

    for page in page_range:
        # The first page is already loaded; later ones are fetched on demand.
        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        masthead = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>',
            html)
        if masthead:
            masthead_title = masthead.group(1)
        else:
            fallback_masthead = re.search(
                r'<div class="br-masthead__title">.*?<a href="[^"]+">([^<]+?)</a>',
                html, re.M | re.S)
            masthead_title = fallback_masthead.group(1) if fallback_masthead else ''

        list_item_num = 1

        programmes = html.split('<li class="grid one-whole">')
        for programme in programmes:
            if not re.search(r'programme--radio', programme):
                continue

            found = re.search(
                r'data-lazylink-inc="/programmes/(.+?)/episodes/player.inc"',
                programme)
            series_id = found.group(1) if found else ''

            found = re.search(r'data-pid="(.+?)"', programme)
            programme_id = found.group(1) if found else ''

            found = re.search(r'<span property="name">(.+?)</span>', programme)
            if found:
                name = found.group(1)
            else:
                found = re.search(
                    r'<meta property="name" content="([^"]+?)"', programme)
                name = found.group(1) if found else ''

            found = re.search(
                r'<meta property="image" content="(.+?)" />', programme)
            image = found.group(1) if found else ''

            found = re.search(
                r'<span property="description">(.+?)<\/span>', programme)
            synopsis = found.group(1) if found else ''

            found = re.search(
                r'<p class="programme__service.+?<strong>(.+?)<\/strong>.*?<\/p>',
                programme)
            station = found.group(1).strip() if found else ''

            series_title = "[B]%s - %s[/B]" % (station, name)
            if just_episodes:
                title = "[B]%s[/B] - %s" % (masthead_title, name)
            else:
                title = "[B]%s[/B] - %s" % (station, name)

            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, '')
            elif programme_id:  # TODO maybe they are not always mutually exclusive
                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, ' ', '')

            percent = int(100 * (page + list_item_num / len(programmes)) / total_pages)
            pDialog.update(percent, translation(30319), name)
            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 138, '', '', '')

    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
def GetPage(page_url, just_episodes=False):
    """Generic radio page scraper based on the page's embedded JSON-LD data.

    The page is fetched and, if it contains a pagination ``<ol>``, handled
    according to the ``radio_paginate_episodes`` add-on setting: with 0 only
    the current page is listed and a ">>" entry (136) links to the next page;
    otherwise all pages up to the last one found are fetched and listed.

    On every page the first ``application/ld+json`` script block is parsed
    and each item of its ``episode`` list is added through ``CheckAutoplay``,
    titled with the page's masthead title.

    Args:
        page_url: URL of the page to fetch first.
        just_episodes: Accepted for interface compatibility; not used here.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    page_range = list(range(1))
    paginate = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    next_page = 1
    if paginate:
        if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
            current_page_match = re.search(r'page=(\d*)', page_url)
            if current_page_match:
                current_page = int(current_page_match.group(1))
            next_page_match = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                paginate.group(0))
            if next_page_match:
                page_base_url = next_page_match.group(1)
                next_page = int(next_page_match.group(2))
            else:
                next_page = current_page
            page_range = list(range(current_page, current_page + 1))
        else:
            pages = re.findall(r'<li.+?class="pagination__page.*?</li>',
                               paginate.group(0))
            if pages:
                last = pages[-1]
                last_page = re.search(r'<a.+?href="(.*?=)(.*?)"', last)
                page_base_url = last_page.group(1)
                total_pages = int(last_page.group(2))
            page_range = list(range(1, total_pages + 1))

    for page in page_range:
        # The first page was already fetched above.
        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        masthead_title = ''
        masthead_title_match = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>',
            html)
        if masthead_title_match:
            masthead_title = masthead_title_match.group(1)
        else:
            alternative_masthead_title_match = re.search(
                r'<div class="br-masthead__title">.*?<a href="[^"]+">([^<]+?)</a>',
                html, re.M | re.S)
            if alternative_masthead_title_match:
                masthead_title = alternative_masthead_title_match.group(1)

        list_item_num = 1

        data_match = re.findall(
            r'<script type="application\/ld\+json">(.*?)<\/script>', html, re.S)
        if data_match:
            # Only the first JSON-LD block is used.
            json_data = json.loads(data_match[0])
            episodes = json_data['episode']

            for episode in episodes:
                programme_id = episode['identifier']
                name = episode['name']
                image = episode['image']
                synopsis = episode['description']

                title = "[B]%s[/B] - %s" % (masthead_title, name)
                url = "http://www.bbc.co.uk/radio/play/%s" % programme_id
                CheckAutoplay(title, url, image, synopsis, '')

                percent = int(
                    100 * (page + list_item_num / len(episodes)) / total_pages)
                pDialog.update(percent, translation(30319), name)
                list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('radio_paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 136, '', '', '')

    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
def GetPage(page_url, just_episodes=False):
    """Generic radio page scraper working on the programme HTML markup.

    The page is fetched and, if it contains a pagination ``<ol>``, handled
    according to the ``radio_paginate_episodes`` add-on setting: with 0 only
    the current page is listed and a ">>" entry (136) links to the next page;
    otherwise all pages up to the last one found are fetched and listed.

    Every available radio programme chunk becomes either a series entry
    (``AddMenuEntry`` with 131) or a playable entry (``CheckAutoplay``).

    Args:
        page_url: URL of the page to fetch first.
        just_episodes: When true, playable entries are titled with the page's
            masthead title instead of station, name and subtitle.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    total_pages = 1
    current_page = 1
    next_page = 1
    page_range = range(1)

    pagination = re.search(r'<ol.+?class="pagination.*?</ol>', html)
    if pagination:
        if int(ADDON.getSetting("radio_paginate_episodes")) == 0:
            # Single-page mode: work out where we are and where "next" leads.
            page_number = re.search(r"page=(\d*)", page_url)
            if page_number:
                current_page = int(page_number.group(1))
            next_link = re.search(
                r'<li class="pagination__next"><a href="(.*?page=)(.*?)">',
                pagination.group(0))
            if next_link:
                page_base_url = next_link.group(1)
                next_page = int(next_link.group(2))
            else:
                next_page = current_page
            page_range = range(current_page, current_page + 1)
        else:
            # All-pages mode: the last pagination item carries the page count.
            page_items = re.findall(
                r'<li.+?class="pagination__page.*?</li>', pagination.group(0))
            if page_items:
                final_link = re.search(r'<a.+?href="(.*?=)(.*?)"', page_items[-1])
                page_base_url = final_link.group(1)
                total_pages = int(final_link.group(2))
            page_range = range(1, total_pages + 1)

    for page in page_range:
        # The first page is already loaded; later ones are fetched on demand.
        if page > current_page:
            page_url = "http://www.bbc.co.uk" + page_base_url + str(page)
            html = OpenURL(page_url)

        masthead = re.search(
            r'<div.+?id="programmes-main-content".*?<span property="name">(.+?)</span>', html
        )
        masthead_title = masthead.group(1) if masthead else ""

        list_item_num = 1

        programmes = html.split('<div class="programme ')
        for programme in programmes:
            if not programme.startswith("programme--radio"):
                continue

            # TODO find a more robust test
            if "available" not in programme:
                continue

            found = re.search(
                r'<a class="iplayer-text js-lazylink__link" href="/programmes/(.+?)/episodes/player"',
                programme
            )
            series_id = found.group(1) if found else ""

            found = re.search(r'data-pid="(.+?)"', programme)
            programme_id = found.group(1) if found else ""

            found = re.search(r'<span property="name">(.+?)</span>', programme)
            name = found.group(1) if found else ""

            # The subtitle may carry a series part and an optional episode part.
            subtitle = ""
            found = re.search(
                r'<span class="programme__subtitle.+?property="name">(.*?)</span>(.*?property="name">(.*?)</span>)?',
                programme,
            )
            if found:
                series_part = found.group(1)
                episode_part = found.group(3)
                if episode_part:
                    subtitle = "(%s, %s)" % (series_part, episode_part)
                elif series_part.strip():
                    subtitle = "(%s)" % series_part

            found = re.search(r'<meta property="image" content="(.+?)" />', programme)
            image = found.group(1) if found else ""

            found = re.search(r'<span property="description">(.+?)</span>', programme)
            synopsis = found.group(1) if found else ""

            found = re.search(
                r'<p class="programme__service.+?<strong>(.+?)</strong>.*?</p>', programme)
            station = found.group(1).strip() if found else ""

            series_title = "[B]%s - %s[/B]" % (station, name)
            if just_episodes:
                title = "[B]%s[/B] - %s" % (masthead_title, name)
            else:
                title = "[B]%s[/B] - %s %s" % (station, name, subtitle)

            if series_id:
                AddMenuEntry(series_title, series_id, 131, image, synopsis, "")
            elif programme_id:  # TODO maybe they are not always mutually exclusive
                url = "http://www.bbc.co.uk/programmes/%s" % programme_id
                CheckAutoplay(title, url, image, " ", "")

            percent = int(100 * (page + list_item_num / len(programmes)) / total_pages)
            pDialog.update(percent, translation(30319), name)
            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting("radio_paginate_episodes")) == 0:
        if current_page < next_page:
            page_url = "http://www.bbc.co.uk" + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 136, "", "", "")

    # BUG: this should sort by original order but it doesn't
    # (see http://trac.kodi.tv/ticket/10252)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)

    pDialog.close()
def ScrapeEpisodes(page_url):
    """Create a list of programmes found on one standard HTML page.

    ScrapeEpisodes contains a number of special treatments which are only
    needed for specific pages, e.g. Search, but this allows a single function
    to be used for all kinds of pages.

    Pagination follows the ``paginate_episodes`` add-on setting: with 0 only
    the current page is listed and a ">>" entry (128) links to the next page;
    otherwise all pages up to the last one found are fetched and listed.

    Args:
        page_url: URL of the page to fetch first.
    """
    pDialog = xbmcgui.DialogProgressBG()
    pDialog.create(translation(30319))

    html = OpenURL(page_url)

    dotall_multiline = re.DOTALL | re.MULTILINE

    total_pages = 1
    current_page = 1
    next_page = 1
    page_range = range(1)

    pagination = re.search(r'<div class="paginate.*?</div>', html, re.DOTALL)
    if pagination:
        if int(ADDON.getSetting('paginate_episodes')) == 0:
            # Single-page mode: work out where we are and where "next" leads.
            page_number = re.search(r'page=(\d*)', page_url)
            if page_number:
                current_page = int(page_number.group(1))
            next_link = re.search(
                r'<span class="next txt">.+?href="(.*?page=)(.*?)"',
                pagination.group(0), re.DOTALL)
            if next_link:
                page_base_url = next_link.group(1)
                next_page = int(next_link.group(2))
            else:
                next_page = current_page
            page_range = range(current_page, current_page + 1)
        else:
            # All-pages mode: the last pagination item carries the page count.
            page_items = re.findall(
                r'<li class="page.*?</li>', pagination.group(0), re.DOTALL)
            if page_items:
                final_link = re.search(r'<a href="(.*?page=)(.*?)"', page_items[-1])
                page_base_url = final_link.group(1)
                total_pages = int(final_link.group(2))
            page_range = range(1, total_pages + 1)

    for page in page_range:
        # The first page is already loaded; later ones are fetched on demand.
        if page > current_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(page)
            html = OpenURL(page_url)

        # NOTE remove inner li to match outer li
        # <li data-version-type="hd">
        html = re.sub(r'<li data-version-type.*?</li>', '', html,
                      flags=dotall_multiline)

        # <li class="list-item programme"  data-ip-id="p026f2t4">
        list_items = re.findall(r'<li class="list-item.*?</li>', html,
                                flags=dotall_multiline)

        list_item_num = 1

        for li in list_items:
            # <li class="list-item unavailable"  data-ip-id="b06sq9xj">
            if re.search('<li class="list-item.*?unavailable.*?"', li,
                         flags=dotall_multiline):
                continue

            # <li class="list-item search-group"  data-ip-id="b06rdtx0">
            search_group = bool(re.search(
                '<li class="list-item.*?search-group.*?"', li,
                flags=dotall_multiline))

            # <a href="/iplayer/episode/p026gmw9/world-of-difference-the-models"
            # title="World of Difference, The Models" class="list-item-link stat"
            main_url = None
            link = re.search(r'<a.*?href="(.*?)".*?list-item-link.*?>', li,
                             flags=dotall_multiline)
            if link and link.group(1):
                main_url = 'http://www.bbc.co.uk' + link.group(1)

            # <div class="title top-title">World of Difference</div>
            heading = re.search(
                r'<div class="title top-title">\s*(.*?)\s*</div>', li,
                flags=dotall_multiline)
            title = heading.group(1) if heading else ''
            name = title

            # <div class="subtitle">The Models</div>
            subheading = re.search(
                r'<div class="subtitle">\s*(.*?)\s*</div>', li,
                flags=dotall_multiline)
            if subheading and subheading.group(1):
                name = name + " - " + subheading.group(1)

            # <div class="r-image"  data-ip-type="episode"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p026vl1q.jpg">
            # <div class="r-image"  data-ip-type="group"
            # data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/p037ty9z.jpg">
            icon = ''
            item_type = None
            picture = re.search(
                r'<div class="r-image".+?data-ip-type="(.*?)".+?data-ip-src="http://ichef.bbci.co.uk/images/ic/336x189/(.*?)\.jpg"',
                li, flags=dotall_multiline)
            if picture:
                item_type = picture.group(1)
                image_id = picture.group(2)
                if image_id:
                    # Ask for a larger rendition of the same image.
                    icon = "http://ichef.bbci.co.uk/images/ic/832x468/" + image_id + ".jpg"

            # <p class="synopsis">What was it like to be a top fashion model 30 years ago? (1978)</p>
            summary = re.search(r'<p class="synopsis">\s*(.*?)\s*</p>', li,
                                flags=dotall_multiline)
            synopsis = summary.group(1) if summary else ''

            # <span class="release">\nFirst shown: 8 Jun 1967\n</span>
            aired = ''
            first_shown = re.search(
                r'<span class="release">.*?First shown:\s*(.*?)\n.*?</span>',
                li, flags=dotall_multiline)
            if first_shown and first_shown.group(1):
                aired = FirstShownToAired(first_shown.group(1))

            # <a class="view-more-container avail stat" href="/iplayer/episodes/p00db1jf" data-progress-state="">
            # <a class="view-more-container sibling stat"
            #  href="/iplayer/search?q=doctor&amp;search_group_id=urn:bbc:programmes:b06qbs4n">
            view_more = re.search(
                r'<a class="view-more-container.+?stat".+?href="(.*?)"',
                li, flags=dotall_multiline)
            episodes = view_more.group(1) if view_more else None

            # <em class="view-more-heading">27</em>
            more_heading = re.search(
                r'<em class="view-more-heading">(.*?)</em>', li,
                flags=dotall_multiline)
            more = more_heading.group(1) if more_heading else None

            if episodes:
                episodes_url = 'http://www.bbc.co.uk' + episodes
                if search_group:
                    AddMenuEntry(
                        '[B]%s[/B] - %s' % (title, translation(30318)),
                        episodes_url, 128, icon, '', '')
                else:
                    AddMenuEntry(
                        '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                        episodes_url, 128, icon, '', '')
            elif more:
                AddMenuEntry(
                    '[B]%s[/B] - %s %s' % (title, more, translation(30313)),
                    main_url, 128, icon, '', '')

            # Groups only get a folder entry; everything else is playable.
            if item_type != "group":
                CheckAutoplay(name, main_url, icon, synopsis, aired)

            percent = int(100 * (page + list_item_num / len(list_items)) / total_pages)
            pDialog.update(percent, translation(30319), name)
            list_item_num += 1

        percent = int(100 * page / total_pages)
        pDialog.update(percent, translation(30319))

    if int(ADDON.getSetting('paginate_episodes')) == 0:
        if current_page < next_page:
            page_url = 'http://www.bbc.co.uk' + page_base_url + str(next_page)
            AddMenuEntry(" [COLOR ffffa500]%s >>[/COLOR]" % translation(30320),
                         page_url, 128, '', '', '')

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)

    pDialog.close()
def ListHighlights(highlights_url):
    """Create a list of the programmes in the highlights section.

    The page ``http://www.bbc.co.uk/<highlights_url>`` is fetched and its
    anchors are processed in four steps:

    1. "View all N" group links are scanned to record the episode count per
       group href.
    2. Group title links are turned into group menu entries (128); each
       group's position prefix, name and type are remembered.
    3. Episodes listed inside groups are collected into a pending list and
       assigned to their group by position.
    4. Single items are added directly, unless the same episode id is already
       pending, in which case the pending entry is enriched with description,
       icon and air date. Finally the pending entries are added.

    Args:
        highlights_url: Path appended to ``http://www.bbc.co.uk/``; also
            compared against a fixed list of channels for which backfill
            episodes are kept.
    """
    flags = re.DOTALL | re.MULTILINE

    html = OpenURL('http://www.bbc.co.uk/%s' % highlights_url)

    inner_anchors = re.findall(r'<a.*?(?!<a).*?</a>', html, flags=flags)

    # First find all groups as we need to store some properties of groups
    # for later reuse. Each item is [position prefix, name, group type].
    group_properties = []

    # NOTE find episode count first
    episode_count = dict()
    groups = [
        a for a in inner_anchors
        if re.match(
            r'<a[^<]*?class="grouped-items__cta.*?data-object-type="group-list-link".*?',
            a, flags=flags)
    ]
    for group in groups:
        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"', group, flags=flags)
        if href_match:
            href = href_match.group(1)

        count_match = re.search(r'>View all ([0-9]*).*?</a>', group, flags=flags)
        if count_match:
            episode_count[href] = count_match.group(1)

    groups = [
        a for a in inner_anchors
        if re.match(
            r'<a[^<]*?class="grouped-items__title.*?data-object-type="group-list-link".*?',
            a, flags=flags)
    ]
    for group in groups:
        href = ''
        href_match = re.match(r'<a[^<]*?href="(.*?)"', group, flags=flags)
        if href_match:
            href = href_match.group(1)

        name = ''
        name_match = re.search(r'<strong>(.*?)</strong>', group, flags=flags)
        if name_match:
            name = name_match.group(1)

        count = ''
        if href in episode_count:
            count = episode_count[href]

        url = 'http://www.bbc.co.uk' + href

        # Unfortunately, the group type is not inside the links, so we need
        # to search the whole HTML. The scraped name is escaped because it is
        # literal text, not a pattern: unescaped metacharacters such as
        # parentheses would break the lookup or raise re.error.
        group_type = ''
        group_type_match = re.search(
            r'data-group-name="' + re.escape(name) + '".+?data-group-type="(.+?)"',
            html, flags=flags)
        if group_type_match:
            group_type = group_type_match.group(1)

        position_match = re.search(r'data-object-position="(.+?)-ALL"', group,
                                   flags=flags)
        if position_match:
            group_properties.append(
                [position_match.group(1), name, group_type])

        AddMenuEntry(
            '[B]%s: %s[/B] - %s %s' % (translation(30314), name, count, translation(30315)),
            url, 128, '', '', '')

    # Some programmes show up twice in HTML, once inside the groups, once
    # outside. We need to parse both to avoid duplicates and to make sure we
    # get all of them. Each item is [episode id, name, description, icon,
    # aired].
    episodelist = []

    # <a\n href="/iplayer/episode/b06tr74y/eastenders-24122015"\n class="grouped-items__list-link
    listeds = [
        a for a in inner_anchors
        if re.search(r'class="grouped-items__list-link', a, flags=flags)
    ]
    for listed in listeds:
        episode_id = ''
        # <a\n href="/iplayer/episode/b06tr74y/eastenders-24122015"
        id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/', listed,
                            flags=flags)
        if id_match:
            episode_id = id_match.group(1)

        name = ''
        # <p class="grouped-items__title grouped-items__title--item typo typo--skylark">
        # <strong>EastEnders</strong></p>
        title_match = re.search(
            r'<.*?class="grouped-items__title.*?<strong>(.*?)</strong>',
            listed, flags=flags)
        if title_match:
            name = title_match.group(1)
            # Strip any markup nested inside the title.
            name = re.compile(r'<.*?>', flags=flags).sub('', name)

        # <p class="grouped-items__subtitle typo typo--canary">24/12/2015</p>
        subtitle_match = re.search(
            r'<.*?class="grouped-items__subtitle.*?>(.*?)<', listed, flags=flags)
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)

        # Assign correct group based on the position of the episode.
        position = ''
        position_match = re.search(r'data-object-position="(.+?)"', listed,
                                   flags=flags)
        if position_match:
            for properties in group_properties:
                # The stored group position is matched as a prefix of the
                # episode position.
                if re.match(properties[0], position_match.group(1), flags=flags):
                    position = properties[1]
                    # For series-catchup groups, we need to modify the title.
                    if properties[2] == 'series-catchup':
                        name = properties[1] + ': ' + name

        episodelist.append([
            episode_id,
            name,
            "%s %s" % (translation(30316), position),
            'DefaultVideo.png',
            ''
        ])

    # < a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"\n
    # class="single-item stat"
    singles = [
        a for a in inner_anchors
        if re.search(r'class="single-item', a, flags=flags)
    ]
    for single in singles:
        object_type = ''
        # data-object-type="episode-backfill"
        data_object_type = re.search(r'data-object-type="(.*?)"', single,
                                     flags=flags)
        if data_object_type:
            object_type = data_object_type.group(1)
            if object_type == "episode-backfill":
                # Backfill episodes are only kept for these channels.
                if highlights_url not in ['tv/bbcnews', 'tv/bbcparliament', 'tv/s4c']:
                    continue

        episode_id = ''
        url = ''
        # <a\nhref="/iplayer/episode/p036gq3z/bbc-music-introducing-from-buddhist-monk-to-rock-star"
        if object_type == "editorial-promo":
            # Promos keep the complete href as their identifier.
            id_match = re.match(r'<a.*?href="(.*?)"', single, flags=flags)
        else:
            id_match = re.match(r'<a.*?href="/iplayer/episode/(.*?)/', single,
                                flags=flags)
        if id_match:
            episode_id = id_match.group(1)
            url = 'http://www.bbc.co.uk/iplayer/episode/' + episode_id

        name = ''
        # <h3 class="single-item__title typo typo--skylark"><strong>BBC Music Introducing</strong></h3>
        title_match = re.search(
            r'<.*?class="single-item__title.*?<strong>(.*?)</strong>',
            single, flags=flags)
        if title_match:
            name = title_match.group(1)
            # Strip any markup nested inside the title.
            name = re.compile(r'<.*?>', flags=flags).sub('', name)

        # <p class="single-item__subtitle typo typo--canary">From Buddhist Monk to Rock Star</p>
        subtitle_match = re.search(
            r'<.*?class="single-item__subtitle.*?>(.*?)<', single, flags=flags)
        if subtitle_match:
            name = name + ' - ' + subtitle_match.group(1)

        icon = ''
        # <div class="r-image" data-ip-type="episode"
        # data-ip-src="http://ichef.bbci.co.uk/images/ic/406x228/p036gtc5.jpg">
        image_match = re.search(r'<.*?class="r-image.*?data-ip-src="(.*?)"',
                                single, flags=flags)
        if image_match:
            icon = image_match.group(1)

        desc = ''
        # <p class="single-item__overlay__desc">
        # The remarkable rise of Ngawang Lodup - from BBC Introducing to performing at the O2 Arena</p>
        desc_match = re.search(
            r'<.*?class="single-item__overlay__desc.*?>(.*?)<', single, flags=flags)
        if desc_match:
            desc = desc_match.group(1)

        aired = ''
        # <p class="single-item__overlay__subtitle">First shown: 4 Nov 2015</p>
        release_match = re.search(
            r'<.*?class="single-item__overlay__subtitle">First shown: (.*?)<',
            single, flags=flags)
        if release_match:
            release = release_match.group(1)
            if release:
                aired = FirstShownToAired(release)

        # If the episode is already pending from a group, enrich that entry
        # instead of adding a duplicate.
        add_entry = True
        for pending in episodelist:
            if pending[0] == episode_id:
                pending[2] = desc
                pending[3] = icon
                pending[4] = aired
                add_entry = False

        if add_entry:
            if object_type == "editorial-promo":
                AddMenuEntry('[B]%s[/B]' % (name), episode_id, 128, icon, '', '')
            else:
                CheckAutoplay(name, url, icon, desc, aired)

    # Finally add all programmes which have been identified as part of a
    # group before.
    for episode in episodelist:
        episode_url = "http://www.bbc.co.uk/iplayer/episode/%s" % episode[0]
        # Entries without an air date are only added when the
        # 'suppress_incomplete' setting is 'false'.
        if ((ADDON.getSetting('suppress_incomplete') == 'false') or (not episode[4] == '')):
            CheckAutoplay(episode[1], episode_url, episode[3], episode[2], episode[4])

    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_VIDEO_TITLE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_DATE)
    xbmcplugin.addSortMethod(int(sys.argv[1]), xbmcplugin.SORT_METHOD_UNSORTED)