def _getMetadata(self, section, id):
    """Scrape a single video page and return a metadata dict.

    Reads the page's <meta description>, thumbnail and video_src links,
    then (if a mediaXML= parameter is present in the video src) fetches
    that XML to pick up the title and the direct media URL.
    Returns {} when the page fetch fails.
    """
    metadata = {}
    page = webpage("/".join((self.urls['base'], section, self.urls['videos'], id)))
    if page.doc:
        soup = BeautifulSoup(page.doc)
        description = soup.find('meta', attrs={"name" : "description"})['content']
        thumbnail = soup.find('link', attrs={'rel' : 'image_src'})['href']
        videosrc = soup.find('link', attrs={'rel' : 'video_src'})['href']
        # The flash embed URL carries the playlist XML as a query parameter.
        match = re.search(r'mediaXML=(.*?)(?:&|$)', videosrc)
        if match:
            xmlfile = match.group(1)
        else:
            xmlfile = None
        metadata['Plot'] = description
        metadata['thumbnail'] = thumbnail
        if xmlfile:
            xmlpage = webpage(xmlfile)
            # NOTE(review): this tests the webpage object itself, which is
            # presumably always truthy — probably meant xmlpage.doc; confirm.
            if xmlpage:
                xmldom = self._xml(xmlpage.doc)
                title = xmldom.getElementsByTagName('title')[0].firstChild.nodeValue
                url = xmldom.getElementsByTagName('media:content')[0].getAttribute('url')
                metadata['Title'] = title
                metadata['url'] = url
    return metadata
def episodes(self, id):
    """List full episodes and extras for a show id.

    The first fetch returns a tiny XML stub whose comment encodes the real
    show page URL; that page is then scraped for episode/extras sliders.
    Returns the result of xbmcitems.addall() on success, [] otherwise.
    """
    page = webpage(self.url(id))
    # page.doc looks like:
    # <?xml version="1.0" encoding="UTF-8"?><!--urn:MEDIA:6120103:home-and-away-s2014a-ep6082-->
    # which maps to http://tvnz.co.nz/home-and-away/s2014a-ep6082-video-6120103
    m = re.match(r'^.*?<!--urn:MEDIA:(\d+):(.*?)-(s.*?-ep.*?)-->', page.doc)
    if not m:
        return []
    url = "%s/%s/%s-video-%s" % (self.urls['base'], m.group(2), m.group(3), m.group(1))
    # FIX: removed stray debug `print url` that polluted the plugin's stdout.
    page = webpage(url, agent="chrome")
    if page.doc:
        soup = BeautifulSoup(page.doc)
        if soup:
            div = soup.find('div', attrs={'id' : 'slidefullepisodes'})
            # FIX: guard against a missing slider div (was an unconditional
            # findAll that raised AttributeError), matching the extras branch.
            if div:
                shows = div.findAll('li', attrs={'class' : re.compile(r'\bshowItem\b')})
                for show in shows:
                    item = self._episode(show)
                    if item:
                        self.xbmcitems.items.append(item)
            div = soup.find('div', attrs={'id' : 'slidevideoextras'})
            if div:
                shows = div.findAll('li', attrs={'class' : re.compile(r'\bshowItem\b')})
                for show in shows:
                    item = self._episode(show)
                    if item:
                        self.xbmcitems.items.append(item)
            return self.xbmcitems.addall()
    return []
def _geturls(self, id, channel):
    """Scrape an OnDemand video page and build RTMP playback URLs.

    `id` is either a comma-separated 4-tuple of path components, or a full
    page URL. Returns a dict mapping quality (kbps) -> rtmp URL string;
    empty on any failure (failures are logged to stderr).
    """
    urls = dict()
    ids = id.split(",")
    if len(ids) == 4:
        pageUrl = "%s/%s/%s/%s/%s/%s/%s/%s/%s" % (self.channels[channel]['base'], ids[0], self.urls["video1"], ids[1], self.urls["video2"], ids[2], self.urls["video3"], ids[3], self.urls["video4"])
    else:
        # FIX: pageUrl was only assigned in the 4-part branch but is used
        # below in swfverify, raising NameError here; in this branch `id`
        # is already the full page URL.
        pageUrl = id
    page = webpage(pageUrl)
    if page.doc:
        videoid = re.search('var video ="/(.*?)/([0-9A-Z\-]+)/(.*?)";', page.doc)
        if videoid:
            # Player SWF is hard-coded; the scraped variant was abandoned.
            videoplayer = 'http://static.mediaworks.co.nz/video/jw/5.10/df.swf'
            rnd = ""
            auth = re.search('random_num = "([0-9]+)";', page.doc)
            if auth:
                rnd = "?rnd=" + auth.group(1)
            # rtmpdump-style SWF verification parameters.
            swfverify = ' swfVfy=true swfUrl=%s%s pageUrl=%s' % (videoplayer, rnd, pageUrl)
            realstudio = 'tv3'
            site = re.search("var pageloc='(TV-)?(.*?)-", page.doc)
            if site:
                realstudio = site.group(2).lower()
            qualities = [330]  # only the 330K stream is fetched today
            for quality in qualities:
                # (a dead first assignment using the rtmp1/rtmp2 template was
                # removed: it was unconditionally overwritten by this one)
                urls[quality] = '%s/%s/%s%s/%s/%s_%sK' % (self.urls["flash1"], self._rtmpchannel(realstudio), self.urls["flash2"], videoid.group(1), urllib.quote(videoid.group(2)), urllib.quote(videoid.group(3)), quality) + ' pageUrl=' + pageUrl
        else:
            sys.stderr.write("_geturls: No videoid")
    else:
        sys.stderr.write("_geturls: No page.doc")
    return urls
def show(self, catid, title, provider):
    """Show video items from a TV Show style TV3 webpage.

    `catid` may be a site-relative path or a full http URL. The listing
    is either a set of <table> blocks (one per show) or, failing that,
    plain <tr> rows with a header row that is skipped.
    """
    baseurl = ""
    if catid[:4] != "http":
        baseurl = self.urls["base"]
    geturl = "%s%s" % (baseurl, catid)
    page = webpage(geturl)
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
        # The video list is the div carrying this MS-XSLT namespace attribute.
        tables = html_divtag.find(attrs={"xmlns:msxsl": "urn:schemas-microsoft-com:xslt"})
        if tables:
            programs = tables.findAll('table')
            if len(programs) > 0:
                count = 0
                for soup in programs:
                    self.xbmcitems.items.append(self._itemshow(soup, provider, title))
                    count += 1
                self.xbmcitems.addall()
            else:
                # Fallback layout: table rows; count>0 skips the header row.
                programs = tables.findAll('tr')
                if len(programs) > 0:
                    count = -1
                    for soup in programs:
                        count += 1
                        if count > 0:
                            self.xbmcitems.items.append(self._itemtable(soup, provider, title))
                    self.xbmcitems.addall()
                else:
                    sys.stderr.write("show: Couldn't find any videos in list")
        else:
            sys.stderr.write("show: Couldn't find video list")
    else:
        sys.stderr.write("show: Couldn't get index webpage")
def sections(self, section):
    """List the videos in one site section's gallery box.

    Builds one playable item per gallery entry; when prefetch is on the
    direct URL is resolved immediately, otherwise a plugin callback URL
    is stored. Returns xbmcitems.addall() on success.
    """
    page = webpage('%s/%s/%s/' % (self.urls['base'], section, self.urls['videos']))
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
        gallery = html_divtag.find(attrs = {'class' : 'gallery_box'})
        if gallery:
            videos = gallery.findAll('div')
            if len(videos) > 0:
                for video in videos:
                    link = video.find("a")
                    if link:
                        if link.string:
                            item = tools.xbmcItem(self.channel)
                            item['videoInfo']["Title"] = link.string.strip()
                            image = video.find("img")
                            if image:
                                item['videoInfo']["Thumb"] = image['src']
                            videoid = re.match('/%s/%s/([0-9]+)/' % (section, self.urls['videos']), link['href'])
                            if videoid:
                                if self.prefetch:
                                    # FIX: was passing the match object itself;
                                    # _geturl joins strings, so it needs the id.
                                    item.urls = [self._geturl(section, videoid.group(1))]
                                else:
                                    item.playable = True
                                # FIX: query string read "§ion=" (HTML-entity
                                # mangling of "&sect"); restored "&section=".
                                item['videoInfo']["FileName"] = "%s?ch=%s&section=%s&id=%s" % (self.base, self.channel, section, videoid.group(1))
                                self.xbmcitems.items.append(item)
                return self.xbmcitems.addall()
            else:
                sys.stderr.write("sections: no videos")
        else:
            sys.stderr.write("sections: no gallery_box")
    else:
        sys.stderr.write("sections: no page.doc")
def _getMetadata(self, index):
    """Fetch the JWPlayer playlist XML for `index` and extract metadata.

    Returns a dict with id/Title/Plot/Thumb and a 'urls' sub-dict mapping
    vertical resolution (int) -> stream URL. Returns {} when the fetch or
    the XML parse fails.
    """
    metadata = {}
    page = webpage("%s/playlist/null/%s" % (self.urls['base'], index))
    if page.doc:
        metadata['id'] = index
        xmldom = self._xml(page.doc)
        if not xmldom:
            return {}
        metadata['Title'] = xmldom.getElementsByTagName('title')[0].firstChild.data.strip()
        metadata['Plot'] = xmldom.getElementsByTagName('description')[0].firstChild.data.strip()
        metadata['Thumb'] = xmldom.getElementsByTagName('jwplayer:image')[0].firstChild.nodeValue
        # Thumbnails and stream files may be site-relative; absolutize them.
        if not metadata['Thumb'].startswith("http://"):
            metadata['Thumb'] = self.urls['base'] + metadata['Thumb']
        srcUrls = xmldom.getElementsByTagName('jwplayer:source')
        urls = {}
        for srcUrl in srcUrls:
            url = srcUrl.getAttribute('file')
            if not url.startswith("http://"):
                url = self.urls['base'] + url
            # label is in format 720p or 540p; strip the trailing 'p'.
            size = srcUrl.getAttribute('label')
            size = int(size[:-1])
            urls[size] = url
        metadata['urls'] = urls
    return metadata
def _geturl(self, section, id):
    """Resolve the direct .mp4 URL embedded in a video page.

    Returns the URL string, or None when the page cannot be fetched or
    contains no player file entry.
    """
    address = "/".join((self.urls['base'], section, self.urls['videos'], id))
    page = webpage(address)
    if not page.doc:
        return None
    hit = re.search('{file: "(http://(.*?).mp4)"}', page.doc)
    if hit:
        return hit.group(1)
    return None
def show(self, id, search = False):
    """List shows from the PS3 XML feed (or from a search query).

    With search=True, `id` is treated as free text and fed to the search
    endpoint; otherwise it is a feed id. Only '*_episodes_group' entries
    become list items; extras groups are skipped.
    """
    if search:
        import urllib
        url = "%s/%s/%s?q=%s" % (self.urls['base'], self.urls['search'], self.urls['page'], urllib.quote_plus(id))
    else:
        url = self.url(id)
    xml = self._xml(webpage(url).doc)
    if xml:
        for node in xml.getElementsByTagName('Show'):
            # href encodes both the show slug and whether this is the
            # episodes or the extras group.
            match = re.search('/content/(.*)_(episodes|extras)_group/ps3_xml_skin.xml', node.attributes["href"].value)
            if match and match.group(2) == "episodes":
                item = tools.xbmcItem()
                title = node.attributes["title"].value
                item.info["FileName"] = "%s?ch=%s&type=singleshow&id=%s%s" % (self.base, self.channel, match.group(1), self.urls['episodes'])
                item.info["Title"] = title
                item.info["TVShowTitle"] = title
                self.xbmcitems.items.append(item)
    self.xbmcitems.addall()
def showsindex(self, provider=None):
    """Create a second-level list of TV Shows from the TV3 shows page.

    FIX: the original signature was ``showsindex(provider)`` while the body
    referenced ``self`` throughout, so every bound call raised NameError.
    The first parameter is restored to ``self``; ``provider`` is kept
    (unused) with a default for backward compatibility.
    """
    page = webpage("%s/Shows.aspx" % ("http://www.tv3.co.nz"))
    if page.doc:
        html_divtag = BeautifulSoup(page.doc)
        linksdiv = html_divtag.find('div', attrs = {"id": "pw_8171"})
        if linksdiv:
            links = linksdiv.findAll('a')
            if len(links) > 0:
                for link in links:
                    item = tools.xbmcItem()
                    item.info["Title"] = link.string.strip()
                    catid = link['href']
                    # The original if/else for "60 Minutes" built an identical
                    # URL in both branches, so it is collapsed to one line.
                    item.info["FileName"] = "%s?ch=TV3&cat=%s&title=%s&catid=%s" % (self.base, "shows", urllib.quote(item.info["Title"]), urllib.quote(catid))
                    self.xbmcitems.items.append(item)
                self.xbmcitems.addall()
            else:
                sys.stderr.write("showsindex: Couldn't find any videos in list")
        else:
            sys.stderr.write("showsindex: Couldn't find video list")
    else:
        sys.stderr.write("showsindex: Couldn't get index webpage")
def show(self, channel, title):
    """Show video items from a TV Show style TV3 webpage.

    Scrapes the show's TVOnDemand page and appends one item per grid_4
    episode block.
    """
    page = webpage("%s/%s/%s/%s" % (self.channels[channel]['base'], self.channels[channel]['shows'], title, "TVOnDemand.aspx"))
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag)
        programblock = html_divtag.find(attrs={"class": "grid_8"})
        if programblock:
            programs = programblock.findAll('div', attrs={"class": "grid_4"})
            if len(programs) > 0:
                # FIX: removed stray debug `print soup` which dumped every
                # episode block to stdout.
                for soup in programs:
                    self.xbmcitems.items.append(self._itemshow(channel, title, soup))
                self.xbmcitems.addall()
        else:
            sys.stderr.write("show: Couldn't find video list")
    else:
        sys.stderr.write("show: Couldn't get index webpage")
def episodes(self, id):
    """List Episode (and Extras) entries from a show's XML feed.

    When prefetch is enabled items are added incrementally with a running
    total and sorted at the end; otherwise they are queued and flushed in
    one addall() call.
    """
    page = webpage(self.url(id))
    if page.doc:
        xml = self._xml(page.doc)
        if xml:
            #for ep in xml.getElementsByTagName('Episode').extend(xml.getElementsByTagName('Extra')):
            #for ep in map(xml.getElementsByTagName, ['Episode', 'Extra']):
            count = xml.getElementsByTagName('Episode').length
            for ep in xml.getElementsByTagName('Episode'):
                item = self._episode(ep)
                if item:
                    self.xbmcitems.items.append(item)
                    if self.prefetch:
                        self.xbmcitems.add(count)
            # NOTE(review): tag name 'Extras' here vs 'Extra' in the commented
            # attempts above — confirm against the actual feed schema.
            for ep in xml.getElementsByTagName('Extras'):
                item = self._episode(ep)
                if item:
                    self.xbmcitems.items.append(item)
            if self.prefetch:
                self.xbmcitems.sort()
            else:
                self.xbmcitems.addall()
def programmes(self, type, urlext):
    """List programmes for a channel page, a video slider, or a search.

    `type` selects the endpoint and the soup strainer; a "search" listing
    is parsed with the "video" link/image conventions after the fetch.
    NOTE(review): `type` and `list` shadow builtins; folder is set but
    never used in the visible body.
    """
    if type == "channel":
        folder = 1
        url = self.urls['base']
    elif type == "video":
        folder = 0
        url = "%s/assets/php/slider.php?channel=%s" % (self.urls['base'], urlext)
    elif type == "search":
        folder = 0
        url = "%s/search?search_keyword=%s" % (self.urls['base'], urlext.replace(" ", "+"))
    page = webpage(url)
    if page.doc:
        if type == "channel" or type == "search":
            div_tag = SoupStrainer('div')
            html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
            programmes = html_divtag.findAll(attrs={'class' : 'programmes'})
        elif type == "video":
            div_tag = SoupStrainer('body')
            html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
            programmes = html_divtag.findAll(attrs={'class' : 'slider slider-small'})
        # search results use the video-style hrefs/images from here on
        if type == "search":
            type = "video"
        if len(programmes) > 0:
            for program in programmes:
                list = program.find('ul')
                if list:
                    listitems = list.findAll('li')
                    count = len(listitems)
                    if count > 0:
                        for listitem in listitems:
                            link = listitem.find('a', attrs={'href' : re.compile("^/%s/" % type)})
                            if link.img:
                                if re.search("assets/images/%ss/" % type, link.img["src"]):
                                    item = tools.xbmcItem()
                                    if listitem.p.string:
                                        item.info["Title"] = listitem.p.string.strip()
                                    else:
                                        item.info["Title"] = link.img["alt"]
                                    item.info["Thumb"] = "%s/%s" % (self.urls['base'], link.img["src"])
                                    # numeric id is embedded in the thumbnail filename
                                    index = re.search("assets/images/%ss/([0-9]*?)-mini.jpg" % type, link.img["src"]).group(1)
                                    item.info["FileName"] = "%s?ch=%s&%s=%s" % (self.base, self.channel, type, urllib.quote(index))
                                    if type == "video":
                                        if self.prefetch:
                                            item.info["FileName"] = self._geturl(index)
                                        else:
                                            item.playable = True
                                    self.xbmcitems.items.append(item)
                        if self.prefetch:
                            self.xbmcitems.add(count)
                        if self.prefetch:
                            self.xbmcitems.sort()
                        else:
                            self.xbmcitems.addall()
                    else:
                        sys.stderr.write("Search returned no results")
        else:
            sys.stderr.write("Couldn't find any programs")
    else:
        sys.stderr.write("Couldn't get page")
def GetSwfUrl(self, qsData):
    """Derive the Brightcove player SWF URL from the federated viewer redirect.

    Requests the federated_f9 viewer with the given query-string data,
    follows the redirect, strips its query string, and rewrites the
    bootloader path to the federatedVideoUI copy.
    """
    query = urllib.urlencode(qsData)
    page = webpage("http://c.brightcove.com/services/viewer/federated_f9?&" + query, agent='chrome')
    swf = page.redirUrl.split(u"?", 1)[0]
    return swf.replace(u"BrightcoveBootloader.swf", u"federatedVideoUI/BrightcoveBootloader.swf")
def search(self, query):
    """Search the site's programme index and queue one item per result.

    The query is repeated in several engine-specific filter fields to
    restrict results to programme pages on the tv site. Rows whose URL
    does not end in a numeric id are skipped.
    """
    import urllib
    qid = urllib.quote_plus(query)
    # Search-appliance filters: restrict to type:programme on site:tv.
    qs = "&requiredfields=type:programme.site:tv&partialfields=programme-title:%s&fq=programme-title:%s&fq=type:programme&fq=site:tv&num=999" % (qid, qid)
    url = "%s/%s/%s?q=%s%s" % (self.urls['base'], self.urls['search'], self.urls['searchpage'], qid, qs)
    page = webpage(url)
    soup = BeautifulSoup(page.doc)
    if soup:
        for show in soup.findAll('ul', attrs={'class' : 'showDetailsMain'}):
            channel = show.find('li', attrs={'class' : "channel"}).contents[0].strip()
            item = tools.xbmcItem(channel, self.channel)
            info = item['videoInfo']
            urlIn = show.a['href']
            if not urlIn.startswith('http://'):
                urlIn = self.urls['base'] + urlIn
            info['urlIn'] = urlIn
            # The numeric show id is the first "-<digits>" group in the URL.
            m = re.match(r'^.*?-(\d+)', urlIn)
            if not m:
                continue
            id = m.group(1)
            info["Title"] = show.a.contents[0].strip()
            info["Date"] = show.find('li', attrs={'class' : 'date'}).contents[0].strip()
            info["TVShowTitle"] = info["Title"]
            info["Plot"] = show.find('li', attrs={'class' : 'details'}).contents[0].strip()
            info["FileName"] = "%s?ch=%s&id=%s&type=shows" % (self.base, self.channel, id)
            self.xbmcitems.items.append(item)
    return self.xbmcitems.addall()
def shows(self, channel):
    """Create a second-level list of TV Shows from a TV3/FOUR A-Z page.

    Only links whose host prefix matches the requested channel's first
    three letters are listed (the A-Z page mixes both channels).
    """
    page = webpage("%s/%s/%s" % (self.channels[channel]['base'], self.channels[channel]['ondemand'], "TitleAZ.aspx"))
    if page.doc:
        html_divtag = BeautifulSoup(page.doc)
        showsdiv = html_divtag.findAll('div', attrs = {"class": "grid_2"})
        if len(showsdiv) > 0:
            for show in showsdiv:
                item = tools.xbmcItem()
                title = show.find('p').find('a')
                if title:
                    if title.string:
                        # e.g. http://www.tv3... -> "tv3" must match channel
                        if title['href'][len('http://www.'):len('http://www.') + 3] == channel[0:3].lower():
                            item.info["Title"] = title.string.strip()
                            image = show.find("img")
                            if image:
                                item.info["Thumb"] = image['src']
                            item.info["FileName"] = "%s?ch=TV3&channel=%s&cat=%s&title=%s" % (self.base, channel, "show", urllib.quote(item.info["Title"].replace(" ", "")))
                            self.xbmcitems.items.append(item)
            self.xbmcitems.addall()
        else:
            # FIX: error messages said "showsindex:" (copy-paste from another
            # method); corrected to this method's name for accurate logs.
            sys.stderr.write("shows: Couldn't find any videos in list")
    else:
        sys.stderr.write("shows: Couldn't get index webpage")
def _geturl(self, index):
    """Fetch the playlist XML for `index` and return its quoted media URL.

    Returns None when the playlist page cannot be fetched.
    """
    page = webpage("%s/playlist/null/%s" % (self.urls['base'], index))
    if not page.doc:
        return None
    xml = BeautifulStoneSoup(page.doc)
    path = urllib.quote(xml.find('media:content')["url"])
    return self.urls['base'] + path
def index(self, type = 'showall', id = ""): page = webpage('/'.join([self.urls['base'], self.urls['media'], type, id])) if page.doc: div_tag = SoupStrainer('div') html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag) programmes = html_divtag.findAll(attrs={'class' : 'col gu1 video'}) if len(programmes) > 0: for program in programmes: item = tools.xbmcItem() link = re.search("/media/([a-z]+)/([0-9]+)", program.p.a['href']) if link: item.info["Title"] = program.p.span.string item.info["Thumb"] = "%s%s" % (self.urls['base'], program.p.a.img['src']) if link.group(1) == "view": item.info["Title"] += ' ' + program.p.span.next.next.next.next.next.string.strip()[6:].strip() if self.prefetch: item.info["FileName"] = self._geturl(link.group(2)) else: item.playable = True item.info["FileName"] = "%s?ch=%s&view=%s&info=%s" % (self.base, self.channel, link.group(2), item.infoencode()) else: item.info["FileName"] = "%s?ch=%s&type=%s&id=%s" % (self.base, self.channel, link.group(1), link.group(2)) self.xbmcitems.items.append(item) return self.xbmcitems.addall() else: sys.stderr.write("index: no programmes") else: sys.stderr.write("index: no page.doc")
def _geturls(self, title):
    """Collect download URLs for `title` from the site's JSON endpoint.

    Returns a dict mapping total file size -> list of URLs, aggregated
    per video format and bitrate. Only the first entry of the JSON list
    is inspected for formats/bitrates.
    """
    url = "%s%s%s" % (self.urls['base'], self.urls['json'], title)
    page = webpage(url)
    returnurls = dict()
    if page.doc:
        import json
        videos = json.loads(page.doc)
        allurls = dict()
        filesizes = dict()
        # NOTE(review): only videos[0] is read; sibling entries (if any)
        # are ignored — confirm the endpoint returns a single-element list.
        video = videos[0]
        for vidFormat, items in video.iteritems():
            # skip scalar fields; only per-format dicts carry streams
            if type(items) is not dict:
                continue
            allurls[vidFormat] = dict()
            filesizes[vidFormat] = dict()
            for name, value in items.iteritems():
                # '<bitrate>_res' holds the URL, '<bitrate>_res_mb' the size
                if name[-4:] == '_res':
                    bitrate = name[:-4]
                    if not bitrate in allurls[vidFormat]:
                        allurls[vidFormat][bitrate] = list()
                    if not bitrate in filesizes[vidFormat]:
                        filesizes[vidFormat][bitrate] = 0
                    allurls[vidFormat][bitrate].append(video[vidFormat][bitrate + '_res'])
                    if video[vidFormat][bitrate + '_res_mb']:
                        filesizes[vidFormat][bitrate] += video[vidFormat][bitrate + '_res_mb']
        # Re-key by accumulated size so callers can pick by download size.
        for vidFormat, bitrates in allurls.iteritems():
            for bitrate, urls in bitrates.iteritems():
                size = filesizes[vidFormat][bitrate]
                if not size in returnurls:
                    returnurls[size] = list()
                returnurls[size].extend(urls)
    return returnurls
def episodes(self, channel, cat):
    """Show video items from a normal TV3 webpage (one per grid_2 block).

    With prefetch the running total is pushed via add(); otherwise all
    queued items are flushed with addall().
    """
    page = webpage("%s/%s/%s" % (self.channels[channel]['base'], self.channels[channel]['ondemand'], cat + ".aspx"))
    if page.doc:
        a_tag = SoupStrainer('div')
        html_atag = BeautifulSoup(page.doc, parseOnlyThese=a_tag)
        programs = html_atag.findAll(attrs={"class": "grid_2"})
        if len(programs) > 0:
            for soup in programs:
                item = self._itemdiv(soup, channel)
                if item:
                    self.xbmcitems.items.append(item)
                    # NOTE(review): the newer duplicate of this method uses
                    # item['urls'] here instead of item.urls — confirm which
                    # attribute style tools.xbmcItem actually exposes.
                    if len(item.urls) > 0:
                        if self.prefetch:
                            self.xbmcitems.add(len(programs))
            if self.prefetch:
                self.xbmcitems.sort()
            else:
                self.xbmcitems.addall()
        else:
            sys.stderr.write("episodes: Couldn't find any videos")
    else:
        sys.stderr.write("episodes: Couldn't get videos webpage")
def firstepisode(self, id):
    """Return the info dict of the first <Episode> for show `id`.

    Returns False when the page, the XML, or the episode item cannot be
    obtained.
    """
    page = webpage(self.url(id + self.urls['episodes']))
    if not page.doc:
        return False
    dom = self._xml(page.doc)
    if not dom:
        return False
    item = self._episode(dom.getElementsByTagName('Episode')[0])
    if item:
        return item.info
    return False
def _geturl(self, id):
    """Return the last 'file': 'http...' URL found on a media view page.

    Returns None (after logging to stderr) when the page cannot be
    fetched or contains no file entries.
    """
    page = webpage('/'.join([self.urls['base'], self.urls['media'], 'view', id]))
    if page.doc:
        # FIX: initialise link — if finditer yields nothing the loop body
        # never runs and the `if link` test raised NameError.
        link = None
        for link in re.finditer("'file': 'http(.*?)'", page.doc):
            pass  # deliberately keep only the last match
        if link:
            return 'http' + link.group(1)
        else:
            sys.stderr.write("_geturl: no link")
    else:
        sys.stderr.write("_geturl: no page.doc")
def advert(self, chapter):
    """Resolve the flash-ad URL for a playlist chapter, if it has one.

    Follows the chapter's <ref src> (which returns a .asf wrapper), then
    returns the first non-empty <FLV> text node; returns None when any
    step yields nothing.
    """
    advert = chapter.getElementsByTagName('ref')
    if len(advert):
        # fetch the link - it'll return a .asf file
        page = webpage(advert[0].attributes['src'].value)
        if page.doc:
            xml = self._xml(page.doc)
            if xml:
                # grab out the URL to the actual flash ad
                for flv in xml.getElementsByTagName('FLV'):
                    if flv.firstChild and len(flv.firstChild.wholeText):
                        return(flv.firstChild.wholeText)
def page(self, filter, page):
    """List one result page of the NZ On Screen filter browser.

    Each result row contributes one item; the title id is taken from the
    image cell's /title/ (or /interviews/) link. NOTE(review): `filter`
    shadows a builtin, and the `page` parameter is immediately shadowed
    by the fetched webpage object.
    """
    url = "%s%s?page=%s" % (self.urls['base'], filter, page)
    page = webpage(url, 'chrome', 'nzos_html5=true')
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
        results = html_divtag.find(attrs={'id' : 'filter_result_set'})
        if results:
            rows = results.findAll('tr')
            if len(rows) > 0:
                for row in rows:
                    cells = row.findAll('td')
                    count = len(cells)
                    if count > 0:
                        item = tools.xbmcItem()
                        for cell in cells:
                            if cell['class'] == 'image':
                                item.info['Thumb'] = "%s%s" % (self.urls['base'], cell.div.div.a.img['src'])
                                title = re.search("/title/(.*)", cell.a['href'])
                                if not title:
                                    title = re.search("/interviews/(.*)", cell.a['href'])
                            #elif cell['class'] == 'title_link title':
                            elif cell['class'].startswith('title_link'):
                                item.info['Title'] = item.unescape(cell.a.contents[0])
                            #elif cell['class'] == 'year':
                            #    pass
                            #elif cell['class'] == 'category':
                            #    pass
                            #elif cell['class'] == 'director':
                            #    pass
                            elif cell['class'] == 'added':
                                item.info["Date"] = tools.xbmcdate(cell.contents[0], ".")
                        # NOTE(review): `title` stays unbound if a row lacks an
                        # 'image' cell — presumably every row has one; confirm.
                        if title:
                            if self.prefetch:
                                item.urls = self._videourls(title.group(1))
                                item.units = "MB"
                            else:
                                item.info["FileName"] = "%s?ch=%s&title=%s&info=%s" % (self.base, self.channel, title.group(1), item.infoencode())
                                item.playable = True
                            self.xbmcitems.items.append(item)
                        if self.prefetch:
                            self.xbmcitems.add(count)
                if self.prefetch:
                    self.xbmcitems.sort()
                else:
                    self.xbmcitems.addall()
            else:
                sys.stderr.write("page: No rows")
        else:
            sys.stderr.write("page: No results")
    else:
        sys.stderr.write("page: No page.doc")
def index(self, filter = "/explore/"):
    """Browse the NZ On Screen explore tree one filter level at a time.

    The filter depth selects which 'explore_filter_N' block to read; a
    list view is used as fallback. When no sections exist, pagination
    links are turned into "Page N" items instead. A Search entry is
    appended at the top level only.
    """
    filterarray = filter.strip('/').split('/')
    filterlevel = len(filterarray)
    # the 'az' browse nests one level deeper than its path suggests
    if filterlevel == 4 and filterarray[2] == 'az':
        filterlevel += 1
    url = self.urls['base'] + filter
    page = webpage(url, agent='chrome', cookie='nzos_html5=true')
    if page.doc:
        #resources.tools.gethtmlpage("http://www.nzonscreen.com/html5/opt_in", "chrome", 1) # Get a cookie for this session to enable the HTML5 video tag
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
        sections = html_divtag.find(attrs={'id' : 'explore_filter_%s' % str(filterlevel)})
        if not sections:
            sections = html_divtag.find(attrs={'id' : 'explore_listview'})
        if sections:
            links = sections.findAll('a')
            if len(links) > 0:
                for link in links:
                    item = tools.xbmcItem(self.channel)
                    info = item['videoInfo']
                    info["FileName"] = "%s?ch=%s&filter=%s" % (self.base, self.channel, urllib.quote(link["href"]))
                    if link.string:
                        info["Title"] = link.string.strip()
                    else:
                        # no link text: derive a title from the last path part
                        filterarray = link["href"].split('/')
                        info["Title"] = filterarray[len(filterarray) - 1].capitalize()
                    self.xbmcitems.items.append(item)
                if filterlevel == 1:
                    item = tools.xbmcItem(self.channel)
                    info = item['videoInfo']
                    info["FileName"] = "%s?ch=%s&filter=search" % (self.base, self.channel)
                    info["Title"] = "Search"
                    self.xbmcitems.items.append(item)
            else:
                # no direct links: expose the paginated result pages instead
                nav = html_divtag.find(attrs={'class' : 'nav_pagination'})
                if nav:
                    pages = nav.findAll('a')
                    if pages:
                        for page in pages:
                            if page.string:
                                lastpage = page.string.strip()
                        for i in range(1, int(lastpage)):
                            item = tools.xbmcItem(self.channel)
                            info = item['videoInfo']
                            info["FileName"] = "%s?ch=%s&filter=%s&page=%s" % (self.base, self.channel, urllib.quote(filter), str(i))
                            info["Title"] = 'Page %s' % str(i)
                            self.xbmcitems.items.append(item)
            return self.xbmcitems.addall()
        else:
            sys.stderr.write("index: No sections")
    else:
        sys.stderr.write("index: No page.doc")
def _geturl(self, id):
    """Return the last 'file': 'http...' URL found on a media view page.

    Returns None (after logging to stderr) when the page cannot be
    fetched or contains no file entries.
    """
    page = webpage('/'.join([self.urls['base'], self.urls['media'], 'view', id]))
    if page.doc:
        # FIX: initialise link — if finditer yields nothing the loop body
        # never runs and the `if link` test raised NameError.
        link = None
        for link in re.finditer("'file': 'http(.*?)'", page.doc):
            pass  # deliberately keep only the last match
        if link:
            return 'http' + link.group(1)
        else:
            sys.stderr.write("_geturl: no link")
    else:
        sys.stderr.write("_geturl: no page.doc")
def _getMetadata(self, title):
    """Scrape the synopsis paragraph for `title`.

    Returns a dict with 'id' (and 'PlotOutline' when the synopsis block
    parses); returns {} when the page fetch fails. The synopsis walk is
    best-effort: any missing node is silently ignored.
    """
    metadata = {}
    page = webpage(self.urls['base'] + self.urls['metadata'] + title)
    if page.doc:
        doc = BeautifulSoup(page.doc)
        block = doc.find("div", attrs={'id' : 'widget_title_synopsis'})
        try:
            parts = []
            for node in block.div.p.contents:
                parts.append(str(node))
            metadata['PlotOutline'] = " ".join(parts).strip()
        except Exception:
            # div/p chain or contents missing — leave PlotOutline unset
            pass
        metadata['id'] = title
    return metadata
def _search(self, searchterm, catid):
    """Run a TV3 site search and queue one TV3 and one FOUR item per hit.

    Failures are reported on stderr; successful scrapes end with a single
    addall() flush.
    """
    query = searchterm.replace(" ", "+")
    page = webpage("%s/search/tabid/%s/Default.aspx?amq=%s" % (self._base_url('tv3'), catid, query))
    if not page.doc:
        sys.stderr.write("_search: Couldn't get videos webpage")
        return
    strainer = SoupStrainer('div')
    doc = BeautifulSoup(page.doc, parseOnlyThese = strainer)
    results = doc.findAll(attrs={"class": "results"})
    if not results:
        sys.stderr.write("_search: Couldn't find any videos")
        return
    for result in results:
        # each result row may exist on either channel; queue both variants
        for provider in ("tv3", "four"):
            self.xbmcitems.items.append(self._itemsearch(result, provider))
    self.xbmcitems.addall()
def show(self, channel, title):
    """Show video items from a TV Show style TV3 webpage.

    Scrapes the show's TVOnDemand page; every grid_4 block inside the
    grid_8 column becomes one item. Returns addall()'s result on success.
    """
    address = "%s/%s/%s/%s" % (self.channels[channel]['base'], self.channels[channel]['shows'], title, "TVOnDemand.aspx")
    page = webpage(address)
    if not page.doc:
        sys.stderr.write("show: Couldn't get index webpage")
        return
    strainer = SoupStrainer('div')
    doc = BeautifulSoup(page.doc, parseOnlyThese = strainer)
    block = doc.find(attrs={"class": "grid_8"})
    if not block:
        sys.stderr.write("show: Couldn't find video list")
        return
    programs = block.findAll('div', attrs={"class": re.compile(r"\bgrid_4\b")})
    if len(programs) > 0:
        for program in programs:
            self.xbmcitems.items.append(self._itemshow(channel, title, program))
        return self.xbmcitems.addall()
def atoz(self, catid, channel):
    """Show video items from an AtoZ style TV3 webpage.

    One item per 'wideArticles' block; with prefetch the running count is
    pushed for items that resolved any URLs. Returns addall() on success.
    """
    page = webpage("%s%s%s" % (self._base_url("tv3"), self.urls["cat"], catid))
    if page.doc:
        a_tag=SoupStrainer('div')
        html_atag = BeautifulSoup(page.doc, parseOnlyThese = a_tag)
        programs = html_atag.findAll(attrs={"class": "wideArticles"})
        if len(programs) > 0:
            for soup in programs:
                item = self._itematoz(soup, channel)
                self.xbmcitems.items.append(item)
                if len(item['urls']) > 0:
                    if self.prefetch:
                        self.xbmcitems.add(len(programs))
            return self.xbmcitems.addall()
        else:
            sys.stderr.write("atoz: Couldn't find any videos")
    else:
        sys.stderr.write("atoz: Couldn't get videos webpage")
def index(self):
    """Build the top-level menu from the site's home navigation links.

    Each nav link becomes one folder item whose callback carries the
    section slug (href with surrounding slashes stripped).
    """
    page = webpage(self.urls['base'])
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese = div_tag)
        menu = html_divtag.find(attrs = {'id' : 'home_nav'})
        if menu:
            menuitems = menu.findAll('a')
            for menuitem in menuitems:
                item = tools.xbmcItem(self.channel)
                item['videoInfo']["Title"] = menuitem.string
                # FIX: the query string read "§ion=" — an HTML-entity
                # mangling of "&sect" + "ion" — restored to "&section=".
                item['videoInfo']["FileName"] = "%s?ch=%s&section=%s" % (self.base, self.channel, menuitem["href"][1:-1])
                self.xbmcitems.items.append(item)
            return self.xbmcitems.addall()
        else:
            sys.stderr.write("index: no menu")
    else:
        sys.stderr.write("index: no page.doc")
def _search(self, searchterm, catid):
    """Run a TV3 site search and queue one TV3 and one FOUR item per hit.

    Duplicate of the other _search in this file; kept byte-identical in
    behaviour, only documented.
    """
    page = webpage(
        "%s/search/tabid/%s/Default.aspx?amq=%s" % (self._base_url('tv3'), catid, searchterm.replace(" ", "+")))
    if page.doc:
        a_tag = SoupStrainer('div')
        html_atag = BeautifulSoup(page.doc, parseOnlyThese=a_tag)
        programs = html_atag.findAll(attrs={"class": "results"})
        if len(programs) > 0:
            for soup in programs:
                # each hit may exist on either channel; queue both variants
                self.xbmcitems.items.append(self._itemsearch(soup, "tv3"))
                self.xbmcitems.items.append(self._itemsearch(soup, "four"))
            self.xbmcitems.addall()
        else:
            sys.stderr.write("_search: Couldn't find any videos")
    else:
        sys.stderr.write("_search: Couldn't get videos webpage")
def sections(self, section):
    """List the videos in one site section's gallery box.

    Older duplicate of the other `sections` in this file (uses item.info
    instead of item['videoInfo']); same fixes applied independently.
    """
    page = webpage('%s/%s/%s/' % (self.urls['base'], section, self.urls['videos']))
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag)
        gallery = html_divtag.find(attrs={'class': 'gallery_box'})
        if gallery:
            videos = gallery.findAll('div')
            if len(videos) > 0:
                for video in videos:
                    link = video.find("a")
                    if link:
                        if link.string:
                            item = tools.xbmcItem()
                            item.info["Title"] = link.string.strip()
                            image = video.find("img")
                            if image:
                                item.info["Thumb"] = image['src']
                            videoid = re.match('/%s/%s/([0-9]+)/' % (section, self.urls['videos']), link['href'])
                            if videoid:
                                if self.prefetch:
                                    # FIX: was passing the match object itself;
                                    # _geturl joins strings, so it needs the id.
                                    item.urls = [self._geturl(section, videoid.group(1))]
                                else:
                                    item.playable = True
                                # FIX: query string read "§ion=" (HTML-entity
                                # mangling of "&sect"); restored "&section=".
                                item.info["FileName"] = "%s?ch=%s&section=%s&id=%s" % (self.base, self.channel, section, videoid.group(1))
                                self.xbmcitems.items.append(item)
                self.xbmcitems.addall()
            else:
                sys.stderr.write("sections: no videos")
        else:
            sys.stderr.write("sections: no gallery_box")
    else:
        sys.stderr.write("sections: no page.doc")
def episodes(self, channel, cat):
    """Show video items from a normal TV3 webpage (one per grid_2 block).

    Newer duplicate of the other `episodes(channel, cat)` in this file:
    uses a \\bgrid_2\\b class regex, item['urls'], and returns addall().
    """
    page = webpage("%s/%s/%s" % (self.channels[channel]['base'], self.channels[channel]['ondemand'], cat + ".aspx"))
    if page.doc:
        a_tag=SoupStrainer('div')
        html_atag = BeautifulSoup(page.doc, parseOnlyThese = a_tag)
        programs = html_atag.findAll(attrs={"class": re.compile(r'\bgrid_2\b')})
        if len(programs) > 0:
            for soup in programs:
                item = self._itemdiv(soup, channel)
                if item:
                    self.xbmcitems.items.append(item)
                    if len(item['urls']) > 0:
                        if self.prefetch:
                            self.xbmcitems.add(len(programs))
            return self.xbmcitems.addall()
        else:
            sys.stderr.write("episodes: Couldn't find any videos")
    else:
        sys.stderr.write("episodes: Couldn't get videos webpage")
def index(self):
    """Build the top-level menu from the site's home navigation links.

    Older duplicate of the other `index` in this file (item.info style,
    no return value); same mojibake fix applied independently.
    """
    page = webpage(self.urls['base'])
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag)
        menu = html_divtag.find(attrs={'id': 'home_nav'})
        if menu:
            menuitems = menu.findAll('a')
            for menuitem in menuitems:
                item = tools.xbmcItem()
                item.info["Title"] = menuitem.string
                # FIX: the query string read "§ion=" — an HTML-entity
                # mangling of "&sect" + "ion" — restored to "&section=".
                item.info["FileName"] = "%s?ch=%s&section=%s" % (
                    self.base, self.channel, menuitem["href"][1:-1])
                self.xbmcitems.items.append(item)
            self.xbmcitems.addall()
        else:
            sys.stderr.write("index: no menu")
    else:
        sys.stderr.write("index: no page.doc")
def search(self, query, page=1):
    """Run an NZ On Screen search and list playable title/interview hits.

    Series entries (ids ending in "/series") are skipped; when more pages
    exist a "Next Page" item is appended. Returns [] on fetch/parse
    failure, otherwise xbmcitems.addall().
    """
    query = urllib.quote(query)
    # the site expects the literal UTF-8 checkmark in utf8=✓
    url = '%s/search?utf8=✓&search_text=%s&search=search&page=%s' % (self.urls['base'], query, page)
    pg = webpage(url, agent='chrome', cookie='nzos_html5=true')
    if not pg.doc:
        return []
    div_tag = SoupStrainer('div')
    html_divtag = BeautifulSoup(pg.doc, parseOnlyThese = div_tag)
    results = html_divtag.find(attrs={'class' : 'hero_results'})
    if not results:
        return []
    rows = results.findAll("div", attrs={'class' : re.compile(r'^(title|interview)$')})
    for row in rows:
        item = tools.xbmcItem(self.channel)
        info = item['videoInfo']
        src = row.a.img['src']
        if not src.startswith("http://"):
            src = self.urls['base'] + src
        item['videoInfo']['Thumb'] = src
        # title id lives in either a /title/ or an /interviews/ link
        title = re.search("/title/(.*)", row.a['href'])
        if not title:
            title = re.search("/interviews/(.*)", row.a['href'])
        item['videoInfo']['Title'] = item.unescape(row.p.a.contents[0])
        if title:
            item['videoInfo']["FileName"] = "%s?ch=%s&title=%s&info=%s" % (self.base, self.channel, title.group(1), item.infoencode())
            item['playable'] = True
        # series container pages are browsed elsewhere, not listed here
        if not title or not title.group(1).endswith("/series"):
            self.xbmcitems.items.append(item)
    nav = html_divtag.find(attrs={'class' : 'nav_pagination'})
    if nav:
        nextPage = nav.find("a", attrs={'rel' : 'next'})
        if nextPage:
            item = tools.xbmcItem(self.channel)
            info = item['videoInfo']
            pagenum = nextPage.contents[0]
            info["Title"] = "Next Page"
            info["FileName"] = "%s?ch=%s&filter=search&q=%s&page=%s" % (self.base, self.channel, query, pagenum)
            self.xbmcitems.items.append(item)
    return self.xbmcitems.addall()
def _geturls(self, title):
    """Fetch the JSON clip list for *title* and group part URLs by bitrate.

    Returns a dict mapping total file size (MB, summed over all parts) to the
    list of URLs for that bitrate.  Returns an empty dict when the page could
    not be fetched -- previously the function fell through and returned None,
    which broke any caller that iterates or indexes the result.
    """
    url = "%s%s%s" % (self.urls['base'], self.urls['json'], title)
    page = webpage(url)
    returnurls = dict()
    if page.doc:
        import json
        videos = json.loads(page.doc)
        allurls = dict()
        filesizes = dict()
        # The first record's "*_res" keys name the available bitrates.
        for name in videos[0]:
            if name[-4:] == '_res':
                bitrate = name[:-4]
                allurls[bitrate] = list()
                filesizes[bitrate] = 0
        # Collect every part's URL and accumulate the total size per bitrate.
        for video in videos:
            for bitrate in allurls:
                allurls[bitrate].append(video[bitrate + '_res'])
                filesizes[bitrate] = filesizes[bitrate] + video[bitrate + '_res_mb']
        # NOTE(review): keying by total size means two bitrates with an
        # identical total would collide -- confirm sizes are always distinct.
        for bitrate, urls in allurls.iteritems():
            returnurls[filesizes[bitrate]] = urls
    return returnurls
def page(self, filter, page):
    """List one page of the 'filter_result_set' table as playable items.

    Fix: ``title`` is reset for every row.  Previously it was only assigned
    inside the 'image' cell branch, so a row without an image cell either
    raised NameError (on the first row) or silently reused the previous
    row's match.
    """
    url = "%s%s?page=%s" % (self.urls['base'], filter, page)
    pg = webpage(url, agent='chrome', cookie='nzos_html5=true')
    if pg.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(pg.doc, parseOnlyThese=div_tag)
        results = html_divtag.find(attrs={'id': 'filter_result_set'})
        if results:
            rows = results.findAll('tr')
            if len(rows) > 0:
                for row in rows:
                    cells = row.findAll('td')
                    count = len(cells)
                    if count > 0:
                        item = tools.xbmcItem(self.channel)
                        title = None  # reset per row (bug fix, see docstring)
                        for cell in cells:
                            if cell['class'] == 'image':
                                src = cell.div.div.a.img['src']
                                if not src.startswith("http://"):
                                    src = self.urls['base'] + src
                                item['videoInfo']['Thumb'] = src
                                # Rows link either to /title/... or /interviews/...
                                title = re.search("/title/(.*)", cell.a['href'])
                                if not title:
                                    title = re.search("/interviews/(.*)", cell.a['href'])
                            elif cell['class'].startswith('title_link'):
                                item['videoInfo']['Title'] = item.unescape(cell.a.contents[0])
                            elif cell['class'] == 'added':
                                item['videoInfo']["Date"] = tools.xbmcdate(cell.contents[0], ".")
                        if title:
                            item['videoInfo']["FileName"] = "%s?ch=%s&title=%s&info=%s" % (
                                self.base, self.channel, title.group(1), item.infoencode())
                            item['playable'] = True
                            # Series landing pages are not playable; skip them.
                            if not title.group(1).endswith("/series"):
                                self.xbmcitems.items.append(item)
                return self.xbmcitems.addall()
            else:
                sys.stderr.write("page: No rows")
        else:
            sys.stderr.write("page: No results")
    else:
        sys.stderr.write("page: No page.doc")
def index(self, type='showall', id=""): page = webpage('/'.join( [self.urls['base'], self.urls['media'], type, id])) if page.doc: div_tag = SoupStrainer('div') html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag) programmes = html_divtag.findAll(attrs={'class': 'col gu1 video'}) if len(programmes) > 0: for program in programmes: item = tools.xbmcItem() link = re.search("/media/([a-z]+)/([0-9]+)", program.p.a['href']) if link: item.info["Title"] = program.p.span.string item.info["Thumb"] = "%s%s" % (self.urls['base'], program.p.a.img['src']) if link.group(1) == "view": item.info[ "Title"] += ' ' + program.p.span.next.next.next.next.next.string.strip( )[6:].strip() if self.prefetch: item.info["FileName"] = self._geturl( link.group(2)) else: item.playable = True item.info[ "FileName"] = "%s?ch=%s&view=%s&info=%s" % ( self.base, self.channel, link.group(2), item.infoencode()) else: item.info[ "FileName"] = "%s?ch=%s&type=%s&id=%s" % ( self.base, self.channel, link.group(1), link.group(2)) self.xbmcitems.items.append(item) self.xbmcitems.addall() else: sys.stderr.write("index: no programmes") else: sys.stderr.write("index: no page.doc")
def index(self):
    """Build the TVNZ top-level menu from the PS3 navigation XML feed."""
    # Feed: http://tvnz.co.nz/content/ps3_navigation/ps3_xml_skin.xml
    nav = self._xml(webpage(self.url("ps3_navigation")).doc)
    if nav:
        for node in nav.getElementsByTagName('MenuItem'):
            kind = node.attributes["type"].value
            # Only these menu kinds are browsable here ('distributor' is not).
            if kind not in ('shows', 'alphabetical'):
                continue
            found = re.search('/([0-9]+)/', node.attributes["href"].value)
            if not found:
                continue
            entry = tools.xbmcItem()
            entry.info["Title"] = node.attributes["title"].value
            entry.info["FileName"] = "%s?ch=%s&type=%s&id=%s" % (
                self.base, self.channel, kind, found.group(1))
            self.xbmcitems.items.append(entry)
        # Fixed "Search" entry appended after the scraped menu items.
        entry = tools.xbmcItem()
        entry.info["Title"] = "Search"
        entry.info["FileName"] = "%s?ch=TVNZ&type=%s" % (self.base, "search")
        self.xbmcitems.items.append(entry)
    else:
        sys.stderr.write("No XML Data")
    self.xbmcitems.addall()
def atoz(self, catid, provider):
    """List video items from a TV3 A-to-Z style category page."""
    doc = webpage("%s%s%s" % (self._base_url("tv3"), self.urls["cat"], catid)).doc
    if not doc:
        sys.stderr.write("atoz: Couldn't get videos webpage")
        return
    strained = BeautifulSoup(doc, parseOnlyThese=SoupStrainer('div'))
    articles = strained.findAll(attrs={"class": "wideArticles"})
    if len(articles) == 0:
        sys.stderr.write("atoz: Couldn't find any videos")
        return
    total = len(articles)
    for article in articles:
        entry = self._itematoz(article, provider)
        self.xbmcitems.items.append(entry)
        # When prefetching, flush items as soon as their URLs resolved.
        if len(entry.urls) > 0 and self.prefetch:
            self.xbmcitems.add(total)
    if self.prefetch:
        self.xbmcitems.sort()
    else:
        self.xbmcitems.addall()
def shows(self, channel):
    """List TV shows for *channel* from its OnDemand TitleAZ.aspx page."""
    url = "%s/%s/%s" % (self.channels[channel]['base'],
                        self.channels[channel]['ondemand'], "TitleAZ.aspx")
    page = webpage(url)
    if not page.doc:
        sys.stderr.write("showsindex: Couldn't get index webpage")
        return
    cells = BeautifulSoup(page.doc).findAll('div', attrs={"class": "grid_2"})
    if len(cells) == 0:
        sys.stderr.write("showsindex: Couldn't find any videos in list")
        return
    prefix = channel[0:3].lower()
    for cell in cells:
        anchor = cell.find('p').find('a')
        if not anchor or not anchor.string:
            continue
        # Keep only links whose domain starts with this channel's name,
        # e.g. "http://www.tv3..." for channel "TV3".
        if anchor['href'][len('http://www.'):len('http://www.') + 3] != prefix:
            continue
        entry = tools.xbmcItem()
        entry.info["Title"] = anchor.string.strip()
        thumb = cell.find("img")
        if thumb:
            entry.info["Thumb"] = thumb['src']
        entry.info["FileName"] = "%s?ch=TV3&channel=%s&cat=%s&title=%s" % (
            self.base, channel, "show",
            urllib.quote(entry.info["Title"].replace(" ", "")))
        self.xbmcitems.items.append(entry)
    self.xbmcitems.addall()
def programmes(self, type, urlext):
    """List programmes by *type*: 'channel' folders, 'video' items from the
    slider feed, or 'search' results (which are then treated as videos).

    NOTE(review): an unknown *type* leaves ``url`` unbound and raises
    NameError at ``webpage(url)`` -- callers are assumed to pass only the
    three values above.
    """
    if type == "channel":
        folder = 1
        url = self.urls['base']
    elif type == "video":
        folder = 0
        url = "%s/assets/php/slider.php?channel=%s" % (self.urls['base'], urlext)
    elif type == "search":
        folder = 0
        url = "%s/search?search_keyword=%s" % (self.urls['base'], urlext.replace(" ", "+"))
    page = webpage(url)
    if page.doc:
        # Channel/search pages mark results with class 'programmes';
        # the video slider feed nests them under the body instead.
        if type == "channel" or type == "search":
            div_tag = SoupStrainer('div')
            html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag)
            programmes = html_divtag.findAll(attrs={'class': 'programmes'})
        elif type == "video":
            div_tag = SoupStrainer('body')
            html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag)
            programmes = html_divtag.findAll(
                attrs={'class': 'slider slider-small'})
        # Search results link like videos from here on.
        if type == "search":
            type = "video"
        if len(programmes) > 0:
            for program in programmes:
                # NOTE: 'list' shadows the builtin; kept as-is here.
                list = program.find('ul')
                if list:
                    listitems = list.findAll('li')
                    count = len(listitems)
                    if count > 0:
                        for listitem in listitems:
                            # Only anchors pointing at /<type>/... are entries.
                            link = listitem.find(
                                'a', attrs={'href': re.compile("^/%s/" % type)})
                            if link.img:
                                if re.search("assets/images/%ss/" % type, link.img["src"]):
                                    item = tools.xbmcItem()
                                    # Prefer the caption text; fall back to the
                                    # image's alt attribute.
                                    if listitem.p.string:
                                        item.info["Title"] = listitem.p.string.strip()
                                    else:
                                        item.info["Title"] = link.img["alt"]
                                    item.info["Thumb"] = "%s/%s" % (
                                        self.urls['base'], link.img["src"])
                                    # The numeric id is embedded in the
                                    # thumbnail filename "<id>-mini.jpg".
                                    index = re.search(
                                        "assets/images/%ss/([0-9]*?)-mini.jpg" % type,
                                        link.img["src"]).group(1)
                                    item.info["FileName"] = "%s?ch=%s&%s=%s" % (
                                        self.base, self.channel, type,
                                        urllib.quote(index))
                                    if type == "video":
                                        if self.prefetch:
                                            item.info["FileName"] = self._geturl(index)
                                        else:
                                            item.playable = True
                                    self.xbmcitems.items.append(item)
                            if self.prefetch:
                                self.xbmcitems.add(count)
                        if self.prefetch:
                            self.xbmcitems.sort()
                        else:
                            self.xbmcitems.addall()
                    else:
                        sys.stderr.write("Search returned no results")
        else:
            sys.stderr.write("Couldn't find any programs")
    else:
        sys.stderr.write("Couldn't get page")
def index(self, filter="/explore/"):
    """List the explore navigation for *filter*: sub-filter folders (or the
    listview's entries), a 'Search' item at the top level, and 'Page N'
    folders when pagination is present.

    Fixes over the previous version:
      * leftover debug ``print`` statements removed (they polluted stdout);
      * ``lastpage`` is initialised and guarded, so a pagination block whose
        links carry no text no longer raises NameError;
      * the pagination loop no longer shadows the ``page`` webpage object.
    """
    filterarray = filter.strip('/').split('/')
    filterlevel = len(filterarray)
    page = webpage(self.urls['base'] + filter, 'chrome', 'nzos_html5=true')
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag)
        # Deeper filter levels use explore_filter_<level>; otherwise fall
        # back to the flat listview block.
        sections = html_divtag.find(
            attrs={'id': 'explore_filter_%s' % str(filterlevel)})
        if not sections:
            sections = html_divtag.find(attrs={'id': 'explore_listview'})
        if sections:
            for link in sections.findAll('a'):
                item = tools.xbmcItem()
                info = item.info
                info["FileName"] = "%s?ch=%s&filter=%s" % (
                    self.base, self.channel, urllib.quote(link["href"]))
                if link.string:
                    info["Title"] = link.string.strip()
                else:
                    # No link text: derive a title from the URL's last segment.
                    parts = link["href"].split('/')
                    info["Title"] = parts[-1].capitalize()
                self.xbmcitems.items.append(item)
            if filterlevel == 1:
                # Top level gets a fixed Search entry.
                item = tools.xbmcItem()
                info = item.info
                info["FileName"] = "%s?ch=%s&filter=search" % (self.base, self.channel)
                info["Title"] = "Search"
                self.xbmcitems.items.append(item)
            else:
                nav = html_divtag.find(attrs={'class': 'nav_pagination'})
                if nav:
                    # The last pagination link with text holds the page count.
                    lastpage = None
                    for pagelink in nav.findAll('a'):
                        if pagelink.string:
                            lastpage = pagelink.string.strip()
                    if lastpage:
                        for i in range(1, int(lastpage)):
                            item = tools.xbmcItem()
                            info = item.info
                            info["FileName"] = "%s?ch=%s&filter=%s&page=%s" % (
                                self.base, self.channel,
                                urllib.quote(filter), str(i))
                            info["Title"] = 'Page %s' % str(i)
                            self.xbmcitems.items.append(item)
            self.xbmcitems.addall()
        else:
            sys.stderr.write("index: No sections")
    else:
        sys.stderr.write("index: No page.doc")
def page(self, filter, page):
    """List one page of the 'filter_result_set' table; when prefetching,
    resolve each title's video URLs up front (sizes reported in MB).

    Fix: ``title`` is reset for every row.  Previously it was only assigned
    inside the 'image' cell branch, so a row without an image cell either
    raised NameError (on the first row) or silently reused the previous
    row's match.
    """
    url = "%s%s?page=%s" % (self.urls['base'], filter, page)
    page = webpage(url, 'chrome', 'nzos_html5=true')
    if page.doc:
        div_tag = SoupStrainer('div')
        html_divtag = BeautifulSoup(page.doc, parseOnlyThese=div_tag)
        results = html_divtag.find(attrs={'id': 'filter_result_set'})
        if results:
            rows = results.findAll('tr')
            if len(rows) > 0:
                for row in rows:
                    cells = row.findAll('td')
                    count = len(cells)
                    if count > 0:
                        item = tools.xbmcItem()
                        title = None  # reset per row (bug fix, see docstring)
                        for cell in cells:
                            if cell['class'] == 'image':
                                item.info['Thumb'] = "%s%s" % (
                                    self.urls['base'], cell.div.div.a.img['src'])
                                # Rows link either to /title/... or /interviews/...
                                title = re.search("/title/(.*)", cell.a['href'])
                                if not title:
                                    title = re.search("/interviews/(.*)", cell.a['href'])
                            elif cell['class'].startswith('title_link'):
                                item.info['Title'] = item.unescape(cell.a.contents[0])
                            elif cell['class'] == 'added':
                                item.info["Date"] = tools.xbmcdate(cell.contents[0], ".")
                        if title:
                            if self.prefetch:
                                item.urls = self._videourls(title.group(1))
                                item.units = "MB"
                            else:
                                item.info["FileName"] = "%s?ch=%s&title=%s&info=%s" % (
                                    self.base, self.channel,
                                    title.group(1), item.infoencode())
                                item.playable = True
                            self.xbmcitems.items.append(item)
                            if self.prefetch:
                                self.xbmcitems.add(count)
                if self.prefetch:
                    self.xbmcitems.sort()
                else:
                    self.xbmcitems.addall()
            else:
                sys.stderr.write("page: No rows")
        else:
            sys.stderr.write("page: No results")
    else:
        sys.stderr.write("page: No page.doc")
def _geturls(self, id, channel):
    """Scrape a TV3/Four OnDemand video page and build RTMP URLs.

    *id* is either a comma-separated 4-part path fragment (joined with the
    channel's URL templates) or a full page URL.  Returns a dict mapping
    bitrate (kbps) -> playable RTMP URL string; empty on any failure.

    Fix: ``pageUrl`` is now also assigned when *id* is a full URL --
    previously the else-branch left it unbound, so building ``swfverify``
    below raised NameError for every such call.  Unused locals
    (``realstudio``/``site``, ``playlist``, ``app``, ``tcurl``) removed.
    """
    urls = dict()
    ids = id.split(",")
    if len(ids) == 4:
        pageUrl = "%s/%s/%s/%s/%s/%s/%s/%s/%s" % (
            self.channels[channel]['base'], ids[0], self.urls["video1"],
            ids[1], self.urls["video2"], ids[2], self.urls["video3"],
            ids[3], self.urls["video4"])
        page = webpage(pageUrl)
    else:
        # id already is the full page URL.
        pageUrl = id
        page = webpage(id)
    if page.doc:
        videoid = re.search('var video ="/(.*?)/([0-9A-Z\-]+)/(.*?)";', page.doc)
        if videoid:
            # The player SWF location is fixed; it used to be scraped from
            # the page's swfobject.embedSWF call.
            videoplayer = 'http://static.mediaworks.co.nz/video/jw/5.10/df.swf'
            if videoplayer:
                rnd = ""
                auth = re.search('random_num = "([0-9]+)";', page.doc)
                if auth:
                    rnd = "?rnd=" + auth.group(1)
                # RTMP options enabling SWF verification against the player.
                swfverify = ' swfVfy=true swfUrl=%s%s pageUrl=%s' % (
                    videoplayer, rnd, pageUrl)
                # Higher bitrates (700/1500K) and the 56K fallback are
                # currently disabled; re-enable by appending to `qualities`.
                qualities = [330]
                geo = re.search('var geo= "(.*?)";', page.doc)
                if geo:
                    if geo.group(1) == 'geo' or geo.group(1) == 'geomob':
                        for quality in qualities:
                            urls[quality] = '%s/%s/%s/%s/%s/%s_%sK.mp4' % (
                                self.urls["rtmp1"],
                                self.channels[channel]['rtmp'],
                                self.urls["rtmp2"], videoid.group(1),
                                videoid.group(2),
                                urllib.quote(videoid.group(3)),
                                quality) + swfverify
                    elif geo.group(1) == 'str':
                        # News-style streams: different host, no SWF check.
                        for quality in qualities:
                            urls[quality] = '%s/%s/%s/%s/%s/%s_%sK' % (
                                self.urls['news1'], "vod",
                                self.urls["rtmp2"] + "3news",
                                videoid.group(1),
                                urllib.quote(videoid.group(2)),
                                urllib.quote(videoid.group(3)),
                                quality) + ' pageUrl=' + pageUrl
            else:
                sys.stderr.write("_geturls: No videoplayer")
        else:
            sys.stderr.write("_geturls: No videoid")
    else:
        sys.stderr.write("_geturls: No page.doc")
    return urls