def get_videos(self, datas):
    """List the videos of one mediatheek tag page, plus a next-page entry.

    ``datas`` supplies ``url`` (the tag path appended to
    ``/mediatheek/tag/``) and, optionally, ``page``.  One playable link is
    added per video found in the fetched HTML; when the pager advertises a
    following page, a directory entry pointing at ``show_videos`` for that
    page is added as well.
    """
    url = datas.get('url')
    requested_page = datas.get('page', False)
    # The page parameter is only appended when a truthy page was requested.
    page_txt = '&page=' + str(requested_page) if requested_page else ''
    data = channel.get_url(self.main_url + '/mediatheek/tag/' + url + page_txt)

    video_pattern = r""" id="video-(\d+)"><img src="([^"]+)" /></a>\s*<h5><a[^>]+>([^<]+)"""
    for video_id, thumb, label in re.findall(video_pattern, data):
        link = channel.array2url(channel_id=self.channel_id,
                                 url=video_id,
                                 action='play_video')
        channel.addLink(label, link, thumb)

    pager = re.search(
        r"""\?page=(\d+)" class="pager-next active" title="Ga naar volg""",
        data)
    if pager is None:
        # No active "next" pager link in the page: nothing more to add.
        return
    next_number = pager.group(1)
    channel.addDir('Page nr ' + next_number, self.icon,
                   channel_id=self.channel_id,
                   url=url,
                   action='show_videos',
                   page=next_number)
def get_videos(self, datas):
    """List the videos of an RTBF programme page, plus a next-page entry.

    ``datas`` may carry ``vid_id`` and ``md5`` directly; when ``vid_id`` is
    missing both are looked up from ``url`` through ``self.get_video_id``.
    If no video id can be determined nothing is listed.  ``page`` defaults
    to 1.

    One playable link is added per video found ("<title> - <dd/mm/yyyy>"),
    and a directory entry for ``show_videos`` is added when the response
    contains a "Suivante" (next) pager link.
    """
    url = datas.get('url')
    vid_id = datas.get('vid_id')
    md5 = datas.get('md5')
    if not vid_id:
        vid_id, md5 = self.get_video_id(url)
    if not vid_id:
        return
    page = datas.get('page', 1)

    # Example request:
    # http://www.rtbf.be/video/detail/ajax/av?page=1&timeFilter=all&orderBy=publish_view&videoId=1856226&returnMode=program&categoryId=&md5=00f9cfd447930fa1742d5d9f3e29e45cf083609f
    data = channel.get_url(
        self.main_url
        + "/video/detail/ajax/av?page=" + str(page)
        + "&timeFilter=all&orderBy=publish_view&videoId=" + str(vid_id)
        + "&returnMode=program&categoryId=&md5=" + str(md5))

    # The fourth group only exists to repeat "(.+\n)" three or four times;
    # its captured value is not used.
    regex = r"""video/detail_[^?]+\?id=(\d+)".+ src="([^"]+).+\n.+>([^<]+)(.+\n){3,4}.+<strong>(\d\d/\d\d/\d{4})"""
    for video_id, img, title, _skipped, date in re.findall(regex, data):
        vurl = channel.array2url(channel_id=self.channel_id,
                                 url=video_id,
                                 action='play_video')
        # Decode HTML entities with the shared channel helper; the previous
        # hand-written replace() chain had lost its entity literals.
        # NOTE(review): assumes channel.htmlentitydecode covers the quote
        # entities that chain was meant to handle - confirm.
        label = channel.htmlentitydecode(title + ' - ' + date)
        channel.addLink(label, vurl, img)

    next_page = re.search(r"""rel="(\d+)">Suivante""", data)
    if next_page is not None:
        page = next_page.group(1)
        channel.addDir('Page nr ' + page, self.icon,
                       channel_id=self.channel_id,
                       vid_id=vid_id,
                       action='show_videos',
                       page=page,
                       md5=md5)
def get_video_news(self):
    """Add a playable link for each item on the nieuws.vtm.be 'herbekijk' page.

    Every link is built with ``action='play_video'`` and ``news='1'``.
    """
    page_html = channel.get_url('http://nieuws.vtm.be/herbekijk')
    item_pattern = r"""href="([^"]+)"><img src="([^"]+)[^<]+</a>\s+</div>\s+<div[^<]+</div>\s+<h3[^>]+>\s+<span>\s+<a href[^>]+>([^<]+)"""
    for href, thumb, label in re.findall(item_pattern, page_html):
        link = channel.array2url(channel_id=self.channel_id,
                                 url=href,
                                 action='play_video',
                                 news='1')
        channel.addLink(label, link, thumb)
def get_direct_videos(self):
    """Add a playable link for each entry on the RTL-TVI 'directs' page.

    The label is "<title> - <time>" passed through
    ``channel.htmlentitydecode``; links are built with ``direct='1'``.
    """
    page_html = channel.get_url(
        'http://www.rtl.be/rtltvi/page/les-directs-rtl-tvi/258.aspx')
    item_pattern = r"""OtherLiveItem(Big|)Img ><A href="http://www.rtl.be/[^/]+/[^/]+/[^/]+/(\d+)\.aspx"><img src="([^"]+)"[^>]+></A></DIV>\s+<[^>]+>([^<]+)</DIV>\s+<[^>]+>([^<]+)"""
    # The first group ("Big" or empty) only widens the match; it is unused.
    for _size, video_id, thumb, name, when in re.findall(item_pattern, page_html):
        label = channel.htmlentitydecode(name + ' - ' + when)
        link = channel.array2url(channel_id=self.channel_id,
                                 url=video_id,
                                 action='play_video',
                                 direct='1')
        channel.addLink(label, link, thumb)
def get_videos(self, datas):
    """Add a playable link for each 'contentheading' entry of a page.

    The page fetched is ``self.main_url`` + ``datas['url']``.  Titles are
    stripped of surrounding whitespace and image paths are prefixed with
    ``self.main_url``.
    """
    page_html = channel.get_url(self.main_url + datas.get('url'))
    entry_pattern = r"""class="contentheading"[^>]+>([^<]+)</td>\s+</tr>\s+</table>\s+<table[^>]+>\s+<tr>\s+<td[^>]+>\s+<p><a href="([^"]+)[^>]+><img.+? src="([^"]+)"""
    for raw_title, href, img_path in re.findall(entry_pattern, page_html):
        link = channel.array2url(channel_id=self.channel_id,
                                 url=href,
                                 action='play_video')
        channel.addLink(raw_title.strip(), link, self.main_url + img_path)
def parse_lives(data):
    """Add a playable live link for every RTBF livecenter entry in ``data``.

    ``data`` is the HTML text to scan.  Each match yields the livecenter
    path, the icon URL and the ``alt`` text, which becomes the link label
    after HTML entities are decoded.

    NOTE(review): the body reads ``self.channel_id`` although ``self`` is
    not a parameter; this only works if the function is defined inside a
    method so that ``self`` is a closure variable - confirm.
    """
    # The final "." consumes one character after the captured alt text.
    # Disabled tail of the pattern (date / is-live matching):
    #   *\3.*\3.*class="date">([^<]+).*is-live.*\1
    regex = r"""href="http://www.rtbf.be/livecenter/([^"]+)"><img class="[^"]+" src="([^"]+)" alt="([^"]+)."""
    for url, icon, name in re.findall(regex, data, flags=re.DOTALL):
        # Single-argument parenthesised print: same output on Python 2 and 3.
        print("found")
        vurl = channel.array2url(channel_id=self.channel_id,
                                 url=url,
                                 action='play_live')
        # Decode HTML entities with the shared channel helper; the previous
        # hand-written replace() chain had lost its entity literals.
        # NOTE(review): assumes channel.htmlentitydecode covers the quote
        # entities that chain was meant to handle - confirm.
        channel.addLink(channel.htmlentitydecode(name), vurl, icon)
def get_videos(self, datas):
    """List the videos of an RTL channel page, skipping duplicate ids.

    When ``datas['direct']`` is truthy the listing is delegated to
    ``self.get_direct_videos()``.  Otherwise the page at
    ``self.main_url + self.channel_id + datas['url']`` is fetched and
    scanned with two patterns; a video id is only added the first time it
    is seen, whichever pattern finds it.
    """
    if datas.get('direct', False):
        self.get_direct_videos()
        return
    url = datas.get('url')
    data = channel.get_url(self.main_url + self.channel_id + url)

    # A set gives constant-time duplicate checks across both passes.
    seen_ids = set()

    def add_video(video_id, img, title):
        """Add one playable link unless ``video_id`` was already listed."""
        if video_id in seen_ids:
            return
        seen_ids.add(video_id)
        vurl = channel.array2url(channel_id=self.channel_id,
                                 url=video_id,
                                 action='play_video')
        channel.addLink(channel.htmlentitydecode(title), vurl, img)

    # First layout: image followed by an <A class=...> carrying the id.
    regex = r"""<img src="([^"]+)"[^>]*><A class=[^']+'(\d+)[^>]+>[a-z0-9A-Z\s]+<BR>([^<]+)"""
    for img, video_id, title in re.findall(regex, data):
        add_video(video_id, img, title)

    # Second layout: link to /<channel_id>/video/<id>.aspx with an <H3> title.
    # Both literal parts are raw strings so "\d", "\." and "\s" are not
    # treated as (invalid) string escapes; the resulting pattern is unchanged.
    regex = (r"""<A href="/""" + self.channel_id +
             r"""/video/(\d+)\.aspx[^"]*"[^>]*><img src="([^"]+?)"[^>]*></A>\s*?<H3>([^<]+)</H3>""")
    for video_id, img, title in re.findall(regex, data):
        add_video(video_id, img, title)
def get_videos(self, datas):
    """Add links for the videos of a mediatheek tag and a next-page folder.

    Uses ``datas['url']`` as the tag path under ``/mediatheek/tag/`` and the
    optional ``datas['page']`` as page selector.  Each video found becomes a
    playable link; an active "next" pager link becomes a ``show_videos``
    directory entry for that page number.
    """
    tag_path = datas.get('url')
    current_page = datas.get('page', False)

    address = self.main_url + '/mediatheek/tag/' + tag_path
    if current_page:
        # Only a truthy page value adds the page parameter.
        address += '&page=' + str(current_page)
    html = channel.get_url(address)

    for found in re.finditer(
            r""" id="video-(\d+)"><img src="([^"]+)" /></a>\s*<h5><a[^>]+>([^<]+)""",
            html):
        video_id, image, name = found.groups()
        target = channel.array2url(channel_id=self.channel_id,
                                   url=video_id,
                                   action='play_video')
        channel.addLink(name, target, image)

    following = re.search(
        r"""\?page=(\d+)" class="pager-next active" title="Ga naar volg""",
        html)
    if following is not None:
        number = following.group(1)
        channel.addDir('Page nr ' + number, self.icon,
                       channel_id=self.channel_id,
                       url=tag_path,
                       action='show_videos',
                       page=number)
def get_videos(self, datas):
    """List the videos found at ``datas['url']`` and a next-page folder.

    The special URL ``http://vtm.be/het-nieuws/video`` is delegated to
    ``self.get_video_news()`` and its result returned.  Otherwise each
    video becomes a playable link labelled "<title> - <date>", and a
    "volgende" link, when present, becomes a ``show_videos`` directory
    entry labelled with the matched page number plus one.
    """
    url = datas.get('url')
    # Single-argument parenthesised print behaves like the print statement.
    print(url)
    if url == 'http://vtm.be/het-nieuws/video':
        print('Nieuw')
        return self.get_video_news()

    html = channel.get_url(url)
    item_pattern = r"""<a href="([^"]+)">([^<]+)</a></h3>\s+<time[^>]+>([^<]+).+?<a href="\1"><img src="([^"]+)"""
    for href, name, date, thumb in re.findall(item_pattern, html, re.DOTALL):
        link = channel.array2url(channel_id=self.channel_id,
                                 url=href,
                                 action='play_video')
        channel.addLink(name + ' - ' + date, link, thumb)

    pager = re.search(r"""href="([^?]+\?page=(\d+))">volgende""", html)
    if pager is None:
        return
    next_url, page_index = pager.groups()
    # The label shows the page parameter incremented by one.
    label = 'Page nr ' + str(int(page_index) + 1)
    channel.addDir(label, self.icon,
                   channel_id=self.channel_id,
                   action='show_videos',
                   url=next_url)
def get_videos(self, datas):
    """List the videos of an RTBF programme page, plus a next-page entry.

    ``datas`` may carry ``vid_id`` and ``md5`` directly; when ``vid_id`` is
    missing both are looked up from ``url`` through ``self.get_video_id``.
    If no video id can be determined nothing is listed.  ``page`` defaults
    to 1.

    One playable link is added per video found ("<title> - <dd/mm/yyyy>"),
    and a directory entry for ``show_videos`` is added when the response
    contains a "Suivante" (next) pager link.
    """
    url = datas.get('url')
    vid_id = datas.get('vid_id')
    md5 = datas.get('md5')
    if not vid_id:
        vid_id, md5 = self.get_video_id(url)
    if not vid_id:
        return
    page = datas.get('page', 1)

    # Example request:
    # http://www.rtbf.be/video/detail/ajax/av?page=1&timeFilter=all&orderBy=publish_view&videoId=1856226&returnMode=program&categoryId=&md5=00f9cfd447930fa1742d5d9f3e29e45cf083609f
    query = ("/video/detail/ajax/av?page=" + str(page)
             + "&timeFilter=all&orderBy=publish_view&videoId=" + str(vid_id)
             + "&returnMode=program&categoryId=&md5=" + str(md5))
    data = channel.get_url(self.main_url + query)

    # The fourth group only exists to repeat "(.+\n)" three or four times;
    # its captured value is not used.
    regex = r"""video/detail_[^?]+\?id=(\d+)".+ src="([^"]+).+\n.+>([^<]+)(.+\n){3,4}.+<strong>(\d\d/\d\d/\d{4})"""
    for video_id, img, title, _skipped, date in re.findall(regex, data):
        vurl = channel.array2url(channel_id=self.channel_id,
                                 url=video_id,
                                 action='play_video')
        # Decode HTML entities with the shared channel helper; the previous
        # hand-written replace() chain had lost its entity literals.
        # NOTE(review): assumes channel.htmlentitydecode covers the quote
        # entities that chain was meant to handle - confirm.
        label = channel.htmlentitydecode(title + ' - ' + date)
        channel.addLink(label, vurl, img)

    next_page = re.search(r"""rel="(\d+)">Suivante""", data)
    if next_page is not None:
        page = next_page.group(1)
        channel.addDir('Page nr ' + page, self.icon,
                       channel_id=self.channel_id,
                       vid_id=vid_id,
                       action='show_videos',
                       page=page,
                       md5=md5)
def parse_lives(data):
    """Add a playable live link for every RTBF livecenter entry in ``data``.

    ``data`` is the HTML text to scan.  Each match yields the livecenter
    path, the icon URL and the ``alt`` text, which becomes the link label
    after HTML entities are decoded.

    NOTE(review): the body reads ``self.channel_id`` although ``self`` is
    not a parameter; this only works if the function is defined inside a
    method so that ``self`` is a closure variable - confirm.
    """
    # The final "." consumes one character after the captured alt text.
    # Disabled tail of the pattern (date / is-live matching):
    #   *\3.*\3.*class="date">([^<]+).*is-live.*\1
    regex = r"""href="http://www.rtbf.be/livecenter/([^"]+)"><img class="[^"]+" src="([^"]+)" alt="([^"]+)."""
    for url, icon, name in re.findall(regex, data, flags=re.DOTALL):
        # Single-argument parenthesised print: same output on Python 2 and 3.
        print("found")
        vurl = channel.array2url(channel_id=self.channel_id,
                                 url=url,
                                 action='play_live')
        # Decode HTML entities with the shared channel helper; the previous
        # hand-written replace() chain had lost its entity literals.
        # NOTE(review): assumes channel.htmlentitydecode covers the quote
        # entities that chain was meant to handle - confirm.
        channel.addLink(channel.htmlentitydecode(name), vurl, icon)