def get_channels(source):
    """Build the channel listing for a source page on desi-tashan.com.

    The page URL is derived from *source* by replacing spaces with dashes
    and lower-casing it.  Every anchor inside the ``content`` div whose
    href matches ``re_source`` becomes one entry.

    Returns a list of ``(label, plugin_url, thumbnail, False)`` tuples;
    the list is empty when the page or its content div is unavailable.
    """
    channels = []
    slug = source.replace(" ", "-").lower()
    page = get_page("http://www.desi-tashan.com/%s/" % slug)
    content = page.find("div", {"id": "content"}) if page else None
    if not content:
        return channels

    for anchor in content.find_all('a'):
        href = anchor.get("href")
        match = re_source.search(href) if href else None
        if not match:
            continue

        # Use the nested image, when it has a src, as the thumbnail.
        image = anchor.find("img")
        has_thumb = image and image.has_attr("src")
        thumbnail = image.get("src") if has_thumb else None

        channels.append((
            _to_channel_name(match.group(1)),
            plugin.url_for('show_channel', url=match.group(0)),
            thumbnail,
            False,
        ))
    return channels
def search(request):
    """Run a keyword search and render the paginated result page.

    Reads ``keywords`` and ``page`` from ``request.GET``, logs the query,
    queries ``SearchAPI`` and renders ``mysite/result.html`` with the hits.

    Returns an ``HttpResponse``: the rendered result page, or a short
    error message when the keyword is empty/missing or the search backend
    returns ``None``.
    """
    hit = 15  # results per page
    api = SearchAPI()

    # A missing ``keywords`` parameter is treated like an empty one instead
    # of crashing on ``None.encode``.
    kw = request.GET.get('keywords', '').encode('utf-8')
    if not kw:
        return HttpResponse("关键字不能为空!<a href=\"../index\">返回</a>")

    page = helper.get_page(request.GET.get('page'))
    start = (page - 1) * hit
    helper.save_search_log(kw, page)

    res = api.search(kw, start, hit)
    if res is None:
        return HttpResponse("内部错误,请联系 砺诚")

    # nextPage is 0 when the current page already reaches the last hit.
    nextPage = page + 1 if res['total'] > page * hit else 0
    prePage = page - 1

    # The context manager guarantees the template file is closed.
    with open("mysite/result.html") as f:
        t = template.Template(f.read())

    # The template receives integer ids.
    for item in res['items']:
        item['id'] = int(item['id'])

    c = template.Context({
        "keyword": kw,
        "all_result": res['items'],
        "total": res['total'],
        "num": res['num'],
        "searchtime": res['searchtime'],
        "page": page,
        "nextPage": nextPage,
        "prePage": prePage,
    })
    return HttpResponse(t.render(c))
def _jwplayer(page, domain):
    """Extract a media URL from an iframe embedded in *page*.

    The first iframe whose ``src`` contains *domain* is fetched with
    ``get_page``; the first ``text/javascript`` script in it that matches
    ``re_jwplayer`` supplies the URL (capture group 1).

    Returns a list holding that single URL, or an empty list when no
    matching iframe exists, the iframe page cannot be fetched, or no
    script matches.
    """
    url = None
    for iframe in page.find_all("iframe"):
        if not iframe.has_attr("src"):
            continue
        src = iframe.get("src")
        if domain in src:
            url = src
            break
    if not url:
        return []

    embedded = get_page(url)
    # A failed fetch is reported as "nothing found" rather than crashing
    # on ``find_all`` of a missing page.
    if not embedded:
        return []

    for script in embedded.find_all("script", {"type": "text/javascript"}):
        mo = re_jwplayer.search(script.text)
        if mo:
            return [mo.group(1)]
    return []
def search(request):
    """Run a keyword search and render the paginated result page.

    Reads ``keywords`` and ``page`` from ``request.GET``, logs the query
    and its total hit count, queries ``searcher`` (with single quotes
    stripped from the keyword) and renders the template found at
    ``search_page_template``.

    Returns an ``HttpResponse``: the rendered result page, or a short
    error message when the keyword is empty/missing or the search backend
    returns ``None``.
    """
    hit = 15  # results per page

    # A missing ``keywords`` parameter is treated like an empty one instead
    # of crashing on ``None.encode``.
    kw = request.GET.get('keywords', '').encode('utf-8')
    if not kw:
        return HttpResponse("关键字不能为空!<a href=\"../index\">返回</a>")

    page = helper.get_page(request.GET.get('page'))
    start = (page - 1) * hit
    helper.save_search_log(kw, page)

    res = searcher.search(kw.replace("'", ""), start, hit, WEB_TAG)
    if res is None:
        return HttpResponse("内部错误,请联系 砺诚")

    # nextPage is 0 when the current page already reaches the last hit.
    nextPage = page + 1 if res['total'] > page * hit else 0
    prePage = page - 1
    helper.save_search_result_info(kw, res['total'])

    # The context manager guarantees the template file is closed.
    with open(search_page_template) as f:
        t = template.Template(f.read())

    # The template receives integer ids.
    for item in res['items']:
        item['id'] = int(item['id'])

    c = template.Context({
        "keyword": kw,
        "all_result": res['items'],
        "total": res['total'],
        "num": res['num'],
        "searchtime": res['searchtime'],
        "page": page,
        "nextPage": nextPage,
        "prePage": prePage,
        "urlPrefix": URL_PREFIX,
    })
    return HttpResponse(t.render(c))
def search(request):
    """Search for the requested keyword and render one page of results.

    ``keywords`` and ``page`` are taken from ``request.GET``.  The query
    is logged through ``helper.save_search_log``, executed by
    ``SearchAPI`` and rendered with ``mysite/result.html``.

    Returns an ``HttpResponse`` with the rendered page, or with a short
    error message when the keyword is empty/missing or the backend
    returns ``None``.
    """
    hit = 15  # results per page
    backend = SearchAPI()

    # Fall back to an empty keyword when the parameter is absent so the
    # view answers with the "empty keyword" message instead of raising.
    kw = request.GET.get('keywords', '').encode('utf-8')
    if not kw:
        return HttpResponse("关键字不能为空!<a href=\"../index\">返回</a>")

    page = helper.get_page(request.GET.get('page'))
    start = (page - 1) * hit
    helper.save_search_log(kw, page)

    res = backend.search(kw, start, hit)
    if res is None:
        return HttpResponse("内部错误,请联系 砺诚")

    # A nextPage of 0 tells the template there are no further results.
    has_more = res['total'] > page * hit
    nextPage = page + 1 if has_more else 0
    prePage = page - 1

    # Close the template file as soon as its content has been read.
    with open("mysite/result.html") as f:
        t = template.Template(f.read())

    # The template receives integer ids.
    for item in res['items']:
        item['id'] = int(item['id'])

    c = template.Context({
        "keyword": kw,
        "all_result": res['items'],
        "total": res['total'],
        "num": res['num'],
        "searchtime": res['searchtime'],
        "page": page,
        "nextPage": nextPage,
        "prePage": prePage,
    })
    return HttpResponse(t.render(c))
def get_links(url, name):
    """List the playable entries found on the page at *url*.

    The youtube, playwire, vodlocker and dailymotion resolvers are tried
    first; each URL they yield becomes a ``play_video`` entry labelled
    *name*.  When they yield nothing, the page's ``target="_blank"``
    anchors pointing at desi-tashan.com are scanned instead and each
    recognised player link becomes a ``show_part`` entry.

    Returns a list of ``(label, plugin_url, None, True)`` tuples, empty
    when the page cannot be fetched.
    """
    entries = []
    page = get_page(url)
    if not page:
        return entries

    urls = resolvers.youtube(page)
    urls.extend(resolvers.playwire(page))
    urls.extend(resolvers.vodlocker(page))
    urls.extend(resolvers.dailymotion(page))
    for video_url in urls:
        entries.append((
            name,
            plugin.url_for('play_video', video=video_url, name=name),
            None,
            True,
        ))
    if urls:
        return entries

    # Fallback: href substrings mapped to a player label; first match wins.
    players = (
        (("youtube",), "Youtube"),
        (("/pw-", "/pw/"), "Playwire"),
        (("dailymotion",), "Dailymotion"),
        (("vodlocker",), "Vodlocker"),
    )
    for anchor in page.find_all("a", {"target": "_blank"}):
        if not anchor.has_attr("href"):
            continue
        href = anchor.get("href")
        if not href.startswith("http://www.desi-tashan.com/"):
            continue

        player = None
        for markers, label in players:
            if any(marker in href for marker in markers):
                player = label
                break
        if not player:
            continue

        # The part label, when present, comes from the anchor text.
        match = re_part.search(anchor.text)
        part = match.group(1) if match else ""
        entries.append((
            "%s %s" % (player, part.strip()),
            plugin.url_for('show_part', url=href, name=name),
            None,
            True,
        ))
    return entries
def get_episodes(show):
    """List the episode links on the show page at URL *show*.

    Anchors inside the ``left-inside`` div are turned into ``show_links``
    entries.  An "Older Entries" anchor is replaced by a "Next Page"
    entry placed at the front of the list, whose URL is derived from
    *show* via ``re_page``; "Next Entries" anchors and the ``nav-home``
    anchor are ignored.

    Returns a list of ``(label, plugin_url, None, False)`` tuples, empty
    when the page or the container div is unavailable.
    """
    entries = []
    page = get_page(show)
    if not page:
        return entries
    container = page.find("div", {"id": "left-inside"})
    if not container:
        return entries

    for anchor in container.find_all("a"):
        if anchor.get("id") == "nav-home":
            continue
        title = re_char.sub(' ', anchor.text.strip())

        if "Older Entries" in title:
            match = re_page.search(show)
            if match:
                prefix, number, suffix = match.group(1, 2, 3)
                if number:
                    # Already on a numbered page: advance by one.
                    next_url = "%spage/%d%s" % (
                        prefix, int(number) + 1, suffix)
                else:
                    next_url = "%s/page/2%s" % (prefix, suffix)
                entries.insert(0, (
                    "Next Page",
                    plugin.url_for("show_show", show=next_url),
                    None,
                    False,
                ))
        elif "Next Entries" not in title:
            entries.append((
                title,
                plugin.url_for(
                    'show_links', link=anchor.get("href"), name=title),
                None,
                False,
            ))
    return entries
def get_shows(channel):
    """List the shows linked from the channel page at URL *channel*.

    Every anchor inside the ``li.categories`` element becomes one entry
    whose target is the anchor's href with ``?tag=video`` appended.

    Returns a list of ``(label, plugin_url, None, False)`` tuples, empty
    when the page or the categories element is unavailable.
    """
    page = get_page(channel)
    if not page:
        return []
    categories = page.find("li", {"class": "categories"})
    if not categories:
        return []
    return [
        (
            anchor.text,
            plugin.url_for('show_show', show=anchor.get("href") + "?tag=video"),
            None,
            False,
        )
        for anchor in categories.find_all("a")
    ]
def play_video(video, name, mode):
    """Resolve the page at *video* to a stream URL and start playback.

    *mode* selects the resolver through ``get_resolver``; the mode
    ``"flash"`` is handled by the ``"playwire"`` resolver.  The first URL
    the resolver yields is played under the label *name*.

    Returns the result of ``plugin.play_video``, or ``None`` when the
    page, the resolver or a usable URL is missing.
    """
    page = get_page(video)
    if not page:
        return None

    resolver = get_resolver("playwire" if mode == "flash" else mode)
    if not resolver:
        return None

    candidates = resolver(page)
    if not candidates:
        return None

    stream = candidates[0]
    if not stream:
        return None
    return plugin.play_video(dict(label=name, path=stream))
def get_links(link, name):
    """List the video parts found on the episode page at URL *link*.

    Paragraphs inside ``div#left-inside > center`` are scanned in order.
    A paragraph containing a span sets the current group (parsed from the
    span text with ``re_group``, spaces removed, capitalised; cleared
    when the text does not match).  Anchors of the following paragraphs
    become ``play_video`` entries for that group.

    Returns a list of ``(label, plugin_url, None, True)`` tuples, empty
    when the page or the expected containers are unavailable.
    """
    entries = []
    page = get_page(link)
    if not page:
        return entries
    container = page.find("div", {"id": "left-inside"})
    if not container:
        return entries
    container = container.find("center")
    if not container:
        return entries

    current_group = ""
    for paragraph in container.find_all("p"):
        if not paragraph.text:
            continue

        heading = paragraph.find("span")
        if heading:
            match = re_group.search(heading.text)
            current_group = (
                match.group(1).replace(" ", "").capitalize() if match else "")
            continue

        # Links before any recognised heading are not listed.
        if not current_group:
            continue

        for anchor in paragraph.find_all("a"):
            match = re_part.search(anchor.text)
            part = match.group(1) if match else ""
            entries.append((
                "%s %s" % (current_group, part.strip()),
                plugin.url_for(
                    'play_video',
                    video=anchor.get("href"),
                    name=name,
                    mode=current_group.lower()),
                None,
                True,
            ))
    return entries
def get_episodes(mode, url, page=None):
    """List the video articles on a listing page, plus a "Next Page" link.

    *page* may be an already fetched page for *url*; when it is falsy the
    page is fetched with ``get_page``.  If the ``wp-pagenavi`` block has a
    ``nextpostslink`` anchor, a "Next Page" entry routed to *mode* comes
    first; its URL is derived from *url* via ``re_page``, or by appending
    ``page/2/`` when *url* does not match.  Each ``article`` containing a
    ``videoPost`` or ``videoClip`` div and a link with an href becomes a
    ``show_episode`` entry.

    Returns a list of ``(label, plugin_url, thumbnail, False)`` tuples,
    empty when no page is available.
    """
    entries = []
    page = page or get_page(url)
    if not page:
        return entries

    navigation = page.find("div", {"class": "wp-pagenavi"})
    if navigation and navigation.find("a", {"class": "nextpostslink"}):
        match = re_page.search(url)
        if match:
            next_url = "%s%d/" % (match.group(1), int(match.group(2)) + 1)
        else:
            next_url = url.strip() + "page/2/"
        entries.append((
            "Next Page",
            plugin.url_for(mode, url=next_url),
            None,
            False,
        ))

    for article in page.find_all("article"):
        is_video = article.find(
            "div", {"class": ["videoPost", "videoClip"]})
        if not is_video:
            continue

        title = article.get("title")
        anchor = article.find("a")
        if not (anchor and anchor.has_attr("href")):
            continue

        image = article.find("img")
        has_thumb = image and image.has_attr("src")
        thumbnail = image.get("src") if has_thumb else None

        entries.append((
            title,
            plugin.url_for(
                'show_episode', url=anchor.get("href"), name=title),
            thumbnail,
            False,
        ))
    return entries
def get_tvshows(url, newshows=True):
    """List the TV shows offered in a ``<select>`` on the page at *url*.

    *newshows* picks which select element is read: class ``mainNewcat``
    when true, ``mainOldcat`` otherwise.  Options without a value are
    skipped.

    Returns a list of ``(label, plugin_url, None, False)`` tuples, empty
    when the page cannot be fetched or the select element is missing.
    """
    lst = []
    page = get_page(url)
    if not page:
        return lst

    cat = "mainNewcat" if newshows else "mainOldcat"
    select = page.find("select", {"class": cat})
    # ``find`` yields None when the element is absent; report "no shows"
    # instead of failing on ``None.find_all``.
    if select is None:
        return lst

    for option in select.find_all("option"):
        value = option.get("value")
        if not value:
            continue
        lst.append((
            option.text,
            plugin.url_for('show_show', url=value),
            None,
            False,
        ))
    return lst
def get_channels():
    """List the channels linked from the desitvbox.me front page.

    Every anchor inside the ``categories`` div becomes one entry, except
    anchors whose text contains ``DTB`` or ``TRP``.

    Returns a list of ``(label, plugin_url, None, False)`` tuples, empty
    when the page or the categories div is unavailable.
    """
    page = get_page("http://www.desitvbox.me/")
    if not page:
        return []
    categories = page.find("div", {"id": "categories"})
    if not categories:
        return []
    return [
        (
            anchor.text,
            plugin.url_for('show_channel', channel=anchor.get("href")),
            None,
            False,
        )
        for anchor in categories.find_all("a")
        if "DTB" not in anchor.text and "TRP" not in anchor.text
    ]
#!/usr/bin/env python3 import sys import helper import re from urllib.parse import urlparse if len(sys.argv) < 2: print("Usage: {} url".format(sys.argv[0])) sys.exit(1) URL = sys.argv[1] (browser, page) = helper.get_page(URL) DOMAIN = "{uri.scheme}://{uri.netloc}".format(uri=urlparse(URL)) lecture_links = page.soup.select('a.medialink') for index, lecture_link in enumerate(lecture_links): lecture_title = lecture_link.string link = "{}/{}".format(DOMAIN, lecture_link.attrs['href']) video_page = browser.get(link) script = [ script for script in video_page.soup.select('script') if 'ocw_embed_chapter_media' in str(script) ] if len(script) < 1: print(":(") continue youtube_url = re.findall(r'(https:\/\/www.youtube.com\/v\/.*?)\'', str(script[0]))
def getTopicsPager(self):
    """Return the pager data for this object's topics.

    The page count is the topic count divided by
    ``settings.TOPICS_PER_PAGE``; the current page is whatever
    ``get_page`` returns for that count.

    Returns a dict with the keys ``'page'`` and ``'pages'``.
    """
    total_pages = self.getTopicsLength() / settings.TOPICS_PER_PAGE
    current_page = get_page(total_pages)
    return dict(page=current_page, pages=total_pages)
def show_channel(url):
    """Rebuild ``items`` for the channel page at *url* and return it.

    The page is fetched once and shared by both producers: the channel
    items are added first, then the episodes (routed back to
    ``show_channel`` for paging).
    """
    items.clear()
    page = get_page(url)

    channel_entries = get_channel_items(url, page)
    items.add_all(channel_entries)

    episode_entries = get_episodes('show_channel', url, page)
    items.add_all(episode_entries)
    return items