handle_list = ['/haiuken.com/'] def query_info(self, url): # http://haiuken.com/theatre/2muu/ vid = match1(url, r'haiuken.com/theatre/([^/]+)/') echo("vid=", vid) hutf = self.get_hutf(url) m = MyHtmlParser(tidy=False) m.feed(hutf) if self.title == UTITLE: title = m.select("head title")[0].text if title.startswith("Theatre - "): title = title[10:] else: title = self.title ret = m.select(".bg2 .tmpl img") ips = json.dumps([r['src'].split("://")[1].split('/')[0] for r in ret]) d = {"xEvent": "UIMovieComments.Error", "xJson": ips} hutf = self.get_html("http://haiuken.com/ajax/theatre/%s/" % vid, postdata=urllib.urlencode(d).encode("utf8")) ret = json.loads(hutf) url = b64decode(ret['Data']['Error'].encode('utf8')).decode('utf8') return title, None, [url], None if __name__ == '__main__': start(HYG)
echo(title) ul = SelStr("div.listmain dl", hutf) if not ul: return [] #for u in ul.descendants: sel = True lst = [] for u in ul[0].children: #echo(u) if u.tag == 'dt': sel = u"最新章节" not in u.text continue if sel and u.tag == 'dd': l = os.path.join(base, u.select("a")[0]['href']) echo(l, u.text) lst.append((u.text, l)) return lst def test(self, args): url = "https://www.shuquge.com/txt/12236/46252712.html" hutf = open("s1.html").read().decode('utf8') #hutf = self.get_hutf(url) #echo(hutf) #ct = SelStr("div.content", hutf)[0] echo(ct.text) if __name__ == '__main__': start(SHUQUGE)
echo(title) m = re.search("player\.load\(\{\s+file: \"([^\"]+)\",\s+image\:", hutf) if m: u = m.group(1) echo(u) u = UO(u, url) return title, ext, [u], None return None #def get_playlist(self, url): # ns = SelStr('div.entry-content.rich-content tr td a', # self.get_hutf(url)) # return [(a.text, a['href']) for a in ns] def test(self, args): ''' wget -U 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36' -O 10_4.mp4 --header="Referer: http://video66.org/embed.php?w=798&h=449&vid=vids4/w_2016_-_10_clip4.mp4" "http://gateway.play44.net:3010/old/w_2016_-_10_clip4.mp4?st=MTJmZmQ1Yzc0ODM4ZDg5ZWQ4MmMyOTc4NDEyZDRlYzU&e=1497829689" ''' url = "http://video66.org/embed.php?w=798&h=449&vid=vids4/w_2016_-_10_clip4.mp4" hutf = self.get_hutf(url) m = re.search("player\.load\(\{\s+file: \"([^\"]+)\",\s+image\:", hutf) if m: u = m.group(1) echo(u) self.extra_headers['Referer'] = url html = self.get_html(u) if __name__ == '__main__': start(VIDEO66)
handle_list = ['\.tudou\.com/albumplay/'] def query_info(self, url): # http://www.tudou.com/albumplay/zgdaPAjRz1s/8cUPFUj8sl4.html hutf = self.get_hutf(url) vcode = match1(hutf, U("vcode:\s*'([^']+)',\s*lan\:\s*'粤语'")) echo("vcode", vcode) yu = "http://youku.com/v_show/id_" + vcode #return title, None, [url], None return YOUKU().query_info(yu) def get_playlist(self, url): ns = SelStr('a.item.item_positive', self.phantom_hutf(url)) return [(a.text.strip(), a['href']) for a in ns] def test(self, args): url = "http://www.tudou.com/albumplay/zgdaPAjRz1s/8cUPFUj8sl4.html" #youku = "http://youku.com/v_show/id_XNzYxNzM0MDk2" #,vcode: 'XNzYxNzM0MDk2' hutf = self.get_hutf(url) #vcode = match1(hutf, "vcode:\s*'([^']+)'") vcode = match1(hutf, U("vcode:\s*'([^']+)',\s*lan\:\s*'粤语'")) #vcode = match1(hutf, "id:\s*3\s*,\s*vcode:\s*'([^']+)',\s*lan:") echo("vcode", vcode) yu = "http://youku.com/v_show/id_" + vcode echo(YOUKU().query_info(yu)) if __name__ == '__main__': start(TUDOU)
class HYG(DWM):
    """Handler for http://haiuken.com/ ("海宇根") theatre pages."""

    handle_list = ['/haiuken.com/']

    def query_info(self, url):
        """Resolve a haiuken.com theatre page to its media URL.

        Returns a 4-tuple ``(title, None, [media_url], None)``, or ``None``
        when *url* does not contain a theatre id.
        """
        # Example page: http://haiuken.com/theatre/2muu/
        vid = match1(url, r'haiuken\.com/theatre/([^/]+)/')
        echo("vid=", vid)
        if not vid:
            # Without an id the AJAX endpoint below cannot be addressed.
            return None

        hutf = self.get_hutf(url)
        m = MyHtmlParser(tidy=False)
        m.feed(hutf)

        # NOTE(review): UTITLE looks like the "no explicit title" sentinel;
        # only then is the title taken from the page itself.
        if self.title == UTITLE:
            title = m.select("head title")[0].text
            prefix = "Theatre - "
            if title.startswith(prefix):
                title = title[len(prefix):]
        else:
            title = self.title

        # The endpoint is sent the host names of the page's template images.
        ret = m.select(".bg2 .tmpl img")
        ips = json.dumps([r['src'].split("://")[1].split('/')[0]
                          for r in ret])
        d = {"xEvent": "UIMovieComments.Error", "xJson": ips}
        hutf = self.get_html("http://haiuken.com/ajax/theatre/%s/" % vid,
                             postdata=urllib.urlencode(d).encode("utf8"))
        ret = json.loads(hutf)
        # The media URL is returned base64-encoded under Data.Error.
        url = b64decode(ret['Data']['Error'].encode('utf8')).decode('utf8')
        return title, None, [url], None


if __name__ == '__main__':
    start(HYG)
class ODC(DWM):
    """Handler for ondemandchina.com pages, rendered through PhantomJS."""

    handle_list = [r'/ondemandchina\.com/']

    def query_info(self, url):
        """Render *url* with PhantomJS and extract the video source.

        Returns a 4-tuple ``(title, None, [video_url], None)``.
        Raises ``IndexError`` when the rendered page has no
        ``video source`` or ``h1.entry-title`` element.
        """
        echo('phantomjs wait ...')
        # NOTE(review): "300" is an argument consumed by dwm.js, presumably
        # a wait/timeout value -- confirm against that script.
        p = Popen(["./phantomjs", "dwm.js", "300", url], stdout=PIPE)
        # communicate() drains stdout, closes the pipe and reaps the child,
        # so no file descriptor is left open.
        html, _ = p.communicate()
        hutf = html.decode('utf8')
        url = SelStr('video source', hutf)[0]['src']
        title = SelStr('h1.entry-title', hutf)[0].text
        return title, None, [url], None

    def get_playlist(self, url):
        """Return ``(text, href)`` pairs for links in the content table."""
        ns = SelStr('div.entry-content.rich-content tr td a',
                    self.get_hutf(url))
        return [(a.text, a['href']) for a in ns]

    def test(self, args):
        """Manual smoke test: fetch a sample page and dump its HTML."""
        url = "https://www.ondemandchina.com/zh-Hans/watch/jade-dynasty/movie-1"
        hutf = self.get_hutf(url)
        echo(hutf)


if __name__ == '__main__':
    start(ODC)
# echo(self.get_playlist('http://tv8.fun/20170328-人民的名义/')) # 'http://www.dayi.ca/ys/?p=3004&page=2' #url = 'http://www.dayi.ca/ys/?p=2386&page=52' #url = 'http://www.dayi.ca/ys/?p=3004&page=1' #url = 'http://www.dayi.ca/ys/?p=4076&&page=1' url = 'http://tv8.fun/%e4%b8%8a%e9%98%b3%e8%b5%8b/' # 上阳赋 url = 'http://tv8.fun/%e8%a5%bf%e4%ba%ac%e6%95%85%e4%ba%8b/' # 西京故事 hutf = self.get_hutf(url) echo(hutf) return t = SelStr("h1.entry-title", hutf)[0] m = re.search(u"(.+) 至第(\d+)集", t.text) echo(m.group(1), m.group(2)) p = SelStr("div.entry-content p", hutf) echo(p[3].text) m = re.search(u"通用版.+第(\d+)集", p[3].text) echo(m.group(1)) m = re.search(U("首播:.+共(\d+)集"), p[0].text) #, flags=re.M+re.U) echo(m.group(1)) for a in p[1].select("a"): if 'page=' not in a['href']: continue uo = urlparse.urlparse(a['href']) qs = urlparse.parse_qs(uo.query) echo(qs) break if __name__ == '__main__': start(TV8)
#url = 'https://www.youtube.com/watch?v=dF2X2Bl9fps' #'https://www.youtube.com/watch?v=dF2X2Bl9fps&pbj=1' hutf = self.get_hutf(args.url) #dat = parse_qs(unquote(hutf)) #echo(json.dumps(dat, indent=2)) #echo(hutf) # <meta name="title" content="心經唱誦36次 -齊豫居士"> ret = match1(hutf, '\<meta name="title" content="([^"]+)"\>') echo(repr(ret)) def find_in(dat, name): if isinstance(dat, dict): for k, v in dat.items(): if k == name: return v else: d = find_in(v, name) if d: return d elif isinstance(dat, (list, tuple)): for v in dat: d = find_in(v, name) if d: return d return None if __name__ == '__main__': start(YOUTUBE)
return title, "m3u8", mu, None #echo(us) def get_playlist(self, url): hutf = self.get_hutf(url) t = SelStr("h2.title a", hutf)[0] t = t.text.strip() ns = SelStr('div#playlist1 a', hutf) return [(t + "_" + a.text.strip(), "https://www.duboku.co" + a['href']) for a in ns] def test(self, argv): url = "https://www.duboku.co/vodplay/1433-1-1.html" url = "https://u.zdubo.com/vodplay/1697-1-1.html" #hutf = self.get_hutf(url) #echo(hutf) #return hutf = open("d.html").read().decode('utf8') dat = match1(hutf, r"var\s+player_data\s*\=\s*({[^}]+})") dat = json.loads(dat) echo(dat) echo(dat['url']) #us = self.try_m3u8(dat['url']) #echo(us) t = SelStr("h2.title", hutf)[0] echo(' '.join(t.text.split())) if __name__ == '__main__': start(DUBOKU)
def find_kls(url):
    """Find the handler class that accepts *url*.

    Scans every ``.py`` file next to the running script (the script itself
    excluded) for classes declared as ``class X(DWM):`` or
    ``class X(BOOK):``, loads each such module, and returns the first class
    whose ``can_handle_it(url)`` is true. Returns ``None`` when none match.
    Modules that fail to load are reported via ``echo`` and skipped.
    """
    script_dir = os.path.dirname(sys.argv[0]) or "."
    script_name = os.path.basename(sys.argv[0])
    dwmkls = re.compile(r"^class\s+(\S+)\s*\((DWM|BOOK)\)\:", re.M)
    for fn in os.listdir(script_dir):
        if not fn.endswith(".py") or fn == script_name:
            continue
        # listdir() yields bare names; resolve them against the scanned
        # directory so this also works when the cwd is somewhere else.
        path = os.path.join(script_dir, fn)
        with open(path) as f:
            ret = dwmkls.findall(f.read())
        if not ret:
            continue
        name = fn[:-3]
        try:
            m = imp.load_source(name, path)
        except Exception as e:
            echo(name, e)
        else:
            # Use distinct loop names so the script basename used by the
            # self-exclusion test above is never overwritten.
            for cls_name, _base in ret:
                kls = getattr(m, cls_name)
                if kls.can_handle_it(url):
                    return kls
    return None


if __name__ == '__main__':
    start(find_kls)
if 'api.ourder.com' in i['src']: url = i['src'] echo(url) break tn = m.select('div.crumbs span')[0] title = tn.text self.get_html(url) echo(self.get_html_url) #https://api.ourder.com/video/ssl/https.html?h=380px&id=CODIzNzA5Mg== #https://api.ourder.com/video/ssl/YkcrefHandler.ashx?id=xxx r = urlparse(self.get_html_url) q = parse_qs(r.query) if '/video/ssl/https.html' in self.get_html_url: vid = q['id'][0] echo(vid) url = 'https://api.ourder.com/video/ssl/YkcrefHandler.ashx?id=' url = url + vid elif '/video/ssl/videoplayer.html' in self.get_html_url: #http://api.ourder.com/video/ssl/videoplayer.html?url=http://v.youku.com/v_show/id_XMTQ1MDM1MDc3Ng==.html?from=y1.12-96 url = q['url'][0] if 'youku' in url: from youku import YOUKU return YOUKU().query_info(url) return title, None, [url], None if __name__ == '__main__': start(ZSJ)
#debug(hutf) #for a in SelStr('ul[data-identity=guest] > li > div.bfan-n > a', hutf): # debug(a.text, a['href']) # urls.append((a.text, 'https://www.dnvod.eu' + a['href'])) hutf = self.chrome_hutf(url) urls = [] for a in SelStr( 'ul[data-identity=guest] > li > div.bfan-n > div.bfan-n > a', hutf): #debug(a.text, a['href']) #urls.append((a.text, 'https://www.dnvod.eu/Movie/' + a['href'])) urls.append( (self.title, 'https://www.dnvod.tv/Movie/' + a['href'])) echo(urls) return urls def test(self, argv): url = 'https://www.dnvod.tv/Movie/detail.aspx?id=TEee8%2fITNg4%3d' url = 'https://www.dnvod.tv/Movie/Readyplay.aspx?id=OIfaQTVHEiA%3d' ru = 'http://server3.dnvod.tv/hvod/lxj-tscgwlb-50-022061041.mp4?sourceIp=154.20.114.142&signature=856ddbf8ecd34fb9b3aae7ad4c8beddf.56b9f1609633f7eacbc18ecd0dd5e4be&start=1536543792.79147&custom=0&ua=62e66f1213d2881d9f80510593ffe2ec' #ru = 'http://server3.dnvod.tv/hvod/lxj-tscgwlb-50-022061041.mp4' url = "https://www.dnvod.tv/play?id=TW29RCmFL4o%3D" #hutf = self.get_hutf(url) hutf = self.chrome_hutf(url) echo(hutf) #echo(get_kind_size(ru)) if __name__ == '__main__': start(DNVOD)
#echo(m) if not m: break url = m[0] #break def test(self, args): url = "http://www.bookdown.com.cn/bookinfo/30258.html" #url = "http://www.bookdown.com.cn/read/30258_1.html" ret = match1(url, "/bookinfo/(\d+)\.html", "/read/(\d+).*\.html") bid = int(ret[0]) echo("bid =", bid) url = "http://www.bookdown.com.cn/read/%d_1.html" % bid while True: #print >> sys.stderr, url hutf = self.get_hutf(url) #echo(hutf) for div in SelStr('div#view_content_txt', hutf): echo( re.sub(u"分节阅读.+,请点击下一页继续阅读。", "", re.sub(" ", " ", div.text))) al = SelStr("a#nextPage", hutf) if not al: break url = al[0]['href'] #break if __name__ == '__main__': start(BOOKDN)
from video66 import VIDEO66 v66 = VIDEO66() urls = [] for uid in dd['ids']: u = "http://video66.org/embed.php?w=798&h=449&vid=" + uid echo(u) t, e, us, s = v66.query_info(u) urls.append(us[0]) return title, e, urls, None else: echo("found new source") echo(dd) return None #sys.exit(1) def get_playlist(self, url): hutf = self.get_hutf(url) urls = [] for a in SelStr('div.tvlists div.items div.item a', hutf): urls.append((a.text, 'http://bigdramas.net' + a['href'])) return urls def test(self): # handle by dailymotion url = 'http://bigdramas.net/video/%E6%94%BE%E6%A3%84%E6%88%91%EF%BC%8C%E6%8A%93%E7%B7%8A%E6%88%91-%E7%AC%AC22%E9%9B%86/' if __name__ == '__main__': start(BigDr) #BigDr().query_info('')
#url = 'https://www.rapidvideo.com/embed/ZsNSciBj' # https://admkis.playercdn.net/85/1/sQ52oTwwZ6vCo3Vk7-RS2g/1482741547/161202/063k10VmKldzoX8.mp4 hutf = self.get_hutf(url, postdata='block=1') data = match1(hutf, 'jwplayer\("home_video"\)\.setup\(([^\(\)]+)\);') debug(data) data = match1(data, '"sources":\s*(\[[^\[\]]+\])') ml, u = 0, '' for src in json.loads(data): l = src['label'] if l not in self.labels: echo("new label", l) i = self.labels.index(l) if i > ml: ml, u = i, src['file'] debug(title, u) return title, k, [u], None def test(self, args): url = "https://www.rapidvideo.com/embed/FUZ35WDLM7" # https://www3731.playercdn.net/187/0/G4i-UJ6bQxIZI6FWc_F5dg/1536365722/180905/692FUZ37O792IXDCUZDFX.mp4 #echo(self.query_info(url)) hutf = self.get_hutf(url) #echo(hutf) d = SelStr("video#videojs source", hutf) u = d[0]["src"] if __name__ == '__main__': start(RapidVideo) #RapidVideo().query_info(1)
u = 'https://www.kantv6.com/index.php/video/part' u = '%s?tvid=%s' % (u, tvid) dat = self.get_hutf(u) dat = json.loads(dat) debug(json.dumps(dat, indent=2)) def test(self, argv): url = 'https://www.kantv6.com/tvdrama/301948271219001-161948271219033' url = 'https://www.kantv6.com/index.php/video/part?tvid=301948271219001' url = 'https://www.kantv6.com/index.php/video/info?tvid=301948271219001&seo=tvdrama' url = 'https://www.kantv6.com/tvdrama/301948271219001' url = 'https://www.kantv6.com/movie/301749570845001' #url = "https://www.kantv6.com/index.php/video/info?tvid=301749570845001&seo=movie" url = "https://www.kantv6.com/index.php/video/info?tvid=301805671042001&seo=documentary" #url = 'https://www.kantv6.com/documentary/301805671042001' dat = self.get_hutf(url) dat = json.loads(dat) echo(json.dumps(dat, indent=2)) echo(dat['data']['title']) url = "https://www.kantv6.com/index.php/video/play?tvid=301749570845001&line=1&seo=movie" dat = self.get_hutf(url) dat = json.loads(dat) echo(json.dumps(dat, indent=2)) #self.get_title(url) #l = self.get_playlist(url) #echo(json.dumps(l, indent=2)) if __name__ == '__main__': start(KANTV6)
info = ci.Network.getResponseBody(requestId=req_id) dat = json.loads(info['result']['body'])['data']['info'][0] print(n, json.dumps(dat, indent=2)) if 'detail' in n: title = dat['title'] if 'playlist' in n: pl = dat['playList'] if title and pl: break murl = chm.get() ci.close() debug("playlist", pl) debug("title", title) debug("murl", murl) def test(self, args): ''' curl 'https://s8-e1.dnvodcdn.me/cdn/_definst_/mp4:s8/jvod/xj-csywm-480p-011A7CB68.mp4/chunklist.m3u8?dnvodendtime=1630365931&dnvodhash=UdJqjguhoxl8IjH7zE3VmVYSt1lmRk7jfX4VhHl-sME=&dnvodCustomParameter=0_64.180.112.212.CA_1&lb=4e0618fc4b0ea2e45d57e6ea11efb267&us=1&vv=d1a310cefe1e3333a2b6021eb7e5e9fd&pub=CJOpC34vCp4oEIusD3KnCryXeAzDZGkCJWmBZ4nCYuoCJ9VP3CpCpKmCZanC3CpD3OuD64oC3SnOZOnP34rCpKpEJXVEJ5XPZHcPcDcPMGuOs8sOpHbC39cOZOnOpGvD68pEM5' -H 'authority: s8-e1.dnvodcdn.me' -H 'pragma: no-cache' -H 'cache-control: no-cache' -H 'sec-ch-ua: "Chromium";v="92", " Not A;Brand";v="99", "Google Chrome";v="92"' -H 'sec-ch-ua-mobile: ?0' -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36' ''' ''' https://www.ifsp.tv/play?id=h0zrX16TdlV ''' url = 'https://www.ifsp.tv/play?id=m0IXOmqu894' url = 'https://s8-e1.dnvodcdn.me/cdn/_definst_/mp4:s8/jvod/xj-csywm-480p-011A7CB68.mp4/chunklist.m3u8?dnvodendtime=1630365931&dnvodhash=UdJqjguhoxl8IjH7zE3VmVYSt1lmRk7jfX4VhHl-sME=&dnvodCustomParameter=0_64.180.112.212.CA_1&lb=4e0618fc4b0ea2e45d57e6ea11efb267&us=1&vv=d1a310cefe1e3333a2b6021eb7e5e9fd&pub=CJOpC34vCp4oEIusD3KnCryXeAzDZGkCJWmBZ4nCYuoCJ9VP3CpCpKmCZanC3CpD3OuD64oC3SnOZOnP34rCpKpEJXVEJ5XPZHcPcDcPMGuOs8sOpHbC39cOZOnOpGvD68pEM5' url = 
'https://s8-e1.dnvodcdn.me/cdn/_definst_/mp4:s8/jvod/xj-csywm-480p-011A7CB68.mp4/chunklist.m3u8?dnvodendtime=1630365740&dnvodhash=otFp1V-_aeahcOjnMSbBLDfxVITK4IK95JzEJvtIaeg=&dnvodCustomParameter=0_64.180.112.212.CA_1&lb=4e0618fc4b0ea2e45d57e6ea11efb267&us=1&vv=93e0e71677f8618bd245507435fe10d8&pub=CJOpC34vCZapE2utDJ4uD5yXeAzDZGkCJWmBZ4nCYuoCJ9VC6KoD6OpDc8mE6KnD65bE3bYOc9cOJHbEJ9XCZTZDMHVDZWpD3GmDZPbOJatPZDZCJOrPM4vPcCtE65ZDZCoDJ7' echo(self.get_hutf(url)) if __name__ == '__main__': start(IFSP)
hutf = self.get_hutf(url) echo(hutf) #us = self._get_m3u8_urls(url, hutf) us = self.try_m3u8(url) return "", None, us, None def test(self, argv): # 'http://v.youku.com/v_show/id_XMTEzMjczNzk2.html' url = 'http://pl-ali.youku.com/playlist/m3u8?vid=XMTEzMjczNzk2&type=mp4&ups_client_netip=d05b730a&utid=%2FyEoFBRd2DACAdBbcwpLOq2V&ccode=0502&psid=d1de1dfcd8a33c57e548965d7827c0ae&duration=2760&expire=18000&drm_type=1&drm_device=7&ups_ts=1537291271&onOff=0&encr=0&ups_key=7300cbefd42af5579bff92f2d143f29f' url = "http://video.zuidajiexi.com/20170822/ds2vvCC1/index.m3u8" url = 'http://video.zuidajiexi.com/ppvod/54EC0F37D50BAE9A41F39A070CA7FDB5.m3u8' #url = 'http://yingshi.yazyzw.com/20170822/0hOHYQLl/index.m3u8' #url = 'http://yingshi.yazyzw.com/ppvod/CE6246563AF188F1E783CC204EEE750C.m3u8' #echo(self.query_info(url)) data = open(argv.url).read() lines = data.split('\n') #EXT-X-KEY:METHOD=AES-128,URI="a0ef9dd2fe9edf72d4162b50d096089d.key" for line in lines: line = line.strip() if not line or line[0] != '#': continue #print line m = re.search('#EXT-X-KEY:.+URI="([^"]+)"', line) if m: print m.groups() print m.group(1) if __name__ == '__main__': start(M3U8)
ns = SelStr('article.article-content > p > strong', hutf) title = ns[0].text urls = [] tsize = 0 for vid in ret: url = 'https://vipwobuka.dianyingbar.com:998/api/yUrl.php?id=%s&type=mp4' % vid #self.extra_headers['Referer'] = 'https://vipwobuka.dianyingbar.com:998/ckplayer/YKYun.php?id=' + vid urls.append(url) break #echo("title=", title, 'mp4') return title, 'mp4', urls, None def get_playlist(self, url): # http://www.dianyingbar.com/9111.html # http://www.dianyingbar.com/3970.html # get xml html = self.get_html(url) hutf = html.decode('utf8', 'ignore') ret = re.findall("videoarr.push\('YKYun\.php\?id\=([^\(\)]+)'\)", hutf) t = self.title #pl = ["http://bodekuai.duapp.com/api/yUrl.php?id=" + r for r in ret] pl = [] for i, r in enumerate(ret, start=1): pl.append(("%s_%02d" % (t, i), "http://bodekuai.duapp.com/api/yUrl.php?id=" + r)) return pl if __name__ == '__main__': start(DYB)
hutf = self.get_hutf(url) #echo(hutf) # "720":[{"type":"application\/x-mpegURL","url":"http:\/\/www.dailymotion.com\/cdn\/manifest\/video\/x2hpv0i.m3u8?auth=1482432896-2562-0fq84z9d-24047244e9a36f0f3fab8388642b74c1&include=720"},{"type":"video\/mp4","url":"http:\/\/www.dailymotion.com\/cdn\/H264-1280x720\/video\/x2hpv0i.mp4?auth=1482432896-2562-pvg451ll-4c251ca9aa8a1bf6f56c88d318eccd65"}]} m = re.search("var config = ([^\n]+);", hutf) j = json.loads(m.group(1)) mr, mq, mu, ex = -2, '', '', '' qua = ['auto', '144', '240', '380', '480', '720'] for k, vs in j['metadata']['qualities'].items(): if k not in qua: echo("New qua", k) continue r = qua.index(k) if r > mr: for v in vs: t = v.get('type', '') if t.startswith('video/'): mr, mq = r, k mu = v['url'] ex = t[-3:] echo("ext=%s, mq=%s, url=%s" % (ex, mq, mu)) title = j['metadata']['title'].strip('.') return title, ex, [mu], None def test(self): url = 'http://www.dailymotion.com/video/k336RLStrzbIGzl96CY' url = 'http://www.dailymotion.com/embed/video/k6ELdHzeVXWQmemlHkY&info=0' if __name__ == '__main__': start(DM)
#echo('TTWanda has new source') #echo(dst) #sys.exit(1) return title, None, [dst], None def get_playlist(self, url): if '/tv/' not in url: return [] url = url.split('?')[0] hutf = self.get_hutf(url) ns = SelStr('div.article-paging a', hutf) # href="?vid=20723618&title=第01集 新局长崛起" urls = [] for a in ns: vid = match1(a['href'], 'vid=(\d+)') if vid: urls.append((a.text, url + '?vid=' + vid)) else: urls.append((a.text, url + a['href'])) return urls def test(self, args): # /tv/ustv/945.html?vid=20723618&title=第01集%20新局长崛起 url = 'http://www.ttwanda.com/tv/ustv/945.html' url = 'http://www.ttwanda.com/tv/ustv/945.html?vid=20723618&title=%E7%AC%AC01%E9%9B%86%20%E6%96%B0%E5%B1%80%E9%95%BF%E5%B4%9B%E8%B5%B7' html = self.get_hutf(url) if __name__ == '__main__': start(TTWanDa)
from subprocess import Popen, PIPE

from mybs import SelStr
from comm import DWM, echo, start


class DRAMA8(DWM):
    """Handler for 8drama.com pages, rendered through PhantomJS."""

    handle_list = [r'/8drama\.com/']

    def query_info(self, url):
        """Render *url* with PhantomJS and extract the video source.

        Returns a 4-tuple ``(title, None, [video_url], None)``.
        Raises ``IndexError`` when the rendered page has no
        ``video source`` or ``h1.entry-title`` element.
        """
        # Example page:  http://8drama.com/122804/
        # Example media: http://8drama.net/ipobar_.php?sign=251438...
        echo('phantomjs wait ...')
        # NOTE(review): "300" is an argument consumed by dwm.js, presumably
        # a wait/timeout value -- confirm against that script.
        p = Popen(["./phantomjs", "dwm.js", "300", url], stdout=PIPE)
        # communicate() drains stdout, closes the pipe and reaps the child,
        # so no file descriptor is left open.
        html, _ = p.communicate()
        hutf = html.decode('utf8')
        url = SelStr('video source', hutf)[0]['src']
        title = SelStr('h1.entry-title', hutf)[0].text
        return title, None, [url], None

    def get_playlist(self, url):
        """Return ``(text, href)`` pairs for links in the content table."""
        ns = SelStr('div.entry-content.rich-content tr td a',
                    self.get_hutf(url))
        return [(a.text, a['href']) for a in ns]


if __name__ == '__main__':
    start(DRAMA8)
echo("stream_id =", stream_id) u2 = info["playurl"]["domain"][0] u2 = u2 + info["playurl"]["dispatch"][stream_id][0] ext = info["playurl"]["dispatch"][stream_id][1].split('.')[-1] u2 = u2 + "&ctv=pc&m3v=1&termid=1&format=1&hwtype=un&ostype=Linux" u2 = u2 + ("&tag=letv&sign=letv&expect=3&tn=%d" % random.random()) u2 = u2 + ("&pay=0&iscpn=f9051&rateid=%s" % stream_id) r2 = self.get_html(u2) info2 = json.loads(r2.decode("utf-8")) m3u8 = self.get_html(info2["location"]) m3u8_list = decode_m3u8(bytearray(m3u8)) us = re.findall(r'^[^#][^\r]*', m3u8_list, re.MULTILINE) #return title, ext, us, None return title, None, us, None def get_playlist(self, page_url): # http://www.letv.com/tv/10003313.html # http://www.le.com/tv/10009472.html urls = [] hutf = self.get_hutf(page_url) for a in SelStr('div.list.active > dl > dt > a', hutf): i = a.select("img")[0] if 'title' in i: urls.append((i['title'], a['href'])) return urls if __name__ == '__main__': start(LETV)
if 'default' in ur: break #break keyid = mp.select('vl > vi > keyid')[0].text.strip() debug("ur =", ur) pt = ui.select('hls > pt')[0].text.strip() tp = ui.select('hls > ftype')[0].text.strip() um = ur + pt + "&type=" + tp + "&fmt=" + fm debug("um =", um) hutf = self.get_hutf(um, raw=True) #echo(hutf) urls = [] for line in hutf.split('\n'): if keyid in line: urls.append(ur + line.strip()) debug('urls =', urls) return title, tp, urls, mz def test(self): url = 'https://y.qq.com/portal/mv/v/s0017amxyfd.html' # don't care music url = 'https://v.qq.com/x/cover/tu0kfx77pkwk3t6.html?vid=k00205g5xhk' # don't add proxy='auto' url = 'http://v.qq.com/iframe/player.html?vid=i00167ai266&tiny=0&auto=0' hutf = self.get_hutf(url) #title, vid, mp = self.getvinfo(url) echo(hutf) if __name__ == '__main__': start(QQ) #QQ().test()
u = msg['params']['request']['url'] debug("Network.requestWillBeSent url", url) if 'master.m3u8' in u: debug("got master.m3u8", u) qnrwb.put(u) ci.reg("Network.requestWillBeSent", nrwb) try: ci.Page.navigate(url=url) murl = qnrwb.get(timeout=ci.get_to()) debug("murl = ", murl) return murl #except Exception as e: # echo("key_m3u8 out:", repr(e)) finally: ci.close() def test(self, args): url = "https://www.olevod.com/index.php/vod/play/id/24986/sid/1/nid/75.html" for t in (self.get_playlist(url)): echo(t[0], t[1]) #hutf = self.get_hutf(url) #echo(hutf) #self.title_murl(url) # https://europe.olemovienews.com/hlstimeofffmp4/20210503/xyflmsiG/mp4/xyflmsiG.mp4/master.m3u8 if __name__ == '__main__': start(OLEVOD)
cnt = cnt + 1 if cnt > tp > 0: break if cnt < sk: continue echo(t, u) b = BILIBILI() b.title = t args.url = u run(b, args) sys.exit(1) def get_playlist(self, url): if "bilibili.com/sp/" in url: BILIBILI.sp = True self.handle_sp_list(url) h, p = self.get_h_p(url) hutf = self.get_hutf(url) m = re.search("<option value='(/%s/index_\d+.html)' selected>" "([^<>]+)</option>" % p, hutf) if m: pl = [(m.group(1), m.group(2))] else: pl = re.findall("<option value='(/%s/index_\d+.html)'>" "([^<>]+)</option>" % p, hutf) return [(self.align_title_num(t), h + u) for u, t in pl] if __name__ == '__main__': start(BILIBILI)
import time from urllib2 import urlopen from urllib import urlencode #url = "http://qdrama.org/k2/" hutf = self.get_hutf(args.url) #echo(hutf) title = SelStr("div.title.sizing h1", hutf)[0].text #echo("title =", title) nodes = SelStr("div#playsource a", hutf) cnt = 0 for node in nodes: cnt += 1 t = "%s_%02d" % (title.encode('utf8'), cnt) u = node['href'] if 'daily' not in u: continue echo(t, u) if cnt < 0: continue data = urlencode({"aviurl": u, "avitil": t, "destdn": "../dwm/xman/", "sub": "Start"}) urlopen("http://127.0.0.1:8080/", data).read() time.sleep(2) if __name__ == '__main__': start(QDRAMA)
)) u = 'http://k.youku.com/player/getFlvPath/sid/{sid}_00' \ '/st/{container}/fileid/{fileid}?{q}'.format( sid = sid, container = ext, fileid = fileid, q = q ) html = self.get_html(u, True) for i in json.loads(html.decode("utf8")): echo(i['server']) urls.append(i['server']) k, size = self.get_total_size(urls) return title, ext, urls, size def get_playlist(self, url): hutf = self.get_hutf(url) #echo(hutf) urls = [] for a in SelStr('div.tvlists div.item a', hutf): if not a.select("span.sn_ispreview"): urls.append((a.text, a['href'])) return urls def test(self, args): url = 'http://list.youku.com/show/id_zcbfffbf2962411de83b1.html' url = 'http://v.youku.com/v_show/id_XMTEzMjYzMTIw.html' if __name__ == '__main__': start(YOUKU)
#m = re.findall(u'''\<a class="btn" href=".+"\>下一章\</a\>''', hutf) #, re.U) #echo(m) if not m: break url = m[0] #break def test(self, args): url = "http://www.bookdown.com.cn/bookinfo/30258.html" #url = "http://www.bookdown.com.cn/read/30258_1.html" ret = match1(url, "/bookinfo/(\d+)\.html", "/read/(\d+).*\.html") bid = int(ret[0]) echo("bid =", bid) url = "http://www.bookdown.com.cn/read/%d_1.html" % bid while True: #print >> sys.stderr, url hutf = self.get_hutf(url) #echo(hutf) for div in SelStr('div#view_content_txt', hutf): echo(re.sub(u"分节阅读.+,请点击下一页继续阅读。", "", re.sub(" ", " ", div.text))) al = SelStr("a#nextPage", hutf) if not al: break url = al[0]['href'] #break if __name__ == '__main__': start(BOOKDN)
info_url = 'http://www.acfun.cn/video/getVideo.aspx?id=%s' % vid hutf = self.get_hutf(info_url) ''' {"encode":"1_1489346126_cd3a0e8575edd448bbd6e497a65908bc","sourceId":"58be74680cf2a0edfd235a75","contentId":3526338,"allowDanmaku":0,"title":"时空线索","userId":10171686,"danmakuId":4938063,"sourceType":"zhuzhan","createTime":"2017-03-07 17:40:59.0","videoList":[{"bitRate":99,"playUrl":"58be74680cf2a0edfd235a75"}],"success":true,"startTime":0,"id":4938063,"time":7565,"config":0,"player":"youku","status":2} ''' echo(hutf) """ curl 'http://aplay-vod.cn-beijing.aliyuncs.com/acfun/web?vid=58be74680cf2a0edfd235a75&ct=85&ev=2&sign=1_1489616951_c07d20643dad14757cfa9aa122a6b33d&time=1489616965963' -H 'Pragma: no-cache' -H 'Accept-Encoding: gzip, deflate, sdch' -H 'Accept-Language: zh-CN,zh;q=0.8,en;q=0.6,zh-TW;q=0.4' -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36' -H 'Accept: */*' -H 'Referer: http://www.acfun.cn/v/ac3526338' -H 'X-Requested-With: ShockwaveFlash/22.0.0.192' -H 'Connection: keep-alive' -H 'Cache-Control: no-cache' --compressed """ info = json.loads(hutf) #url = "https://api.youku.com/players/custom.json?client_id=908a519d032263f8&video_id=58be74680cf2a0edfd235a75&embsig=1_1489346126_cd3a0e8575edd448bbd6e497a65908bc&player_id=ytec" url = "http://aplay-vod.cn-beijing.aliyuncs.com/acfun/web?vid=%s&ct=85&ev=2&sign=%s&time=1489616965963" % ( info['sourceId'], info['encode']) hutf = self.get_hutf(url) #echo(hutf) d = json.loads(hutf) echo(d) #s = d['data'] #key = "328f45d8" key = "2da3ca9e" data = json.loads(rc4(key, base64.b64decode(d['data']))) import pprint pprint.PrettyPrinter(indent=4).pprint(data) for s in data['stream']: echo(s['stream_type'], s['total_size'], s['resolution'], len(s.get('segs', ['No Segs']))) if __name__ == '__main__': start(ACFUN)
h = False for n in phrs: if n.tag == 'hr': h = True continue if not h: continue na = n.select('a') for a in na: urls.append((a.text, a['href'])) return urls def get_playlist(self, url): hutf = self.get_hutf(url) ret = SelStr("a.fasc-button", hutf) return [(self.title, a['href']) for a in ret] def test1(self, argv): url = 'http://vmus.online/the-outpost-s01.html' #hutf = self.chrome_hutf(url) hutf = self.get_hutf(url) #echo(hutf) ret = SelStr("a.fasc-button", hutf) #ret = [str(a) for a in ret] ret = [(0, a['href']) for a in ret] echo(ret) if __name__ == '__main__': start(VMUS)
#url = 'https://www.dnvod.eu/Movie/detail.aspx?id=NU%2bOQHwQObI%3d' #url = 'https://www.dnvod.eu/Movie/Readyplay.aspx?id=deYM01Pf0bo%3d' #hutf = self.get_hutf(url) #debug(hutf) #for a in SelStr('ul[data-identity=guest] > li > div.bfan-n > a', hutf): # debug(a.text, a['href']) # urls.append((a.text, 'https://www.dnvod.eu' + a['href'])) hutf = self.chrome_hutf(url) urls = [] for a in SelStr('ul[data-identity=guest] > li > div.bfan-n > div.bfan-n > a', hutf): #debug(a.text, a['href']) #urls.append((a.text, 'https://www.dnvod.eu/Movie/' + a['href'])) urls.append((self.title, 'https://www.dnvod.tv/Movie/' + a['href'])) echo(urls) return urls def test(self, argv): url = 'https://www.dnvod.tv/Movie/detail.aspx?id=TEee8%2fITNg4%3d' url = 'https://www.dnvod.tv/Movie/Readyplay.aspx?id=OIfaQTVHEiA%3d' ru = 'http://server3.dnvod.tv/hvod/lxj-tscgwlb-50-022061041.mp4?sourceIp=154.20.114.142&signature=856ddbf8ecd34fb9b3aae7ad4c8beddf.56b9f1609633f7eacbc18ecd0dd5e4be&start=1536543792.79147&custom=0&ua=62e66f1213d2881d9f80510593ffe2ec' #ru = 'http://server3.dnvod.tv/hvod/lxj-tscgwlb-50-022061041.mp4' url = "https://www.dnvod.tv/play?id=TW29RCmFL4o%3D" #hutf = self.get_hutf(url) hutf = self.chrome_hutf(url) echo(hutf) #echo(get_kind_size(ru)) if __name__ == '__main__': start(DNVOD)
# -*- coding: utf8 -*-

from subprocess import Popen, PIPE

from mybs import SelStr
from comm import DWM, echo, start


class M3U8(DWM):
    """Handler for direct ``.m3u8`` playlist URLs."""

    # Raw string: same pattern text, without an invalid "\." escape.
    handle_list = [r'\.m3u8']

    def query_info(self, url):
        """Fetch the playlist at *url* and return its media URLs.

        Returns a 4-tuple ``("", None, urls, None)``; the title is left
        empty and *urls* is whatever ``_get_m3u8_urls`` extracts.
        """
        # Example: https://vip.pp63.org/20180615/20jqyayZ/hls/index.m3u8
        hutf = self.get_hutf(url)
        echo(hutf)
        us = self._get_m3u8_urls(url, hutf)
        return "", None, us, None

    def test(self, argv):
        """Placeholder manual test: only records a sample playlist URL."""
        # Source page: http://v.youku.com/v_show/id_XMTEzMjczNzk2.html
        url = 'http://pl-ali.youku.com/playlist/m3u8?vid=XMTEzMjczNzk2&type=mp4&ups_client_netip=d05b730a&utid=%2FyEoFBRd2DACAdBbcwpLOq2V&ccode=0502&psid=d1de1dfcd8a33c57e548965d7827c0ae&duration=2760&expire=18000&drm_type=1&drm_device=7&ups_ts=1537291271&onOff=0&encr=0&ups_key=7300cbefd42af5579bff92f2d143f29f'


if __name__ == '__main__':
    start(M3U8)
m = re.findall('''<a onclick="doif\('([^<>]+)'.+title="([^"]+)".*>''', hutf) echo(m) u = m[0][0] echo(u) # var redirecturl = "http://v2.438vip.com"; # var main = "/20170506/GwFE9JWN/index.m3u8?sign=6afb22db55a3e57908ae61c92b5d8a7ef8dc83032480fa7053da8894ca59e41f64b650d5000a87a8b99b66c829ee8513bed34740982d1bc9e924c752a8d01ce3"; hutf = self.get_hutf(u) #echo(hutf) m = re.findall('''var redirecturl = "(.+)";''', hutf) echo(m) b = m[0] m = re.findall('''var main = "(.+index.m3u8.+)";''', hutf) echo(m) url = b + m[0] echo(url) us = self.try_m3u8(url) echo(us) us = self.try_m3u8(us[0]) echo(us) def test(self, args): url = "https://www.y3600.com/hanju/2014/334.html" url = "https://www.y3600.com/hanju/2014/367.html" hutf = self.get_hutf(url) echo(hutf) if __name__ == '__main__': start(Y3600)
from comm import start, echo


def find_kls(url):
    """Find the handler class that accepts *url*.

    Scans every ``.py`` file next to the running script (the script itself
    excluded) for classes declared as ``class X(DWM):``, loads each such
    module, and returns the first class whose ``can_handle_it(url)`` is
    true. Returns ``None`` when none match. Modules that fail to load are
    reported via ``echo`` and skipped.
    """
    script_dir = os.path.dirname(sys.argv[0]) or "."
    script_name = os.path.basename(sys.argv[0])
    dwmkls = re.compile(r"^class\s+(\S+)\s*\(DWM\)\:", re.M)
    for fn in os.listdir(script_dir):
        if not fn.endswith(".py") or fn == script_name:
            continue
        # listdir() yields bare names; resolve them against the scanned
        # directory so this also works when the cwd is somewhere else.
        path = os.path.join(script_dir, fn)
        with open(path) as f:
            ret = dwmkls.findall(f.read())
        if not ret:
            continue
        name = fn[:-3]
        try:
            m = imp.load_source(name, path)
        except Exception as e:
            echo(name, e)
        else:
            # Use a distinct loop name so the script basename used by the
            # self-exclusion test above is never overwritten.
            for cls_name in ret:
                kls = getattr(m, cls_name)
                if kls.can_handle_it(url):
                    return kls
    return None


if __name__ == '__main__':
    start(find_kls)
#url = "http://www.dailymotion.com/embed/video/k4BjypcByJGUTDl6Bvx" hutf = self.get_hutf(url) #echo(hutf) # "720":[{"type":"application\/x-mpegURL","url":"http:\/\/www.dailymotion.com\/cdn\/manifest\/video\/x2hpv0i.m3u8?auth=1482432896-2562-0fq84z9d-24047244e9a36f0f3fab8388642b74c1&include=720"},{"type":"video\/mp4","url":"http:\/\/www.dailymotion.com\/cdn\/H264-1280x720\/video\/x2hpv0i.mp4?auth=1482432896-2562-pvg451ll-4c251ca9aa8a1bf6f56c88d318eccd65"}]} m = re.search("var config = ([^\n]+);", hutf) j = json.loads(m.group(1)) mr, mq, mu, ex = -2, '', '', '' qua = ['auto', '144', '240', '380', '480', '720'] for k, vs in j['metadata']['qualities'].items(): if k not in qua: echo("New qua", k) continue r = qua.index(k) if r > mr: for v in vs: t = v.get('type', '') if t.startswith('video/'): mr, mq = r, k mu = v['url'] ex = t[-3:] echo("ext=%s, mq=%s, url=%s" % (ex, mq, mu)) title = j['metadata']['title'].strip('.') return title, ex, [mu], None def test(self): url = 'http://www.dailymotion.com/video/k336RLStrzbIGzl96CY' if __name__ == '__main__': start(DM)
def nrr(ci, msg): url = msg['params']['response']['url'] if '/api/video/detail' in url: qnrr.put(msg['params']['requestId']) ci.reg("Network.responseReceived", nrr) try: ci.Page.navigate(url=url) req_id = qnrr.get(timeout=ci.get_to()) ret = ci.Network.getResponseBody(requestId=req_id) body = json.loads(ret['result']['body']) bdi0 = body['data']['info'][0] title, channel = bdi0['title'], bdi0['channel'] keys = [g['key'] for g in bdi0['guestSeriesList']] debug("keys = ", keys) return channel, title, keys except Exception as e: echo("detail_key out:", repr(e)) finally: ci.close() return None, None, [] def test(self, args): url = 'https://train.ifvod.tv/detail?id=pGhytibvDFN' if __name__ == '__main__': start(IFVOD)