async def get_page_info_ep(self, url):
    """Fetch an episode page and extract its identifiers and title.

    The page at ``url`` is downloaded with ``self.headers``; the JSON object
    embedded after ``__INITIAL_STATE__=`` is parsed and the episode fields
    are read from it.

    Args:
        url: Address of the episode page.

    Returns:
        A ``(bvid, cid, title, artist, season_type)`` tuple. ``artist`` is
        falsy when the page's ``mediaInfo`` carries no ``upInfo``.

    Raises:
        KeyError: If ``epInfo`` / ``mediaInfo`` (or their ``cid`` / ``bvid``
            entries) are missing from the embedded state.
    """
    async with aiohttp.ClientSession() as session:
        async with session.request("get", url, headers=self.headers) as r:
            # Keep the page text around: the <title> fallback below needs it.
            page = await r.text()

    data = json.loads(match1(page, "__INITIAL_STATE__=({.+?});"))

    # Prefer the title stored in the state; otherwise fall back to the
    # document <title>, which has to be searched in the downloaded page text.
    title = data.get("h1Title") or match1(page, "<title>(.+?)_番剧_bilibili_哔哩哔哩<")

    ep_info = data["epInfo"]
    cid = ep_info["cid"]
    bvid = ep_info["bvid"]

    media_info = data["mediaInfo"]
    season_type = media_info.get("season_type") or media_info.get("ssType")
    up_info = media_info.get("upInfo")
    artist = up_info and up_info.get("name")
    return bvid, cid, title, artist, season_type
async def get_play_info(self):
    """Resolve the title and the playable stream URL of the live room.

    Steps:
      1. Download ``self.room_url`` and extract the room id (stored on
         ``self.rid``), the room title and the anchor name from the HTML.
      2. If title or anchor name is missing, ask ``self.api_url2 + rid``.
      3. Obtain the page's signing script via ``self.get_h5enc`` and let
         ``self.ub98484234`` add the signature fields to the request params.
      4. POST the params to ``self.api_url3 + rid`` and build the play URL
         from ``rtmp_url`` and ``rtmp_live`` in the response.

    Returns:
        dict with the keys ``"title"`` and ``"play_url"``.
    """
    ret = dict()

    async with aiohttp.ClientSession() as sess:
        async with sess.request("get", self.room_url, headers=self.headers) as resp:
            page = await resp.text()

        # Raw strings keep the regex escapes (\$, \s, \d, \w) from being
        # treated as (invalid) Python string escapes.
        self.rid = match1(
            page,
            r"\$ROOM\.room_id\s*=\s*(\d+)",
            r"room_id\s*=\s*(\d+)",
            r'"room_id.?":(\d+)',
            r"data-onlineid=(\d+)",
        )
        title = match1(page, r'Title-head\w*">([^<]+)<')
        artist = match1(page, r'Title-anchorName\w*" title="([^"]+)"')

        if not title or not artist:
            # The HTML did not expose both values; fall back to the room API.
            async with sess.request("get", self.api_url2 + self.rid, headers=self.headers) as resp1:
                room_data = await resp1.json()
            if room_data["error"] == 0:
                room_data = room_data["data"]
                title = room_data["room_name"]
                artist = room_data["owner_name"]

    ret["title"] = u"{} - {}".format(title, artist)

    js_enc = await self.get_h5enc(page)
    params = {"cdn": "", "iar": 0, "ive": 0}
    # ub98484234 mutates ``params`` in place (adds v / did / tt / sign).
    await self.ub98484234(js_enc, params)
    params["rate"] = 0

    async with aiohttp.ClientSession() as sess:
        async with sess.request("post", self.api_url3 + self.rid, data=params, headers=self.headers) as resp:
            live_data = await resp.json()

    if live_data["error"]:
        # Best effort: report the server message and still try to use the payload.
        print(live_data["msg"])
    live_data = live_data["data"]
    ret["play_url"] = "{}/{}".format(live_data["rtmp_url"], live_data["rtmp_live"])
    return ret
async def get_page_info_ep(self, url):
    """Fetch an episode page and extract its identifiers and title.

    The page at ``url`` is downloaded with ``self.headers``; the JSON object
    embedded after ``__INITIAL_STATE__=`` is parsed and the episode fields
    are read from it.

    Args:
        url: Address of the episode page.

    Returns:
        A ``(bvid, cid, title, artist, season_type)`` tuple. ``artist`` is
        falsy when the page's ``mediaInfo`` carries no ``upInfo``.

    Raises:
        KeyError: If ``epInfo`` / ``mediaInfo`` (or their ``cid`` / ``bvid``
            entries) are missing from the embedded state.
    """
    async with aiohttp.ClientSession() as session:
        async with session.request('get', url, headers=self.headers) as r:
            # Keep the page text around: the <title> fallback below needs it.
            page = await r.text()

    data = json.loads(match1(page, '__INITIAL_STATE__=({.+?});'))

    # Prefer the title stored in the state; otherwise fall back to the
    # document <title>, which has to be searched in the downloaded page text.
    title = data.get('h1Title') or match1(
        page, '<title>(.+?)_番剧_bilibili_哔哩哔哩<')

    ep_info = data['epInfo']
    cid = ep_info['cid']
    bvid = ep_info['bvid']

    media_info = data['mediaInfo']
    season_type = media_info.get('season_type') or media_info.get('ssType')
    up_info = media_info.get('upInfo')
    artist = up_info and up_info.get('name')
    return bvid, cid, title, artist, season_type
async def get_play_info(self):
    """Resolve the title and a playable stream URL from the room page.

    The page at ``self.room_url`` embeds a base64-encoded JSON document in a
    ``"stream"`` field. That document is decoded, one entry of its
    ``gameStreamInfoList`` is picked at random, and the FLV URL of that entry
    is assembled.

    Returns:
        dict with the keys ``'title'`` and ``'play_url'``.

    Raises:
        AssertionError: If the page has no ``"stream"`` field ("live video
            is offline") or the decoded document's ``status`` is not 200
            (message taken from its ``msg`` field).
    """
    async with aiohttp.ClientSession() as sess:
        async with sess.request('get', self.room_url, headers=self.headers) as resp:
            page = await resp.text()

    json_stream = match1(page, '"stream": "([a-zA-Z0-9+=/]+)"')
    # Explicit raises (instead of ``assert``) so the checks survive ``python -O``
    # while callers still see the same exception type.
    if not json_stream:
        raise AssertionError("live video is offline")

    data = json.loads(base64.b64decode(json_stream).decode())
    if data['status'] != 200:
        raise AssertionError(data['msg'])

    live = data['data'][0]
    room_info = live['gameLiveInfo']
    ret = dict()
    ret['title'] = u'{}「{} - {}」'.format(room_info['roomName'],
                                         room_info['nick'],
                                         room_info['introduction'])

    stream_info = random.choice(live['gameStreamInfoList'])
    stream_name = stream_info['sStreamName']

    # Only the FLV variant is used: of the ('flv', 'hls') preference order
    # the first entry always wins.
    kind = 'flv'.title()
    base_url = stream_info['s{}Url'.format(kind)]
    suffix = stream_info['s{}UrlSuffix'.format(kind)]
    anti_code = stream_info['s{}AntiCode'.format(kind)]
    # The anti-code query string is HTML-escaped in the page data.
    ret['play_url'] = html.unescape(u'{}/{}.{}?{}'.format(
        base_url, stream_name, suffix, anti_code))
    return ret
async def get_play_info(self):
    """Resolve the title and a playable stream URL from the room page.

    The page at ``self.room_url`` embeds a base64-encoded JSON document in a
    ``"stream"`` field. That document is decoded, one entry of its
    ``gameStreamInfoList`` is picked at random, and the FLV URL of that entry
    is assembled.

    Returns:
        dict with the keys ``"title"`` and ``"play_url"``.

    Raises:
        AssertionError: If the page has no ``"stream"`` field ("live video
            is offline") or the decoded document's ``status`` is not 200
            (message taken from its ``msg`` field).
    """
    async with aiohttp.ClientSession() as sess:
        async with sess.request("get", self.room_url, headers=self.headers) as resp:
            page = await resp.text()

    json_stream = match1(page, '"stream": "([a-zA-Z0-9+=/]+)"')
    # Explicit raises (instead of ``assert``) so the checks survive ``python -O``
    # while callers still see the same exception type.
    if not json_stream:
        raise AssertionError("live video is offline")

    data = json.loads(base64.b64decode(json_stream).decode())
    if data["status"] != 200:
        raise AssertionError(data["msg"])

    live = data["data"][0]
    room_info = live["gameLiveInfo"]
    ret = dict()
    ret["title"] = u"{}「{} - {}」".format(
        room_info["roomName"], room_info["nick"], room_info["introduction"]
    )

    stream_info = random.choice(live["gameStreamInfoList"])
    stream_name = stream_info["sStreamName"]

    # Only the FLV variant is used: of the ("flv", "hls") preference order
    # the first entry always wins.
    kind = "flv".title()
    base_url = stream_info["s{}Url".format(kind)]
    suffix = stream_info["s{}UrlSuffix".format(kind)]
    anti_code = stream_info["s{}AntiCode".format(kind)]
    # The anti-code query string is HTML-escaped in the page data.
    ret["play_url"] = html.unescape(
        u"{}/{}.{}?{}".format(base_url, stream_name, suffix, anti_code)
    )
    return ret
async def get_play_info(self):
    """Resolve the title and the playable stream URL of the live room.

    Steps:
      1. Download ``self.room_url`` and extract the room id (stored on
         ``self.rid``), the room title and the anchor name from the HTML.
      2. If title or anchor name is missing, ask ``self.api_url2 + rid``.
      3. Obtain the page's signing script via ``self.get_h5enc`` and let
         ``self.ub98484234`` add the signature fields to the request params.
      4. POST the params to ``self.api_url3 + rid`` and build the play URL
         from ``rtmp_url`` and ``rtmp_live`` in the response.

    Returns:
        dict with the keys ``'title'`` and ``'play_url'``.
    """
    ret = dict()

    async with aiohttp.ClientSession() as sess:
        async with sess.request('get', self.room_url, headers=self.headers) as resp:
            page = await resp.text()

        # Raw strings keep the regex escapes (\$, \s, \d, \w) from being
        # treated as (invalid) Python string escapes.
        self.rid = match1(page,
                          r'\$ROOM\.room_id\s*=\s*(\d+)',
                          r'room_id\s*=\s*(\d+)',
                          r'"room_id.?":(\d+)',
                          r'data-onlineid=(\d+)')
        title = match1(page, r'Title-head\w*">([^<]+)<')
        artist = match1(page, r'Title-anchorName\w*" title="([^"]+)"')

        if not title or not artist:
            # The HTML did not expose both values; fall back to the room API.
            async with sess.request('get', self.api_url2 + self.rid, headers=self.headers) as resp1:
                room_data = await resp1.json()
            if room_data['error'] == 0:
                room_data = room_data['data']
                title = room_data['room_name']
                artist = room_data['owner_name']

    ret['title'] = u'{} - {}'.format(title, artist)

    js_enc = await self.get_h5enc(page)
    params = {'cdn': '', 'iar': 0, 'ive': 0}
    # ub98484234 mutates ``params`` in place (adds v / did / tt / sign).
    await self.ub98484234(js_enc, params)
    params['rate'] = 0

    async with aiohttp.ClientSession() as sess:
        async with sess.request('post', self.api_url3 + self.rid, data=params, headers=self.headers) as resp:
            live_data = await resp.json()

    if live_data['error']:
        # Best effort: report the server message and still try to use the payload.
        print(live_data['msg'])
    live_data = live_data['data']
    ret['play_url'] = '{}/{}'.format(live_data['rtmp_url'], live_data['rtmp_live'])
    return ret
async def get_h5enc(self, html):
    """Return the JavaScript source that defines ``ub98484234``.

    The script is first looked for inline in the room page. When it is not
    found there, or the inline script does not contain ``ub98484234(``, it is
    fetched from ``self.api_url1`` for the room ``self.rid``.

    Args:
        html: Text of the room page.

    Returns:
        The JavaScript source as a string.

    Raises:
        AssertionError: If the API answers with a non-zero ``error`` code;
            the message is the API's ``msg`` field.
    """
    # Raw string: \s and \S are regex escapes, not Python string escapes.
    js_enc = match1(html, r'(var vdwdae325w_64we =[\s\S]+?)\s*</script>')
    if js_enc is not None and 'ub98484234(' in js_enc:
        return js_enc

    params = {'rids': self.rid}
    async with aiohttp.ClientSession() as sess:
        async with sess.request('get', self.api_url1, params=params, headers=self.headers) as resp:
            data = await resp.json()

    # Explicit raise (instead of ``assert``) so the check survives ``python -O``
    # while callers still see the same exception type.
    if data['error'] != 0:
        raise AssertionError(data['msg'])
    return data['data']['room' + self.rid]
async def get_page_info(self, url):
    """Fetch a video page and extract the identifiers of the requested part.

    The part number is taken from ``?p=N`` or ``index_N.`` in ``url`` and
    defaults to ``"1"``. The page's embedded ``__INITIAL_STATE__`` JSON
    supplies the ``videoData`` record that everything else is read from.

    Args:
        url: Address of the video page.

    Returns:
        A ``(bvid, cid, title, artist)`` tuple. For multi-part videos the
        title is extended with the part number and part name; for
        single-part videos the part name is appended only when it is
        non-empty and differs from the title.

    Raises:
        ValueError: If no entry of ``videoData["pages"]`` has the requested
            part number.
    """
    # Raw strings: \?, \d and \. are regex escapes, not Python string escapes.
    page_index = match1(url, r"\?p=(\d+)", r"index_(\d+)\.") or "1"

    async with aiohttp.ClientSession() as session:
        async with session.request("get", url, headers=self.headers) as r:
            page_text = await r.text()
    data = json.loads(match1(page_text, "__INITIAL_STATE__=({.+?});"))["videoData"]

    bvid = data["bvid"]
    title = data["title"]
    artist = data["owner"]["name"]
    pages = data["pages"]

    for page in pages:
        index = str(page["page"])
        if index != page_index:
            continue
        subtitle = page["part"]
        cid = page["cid"]
        if len(pages) > 1:
            title = u"{} - {} - {}".format(title, index, subtitle)
        elif subtitle and subtitle != title:
            title = u"{} - {}".format(title, subtitle)
        break
    else:
        # Without this, ``cid`` would be unbound at the return statement.
        raise ValueError("page {} not found in {}".format(page_index, url))

    return bvid, cid, title, artist
async def get_h5enc(self, html):
    """Return the JavaScript source that defines ``ub98484234``.

    The script is first looked for inline in the room page. When it is not
    found there, or the inline script does not contain ``ub98484234(``, it is
    fetched from ``self.api_url1`` for the room ``self.rid``.

    Args:
        html: Text of the room page.

    Returns:
        The JavaScript source as a string.

    Raises:
        AssertionError: If the API answers with a non-zero ``error`` code;
            the message is the API's ``msg`` field.
    """
    # Raw string: \s and \S are regex escapes, not Python string escapes.
    js_enc = match1(html, r"(var vdwdae325w_64we =[\s\S]+?)\s*</script>")
    if js_enc is not None and "ub98484234(" in js_enc:
        return js_enc

    params = {"rids": self.rid}
    async with aiohttp.ClientSession() as sess:
        async with sess.request("get", self.api_url1, params=params, headers=self.headers) as resp:
            data = await resp.json()

    # Explicit raise (instead of ``assert``) so the check survives ``python -O``
    # while callers still see the same exception type.
    if data["error"] != 0:
        raise AssertionError(data["msg"])
    return data["data"]["room" + self.rid]
async def get_page_info(self, url):
    """Fetch a video page and extract the identifiers of the requested part.

    The part number is taken from ``?p=N`` or ``index_N.`` in ``url`` and
    defaults to ``'1'``. The page's embedded ``__INITIAL_STATE__`` JSON
    supplies the ``videoData`` record that everything else is read from.

    Args:
        url: Address of the video page.

    Returns:
        A ``(bvid, cid, title, artist)`` tuple. For multi-part videos the
        title is extended with the part number and part name; for
        single-part videos the part name is appended only when it is
        non-empty and differs from the title.

    Raises:
        ValueError: If no entry of ``videoData['pages']`` has the requested
            part number.
    """
    # Raw strings: \?, \d and \. are regex escapes, not Python string escapes.
    page_index = match1(url, r'\?p=(\d+)', r'index_(\d+)\.') or '1'

    async with aiohttp.ClientSession() as session:
        async with session.request('get', url, headers=self.headers) as r:
            page_text = await r.text()
    data = json.loads(
        match1(page_text, '__INITIAL_STATE__=({.+?});'))['videoData']

    bvid = data['bvid']
    title = data['title']
    artist = data['owner']['name']
    pages = data['pages']

    for page in pages:
        index = str(page['page'])
        if index != page_index:
            continue
        subtitle = page['part']
        cid = page['cid']
        if len(pages) > 1:
            title = u'{} - {} - {}'.format(title, index, subtitle)
        elif subtitle and subtitle != title:
            title = u'{} - {}'.format(title, subtitle)
        break
    else:
        # Without this, ``cid`` would be unbound at the return statement.
        raise ValueError('page {} not found in {}'.format(page_index, url))

    return bvid, cid, title, artist
async def ub98484234(self, js_enc, params):
    """Run the page's ``ub98484234`` JS function and store its result in ``params``.

    The supplied script is instrumented and then executed in a ``JSEngine``:

    * ``js_dom`` creates a debug object and stub ``window`` / ``document``
      globals when they do not exist.
    * ``js_patch`` replaces the script's ``eval(<workflow>);`` statement. It
      records the workflow code on the debug object, appends an
      always-true function to it when a specific assignment pattern is found,
      applies the same treatment to a nested ``eval`` and finally evaluates
      the workflow.
    * ``js_debug`` wraps ``ub98484234`` so that it returns the debug object,
      which carries either the original return value or the error message.

    The JavaScript identifiers introduced by the instrumentation are random
    (``get_random_name(8)``).

    Args:
        js_enc: JavaScript source that defines ``ub98484234``.
        params: dict updated in place with the keys ``'v'``, ``'did'``,
            ``'tt'`` and ``'sign'``.

    Returns:
        None. ``params`` is mutated.
    """
    # All regex-bearing literals below are raw strings so that sequences like
    # \w, \d, \( reach the regex / JS engine unchanged instead of being
    # parsed as (invalid) Python string escapes.
    names_dict = {
        'debugMessages': get_random_name(8),
        'decryptedCodes': get_random_name(8),
        'resoult': get_random_name(8),
        '_ub98484234': get_random_name(8),
        # Name of the variable the script passes to eval() inside ub98484234.
        'workflow': match1(js_enc, r'function ub98484234\(.+?\Weval\((\w+)\);'),
    }

    js_dom = (
        r' {debugMessages} = {{{decryptedCodes}: []}};'
        r' if (!this.window) {{window = {{}};}}'
        r' if (!this.document) {{document = {{}};}} '
    ).format(**names_dict)

    js_patch = (
        r' {debugMessages}.{decryptedCodes}.push({workflow});'
        r' var patchCode = function(workflow) {{'
        r' var testVari = /(\w+)=(\w+)\([\w\+]+\);.*?(\w+)="\w+";/.exec(workflow);'
        r' if (testVari && testVari[1] == testVari[2]) {{'
        r' {workflow} += testVari[1] + "[" + testVari[3] + "] = function() {{return true;}};";'
        r' }}'
        r' }};'
        r' patchCode({workflow});'
        r' var subWorkflow = /(?:\w+=)?eval\((\w+)\)/.exec({workflow});'
        r' if (subWorkflow) {{'
        r' var subPatch = `'
        r" {debugMessages}.{decryptedCodes}.push('sub workflow: ' + subWorkflow);"
        r' patchCode(subWorkflow);'
        r' `.replace(/subWorkflow/g, subWorkflow[1]) + subWorkflow[0];'
        r' {workflow} = {workflow}.replace(subWorkflow[0], subPatch);'
        r' }}'
        r' eval({workflow}); '
    ).format(**names_dict)

    js_debug = (
        r' var {_ub98484234} = ub98484234;'
        r' ub98484234 = function(p1, p2, p3) {{'
        r' try {{'
        r' var resoult = {_ub98484234}(p1, p2, p3);'
        r' {debugMessages}.{resoult} = resoult;'
        r' }} catch(e) {{'
        r' {debugMessages}.{resoult} = e.message;'
        r' }}'
        r' return {debugMessages};'
        r' }}; '
    ).format(**names_dict)

    # Swap the script's own eval of the workflow for the instrumented version.
    js_enc = js_enc.replace('eval({workflow});'.format(**names_dict), js_patch)

    js_ctx = JSEngine()
    js_ctx.append(self.js_md5)
    js_ctx.append(js_dom)
    js_ctx.append(js_enc)
    js_ctx.append(js_debug)

    did = uuid.uuid4().hex
    tt = str(int(time.time()))
    debug_result = js_ctx.call('ub98484234', self.rid, did, tt)
    # The wrapper returns the debug object; the real result sits under the
    # random 'resoult' key.
    signed = debug_result[names_dict['resoult']]

    params.update({
        'v': match1(signed, r'v=(\d+)'),
        'did': did,
        'tt': tt,
        'sign': match1(signed, r'sign=(\w{32})'),
    })