def fetch_mozecname(vid):
    """Fill the module-level ``mozecname`` table from fun.tv's packed scripts.

    Downloads the mobile play page for *vid*, locates the packed JS bundles
    referenced in its ``<head>``, and extracts the arguments passed to
    ``document.mozEcName.push("...")`` inside the packed payload.  The
    collected strings are finally converted into a dict that maps
    ``int(last character)`` to ``int(remaining characters, 16)``.

    Does nothing when ``mozecname`` already holds exactly four entries.

    Args:
        vid: video id appended to the play-page URL.

    Returns:
        None.  The result is stored in the global ``mozecname``.
    """
    # vid: seems non-interrelated with result
    global mozecname
    if len(mozecname) == 4:
        return

    digits = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def i2b(d):
        """Encode integer *d* in the current ``base`` using ``digits``."""
        if base == 10:
            return str(d)
        b = []
        while d:
            d, m = divmod(d, base)
            b.append(digits[m])
        return ''.join(b[::-1]) or '0'

    def b2i(text):
        """Decode *text* written in the current ``base`` back to an integer."""
        try:
            return int(text, base)
        except ValueError:
            # int() refuses bases above 36 (and malformed literals); decode
            # by hand with the 62-character alphabet instead.
            return sum(digits.index(d) * base**p
                       for p, d in enumerate(text[::-1]))

    def encrypt(text):
        '''fake, only for existed texts'''
        return i2b(keys_dict[text])

    def decrypt(text):
        # an empty key-table entry means the token stands for itself
        return keys_list[b2i(text)] or text

    html = get_content('https://m.fun.tv/vplay/?vid=' + vid)
    for path in matchall(html[:html.find('</head>')],
                         r'src="(/static/js/v12/pkg/m\w{4}_v12_\w{9}.js)"'):
        js = get_content('https://m.fun.tv' + path).strip()
        crypt, base, _, keys, sep = re.search(
            r"}\('(.+?)[^\\]',(\d+),(\d+),'(.+?)'\.split\('(.)'",
            js).groups()
        base = int(base)
        keys_list = keys.split(sep)
        keys_dict = {k: i for i, k in enumerate(keys_list)}
        # Search the packed payload for the packed form of
        # document.mozEcName.push("...") and capture its argument.
        pattern = '\\.'.join(
            encrypt(text)
            for text in ['document', 'mozEcName', 'push']) + r'\("(\w+)'
        mozecname += [decrypt(text) for text in matchall(crypt, pattern)]

    site.logger.debug('mozEcName: %s', mozecname)
    mozecname = {int(m[-1]): int(m[:-1], 16) for m in mozecname}
def get_vid_title(self):
    """Resolve the cid and title of the page addressed by ``self.url``.

    The av id and page index are parsed from ``self.url``, which is then
    rewritten to the canonical ``https://www.bilibili.com/av<id>/`` form
    (with ``?p=<n>`` for pages other than the first).  When ``self.vid`` is
    not set, the page is fetched and scanned for the entry whose page number
    equals the requested index.

    Returns:
        tuple: ``(vid, title)``.  ``vid`` falls back to ``self.vid`` when no
        lookup is performed or no page entry matches; ``title`` is ``None``
        when the page is not fetched.
    """
    av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
    page_index = '1'
    if "#page=" in self.url or "?p=" in self.url or 'index_' in self.url:
        # A marker without a parsable number must not yield "?p=None".
        page_index = match1(self.url, r'(?:#page|\?p)=(\d+)',
                            r'index_(\d+)\.') or '1'
    if page_index == '1':
        self.url = 'https://www.bilibili.com/av{}/'.format(av_id)
    else:
        self.url = 'https://www.bilibili.com/av{}/?p={}'.format(
            av_id, page_index)

    # Defaults keep the final return well-defined when self.vid is already
    # set or when no page entry matches (previously an UnboundLocalError).
    vid = self.vid
    title = None
    if not self.vid:
        html = get_content(self.url)
        title = match1(html, '"title":"([^"]+)', '<h1 title="([^"]+)',
                       '<title>([^<]+)').strip()
        video_list = matchall(
            html,
            [r'"cid":(\d+),"page":(\d+),"from":"[^"]+","part":"([^"]*)",'])
        for cid, page, part in video_list:
            if page == page_index:
                vid = cid
                if len(video_list) > 1:
                    title = u'{} - {} - {}'.format(title, page, part)
                elif part:
                    title = u'{} - {}'.format(title, part)
                break
    return vid, title
def prepare_list(self):
    """List the per-page URLs of a multi-part bilibili ``av`` video.

    ``self.url`` is rewritten to the canonical ``/av<id>/`` page, which is
    then fetched and scanned for ``"page":<n>,`` entries.

    Returns:
        list of ``https://www.bilibili.com/av<id>/?p=<n>`` URLs, or ``None``
        when the page contains no page entries.
    """
    av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
    self.url = 'https://www.bilibili.com/av{}/'.format(av_id)
    html = get_content(self.url)
    video_list = matchall(html, [r'"page":(\d+),'])
    if video_list:
        return ['https://www.bilibili.com/av{}/?p={}'.format(av_id, p)
                for p in video_list]
    return None
def get_realurl(url):
    """Return the real media URL behind *url*.

    If ``get_location`` reports a location different from *url*, that
    location is returned.  Otherwise the document at *url* is fetched and
    the second ``CDATA[...]`` payload found in it is returned.

    Raises:
        IndexError: if the document holds fewer than two CDATA payloads.
    """
    location = get_location(url)
    if location != url:
        return location
    html = get_content(url)
    # index 1: the second CDATA section is the one used as the stream URL
    return matchall(html, [r'CDATA\[([^\]]+)'])[1]
def prepare(self):
    """Build the ``VideoInfo`` for an ifeng video.

    The video id is taken from ``self.vid``, else from ``self.url`` (URL
    fragment or ``.shtml`` file name), else from the page source.  The
    per-video XML descriptor is then fetched to obtain the title and the
    play URLs, which are re-hosted under ``http://ips.ifeng.com/``.

    Returns:
        VideoInfo with a single ``'current'`` mp4 stream.
    """
    info = VideoInfo(self.name)
    if not self.vid:
        self.vid = match1(self.url, r'#([a-zA-Z0-9\-]+)',
                          r'/([a-zA-Z0-9\-]+).shtml')
    if not self.vid:
        html = get_content(self.url)
        self.vid = match1(html, '"vid": "([^"]+)', 'vid: "([^"]+)')

    # The descriptor path is sharded by the trailing characters of the id.
    xml = get_content(
        'http://vxml.ifengimg.com/video_info_new/{}/{}/{}.xml'.format(
            self.vid[-2], self.vid[-2:], self.vid))
    info.title = match1(xml, 'SE_Title="([^"]+)')
    urls = matchall(xml, 'playurl="([^"]+)')
    # u[7:] drops the first seven characters (presumably "http://").
    urls = ['http://ips.ifeng.com/' + u[7:] for u in urls]
    info.stream_types.append('current')
    info.streams['current'] = {
        'container': 'mp4',
        'video_profile': 'current',
        'src': urls,
        'size': 0
    }
    return info
def prepare(self):
    """Build the ``VideoInfo`` for an iqiyi video via the vps API.

    ``self.vid`` is a ``(tvid, vid)`` pair.  When missing it is derived from
    a ``curid=<tvid>_<vid>`` URL parameter (the title then comes from the
    mixer API and the real page), or scraped from the page source.  Each
    quality level reported by ``getvps`` becomes one flv stream whose
    segment URLs are resolved with one request per segment.

    Returns:
        VideoInfo with ``stream_types`` ordered by ``self.ids``.

    Raises:
        AssertionError: if the vps API does not answer with code ``A00000``.
    """
    info = VideoInfo(self.name)

    if self.url and not self.vid:
        vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
        if vid:
            self.vid = vid[0]
            info_u = ('http://mixer.video.iqiyi.com/jp/mixin/videos/' +
                      self.vid[0])
            mixin = get_content(info_u)
            # strip the JS assignment prefix to get at the JSON payload
            mixin_json = json.loads(mixin[len('var tvInfoJs='):])
            real_u = mixin_json['url']
            real_html = get_content(real_u)
            info.title = match1(real_html, '<title>([^<]+)').split('-')[0]

    if self.url and not self.vid:
        html = get_content(self.url)
        video_info = match1(html, ":video-info='(.+?)'")
        if video_info:
            video_info = json.loads(video_info)
            self.vid = str(video_info['tvId']), str(video_info['vid'])
            info.title = video_info['name']
        else:
            tvid = match1(html, 'data-player-tvid="([^"]+)"',
                          'tvid="(.+?)"', 'tvId:([^,]+)',
                          r'''param\['tvid'\]\s*=\s*"(.+?)"''',
                          r'"tvid":\s*"(\d+)"')
            videoid = match1(html, 'data-player-videoid="([^"]+)"',
                             'vid="(.+?)"', 'vid:"([^"]+)',
                             r'''param\['vid'\]\s*=\s*"(.+?)"''',
                             r'"vid":\s*"(\w+)"')
            self.vid = (tvid, videoid)
            info.title = match1(html, '<title>([^<]+)').split('-')[0]

    tvid, vid = self.vid
    vps_data = getvps(tvid, vid)
    assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
    url_prefix = vps_data['data']['vp']['du']
    track = vps_data['data']['vp']['tkl'][0]
    for vs in track['vs']:
        bid = vs['bid']
        real_urls = []
        for seg_info in vs['fs']:
            # each segment entry points at a JSON document holding the
            # final download URL under key 'l'
            url = url_prefix + seg_info['l']
            json_data = json.loads(get_content(url))
            real_urls.append(json_data['l'])
        stream = self.vd_2_id[bid]
        info.stream_types.append(stream)
        info.streams[stream] = {
            'video_profile': self.id_2_profile[stream],
            'container': 'flv',
            'src': real_urls,
            'size': 0
        }
    info.stream_types = sorted(info.stream_types, key=self.ids.index)
    return info
def prepare(self):
    """Build the ``VideoInfo`` for a kankanews article.

    The ``omsid`` is looked up in the article's vxml descriptor, first using
    the id in ``self.url`` and, if that yields nothing (or ``'0'``), using
    the ``xmlid=`` / ``embed/`` id found in the page source.  The video API
    is then queried for the title and stream URLs.

    Returns:
        VideoInfo with a single ``'current'`` mp4 stream.

    Raises:
        AssertionError: if no usable ``omsid`` could be determined.
    """
    info = VideoInfo(self.name)
    id1 = match1(self.url, r'a/([^\.]+)\.')
    api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
    video_data1 = get_content(api1)
    self.vid = match1(video_data1, '<omsid>([^<]+)<')

    if self.vid == '0' or not self.vid:
        html = get_content(self.url)
        # NOTE: .replace() applies only to the 'embed/' alternative.
        id1 = match1(html, 'xmlid=([^"]+)') or match1(
            html, 'embed/([^"]+)').replace('_', '/')
        api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
        video_data1 = get_content(api1)
        self.vid = match1(video_data1, '<omsid>([^<]+)<')

    assert self.vid != '0' and self.vid, self.url + ': Not a video news link!'

    api2 = 'http://vapi.kankanews.com/index.php?app=api&mod=public&act=getvideo&id={}'.format(
        self.vid)
    video_data2 = get_content(api2)
    urls = matchall(video_data2, [r'<videourl><!\[CDATA\[([^\]]+)'])
    info.title = match1(video_data2, r'<otitle><!\[CDATA\[([^\]]+)')
    info.stream_types.append('current')
    info.streams['current'] = {
        'container': 'mp4',
        'video_profile': 'current',
        'src': urls,
        'size': 0
    }
    return info
def prepare(self):
    """Derive ``self.vid`` from the URL if needed, then delegate.

    When a URL is available and no vid is set, the ``vu=`` and ``uu=``
    query values are extracted from ``self.url`` and stored in ``self.vid``.
    The result of ``self.letvcloud_download_by_vu()`` is returned.
    """
    needs_lookup = self.url and not self.vid
    if needs_lookup:
        # maybe error!!
        found = matchall(self.url, ["vu=([^&]+)", "uu=([^&]+)"])
        self.vid = found
        # unpacking fails unless exactly two values were found
        (vu, uu) = found
    return self.letvcloud_download_by_vu()
def download_playlist(self, url, param):
    """Download every video referenced by the playlist page at *url*.

    Stores *url* and *param* on the instance, fetches the page and calls
    ``self.download(vid, param)`` for each ``vid="<digits>"`` attribute
    found, in document order.
    """
    self.url = url
    self.param = param
    html = get_content(self.url)
    vids = matchall(html, [r'vid="(\d+)"'])
    for v in vids:
        self.download(v, param)
def download_playlist(self, url, param):
    """Download every video referenced by the playlist page at *url*.

    Stores *url* and *param* on the instance, fetches the page and calls
    ``self.download(vid, param)`` for each ``vid="<digits>"`` attribute
    found, in document order.
    """
    self.url = url
    self.param = param
    html = get_content(self.url)
    vids = matchall(html, [r'vid="(\d+)"'])
    for v in vids:
        self.download(v, param)
def prepare_list(self):
    """List the ``index_<n>.html`` page URLs of a multi-part ``av`` video.

    URLs carrying ``aid=`` are first rewritten to the ``/video/av<id>``
    form; the page is then fetched and scanned for ``"page":<n>,`` entries.

    Returns:
        list of page URLs, or ``None`` when no page entries are found.
    """
    av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
    if "aid=" in self.url:
        self.url = 'https://www.bilibili.com/video/av' + av_id
    html = get_content(self.url)
    video_list = matchall(html, [r'"page":(\d+),'])
    if video_list:
        return ['https://www.bilibili.com/av{}/index_{}.html'.format(av_id, p)
                for p in video_list]
    return None
def prepare_list(self):
    """List the episode URLs of a bangumi season page.

    Collects every ``,"ep_id":<n>,`` value in the page at ``self.url`` and
    skips the first one before building the episode URLs.

    Returns:
        list of ``https://www.bilibili.com/bangumi/play/ep<n>`` URLs, or
        ``None`` when the page contains no ``ep_id`` entries.
    """
    html = get_content(self.url)
    video_list = matchall(html, [r',"ep_id":(\d+),'])
    if video_list:
        # the first hit is dropped (presumably the current episode, which
        # is listed again further down -- TODO confirm)
        del video_list[0]
        return [
            'https://www.bilibili.com/bangumi/play/ep{}'.format(eid)
            for eid in video_list
        ]
    return None
def get_url(self):
    """Collect the distinct URLs matched by ``self.url_patterns``.

    Does nothing when ``self.url_patterns`` is empty.  Otherwise
    ``self.html`` is searched with the patterns, protocol-relative matches
    (``//...``) get an ``http:`` scheme, duplicates are dropped while
    keeping first-seen order, and the result is stored in ``self.v_url``.
    """
    patterns = self.url_patterns
    if not patterns:
        return
    collected = []
    for found in matchall(self.html, patterns):
        link = 'http:' + found if found[:2] == '//' else found
        if link in collected:
            continue
        collected.append(link)
    self.v_url = collected
def prepare_list(self):
    """List the per-page URLs of a multi-part bilibili video.

    The video id (``av<digits>`` or a ``BV``/``bv`` id) is parsed from
    ``self.url``; ``av`` ids are converted with ``av2bv``.  The page is then
    fetched and scanned for ``"page":<n>,`` entries.

    Returns:
        list of ``https://www.bilibili.com/video/<id>?p=<n>`` URLs, or
        ``None`` when no page entries are found.
    """
    # backup https://api.bilibili.com/x/player/pagelist?bvid=
    vid = match1(self.url, r'/(av\d+|(?:BV|bv)[0-9A-Za-z]{10})')
    if vid[:2] == 'av':
        vid = av2bv(vid)
    html = get_content(self.url)
    video_list = matchall(html, [r'"page":(\d+),'])
    if video_list:
        return ['https://www.bilibili.com/video/{}?p={}'.format(vid, p)
                for p in video_list]
    return None
def prepare_list(self):
    """List the episode URLs found in the page's ``"epList"`` array.

    Returns:
        list of ``https://www.bilibili.com/bangumi/play/ep<id>`` URLs, or
        ``None`` when the page has no ``"epList"`` array.
    """
    html = get_content(self.url)
    eplist = match1(html, r'"epList":(\[.+?\])')
    if eplist:
        eplist = matchall(eplist, [r',"id":(\d+),'])
        return [
            'https://www.bilibili.com/bangumi/play/ep{}'.format(eid)
            for eid in eplist
        ]
    return None
def get_url(self):
    """Collect the distinct URLs matched by ``self.url_patterns``.

    Does nothing when ``self.url_patterns`` is empty.  Otherwise
    ``self.html`` is searched with the patterns (passed as separate
    arguments), protocol-relative matches (``//...``) get an ``http:``
    scheme, duplicates are dropped while keeping first-seen order, and the
    result is stored in ``self.v_url``.
    """
    patterns = self.url_patterns
    if not patterns:
        return
    collected = []
    for found in matchall(self.html, *patterns):
        link = 'http:' + found if found[:2] == '//' else found
        if link in collected:
            continue
        collected.append(link)
    self.v_url = collected
def prepare_list(self):
    """List the episode URLs reachable from the page at ``self.url``.

    If the page has ``<option value='...'>`` entries, those paths are
    returned prefixed with ``http://www.bilibili.com``.  Otherwise the
    season id is read from the page and the season-info endpoint is
    queried; its episodes are returned ordered by their ``index`` field.

    Returns:
        list of episode URLs.
    """
    html = get_content(self.url)
    video_list = matchall(html, ["<option value='([^']*)'"])
    if video_list:
        return ['http://www.bilibili.com' + v for v in video_list]

    sid = match1(html, r'var season_id = "(\d+)";')
    j_ = get_content(
        "http://bangumi.bilibili.com/jsonp/seasoninfo/{}.ver".format(sid))
    s_data = json.loads(j_)
    episodes = sorted(s_data['result']['episodes'], key=lambda e: e['index'])
    return [e['webplay_url'] for e in episodes]
def prepare_list(self):
    """List the episode URLs found in every ``"epList"`` array of the page.

    Returns:
        list of ``https://www.bilibili.com/bangumi/play/ep<id>`` URLs in
        document order, or ``None`` when the page has no ``"epList"`` array.
    """
    html = get_content(self.url)
    eplist = matchall(html, [r'"epList":(\[.*?\])'])
    if eplist:
        # Flatten the ids of all arrays; a comprehension avoids the
        # quadratic list copying of sum(..., []).
        episode_ids = [eid
                       for chunk in eplist
                       for eid in matchall(chunk, [r',"id":(\d+),'])]
        return [
            'https://www.bilibili.com/bangumi/play/ep{}'.format(eid)
            for eid in episode_ids
        ]
    return None
def prepare(self):
    """Build the ``VideoInfo`` from the site's video-info XML.

    When ``self.url`` is set, the vid is read from the ``playVideo("<id>``
    call in the page source.  The XML returned by ``video_info_xml`` then
    provides the title, the segment URLs and their sizes.

    Returns:
        VideoInfo with a single ``'current'`` stream whose size is the sum
        of all ``<filesize>`` values.
    """
    info = VideoInfo(self.name)
    if self.url:
        html = get_content(self.url)
        self.vid = match1(html, r'playVideo\("(\d+)')
    self.logger.debug("VID: {}".format(self.vid))

    xml = video_info_xml(self.vid)
    info.title = match1(xml, r'<vname><!\[CDATA\[([^\]]+)')
    urls = matchall(xml, [r'<url><!\[CDATA\[([^\]]+)'])
    sizes = matchall(xml, ['<filesize>([^<]+)'])
    size = sum(int(s) for s in sizes)

    info.stream_types.append('current')
    info.streams['current'] = {
        'container': 'hlv',
        'video_profile': 'current',
        'src': urls,
        'size': size
    }
    return info
def prepare(self):
    """Populate this extractor with the live stream of a bilibili room.

    Marks the extractor as live.  When ``self.vid`` is missing, the cid is
    read from the room page and the page title is split on ``-`` into
    ``self.title`` and ``self.artist``.  The play-URL API is then queried
    and the second CDATA payload of its response becomes the stream source.

    Returns:
        None.  Results are stored in ``self.stream_types`` / ``self.streams``.
    """
    self.live = True
    if not self.vid:
        html = get_content(self.url)
        self.vid = match1(html, 'cid=([^&]+)')
        t = match1(html, '<title>([^<]+)').split('-')
        self.title = t[0]
        self.artist = t[1]

    playurl_xml = get_content(
        'http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
    # index 1: the second CDATA section is used as the stream URL
    urls = [matchall(playurl_xml, [r'CDATA\[([^\]]+)'])[1]]
    size = float('inf')  # live stream: no finite size
    ext = 'flv'
    self.stream_types.append('current')
    self.streams['current'] = {
        'container': ext,
        'video_profile': 'current',
        'src': urls,
        'size': size
    }
def prepare(self):
    """Build the ``VideoInfo`` for a bilibili live room.

    When ``self.vid`` is missing, the room id is read from the page
    (``var ROOMID = <n>;``) and the page title is split on ``-`` into
    title and artist.  The play-URL API is then queried and the second
    CDATA payload of its response becomes the stream source.

    Returns:
        VideoInfo with a single ``'current'`` flv stream of infinite size.
    """
    info = VideoInfo(self.name, True)
    if not self.vid:
        html = get_content(self.url)
        self.vid = match1(html, r'var ROOMID = (\d+);')
        t = match1(html, '<title>([^<]+)').split('-')
        info.title = t[0]
        info.artist = t[1]

    data = get_content(
        'http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
    # index 1: the second CDATA section is used as the stream URL
    urls = [matchall(data, [r'CDATA\[([^\]]+)'])[1]]
    size = float('inf')  # live stream: no finite size
    ext = 'flv'
    info.stream_types.append('current')
    info.streams['current'] = {
        'container': ext,
        'video_profile': 'current',
        'src': urls,
        'size': size
    }
    return info
def prepare(self):
    """Build the ``VideoInfo`` for a joy.cn video.

    A missing vid is parsed from ``self.url``; a missing URL is built from
    ``self.vid``.  The title comes from the first ``<meta content="...">``
    tag and the stream from the fourth ``<source src="...">`` on the page.

    Returns:
        VideoInfo with a single ``'current'`` stream.
    """
    result = VideoInfo(self.name)

    if not self.vid:
        self.vid = match1(self.url, 'resourceId=([0-9]+)')
    if not self.url:
        self.url = "http://www.joy.cn/video?resourceId={}".format(self.vid)

    page = get_content(self.url)
    result.title = match1(page, '<meta content=\"([^\"]+)')

    sources = matchall(page, ['<source src=\"([^\"]+)'])
    media_url = sources[3]
    _, container, nbytes = url_info(media_url)

    result.stream_types.append('current')
    current = {}
    current['container'] = container
    current['src'] = [media_url]
    current['size'] = nbytes
    result.streams['current'] = current
    return result
def prepare(self):
    """Build the ``VideoInfo`` for an iqiyi video via the vps API.

    ``self.vid`` is a ``(tvid, vid)`` pair.  When missing it is derived from
    a ``curid=<tvid>_<vid>`` URL parameter or scraped from the page source.
    Each quality level reported by ``getvps`` becomes one flv stream whose
    segment URLs are resolved with one request per segment.

    Returns:
        VideoInfo with ``stream_types`` ordered by ``self.ids``.

    Raises:
        AssertionError: if the vps API does not answer with code ``A00000``.
    """
    info = VideoInfo(self.name)

    if self.url and not self.vid:
        vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
        if vid:
            self.vid = vid[0]
            info.title = self.name + '_' + str(self.vid)

    if self.url and not self.vid:
        html = get_content(self.url)
        tvid = match1(html, 'data-player-tvid="([^"]+)"', 'tvid=([^&]+)',
                      'tvId:([^,]+)')
        videoid = match1(html, 'data-player-videoid="([^"]+)"',
                         'vid=([^&]+)', 'vid:"([^"]+)')
        self.vid = (tvid, videoid)
        info.title = match1(html, '<title>([^<]+)').split('-')[0]

    tvid, vid = self.vid
    vps_data = getvps(tvid, vid)
    assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
    url_prefix = vps_data['data']['vp']['du']
    track = vps_data['data']['vp']['tkl'][0]
    for vs in track['vs']:
        bid = vs['bid']
        real_urls = []
        for seg_info in vs['fs']:
            # each segment entry points at a JSON document holding the
            # final download URL under key 'l'
            url = url_prefix + seg_info['l']
            json_data = json.loads(get_content(url))
            real_urls.append(json_data['l'])
        stream = self.vd_2_id[bid]
        info.stream_types.append(stream)
        info.streams[stream] = {
            'video_profile': self.id_2_profile[stream],
            'container': 'flv',
            'src': real_urls,
            'size': 0
        }
    info.stream_types = sorted(info.stream_types, key=self.ids.index)
    return info
def prepare(self):
    """Build the ``VideoInfo`` for a kankanews article.

    The ``omsid`` is looked up in the article's vxml descriptor, first using
    the id in ``self.url`` and, if that yields nothing (or ``'0'``), using
    the ``xmlid=`` / ``embed/`` id found in the page source.  The video API
    is then queried for the title and stream URLs.

    Returns:
        VideoInfo with a single ``'current'`` mp4 stream.

    Raises:
        AssertionError: if no usable ``omsid`` could be determined.
    """
    info = VideoInfo(self.name)
    id1 = match1(self.url, r'a/([^\.]+)\.')
    api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
    video_data1 = get_content(api1)
    self.vid = match1(video_data1, '<omsid>([^<]+)<')

    if self.vid == '0' or not self.vid:
        html = get_content(self.url)
        # NOTE: .replace() applies only to the 'embed/' alternative.
        id1 = match1(html, 'xmlid=([^"]+)') or match1(
            html, 'embed/([^"]+)').replace('_', '/')
        api1 = 'http://www.kankanews.com/vxml/{}.xml'.format(id1)
        video_data1 = get_content(api1)
        self.vid = match1(video_data1, '<omsid>([^<]+)<')

    assert self.vid != '0' and self.vid, self.url + ': Not a video news link!'

    api2 = 'http://vapi.kankanews.com/index.php?app=api&mod=public&act=getvideo&id={}'.format(
        self.vid)
    video_data2 = get_content(api2)
    urls = matchall(video_data2, [r'<videourl><!\[CDATA\[([^\]]+)'])
    info.title = match1(video_data2, r'<otitle><!\[CDATA\[([^\]]+)')
    info.stream_types.append('current')
    info.streams['current'] = {
        'container': 'mp4',
        'video_profile': 'current',
        'src': urls,
        'size': 0
    }
    return info
def prepare(self):
    """Build the ``VideoInfo`` for a pptv video.

    The numeric id is read from the page's ``webcfg`` object and used to
    fetch the web-play XML, which supplies the host, key token, resource
    id, title, server time and segment list.  One URL is generated per
    segment number from 0 up to the highest ``no`` listed.

    Returns:
        VideoInfo with a single ``'current'`` mp4 stream whose size is the
        sum of the segments' ``fs`` values.
    """
    info = VideoInfo(self.name)
    html = get_content(self.url)
    self.vid = match1(html, r'webcfg\s*=\s*{"id":\s*(\d+)')
    xml = get_content(
        'http://web-play.pptv.com/webplay3-0-{}.xml?type=web.fpp'.format(
            self.vid))

    host = match1(xml, '<sh>([^<>]+)</sh>')
    k = match1(xml, '<key expire=[^<>]+>([^<>]+)</key>')
    rid = match1(xml, 'rid="([^"]+)"')
    info.title = match1(xml, 'nm="([^"]+)"')

    # Drop the last four characters of the timestamp so time.strptime can
    # parse it with its default format.
    st = match1(xml, '<st>([^<>]+)</st>')[:-4]
    st = time.mktime(
        time.strptime(st)) * 1000 - 60 * 1000 - time.time() * 1000
    st += time.time() * 1000
    st = st / 1000
    key = constructKey(st)

    pieces = matchall(xml, [r'<sgm no="(\d+)"[^<>]+fs="(\d+)"'])
    numbers, fs = zip(*pieces)
    urls = [
        "http://{}/{}/{}?key={}&fpp.ver=1.3.0.4&k={}&type=web.fpp".format(
            host, i, rid, key, k)
        for i in range(max(map(int, numbers)) + 1)
    ]
    total_size = sum(map(int, fs))

    info.stream_types.append('current')
    info.streams['current'] = {
        'container': 'mp4',
        'video_profile': 'current',
        'src': urls,
        'size': total_size
    }
    return info
def prepare(self):
    """Populate this extractor with the live stream of a bilibili room.

    Marks the extractor as live.  When ``self.vid`` is missing, the cid is
    read from the room page and the page title is split on ``-`` into
    ``self.title`` and ``self.artist``.  The play-URL API is then queried
    and the second CDATA payload of its response becomes the stream source.

    Returns:
        None.  Results are stored in ``self.stream_types`` / ``self.streams``.
    """
    self.live = True
    if not self.vid:
        html = get_content(self.url)
        self.vid = match1(html, 'cid=([^&]+)')
        t = match1(html, '<title>([^<]+)').split('-')
        self.title = t[0]
        self.artist = t[1]

    playurl_xml = get_content(
        'http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
    # index 1: the second CDATA section is used as the stream URL
    urls = [matchall(playurl_xml, [r'CDATA\[([^\]]+)'])[1]]
    size = float('inf')  # live stream: no finite size
    ext = 'flv'
    self.stream_types.append('current')
    self.streams['current'] = {
        'container': ext,
        'video_profile': 'current',
        'src': urls,
        'size': size
    }
def prepare(self):
    """Build the ``VideoInfo`` for a joy.cn video.

    A missing vid is parsed from ``self.url``; a missing URL is built from
    ``self.vid``.  The title comes from the first ``<meta content="...">``
    tag and the stream from the fourth ``<source src="...">`` on the page.

    Returns:
        VideoInfo with a single ``'current'`` stream.
    """
    result = VideoInfo(self.name)

    if not self.vid:
        self.vid = match1(self.url, 'resourceId=([0-9]+)')
    if not self.url:
        self.url = "http://www.joy.cn/video?resourceId={}".format(self.vid)

    page = get_content(self.url)
    result.title = match1(page, '<meta content=\"([^\"]+)')

    sources = matchall(page, '<source src=\"([^\"]+)')
    media_url = sources[3]
    _, container, nbytes = url_info(media_url)

    result.stream_types.append('current')
    current = {}
    current['container'] = container
    current['src'] = [media_url]
    current['size'] = nbytes
    result.streams['current'] = current
    return result
def prepare(self):
    """Build the ``VideoInfo`` for a bilibili live room.

    When ``self.vid`` is missing, the room id is read from the page
    (``var ROOMID = <n>;``) and the page title is split on ``-`` into
    title and artist.  The play-URL API is then queried and the second
    CDATA payload of its response becomes the stream source.

    Returns:
        VideoInfo with a single ``'current'`` flv stream of infinite size.
    """
    info = VideoInfo(self.name, True)
    if not self.vid:
        html = get_content(self.url)
        self.vid = match1(html, r'var ROOMID = (\d+);')
        t = match1(html, '<title>([^<]+)').split('-')
        info.title = t[0]
        info.artist = t[1]

    data = get_content(
        'http://live.bilibili.com/api/playurl?cid={}'.format(self.vid))
    # index 1: the second CDATA section is used as the stream URL
    urls = [matchall(data, [r'CDATA\[([^\]]+)'])[1]]
    size = float('inf')  # live stream: no finite size
    ext = 'flv'
    info.stream_types.append('current')
    info.streams['current'] = {
        'container': ext,
        'video_profile': 'current',
        'src': urls,
        'size': size
    }
    return info
def get_vid_title(self):
    """Resolve the cid and title of the page addressed by ``self.url``.

    The av id and page index are parsed from ``self.url``, which is then
    rewritten to the canonical ``https://www.bilibili.com/av<id>/`` form
    (with ``?p=<n>`` for pages other than the first).  When ``self.vid`` is
    not set, the page is fetched and scanned for the entry whose page number
    equals the requested index.

    Returns:
        tuple: ``(vid, title)``.  ``vid`` falls back to ``self.vid`` when no
        lookup is performed or no page entry matches; ``title`` is ``None``
        when the page is not fetched.
    """
    av_id = match1(self.url, r'(?:/av|aid=)(\d+)')
    page_index = '1'
    if "#page=" in self.url or "?p=" in self.url or 'index_' in self.url:
        # A marker without a parsable number must not yield "?p=None".
        page_index = match1(self.url, r'(?:#page|\?p)=(\d+)',
                            r'index_(\d+)\.') or '1'
    if page_index == '1':
        self.url = 'https://www.bilibili.com/av{}/'.format(av_id)
    else:
        self.url = 'https://www.bilibili.com/av{}/?p={}'.format(
            av_id, page_index)

    # Defaults keep the final return well-defined when self.vid is already
    # set or when no page entry matches (previously an UnboundLocalError).
    vid = self.vid
    title = None
    if not self.vid:
        html = get_content(self.url)
        title = match1(html, '"title":"([^"]+)', '<h1 title="([^"]+)',
                       '<title>([^<]+)').strip()
        video_list = matchall(
            html,
            [r'"cid":(\d+),"page":(\d+),"from":"[^"]+","part":"([^"]*)",'])
        for cid, page, part in video_list:
            if page == page_index:
                vid = cid
                if len(video_list) > 1:
                    title = u'{} - {} - {}'.format(title, page, part)
                elif part:
                    title = u'{} - {}'.format(title, part)
                break
    return vid, title
def prepare(self):
    """Build the ``VideoInfo`` for an iqiyi video via the vps API.

    ``self.vid`` is a ``(tvid, vid)`` pair.  When missing it is derived from
    a ``curid=<tvid>_<vid>`` URL parameter or scraped from the page source.
    Each quality level reported by ``getvps`` becomes one flv stream whose
    segment URLs are resolved with one request per segment.

    Returns:
        VideoInfo with ``stream_types`` ordered by ``self.ids``.

    Raises:
        AssertionError: if the vps API does not answer with code ``A00000``.
    """
    info = VideoInfo(self.name)

    if self.url and not self.vid:
        vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
        if vid:
            self.vid = vid[0]
            info.title = self.name + '_' + str(self.vid)

    if self.url and not self.vid:
        html = get_content(self.url)
        tvid = match1(html, 'data-player-tvid="([^"]+)"', 'tvid=([^&]+)',
                      'tvId:([^,]+)')
        videoid = match1(html, 'data-player-videoid="([^"]+)"',
                         'vid=([^&]+)', 'vid:"([^"]+)')
        self.vid = (tvid, videoid)
        info.title = match1(html, '<title>([^<]+)').split('-')[0]

    tvid, vid = self.vid
    vps_data = getvps(tvid, vid)
    assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
    url_prefix = vps_data['data']['vp']['du']
    track = vps_data['data']['vp']['tkl'][0]
    for vs in track['vs']:
        bid = vs['bid']
        real_urls = []
        for seg_info in vs['fs']:
            # each segment entry points at a JSON document holding the
            # final download URL under key 'l'
            url = url_prefix + seg_info['l']
            json_data = json.loads(get_content(url))
            real_urls.append(json_data['l'])
        stream = self.vd_2_id[bid]
        info.stream_types.append(stream)
        info.streams[stream] = {
            'video_profile': self.id_2_profile[stream],
            'container': 'flv',
            'src': real_urls,
            'size': 0
        }
    info.stream_types = sorted(info.stream_types, key=self.ids.index)
    return info
def prepare_list(self):
    """Return the ``href`` of every ``"a-pic-play"`` link on the page.

    The page at ``self.url`` is fetched with an empty ``headers`` dict.
    """
    link_patterns = ['"a-pic-play" href="([^"]+)"']
    page = get_content(self.url, headers={})
    links = matchall(page, link_patterns)
    return links
def prepare_list(self):
    """Return the ids of all playlist entries on the page at ``self.url``.

    An entry is any ``id="..."`` attribute that is followed by a single
    non-whitespace character and `` title``.
    """
    html = get_content(self.url)
    return matchall(html, [r'id="([^"]+)\S title'])
def prepare_list(self):
    """Return every ``video_id: <value>`` value found on the page.

    Each value runs up to (not including) the next comma.
    """
    id_patterns = ['video_id: ([^,]+)']
    page = get_content(self.url)
    found_ids = matchall(page, id_patterns)
    return found_ids
def prepare_list(self):
    """Return one placeholder URL per ``vid="<digits>"`` on the page.

    The URLs are not real pages; they only carry the vid in the form
    ``http://live.le.com/izt/vid=<vid>``.
    """
    html = get_content(self.url)
    vids = matchall(html, [r'vid="(\d+)"'])
    # fake urls
    return ['http://live.le.com/izt/vid={}'.format(vid) for vid in vids]
def prepare_list(self):
    """Return the ``data-tvid`` / ``data-vid`` pairs found on the page.

    Only elements where ``data-tvid="..."`` is immediately followed by
    ``data-vid="..."`` are matched.
    """
    pair_patterns = ['data-tvid=\"([^\"]+)\" data-vid=\"([^\"]+)\"']
    page = get_content(self.url)
    pairs = matchall(page, pair_patterns)
    return pairs
def prepare_list(self):
    """Return every ``vid="<digits>"`` value found on the page at ``self.url``."""
    html = get_content(self.url)
    return matchall(html, [r'vid="(\d+)"'])
def prepare_list(self):
    """Return everything ``douyu_match_pattern`` matches on the page.

    ``douyu_match_pattern`` is defined outside this function and is passed
    to ``matchall`` unchanged.
    """
    page = get_content(self.url)
    matches = matchall(page, douyu_match_pattern)
    return matches
def prepare(self):
    """Build the ``VideoInfo`` for an iqiyi video.

    ``self.vid`` is a ``(tvid, vid)`` pair.  When missing it is derived from
    a ``curid=<tvid>_<vid>`` URL parameter (title from the player-info API)
    or scraped from the page source.  Streams are then gathered from the
    first source that works, tried in this order:

    1. tmts  -- m3u8 streams, fewest HTTP requests;
    2. vps   -- segmented flv streams;
    3. dash  -- segmented streams, queried once per quality level.

    Returns:
        VideoInfo with ``stream_types`` ordered by ``self.ids``.

    Raises:
        AssertionError: if no ``(tvid, vid)`` pair is found, if the dash
            fallback is refused, or if no stream could be collected.
    """
    info = VideoInfo(self.name)

    if self.url and not self.vid:
        vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
        if vid:
            self.vid = vid[0]
            info_u = ('http://pcw-api.iqiyi.com/video/video/'
                      'playervideoinfo?tvid=' + self.vid[0])
            try:
                info_json = json.loads(get_content(info_u))
                info.title = info_json['data']['vn']
            except Exception:
                # Title lookup failed: drop the ids so that the page
                # scrape below gets a chance instead.
                self.vid = None

    def get_vid():
        """Scrape ``(tvid, vid)`` and the title from the page at self.url."""
        html = get_content(self.url)
        video_info = match1(html, ":video-info='(.+?)'")
        if video_info:
            video_info = json.loads(video_info)
            self.vid = str(video_info['tvId']), str(video_info['vid'])
            info.title = video_info['name']
        else:
            tvid = match1(html,
                          r'tvId:\s*"([^"]+)',
                          'data-video-tvId="([^"]+)',
                          r'''\['tvid'\]\s*=\s*"([^"]+)''',
                          r'"tvId":\s*([^,]+)')
            videoid = match1(html,
                             'data-video-vid="([^"]+)',
                             r'vid:\s*"([^"]+)',
                             r'''\['vid'\]\s*=\s*"([^"]+)''',
                             r'"vid":\s*([^,]+)')
            if not (tvid and videoid):
                # The page may only link to the real video page; follow it.
                url = match1(html, r'(www\.iqiyi\.com/v_\w+\.html)')
                if url:
                    self.url = 'https://' + url
                    return get_vid()
            self.vid = (tvid, videoid)
            info.title = match1(html, '<title>([^<]+)').split('-')[0]

    if self.url and not self.vid:
        get_vid()
    tvid, vid = self.vid
    assert tvid and vid, 'can\'t play this video!!'

    def push_stream_vd(vs):
        """Register one m3u8 stream described by a tmts entry."""
        vd = vs['vd']
        stream = self.vd_2_id[vd]
        if stream not in info.streams:
            info.stream_types.append(stream)
        elif int(vd) < 10:
            # an entry with vd below 10 never replaces an existing stream
            return
        m3u8 = vs['m3utx']
        stream_profile = self.id_2_profile[stream]
        info.streams[stream] = {
            'video_profile': stream_profile,
            'container': 'm3u8',
            'src': [m3u8],
            'size': 0
        }

    def push_stream_bid(bid, container, fs_array, size):
        """Register one segmented stream; the first entry per stream wins."""
        stream = self.vd_2_id[bid]
        if stream in info.streams:
            return
        real_urls = []
        for seg_info in fs_array:
            # url_prefix is assigned by the vps/dash code below before
            # this helper is called.
            url = url_prefix + seg_info['l']
            json_data = json.loads(get_content(url))
            down_url = json_data['l']
            real_urls.append(down_url)
        info.stream_types.append(stream)
        stream_profile = self.id_2_profile[stream]
        info.streams[stream] = {
            'video_profile': stream_profile,
            'container': container,
            'src': real_urls,
            'size': size
        }

    try:
        # try use tmts first
        # less http requests, get results quickly
        tmts_data = gettmts(tvid, vid)
        self.logger.debug('tmts_data:\n' + str(tmts_data))
        assert tmts_data['code'] == 'A00000', 'can\'t play this video!!'
        vs_array = tmts_data['data']['vidl']
        for vs in vs_array:
            push_stream_vd(vs)
        vip_conf = tmts_data['data'].get('ctl', {}).get('configs')
        if vip_conf:
            for vds in (('10', '19'), ('18', '5')):
                for vd in vds:
                    if vd in vip_conf:
                        tmts_data = gettmts(tvid, vip_conf[vd]['vid'])
                        if tmts_data['code'] == 'A00000':
                            push_stream_vd(tmts_data['data'])
                            break
    except Exception:
        try:
            # use vps as preferred fallback
            vps_data = getvps(tvid, vid)
            self.logger.debug('vps_data:\n' + str(vps_data))
            assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
            url_prefix = vps_data['data']['vp']['du']
            vs_array = vps_data['data']['vp']['tkl'][0]['vs']
            for vs in vs_array:
                bid = vs['bid']
                fs_array = vs['fs']
                size = vs['vsize']
                push_stream_bid(bid, 'flv', fs_array, size)
        except Exception:
            # use dash as fallback
            for bid in (500, 300, 200, 100):
                dash_data = getdash(tvid, vid, bid)
                self.logger.debug('dash_data:\n' + str(dash_data))
                assert dash_data['code'] == 'A00000', 'can\'t play this video!!'
                url_prefix = dash_data['data']['dd']
                streams = dash_data['data']['program']['video']
                for stream in streams:
                    if 'fs' in stream:
                        # Push only when an entry with segments exists;
                        # pushing after the loop used unbound or stale
                        # values when none of the entries had 'fs'.
                        push_stream_bid(stream['bid'], stream['ff'],
                                        stream['fs'], stream['vsize'])
                        break

    assert info.streams, 'can\'t play this video!!'
    info.stream_types = sorted(info.stream_types, key=self.ids.index)
    return info
def prepare(self):
    """Build the ``VideoInfo`` for an iqiyi video via the VMS API.

    ``self.vid`` is a ``(tvid, vid)`` pair, derived from a
    ``curid=<tvid>_<vid>`` URL parameter or scraped from the page when
    missing.  If ``getVMS`` refuses the request, the H5 API is queried once
    per entry of ``self.id_h5`` and only those results are returned.
    Otherwise every known entry of ``vidl`` becomes an m3u8 stream, and the
    VIP-only 1080p (``'BD'``) and 4k streams are added when the response
    lists them and they are not already present.

    Returns:
        VideoInfo; ``stream_types`` is ordered by ``self.ids`` except on
        the H5 path, where it keeps the ``self.id_h5`` order.
    """
    info = VideoInfo(self.name)

    if self.url and not self.vid:
        vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
        if vid:
            self.vid = vid[0]

    if self.url and not self.vid:
        html = get_content(self.url)
        tvid = match1(html, 'data-player-tvid="([^"]+)"', 'tvid=([^&]+)',
                      'tvId:([^,]+)')
        videoid = match1(html, 'data-player-videoid="([^"]+)"',
                         'vid=([^&]+)', 'vid:"([^"]+)')
        self.vid = (tvid, videoid)
        info.title = match1(html, '<title>([^<]+)').split('-')[0]

    tvid, vid = self.vid
    data = getVMS(tvid, vid)

    if data['code'] != 'A00000':
        # VMS refused: fall back to the H5 API, one request per quality.
        for bid in self.id_h5:
            h5_data = geth5VMS(tvid, vid, bid)
            if h5_data["code"] == "A00000":
                stream = self.vd_2_id[bid]
                profile = self.id_2_profile[stream]
                info.title = h5_data['data']['playInfo']['vn']
                info.stream_types.append(stream)
                info.streams[stream] = {
                    'container': 'mp4',
                    'video_profile': profile,
                    'src': [h5_data['data']['m3u']],
                    'size': 0
                }
        return info

    for stream in data['data']['vidl']:
        try:
            stream_id = self.vd_2_id[stream['vd']]
            if stream_id in info.stream_types or stream_id in self.id_ignore:
                continue
            stream_profile = self.id_2_profile[stream_id]
            info.stream_types.append(stream_id)
            info.streams[stream_id] = {
                'video_profile': stream_profile,
                'container': 'm3u8',
                'src': [stream['m3u']],
                'size': 0
            }
        except Exception:
            # unknown quality code or incomplete entry: report and skip
            log.i("vd: {} is not handled".format(stream['vd']))
            log.i("info is {}".format(stream))

    # why I need do below???
    try:
        vip_vds = data['data']['ctl']['vip']['bids']
        vip_conf = data['data']['ctl']['configs']
    except Exception:
        # no VIP section in the response: nothing more to add
        info.stream_types = sorted(info.stream_types, key=self.ids.index)
        return info

    # (bid listed in vip_vds, stream id, profile label)
    for vip_bid, stream_id, profile in ((5, 'BD', '1080p'),
                                        (10, '4k', '4k')):
        if stream_id in info.stream_types or vip_bid not in vip_vds:
            continue
        vip_info = getVMS(tvid, vip_conf[str(vip_bid)]['vid'])
        if vip_info['code'] == 'A00000':
            vip_url = vip_info['data']['m3u']
            info.stream_types.append(stream_id)
            info.streams[stream_id] = {
                'video_profile': profile,
                'container': 'm3u8',
                'src': [vip_url],
                'size': 0
            }

    info.stream_types = sorted(info.stream_types, key=self.ids.index)
    return info
def prepare_list(self):
    """Return the entries of the page's ``video_list=[...]`` array.

    Returns:
        list of the double-quoted items inside the array, or an empty list
        when the page has no ``video_list=[`` assignment.
    """
    html = get_content(self.url)
    video_list = match1(html, r'video_list=\[([^\]]+)')
    if not video_list:
        # Nothing to split; avoid handing None to matchall.
        return []
    return matchall(video_list, ['"([^",]+)'])
def prepare(self):
    """Derive ``self.vid`` from the URL if needed, then delegate.

    When a URL is available and no vid is set, the ``vu=`` and ``uu=``
    query values are extracted from ``self.url`` and stored in ``self.vid``.
    The result of ``self.letvcloud_download_by_vu()`` is returned.
    """
    needs_lookup = self.url and not self.vid
    if needs_lookup:
        # maybe error!!
        found = matchall(self.url, ["vu=([^&]+)","uu=([^&]+)"])
        self.vid = found
        # unpacking fails unless exactly two values were found
        (vu, uu) = found
    return self.letvcloud_download_by_vu()
def prepare(self):
    """Build the ``VideoInfo`` for an iqiyi video.

    ``self.vid`` is a ``(tvid, vid)`` pair.  When missing it is derived from
    a ``curid=<tvid>_<vid>`` URL parameter (title from the player-info API)
    or scraped from the page source.  Streams are then gathered from the
    first source that works, tried in this order:

    1. tmts  -- m3u8 streams, fewest HTTP requests;
    2. vps   -- segmented flv streams;
    3. dash  -- segmented streams, queried once per quality level.

    Returns:
        VideoInfo with ``stream_types`` ordered by ``self.ids``.

    Raises:
        AssertionError: if no ``(tvid, vid)`` pair is found, if the dash
            fallback is refused, or if no stream could be collected.
    """
    info = VideoInfo(self.name)

    if self.url and not self.vid:
        vid = matchall(self.url, [r'curid=([^_]+)_([\w]+)'])
        if vid:
            self.vid = vid[0]
            info_u = ('http://pcw-api.iqiyi.com/video/video/'
                      'playervideoinfo?tvid=' + self.vid[0])
            try:
                info_json = json.loads(get_content(info_u))
                info.title = info_json['data']['vn']
            except Exception:
                # Title lookup failed: drop the ids so that the page
                # scrape below gets a chance instead.
                self.vid = None

    def get_vid():
        """Scrape ``(tvid, vid)`` and the title from the page at self.url."""
        html = get_content(self.url)
        video_info = match1(html, ":video-info='(.+?)'")
        if video_info:
            video_info = json.loads(video_info)
            self.vid = str(video_info['tvId']), str(video_info['vid'])
            info.title = video_info['name']
        else:
            tvid = match1(html,
                          r'tvId:\s*"([^"]+)',
                          'data-video-tvId="([^"]+)',
                          r'''\['tvid'\]\s*=\s*"([^"]+)''',
                          r'"tvId":\s*([^,]+)')
            videoid = match1(html,
                             'data-video-vid="([^"]+)',
                             r'''vid"?'?\]?\s*(?:=|:)\s*"?'?([^"',]+)''')
            if not (tvid and videoid):
                # The page may only link to the real video page; follow it.
                url = match1(html, r'(www\.iqiyi\.com/v_\w+\.html)')
                if url:
                    self.url = 'https://' + url
                    return get_vid()
            self.vid = (tvid, videoid)
            info.title = match1(html, '<title>([^<]+)').split('-')[0]

    if self.url and not self.vid:
        get_vid()
    tvid, vid = self.vid
    assert tvid and vid, 'can\'t play this video!!'

    def push_stream_vd(vs):
        """Register one m3u8 stream described by a tmts entry."""
        vd = vs['vd']
        stream = self.vd_2_id[vd]
        if stream in info.streams:
            # prefer H264 than H265
            if vs.get('fileFormat') == 'H265':
                return
        else:
            info.stream_types.append(stream)
        m3u8 = vs['m3utx']
        stream_profile = self.id_2_profile[stream]
        info.streams[stream] = {
            'video_profile': stream_profile,
            'container': 'm3u8',
            'src': [m3u8],
            'size': 0
        }

    def push_stream_bid(bid, container, fs_array, size):
        """Register one segmented stream; the first entry per stream wins."""
        stream = self.vd_2_id[bid]
        if stream in info.streams:
            return
        real_urls = []
        for seg_info in fs_array:
            # url_prefix is assigned by the vps/dash code below before
            # this helper is called.
            url = url_prefix + seg_info['l']
            json_data = json.loads(get_content(url))
            down_url = json_data['l']
            real_urls.append(down_url)
        info.stream_types.append(stream)
        stream_profile = self.id_2_profile[stream]
        info.streams[stream] = {
            'video_profile': stream_profile,
            'container': container,
            'src': real_urls,
            'size': size
        }

    try:
        # try use tmts first
        # less http requests, get results quickly
        tmts_data = gettmts(tvid, vid)
        self.logger.debug('tmts_data:\n' + str(tmts_data))
        assert tmts_data['code'] == 'A00000', 'can\'t play this video!!'
        vs_array = tmts_data['data']['vidl']
        for vs in vs_array:
            push_stream_vd(vs)
        vip_conf = tmts_data['data'].get('ctl', {}).get('configs')
        if vip_conf:
            # prefer H264 than H265
            for vds in (('5', '18'), ('10', '19')):
                for vd in vds:
                    if vd in vip_conf:
                        tmts_data = gettmts(tvid, vip_conf[vd]['vid'])
                        if tmts_data['code'] == 'A00000':
                            push_stream_vd(tmts_data['data'])
                            break
    except Exception:
        try:
            # use vps as preferred fallback
            vps_data = getvps(tvid, vid)
            self.logger.debug('vps_data:\n' + str(vps_data))
            assert vps_data['code'] == 'A00000', 'can\'t play this video!!'
            url_prefix = vps_data['data']['vp']['du']
            vs_array = vps_data['data']['vp']['tkl'][0]['vs']
            for vs in vs_array:
                bid = vs['bid']
                fs_array = vs['fs']
                size = vs['vsize']
                push_stream_bid(bid, 'flv', fs_array, size)
        except Exception:
            # use dash as fallback
            for bid in (500, 300, 200, 100):
                dash_data = getdash(tvid, vid, bid)
                self.logger.debug('dash_data:\n' + str(dash_data))
                assert dash_data['code'] == 'A00000', 'can\'t play this video!!'
                url_prefix = dash_data['data']['dd']
                streams = dash_data['data']['program']['video']
                for stream in streams:
                    if 'fs' in stream:
                        _bid = stream['bid']
                        container = stream['ff']
                        fs_array = stream['fs']
                        size = stream['vsize']
                        push_stream_bid(_bid, container, fs_array, size)
                        break

    assert info.streams, 'can\'t play this video!!'
    info.stream_types = sorted(info.stream_types, key=self.ids.index)
    return info
def download_playlist(self, url, param):
    """Download every entry of the playlist page at *url*.

    An entry is any ``id="..."`` attribute that is followed by a single
    non-whitespace character and `` title``; ``self.download(vid, param)``
    is called for each one in document order.
    """
    html = get_content(url)
    vids = matchall(html, [r'id="([^"]+)\S title'])
    for vid in vids:
        self.download(vid, param)
def get_path_list(self):
    """Return the ``/v/<id>`` paths linked from the page at ``self.url``.

    Only ``href`` attributes that are directly followed by a ``title``
    attribute are matched; either quote style is accepted.
    """
    path_patterns = ['href=[\'"](/v/[a-zA-Z0-9_]+)[\'"] title=[\'"]']
    page = get_content(self.url)
    return matchall(page, path_patterns)
def prepare_list(self):
    """Return absolute URLs for every ``<option value='...'>`` on the page.

    Each option value is appended to ``http://www.bilibili.com``.
    """
    page = get_content(self.url)
    option_values = matchall(page, ['<option value=\'([^\']*)\''])
    absolute_urls = []
    for value in option_values:
        absolute_urls.append('http://www.bilibili.com' + value)
    return absolute_urls
def prepare_list(self):
    """List the episode URLs found in the page's ``"epList"`` array.

    Returns:
        list of ``https://www.bilibili.com/bangumi/play/ep<id>`` URLs, or
        ``None`` when the page has no ``"epList"`` array.
    """
    html = get_content(self.url)
    eplist = match1(html, r'"epList":(\[.+?\])')
    if eplist:
        eplist = matchall(eplist, [r',"id":(\d+),'])
        return ['https://www.bilibili.com/bangumi/play/ep{}'.format(eid)
                for eid in eplist]
    return None