def naver_download_by_url(url, info_only=False, **kwargs):
    ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
    page = get_content(url)
    og_video_url = re.search(
        r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page
    ).group(1)
    params_dict = urllib.parse.parse_qs(
        urllib.parse.urlparse(og_video_url).query
    )
    vid = params_dict['vid'][0]
    key = params_dict['outKey'][0]

    meta_str = get_content(ep.format(vid, key))
    meta_json = json.loads(meta_str)
    if 'errorCode' in meta_json:
        log.wtf(meta_json['errorCode'])
    title = meta_json['meta']['subject']
    videos = meta_json['videos']['list']
    video_list = sorted(
        videos, key=lambda video: video['encodingOption']['width']
    )
    video_url = video_list[-1]['source']

    size = url_size(video_url)
    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls([video_url], title, 'mp4', size, **kwargs)
def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)
    title = unescape_html('|'.join(
        match1(html, r'<title>(.*?)</title>').split('|')[:-2]))

    # mgid%3Auma%3Avideo%3Amtv81.com%3A897974
    vid = match1(html, r'getTheVideo\("(.*?)"')
    xml = parseString(
        get_content(
            'http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&'
            'flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456'.format(vid)))

    url = sorted(
        map(lambda x: x.firstChild.nodeValue, xml.getElementsByTagName("src")),
        key=lambda x: int(match1(x, r'_(\d+?)_')))[-1]

    mediatype, ext, size = 'mp4', 'mp4', 0
    print_info(site_info, title, mediatype, size)

    # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf  # noqa
    # because rtmpdump is unstable, it may take several tries
    if not info_only:
        download_rtmp_url(
            url=url, title=title, ext=ext, params={
                '--swfVfy': (
                    'http://media.mtvnservices.com/player/prime/mediaplayer'
                    'prime.1.10.8.swf')
            }, output_dir=output_dir)
def videomega_download(url, info_only=False, **kwargs):
    # Hot-plug cookie handler
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    opener.addheaders = [('Referer', url), ('Cookie', 'noadvtday=0')]
    request.install_opener(opener)

    if re.search(r'view\.php', url):
        php_url = url
    else:
        content = get_content(url)
        m = re.search(
            r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"', content)
        ref = m.group(1)
        width, height = m.group(2), m.group(3)
        php_url = (
            'http://videomega.tv/view.php?ref={}&width={}&height={}'.format(
                ref, width, height))
    content = get_content(php_url)

    title = match1(content, r'<title>(.*)</title>')
    js = match1(content, r'(eval.*)')
    t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
    t = re.sub(r'(\w)', r'{\1}', t)
    t = t.translate({87 + i: str(i) for i in range(10, 36)})
    s = match1(js, r"'([^']+)'\.split").split('|')
    src = t.format(*s)

    _type, ext, size = url_info(src)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([src], title, ext, size, **kwargs)
def douyutv_video_download(url, output_dir='.', merge=True, info_only=False,
                           **kwargs):
    ep = 'http://vmobile.douyu.com/video/getInfo?vid='
    patt = r'show/([0-9A-Za-z]+)'
    title_patt = r'<h1>(.+?)</h1>'

    hit = re.search(patt, url)
    if hit is None:
        log.wtf('Unknown url pattern')
    vid = hit.group(1)

    page = get_content(url)
    hit = re.search(title_patt, page)
    if hit is None:
        title = vid
    else:
        title = hit.group(1)

    meta = json.loads(get_content(ep + vid))
    if meta['error'] != 0:
        log.wtf('Error from API server')
    m3u8_url = meta['data']['video_url']
    print_info('Douyu Video', title, 'm3u8', 0, m3u8_url=m3u8_url)
    if not info_only:
        urls = general_m3u8_extractor(m3u8_url)
        download_urls(urls, title, 'ts', 0, output_dir=output_dir,
                      merge=merge, **kwargs)
def live_entry(self, **kwargs):
    # Extract room ID from the short display ID (seen in the room
    # URL). The room ID is usually the same as the short ID, but not
    # always; case in point: https://live.bilibili.com/48, with 48
    # as the short ID and 63727 as the actual ID.
    room_short_id = re.search(
        r'live.bilibili.com/([^?]+)', self.url
    ).group(1)
    room_init_api_response = json.loads(get_content(
        self.live_room_init_api_url.format(room_short_id)
    ))
    self.room_id = room_init_api_response['data']['room_id']
    room_info_api_response = json.loads(get_content(
        self.live_room_info_api_url.format(self.room_id)
    ))
    self.title = room_info_api_response['data']['title']

    api_url = self.live_api.format(self.room_id)
    json_data = json.loads(get_content(api_url))
    urls = [json_data['durl'][0]['url']]

    self.streams['live'] = {}
    self.streams['live']['src'] = urls
    self.streams['live']['container'] = 'flv'
    self.streams['live']['size'] = 0
def prepare(self, **kwargs):
    if self.url and not self.vid:
        if not re.match(r'http://v.pptv.com/show/(\w+)\.html', self.url):
            raise Exception('Unknown url pattern')
        page_content = get_content(self.url)
        self.vid = match1(page_content, r'webcfg\s*=\s*{"id":\s*(\d+)')
        if not self.vid:
            raise Exception('Cannot find id')

    self.referer = self.url
    api_url = 'http://web-play.pptv.com/webplay3-0-{}.xml'.format(self.vid)
    api_url += (
        '?appplt=flp&appid=pptv.flashplayer.vod&appver=3.4.2.28&type='
        '&version=4')
    dom = parseString(get_content(api_url))
    self.title, m_items, m_streams, m_segs = parse_pptv_xml(dom)
    xml_streams = merge_meta(m_items, m_streams, m_segs)

    for stream_id in xml_streams:
        stream_data = xml_streams[stream_id]
        src = make_url(stream_data)
        self.streams[stream_id] = {
            'container': 'mp4',
            'video_profile': stream_data['res'],
            'size': int(stream_data['size']),
            'src': src
        }
def nicovideo_download(url, info_only=False, **kwargs):
    import ssl
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    request.install_opener(opener)

    import netrc
    import getpass
    try:
        info = netrc.netrc().authenticators('nicovideo')
    except Exception:
        info = None
    if info is None:
        # No stored credentials; prompt for them interactively
        user = input('User: ')
        password = getpass.getpass('Password: ')
    else:
        user, password = info[0], info[2]
    print('Logging in...')
    nicovideo_login(user, password)

    html = get_content(url)  # necessary!
    title = match1(html, r'<title>(.+?)</title>')

    vid = url.split('/')[-1].split('?')[0]
    api_html = get_content(
        'http://flapi.nicovideo.jp/api/getflv?v={}'.format(vid))
    real_url = parse.unquote(match1(api_html, r'url=([^&]+)&'))

    _type, ext, size = url_info(real_url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([real_url], title, ext, size, **kwargs)
def xiami_download_mv(url, output_dir='.', merge=True, info_only=False):
    # FIXME: broken merge
    page = get_content(url)
    title = re.findall('<title>([^<]+)', page)[0]
    vid, uid = re.findall(r'vid:"(\d+)",uid:"(\d+)"', page)[0]
    api_url = (
        'http://cloud.video.taobao.com/videoapi/info.php?vid={}&uid={}'.format(
            vid, uid))
    result = get_content(api_url)
    doc = parseString(result)
    video_url = doc.getElementsByTagName('video_url')[-1].firstChild.nodeValue
    length = int(doc.getElementsByTagName('length')[-1].firstChild.nodeValue)

    v_urls = []
    k_start = 0
    total_size = 0
    while True:
        k_end = k_start + 20000000
        if k_end >= length:
            k_end = length - 1
        v_url = video_url + '/start_{}/end_{}/1.flv'.format(k_start, k_end)
        try:
            _, ext, size = url_info(v_url)
        except Exception:
            break
        v_urls.append(v_url)
        total_size += size
        k_start = k_end + 1

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(v_urls, title, ext, total_size, output_dir, merge=merge)
def xiami_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    # albums
    if re.match(r'http://www.xiami.com/album/\d+', url):
        _id = match1(url, r'http://www.xiami.com/album/(\d+)')
        xiami_download_album(_id, output_dir, info_only)
    elif re.match(r'http://www.xiami.com/album/\w+', url):
        page = get_content(url)
        album_id = re.search(
            r'rel="canonical"\s+href="http://www.xiami.com/album/([^"]+)"',
            page).group(1)
        xiami_download_album(album_id, output_dir, info_only)

    # collections
    if re.match(r'http://www.xiami.com/collect/\d+', url):
        _id = match1(url, r'http://www.xiami.com/collect/(\d+)')
        xiami_download_showcollect(_id, output_dir, info_only)

    # single track
    if re.match(r'http://www.xiami.com/song/\d+\b', url):
        _id = match1(url, r'http://www.xiami.com/song/(\d+)')
        xiami_download_song(_id, output_dir, info_only)
    elif re.match(r'http://www.xiami.com/song/\w+', url):
        html = get_content(url)
        _id = match1(
            html, r'rel="canonical" href="http://www.xiami.com/song/([^"]+)"')
        xiami_download_song(_id, output_dir, info_only)

    if re.match(r'http://www.xiami.com/song/detail/id/\d+', url):
        _id = match1(url, r'http://www.xiami.com/song/detail/id/(\d+)')
        xiami_download_song(_id, output_dir, info_only)

    if re.match(r'http://www.xiami.com/mv', url):
        xiami_download_mv(url, output_dir, merge=merge, info_only=info_only)
def prepare(self, **kwargs):
    headers = FAKE_HEADERS.copy()
    if 'referer' in kwargs:
        headers['Referer'] = kwargs['referer']

    try:
        page = get_content('https://vimeo.com/{}'.format(self.vid))
        cfg_patt = r'clip_page_config\s*=\s*(\{.+?\});'
        cfg = json.loads(match1(page, cfg_patt))
        video_page = get_content(
            cfg['player']['config_url'], headers=headers
        )
        self.title = cfg['clip']['title']
        info = json.loads(video_page)
    except Exception as e:
        page = get_content('https://player.vimeo.com/video/{}'.format(
            self.vid
        ))
        self.title = match1(page, r'<title>([^<]+)</title>')
        info = json.loads(match1(page, r'var t=(\{.+?\});'))

    plain = info['request']['files']['progressive']
    for s in plain:
        meta = dict(src=[s['url']], container='mp4')
        meta['video_profile'] = '{}x{}'.format(s['width'], s['height'])
        for stream in self.__class__.stream_types:
            if s['quality'] == stream['id']:
                self.streams[s['quality']] = meta
    self.master_m3u8 = info['request']['files']['hls']['cdns']
def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)
    video_id = match1(url, r'vine.co/v/([^/]+)')
    title = match1(html, r'<title>([^<]*)</title>')
    stream = match1(
        html, r'<meta property="twitter:player:stream" content="([^"]*)">'
    )
    if not stream:  # https://vine.co/v/.../card
        stream = match1(html, r'"videoUrl":"([^"]+)"')
        if stream:
            stream = stream.replace('\\/', '/')
        else:
            posts_url = 'https://archive.vine.co/posts/{}.json'.format(
                video_id
            )
            json_data = json.loads(get_content(posts_url))
            stream = json_data['videoDashUrl']
            title = json_data['description']
            if title == '':
                title = '{}_{}'.format(
                    json_data['username'].replace(' ', '_'), video_id
                )

    mime, ext, size = url_info(stream)
    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([stream], title, ext, size, output_dir, merge=merge)
def extract_m3u(source):
    r1 = get_content(source)
    s1 = re.findall(r'(/ext_tw_video/.*)', r1)
    s1 += re.findall(r'(/amplify_video/.*)', r1)
    r2 = get_content('https://video.twimg.com{}'.format(s1[-1]))
    s2 = re.findall(r'(/ext_tw_video/.*)', r2)
    s2 += re.findall(r'(/amplify_video/.*)', r2)
    return ['https://video.twimg.com{}'.format(i) for i in s2]
def sohu_download(url, info_only=False, **kwargs):
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = match1(url, r'id=(\d+)')
    else:
        html = get_content(url)
        vid = match1(html, r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?')
    assert vid

    if re.match(r'http[s]://tv.sohu.com/', url):
        info = json.loads(get_content(
            'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'.format(vid)))
        for qtyp in ['oriVid', 'superVid', 'highVid', 'norVid', 'relativeId']:
            if 'data' in info:
                hqvid = info['data'][qtyp]
            else:
                hqvid = info[qtyp]
            if hqvid != 0 and hqvid != vid:
                info = json.loads(get_content(
                    'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'.format(
                        hqvid)))
                if 'allot' not in info:
                    continue
                break
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        size = sum(data['clipsBytes'])
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, hqvid, tvid, new, clipURL, ck))
    else:
        info = json.loads(get_content(
            'http://my.tv.sohu.com/play/videonew.do?vid={}&referer='
            'http://my.tv.sohu.com'.format(vid)))
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        size = sum(map(int, data['clipsBytes']))
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, vid, tvid, new, clipURL, ck))

    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls(urls, title, 'mp4', size, refer=url, **kwargs)
def qq_download_by_vid(vid, title, output_dir='.', merge=True,
                       info_only=False):
    info_api = ('http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333'
                '&platform=11&defnpayver=1&vid={}'.format(vid))
    info = get_content(info_api)
    video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
    fn_pre = video_json['vl']['vi'][0]['lnk']
    title = video_json['vl']['vi'][0]['ti']
    host = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
    streams = video_json['fl']['fi']
    seg_cnt = video_json['vl']['vi'][0]['cl']['fc']
    if seg_cnt == 0:
        seg_cnt = 1

    # best_quality = streams[-1]['name']
    part_format_id = streams[-1]['id']

    part_urls = []
    total_size = 0
    for part in range(1, seg_cnt + 1):
        filename = '{}.p{}.{}.mp4'.format(
            fn_pre, str(part_format_id % 10000), str(part))
        key_api = ('http://vv.video.qq.com/getkey?otype=json&platform=11&'
                   'format={}&vid={}&filename={}&appver=3.2.19.333'.format(
                       part_format_id, vid, filename))
        part_info = get_content(key_api)
        key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
        if key_json.get('key') is None:
            vkey = video_json['vl']['vi'][0]['fvkey']
            url = '{}{}?vkey={}'.format(
                video_json['vl']['vi'][0]['ul']['ui'][0]['url'],
                fn_pre + '.mp4', vkey)
        else:
            vkey = key_json['key']
            url = '{}{}?vkey={}'.format(host, filename, vkey)
        if not vkey:
            if part == 1:
                log.wtf(key_json['msg'])
            else:
                log.w(key_json['msg'])
                break
        part_urls.append(url)
        _, ext, size = url_info(url)
        total_size += size

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(part_urls, title, ext, total_size,
                      output_dir=output_dir, merge=merge)
def pixnet_download(url, info_only=False, **kwargs):
    if not re.match(r'http://(\w)+.pixnet.net/album/video/(\d)+', url):
        log.wtf('[Failed] Unsupported URL pattern.')
        return

    # http://eric6513.pixnet.net/album/video/206644535
    html = get_content(url)
    title = ''.join(match1(
        html, r'<meta property="og:description\" content="([^"]*)"'
    ).split('-')[1:]).strip()

    time_now = int(time())
    m = re.match(r'http://(\w+).pixnet.net/album/video/(\d+)', url)
    username = m.group(1)  # eric6513
    _id = m.group(2)  # 206644535

    data_dict = {
        'username': username,
        'autoplay': 1,
        'id': _id,
        'loop': 0,
        'profile': 9,
        'time': time_now,
    }
    # have to be like this
    data_dict_str = quote(str(data_dict).replace("'", '"'), safe='"')
    url2 = 'http://api.pixnet.tv/content?type=json&customData={}'.format(
        data_dict_str
    )
    # &sig=edb07258e6a9ff40e375e11d30607983 can be blank for now
    # if required, can be obtained from url like
    # http://s.ext.pixnet.tv/user/eric6513/html5/autoplay/206644507.js
    # http://api.pixnet.tv/content?type=json&customData={%22username%22:%22eric6513%22,%22id%22:%22206644535%22,%22time%22:1441823350,%22autoplay%22:0,%22loop%22:0,%22profile%22:7}

    video_json = get_content(url2)
    content = json.loads(video_json)
    url_main = content['element']['video_url']
    url_backup = content['element']['backup_video_uri']

    try:
        # In some rare cases the main URL is IPv6 only...
        # Something like #611
        url_info(url_main)
        url = url_main
    except Exception:
        url = url_backup

    _type, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], title, ext, size, **kwargs)
def iwara_download(url, info_only=False, **kwargs):
    video_hash = match1(url, r'http://\w+.iwara.tv/videos/(\w+)')
    video_url = match1(url, r'(http://\w+.iwara.tv)/videos/\w+')
    html = get_content(url, headers=headers)
    title = match1(html, r'<title>(.*)</title>')
    api_url = '{}/api/video/{}'.format(video_url, video_hash)
    content = get_content(api_url, headers=headers)
    data = json.loads(content)
    _type, ext, size = url_info(data[0]['uri'], headers=headers)
    down_urls = data[0]['uri']
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([down_urls], title, ext, size, headers=headers, **kwargs)
def get_single_photo_url(url):
    page = get_content(url)
    pid = get_photo_id(url, page)
    title = match1(page, pattern_inline_title)
    if match1(page, pattern_inline_video_mark):
        api_key = get_api_key(page)
        reply = get_content(
            tmpl_api_call_photo_info(api_key, get_photo_id(url, page))
        )
        secret = json.loads(reply)['photo']['secret']
        return get_orig_video_source(api_key, pid, secret), title
    # last match always has the best resolution
    match = match1(page, pattern_inline_img_url)
    return 'https:{}'.format(match.replace('\\', '')), title
def kuwo_download_by_rid(rid, info_only=False, **kwargs):
    html = get_content(
        'http://player.kuwo.cn/webmusic/st/getNewMuiseByRid?rid='
        'MUSIC_{}'.format(rid))
    title = match1(html, r'<name>(.*)</name>')
    if not title:
        title = rid
    # format=aac|mp3 -> to get aac; format=mp3 -> to get mp3
    url = get_content(
        'http://antiserver.kuwo.cn/anti.s?format=mp3&rid=MUSIC_{}&'
        'type=convert_url&response=url'.format(rid))
    songtype, ext, size = url_info(url)
    print_info(site_info, title, songtype, size)
    if not info_only:
        download_urls([url], title, ext, size, **kwargs)
def douyutv_download(url, output_dir='.', merge=True, info_only=False,
                     **kwargs):
    if 'v.douyu.com/show/' in url:
        douyutv_video_download(url, output_dir=output_dir, merge=merge,
                               info_only=info_only, **kwargs)
        return

    url = re.sub(r'[\w.]*douyu.com', 'm.douyu.com', url)
    html = get_content(url)
    room_id_patt = r'room_id\s*:\s*(\d+),'
    room_id = match1(html, room_id_patt)
    if room_id == '0':
        room_id = url[url.rfind('/') + 1:]

    api_url = 'http://www.douyutv.com/api/v1/'
    args = 'room/{}?aid=wp&client_sys=wp&time={}'.format(
        room_id, int(time.time()))
    auth_md5 = (args + 'zNzMV1y4EMxOHS6I5WKm').encode('utf-8')
    auth_str = hashlib.md5(auth_md5).hexdigest()
    json_request_url = '{}{}&auth={}'.format(api_url, args, auth_str)

    content = get_content(json_request_url)
    json_content = json.loads(content)
    data = json_content['data']
    server_status = json_content.get('error', 0)
    if server_status != 0:
        raise ValueError('Server returned error: {}'.format(server_status))

    title = data.get('room_name')
    show_status = data.get('show_status')
    if show_status != '1':
        raise ValueError(
            'The live stream is not online! (Errno: {})'.format(server_status))

    real_url = '{}/{}'.format(data.get('rtmp_url'), data.get('rtmp_live'))

    print_info(site_info, title, 'flv', float('inf'))
    if not info_only:
        download_url_ffmpeg(real_url, title, 'flv', None,
                            output_dir=output_dir, merge=merge)
def baomihua_download_by_id(_id, title=None, output_dir='.', merge=True,
                            info_only=False, **kwargs):
    html = get_content(
        'http://play.baomihua.com/getvideourl.aspx?flvid={}&devicetype='
        'phone_app'.format(_id))
    host = match1(html, r'host=([^&]*)')
    assert host
    _type = match1(html, r'videofiletype=([^&]*)')
    assert _type
    vid = match1(html, r'&stream_name=([^&]*)')
    assert vid
    dir_str = match1(html, r'&dir=([^&]*)').strip()
    url = 'http://{}/{}/{}.{}'.format(host, dir_str, vid, _type)
    _, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge,
                      **kwargs)
def xiami_download_song(sid, output_dir='.', info_only=False):
    xml = get_content(
        'http://www.xiami.com/song/playlist/id/{}/object_name/default/'
        'object_id/0'.format(sid))
    doc = parseString(xml)
    i = doc.getElementsByTagName('track')[0]
    artist = i.getElementsByTagName('artist')[0].firstChild.nodeValue
    album_name = i.getElementsByTagName('album_name')[0].firstChild.nodeValue
    song_title = i.getElementsByTagName('name')[0].firstChild.nodeValue
    url = location_dec(
        i.getElementsByTagName('location')[0].firstChild.nodeValue)
    try:
        lrc_url = i.getElementsByTagName('lyric')[0].firstChild.nodeValue
    except Exception:
        pass
    type_, ext, size = url_info(url)
    if not ext:
        ext = 'mp3'

    print_info(site_info, song_title, ext, size)
    if not info_only:
        file_name = '{} - {} - {}'.format(song_title, artist, album_name)
        download_urls([url], file_name, ext, size, output_dir)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except Exception:
            pass
def sina_zxt(url, info_only=False, **kwargs):
    ep = 'http://s.video.sina.com.cn/video/play?video_id='
    frag = urllib.parse.urlparse(url).fragment
    if not frag:
        log.wtf('No video specified with fragment')
    meta = json.loads(get_content(ep + frag))
    if meta['code'] != 1:  # Yes, they use 1 for success.
        log.wtf(meta['message'])
    title = meta['data']['title']

    videos = sorted(meta['data']['videos'], key=lambda i: int(i['size']))
    if len(videos) == 0:
        log.wtf('No video file returned by API server')
    vid = videos[-1]['file_id']
    container = videos[-1]['type']
    size = int(videos[-1]['size'])

    if container == 'hlv':
        container = 'flv'

    urls, _, _ = video_info(api_req(vid))
    print_info(site_info, title, container, size)
    if not info_only:
        download_urls(urls, title, container, size, **kwargs)
    return
def sina_download(url, info_only=False, **kwargs):
    """Downloads Sina videos by URL.
    """
    if 'news.sina.com.cn/zxt' in url:
        sina_zxt(url, info_only=info_only, **kwargs)
        return

    vid = match1(url, r'vid=(\d+)')
    if vid is None:
        video_page = get_content(url)
        vid = hd_vid = match1(video_page, r'hd_vid\s*:\s*\'([^\']+)\'')
        if hd_vid == '0':
            vids = match1(
                video_page, r'[^\w]vid\s*:\s*\'([^\']+)\''
            ).split('|')
            vid = vids[-1]

    if vid is None:
        vid = match1(video_page, r'vid:"?(\d+)"?')
    if vid:
        # title = match1(video_page, r'title\s*:\s*\'([^\']+)\'')
        sina_download_by_vid(vid, info_only=info_only, **kwargs)
    else:
        vkey = match1(video_page, r'vkey\s*:\s*"([^"]+)"')
        if vkey is None:
            vid = match1(url, r'#(\d+)')
            sina_download_by_vid(vid, info_only=info_only, **kwargs)
            return
        title = match1(video_page, r'title\s*:\s*"([^"]+)"')
        sina_download_by_vkey(vkey, title=title, info_only=info_only, **kwargs)
def panda_download(url, info_only=False, **kwargs):
    roomid = re.search(r'/(\d+)', url)
    if roomid is None:
        log.wtf('Cannot find room id for this url')
    roomid = roomid.group(1)
    json_request_url = (
        'http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}'.format(
            roomid, int(time.time())))
    content = get_content(json_request_url)
    api_json = json.loads(content)

    errno = api_json['errno']
    errmsg = api_json['errmsg']
    if errno:
        raise ValueError('Errno : {}, Errmsg : {}'.format(errno, errmsg))

    data = api_json['data']
    title = data['roominfo']['name']
    room_key = data['videoinfo']['room_key']
    plflag = data['videoinfo']['plflag'].split('_')
    status = data['videoinfo']['status']
    if status != '2':
        raise ValueError(
            'The live stream is not online! (status:{})'.format(status))

    data2 = json.loads(data['videoinfo']['plflag_list'])
    rid = data2['auth']['rid']
    sign = data2['auth']['sign']
    ts = data2['auth']['time']
    real_url = ('http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&'
                'rid={}'.format(plflag[1], room_key, sign, ts, rid))

    print_info(site_info, title, 'flv', float('inf'))
    if not info_only:
        download_urls([real_url], title, 'flv', None, **kwargs)
def prepare(self, **kwargs):
    # scrape the html
    content = get_content(self.url)

    # extract title
    self.title = match1(
        content,
        r'<meta property="og:description" name="og:description" '
        r'content="([^"]+)"'
    )

    data = match1(
        content,
        r'<script type="application/json" id=\'initial-state\'>(.+)'
        r'</script>'
    )
    data = json.loads(data)
    keys = list(data['resources']['data']['PinPageResource'].keys())
    orig_img = data['resources']['data']['PinPageResource'][keys[0]][
        'data'
    ]['images']['orig']['url']
    twit_img = match1(
        content,
        r'<meta property="twitter:image:src" name="twitter:image:src" '
        r'content="([^"]+)"'
    )

    # construct available streams
    if orig_img:
        self.streams['original'] = {'url': orig_img}
    if twit_img:
        self.streams['small'] = {'url': twit_img}
def letv_download(url, info_only=False, **kwargs):
    url = url_locations([url])[0]
    if re.match(r'http://yuntv.letv.com/', url):
        letvcloud_download(url, info_only=info_only, **kwargs)
    elif 'sports.le.com' in url:
        html = get_content(url)
        vid = match1(url, r'video/(\d+)\.html')
        title = match1(html, r'<h2 class="title">([^<]+)</h2>')
        letv_download_by_vid(vid, title=title, info_only=info_only, **kwargs)
    else:
        html = get_content(url)
        vid = match1(url, r'http://www.letv.com/ptv/vplay/(\d+).html') or \
            match1(url, r'http://www.le.com/ptv/vplay/(\d+).html') or \
            match1(html, r'vid="(\d+)"')
        title = match1(html, r'name="irTitle" content="(.*?)"')
        letv_download_by_vid(vid, title=title, info_only=info_only, **kwargs)
def extract(self, **kwargs):
    if not self.streams_sorted:
        # No stream is available
        return

    if 'stream_id' in kwargs and kwargs['stream_id']:
        # Extract the stream
        stream_id = kwargs['stream_id']
        if stream_id not in self.streams \
                and stream_id not in self.dash_streams:
            log.e('[Error] Invalid video format.')
            log.e('Run \'-i\' command with no specific video format to '
                  'view all available formats.')
            exit(2)
    else:
        # Extract stream with the best quality
        stream_id = self.streams_sorted[0]['itag']

    if stream_id in self.streams:
        src = self.streams[stream_id]['url']
        if self.streams[stream_id]['sig'] is not None:
            sig = self.streams[stream_id]['sig']
            src += '&signature={}'.format(sig)
        elif self.streams[stream_id]['s'] is not None:
            if not hasattr(self, 'js'):
                self.js = get_content(self.html5player)
            s = self.streams[stream_id]['s']
            sig = self.__class__.decipher(self.js, s)
            src += '&signature={}'.format(sig)

        self.streams[stream_id]['src'] = [src]
        self.streams[stream_id]['size'] = urls_size(
            self.streams[stream_id]['src'])
def extract(self, url, **kwargs):
    if '163.fm' in url:
        url = get_location(url)
    if 'music.163.com' in url:
        self.need_download = False
        self.netease_cloud_music_download(url, **kwargs)
    else:
        html = get_content(url)
        title = match1(html, 'movieDescription=\'([^\']+)\'') or \
            match1(html, '<title>(.+)</title>')

        if title[0] == ' ':
            title = title[1:]

        src = match1(html, r'<source src="([^"]+)"') or \
            match1(html, r'<source type="[^"]+" src="([^"]+)"')

        if src:
            url = src
            _, ext, size = url_info(src)
        else:
            url = (match1(html, r'["\'](.+)-list.m3u8["\']') or
                   match1(html, r'["\'](.+).m3u8["\']')) + '.mp4'
            _, _, size = url_info(url)
            ext = 'mp4'

        return {
            'urls': [url],
            'title': title,
            'file_format': ext,
            'size': size,
        }
def vidto_download(url, info_only=False, **kwargs):
    html = get_content(url)
    params = {}
    r = re.findall(
        r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">',
        html
    )
    for name, value in r:
        params[name] = value
    data = parse.urlencode(params).encode('utf-8')
    req = request.Request(url, headers=FAKE_HEADERS)
    print('Please wait for 6 seconds...')
    time.sleep(6)
    print('Starting')
    new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
    new_stff = re.search(r'lnk_download" href="(.*?)">', new_html)
    if new_stff:
        url = new_stff.group(1)
        title = params['fname']
        _type = ''
        ext = ''
        a, b, size = url_info(url)
        print_info(site_info, title, _type, size)
        if not info_only:
            download_urls([url], title, ext, size, **kwargs)
    else:
        log.wtf("Can't find link, please review")
def prepare(self, **kwargs):
    if self.url:
        self.vid = self.get_vid_from_url(self.url)
        self.referer = self.url

    content = get_content(self.api_endpoint.format(room_id=self.vid))
    content = json.loads(content)
    self.title = content['data']['room_name']
    rtmp_url = content['data']['rtmp_url']
    # stream_available = [i['name'] for i in content['data']['stream']]
    stream_available = {}
    stream_available['normal'] = '{}/{}'.format(
        rtmp_url, content['data']['rtmp_live']
    )
    if len(content['data']['rtmp_multi_bitrate']) > 0:
        for k, v in content['data']['rtmp_multi_bitrate'].items():
            stream_available[k] = rtmp_url + '/' + v

    for s in self.stream_types:
        if s['id'] in stream_available.keys():
            quality_id = s['id']
            url = stream_available[quality_id]
            self.streams[quality_id] = {
                'container': 'flv',
                'video_profile': s['video_profile'],
                'size': 0,
                'url': url
            }