def ckplayer_download_by_xml(ckinfo, output_dir='.', merge=False,
                             info_only=False, **kwargs):
    # Info XML
    video_info = ckplayer_get_info_by_xml(ckinfo)

    try:
        title = kwargs['title']
    except Exception:
        title = ''
    type_ = ''
    size = 0

    if len(video_info['links']) > 0:  # has link
        # use the first link to determine type and extension
        type_, _ext, size = url_info(video_info['links'][0])

    if 'size' in video_info:
        size = int(video_info['size'])
    else:
        # the first link's size is already counted above
        for i in video_info['links'][1:]:
            size += url_info(i)[2]

    print_info(site_info, title, type_, size)
    if not info_only:
        download_urls(
            video_info['links'], title, _ext, size, output_dir=output_dir,
            merge=merge
        )

def suntv_download(url, info_only=False, **kwargs):
    if re.match(r'http://www.isuntv.com/\w+', url):
        API_URL = ('http://www.isuntv.com/ajaxpro/SunTv.pro_vod_playcatemp4,'
                   'App_Web_playcatemp4.ascx.9f08f04f.ashx')
        itemid = match1(url, r'http://www.isuntv.com/pro/ct(\d+).html')
        values = {'itemid': itemid, 'vodid': ''}

        data = str(values).replace("'", '"')
        data = data.encode('utf-8')
        req = urllib.request.Request(API_URL, data)
        req.add_header('AjaxPro-Method', 'ToPlay')  # important!
        resp = urllib.request.urlopen(req)
        respData = resp.read()
        respData = respData.decode('ascii').strip('"')  # Ahhhhhhh!
        video_url = 'http://www.isuntv.com' + str(respData)

        html = get_content(url, decoded=False)
        html = html.decode('gbk')
        title = match1(html, '<title>([^<]+)').strip()  # get rid of \r\n

        _type, ext, size = url_info(video_url)
        print_info(site_info, title, _type, size)

        if not info_only:
            # download the resolved media URL, not the page URL
            download_urls([video_url], title, 'mp4', size, **kwargs)

def fc2video_download_by_upid(upid, output_dir='.', merge=True,
                              info_only=False, **kwargs):
    fake_headers = FAKE_HEADERS.copy()
    fake_headers.update({
        'DNT': '1',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'en-CA,en;q=0.8,en-US;q=0.6,zh-CN;q=0.4,zh;q=0.2',
        'X-Requested-With': 'ShockwaveFlash/19.0.0.245',
        'Connection': 'keep-alive',
    })
    api_base = ('https://video.fc2.com/ginfo.php?upid={upid}&mimi='
                '{mimi}'.format(upid=upid, mimi=makeMimi(upid)))
    html = get_content(api_base, headers=fake_headers)

    video_url = match1(html, r'filepath=(.+)&sec')
    video_url = video_url.replace('&mid', '?mid')
    title = match1(html, r'&title=([^&]+)')

    _type, ext, size = url_info(video_url, headers=fake_headers)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([video_url], title, ext, size, output_dir,
                      merge=merge, headers=fake_headers, **kwargs)

def nicovideo_download(url, info_only=False, **kwargs):
    import ssl
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    request.install_opener(opener)

    import netrc
    import getpass
    try:
        info = netrc.netrc().authenticators('nicovideo')
    except Exception:
        info = None
    if info is None:
        user = input('User: ')
        password = getpass.getpass('Password: ')
    else:
        # netrc authenticators() returns (login, account, password)
        user, password = info[0], info[2]
    print('Logging in...')
    nicovideo_login(user, password)

    html = get_content(url)  # necessary!
    title = match1(html, r'<title>(.+?)</title>')
    vid = url.split('/')[-1].split('?')[0]
    api_html = get_content(
        'http://flapi.nicovideo.jp/api/getflv?v={}'.format(vid))
    real_url = parse.unquote(match1(api_html, r'url=([^&]+)&'))

    _type, ext, size = url_info(real_url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([real_url], title, ext, size, **kwargs)

def dailymotion_download(url, output_dir='.', merge=True, info_only=False,
                         **kwargs):
    """Downloads Dailymotion videos by URL.
    """
    html = get_content(rebuilt_url(url))
    info = json.loads(match1(html, r'qualities":({.+?}),"'))
    title = match1(html, r'"video_title"\s*:\s*"([^"]+)"') or \
        match1(html, r'"title"\s*:\s*"([^"]+)"')
    title = unicodize(title)

    for quality in ['1080', '720', '480', '380', '240', '144', 'auto']:
        try:
            real_url = info[quality][1]['url']
            if real_url:
                break
        except KeyError:
            pass

    mime, ext, size = url_info(real_url)

    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([real_url], title, ext, size, output_dir=output_dir,
                      merge=merge, **kwargs)

def _download(item, **kwargs):
    url = item['fullLinkUrl']
    title = item['title'].strip()
    _, ext, size = url_info(url)
    print_info(site_info=site_info, title=title, type=ext, size=size)
    if not kwargs.get('info_only'):
        download_urls([url], title, ext, size, **kwargs)

def facebook_download(url, output_dir='.', merge=True, info_only=False,
                      **kwargs):
    html = get_content(url)

    title = match1(html, r'<title id="pageTitle">(.+)</title>')
    if title is None:
        title = url

    sd_urls = list(set([
        unicodize(str.replace(i, '\\/', '/'))
        for i in re.findall(r'sd_src_no_ratelimit:"([^"]*)"', html)
    ]))
    hd_urls = list(set([
        unicodize(str.replace(i, '\\/', '/'))
        for i in re.findall(r'hd_src_no_ratelimit:"([^"]*)"', html)
    ]))
    urls = hd_urls if hd_urls else sd_urls

    _type, ext, size = url_info(urls[0], True)
    size = urls_size(urls)

    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls(urls, title, ext, size, output_dir, merge=False)

def baidu_download_album(aid, output_dir='.', merge=True, info_only=False):
    html = get_content('http://music.baidu.com/album/{}'.format(aid))
    parser = get_parser(html)
    album_name = parser.find('h2', class_='album-name').text
    artist = parser.find('span', class_='author_list')['title']
    output_dir = '{}/{} - {}'.format(output_dir, artist, album_name)
    # the attribute value is HTML-escaped JSON; turn &quot; back into "
    ids = json.loads(
        match1(html, r'<span class="album-add" data-adddata=\'(.+?)\'>')
        .replace('&quot', '').replace(';', '"'))['ids']
    track_nr = 1
    for _id in ids:
        song_data = baidu_get_song_data(_id)
        song_url = song_data['songLink']
        song_title = song_data['songName']
        song_lrc = song_data['lrcLink']
        file_name = '{:0>2d}.{}'.format(track_nr, song_title)

        _type, ext, size = url_info(song_url)
        print_info(site_info, song_title, _type, size)
        if not info_only:
            download_urls([song_url], file_name, ext, size, output_dir,
                          merge=merge)

        if song_lrc:
            _type, ext, size = url_info(song_lrc)
            print_info(site_info, song_title, _type, size)
            if not info_only:
                download_urls([song_lrc], file_name, ext, size, output_dir)

        track_nr += 1

def vidto_download(url, info_only=False, **kwargs):
    html = get_content(url)
    params = {}
    r = re.findall(
        r'type="(?:hidden|submit)?"(?:.*?)name="(.+?)"\s* value="?(.+?)">',
        html
    )
    for name, value in r:
        params[name] = value
    data = parse.urlencode(params).encode('utf-8')
    req = request.Request(url, headers=FAKE_HEADERS)
    print('Please wait for 6 seconds...')
    time.sleep(6)
    print('Starting')
    new_html = request.urlopen(req, data).read().decode('utf-8', 'replace')
    new_stuff = re.search(r'lnk_download" href="(.*?)">', new_html)
    if new_stuff:
        url = new_stuff.group(1)
        title = params['fname']
        _type = ''
        ext = ''
        _, _, size = url_info(url)
        print_info(site_info, title, _type, size)
        if not info_only:
            download_urls([url], title, ext, size, **kwargs)
    else:
        log.wtf("Can't find link, please review")

def videomega_download(url, info_only=False, **kwargs):
    # Hot-plug cookie handler
    ssl_context = request.HTTPSHandler(
        context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
    cookie_handler = request.HTTPCookieProcessor()
    opener = request.build_opener(ssl_context, cookie_handler)
    opener.addheaders = [('Referer', url), ('Cookie', 'noadvtday=0')]
    request.install_opener(opener)

    if re.search(r'view\.php', url):
        php_url = url
    else:
        content = get_content(url)
        m = re.search(r'ref="([^"]*)";\s*width="([^"]*)";\s*height="([^"]*)"',
                      content)
        ref = m.group(1)
        width, height = m.group(2), m.group(3)
        php_url = (
            'http://videomega.tv/view.php?ref={}&width={}&height={}'.format(
                ref, width, height))
    content = get_content(php_url)

    title = match1(content, r'<title>(.*)</title>')
    js = match1(content, r'(eval.*)')
    # Unpack the p.a.c.k.e.r-style obfuscated player script: the template
    # string uses base-36 tokens that index into a '|'-separated word list.
    t = match1(js, r'\$\("\w+"\)\.\w+\("\w+","([^"]+)"\)')
    t = re.sub(r'(\w)', r'{\1}', t)  # wrap each token char as a {} field
    t = t.translate({87 + i: str(i) for i in range(10, 36)})  # a-z -> 10-35
    s = match1(js, r"'([^']+)'\.split").split('|')
    src = t.format(*s)

    _type, ext, size = url_info(src)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([src], title, ext, size, **kwargs)

def fantasy_download_by_id_channelId(id=0, channelId=0, output_dir='.',
                                     merge=True, info_only=False, **kwargs):
    api_url = (
        'http://www.fantasy.tv/tv/playDetails.action?'
        'myChannelId=1&id={id}&channelId={channelId}&t={t}'.format(
            id=id, channelId=channelId, t=str(random.random())
        )
    )
    html = get_content(api_url)
    html = json.loads(html)

    if int(html['status']) != 100000:
        raise Exception('API error!')

    title = html['data']['tv']['title']
    video_url = html['data']['tv']['videoPath']
    headers = FAKE_HEADERS.copy()
    headers['Referer'] = api_url

    _type, ext, size = url_info(video_url, headers=headers)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls(
            [video_url], title, ext, size, output_dir, merge=merge,
            headers=headers
        )

def zhanqi_live(room_id, merge=True, output_dir='.', info_only=False,
                **kwargs):
    api_url = ('https://www.zhanqi.tv/api/static/v2.2/room/domain/{}.json'
               .format(room_id))
    json_data = json.loads(get_content(api_url))['data']
    status = json_data['status']
    if status != '4':
        raise Exception('The live stream is not online!')

    nickname = json_data['nickname']
    title = '{}:{}'.format(nickname, json_data['title'])
    video_levels = base64.b64decode(
        json_data['flashvars']['VideoLevels']).decode('utf8')
    m3u8_url = json.loads(video_levels)['streamUrl']

    print_info(site_info, title, 'm3u8', 0, m3u8_url=m3u8_url,
               m3u8_type='master')
    if not info_only:
        download_url_ffmpeg(m3u8_url, title, 'mp4', output_dir=output_dir,
                            merge=merge)

def xiami_download_song(sid, output_dir='.', info_only=False):
    xml = get_content(
        'http://www.xiami.com/song/playlist/id/{}/object_name/default/'
        'object_id/0'.format(sid))
    doc = parseString(xml)
    i = doc.getElementsByTagName('track')[0]
    artist = i.getElementsByTagName('artist')[0].firstChild.nodeValue
    album_name = i.getElementsByTagName('album_name')[0].firstChild.nodeValue
    song_title = i.getElementsByTagName('name')[0].firstChild.nodeValue
    url = location_dec(
        i.getElementsByTagName('location')[0].firstChild.nodeValue)
    try:
        lrc_url = i.getElementsByTagName('lyric')[0].firstChild.nodeValue
    except Exception:
        pass

    type_, ext, size = url_info(url)
    if not ext:
        ext = 'mp3'

    print_info(site_info, song_title, ext, size)
    if not info_only:
        file_name = '{} - {} - {}'.format(song_title, artist, album_name)
        download_urls([url], file_name, ext, size, output_dir)
        try:
            xiami_download_lyric(lrc_url, file_name, output_dir)
        except Exception:
            pass

def xiami_download_mv(url, output_dir='.', merge=True, info_only=False):
    # FIXME: broken merge
    page = get_content(url)
    title = re.findall('<title>([^<]+)', page)[0]
    vid, uid = re.findall(r'vid:"(\d+)",uid:"(\d+)"', page)[0]
    api_url = (
        'http://cloud.video.taobao.com/videoapi/info.php?vid={}&uid={}'.format(
            vid, uid))
    result = get_content(api_url)
    doc = parseString(result)
    video_url = doc.getElementsByTagName('video_url')[-1].firstChild.nodeValue
    length = int(doc.getElementsByTagName('length')[-1].firstChild.nodeValue)

    v_urls = []
    k_start = 0
    total_size = 0
    # the server exposes the video in ~20 MB byte ranges; probe successive
    # start_/end_ URLs until one is no longer available
    while True:
        k_end = k_start + 20000000
        if k_end >= length:
            k_end = length - 1
        v_url = video_url + '/start_{}/end_{}/1.flv'.format(k_start, k_end)
        try:
            _, ext, size = url_info(v_url)
        except Exception:
            break
        v_urls.append(v_url)
        total_size += size
        k_start = k_end + 1

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(v_urls, title, ext, total_size, output_dir, merge=merge)

def vine_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)
    video_id = match1(url, r'vine.co/v/([^/]+)')
    title = match1(html, r'<title>([^<]*)</title>')
    stream = match1(
        html, r'<meta property="twitter:player:stream" content="([^"]*)">'
    )
    if not stream:  # https://vine.co/v/.../card
        stream = match1(html, r'"videoUrl":"([^"]+)"')
        if stream:
            stream = stream.replace('\\/', '/')
        else:
            posts_url = 'https://archive.vine.co/posts/{}.json'.format(
                video_id
            )
            json_data = json.loads(get_content(posts_url))
            stream = json_data['videoDashUrl']
            title = json_data['description']
            if title == '':
                title = '{}_{}'.format(
                    json_data['username'].replace(' ', '_'), video_id
                )

    mime, ext, size = url_info(stream)
    print_info(site_info, title, mime, size)
    if not info_only:
        download_urls([stream], title, ext, size, output_dir, merge=merge)

def __call__(self, url, **kwargs):
    '''
    data = {
        'urls': [],
        'title': '',
        'file_format': '',
        'size': '',
    }
    '''
    data = self.extract(url, **kwargs)
    if not self.need_download:
        return

    file_format = data.get('file_format', 'mp4')
    size = data.get('size')
    urls = data['urls']
    if not size:
        if len(urls) == 1:
            size = url_size(urls[0])
        else:
            size = urls_size(urls)

    print_info(site_info=self.site_info, title=data['title'],
               type=file_format, size=size)
    if not kwargs['info_only']:
        download_urls(urls=urls, title=data['title'], ext=file_format,
                      total_size=size, **kwargs)

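# A minimal usage sketch for the __call__ driver above. The subclass and its
# extract() body are hypothetical (not part of this module); __call__ itself
# only assumes extract(), need_download, and site_info on the instance.
#
#     class MySiteExtractor(BaseExtractor):  # BaseExtractor is assumed
#         site_info = 'mysite.example'
#         need_download = True
#
#         def extract(self, url, **kwargs):
#             # resolve the page into direct media URLs here
#             return {'urls': ['http://example.com/v.mp4'],
#                     'title': 'clip',
#                     'file_format': 'mp4'}
#
#     MySiteExtractor()(url, info_only=False)  # prints info, then downloads
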
def sina_zxt(url, info_only=False, **kwargs):
    ep = 'http://s.video.sina.com.cn/video/play?video_id='
    frag = urllib.parse.urlparse(url).fragment
    if not frag:
        log.wtf('No video specified with fragment')
    meta = json.loads(get_content(ep + frag))
    if meta['code'] != 1:
        # Yes they use 1 for success.
        log.wtf(meta['message'])
    title = meta['data']['title']
    videos = sorted(meta['data']['videos'], key=lambda i: int(i['size']))
    if len(videos) == 0:
        log.wtf('No video file returned by API server')

    vid = videos[-1]['file_id']
    container = videos[-1]['type']
    size = int(videos[-1]['size'])

    if container == 'hlv':
        container = 'flv'

    urls, _, _ = video_info(api_req(vid))
    print_info(site_info, title, container, size)
    if not info_only:
        download_urls(urls, title, container, size, **kwargs)

def panda_download(url, info_only=False, **kwargs):
    roomid = re.search(r'/(\d+)', url)
    if roomid is None:
        log.wtf('Cannot find room id in this url')
    roomid = roomid.group(1)
    json_request_url = (
        'http://www.panda.tv/api_room_v2?roomid={}&__plat=pc_web&_={}'.format(
            roomid, int(time.time())))
    content = get_content(json_request_url)
    api_json = json.loads(content)

    errno = api_json['errno']
    errmsg = api_json['errmsg']
    if errno:
        raise ValueError('Errno : {}, Errmsg : {}'.format(errno, errmsg))

    data = api_json['data']
    title = data['roominfo']['name']
    room_key = data['videoinfo']['room_key']
    plflag = data['videoinfo']['plflag'].split('_')
    status = data['videoinfo']['status']
    if status != '2':  # compare by value, not identity
        raise ValueError(
            'The live stream is not online! (status:{})'.format(status))

    data2 = json.loads(data['videoinfo']['plflag_list'])
    rid = data2['auth']['rid']
    sign = data2['auth']['sign']
    ts = data2['auth']['time']
    real_url = ('http://pl{}.live.panda.tv/live_panda/{}.flv?sign={}&ts={}&'
                'rid={}'.format(plflag[1], room_key, sign, ts, rid))

    print_info(site_info, title, 'flv', float('inf'))
    if not info_only:
        download_urls([real_url], title, 'flv', None, **kwargs)

def douyutv_video_download(url, output_dir='.', merge=True, info_only=False,
                           **kwargs):
    ep = 'http://vmobile.douyu.com/video/getInfo?vid='
    patt = r'show/([0-9A-Za-z]+)'
    title_patt = r'<h1>(.+?)</h1>'

    hit = re.search(patt, url)
    if hit is None:
        log.wtf('Unknown url pattern')
    vid = hit.group(1)

    page = get_content(url)
    hit = re.search(title_patt, page)
    if hit is None:
        title = vid
    else:
        title = hit.group(1)

    meta = json.loads(get_content(ep + vid))
    if meta['error'] != 0:
        log.wtf('Error from API server')
    m3u8_url = meta['data']['video_url']

    print_info('Douyu Video', title, 'm3u8', 0, m3u8_url=m3u8_url)
    if not info_only:
        urls = general_m3u8_extractor(m3u8_url)
        download_urls(urls, title, 'ts', 0, output_dir=output_dir,
                      merge=merge, **kwargs)

def mtv81_download(url, output_dir='.', merge=True, info_only=False, **kwargs):
    html = get_content(url)
    title = unescape_html('|'.join(
        match1(html, r'<title>(.*?)</title>').split('|')[:-2]))

    # mgid%3Auma%3Avideo%3Amtv81.com%3A897974
    vid = match1(html, r'getTheVideo\("(.*?)"')
    xml = parseString(
        get_content(
            'http://intl.esperanto.mtvi.com/www/xml/media/mediaGen.jhtml?uri={}&'
            'flashPlayer=LNX%2013,0,0,206&geo=CN&sid=123456'.format(vid)))

    # pick the highest-bitrate <src> entry
    url = sorted(map(lambda x: x.firstChild.nodeValue,
                     xml.getElementsByTagName('src')),
                 key=lambda x: int(match1(x, r'_(\d+?)_')))[-1]

    mediatype, ext, size = 'mp4', 'mp4', 0
    print_info(site_info, title, mediatype, size)
    # rtmpdump -r 'rtmpe://cp30865.edgefcs.net/ondemand/mtviestor/_!/intlod/MTVInternational/MBUS/GeoLocals/00JP/VIAMTVI/PYC/201304/7122HVAQ4/00JPVIAMTVIPYC7122HVAQ4_640x_360_1200_m30.mp4' -o "title.mp4" --swfVfy http://media.mtvnservices.com/player/prime/mediaplayerprime.1.10.8.swf  # noqa
    # because rtmpdump is unstable, it may take several tries
    if not info_only:
        download_rtmp_url(
            url=url, title=title, ext=ext,
            params={
                '--swfVfy': (
                    'http://media.mtvnservices.com/player/prime/mediaplayer'
                    'prime.1.10.8.swf')
            },
            output_dir=output_dir)

def baomihua_download_by_id(_id, title=None, output_dir='.', merge=True,
                            info_only=False, **kwargs):
    html = get_content(
        'http://play.baomihua.com/getvideourl.aspx?flvid={}&devicetype='
        'phone_app'.format(_id))
    host = match1(html, r'host=([^&]*)')
    assert host
    _type = match1(html, r'videofiletype=([^&]*)')
    assert _type
    vid = match1(html, r'&stream_name=([^&]*)')
    assert vid
    dir_str = match1(html, r'&dir=([^&]*)').strip()
    url = 'http://{}/{}/{}.{}'.format(host, dir_str, vid, _type)

    _, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], title, ext, size, output_dir, merge=merge,
                      **kwargs)

def naver_download_by_url(url, info_only=False, **kwargs):
    ep = 'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{}?key={}'
    page = get_content(url)
    og_video_url = re.search(
        r"<meta\s+property=\"og:video:url\"\s+content='(.+?)'>", page
    ).group(1)
    params_dict = urllib.parse.parse_qs(
        urllib.parse.urlparse(og_video_url).query
    )
    vid = params_dict['vid'][0]
    key = params_dict['outKey'][0]

    meta_str = get_content(ep.format(vid, key))
    meta_json = json.loads(meta_str)
    if 'errorCode' in meta_json:
        log.wtf(meta_json['errorCode'])
    title = meta_json['meta']['subject']
    videos = meta_json['videos']['list']
    video_list = sorted(
        videos, key=lambda video: video['encodingOption']['width']
    )
    video_url = video_list[-1]['source']

    size = url_size(video_url)
    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls([video_url], title, 'mp4', size, **kwargs)

def miaopai_download(url, output_dir='.', merge=False, info_only=False,
                     **kwargs):
    fid = match1(url, r'\?fid=(\d{4}:\w{32})')
    if fid:
        miaopai_download_by_fid(fid, output_dir, merge, info_only)
    elif '/p/230444' in url:
        fid = match1(url, r'/p/230444(\w+)')
        miaopai_download_by_fid('1034:' + fid, output_dir, merge, info_only)
    else:
        status_id = url.split('?')[0].split('/')[-1]
        video_info = json.loads(
            get_content(
                'https://m.weibo.cn/statuses/show?id={}'.format(status_id),
                headers=config.FAKE_HEADERS_MOBILE))
        video_url = video_info['data']['page_info']['media_info']['stream_url']
        title = video_info['data']['page_info']['content2']
        video_format = 'mp4'
        size = url_size(video_url)
        print_info(site_info=site_info, title=title, type=video_format,
                   size=size)
        if not info_only:
            download_urls(urls=[video_url], title=title, ext=video_format,
                          total_size=size, **kwargs)

def theplatform_download_by_pid(pid, title, output_dir='.', merge=True,
                                info_only=False, **kwargs):
    smil_url = (
        'http://link.theplatform.com/s/dJ5BDC/{}/meta.smil?format=smil'
        '&mbr=true'.format(pid)
    )
    smil = get_content(smil_url)
    smil_base = unescape_html(match1(smil, r'<meta base="([^"]+)"'))
    # map height -> src for every <video> entry in the SMIL document
    smil_videos = {
        y: x for x, y in dict(
            re.findall(r'<video src="([^"]+)".+height="([^"]+)"', smil)
        ).items()
    }
    smil_video = None
    for height in ['1080', '720', '480', '360', '240', '216']:
        if height in smil_videos:
            smil_video = smil_videos[height]
            break
    assert smil_video

    _type, ext, size = 'mp4', 'mp4', 0
    print_info(site_info, title, _type, size)
    if not info_only:
        download_rtmp_url(
            url=smil_base, title=title, ext=ext,
            params={'-y': '{}:{}'.format(ext, smil_video)},
            output_dir=output_dir
        )

def baidu_download_song(sid, output_dir='.', merge=True, info_only=False):
    data = baidu_get_song_data(sid)
    if data is not None:
        url = data['songLink']
        title = data['songName']
        artist = data['artistName']
        # album = data['albumName']
        lrc = data['lrcLink']
        file_name = '{} - {}'.format(title, artist)
    else:
        html = get_content('http://music.baidu.com/song/{}'.format(sid))
        url = match1(html, r'data_url="([^"]+)"')
        title = match1(html, r'data_name="([^"]+)"')
        file_name = title

    _type, ext, size = url_info(url)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([url], file_name, ext, size, output_dir, merge=merge)

    try:
        _type, ext, size = url_info(lrc)
        print_info(site_info, title, _type, size)
        if not info_only:
            download_urls([lrc], file_name, ext, size, output_dir)
    except Exception:
        pass

def qq_download_by_vid(vid, title, output_dir='.', merge=True,
                       info_only=False):
    info_api = ('http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333'
                '&platform=11&defnpayver=1&vid={}'.format(vid))
    info = get_content(info_api)
    video_json = json.loads(match1(info, r'QZOutputJson=(.*)')[:-1])
    fn_pre = video_json['vl']['vi'][0]['lnk']
    title = video_json['vl']['vi'][0]['ti']
    host = video_json['vl']['vi'][0]['ul']['ui'][0]['url']
    streams = video_json['fl']['fi']
    seg_cnt = video_json['vl']['vi'][0]['cl']['fc']
    if seg_cnt == 0:
        seg_cnt = 1

    # best_quality = streams[-1]['name']
    part_format_id = streams[-1]['id']

    part_urls = []
    total_size = 0
    for part in range(1, seg_cnt + 1):
        filename = '{}.p{}.{}.mp4'.format(
            fn_pre, str(part_format_id % 10000), str(part))
        key_api = ('http://vv.video.qq.com/getkey?otype=json&platform=11&'
                   'format={}&vid={}&filename={}&appver=3.2.19.333'.format(
                       part_format_id, vid, filename))
        part_info = get_content(key_api)
        key_json = json.loads(match1(part_info, r'QZOutputJson=(.*)')[:-1])
        if key_json.get('key') is None:
            vkey = video_json['vl']['vi'][0]['fvkey']
            url = '{}{}?vkey={}'.format(
                video_json['vl']['vi'][0]['ul']['ui'][0]['url'],
                fn_pre + '.mp4', vkey)
        else:
            vkey = key_json['key']
            url = '{}{}?vkey={}'.format(host, filename, vkey)
        if not vkey:
            if part == 1:
                log.wtf(key_json['msg'])
            else:
                log.w(key_json['msg'])
            break
        part_urls.append(url)
        _, ext, size = url_info(url)
        total_size += size

    print_info(site_info, title, ext, total_size)
    if not info_only:
        download_urls(part_urls, title, ext, total_size,
                      output_dir=output_dir, merge=merge)

def ku6_download(url, info_only=False, **kwargs):
    page = get_content(url)
    video = match1(
        page, r'type: "video/mp4", src: "(.+)"').replace(' ', '%20')
    video = parse.quote(video, safe=string.printable)
    title = match1(page, r'document.title = "(.+)"')
    _type, ext, size = url_info(video)
    print_info(site_info, title, _type, size)
    if not info_only:
        download_urls([video], title, ext, size, **kwargs)

def joy_download(url, info_only=False, **kwargs):
    page = get_content(url)
    parser = get_parser(page)
    url = parser.source['src']
    title = parser.h1.text.strip()
    _, ext, size = url_info(url)
    print_info(site_info, title, ext, size)
    if not info_only:
        download_urls([url], title, ext, size, **kwargs)

def sohu_download(url, info_only=False, **kwargs):
    if re.match(r'http://share.vrs.sohu.com', url):
        vid = match1(url, r'id=(\d+)')
    else:
        html = get_content(url)
        vid = match1(html, r'\Wvid\s*[\:=]\s*[\'"]?(\d+)[\'"]?')
    assert vid

    if re.match(r'https?://tv.sohu.com/', url):
        info = json.loads(
            get_content(
                'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'.format(vid)))
        for qtyp in ['oriVid', 'superVid', 'highVid', 'norVid', 'relativeId']:
            if 'data' in info:
                hqvid = info['data'][qtyp]
            else:
                hqvid = info[qtyp]
            if hqvid != 0 and hqvid != vid:
                info = json.loads(
                    get_content(
                        'http://hot.vrs.sohu.com/vrs_flash.action?vid={}'
                        .format(hqvid)))
                if 'allot' not in info:
                    continue
                break
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        size = sum(data['clipsBytes'])
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, hqvid, tvid, new, clipURL, ck))
    else:
        info = json.loads(
            get_content(
                'http://my.tv.sohu.com/play/videonew.do?vid={}&referer='
                'http://my.tv.sohu.com'.format(vid)))
        host = info['allot']
        tvid = info['tvid']
        urls = []
        data = info['data']
        title = data['tvName']
        size = sum(map(int, data['clipsBytes']))
        assert len(data['clipsURL']) == len(data['clipsBytes']) \
            == len(data['su'])
        for new, clip, ck in zip(data['su'], data['clipsURL'], data['ck']):
            clipURL = urlparse(clip).path
            urls.append(real_url(host, vid, tvid, new, clipURL, ck))

    print_info(site_info, title, 'mp4', size)
    if not info_only:
        download_urls(urls, title, 'mp4', size, refer=url, **kwargs)

def toutiao_download(url, info_only=False, **kwargs):
    html = get_content(url)
    video_id = match1(html, r"videoid\s*:\s*'([^']+)',\n")
    title = match1(html, r"title: '([^']+)'.replace")
    # call the API to get the source video file list
    video_file_list = get_file_by_vid(video_id)
    _type, ext, size = url_info(video_file_list[0].url)
    print_info(site_info=site_info, title=title, type=_type, size=size)
    if not info_only:
        download_urls([video_file_list[0].url], title, ext, size, **kwargs)