def bangumi_entry(self, **kwargs):
    """Resolve one bangumi episode from ``self.url`` and download it.

    The episode id is taken from the URL fragment when there is one;
    otherwise it is read from the ``first_ep_id = "..."`` assignment found
    in ``self.page``. The episode metadata fetched for that id supplies the
    ``danmaku`` value, which is handed to ``self.download_by_vid`` as the
    cid. ``self.title`` is extended with the episode index and, when it is
    non-empty, the episode's own title.

    Args:
        **kwargs: Forwarded unchanged to ``self.download_by_vid``.

    Raises:
        Exception: If neither the URL fragment nor the page yields an
            episode id.
    """
    bangumi_id = re.search(r'(\d+)', self.url).group(1)
    bangumi_data = get_bangumi_info(bangumi_id)
    bangumi_payment = bangumi_data.get('payment')
    if bangumi_payment and bangumi_payment['price'] != '0':
        log.w("It's a paid item")

    episode_id = urllib.parse.urlparse(self.url).fragment
    if not episode_id:
        # No "#<episode>" fragment: fall back to the first episode named in
        # the page. Use the captured digits, not the match object itself,
        # otherwise the match's repr ends up inside the request URL.
        first_ep = re.search(r'first_ep_id\s*=\s*"(\d+)"', self.page)
        if first_ep is None:
            raise Exception('Unsupported page {}'.format(self.url))
        episode_id = first_ep.group(1)

    cont = get_content(
        'http://bangumi.bilibili.com/web_api/episode/{}.json'.format(
            episode_id
        )
    )
    ep_info = json.loads(cont)['result']['currentEpisode']

    index_title = ep_info['indexTitle']  # episode number
    long_title = ep_info['longTitle'].strip()  # title of this episode
    cid = ep_info['danmaku']

    if long_title:
        # When the episode has a title, separate it from the index with a
        # single space.
        long_title = ' {}'.format(long_title)

    # Resulting title is either "name 1 title" or just "name 1".
    self.title = '{} {}{}'.format(self.title, index_title, long_title)
    self.download_by_vid(cid, bangumi=True, **kwargs)
def qq_download_by_vid(vid, title, output_dir='.', merge=True,
                       info_only=False):
    """Print info for, and optionally download, a Tencent video by vid.

    Fetches the ``getinfo`` metadata for ``vid``, then asks ``getkey`` for
    an access key for each segment of the last listed format. Segment URLs
    are collected until a segment has no usable key. The ``title`` argument
    is replaced by the title reported in the metadata.

    Args:
        vid: Tencent video id.
        title: Overwritten by the ``ti`` field of the metadata.
        output_dir: Passed to ``download_urls``.
        merge: Passed to ``download_urls``.
        info_only: When true, only ``print_info`` is called.
    """
    info_url = ('http://vv.video.qq.com/getinfo?otype=json&appver=3.2.19.333'
                '&platform=11&defnpayver=1&vid={}'.format(vid))
    raw_info = get_content(info_url)
    # The response is "QZOutputJson=<json>;" -- drop the trailing character.
    video_json = json.loads(match1(raw_info, r'QZOutputJson=(.*)')[:-1])

    video = video_json['vl']['vi'][0]
    fn_pre = video['lnk']
    title = video['ti']
    host = video['ul']['ui'][0]['url']
    streams = video_json['fl']['fi']
    seg_cnt = video['cl']['fc'] or 1 if video['cl']['fc'] == 0 \
        else video['cl']['fc']

    # The last entry of the format list is the one that gets downloaded.
    part_format_id = streams[-1]['id']

    part_urls = []
    total_size = 0
    part = 1
    while part <= seg_cnt:
        filename = '{}.p{}.{}.mp4'.format(
            fn_pre, str(part_format_id % 10000), str(part))
        key_url = ('http://vv.video.qq.com/getkey?otype=json&platform=11&'
                   'format={}&vid={}&filename={}&appver=3.2.19.333'.format(
                       part_format_id, vid, filename))
        raw_key = get_content(key_url)
        key_json = json.loads(match1(raw_key, r'QZOutputJson=(.*)')[:-1])

        if key_json.get('key') is not None:
            vkey = key_json['key']
            url = '{}{}?vkey={}'.format(host, filename, vkey)
        else:
            # No per-segment key: use the key from the metadata and the
            # unsegmented file name.
            vkey = video['fvkey']
            url = '{}{}?vkey={}'.format(host, fn_pre + '.mp4', vkey)

        if not vkey:
            report = log.wtf if part == 1 else log.w
            report(key_json['msg'])
            break

        part_urls.append(url)
        _, ext, size = url_info(url)
        total_size += size
        part += 1

    print_info(site_info, title, ext, total_size)
    if info_only:
        return
    download_urls(part_urls, title, ext, total_size, output_dir=output_dir,
                  merge=merge)
def entry(self, **kwargs):
    """Work out which player backs ``self.page`` and start the download.

    Sources are tried in this order:

    1. A Tencent player (``bili-cid=...&bili-aid=...&vid=...`` in the
       page): handled by ``qq_download_by_vid``.
    2. A ``cid`` found in the page (``cid=<n>`` or ``"cid":<n>``): handled
       by ``self.download_by_vid``.
    3. A ``flashvars="..."`` attribute whose first parameter is ``vid``
       (``sina_download_by_vid``) or ``ykid`` (``youku_download_by_vid``).

    Args:
        **kwargs: ``output_dir``, ``merge`` and ``info_only`` are read for
            the Tencent and flashvars downloaders; everything is forwarded
            to ``self.download_by_vid`` in the cid case.

    Raises:
        Exception: If the page has neither a cid nor flashvars.
        NotImplementedError: If the first flashvars parameter is neither
            ``vid`` nor ``ykid``.
    """
    # tencent player
    tencent = re.search(
        r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"', self.page
    )
    if tencent is not None:
        self.out = True
        qq_download_by_vid(
            tencent.group(1), self.title, output_dir=kwargs['output_dir'],
            merge=kwargs['merge'], info_only=kwargs['info_only']
        )
        return

    has_plist = re.search(r'<option', self.page)
    if has_plist and r1(r'index_(\d+).html', self.url) is None:
        log.w(
            'This page contains a playlist. (use --playlist to download '
            'all videos.)'
        )

    # Test the match objects instead of calling .group() on a possible
    # None, so that a page without any cid falls through to the flashvars
    # handling below rather than failing with AttributeError.
    cid_match = (re.search(r'cid=(\d+)', self.page)
                 or re.search(r'"cid":(\d+)', self.page))
    if cid_match is not None:
        self.download_by_vid(
            cid_match.group(1), re.search('bangumi', self.url) is not None,
            **kwargs
        )
        return

    # flashvars?
    flashvars_match = re.search(r'flashvars="([^"]+)"', self.page)
    if flashvars_match is None:
        raise Exception('Unsupported page {}'.format(self.url))
    flashvars = flashvars_match.group(1)

    # Only the first "key=value" pair decides the backend. partition()
    # keeps any further '=' characters inside the value.
    t, _, cid = flashvars.split('&')[0].partition('=')
    t = t.strip()
    cid = cid.strip()
    if t == 'vid':
        sina_download_by_vid(
            cid, self.title, output_dir=kwargs['output_dir'],
            merge=kwargs['merge'], info_only=kwargs['info_only']
        )
    elif t == 'ykid':
        youku_download_by_vid(
            cid, self.title, output_dir=kwargs['output_dir'],
            merge=kwargs['merge'], info_only=kwargs['info_only']
        )
    else:
        raise NotImplementedError(
            'Unknown flashvars {}'.format(flashvars)
        )
    return
def set_socks_proxy(proxy):
    """Send all newly created sockets through a SOCKS5 proxy.

    Replaces ``socket.socket`` with ``socks.socksocket`` and replaces
    ``socket.getaddrinfo`` with a stub, so this changes process-wide state.
    If the PySocks ``socks`` module cannot be imported, a warning is logged
    and nothing is changed.

    Args:
        proxy: Proxy address as ``"host:port"``.
    """
    try:
        import socks
    except ImportError:
        log.w('Error importing PySocks library, socks proxy ignored. '
              'In order to use socks proxy, please install PySocks.')
        return

    socks_proxy_addrs = proxy.split(':')
    socks.set_default_proxy(socks.SOCKS5,
                            socks_proxy_addrs[0],
                            int(socks_proxy_addrs[1]))
    socket.socket = socks.socksocket

    def getaddrinfo(*args):
        # Hand host and port back unresolved as a single IPv4/TCP entry
        # (6 is the TCP protocol number).
        # NOTE(review): presumably so that name resolution is left to the
        # proxy instead of the local resolver -- confirm.
        return [(socket.AF_INET, socket.SOCK_STREAM, 6, '',
                 (args[0], args[1]))]

    socket.getaddrinfo = getaddrinfo
def vimeo_download_by_channel_id(channel_id, info_only=False, **kwargs):
    """Download every video listed for a Vimeo channel.

    str/int->None

    The channel's video list is fetched from the Vimeo API using the
    module-level ``access_token``. Each video id is taken from the entry's
    ``uri`` and passed to ``vimeo_download_by_id``. A ``URLError`` on one
    video is logged and the remaining videos are still attempted.

    Args:
        channel_id: Channel identifier (string or int).
        info_only: Forwarded to ``vimeo_download_by_id``.
        **kwargs: Forwarded to ``vimeo_download_by_id``.
    """
    html = get_content(
        'https://api.vimeo.com/channels/{channel_id}/videos?access_token='
        '{access_token}'.format(
            channel_id=channel_id, access_token=access_token
        )
    )
    data = json.loads(html)
    id_list = [match1(item['uri'], r'/videos/(\w+)') for item in data['data']]

    for video_id in id_list:
        try:
            vimeo_download_by_id(video_id, None, info_only, **kwargs)
        except urllib.error.URLError as e:
            # Report the id that failed (not the builtin ``id`` function).
            log.w('{} failed with {}'.format(video_id, e))
def parse_cid_playurl(xml):
    """Extract per-CDN URL lists and the total size from a playurl XML.

    Every ``<durl>`` element contributes its ``<size>`` to the total and
    its ``<url>`` elements, in document order, to the list with the same
    position. The number of lists equals the number of ``<url>`` elements
    inside the first ``<durl>``.

    Args:
        xml: The XML document as a ``str``.

    Returns:
        ``(urls_list, total_size)`` where ``urls_list[i]`` holds the i-th
        URL of every ``<durl>``. On any error the exception is logged with
        ``log.w`` and ``([], 0)`` is returned.
    """
    from xml.dom.minidom import parseString

    try:
        document = parseString(xml.encode('utf-8'))
        durl_nodes = document.getElementsByTagName('durl')

        mirror_count = len(durl_nodes[0].getElementsByTagName('url'))
        per_cdn = [[] for _ in range(mirror_count)]

        size_sum = 0
        for node in durl_nodes:
            size_node = node.getElementsByTagName('size')[0]
            size_sum += int(size_node.firstChild.nodeValue)
            url_nodes = node.getElementsByTagName('url')
            for idx, url_node in enumerate(url_nodes):
                per_cdn[idx].append(url_node.firstChild.nodeValue)
        return per_cdn, size_sum
    except Exception as err:
        log.w(err)
        return [], 0
def fetch_cna():
    """Return a URL-quoted ``cna`` cookie value.

    Lookup order:

    1. A cookie named ``cna`` for domain ``.youku.com`` in the module-level
       ``cookies`` collection.
    2. A ``cna`` cookie set by ``http://log.mmstat.com/eg.js``.
    3. A hard-coded fallback value, after logging a warning.

    Returns:
        The cookie value, passed through ``urllib.parse.quote`` unless it
        already contains a ``%``.
    """
    def quote_cna(val):
        # A value containing '%' is returned untouched.
        # NOTE(review): presumably because it is already percent-encoded.
        if '%' in val:
            return val
        return urllib.parse.quote(val)

    if cookies:
        for cookie in cookies:
            if cookie.name == 'cna' and cookie.domain == '.youku.com':
                log.i('Found cna in imported cookies. Use it')
                return quote_cna(cookie.value)

    url = 'http://log.mmstat.com/eg.js'
    # Close the response as soon as the headers have been read.
    with urllib.request.urlopen(url) as response:
        headers = response.getheaders()

    for header_name, header_value in headers:
        if header_name.lower() != 'set-cookie':
            continue
        name_value = header_value.split(';')[0]
        # Split on the first '=' only: a cookie value may itself contain
        # '=' characters.
        name, _, value = name_value.partition('=')
        if name == 'cna':
            return quote_cna(value)

    log.w('It seems that the client failed to fetch a cna cookie. '
          'Please load your own cookie if possible')
    return quote_cna('DOG4EdW4qzsCAbZyXbU+t7Jt')
def showroom_download_by_room_id(room_id, info_only=False, **kwargs):
    """Source: Android mobile

    Polls the Showroom streaming-URL API once per second until it returns
    a non-empty reply, picks the default HLS stream, resolves the room
    title from the profile API and, unless ``info_only`` is set, downloads
    the stream through ``download_url_ffmpeg``.

    Args:
        room_id: Showroom room identifier.
        info_only: When true, only ``print_info`` is called.
        **kwargs: Forwarded to ``download_url_ffmpeg``.
    """
    streaming_api = (
        'https://www.showroom-live.com/api/live/streaming_url?room_id='
        '{room_id}&_={timestamp}'
    )
    while True:
        # Millisecond timestamp sent as the "_" query parameter.
        now_ms = str(int(time() * 1000))
        reply = json.loads(get_content(
            streaming_api.format(room_id=room_id, timestamp=now_ms)))
        if len(reply) >= 1:
            break
        log.w('The live show is currently offline.')
        sleep(1)

    # This is mainly for testing the M3U FFmpeg parser so I would ignore
    # any non-m3u ones
    default_hls = [
        candidate['url'] for candidate in reply['streaming_url_list']
        if candidate['is_default'] and candidate['type'] == 'hls'
    ]
    stream_url = default_hls[0]
    assert stream_url

    # title
    profile_url = (
        'https://www.showroom-live.com/api/room/profile?room_id='
        '{room_id}'
    ).format(room_id=room_id)
    profile = json.loads(get_content(profile_url))
    try:
        title = profile['main_name']
    except KeyError:
        # No room name in the profile: fall back to a generated one.
        title = 'Showroom_{room_id}'.format(room_id=room_id)

    media_type, _, size = url_info(stream_url)
    print_info(site_info, title, media_type, size)
    if info_only:
        return
    download_url_ffmpeg(url=stream_url, title=title, ext='mp4', **kwargs)
def prepare(self, **kwargs):
    """Resolve the video id, query the API and populate stream metadata.

    Steps, in order:

    1. Make sure ``self.vid`` is set, deriving it from ``self.url`` or the
       page when needed.
    2. Take a password from ``kwargs['password']`` if one was supplied.
    3. Fetch a ``cna`` value into ``self.utid`` and call
       ``self.youku_ups()``.
    4. If no stream data came back, retry once for error ``-6001`` (after
       re-reading the vid from the page) and once for error ``-2002``
       (after prompting for a password); otherwise give up through
       ``log.wtf``.
    5. Fill ``self.title``, ``self.streams`` and, when present,
       ``self.audiolang`` from ``self.api_data``.

    Args:
        **kwargs: Only ``password`` is read here.
    """
    assert self.url or self.vid

    if self.url and not self.vid:
        self.get_vid_from_url()

        if self.vid is None:
            self.get_vid_from_page()

            if self.vid is None:
                log.wtf('Cannot fetch vid')

    if kwargs.get('password'):
        self.password_protected = True
        self.password = kwargs['password']

    self.utid = fetch_cna()
    time.sleep(3)
    self.youku_ups()

    if self.api_data.get('stream') is None:
        if self.api_error_code == -6001:  # wrong vid parsed from the page
            vid_from_url = self.vid
            self.get_vid_from_page()
            if vid_from_url == self.vid:
                # The page yields the same vid, so a retry cannot help.
                log.wtf(self.api_error_msg)
            self.youku_ups()

    if self.api_data.get('stream') is None:
        if self.api_error_code == -2002:  # wrong password
            # It can be True already (from cli); offer another chance to
            # retry.
            self.password_protected = True
            # NOTE(review): this statement was unreadable in the reviewed
            # source; the colour argument and the retry call below are
            # reconstructed -- confirm against the upstream file.
            self.password = input(log.sprint('Password: ', log.YELLOW))
            self.youku_ups()

    if self.api_data.get('stream') is None:
        if self.api_error_msg:
            log.wtf(self.api_error_msg)
        else:
            log.wtf('Unknown error')

    self.title = self.api_data['video']['title']
    stream_types = {i['id']: i for i in self.stream_types}
    # Only streams in the same audio language as the first one are kept.
    audio_lang = self.api_data['stream'][0]['audio_lang']

    for stream in self.api_data['stream']:
        stream_id = stream['stream_type']
        if stream_id not in stream_types \
                or stream['audio_lang'] != audio_lang:
            continue

        if 'alias-of' in stream_types[stream_id]:
            stream_id = stream_types[stream_id]['alias-of']

        if stream_id not in self.streams:
            self.streams[stream_id] = {
                'container': stream_types[stream_id]['container'],
                'video_profile': stream_types[stream_id]['video_profile'],
                'size': stream['size'],
                'pieces': [{
                    'segs': stream['segs']
                }],
                'm3u8_url': stream['m3u8_url'],
                'src': [],
            }
        else:
            # Another entry for a stream already seen: accumulate.
            self.streams[stream_id]['size'] += stream['size']
            self.streams[stream_id]['pieces'].append(
                {'segs': stream['segs']})

        # A segment without a cdn_url marks the stream as a preview.
        src = []
        is_preview = False
        for seg in stream['segs']:
            if seg.get('cdn_url'):
                src.append(self.__class__.change_cdn(seg['cdn_url']))
            else:
                is_preview = True
        self.streams[stream_id]['src'].extend(src)

        if is_preview:
            log.w('{} is a preview'.format(stream_id))

    # Audio languages
    if 'dvd' in self.api_data:
        al = self.api_data['dvd'].get('audiolang')
        if al:
            self.audiolang = al
            for i in self.audiolang:
                i['url'] = 'http://v.youku.com/v_show/id_{}'.format(
                    i['vid'])