def info(self, url):
    """Return (album title, list of episode page urls) for a sohu.com playlist.

    Raises ValueError when *url* is not a sohu.com video url.
    """
    if url.find('sohu.com') < 0:
        raise ValueError('not a sohu.com video url')
    import json
    import re
    page = HttpUtil().get(url)
    # the video page embeds its playlist id as: var playlistId="...";
    playlist_id = re.findall(r'var playlistId="(?P<s>[^"]*?)";', page)[0]
    list_url = r'http://pl.hd.sohu.com/videolist?playlistid=%s' % playlist_id
    album = json.loads(HttpUtil().get(list_url), encoding='gbk')
    episode_urls = [video['pageUrl'] for video in album['videos']]
    return album['albumName'], episode_urls
def __info(self, url, vidfmt):
    """Resolve a yytingting.com book url into a list of (name, play_url).

    Walks the paged playAndDownload.action listing for the book id found
    in *url*, then resolves each resource id through getPlayUrl.action.
    (vidfmt is accepted for interface symmetry but unused here.)
    """
    parse_url = 'http://www.yytingting.com/bookstore/playAndDownload.action?' \
                'id=%s&pageNo=%d&pageSize=%d'
    id = _util.r1('bookId=(\d+)', url)
    http = HttpUtil()
    http.add_header('Referer', url)
    tmp = parse_url % (id, 1, 20)
    info = http.get(tmp)
    js = json.loads(info)
    data = js['data']['data']
    pageNo = js['data']['pageNo']
    pageSize = js['data']['pageSize']
    total = js['data']['total']
    urls1 = []
    # BUG FIX: range(total/pageSize) truncated, silently dropping the
    # trailing partial page (e.g. total=25, pageSize=20 fetched only
    # page 1 and lost 5 items). Use ceiling division instead.
    page_count = (total + pageSize - 1) // pageSize
    for i in range(page_count):
        url = parse_url % (id, i + 1, pageSize)
        html = http.get(url)
        js = json.loads(html)
        fmt = 'http://www.yytingting.com/resource/getPlayUrl.action?id=%d&type=6'
        urls1 = urls1 + [(data['resName'], fmt % data['resId'])
                         for data in js['data']['data']]
    urls = []
    # second pass: exchange each resource id for its actual media url
    for name, url in urls1:
        html = http.get(url)
        js = json.loads(html)
        urls.append((name, js['data']['url']))
    return urls
def info(self, url, merge=True, vidfmt=0):
    """Resolve a sohu video page into (urls, title, ext, npf, headers).

    vidfmt selects quality: 0=normal, 1=high, 2=super, 3=original
    (format_op = ["norVid", "highVid", "superVid", "oriVid"]).

    BUG FIX: the receiver was misspelled 'slef'; it is positional-only
    for method calls, so renaming it to 'self' is caller-compatible.
    """
    assert vidfmt in (0, 1, 2, 3)
    http = HttpUtil()
    vid_page = http.get(url)
    vid = r1('vid="(\d+)"', vid_page)
    if not vid:
        vid = r1('vid:\s*\'(\d+)\'', vid_page)
    assert vid
    import json
    html = http.get('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)
    data = json.loads(html.decode(http.parse_charset()))
    if vidfmt > 0:
        # every quality level has its own vid; look it up and re-query
        format_op = ["norVid", "highVid", "superVid", "oriVid"]
        vid = data['data'][format_op[vidfmt]]
        html = http.get('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)
        data = json.loads(html.decode(http.parse_charset()))
    host = data['allot']
    prot = data['prot']
    urls = []
    data = data['data']
    title = data['tvName']
    size = sum(data['clipsBytes'])
    assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
    # each clip must be resolved through the dispatch server individually
    for file, new in zip(data['clipsURL'], data['su']):
        urls.append(real_url(host, prot, file, new))
    assert data['clipsURL'][0].endswith('.mp4')
    return urls, title, 'mp4', 5, None
def info(self, url):
    """Return (title, items) scraped from a youku.com video page.

    Raises ValueError when *url* is not a youku.com video url.
    Side effect: caches the result on self.title / self.items.
    """
    if 'youku.com' not in url:
        raise ValueError('not a youku.com video url')
    page = HttpUtil().get(url)
    soup = BeautifulSoup(page)
    self.title = self.__title(page, soup)
    self.items = self.__items(page, soup)
    return self.title, self.items
def __init__(self, axel, proxy=None, log=None):
    """Initialise worker state: url bookkeeping, task queue, HTTP client.

    An optional *proxy* is installed on the private HttpUtil instance.
    """
    ThreadBase.__init__(self, log=log)
    self.__axel = axel
    self.__oldurls = []
    self.__urltsks_q = Queue.Queue()
    self.__http = HttpUtil()
    self.__progress_bar = ProgressBar()
    if proxy:
        self.__http.set_proxy(proxy)
def w56_download_by_id(id, refer, vidfmt=0, merge=True):
    """Resolve a 56.com video *id* into ([url], title, ext, npf, headers).

    vidfmt selects quality: 0=normal, 1=clear, 2=super.
    (refer and merge are accepted for interface symmetry but unused.)
    """
    html = HttpUtil().get('http://vxml.56.com/json/%s/?src=site' % id)
    info = json.loads(html)['info']
    title = info['Subject']
    assert vidfmt in (0, 1, 2)
    # renamed from 'type' to avoid shadowing the builtin
    quality = ['normal', 'clear', 'super'][vidfmt]
    files = [x for x in info['rfiles'] if x['type'] == quality]
    assert len(files) == 1
    url = files[0]['url']
    ext = r1(r'\.([^.]+)\?', url)
    assert ext in ('flv', 'mp4')
    return [url], title, str(ext), 1, None
def __get_content_len(self, url):
    """Return the Content-Length of *url* in bytes, or 0 when unknown.

    Tries a cheap HEAD request first; falls back to opening a GET
    response just to read its headers (closed without reading the body).
    """
    http = HttpUtil()
    if self.proxy:
        http.set_proxy(self.proxy)
    info = http.head(url)
    if 200 <= info.status < 300:
        # 'in' instead of the deprecated dict.has_key()
        if 'Content-Length' in info.msg.dict:
            return int(info.getheader('Content-Length'))
    try:
        resp = http.get_response(url)
    except urllib2.URLError as e:
        self.log.warn('%s \n %s', e.reason, url)
        return 0
    try:
        if 200 <= resp.code < 300:
            length = resp.headers.get('Content-Length')
            # BUG FIX: a 2xx response without Content-Length used to
            # crash on int(None); now falls through to return 0.
            if length is not None:
                return int(length)
        # BUG FIX: a non-2xx response used to fall off the end,
        # returning None and leaking the response unclosed.
        return 0
    finally:
        resp.close()
def info(self, url, vidfmt):
    """Ask flvcd.com to parse *url*; return (urls, title, ext, npf, headers).

    vidfmt selects quality: 0=default, 1=high, 2=super, 3=real.
    Returns ([], '', None, 0, None) when flvcd cannot parse the page.
    """
    parse_url = 'http://www.flvcd.com/parse.php?'
    parse_url += 'kw=' + quote(url)
    parse_url += '&flag=one'
    # renamed from 'format' to avoid shadowing the builtin
    quality = ['', 'high', 'super', 'real']
    if vidfmt > 0:
        parse_url += '&format=%s' % quality[vidfmt]
    parse_url += "&Go=1&go=1"  # 20150723
    http = HttpUtil()
    http.add_header('Referer', parse_url)
    # removed leftover debug 'print parse_url'
    try:
        html = http.get(parse_url).decode('gb2312', 'ignore')
        from bs4 import BeautifulSoup
        soup = BeautifulSoup(html)
        m3u = soup.find('input', attrs={'name': 'inf'}).get('value')
        title = soup.find('input', attrs={'name': 'name'}).get('value')
    except Exception:
        # flvcd did not recognise the url (or its page layout changed);
        # signal "not supported" through the empty result tuple
        return [], '', None, 0, None
    urls = m3u.split('|')  # split() already yields a fresh list
    npf, headers = host_filter(url)
    return urls, title, None, npf, headers
def real_url(host, prot, file, new):
    """Resolve one sohu clip through the dispatch server to a playable url.

    The server answers with a 9-field '|'-separated record: field 0 is
    the url prefix (last character stripped), field 2 a host, field 3
    the access key appended as ?key=.
    """
    dispatch = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
    reply = HttpUtil().get(dispatch)
    start, _, host, key, _, _, _, _, _ = reply.split('|')
    return '%s%s?key=%s' % (start[:-1], new, key)