Example 1
0
 def info(self, url):
     """Fetch the episode list for a sohu.com album page.

     Returns (title, items) where title is the album name and items is
     the list of per-episode page URLs.
     Raises ValueError when *url* is not a sohu.com URL or when no
     playlistId can be found in the page.
     """
     if url.find('sohu.com') < 0:
         raise ValueError('not a sohu.com video url')
     import json
     import re
     html = HttpUtil().get(url)
     # BUG FIX: findall()[0] raised a bare IndexError when the page layout
     # changed; fail with a descriptive error instead.
     matches = re.findall(r'var playlistId="(?P<s>[^"]*?)";', html)
     if not matches:
         raise ValueError('no playlistId found in page: %s' % url)
     playlistid = matches[0]
     url = r'http://pl.hd.sohu.com/videolist?playlistid=%s' % playlistid
     # the videolist endpoint serves GBK-encoded JSON (Python 2 kwarg;
     # the `encoding` parameter was removed in Python 3.9)
     data = json.loads(HttpUtil().get(url), encoding='gbk')
     title = data['albumName']
     items = [video['pageUrl'] for video in data['videos']]
     return title, items
Example 2
0
    def __info(self, url, vidfmt):
        """Collect (name, play-url) pairs for every audio resource of a
        yytingting.com book page.

        A first probe request discovers the paging parameters, then each
        page is fetched and every resource is resolved through the
        getPlayUrl endpoint.  Returns a list of (resName, direct_url).
        """
        parse_url = 'http://www.yytingting.com/bookstore/playAndDownload.action?' \
                    'id=%s&pageNo=%d&pageSize=%d'
        id = _util.r1('bookId=(\d+)', url)
        http = HttpUtil()
        http.add_header('Referer', url)
        # probe request: only the paging metadata is used from it
        js = json.loads(http.get(parse_url % (id, 1, 20)))
        pageSize = js['data']['pageSize']
        total = js['data']['total']

        fmt = 'http://www.yytingting.com/resource/getPlayUrl.action?id=%d&type=6'
        urls1 = []
        # BUG FIX: range(total/pageSize) truncated, silently dropping the
        # last partial page (and yields a float under Python 3).  Use
        # ceiling division so every page is fetched.
        pages = (total + pageSize - 1) // pageSize
        for i in range(pages):
            js = json.loads(http.get(parse_url % (id, i + 1, pageSize)))
            urls1 += [(d['resName'], fmt % d['resId']) for d in js['data']['data']]

        urls = []
        for name, res_url in urls1:
            js = json.loads(http.get(res_url))
            urls.append((name, js['data']['url']))
        return urls
Example 3
0
 def info(self, url, merge=True, vidfmt=0):
     """Resolve a sohu.com video page to its clip URLs.

     vidfmt selects the quality: format_op = ["norVid", "highVid",
     "superVid", "oriVid"].  Returns (urls, title, 'mp4', 5, None).
     """
     # BUG FIX: the first parameter was misspelled `slef`.
     assert vidfmt in (0, 1, 2, 3)
     http = HttpUtil()
     vid_page = http.get(url)
     # the vid appears in two different markups depending on the page
     vid = r1('vid="(\d+)"', vid_page)
     if not vid:
         vid = r1('vid:\s*\'(\d+)\'', vid_page)
     assert vid
     import json
     html = http.get('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)
     data = json.loads(html.decode(http.parse_charset()))
     if vidfmt > 0:
         # re-query with the vid of the requested quality level
         format_op = ["norVid", "highVid", "superVid", "oriVid"]
         vid = data['data'][format_op[vidfmt]]
         html = http.get('http://hot.vrs.sohu.com/vrs_flash.action?vid=%s' % vid)
         data = json.loads(html.decode(http.parse_charset()))
     host = data['allot']
     prot = data['prot']
     urls = []
     data = data['data']
     title = data['tvName']
     size = sum(data['clipsBytes'])
     assert len(data['clipsURL']) == len(data['clipsBytes']) == len(data['su'])
     for file, new in zip(data['clipsURL'], data['su']):
         urls.append(real_url(host, prot, file, new))
     assert data['clipsURL'][0].endswith('.mp4')
     return urls, title, 'mp4', 5, None
Example 4
0
 def info(self, url):
     """Return (title, items) for a youku.com video page.

     Raises ValueError when *url* is not hosted on youku.com.
     Also stores the results on self.title / self.items.
     """
     if 'youku.com' not in url:
         raise ValueError('not a youku.com video url')
     page = HttpUtil().get(url)
     soup = BeautifulSoup(page)
     self.title = self.__title(page, soup)
     self.items = self.__items(page, soup)
     return self.title, self.items
Example 5
0
 def __init__(self, axel, proxy=None, log=None):
     """Set up the worker thread: task queue, HTTP client and progress bar.

     proxy, when given, is installed on the internal HttpUtil instance.
     """
     ThreadBase.__init__(self, log=log)
     self.__axel = axel
     self.__oldurls = []
     self.__urltsks_q = Queue.Queue()
     self.__progress_bar = ProgressBar()
     self.__http = HttpUtil()
     if proxy:
         self.__http.set_proxy(proxy)
Example 6
0
def w56_download_by_id(id, refer, vidfmt=0, merge=True):
    """Resolve a 56.com video id to its direct media URL.

    vidfmt selects the quality (0=normal, 1=clear, 2=super).
    Returns ([url], title, ext, 1, None); ext is 'flv' or 'mp4'.
    """
    html = HttpUtil().get('http://vxml.56.com/json/%s/?src=site' % id)
    info = json.loads(html)['info']
    title = info['Subject']
    assert vidfmt in (0, 1, 2)
    quality = ['normal', 'clear', 'super'][vidfmt]
    # exactly one rfile entry is expected per quality level
    matched = [f for f in info['rfiles'] if f['type'] == quality]
    assert len(matched) == 1
    size = int(matched[0]['filesize'])
    url = matched[0]['url']
    ext = r1(r'\.([^.]+)\?', url)
    assert ext in ('flv', 'mp4')
    return [url], title, str(ext), 1, None
Example 7
0
 def __get_content_len(self, url):
     """Return the Content-Length of *url* in bytes, or 0 when it cannot
     be determined.

     Tries a cheap HEAD request first, then falls back to opening a GET
     response and reading the header from it.
     """
     http = HttpUtil()
     if self.proxy:
         http.set_proxy(self.proxy)
     info = http.head(url)
     if 200 <= info.status < 300:
         if info.msg.dict.has_key('Content-Length'):
             return int(info.getheader('Content-Length'))
     # HEAD gave no usable answer; fall back to a full GET response
     try:
         resp = http.get_response(url)
     except urllib2.URLError as e:
         self.log.warn('%s \n %s', e.reason, url)
         return 0
     try:
         if 200 <= resp.code < 300:
             # assert resp.has_header('Accept-Ranges')
             return int(resp.headers.get('Content-Length'))
         # BUG FIX: previously fell off the end returning implicit None
         # on a non-2xx status; return 0 like the error path above.
         return 0
     finally:
         # BUG FIX: the response was leaked (never closed) on non-2xx.
         resp.close()
Example 8
0
 def info(self, url, vidfmt):
     """Resolve *url* through the flvcd.com parse service.

     vidfmt: 0 = default, 1 = high, 2 = super, 3 = real quality.
     Returns (urls, title, ext, npf, headers); on any parse failure the
     empty result ([], '', None, 0, None) is returned instead of raising
     (deliberate best-effort — callers treat it as "not supported").
     """
     parse_url = 'http://www.flvcd.com/parse.php?'
     parse_url += 'kw=' + quote(url)
     parse_url += '&flag=one'
     format = ['', 'high', 'super', 'real']
     if vidfmt > 0:
         parse_url += '&format=%s' % format[vidfmt]
     parse_url += "&Go=1&go=1"  # 20150723
     http = HttpUtil()
     http.add_header('Referer', parse_url)
     try:
         # flvcd serves GB2312-encoded pages
         html = http.get(parse_url).decode('gb2312', 'ignore')
         from bs4 import BeautifulSoup
         soup = BeautifulSoup(html)
         m3u = soup.find('input', attrs={'name': 'inf'}).get('value')
         title = soup.find('input', attrs={'name': 'name'}).get('value')
     except Exception:
         # raise ValueError('not support')
         return [], '', None, 0, None
     # BUG FIX: removed a leftover debug `print` and the identity
     # comprehension over split('|').
     urls = m3u.split('|')
     npf, headers = host_filter(url)
     return urls, title, None, npf, headers
Example 9
0
def real_url(host, prot, file, new):
    """Ask the sohu dispatch server for the final clip URL.

    The server answers a '|'-separated record of exactly nine fields;
    the host and key fields are combined with *new* to build the URL.
    """
    query = 'http://%s/?prot=%s&file=%s&new=%s' % (host, prot, file, new)
    reply = HttpUtil().get(query)
    # strict 9-way unpack: a malformed reply raises ValueError
    start, _, host, key, _, _, _, _, _ = reply.split('|')
    # start[:-1] drops the record's trailing separator character
    return '%s%s?key=%s' % (start[:-1], new, key)