import os
import sys
import traceback
from time import sleep
from urllib import urlencode
from cookielib import CookieJar, MozillaCookieJar
import urllib2
from urllib2 import Request, ProxyHandler, HTTPCookieProcessor, build_opener


class LSession():
    """urllib2-based session with persistent cookies, proxy support and retries."""

    def __init__(self, cookiefile=None, proxy=None, timeout=10, retime=30, sleept=3):
        self.timeout = timeout
        self.retime = retime  # number of retry attempts
        self.sleept = sleept  # seconds to sleep between retries
        # proxy e.g. '1.234.77.96:80'
        if cookiefile is None:
            self.cookiejar = CookieJar()
        else:
            self.cookiejar = MozillaCookieJar(filename=cookiefile)
            # Create the cookie file with a valid Netscape header if missing.
            if not os.path.isfile(cookiefile):
                open(cookiefile, 'w').write(MozillaCookieJar.header)
            self.cookiejar.load(filename=cookiefile, ignore_discard=True)
        self.cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.opener = build_opener(urllib2.HTTPRedirectHandler(),
                                   self.cookie_processor)
        if proxy:
            self.opener.add_handler(ProxyHandler({"http": proxy}))
        # For posting a file; MultipartPostHandler must be installed separately.
        try:
            import MultipartPostHandler
            self.opener.add_handler(MultipartPostHandler.MultipartPostHandler())
        except ImportError as e:
            print e
        self.response = None
        self.request = None
        self.header = []

    def add_header(self, k, v):
        self.header.append((k, v))

    def build_request(self, url, params=None):
        self.request = Request(url, params)
        # Use the previous response's URL as the Referer, if there is one.
        if self.response is not None:
            self.request.add_header('Referer', self.url())
        self.request.add_header(
            'User-Agent',
            'Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15')
        # Drain any extra headers queued via add_header().
        while self.header:
            _k, _v = self.header.pop()
            self.request.add_header(_k, _v)
        return self.request

    def __del__(self):
        self.save_cookie()

    def urlopen(self, req):
        # Retry on failure; returns None once all attempts are exhausted.
        retime = self.retime
        while retime > 0:
            try:
                return self.opener.open(req, timeout=self.timeout)
            except Exception:
                retime -= 1
                traceback.print_exc(file=sys.stdout)
                print 'Wait and retry...%d' % (self.retime - retime)
                sleep(self.sleept)

    def savefile(self, filename, url):
        # Stream the response to disk in 50 KiB chunks.
        self.response = self.urlopen(self.build_request(url))
        CHUNK = 50 * 1024
        with open(filename, 'wb') as fp:
            while True:
                chunk = self.response.read(CHUNK)
                if not chunk:
                    break
                fp.write(chunk)

    def post(self, url, post_data):
        self.response = self.urlopen(self.build_request(url, urlencode(post_data)))
        return self.response

    def post_raw(self, url, post_data):
        self.response = self.urlopen(self.build_request(url, post_data))
        return self.response

    def post_file(self, url, params):
        self.response = self.urlopen(self.build_request(url, params))
        return self.response

    def get(self, url):
        self.response = self.urlopen(self.build_request(url))
        return self.response

    def text(self, dec='gbk', enc='utf'):
        # Re-encode the response body, by default from GBK to UTF-8.
        return self.response.read().decode(dec).encode(enc)

    def url(self):
        return self.response.url

    def logout(self):
        self.cookiejar.clear()

    def Verify_proxy(self):
        pass

    def show_cookie(self):
        for i in self.cookiejar:
            print i

    def save_cookie(self):
        # A plain CookieJar (unlike MozillaCookieJar) has no save(); log and go on.
        try:
            self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        except Exception:
            traceback.print_exc(file=sys.stdout)
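# Minimal usage sketch for LSession, not part of the original module: the URL
# and cookie-file name below are hypothetical, and network access is required
# for the GET to succeed.
if __name__ == '__main__':
    s = LSession(cookiefile='cookies.txt', timeout=5, retime=3, sleept=1)
    s.add_header('Accept-Language', 'en')  # consumed by the next build_request()
    r = s.get('http://www.example.com/')   # GET with cookies, retries, redirects
    print s.url()     # final URL after redirects
    s.show_cookie()   # cookies collected so far
    s.save_cookie()   # persist them to cookies.txt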
import base64
import hashlib
import os
import re
import tempfile
import time
import urllib
import urllib2
from cookielib import MozillaCookieJar
from json import loads
from random import random
from urlparse import urlparse
from xml.dom.minidom import parseString

from bs4 import BeautifulSoup

# get_html, url_locations, create_website, the *_download_by_* helpers and the
# upper-case URL/key constants (APPKEY, APPSECRET, HOME_URL, LOGIN_URL, ...)
# are provided by the surrounding project.


class Bilibili():
    name = u'哔哩哔哩 (Bilibili)'

    api_url = 'http://interface.bilibili.com/playurl?'
    bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
    SEC1 = '94aba54af9065f71de72f5508f1cd42e'
    SEC2 = '9b288147e5474dd2aa67085f716c560d'
    supported_stream_profile = [u'流畅', u'高清', u'超清']
    stream_types = [{'id': 'hdflv'}, {'id': 'flv'}, {'id': 'hdmp4'},
                    {'id': 'mp4'}, {'id': 'live'}, {'id': 'vc'}]
    fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)

    def __init__(self, appkey=APPKEY, appsecret=APPSECRET, width=720, height=480):
        self.defaultHeader = {'Referer': 'http://www.bilibili.com'}
        self.appkey = appkey
        self.appsecret = appsecret
        self.WIDTH = width
        self.HEIGHT = height
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            key = None
            for ck in self.cj:
                if ck.name == 'DedeUserID':
                    key = ck.value
                    break
            if key is not None:
                self.is_login = True
                self.mid = str(key)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)
        # Drop any subtitle file left over from a previous run.
        try:
            os.remove(self._get_tmp_dir() + '/tmp.ass')
        except OSError:
            pass

    def _get_tmp_dir(self):
        try:
            return tempfile.gettempdir()
        except Exception:
            return ''

    def get_captcha(self, path=None):
        # A 'sid' cookie must exist before the captcha image can be fetched.
        key = None
        for ck in self.cj:
            if ck.name == 'sid':
                key = ck.value
                break
        if key is None:
            get_html(LOGIN_CAPTCHA_URL.format(random()),
                     headers={'Referer': 'https://passport.bilibili.com/login'})
        result = get_html(
            LOGIN_CAPTCHA_URL.format(random()),
            decoded=False,
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path is None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        import rsa
        result = loads(get_html(
            LOGIN_HASH_URL.format(random()),
            headers={'Referer': 'https://passport.bilibili.com/login'}))
        # Prepend the server-issued hash, then RSA-encrypt with the public key.
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        params['appkey'] = self.appkey
        data = ''
        # Keys must be sorted; urllib.urlencode(params) doesn't work here.
        keys = params.keys()
        keys.sort()
        for key in keys:
            data += '{}={}&'.format(key, urllib.quote(str(params[key])))
        data = data[:-1]  # remove last '&'
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(get_html(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        # Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {'title': u'电视剧',
                               'url': 'http://bangumi.bilibili.com/tv/',
                               'subs': []}
        category_dict['23'] = {'title': u'电影',
                               'url': 'http://bangumi.bilibili.com/movie/',
                               'subs': []}

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(get_html(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        items = [{tid: {'title': '全部', 'url': CATEGORY[tid]['url']}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_by_tag(self, tag=0, tid=0, page=1, pagesize=20):
        if tag == 0:
            url = LIST_BY_ALL.format(tid, pagesize, page)
        else:
            url = LIST_BY_TAG.format(tag, tid, pagesize, page)
        results = loads(get_html(url))
        return results

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=20):
        params = {'tid': tid, 'order': order, 'days': days, 'page': page,
                  'pagesize': pagesize}
        url = LIST_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = []
        for i in range(pagesize):
            if str(i) in result['list']:
                results.append(result['list'][str(i)])
        return results, result['pages']

    def get_my_info(self):
        if not self.is_login:
            return []
        result = loads(get_html(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = get_html(url, headers=self.defaultHeader)
        # The endpoint may answer with JSONP; unwrap the 'callback(...);' shell.
        if result[0] != '{':
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = loads(get_html(url, headers=self.defaultHeader))
        total_page = int((result['data']['page']['count'] + pagesize - 1)
                         / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url))
        return result['data']['list']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav_box(self):
        if not self.is_login:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        if self.is_login:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = ('cType=2&vcType=1&captcha={}&user={}&pwd={}'
                '&keep=true&gourl=http://www.bilibili.com/').format(
                    captcha, userid, pwd)
        result = get_html(LOGIN_URL, data,
                          {'Origin': 'https://passport.bilibili.com',
                           'Referer': 'https://passport.bilibili.com/login'})
        # A successful login sets the DedeUserID cookie.
        key = None
        for ck in self.cj:
            if ck.name == 'DedeUserID':
                key = ck.value
                break
        if key is None:
            return False, LOGIN_ERROR_MAP[loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(key)
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=20):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = [result]
        # Recursively pull later pages, shrinking pagesize as a depth budget.
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid, int(page) + 1, fav,
                                               pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        try:
            page = get_html(url)
            result = loads(page)
        except Exception:
            result = {}
        return result

    # Use niconvert to generate an .ass subtitle file from the danmaku comments.
    def parse_subtitle(self, cid):
        page_full_url = COMMENT_URL.format(cid)
        website = create_website(page_full_url)
        if website is None:
            return ''
        text = website.ass_subtitles_text(
            font_name=u'黑体',
            font_size=24,
            resolution='%d:%d' % (self.WIDTH, self.HEIGHT),
            line_count=12,
            bottom_margin=0,
            tune_seconds=0)
        with open(self._get_tmp_dir() + '/tmp.ass', 'w') as f:
            f.write(text.encode('utf8'))
        return 'tmp.ass'

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = parseString(get_html(url))
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            # Rewrite *.qqvideo.tc.qq.com hosts to the vsrc.store.qq.com mirror.
            if re.match(r'.*\.qqvideo\.tc\.qq\.com', u):
                u = re.sub(r'.*\.qqvideo\.tc\.qq\.com',
                           'http://vsrc.store.qq.com', u)
            urls.append(u)
            # urls.append(u + '|Referer={}'.format(
            #     urllib.quote('https://www.bilibili.com/')))
        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        get_html(url)

    def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs):
        ts = str(int(time.time()))
        if not bangumi:
            params_str = 'cid={}&player=1&quality={}&ts={}'.format(
                cid, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC1)).hexdigest()
            api_url = self.api_url + params_str + '&sign=' + chksum
        else:
            mod = 'movie' if bangumi_movie else 'bangumi'
            params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format(
                cid, mod, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC2)).hexdigest()
            api_url = self.bangumi_api_url + params_str + '&sign=' + chksum
        return get_html(api_url)

    def download_by_vid(self, cid, bangumi, **kwargs):
        stream_id = kwargs.get('stream_id')
        if stream_id and stream_id in self.fmt2qlt:
            quality = stream_id
        else:
            quality = 'hdflv' if bangumi else 'flv'
        level = kwargs.get('level', 0)
        xml = self.api_req(cid, level, bangumi, **kwargs)
        doc = parseString(xml)
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            urls.append(
                urllib.quote_plus(u + '|Referer=https://www.bilibili.com'))
        return urls

    def entry(self, **kwargs):
        # tencent player
        tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"',
                                 self.page)
        if tc_flashvars:
            tc_flashvars = tc_flashvars.group(1)
        if tc_flashvars is not None:
            self.out = True
            return qq_download_by_vid(tc_flashvars, self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])

        hit = re.search(r'cid=(\d+)', self.page)
        if hit is not None:
            cid = hit.group(1)
            return self.download_by_vid(cid, False, **kwargs)

        # flashvars?
        hit = re.search(r'flashvars="([^"]+)"', self.page)
        if hit is None:
            raise Exception('Unsupported page {}'.format(self.url))
        flashvars = hit.group(1)
        param = flashvars.split('&')[0]
        t, cid = param.split('=')
        t = t.strip()
        cid = cid.strip()
        if t == 'vid':
            sina_download_by_vid(cid, self.title,
                                 output_dir=kwargs['output_dir'],
                                 merge=kwargs['merge'],
                                 info_only=kwargs['info_only'])
        elif t == 'ykid':
            youku_download_by_vid(cid, self.title,
                                  output_dir=kwargs['output_dir'],
                                  merge=kwargs['merge'],
                                  info_only=kwargs['info_only'])
        elif t == 'uid':
            tudou_download_by_id(cid, self.title,
                                 output_dir=kwargs['output_dir'],
                                 merge=kwargs['merge'],
                                 info_only=kwargs['info_only'])
        else:
            raise NotImplementedError(
                'Unknown flashvars {}'.format(flashvars))
        return

    def movie_entry(self, **kwargs):
        patt = r"var\s*aid\s*=\s*'(\d+)'"
        aid = re.search(patt, self.page).group(1)
        page_list = loads(get_html(
            'http://www.bilibili.com/widget/getPageList?aid={}'.format(aid)))
        # better ideas for bangumi_movie titles?
        self.title = page_list[0]['pagename']
        return self.download_by_vid(page_list[0]['cid'], True,
                                    bangumi_movie=True, **kwargs)

    def get_video_from_url(self, url, **kwargs):
        self.url = url_locations(url)
        frag = urlparse(self.url).fragment
        # http://www.bilibili.com/video/av3141144/index_2.html#page=3
        if frag:
            hit = re.search(r'page=(\d+)', frag)
            if hit is not None:
                page = hit.group(1)
                av_id = re.search(r'av(\d+)', self.url).group(1)
                self.url = ('http://www.bilibili.com/video/av{}/'
                            'index_{}.html'.format(av_id, page))
        self.page = get_html(self.url)

        if 'bangumi.bilibili.com/movie' in self.url:
            return self.movie_entry(**kwargs)
        elif 'bangumi.bilibili.com' in self.url:
            return self.bangumi_entry(**kwargs)
        elif 'live.bilibili.com' in self.url:
            return self.live_entry(**kwargs)
        elif 'vc.bilibili.com' in self.url:
            return self.vc_entry(**kwargs)
        else:
            return self.entry(**kwargs)

    def bangumi_entry(self, **kwargs):
        pass

    def live_entry(self, **kwargs):
        pass

    def vc_entry(self, **kwargs):
        pass
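# Standalone sketch of the request-signing scheme implemented by
# Bilibili.api_sign(): parameters are serialized in sorted key order and an
# md5 of the query string plus the app secret is appended as `sign`. The
# 'demo_key'/'demo_secret' credentials below are hypothetical stand-ins for
# the project's APPKEY/APPSECRET constants.
import hashlib
import urllib


def sign_demo(params, appkey='demo_key', appsecret='demo_secret'):
    params['appkey'] = appkey
    query = '&'.join('{}={}'.format(k, urllib.quote(str(params[k])))
                     for k in sorted(params))
    return query + '&sign=' + hashlib.md5(query + appsecret).hexdigest()


if __name__ == '__main__':
    print sign_demo({'tid': 0, 'page': 1, 'pagesize': 20})
    # -> appkey=demo_key&page=1&pagesize=20&tid=0&sign=<32 hex digits>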
import base64
import hashlib
import json
import os
import random
import re
import tempfile
import urllib
import urllib2
from cookielib import MozillaCookieJar
from xml.dom import minidom

import requests
from bs4 import BeautifulSoup

# utils.get_page_content and the upper-case URL/key constants (APPKEY,
# APPSECRET, HOME_URL, LOGIN_URL, ...) are provided by the surrounding project.


class Bilibili():
    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            if 'DedeUserID' in requests.utils.dict_from_cookiejar(self.cj):
                self.is_login = True
                self.mid = str(
                    requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        # A 'sid' cookie must exist before the captcha image can be fetched.
        if 'sid' not in requests.utils.dict_from_cookiejar(self.cj):
            utils.get_page_content(
                LOGIN_CAPTCHA_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = utils.get_page_content(
            LOGIN_CAPTCHA_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path is None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encryped_pwd(self, pwd):
        import rsa
        result = json.loads(utils.get_page_content(
            LOGIN_HASH_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'}))
        # Prepend the server-issued hash, then RSA-encrypt with the public key.
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        params['appkey'] = self.appkey
        data = ""
        keys = params.keys()
        keys.sort()  # parameters must be signed in sorted key order
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if type(value) == int:
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        # Fix video and movie
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {'title': u'电视剧',
                               'url': 'http://bangumi.bilibili.com/tv/',
                               'subs': []}
        category_dict['23'] = {'title': u'电影',
                               'url': 'http://bangumi.bilibili.com/movie/',
                               'subs': []}

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        items = [{tid: {'title': '全部',
                        'url': CATEGORY[tid]['url'],
                        'subs': []}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_list(self, tid=0, order='default', days=30, page=1,
                          pagesize=10):
        params = {'tid': tid, 'order': order, 'days': days, 'page': page,
                  'pagesize': pagesize}
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        for i in range(pagesize):
            if str(i) in result['list']:
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        if not self.is_login:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        # The endpoint may answer with JSONP; unwrap the 'callback(...);' shell.
        if result[0] != '{':
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        total_page = int((result['data']['page']['count'] + pagesize - 1)
                         / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        if not self.is_login:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        if self.is_login:
            return True, ''
        pwd = self.get_encryped_pwd(pwd)
        data = ('cType=2&vcType=1&captcha={}&user={}&pwd={}'
                '&keep=true&gourl=http://www.bilibili.com/').format(
                    captcha, userid, pwd)
        result = utils.get_page_content(
            LOGIN_URL, data,
            {'Origin': 'https://passport.bilibili.com',
             'Referer': 'https://passport.bilibili.com/login'})
        # A successful login sets the DedeUserID cookie.
        if 'DedeUserID' not in requests.utils.dict_from_cookiejar(self.cj):
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(
            requests.utils.dict_from_cookiejar(self.cj)['DedeUserID'])
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
        # Recursively pull later pages, shrinking pagesize as a depth budget.
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid, int(page) + 1, fav,
                                               pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [durl.getElementsByTagName('url')[0].firstChild.nodeValue
                for durl in doc.getElementsByTagName('durl')]
        # Rewrite *.qqvideo.tc.qq.com hosts to the vsrc.store.qq.com mirror.
        urls = [url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url)
                else re.sub(r'.*\.qqvideo\.tc\.qq\.com',
                            'http://vsrc.store.qq.com', url)
                for url in urls]
        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
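# Standalone sketch of the CDN-host rewrite performed in get_video_urls():
# URLs pointing at *.qqvideo.tc.qq.com are redirected to vsrc.store.qq.com.
# The sample URL below is hypothetical.
import re


def rewrite_cdn(url):
    if re.match(r'.*\.qqvideo\.tc\.qq\.com', url):
        return re.sub(r'.*\.qqvideo\.tc\.qq\.com',
                      'http://vsrc.store.qq.com', url)
    return url


if __name__ == '__main__':
    print rewrite_cdn('http://node1.qqvideo.tc.qq.com/flv/12345.flv')
    # -> http://vsrc.store.qq.com/flv/12345.flv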
import os
import subprocess
from cookielib import MozillaCookieJar

import requests


class Session(requests.Session):
    """
    Session for making API requests and interacting with the filesystem
    """

    def __init__(self):
        super(Session, self).__init__()
        self.trust_env = False
        cookie_file = os.path.expanduser('~/.danabox/cookies.txt')
        cookie_dir = os.path.dirname(cookie_file)
        self.cookies = MozillaCookieJar(cookie_file)
        # Create the $HOME/.danabox dir if it doesn't exist
        if not os.path.isdir(cookie_dir):
            os.mkdir(cookie_dir, 0700)
        # Load existing cookies if the cookies.txt exists
        if os.path.isfile(cookie_file):
            self.cookies.load()
            self.cookies.clear_expired_cookies()

    def clear(self):
        """Clear cookies"""
        try:
            self.cookies.clear()
            self.cookies.save()
        except KeyError:
            pass

    def git_root(self):
        """
        Return the absolute path of the git repository root.

        If no git repository exists, raise an EnvironmentError.
        """
        try:
            git_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                stderr=subprocess.PIPE).strip('\n')
        except subprocess.CalledProcessError:
            raise EnvironmentError('Current directory is not a git repository')
        return git_root

    def get_app(self):
        """
        Return the application name for the current directory.

        The application is determined by parsing `git remote -v` output for
        the origin remote. Because Danabox only allows deployment of public
        Github repos we can create unique app names from a combination of the
        Github user's name and the repo name. E.g.,
        'git@github.com:opdemand/example-ruby-sinatra.git' becomes
        'opdemand-example--ruby--sinatra'.

        If no application is found, raise an EnvironmentError.
        """
        git_root = self.git_root()
        remotes = subprocess.check_output(['git', 'remote', '-v'],
                                          cwd=git_root)
        if remotes is None:
            raise EnvironmentError('No git remotes found.')
        url = None
        for remote in remotes.splitlines():
            if 'github.com' in remote:
                url = remote.split()[1]
                break
        if url is None:
            raise EnvironmentError('No Github remotes found.')
        pieces = url.split('/')
        owner = pieces[-2].split(':')[-1]
        repo = pieces[-1].replace('.git', '')
        app_raw = owner + '/' + repo
        app_name = app_raw.replace('-', '--').replace('/', '-')
        return app_name

    app = property(get_app)

    def request(self, *args, **kwargs):
        """
        Issue an HTTP request with proper cookie handling, including
        `Django CSRF tokens
        <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>`_
        """
        # Inject the CSRF token from the cookie jar, if one is stored.
        for cookie in self.cookies:
            if cookie.name == 'csrftoken':
                if 'headers' in kwargs:
                    kwargs['headers']['X-CSRFToken'] = cookie.value
                else:
                    kwargs['headers'] = {'X-CSRFToken': cookie.value}
                break
        response = super(Session, self).request(*args, **kwargs)
        self.cookies.save()
        return response
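# Minimal usage sketch for Session, not part of the original module: request()
# injects the X-CSRFToken header from a stored 'csrftoken' cookie and persists
# cookies after every call. The endpoint below is hypothetical, and accessing
# `session.app` raises EnvironmentError outside a git checkout that has a
# GitHub remote.
if __name__ == '__main__':
    session = Session()
    resp = session.get('https://api.example.com/v1/apps')  # hypothetical API
    print resp.status_code
    print session.app  # app name derived from the GitHub origin remote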