Example #1
# Assumed imports for this snippet (Python 2); MultipartPostHandler is an
# optional third-party module, handled below.
import os
import sys
import traceback
from time import sleep
from urllib import urlencode
from cookielib import CookieJar, MozillaCookieJar
import urllib2
from urllib2 import Request, build_opener, HTTPCookieProcessor, ProxyHandler

class LSession():
    def __init__(self, cookiefile=None, proxy=None, timeout=10, retime=30, sleept=3):
        self.timeout = timeout
        self.retime = retime  # number of retries in urlopen()
        self.sleept = sleept  # seconds to sleep between retries
        # proxy example: '1.234.77.96:80'
        if cookiefile is None:
            self.cookiejar = CookieJar()
        else:
            self.cookiejar = MozillaCookieJar(filename=cookiefile)
            if not os.path.isfile(cookiefile):
                # Create the file with the Netscape header so load() accepts it.
                with open(cookiefile, 'w') as f:
                    f.write(MozillaCookieJar.header)
            self.cookiejar.load(filename=cookiefile, ignore_discard=True)
        self.cookie_processor = HTTPCookieProcessor(self.cookiejar)
        self.opener = build_opener(urllib2.HTTPRedirectHandler(), self.cookie_processor)
        if proxy:
            self.opener.add_handler(ProxyHandler({"http": proxy}))
        # Optional support for posting files.
        try:
            import MultipartPostHandler  # third-party module; must be installed
            self.opener.add_handler(MultipartPostHandler.MultipartPostHandler())
        except ImportError as e:
            print e

        self.response = None
        self.request = None
        self.header = []

    def add_header(self, k, v):
        self.header.append((k, v))

    def build_request(self, url, params=None):
        self.request = Request(url, params)
        # Use the previous response's URL as the Referer, if there is one.
        if self.response is not None:
            self.request.add_header('Referer', self.url())
        self.request.add_header('User-Agent',
                                'Opera/9.80 (J2ME/MIDP; Opera Mini/1.0/886; U; en) Presto/2.4.15')
        while self.header:
            _k, _v = self.header.pop()
            self.request.add_header(_k, _v)
        return self.request

    def __del__(self):
        self.save_cookie()

    def urlopen(self, req):
        retime = self.retime
        while retime > 0:
            try:
                return self.opener.open(req, timeout=self.timeout)
            except Exception:
                retime -= 1
                traceback.print_exc(file=sys.stdout)
                print 'Wait and retry...%d' % (self.retime - retime)
                sleep(self.sleept)
        # All retries exhausted; fail loudly instead of returning None.
        raise IOError('urlopen failed after %d retries' % self.retime)

    def savefile(self, filename, url):
        # Stream the response to disk in 50 KiB chunks.
        self.response = self.urlopen(self.build_request(url))
        CHUNK = 50 * 1024
        with open(filename, 'wb') as fp:
            while True:
                chunk = self.response.read(CHUNK)
                if not chunk:
                    break
                fp.write(chunk)
    def post(self, url, post_data):
        self.response = self.urlopen(self.build_request(url, urlencode(post_data)))
        return self.response

    def post_raw(self, url, post_data):
        self.response = self.urlopen(self.build_request(url, post_data))
        return self.response

    def post_file(self, url, params):
        self.response = self.urlopen(self.build_request(url, params))
        return self.response

    def get(self, url):
        self.response = self.urlopen(self.build_request(url))
        return self.response
    def text(self, dec='gbk', enc='utf-8'):
        # Transcode the response body, e.g. from GBK pages to UTF-8.
        return self.response.read().decode(dec).encode(enc)

    def url(self):
        return self.response.url

    def logout(self):
        self.cookiejar.clear()

    def Verify_proxy(self):
        pass

    def show_cookie(self):
        for cookie in self.cookiejar:
            print cookie
    def save_cookie(self):
        # A plain CookieJar has no backing file, hence no save().
        if not hasattr(self.cookiejar, 'save'):
            return
        try:
            self.cookiejar.save(ignore_discard=True, ignore_expires=False)
        except Exception:
            traceback.print_exc(file=sys.stdout)
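
A minimal usage sketch (Python 2); the cookie file name and URL are illustrative, not from the original source:

s = LSession(cookiefile='cookies.txt')
s.add_header('Accept-Language', 'en')
resp = s.get('http://example.com/')
print s.text(dec='utf-8', enc='utf-8')  # transcode the body
s.save_cookie()  # persist cookies (also triggered by __del__)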
Example #2
# Assumed imports for this snippet (Python 2). Project-level names such as
# APPKEY, APPSECRET, the *_URL constants, CATEGORY, ORDER, get_html(),
# url_locations(), create_website() and the *_download_by_* helpers are
# defined elsewhere in the original project.
import os
import re
import time
import base64
import hashlib
import tempfile
import urllib
import urllib2
from random import random
from json import loads
from urlparse import urlparse
from xml.dom.minidom import parseString
from cookielib import MozillaCookieJar
from bs4 import BeautifulSoup

class Bilibili():
    name = u'哔哩哔哩 (Bilibili)'

    api_url = 'http://interface.bilibili.com/playurl?'
    bangumi_api_url = 'http://bangumi.bilibili.com/player/web_api/playurl?'
    SEC1 = '94aba54af9065f71de72f5508f1cd42e'
    SEC2 = '9b288147e5474dd2aa67085f716c560d'
    supported_stream_profile = [u'流畅', u'高清', u'超清']
    stream_types = [
        {'id': 'hdflv'},
        {'id': 'flv'},
        {'id': 'hdmp4'},
        {'id': 'mp4'},
        {'id': 'live'},
        {'id': 'vc'},
    ]
    fmt2qlt = dict(hdflv=4, flv=3, hdmp4=2, mp4=1)

    def __init__(self,
                 appkey=APPKEY,
                 appsecret=APPSECRET,
                 width=720,
                 height=480):
        self.defaultHeader = {'Referer': 'http://www.bilibili.com'}
        self.appkey = appkey
        self.appsecret = appsecret
        self.WIDTH = width
        self.HEIGHT = height
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            key = None
            for ck in self.cj:
                if ck.name == 'DedeUserID':
                    key = ck.value
                    break
            if key is not None:
                self.is_login = True
                self.mid = str(key)
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

        try:
            # Remove any stale subtitle file left by a previous run.
            os.remove(self._get_tmp_dir() + '/tmp.ass')
        except OSError:
            pass

    def _get_tmp_dir(self):
        try:
            return tempfile.gettempdir()
        except Exception:
            return ''

    def get_captcha(self, path=None):
        key = None
        for ck in self.cj:
            if ck.name == 'sid':
                key = ck.value
                break

        if key is None:
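            # Prime the 'sid' cookie with a throwaway request before fetching the captcha.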
            get_html(
                LOGIN_CAPTCHA_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = get_html(
            LOGIN_CAPTCHA_URL.format(random()),
            decoded=False,
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path is None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encrypted_pwd(self, pwd):
        # RSA-encrypt (server hash + password) with the server-supplied public
        # key, then base64-encode and URL-quote the result.
        import rsa
        result = loads(
            get_html(
                LOGIN_HASH_URL.format(random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        params['appkey'] = self.appkey
        data = ''
        keys = params.keys()
        # Keys must be sorted before signing; urllib.urlencode(params) does not
        # guarantee ordering.
        keys.sort()
        for key in keys:
            data += '{}={}&'.format(key, urllib.quote(str(params[key])))

        data = data[:-1]  # remove last '&'
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()
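
    # Illustration of the signing scheme (values are made up):
    #   params = {'tid': 0, 'page': 1}
    #   data   = 'appkey=<APPKEY>&page=1&tid=0'
    #   sign   = hashlib.md5(data + APPSECRET).hexdigest()
    #   signed = data + '&sign=' + sign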

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(get_html(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        # Fix the TV-series and movie categories, which are missing from the page.
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(get_html(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        items = [{tid: {'title': u'全部', 'url': CATEGORY[tid]['url'], 'subs': []}}]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_by_tag(self, tag=0, tid=0, page=1, pagesize=20):
        if tag == 0:
            url = LIST_BY_ALL.format(tid, pagesize, page)
        else:
            url = LIST_BY_TAG.format(tag, tid, pagesize, page)

        results = loads(get_html(url))
        return results

    def get_category_list(self,
                          tid=0,
                          order='default',
                          days=30,
                          page=1,
                          pagesize=20):
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))

        result = loads(get_html(url, headers=self.defaultHeader))
        results = []
        for i in range(pagesize):
            if str(i) in result['list']:
                results.append(result['list'][str(i)])
        return results, result['pages']

    def get_my_info(self):
        if not self.is_login:
            return []
        result = loads(get_html(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = get_html(url, headers=self.defaultHeader)
        if result[0] != '{':
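            # The endpoint may return JSONP; strip the 'callback(' ... ');' wrapper.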
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = loads(get_html(url, headers=self.defaultHeader))
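        # The API reports no total count; assume one more page when this page is full.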
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = loads(get_html(url, headers=self.defaultHeader))
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = loads(get_html(url))
        return result['data']['list']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav_box(self):
        if not self.is_login:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=20):
        if not self.is_login:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = loads(get_html(url, headers=self.defaultHeader))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        if self.is_login:
            return True, ''
        pwd = self.get_encrypted_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = get_html(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })

        key = None
        for ck in self.cj:
            if ck.name == 'DedeUserID':
                key = ck.value
                break

        if key is None:
            return False, LOGIN_ERROR_MAP[loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(key)
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=20):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = loads(get_html(url, headers=self.defaultHeader))
        results = [result]
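        # Recursively fetch the following pages; pagesize acts as a remaining-page budget.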
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid,
                                               int(page) + 1,
                                               fav,
                                               pagesize=pagesize - 1)[0]

        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        try:
            page = get_html(url)
            result = loads(page)
        except Exception:
            result = {}
        return result

    # Use niconvert to build an ASS subtitle file from the video's danmaku comments.
    def parse_subtitle(self, cid):
        page_full_url = COMMENT_URL.format(cid)
        website = create_website(page_full_url)
        if website is None:
            return ''
        else:
            text = website.ass_subtitles_text(font_name=u'黑体',
                                              font_size=24,
                                              resolution='%d:%d' %
                                              (self.WIDTH, self.HEIGHT),
                                              line_count=12,
                                              bottom_margin=0,
                                              tune_seconds=0)
            with open(self._get_tmp_dir() + '/tmp.ass', 'w') as f:
                f.write(text.encode('utf8'))
            return 'tmp.ass'

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = parseString(get_html(url))
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            if re.match(r'.*\.qqvideo\.tc\.qq\.com', u):
                # Rewrite Tencent CDN hosts to the direct source host.
                u = re.sub(r'.*\.qqvideo\.tc', 'http://vsrc.store', u)
            urls.append(u)

        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        get_html(url)

    def api_req(self, cid, quality, bangumi, bangumi_movie=False, **kwargs):
        ts = str(int(time.time()))
        if not bangumi:
            params_str = 'cid={}&player=1&quality={}&ts={}'.format(
                cid, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC1)).hexdigest()
            api_url = self.api_url + params_str + '&sign=' + chksum
        else:
            mod = 'movie' if bangumi_movie else 'bangumi'
            params_str = 'cid={}&module={}&player=1&quality={}&ts={}'.format(
                cid, mod, quality, ts)
            chksum = hashlib.md5(bytes(params_str + self.SEC2)).hexdigest()
            api_url = self.bangumi_api_url + params_str + '&sign=' + chksum

        return get_html(api_url)

    def download_by_vid(self, cid, bangumi, **kwargs):
        stream_id = kwargs.get('stream_id')
        if stream_id and stream_id in self.fmt2qlt:
            quality = stream_id
        else:
            quality = 'hdflv' if bangumi else 'flv'

        level = kwargs.get('level', 0)
        xml = self.api_req(cid, level, bangumi, **kwargs)
        doc = parseString(xml)
        urls = []
        for durl in doc.getElementsByTagName('durl'):
            u = durl.getElementsByTagName('url')[0].firstChild.nodeValue
            # Append a Referer hint so downstream players send the required header.
            urls.append(
                urllib.quote_plus(u + '|Referer=https://www.bilibili.com'))

        return urls

    def entry(self, **kwargs):
        # tencent player
        tc_flashvars = re.search(r'"bili-cid=\d+&bili-aid=\d+&vid=([^"]+)"',
                                 self.page)
        if tc_flashvars:
            tc_flashvars = tc_flashvars.group(1)
        if tc_flashvars is not None:
            self.out = True
            return qq_download_by_vid(tc_flashvars,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])

        hit = re.search(r'cid=(\d+)', self.page)
        if hit is not None:
            return self.download_by_vid(hit.group(1), False, **kwargs)
        else:
            # Fall back to parsing flashvars.
            hit = re.search(r'flashvars="([^"]+)"', self.page)
            if hit is None:
                raise Exception('Unsupported page {}'.format(self.url))
            flashvars = hit.group(1)
            param = flashvars.split('&')[0]
            t, cid = param.split('=')
            t = t.strip()
            cid = cid.strip()
            if t == 'vid':
                sina_download_by_vid(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            elif t == 'ykid':
                youku_download_by_vid(cid,
                                      self.title,
                                      output_dir=kwargs['output_dir'],
                                      merge=kwargs['merge'],
                                      info_only=kwargs['info_only'])
            elif t == 'uid':
                tudou_download_by_id(cid,
                                     self.title,
                                     output_dir=kwargs['output_dir'],
                                     merge=kwargs['merge'],
                                     info_only=kwargs['info_only'])
            else:
                raise NotImplementedError(
                    'Unknown flashvars {}'.format(flashvars))
            return

    def movie_entry(self, **kwargs):
        patt = r"var\s*aid\s*=\s*'(\d+)'"
        aid = re.search(patt, self.page).group(1)
        page_list = loads(
            get_html(
                'http://www.bilibili.com/widget/getPageList?aid={}'.format(
                    aid)))
        # better ideas for bangumi_movie titles?
        self.title = page_list[0]['pagename']
        return self.download_by_vid(page_list[0]['cid'],
                                    True,
                                    bangumi_movie=True,
                                    **kwargs)

    def get_video_from_url(self, url, **kwargs):
        self.url = url_locations(url)
        frag = urlparse(self.url).fragment
        # http://www.bilibili.com/video/av3141144/index_2.html#page=3
        if frag:
            hit = re.search(r'page=(\d+)', frag)
            if hit is not None:
                page = hit.group(1)
                av_id = re.search(r'av(\d+)', self.url).group(1)
                self.url = 'http://www.bilibili.com/video/av{}/index_{}.html'.format(
                    av_id, page)
        self.page = get_html(self.url)

        if 'bangumi.bilibili.com/movie' in self.url:
            return self.movie_entry(**kwargs)
        elif 'bangumi.bilibili.com' in self.url:
            return self.bangumi_entry(**kwargs)
        elif 'live.bilibili.com' in self.url:
            return self.live_entry(**kwargs)
        elif 'vc.bilibili.com' in self.url:
            return self.vc_entry(**kwargs)
        else:
            return self.entry(**kwargs)

    def bangumi_entry(self, **kwargs):
        pass

    def live_entry(self, **kwargs):
        pass

    def vc_entry(self, **kwargs):
        pass
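
A hedged usage sketch for this class; it assumes the surrounding project supplies APPKEY/APPSECRET, the *_URL constants and get_html():

b = Bilibili()
videos, pages = b.get_category_list(tid=1, page=1, pagesize=20)
for v in videos:
    print v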
Example #3
# Assumed imports for this snippet (Python 2); APPKEY/APPSECRET, the *_URL
# constants, CATEGORY, ORDER, LOGIN_ERROR_MAP and the utils module come from
# the original project.
import os
import re
import json
import base64
import random
import hashlib
import tempfile
import urllib
import urllib2
import requests
from xml.dom import minidom
from cookielib import MozillaCookieJar
from bs4 import BeautifulSoup

class Bilibili():
    def __init__(self, appkey=APPKEY, appsecret=APPSECRET):
        self.appkey = appkey
        self.appsecret = appsecret
        self.is_login = False
        cookie_path = os.path.dirname(os.path.abspath(__file__)) + '/.cookie'
        self.cj = MozillaCookieJar(cookie_path)
        if os.path.isfile(cookie_path):
            self.cj.load()
            cookies = requests.utils.dict_from_cookiejar(self.cj)
            if 'DedeUserID' in cookies:
                self.is_login = True
                self.mid = str(cookies['DedeUserID'])
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(opener)

    def get_captcha(self, path=None):
        if 'sid' not in requests.utils.dict_from_cookiejar(self.cj):
            # Prime the 'sid' cookie with a throwaway request before fetching the captcha.
            utils.get_page_content(
                LOGIN_CAPTCHA_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'})
        result = utils.get_page_content(
            LOGIN_CAPTCHA_URL.format(random.random()),
            headers={'Referer': 'https://passport.bilibili.com/login'})
        if path is None:
            path = tempfile.gettempdir() + '/captcha.jpg'
        with open(path, 'wb') as f:
            f.write(result)
        return path

    def get_encrypted_pwd(self, pwd):
        # RSA-encrypt (server hash + password) with the server-supplied public
        # key, then base64-encode and URL-quote the result.
        import rsa
        result = json.loads(
            utils.get_page_content(
                LOGIN_HASH_URL.format(random.random()),
                headers={'Referer': 'https://passport.bilibili.com/login'}))
        pwd = result['hash'] + pwd
        key = result['key']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(key)
        pwd = rsa.encrypt(pwd.encode('utf-8'), pub_key)
        pwd = base64.b64encode(pwd)
        pwd = urllib.quote(pwd)
        return pwd

    def api_sign(self, params):
        params['appkey'] = self.appkey
        data = ""
        keys = params.keys()
        # Keys must be sorted before signing.
        keys.sort()
        for key in keys:
            if data != "":
                data += "&"
            value = params[key]
            if isinstance(value, int):
                value = str(value)
            data += key + "=" + str(urllib.quote(value))
        if self.appsecret is None:
            return data
        m = hashlib.md5()
        m.update(data + self.appsecret)
        return data + '&sign=' + m.hexdigest()

    def get_category_from_web_page(self):
        category_dict = {'0': {'title': u'全部', 'url': HOME_URL, 'subs': []}}
        node = category_dict['0']
        url = node['url']
        result = BeautifulSoup(utils.get_page_content(url),
                               "html.parser").findAll('li', {'class': 'm-i'})
        for item in result:
            if len(item['class']) != 1:
                continue
            tid = item['data-tid']
            title = item.em.contents[0]
            url = 'http:' + item.a['href']
            category_dict[tid] = {'title': title, 'url': url, 'subs': []}
            node['subs'].append(tid)

        # Fix the TV-series and movie categories, which are missing from the page.
        if '11' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('11')
        if '23' not in category_dict['0']['subs']:
            category_dict['0']['subs'].append('23')
        category_dict['11'] = {
            'title': u'电视剧',
            'url': 'http://bangumi.bilibili.com/tv/',
            'subs': []
        }
        category_dict['23'] = {
            'title': u'电影',
            'url': 'http://bangumi.bilibili.com/movie/',
            'subs': []
        }

        for sub in category_dict['0']['subs']:
            node = category_dict[sub]
            url = node['url']
            result = BeautifulSoup(utils.get_page_content(url),
                                   "html.parser").select('ul.n_num li')
            for item in result[1:]:
                if not item.has_attr('tid'):
                    continue
                if not hasattr(item, 'a'):
                    continue
                if item.has_attr('class'):
                    continue
                tid = item['tid']
                title = item.a.contents[0]
                if item.a['href'][:2] == '//':
                    url = 'http:' + item.a['href']
                else:
                    url = HOME_URL + item.a['href']
                category_dict[tid] = {'title': title, 'url': url, 'subs': []}
                node['subs'].append(tid)
        return category_dict

    def get_category(self, tid='0'):
        items = [{
            tid: {
                'title': '全部',
                'url': CATEGORY[tid]['url'],
                'subs': []
            }
        }]
        for sub in CATEGORY[tid]['subs']:
            items.append({sub: CATEGORY[sub]})
        return items

    def get_category_name(self, tid):
        return CATEGORY[str(tid)]['title']

    def get_order(self):
        return ORDER

    def get_category_list(self,
                          tid=0,
                          order='default',
                          days=30,
                          page=1,
                          pagesize=10):
        params = {
            'tid': tid,
            'order': order,
            'days': days,
            'page': page,
            'pagesize': pagesize
        }
        url = LIST_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = []
        for i in range(pagesize):
            if str(i) in result['list']:
                results.append(result['list'][str(i)])
            else:
                break
        return results, result['pages']

    def get_my_info(self):
        if not self.is_login:
            return []
        result = json.loads(utils.get_page_content(MY_INFO_URL))
        return result['data']

    def get_bangumi_chase(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = BANGUMI_CHASE_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['result'], result['data']['pages']

    def get_bangumi_detail(self, season_id):
        url = BANGUMI_SEASON_URL.format(season_id)
        result = utils.get_page_content(url)
        if result[0] != '{':
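            # The endpoint may return JSONP; strip the 'callback(' ... ');' wrapper.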
            start = result.find('(') + 1
            end = result.find(');')
            result = result[start:end]
        result = json.loads(result)
        return result['result']

    def get_history(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = HISTORY_URL.format(page, pagesize)
        result = json.loads(utils.get_page_content(url))
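        # The API reports no total count; assume one more page when this page is full.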
        if len(result['data']) >= int(pagesize):
            total_page = int(page) + 1
        else:
            total_page = int(page)
        return result['data'], total_page

    def get_dynamic(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = DYNAMIC_URL.format(pagesize, page)
        result = json.loads(utils.get_page_content(url))
        total_page = int(
            (result['data']['page']['count'] + pagesize - 1) / pagesize)
        return result['data']['feeds'], total_page

    def get_attention(self, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_URL.format(self.mid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['pages']

    def get_attention_video(self, mid, tid=0, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_VIDEO_URL.format(mid, page, pagesize, tid)
        result = json.loads(utils.get_page_content(url))
        return result['data'], result['data']['pages']

    def get_attention_channel(self, mid):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_URL.format(mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_attention_channel_list(self, mid, cid, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = ATTENTION_CHANNEL_LIST_URL.format(mid, cid, page, pagesize)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list'], result['data']['total']

    def get_fav_box(self):
        if not self.is_login:
            return []
        url = FAV_BOX_URL.format(self.mid)
        result = json.loads(utils.get_page_content(url))
        return result['data']['list']

    def get_fav(self, fav_box, page=1, pagesize=10):
        if not self.is_login:
            return []
        url = FAV_URL.format(self.mid, page, pagesize, fav_box)
        result = json.loads(utils.get_page_content(url))
        return result['data']['vlist'], result['data']['pages']

    def login(self, userid, pwd, captcha):
        if self.is_login:
            return True, ''
        pwd = self.get_encrypted_pwd(pwd)
        data = 'cType=2&vcType=1&captcha={}&user={}&pwd={}&keep=true&gourl=http://www.bilibili.com/'.format(
            captcha, userid, pwd)
        result = utils.get_page_content(
            LOGIN_URL, data, {
                'Origin': 'https://passport.bilibili.com',
                'Referer': 'https://passport.bilibili.com/login'
            })
        cookies = requests.utils.dict_from_cookiejar(self.cj)
        if 'DedeUserID' not in cookies:
            return False, LOGIN_ERROR_MAP[json.loads(result)['code']]
        self.cj.save()
        self.is_login = True
        self.mid = str(cookies['DedeUserID'])
        return True, ''

    def logout(self):
        self.cj.clear()
        self.cj.save()
        self.is_login = False

    def get_av_list_detail(self, aid, page=1, fav=0, pagesize=10):
        params = {'id': aid, 'page': page}
        if fav != 0:
            params['fav'] = fav
        url = VIEW_URL.format(self.api_sign(params))
        result = json.loads(utils.get_page_content(url))
        results = [result]
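        # Recursively fetch the following pages; pagesize acts as a remaining-page budget.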
        if (int(page) < result['pages']) and (pagesize > 1):
            results += self.get_av_list_detail(aid,
                                               int(page) + 1,
                                               fav,
                                               pagesize=pagesize - 1)[0]
        return results, result['pages']

    def get_av_list(self, aid):
        url = AV_URL.format(aid)
        result = json.loads(utils.get_page_content(url))
        return result

    def get_video_urls(self, cid):
        m = hashlib.md5()
        m.update(INTERFACE_PARAMS.format(str(cid), SECRETKEY_MINILOADER))
        url = INTERFACE_URL.format(str(cid), m.hexdigest())
        doc = minidom.parseString(utils.get_page_content(url))
        urls = [
            durl.getElementsByTagName('url')[0].firstChild.nodeValue
            for durl in doc.getElementsByTagName('durl')
        ]
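        # Rewrite Tencent CDN hosts (*.qqvideo.tc.qq.com) to the direct source host.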
        urls = [
            url if not re.match(r'.*\.qqvideo\.tc\.qq\.com', url) else re.sub(
                r'.*\.qqvideo\.tc\.qq\.com', 'http://vsrc.store.qq.com', url)
            for url in urls
        ]
        return urls

    def add_history(self, aid, cid):
        url = ADD_HISTORY_URL.format(str(cid), str(aid))
        utils.get_page_content(url)
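
A hedged login-flow sketch for this variant; the credentials are placeholders and the LOGIN_* constants come from the original project:

b = Bilibili()
print b.get_captcha()  # writes captcha.jpg to the temp dir
ok, msg = b.login('user', 'secret', raw_input('captcha: '))
print ok, msg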
Example #4
# Assumed imports for this snippet (Python 2).
import os
import subprocess
import requests
from cookielib import MozillaCookieJar

class Session(requests.Session):
    """
    Session for making API requests and interacting with the filesystem
    """

    def __init__(self):
        super(Session, self).__init__()
        self.trust_env = False
        cookie_file = os.path.expanduser('~/.danabox/cookies.txt')
        cookie_dir = os.path.dirname(cookie_file)
        self.cookies = MozillaCookieJar(cookie_file)
        # Create the $HOME/.danabox dir if it doesn't exist
        if not os.path.isdir(cookie_dir):
            os.mkdir(cookie_dir, 0700)
        # Load existing cookies if the cookies.txt exists
        if os.path.isfile(cookie_file):
            self.cookies.load()
            self.cookies.clear_expired_cookies()

    def clear(self):
        """Clear cookies"""
        try:
            self.cookies.clear()
            self.cookies.save()
        except KeyError:
            pass

    def git_root(self):
        """
        Return the absolute path from the git repository root

        If no git repository exists, raise an EnvironmentError
        """
        try:
            git_root = subprocess.check_output(
                ['git', 'rev-parse', '--show-toplevel'],
                stderr=subprocess.PIPE).strip('\n')
        except subprocess.CalledProcessError:
            raise EnvironmentError('Current directory is not a git repository')
        return git_root

    def get_app(self):
        """
        Return the application name for the current directory

        The application is determined by parsing `git remote -v` output for the origin remote.

        Because Danabox only allows deployment of public Github repos we can create unique app
        names from a combination of the Github user's name and the repo name, e.g.
        'git@github.com:opdemand/example-ruby-sinatra.git' becomes 'opdemand-example--ruby--sinatra'

        If no application is found, raise an EnvironmentError.
        """
        git_root = self.git_root()
        remotes = subprocess.check_output(['git', 'remote', '-v'], cwd=git_root)
        if not remotes:
            raise EnvironmentError('No git remotes found.')
        url = None
        for remote in remotes.splitlines():
            if 'github.com' in remote:
                url = remote.split()[1]
                break
        if url is None:
            raise EnvironmentError('No Github remotes found.')
        pieces = url.split('/')
        owner = pieces[-2].split(':')[-1]
        repo = pieces[-1].replace('.git', '')
        app_raw = owner + '/' + repo
        app_name = app_raw.replace('-', '--').replace('/', '-')
        return app_name

    app = property(get_app)

    def request(self, *args, **kwargs):
        """
        Issue an HTTP request with proper cookie handling including
        `Django CSRF tokens <https://docs.djangoproject.com/en/dev/ref/contrib/csrf/>`
        """
        for cookie in self.cookies:
            if cookie.name == 'csrftoken':
                if 'headers' in kwargs:
                    kwargs['headers']['X-CSRFToken'] = cookie.value
                else:
                    kwargs['headers'] = {'X-CSRFToken': cookie.value}
                break
        response = super(Session, self).request(*args, **kwargs)
        self.cookies.save()
        return response
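
A hedged usage sketch; the endpoint URL is a placeholder:

s = Session()
print s.app  # app name derived from the GitHub remote, e.g. 'opdemand-example--ruby--sinatra'
resp = s.request('GET', 'https://api.example.com/apps/')
print resp.status_code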