예제 #1
0
    def get_data(query):
        """Fetch the ``sign`` and ``token`` needed for a Baidu Translate request.

        The Baidu Translate home page is downloaded and searched for the
        ``token`` and ``gtk`` values. ``gtk`` is substituted into the
        JavaScript read from ``./baidu.js``, and that script's ``e`` function
        is then called with ``query`` to produce the sign.

        Args:
            query: Text passed to the JavaScript ``e`` function.

        Returns:
            A ``(sign, token)`` tuple.

        Raises:
            IndexError: If the token or gtk cannot be found in the page.
            requests.exceptions.Timeout: If the server does not respond
                within the request timeout.
        """
        url = 'https://fanyi.baidu.com/'
        headers = {
            'User-Agent':
            get_headers(),
            'Cookie':
            'BAIDUID=7FD0412FAAD4330FF200A81055995783:FG=1; BAIDUID_BFESS=7FD0412FAAD4330FF200A81055995783:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1606117276; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1606117443; __yjsv5_shitong=1.0_7_342e47f274f0b0479c666c81efa3d18ca4a9_300_1606117442884_221.194.139.117_3a134ef6; yjs_js_security_passport=d791e604171c2b5814dd935b42190b97de9e5cf9_1606117444_js',
        }
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url=url, headers=headers, timeout=10)
        content = response.content.decode('utf-8')
        # Search the whole page for the token and gtk values.
        token_match = re.search(r"token: '(.*?)'", content)
        gtk_match = re.search(
            r"<script>window.bdstoken = '';window.gtk = '(.*?)';</script>",
            content)
        if token_match is None or gtk_match is None:
            # Keep IndexError (what indexing an empty findall() result would
            # raise) so existing handlers still work, but say what went wrong.
            raise IndexError(
                'token or gtk not found in the Baidu Translate page')
        token = token_match.group(1)
        gtk = gtk_match.group(1)
        with open("./baidu.js", "r", encoding='utf-8') as f:
            js = f.read()
        # Replace the script's window lookup with the gtk value taken from the page.
        js = js.replace('u = null !== i ? i : (i = window[l] || "") || "";',
                        'u= "%s"' % gtk)
        cxt = execjs.compile(js)
        # Call the script's signing function.
        sign = cxt.call("e", query)
        return sign, token
 def __init__(self, page_queue, html_queue, school_infor_queue, *args,
              **kwargs):
     """Initialise the producer and keep references to its queues.

     Args:
         page_queue: Stored as ``self.page_queue``.
         html_queue: Stored as ``self.html_queue``.
         school_infor_queue: Stored as ``self.schoolInforQueue``.
         *args: Forwarded to the parent class initialiser.
         **kwargs: Forwarded to the parent class initialiser.
     """
     # Initialise the parent exactly once. Calling
     # ``threading.Thread.__init__(self)`` again afterwards would re-run the
     # initialiser with defaults and discard the forwarded ``*args`` /
     # ``**kwargs`` (for example ``name`` or ``daemon``).
     # NOTE(review): assumes Producer subclasses threading.Thread - confirm.
     super(Producer, self).__init__(*args, **kwargs)
     self.headers = {'User-Agent': get_headers()}
     self.page_queue = page_queue
     self.html_queue = html_queue
     self.schoolInforQueue = school_infor_queue
 def __init__(self):
     """Set up request headers, the start URL, a bounded queue and MongoDB access."""
     user_agent = get_headers()
     self.headers = {'User-Agent': user_agent}
     self.firstUrl = 'https://music.douban.com/top250'
     # Queue bounded at 300 entries.
     self.musicQueue = Queue(300)
     mongo = pymongo.MongoClient(
         host='localhost',
         port=27017,
         username='******',
         password='******',
     )
     # ``self.client`` holds the result of indexing the client with 'douban'.
     self.client = mongo['douban']
 def __init__(self):
     """Initialise crawler settings; prompts on stdin for the search keyword."""
     user_agent = get_headers()
     self.headers = {'User-Agent': user_agent}
     keyword = input('请输入关键字:')
     self.keyWord = keyword
     self.baseUrl = 'https://tieba.baidu.com/f'
     # The first-page URL embeds the keyword exactly as typed.
     first_url_template = 'https://tieba.baidu.com/f?kw={}&ie=utf-8&pn=0'
     self.firstUrl = first_url_template.format(keyword)
     self.page_num, self.pn = 1, 0
예제 #5
0
 def __init__(self):
     """Set up headers, the paged search URL template, a queue, a thread pool and MongoDB access."""
     user_agent = get_headers()
     self.headers = {'User-Agent': user_agent}
     # ``{}`` is the placeholder for the ``page_start`` value.
     self.baseUrl = (
         'https://movie.douban.com/j/search_subjects?'
         'type=movie&tag=热门&sort=rank&page_limit=10&page_start={}'
     )
     self.startPg = 0
     # Queue bounded at 300 entries.
     self.movieQueue = Queue(300)
     self.executor = ThreadPoolExecutor(max_workers=10)
     mongo = pymongo.MongoClient(
         host='localhost',
         port=27017,
         username='******',
         password='******',
     )
     # ``self.client`` holds the result of indexing the client with 'douban'.
     self.client = mongo['douban']
예제 #6
0
 def __init__(self):
     """Set up headers, the listing URLs, a queue, a thread pool and MongoDB access."""
     user_agent = get_headers()
     self.headers = {'User-Agent': user_agent}
     # Base URL.
     self.baseUrl = 'https://www.douyu.com/gapi/rkc/directory/mixList/0_0/'
     # First page.
     self.firstUrl = 'https://www.douyu.com/gapi/rkc/directory/mixList/0_0/1'
     # Live-stream queue, bounded at 300 entries.
     self.liveQueue = Queue(300)
     # Thread pool.
     self.executor = ThreadPoolExecutor(max_workers=10)
     # MongoDB object: the result of indexing the client with 'douyu'.
     mongo = pymongo.MongoClient(
         host='localhost',
         port=27017,
         username='******',
         password='******',
     )
     self.client = mongo['douyu']
예제 #7
0
 def get_lang(query):
     """Detect the language of ``query`` via Baidu Translate's langdetect endpoint.

     Args:
         query: Text sent as the ``query`` form field.

     Returns:
         The value of the ``"lan"`` key in the decoded JSON response.

     Raises:
         KeyError: If the response JSON has no ``"lan"`` key.
         requests.exceptions.Timeout: If the server does not respond
             within the request timeout.
     """
     url = 'https://fanyi.baidu.com/langdetect'
     headers = {
         'User-Agent': get_headers(),
         'Cookie':
         'BAIDUID=7FD0412FAAD4330FF200A81055995783:FG=1; BAIDUID_BFESS=7FD0412FAAD4330FF200A81055995783:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1606117276; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1606117443; __yjsv5_shitong=1.0_7_342e47f274f0b0479c666c81efa3d18ca4a9_300_1606117442884_221.194.139.117_3a134ef6; yjs_js_security_passport=d791e604171c2b5814dd935b42190b97de9e5cf9_1606117444_js',
         'origin': 'https://fanyi.baidu.com',
         'referer': 'https://fanyi.baidu.com/'
     }
     data = {
         'query': query,
     }
     # Without a timeout a stalled connection would block the caller forever.
     response = requests.post(url=url, data=data, headers=headers,
                              timeout=10)
     content = json.loads(response.content.decode('utf-8'))
     return content["lan"]
예제 #8
0
 def run(self):
     """Translate ``self.query`` through Baidu Translate and print the result.

     The language of the query is detected with ``self.get_lang`` and the
     request signature is obtained from ``self.get_data``. The form is then
     posted to ``self.url`` and the first translated segment of the JSON
     reply is printed.

     Raises:
         KeyError: If the reply lacks the expected ``trans_result`` data.
         requests.exceptions.Timeout: If the server does not respond
             within the request timeout.
     """
     lang = self.get_lang(self.query)
     sign, token = self.get_data(self.query)
     # Chinese input is translated to English; any other detected language
     # is submitted as English and translated to Chinese.
     if lang == "zh":
         source, target = 'zh', 'en'
     else:
         source, target = 'en', 'zh'
     data = {
         'from': source,
         'to': target,
         'query': self.query,
         'simple_means_flag': '3',
         'sign': sign,
         'token': token,
         'domain': 'common',
     }
     headers = {
         "authority": "fanyi.baidu.com",
         "method": "POST",
         "scheme": "https",
         "accept": "*/*",
         "accept-encoding": "gzip, deflate, br",
         "accept-language": "zh-CN,zh;q=0.9",
         "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
         "cookie":
         "BAIDUID=7FD0412FAAD4330FF200A81055995783:FG=1; BAIDUID_BFESS=7FD0412FAAD4330FF200A81055995783:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1606117276; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1606117443; __yjsv5_shitong=1.0_7_342e47f274f0b0479c666c81efa3d18ca4a9_300_1606117442884_221.194.139.117_3a134ef6; yjs_js_security_passport=d791e604171c2b5814dd935b42190b97de9e5cf9_1606117444_js",
         "origin": "https://fanyi.baidu.com",
         "referer": "https://fanyi.baidu.com/",
         "sec-fetch-dest": "empty",
         "sec-fetch-mode": "cors",
         "sec-fetch-site": "same-origin",
         "user-agent": get_headers(),
         "x-requested-with": "XMLHttpRequest",
     }
     # Without a timeout a stalled connection would block the caller forever.
     response = requests.post(url=self.url, data=data, headers=headers,
                              timeout=10)
     content = json.loads(response.content.decode('utf-8'))
     print('翻译结果:', content["trans_result"]["data"][0]["dst"])
예제 #9
0
 def __init__(self):
     """Set up the request headers, the item URL template and the counter."""
     user_agent = get_headers()
     self.headers = {'User-Agent': user_agent}
     # ``{}`` is the placeholder filled in when a concrete URL is built.
     self.baseUrl = 'https://baike.baidu.com/item/{}'
     self.num = 1
예제 #10
0
 def __init__(self, page_queue, img_queue, *args, **kwargs):
     """Initialise the producer and keep references to its queues.

     Args:
         page_queue: Stored as ``self.page_queue``.
         img_queue: Stored as ``self.img_queue``.
         *args: Forwarded to the parent class initialiser.
         **kwargs: Forwarded to the parent class initialiser.
     """
     # Two-argument super() binds the proxy to this instance. The
     # one-argument form ``super(Producer)`` is unbound, so calling
     # ``__init__`` on it re-initialises the proxy itself rather than the
     # parent class and cannot forward keyword arguments.
     # Initialise the parent exactly once: a further
     # ``threading.Thread.__init__(self)`` call would reset the forwarded
     # arguments to their defaults.
     # NOTE(review): assumes Producer subclasses threading.Thread - confirm.
     super(Producer, self).__init__(*args, **kwargs)
     self.headers = {'User-Agent': get_headers()}
     self.page_queue = page_queue
     self.img_queue = img_queue
 def __init__(self):
     """Set up the request headers, the paged listing URL template and the page count."""
     user_agent = get_headers()
     self.headers = {'User-Agent': user_agent}
     # Base URL; ``{}`` is the placeholder for the page number.
     self.baseUrl = 'https://www.doutula.com/photo/list/?page={}'
     # Number of pages.
     self.pageNum = 10