def get_data(query):
    """Obtain the ``sign`` and ``token`` values for a translation request.

    Downloads the Baidu Translate home page, extracts the ``token`` and
    ``gtk`` values from it with regular expressions, loads the text of
    ``./baidu.js``, substitutes the ``gtk`` value into that script text
    (in memory only) and calls the script's ``e`` function with ``query``.

    Args:
        query: The text passed to the JavaScript ``e`` function.

    Returns:
        A ``(sign, token)`` tuple.

    Raises:
        IndexError: If the token or gtk pattern is not found in the page.
    """
    home_url = 'https://fanyi.baidu.com/'
    request_headers = {
        'User-Agent': get_headers(),
        'Cookie': 'BAIDUID=7FD0412FAAD4330FF200A81055995783:FG=1; BAIDUID_BFESS=7FD0412FAAD4330FF200A81055995783:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1606117276; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1606117443; __yjsv5_shitong=1.0_7_342e47f274f0b0479c666c81efa3d18ca4a9_300_1606117442884_221.194.139.117_3a134ef6; yjs_js_security_passport=d791e604171c2b5814dd935b42190b97de9e5cf9_1606117444_js',
    }
    page = requests.get(url=home_url, headers=request_headers)
    html = page.content.decode('utf-8')

    # Search the whole page for the token and gtk values.
    token_matches = re.findall(r"token: '(.*?)'", html)
    token = token_matches[0]
    gtk_matches = re.findall(
        r"<script>window.bdstoken = '';window.gtk = '(.*?)';</script>",
        html)
    gtk = gtk_matches[0]

    with open("./baidu.js", "r", encoding='utf-8') as script_file:
        script = script_file.read()

    # Replace the script's lookup of the value on ``window`` with the gtk
    # literal scraped above, so the script can run outside a browser.
    patched_script = script.replace(
        'u = null !== i ? i : (i = window[l] || "") || "";',
        'u= "%s"' % gtk)
    runtime = execjs.compile(patched_script)

    # Invoke the JavaScript function ``e`` to compute the sign.
    sign = runtime.call("e", query)
    return sign, token
def __init__(self, page_queue, html_queue, school_infor_queue, *args, **kwargs):
    """Initialise the producer and remember the queues it works with.

    Args:
        page_queue: Stored as ``self.page_queue``.
        html_queue: Stored as ``self.html_queue``.
        school_infor_queue: Stored as ``self.schoolInforQueue``.
        *args: Forwarded to the base-class initialiser.
        **kwargs: Forwarded to the base-class initialiser.
    """
    # A single cooperative super() call initialises the base class with the
    # caller's arguments. The previous code followed it with a bare
    # ``threading.Thread.__init__(self)``, which re-ran the initialiser with
    # defaults and threw away any name/target/daemon passed in.
    # NOTE(review): assumes Producer derives from threading.Thread - confirm.
    super(Producer, self).__init__(*args, **kwargs)
    self.headers = {'User-Agent': get_headers()}
    self.page_queue = page_queue
    self.html_queue = html_queue
    self.schoolInforQueue = school_infor_queue
def __init__(self):
    """Set up request headers, the start URL, a work queue and a MongoDB handle."""
    user_agent = get_headers()
    self.headers = {'User-Agent': user_agent}

    # First page to request.
    self.firstUrl = 'https://music.douban.com/top250'

    # Queue constructed with the argument 300.
    self.musicQueue = Queue(300)

    # Despite the attribute name, this holds the result of indexing the
    # MongoClient with 'douban', not the client itself.
    # NOTE(review): connection credentials are hard-coded here.
    mongo = pymongo.MongoClient(
        host='localhost',
        port=27017,
        username='******',
        password='******',
    )
    self.client = mongo['douban']
def __init__(self):
    """Initialise the settings, prompting on stdin for the search keyword."""
    user_agent = get_headers()
    self.headers = {'User-Agent': user_agent}

    # Blocks until the user has typed a keyword.
    keyword = input('请输入关键字:')
    self.keyWord = keyword

    self.baseUrl = 'https://tieba.baidu.com/f'
    # URL of the first result page for the keyword (pn=0).
    first_url_template = 'https://tieba.baidu.com/f?kw={}&ie=utf-8&pn=0'
    self.firstUrl = first_url_template.format(keyword)

    # Both counters start at their initial values; how they advance is
    # decided by code outside this method.
    self.page_num = 1
    self.pn = 0
def __init__(self):
    """Set up headers, the URL template, a queue, a thread pool and a MongoDB handle."""
    user_agent = get_headers()
    self.headers = {'User-Agent': user_agent}

    # URL template; the ``{}`` placeholder is the ``page_start`` value.
    self.baseUrl = (
        'https://movie.douban.com/j/search_subjects?'
        'type=movie&tag=热门&sort=rank&page_limit=10&page_start={}'
    )
    self.startPg = 0

    # Queue constructed with the argument 300.
    self.movieQueue = Queue(300)

    # Pool limited to ten worker threads.
    self.executor = ThreadPoolExecutor(max_workers=10)

    # Despite the attribute name, this holds the result of indexing the
    # MongoClient with 'douban', not the client itself.
    # NOTE(review): connection credentials are hard-coded here.
    mongo = pymongo.MongoClient(
        host='localhost',
        port=27017,
        username='******',
        password='******',
    )
    self.client = mongo['douban']
def __init__(self):
    """Set up headers, listing URLs, a queue, a thread pool and a MongoDB handle."""
    user_agent = get_headers()
    self.headers = {'User-Agent': user_agent}

    # Base URL.
    self.baseUrl = 'https://www.douyu.com/gapi/rkc/directory/mixList/0_0/'
    # First page.
    self.firstUrl = 'https://www.douyu.com/gapi/rkc/directory/mixList/0_0/1'

    # Live-stream queue, constructed with the argument 300.
    self.liveQueue = Queue(300)

    # Thread pool limited to ten workers.
    self.executor = ThreadPoolExecutor(max_workers=10)

    # MongoDB object: the result of indexing the MongoClient with 'douyu'.
    # NOTE(review): connection credentials are hard-coded here.
    mongo = pymongo.MongoClient(
        host='localhost',
        port=27017,
        username='******',
        password='******',
    )
    self.client = mongo['douyu']
def get_lang(query):
    """Ask the Baidu Translate ``langdetect`` endpoint for the language of ``query``.

    Args:
        query: The text sent as the ``query`` form field.

    Returns:
        The value stored under the ``"lan"`` key of the JSON response.

    Raises:
        KeyError: If the decoded JSON response has no ``"lan"`` key.
    """
    endpoint = 'https://fanyi.baidu.com/langdetect'
    request_headers = {
        'User-Agent': get_headers(),
        'Cookie': 'BAIDUID=7FD0412FAAD4330FF200A81055995783:FG=1; BAIDUID_BFESS=7FD0412FAAD4330FF200A81055995783:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1606117276; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1606117443; __yjsv5_shitong=1.0_7_342e47f274f0b0479c666c81efa3d18ca4a9_300_1606117442884_221.194.139.117_3a134ef6; yjs_js_security_passport=d791e604171c2b5814dd935b42190b97de9e5cf9_1606117444_js',
        'origin': 'https://fanyi.baidu.com',
        'referer': 'https://fanyi.baidu.com/'
    }
    form = {'query': query}

    reply = requests.post(url=endpoint, data=form, headers=request_headers)
    # The body is decoded explicitly as UTF-8 before JSON parsing.
    body = reply.content.decode('utf-8')
    parsed = json.loads(body)
    return parsed["lan"]
def run(self):
    """Translate ``self.query`` and print the first translated segment.

    Detects the language of ``self.query`` with ``self.get_lang``, obtains
    the sign and token with ``self.get_data``, posts the form to
    ``self.url`` and prints ``trans_result.data[0].dst`` from the JSON
    response.
    """
    detected = self.get_lang(self.query)
    sign, token = self.get_data(self.query)

    # Chinese input goes zh -> en; any other detected language is sent
    # as en -> zh.
    source, target = ('zh', 'en') if detected == "zh" else ('en', 'zh')
    form = {
        'from': source,
        'to': target,
        'query': self.query,
        'simple_means_flag': '3',
        'sign': sign,
        'token': token,
        'domain': 'common',
    }

    request_headers = {
        "authority": "fanyi.baidu.com",
        "method": "POST",
        "scheme": "https",
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "cookie": "BAIDUID=7FD0412FAAD4330FF200A81055995783:FG=1; BAIDUID_BFESS=7FD0412FAAD4330FF200A81055995783:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1606117276; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1606117443; __yjsv5_shitong=1.0_7_342e47f274f0b0479c666c81efa3d18ca4a9_300_1606117442884_221.194.139.117_3a134ef6; yjs_js_security_passport=d791e604171c2b5814dd935b42190b97de9e5cf9_1606117444_js",
        "origin": "https://fanyi.baidu.com",
        "referer": "https://fanyi.baidu.com/",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": get_headers(),
        "x-requested-with": "XMLHttpRequest",
    }

    reply = requests.post(url=self.url, data=form, headers=request_headers)
    result = json.loads(reply.content.decode('utf-8'))
    first_segment = result["trans_result"]["data"][0]
    print('翻译结果:', first_segment["dst"])
def __init__(self):
    """Set up the request headers, the entry URL template and a counter."""
    user_agent = get_headers()
    self.headers = {'User-Agent': user_agent}

    # URL template; the ``{}`` placeholder is filled in by other code.
    self.baseUrl = 'https://baike.baidu.com/item/{}'

    # Counter starting at 1; how it is used is not visible in this method.
    self.num = 1
def __init__(self, page_queue, img_queue, *args, **kwargs):
    """Initialise the producer and remember the queues it works with.

    Args:
        page_queue: Stored as ``self.page_queue``.
        img_queue: Stored as ``self.img_queue``.
        *args: Forwarded to the base-class initialiser.
        **kwargs: Forwarded to the base-class initialiser.
    """
    # The previous ``super(Producer).__init__(*args, **kwargs)`` created an
    # unbound super object, so the arguments never reached the base-class
    # initialiser and any keyword argument raised TypeError. Binding super
    # to ``self`` forwards them properly, which also makes the separate
    # ``threading.Thread.__init__(self)`` call unnecessary.
    # NOTE(review): assumes Producer derives from threading.Thread - confirm.
    super(Producer, self).__init__(*args, **kwargs)
    self.headers = {'User-Agent': get_headers()}
    self.page_queue = page_queue
    self.img_queue = img_queue
def __init__(self):
    """Set up the request headers, the listing URL template and the page count."""
    user_agent = get_headers()
    self.headers = {'User-Agent': user_agent}

    # Base URL; the ``{}`` placeholder is the ``page`` query parameter.
    self.baseUrl = 'https://www.doutula.com/photo/list/?page={}'

    # Number of pages.
    self.pageNum = 10