class ZhiHuLogin(object):
    '''
    Open zhihu.com's sign-up page in a Firefox browser, switch it to QR-code
    login and return the browser cookies afterwards.

    NOTE(review): ``self.driver`` is only used through ``get``,
    ``page_source``, ``current_url``, ``find_element_by_css_selector`` and
    ``get_cookies``, which looks like a Selenium WebDriver -- confirm against
    ``BaseDriver``.
    '''

    _SIGNUP_URL = "https://www.zhihu.com/signup"
    # Text zhihu shows when it rejects the browser as too old.
    _LOW_VERSION_HINT = '你正在使用的浏览器版本过低'

    def __init__(self):
        self._init_driver()
        self._set_headers()

    def _init_driver(self):
        '''Create a new browser and store it in ``self.driver``.'''
        self.driver = BaseDriver(
            type=FIREFOX,
            executable_path=FIREFOX_DRIVER_PATH,
            user_agent_type=PC,
            load_images=True,
            driver_use_proxy=True,
            headless=False,
            ip_pool_type=fz_ip_pool,
        ).driver

    def _quit_driver(self):
        '''Best-effort shutdown of the current browser, if there is one.'''
        driver = getattr(self, 'driver', None)
        if driver is None:
            return
        try:
            driver.quit()
        except Exception as e:
            # Cleanup also runs from __del__, so it must never raise.
            print(e)
        del self.driver

    def _set_headers(self):
        '''Build the HTTP headers used when downloading the QR-code image.'''
        self.headers = {
            'authority': 'www.zhihu.com',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': get_random_pc_ua(),
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
        }

    def is_driver_low_version_error(self, body) -> bool:
        '''
        Restart the browser and reload the sign-up page for as long as the
        page shows the "browser version too low" warning.

        :param body: page source to inspect first
        :return: always True, once a page without the warning has been seen
        '''
        # Deliberately unbounded: retry until the warning is gone.
        while self._LOW_VERSION_HINT in body:
            print('提示浏览器版本过低!')
            # Shut the rejected browser down before replacing it; otherwise
            # every retry would leave another browser running.
            self._quit_driver()
            collect()
            self._init_driver()
            self.driver.get(self._SIGNUP_URL)
            sleep(3)
            body = self.driver.page_source

        return True

    def _get_login_cookies(self):
        '''
        Open the sign-up page, switch to QR-code login, wait, and return the
        browser cookies.

        :return: whatever ``self.driver.get_cookies()`` returns
        '''
        def _scan_qrcode(qrcode_url):
            '''
            Download the QR-code image, display it, and wait for a scan.

            Currently unused: the call below is commented out.

            :return: always True
            '''
            print('download qrcode ...')
            # Save the QR code locally.
            qrcode_body = get(qrcode_url, headers=self.headers).content
            with open('./images/qrcode.jpg', 'wb') as f:
                f.write(qrcode_body)
            qrcode_img = Image.open('./images/qrcode.jpg')
            qrcode_img.show()

            before_url = self.driver.current_url
            print('wait to scan qrcode ...')
            sleep(15)
            # A changed URL is taken as the sign of a successful login.
            if self.driver.current_url != before_url:
                print('扫码登陆成功!')
                print('-' * 100)

            return True

        self.driver.get(self._SIGNUP_URL)
        sleep(3)
        self.is_driver_low_version_error(body=self.driver.page_source)
        try:
            self.driver.find_element_by_css_selector(
                'div.SignContainer-switch span').click()
            sleep(1)
            self.driver.find_element_by_css_selector(
                'span.Login-qrcode button').click()
            sleep(2)
            qrcode_url = Selector(text=self.driver.page_source).css(
                'div.Qrcode-img img ::attr("src")').extract_first()
            print('获取到的二维码地址为:{}'.format(qrcode_url))
            print('wait to scan qrcode ...')
            # Scan the QR code (automatic download/display is disabled).
            # scan_res = _scan_qrcode(qrcode_url=qrcode_url)
        except (NoSuchElementException, IndexError) as e:
            print(e)

        # TODO: the page sometimes reports "Missing argument grant_type".
        # Presumably this pause is the window for scanning the code by hand.
        sleep(20)
        cookies = self.driver.get_cookies()
        pprint(cookies)

        return cookies

    def __del__(self):
        # Close the browser instead of only dropping the reference, and do
        # not fail when __init__ never got as far as creating a driver.
        self._quit_driver()
        collect()
class TaoWaiMaiSpider(object):
    '''
    Open the Taobao take-away H5 page in a Firefox browser, search for a
    location, scroll the result page and print the shop list texts.

    NOTE(review): ``self.driver`` is only used through ``get``,
    ``find_element_by_css_selector``, ``execute_script`` and ``page_source``,
    which looks like a Selenium WebDriver -- confirm against ``BaseDriver``.
    '''

    def __init__(self):
        self.driver = BaseDriver(
            type=FIREFOX,
            executable_path=FIREFOX_DRIVER_PATH,
            headless=False,
            load_images=True,
        ).driver
        # Text typed into the location search box.
        self.search_key = '杭州'

    def _actions(self):
        '''
        Run the browsing sequence: open the page, search for
        ``self.search_key``, wait for a manual location pick, scroll down and
        print the extracted shop texts.

        Any exception raised along the way is printed, not propagated.

        :return: None
        '''
        url = 'https://h5.m.taobao.com/app/waimai/index.html#/'
        self.driver.get(url)
        sleep(3)
        try:
            self.driver.find_element_by_css_selector(
                'div.location span').click()
            sleep(2)
            self.driver.find_element_by_css_selector(
                'div.search input').send_keys(self.search_key)
            sleep(2.5)
            # Earlier attempts at picking a result automatically:
            # add_p_list = self.driver.find_elements_by_css_selector('.search-result div.add-list div.item-wrap p.address')
            # pprint(add_p_list)
            # add_p_list[0].send_keys(Keys.ENTER)     # click the first one by default
            # self.driver.find_element_by_xpath("(.//*[normalize-space(text()) and normalize-space(.)='Crocker St914'])[1]/preceding::p[22]").click()
            # The location has to be chosen by hand in the browser window.
            print('请点击选择定位处...')
            sleep(10)

            # scroll_js = '''document.body.scrollTop=10000'''
            # Scrolls 500px every 100ms and stops the timer once the tracked
            # position plus the window height reaches the body height.
            scroll_js = r'''
            function scrollToBottom() {
                var Height = document.body.clientHeight, //文本高度
                    screenHeight = window.innerHeight, //屏幕高度
                    INTERVAL = 100, // 滚动动作之间的间隔时间
                    delta = 500, //每次滚动距离
                    curScrollTop = 0; //当前window.scrollTop 值
                var scroll = function () {
                    curScrollTop = document.body.scrollTop;
                    window.scrollTo(0,curScrollTop + delta);
                };
                var timer = setInterval(function () {
                    var curHeight = curScrollTop + screenHeight;
                    if (curHeight >= Height){ //滚动到页面底部时,结束滚动
                        clearInterval(timer);
                    }
                    scroll();
                }, INTERVAL)
            }
            scrollToBottom()
            '''
            self.driver.execute_script(script=scroll_js)
            # Give the asynchronous scrolling time to run before reading.
            sleep(5)

            body = self.driver.page_source
            # div.list div.list-item
            shop_list = Selector(text=body).css(
                'div.list div.list-item ::text').extract() or []
            pprint(shop_list)
        except Exception as e:
            print(e)

        sleep(60)

    def __del__(self):
        # Close the browser instead of only dropping the reference, and do
        # not fail when __init__ never got as far as creating a driver.
        driver = getattr(self, 'driver', None)
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Finalizers must never raise; report and carry on.
                print(e)
            del self.driver
        collect()