예제 #1
0
class ZhiHuLogin(object):
    """Drive a Firefox browser through the zhihu.com QR-code login page.

    The instance owns one browser driver (``self.driver``) and a set of
    request headers (``self.headers``) used when downloading the QR code.
    """

    def __init__(self):
        self._init_driver()
        self._set_headers()

    def _init_driver(self):
        """Create a new browser driver and store it on ``self.driver``."""
        self.driver = BaseDriver(
            type=FIREFOX,
            executable_path=FIREFOX_DRIVER_PATH,
            user_agent_type=PC,
            load_images=True,
            driver_use_proxy=True,
            headless=False,
            ip_pool_type=fz_ip_pool,
        ).driver

    def _quit_driver(self):
        """Shut down the current browser, if there is one (best effort)."""
        driver = getattr(self, 'driver', None)
        if driver is None:
            return

        try:
            # NOTE(review): assumes the driver exposes a Selenium-style
            # quit(); confirm against BaseDriver.
            driver.quit()
        except Exception as e:
            # Shutdown is best effort: a browser that is already gone must
            # not prevent a retry or object teardown.
            print(e)

    def _set_headers(self):
        """Build the HTTP headers stored on ``self.headers``."""
        self.headers = {
            'authority': 'www.zhihu.com',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': get_random_pc_ua(),
            'accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'accept-encoding': 'gzip, deflate, br',
            'accept-language': 'zh-CN,zh;q=0.9',
        }

    def is_driver_low_version_error(self, body) -> bool:
        '''
        Restart the browser until the page no longer shows the
        "browser version too low" notice.

        Retries without limit. Before each retry the previous browser is
        shut down so that repeated retries do not pile up browser processes.

        :param body: page source to inspect
        :return: always True, once a page without the notice was seen
        '''
        # The notice is a fixed string, so a substring test is sufficient.
        while '你正在使用的浏览器版本过低' in body:
            print('提示浏览器版本过低!')
            self._quit_driver()
            collect()
            self._init_driver()
            self.driver.get("https://www.zhihu.com/signup")
            sleep(3)
            body = self.driver.page_source

        return True

    def _get_login_cookies(self):
        '''
        Open the sign-up page, switch to QR-code login, print the QR-code
        image address, wait, and return the browser cookies.

        :return: whatever ``self.driver.get_cookies()`` returns
        '''
        def _scan_qrcode(qrcode_url):
            '''
            Download and display the QR code, then wait for it to be scanned.

            :param qrcode_url: address of the QR-code image
            :return: True when the browser URL changed during the wait,
                     otherwise False
            '''
            print('download qrcode ...')
            # Save the QR code locally so it can be shown to the user.
            qrcode_body = get(qrcode_url, headers=self.headers).content
            with open('./images/qrcode.jpg', 'wb') as f:
                f.write(qrcode_body)

            qrcode_img = Image.open('./images/qrcode.jpg')
            qrcode_img.show()

            before_url = self.driver.current_url
            print('wait to scan qrcode ...')
            sleep(15)

            # A changed URL is taken as the sign that the login went through.
            logged_in = self.driver.current_url != before_url
            if logged_in:
                print('扫码登陆成功!')
                print('-' * 100)

            return logged_in

        self.driver.get("https://www.zhihu.com/signup")
        sleep(3)
        self.is_driver_low_version_error(body=self.driver.page_source)

        try:
            self.driver.find_element_by_css_selector(
                'div.SignContainer-switch span').click()
            sleep(1)
            self.driver.find_element_by_css_selector(
                'span.Login-qrcode button').click()
            sleep(2)
            qrcode_url = Selector(text=self.driver.page_source).css(
                'div.Qrcode-img img ::attr("src")').extract_first()
            print('获取到的二维码地址为:{}'.format(qrcode_url))
            print('wait to scan qrcode ...')

            # Automatic scanning is currently disabled:
            # scan_res = _scan_qrcode(qrcode_url=qrcode_url)
        except (NoSuchElementException, IndexError) as e:
            print(e)

        # TODO: the page reports "Missing argument grant_type"
        # Fixed pause before the cookies are read.
        sleep(20)
        cookies = self.driver.get_cookies()
        pprint(cookies)

        return cookies

    def __del__(self):
        """Shut down the browser and release the driver reference."""
        self._quit_driver()
        try:
            del self.driver
        except AttributeError:
            # __init__ may have failed before the driver was created.
            pass
        collect()
예제 #2
0
class TaoWaiMaiSpider(object):
    """Open the Taobao take-out H5 page in Firefox and print the shop list.

    The instance owns one browser driver (``self.driver``) and the keyword
    typed into the location search box (``self.search_key``).
    """

    def __init__(self):
        self.driver = BaseDriver(
            type=FIREFOX,
            executable_path=FIREFOX_DRIVER_PATH,
            headless=False,
            load_images=True,
        ).driver
        # Keyword typed into the location search box.
        self.search_key = '杭州'

    def _quit_driver(self):
        """Shut down the current browser, if there is one (best effort)."""
        driver = getattr(self, 'driver', None)
        if driver is None:
            return

        try:
            # NOTE(review): assumes the driver exposes a Selenium-style
            # quit(); confirm against BaseDriver.
            driver.quit()
        except Exception as e:
            # Shutdown is best effort: a browser that is already gone must
            # not break object teardown.
            print(e)

    def _actions(self):
        '''
        Run the browser interaction: open the page, search for the
        location keyword, wait for the user to pick a location, scroll
        the page and print the text found in the shop list.

        Any exception raised during the interaction is printed, not raised.

        :return: None
        '''
        url = 'https://h5.m.taobao.com/app/waimai/index.html#/'
        self.driver.get(url)
        sleep(3)

        try:
            self.driver.find_element_by_css_selector(
                'div.location span').click()
            sleep(2)
            self.driver.find_element_by_css_selector(
                'div.search input').send_keys(self.search_key)
            sleep(2.5)
            # Automatic selection of the first search result is disabled;
            # the user is asked to click the location by hand instead.
            print('请点击选择定位处...')
            sleep(10)
            # Script that scrolls down in fixed steps on a timer.
            scroll_js = r'''
            function scrollToBottom() {
                var Height = document.body.clientHeight,  //文本高度
                    screenHeight = window.innerHeight,  //屏幕高度
                    INTERVAL = 100,  // 滚动动作之间的间隔时间
                    delta = 500,  //每次滚动距离
                    curScrollTop = 0;    //当前window.scrollTop 值

                var scroll = function () {
                    curScrollTop = document.body.scrollTop;
                    window.scrollTo(0,curScrollTop + delta);
                };

                var timer = setInterval(function () {
                    var curHeight = curScrollTop + screenHeight;
                    if (curHeight >= Height){   //滚动到页面底部时,结束滚动
                        clearInterval(timer);
                    }
                    scroll();
                }, INTERVAL)
            }
            scrollToBottom()
            '''
            self.driver.execute_script(script=scroll_js)
            # Give the timer-driven scrolling some time before reading the page.
            sleep(5)
            body = self.driver.page_source

            # Shop entries: div.list div.list-item
            shop_list = Selector(text=body).css(
                'div.list div.list-item ::text').extract() or []
            pprint(shop_list)

        except Exception as e:
            print(e)

        # Fixed pause before returning.
        sleep(60)

    def __del__(self):
        """Shut down the browser and release the driver reference."""
        self._quit_driver()
        try:
            del self.driver
        except AttributeError:
            # __init__ may have failed before the driver was created.
            pass
        collect()