Beispiel #1
0
    def start_requests(self):
        """Log in to Zhihu and emit the first crawl request with session cookies.

        Cookies pickled by an earlier run are reused when the cookie file
        exists and holds a non-empty list. Otherwise Chrome is attached through
        the debugger address ``127.0.0.1:9222``, the sign-in form is filled,
        one captcha attempt is made if a Chinese or English captcha is shown,
        and the browser cookies are pickled for the next run.

        The same cookie file (``<BASE_DIR>/cookies/zhihu.cookie``) is used for
        both reading and writing.

        Returns:
            list: a single ``scrapy.Request`` for ``self.start_urls[0]`` with
            the session cookies and ``self.headers`` attached.
        """
        import base64

        cookie_path = os.path.join(BASE_DIR, "cookies", "zhihu.cookie")

        cookies = []
        if os.path.exists(cookie_path):
            # NOTE(review): pickle can execute arbitrary code on load; only
            # read cookie files that this spider wrote itself.
            try:
                with open(cookie_path, "rb") as cookie_file:
                    cookies = pickle.load(cookie_file)
            except (EOFError, pickle.UnpicklingError):
                # A truncated/corrupt cache is treated as "no cookies" so the
                # browser login below runs again.
                cookies = []

        if not cookies:
            from selenium import webdriver
            from selenium.common.exceptions import WebDriverException
            from selenium.webdriver.chrome.options import Options
            from selenium.webdriver.common.keys import Keys

            chrome_option = Options()
            chrome_option.add_argument('--disable-extensions')
            # Attach to a Chrome instance reachable on this debugger address.
            chrome_option.add_experimental_option('debuggerAddress', '127.0.0.1:9222')
            browser = webdriver.Chrome(
                executable_path=r"D:\scrapytest\ArticleSpider\venv\Scripts\chromedriver.exe",
                chrome_options=chrome_option)

            account_css = ".SignFlow-accountInput.Input-wrapper input"
            password_css = ".SignFlow-password input"

            browser.get("https://www.zhihu.com/signin")
            browser.find_element_by_xpath(
                '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[1]/div[2]').click()
            # Ctrl+A selects any pre-filled text so the typed value replaces it.
            # NOTE(review): credentials are hard-coded; move them to settings.
            browser.find_element_by_css_selector(account_css).send_keys(Keys.CONTROL + "a")
            browser.find_element_by_css_selector(account_css).send_keys("13643095504")
            time.sleep(1)
            browser.find_element_by_css_selector(password_css).send_keys(Keys.CONTROL + "a")
            time.sleep(1)
            browser.find_element_by_css_selector(password_css).send_keys("634498qxp@")
            time.sleep(2)
            # The login button is clicked with the real mouse at fixed
            # coordinates (presumably screen coordinates — confirm for the
            # target display).
            move(678, 511)
            click()
            time.sleep(6)

            # A class-name locator cannot take several space-separated classes,
            # so the notification popover is matched with a CSS selector.
            try:
                browser.find_element_by_css_selector(
                    ".Popover.PushNotifications.AppHeader-notifications")
                logged_in = True
            except WebDriverException:
                logged_in = False

            if not logged_in:
                # Single captcha attempt, matching the original flow, which
                # always returned during its first loop pass.
                try:
                    browser.maximize_window()
                except WebDriverException:
                    pass
                try:
                    english_captcha = browser.find_element_by_class_name('Captcha-englishImg')
                except WebDriverException:
                    english_captcha = None
                try:
                    chinese_captcha = browser.find_element_by_class_name('Captcha-chineseImg')
                except WebDriverException:
                    chinese_captcha = None

                if chinese_captcha:
                    ele_postion = chinese_captcha.location
                    x_relative = ele_postion["x"]
                    y_relative = ele_postion["y"]
                    # Height of the browser chrome above the page viewport.
                    browser_navigation_panel_height = browser.execute_script(
                        'return window.outerHeight - window.innerHeight;'
                    )
                    base64_text = chinese_captcha.get_attribute("src")
                    code = base64_text.replace("data:image/jpg;base64,", '').replace("%0A", "")
                    with open("yzm_cn.jpeg", "wb") as fh:
                        fh.write(base64.b64decode(code))

                    from zheye import zheye
                    z = zheye()
                    position = z.Recognize('yzm_cn.jpeg')

                    # Each recognised pair is swapped so that its second
                    # component becomes the x offset; two results are clicked
                    # in ascending order of that component.
                    targets = []
                    if len(position) == 2:
                        ordered = sorted(position, key=lambda pos: pos[1])
                        targets = [[pos[1], pos[0]] for pos in ordered]
                    elif position:
                        targets = [[position[0][1], position[0][0]]]

                    for index, (x_raw, y_raw) in enumerate(targets):
                        if index:
                            time.sleep(2)
                        # Offsets are halved (presumably the page shows the
                        # captcha at half the saved image size — confirm).
                        move(x_relative + int(x_raw / 2),
                             y_relative + browser_navigation_panel_height + int(y_raw / 2))
                        click()

                    time.sleep(1)
                    move(663, 569)
                    click()

                if english_captcha:
                    base64_text = english_captcha.get_attribute("src")
                    code = base64_text.replace('data:image/jpg;base64,', '').replace("%0A", "")
                    with open("yzm_en.jpeg", "wb") as fh:
                        fh.write(base64.b64decode(code))

                    from tools.yundama_requests import YDMHttp
                    # NOTE(review): captcha-service credentials are hard-coded.
                    yundama = YDMHttp("zzzzqxp", "634498qxp", 8954, "fd03eddd0dc7ebe6eb4ce5c00012bb31")
                    code = yundama.decode("yzm_en.jpeg", 5000, 60)
                    # Retry until the service returns a non-empty answer.
                    while code == "":
                        code = yundama.decode("yzm_en.jpeg", 5000, 60)

                    captcha_input_xpath = (
                        '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[4]/div/div/div[1]/input')
                    browser.find_element_by_xpath(captcha_input_xpath).send_keys(Keys.CONTROL + "a")
                    browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)

                    time.sleep(1)
                    move(663, 544)
                    click()
                    time.sleep(2)

            time.sleep(1)
            browser.get("https://www.zhihu.com/")

            cookies = browser.get_cookies()
            # Persist the cookies so later runs can skip the browser login.
            os.makedirs(os.path.dirname(cookie_path), exist_ok=True)
            with open(cookie_path, "wb") as cookie_file:
                pickle.dump(cookies, cookie_file)

        cookie_dict = {cookie["name"]: cookie["value"] for cookie in cookies}
        return [scrapy.Request(url=self.start_urls[0], dont_filter=True,
                               cookies=cookie_dict, headers=self.headers)]
Beispiel #2
0
    def start_requests(self):
        """Log in to Zhihu through an attached Chrome and return the first request.

        Chrome is attached through the debugger address ``127.0.0.1:9222``.
        If the notification popover is not already present, the account and
        password are typed in and the login button is clicked with the real
        mouse. One pass of captcha handling follows (Chinese inverted-character
        captcha via ``zheye``, English captcha via the Yundama service). The
        browser cookies are then pickled one file per cookie and attached to
        the request.

        Returns:
            list: a single ``scrapy.Request`` for ``self.start_urls[0]`` with
            the browser cookies attached.

        NOTE(review): if reading or saving the cookies fails after the popover
        was detected, the loop ends and the method falls through to the final
        ``print`` and returns ``None``; if it fails before login is detected,
        the loop retries.
        """
        import pickle

        from selenium.common.exceptions import WebDriverException

        chrome_options = Options()
        chrome_options.add_argument("--disable-extensions")
        chrome_options.add_experimental_option("debuggerAddress",
                                               "127.0.0.1:9222")
        browser = webdriver.Chrome(
            'C:/Users/孙佩豪/AppData/Local/Google/Chrome/Application/chromedriver.exe',
            chrome_options=chrome_options)
        try:
            browser.maximize_window()  # maximize the window
        except WebDriverException:
            # Maximizing an already-maximized window raises; ignore it.
            pass

        browser.get('https://www.zhihu.com/signin')  # open the sign-in page
        time.sleep(2)

        notify_xpath = '//div[@class="Popover PushNotifications AppHeader-notifications"]'
        account_css = ".SignFlow-accountInput.Input-wrapper input"
        password_css = ".SignFlow-password input"
        # NOTE(review): credentials are hard-coded; move them to settings.
        account = "15292060685"
        password = "qq1362441"

        def fill_credentials():
            # Ctrl+A selects existing text so the typed value replaces it.
            browser.find_element_by_css_selector(account_css).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_css_selector(account_css).send_keys(account)
            browser.find_element_by_css_selector(password_css).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_css_selector(password_css).send_keys(password)

        login_success = False
        try:
            browser.find_element_by_xpath(notify_xpath)  # already logged in?
            login_success = True
        except WebDriverException:
            pass

        if not login_success:
            move(914, 329)  # click the "account/password login" tab
            click()
            time.sleep(2)
            fill_credentials()
            move(955, 566)
            click()
            click()
        login_success = False

        while not login_success:
            try:
                time.sleep(1)
                browser.find_element_by_xpath(notify_xpath)  # logged in?
                login_success = True
            except WebDriverException:
                pass

            try:
                english_captcha_element = browser.find_element_by_class_name(
                    "Captcha-englishImg")  # English captcha shown?
            except WebDriverException:
                english_captcha_element = None

            try:
                chinese_captcha_element = browser.find_element_by_class_name(
                    "Captcha-chineseImg")  # Chinese captcha shown?
            except WebDriverException:
                chinese_captcha_element = None

            if chinese_captcha_element:
                time.sleep(1)
                ele_position = chinese_captcha_element.location  # element coordinates
                x_relative = ele_position["x"]
                y_relative = ele_position["y"]

                # Height of the browser chrome above the viewport.
                browser_navigation_panel_height = browser.execute_script(
                    'return window.outerHeight - window.innerHeight;'
                )
                # NOTE(review): the measured value is overridden by a fixed 70.
                browser_navigation_panel_height = 70

                time.sleep(3)
                base64_text = chinese_captcha_element.get_attribute("src")
                # Strip the data-URI prefix and encoded newlines from the payload.
                code = base64_text.replace("data:image/jpg;base64,",
                                           "").replace("%0A", "")
                with open("yzm_cn.jpeg", "wb") as fh:
                    fh.write(base64.b64decode(code))

                z = zheye()
                positions = z.Recognize('yzm_cn.jpeg')  # inverted-character coordinates

                # Per the original note, the page shows the captcha scaled
                # relative to the saved image, so offsets are halved. Component
                # [1] is used as the x offset and component [0] as the y offset.
                if len(positions) == 2:
                    # Click the two characters in ascending order of component [0].
                    for pos in sorted(positions, key=lambda item: item[0]):
                        move(int(pos[1] / 2) + x_relative,
                             int(pos[0] / 2) + y_relative +
                             browser_navigation_panel_height)
                        click()
                elif positions:
                    # Only the first recognised character is clicked.
                    pos = positions[0]
                    time.sleep(5)
                    move(int(pos[1] / 2) + x_relative,
                         int(pos[0] / 2) + browser_navigation_panel_height +
                         y_relative)
                    time.sleep(5)
                    click()

                fill_credentials()
                move(954, 619)
                click()

            if english_captcha_element:
                time.sleep(1)
                base64_text = english_captcha_element.get_attribute("src")
                code = base64_text.replace("data:image/jpg;base64,",
                                           "").replace("%0A", "")
                with open("yzm_en.jpeg", "wb") as fh:
                    fh.write(base64.b64decode(code))

                # NOTE(review): captcha-service credentials are hard-coded.
                Yundama = YDMHttp("sph116", "qq1362441", 8730,
                                  "9f94b142759f9fd86bd0e7a912bbc889")
                code = Yundama.decode("yzm_en.jpeg", 5000, 60)
                # Keep asking the service until it returns a non-empty answer.
                while code == "":
                    code = Yundama.decode("yzm_en.jpeg", 5000, 60)
                    time.sleep(0.5)

                captcha_input_xpath = (
                    '//*[@id="root"]/div/main/div/div/div[1]/div/form/div[4]/div/div/label/input'
                )
                browser.find_element_by_xpath(captcha_input_xpath).send_keys(
                    Keys.CONTROL + "a")
                browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)
                move(956, 600)
                click()

            time.sleep(5)

            try:
                Cookies = browser.get_cookies()
                print(Cookies)
                cookie_dict = {}
                for cookie in Cookies:
                    # One pickle file per cookie; adjust the directory to
                    # your own project layout.
                    with open('./ArticleSpider/cookies/zhihu/' +
                              cookie['name'] + '.zhihu', 'wb') as f:
                        pickle.dump(cookie, f)
                    cookie_dict[cookie['name']] = cookie['value']
                # The browser is intentionally left open.
                return [
                    scrapy.Request(url=self.start_urls[0],
                                   dont_filter=True,
                                   cookies=cookie_dict)
                ]
            except (WebDriverException, OSError) as exc:
                # Report the failure instead of swallowing it silently.
                print("failed to read or save cookies: %r" % (exc,))
        print("======知乎登录成功=========")
Beispiel #3
0
    def start_requests(self):
        """Log in to Zhihu through an attached Chrome and return the first request.

        Chrome is attached through the debugger address ``127.0.0.1:9222``,
        the sign-in form is submitted, and the method then polls once per
        second until the notification popover appears. While waiting, any
        Chinese inverted-character captcha (solved with ``zheye``) or English
        captcha (solved with the Yundama service) is answered and the form is
        resubmitted. After login, every cookie is pickled to its own file and
        the browser window is closed.

        Returns:
            list: a single ``scrapy.Request`` for ``self.start_urls[0]`` with
            the browser cookies attached.

        Raises:
            OSError: if a cookie file cannot be written.
        """
        import base64
        import pickle

        from selenium import webdriver
        from selenium.common.exceptions import WebDriverException
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.keys import Keys

        chrome_option = Options()
        chrome_option.add_argument("--disable-extensions")
        chrome_option.add_experimental_option("debuggerAddress",
                                              "127.0.0.1:9222")

        browser = webdriver.Chrome(
            executable_path=
            "E:/chromedriver/chromedriver_win32/chromedriver.exe",
            chrome_options=chrome_option)

        try:
            browser.maximize_window()
        except WebDriverException:
            pass

        account_css = ".SignFlow-accountInput.Input-wrapper input"
        password_css = ".SignFlow-password input"
        # Placeholders: supply the real account and password (ideally from
        # settings). The same pair is used everywhere the form is refilled.
        account = "xxx"
        password = "xxx"

        def fill_credentials():
            # Ctrl+A selects existing text so the typed value replaces it.
            browser.find_element_by_css_selector(account_css).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_css_selector(account_css).send_keys(account)
            browser.find_element_by_css_selector(password_css).send_keys(
                Keys.CONTROL + "a")
            browser.find_element_by_css_selector(password_css).send_keys(password)

        browser.get("https://www.zhihu.com/signin")
        fill_credentials()
        browser.find_element_by_css_selector(
            ".Button.SignFlow-submitButton").click()
        time.sleep(10)

        while True:
            # A class-name locator cannot take several space-separated
            # classes, so the popover is matched with a CSS selector.
            try:
                browser.find_element_by_css_selector(
                    ".Popover.PushNotifications.AppHeader-notifications")
            except WebDriverException:
                pass
            else:
                break  # logged in

            try:
                english_captcha_element = browser.find_element_by_class_name(
                    "Captcha-englishImg")
            except WebDriverException:
                english_captcha_element = None
            try:
                chinese_captcha_element = browser.find_element_by_class_name(
                    "Captcha-chineseImg")
            except WebDriverException:
                chinese_captcha_element = None

            if chinese_captcha_element:
                ele_postion = chinese_captcha_element.location
                x_relative = ele_postion["x"]
                y_relative = ele_postion["y"]
                # Height of the browser chrome above the page viewport.
                browser_navigation_panel_height = browser.execute_script(
                    'return window.outerHeight - window.innerHeight;')
                base64_text = chinese_captcha_element.get_attribute("src")
                code = base64_text.replace("data:image/jpg;base64,",
                                           "").replace("%0A", "")
                with open("yzm_cn.jpeg", "wb") as fh:
                    fh.write(base64.b64decode(code))

                from zheye import zheye
                z = zheye()
                positions = z.Recognize('yzm_cn.jpeg')

                # Each recognised pair is swapped so that its second component
                # becomes the x offset; two results are clicked in ascending
                # order of that component.
                targets = []
                if len(positions) == 2:
                    ordered = sorted(positions, key=lambda pos: pos[1])
                    targets = [[pos[1], pos[0]] for pos in ordered]
                elif positions:
                    targets = [[positions[0][1], positions[0][0]]]

                for index, (x_raw, y_raw) in enumerate(targets):
                    if index:
                        time.sleep(3)
                    # Offsets are halved (presumably the page shows the captcha
                    # at half the saved image size — confirm).
                    move(
                        x_relative + int(x_raw / 2), y_relative +
                        browser_navigation_panel_height + int(y_raw / 2))
                    click()

                fill_credentials()

                move(911, 643)
                click()

            if english_captcha_element:
                base64_text = english_captcha_element.get_attribute("src")
                code = base64_text.replace('data:image/jpg;base64,',
                                           '').replace("%0A", "")
                with open("yzm_en.jpeg", "wb") as fh:
                    fh.write(base64.b64decode(code))

                from tools.yundama_requests import YDMHttp
                yundama = YDMHttp("xxx", "xxx", 3129, "xxx")
                code = yundama.decode("yzm_en.jpeg", 5000, 60)
                # Retry until the service returns a non-empty answer.
                while code == "":
                    code = yundama.decode("yzm_en.jpeg", 5000, 60)

                captcha_input_xpath = (
                    '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
                )
                browser.find_element_by_xpath(captcha_input_xpath).send_keys(
                    Keys.CONTROL + "a")
                browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)

                fill_credentials()
                move(895, 603)
                click()

            # Poll at a modest rate instead of spinning while the page settles.
            time.sleep(1)

        Cookies = browser.get_cookies()
        print(Cookies)
        cookie_dict = {}
        for cookie in Cookies:
            # One pickle file per cookie; adjust the directory to your own
            # project layout.
            with open(
                    './ArticleSpider/cookies/zhihu/' + cookie['name'] +
                    '.zhihu', 'wb') as f:
                pickle.dump(cookie, f)
            cookie_dict[cookie['name']] = cookie['value']
        browser.close()
        return [
            scrapy.Request(url=self.start_urls[0],
                           dont_filter=True,
                           cookies=cookie_dict)
        ]
Beispiel #4
0
    def start_requests(self):
        from selenium import webdriver
        from selenium.webdriver.common.action_chains import ActionChains
        from selenium.webdriver.chrome.options import Options

        chrome_options = Options()
        chrome_options.add_argument("--disable-extensions")
        #chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")

        #browser = webdriver.Chrome(executable_path="E:/chromedriver/chromedriver_win32/chromedriver.exe",  chrome_options=chrome_options)
        browser = webdriver.Chrome(
            executable_path=
            "E:/chromedriver/chromedriver_win32/chromedriver.exe")
        import time
        try:
            browser.maximize_window()  #将窗口最大化防止定位错误
        except:
            pass
        browser.get("https://www.zhihu.com/signin")
        logo_element = browser.find_element_by_class_name("SignFlowHeader")
        # y_relative_coord = logo_element.location['y']
        browser_navigation_panel_height = browser.execute_script(
            'return window.outerHeight - window.innerHeight;')
        browser_navigation_panel_height = 71
        time.sleep(5)
        browser.find_element_by_css_selector(
            ".SignFlow-accountInput.Input-wrapper input").send_keys(
                Keys.CONTROL + "a")
        browser.find_element_by_css_selector(
            ".SignFlow-accountInput.Input-wrapper input").send_keys("xxx")

        browser.find_element_by_css_selector(
            ".SignFlow-password input").send_keys(Keys.CONTROL + "a")
        browser.find_element_by_css_selector(
            ".SignFlow-password input").send_keys("xxx")

        browser.find_element_by_css_selector(
            ".Button.SignFlow-submitButton").click()
        time.sleep(15)
        from mouse import move, click
        # move(800, 400 ,True)
        # actions = ActionChains(browser)
        # actions.move_to_element(browser.find_element_by_css_selector(
        #     ".Button.SignFlow-submitButton"))
        # actions.click(browser.find_element_by_css_selector(
        #     ".Button.SignFlow-submitButton"))
        # actions.perform()
        # actions.move_to_element_with_offset(browser.find_element_by_css_selector(
        #     ".Button.SignFlow-submitButton"), 30, 30).perform()
        #chrome的版本问题有两种解决方案
        #1. 自己启动chrome(推荐) 可以防止chromedriver被识别,因为chromedriver出现的一些js变量可以被服务器识别出来
        #2. 使用chrome60(版本)

        # 先判断是否登录成功
        login_success = False
        while not login_success:
            try:
                notify_element = browser.find_element_by_class_name(
                    "Popover PushNotifications AppHeader-notifications")
                login_success = True
            except:
                pass

            try:
                #查询是否有英文验证码
                english_captcha_element = browser.find_element_by_class_name(
                    "Captcha-englishImg")
            except:
                english_captcha_element = None
            try:
                # 查询是否有中文验证码
                chinese_captcha_element = browser.find_element_by_class_name(
                    "Captcha-chineseImg")
            except:
                chinese_captcha_element = None

            if chinese_captcha_element:
                y_relative_coord = chinese_captcha_element.location['y']
                y_absolute_coord = y_relative_coord + browser_navigation_panel_height
                x_absolute_coord = chinese_captcha_element.location['x']
                # x_absolute_coord = 842
                # y_absolute_coord = 428
                """
                保存图片
                1. 通过保存base64编码
                2. 通过crop方法
                """
                # 1. 通过保存base64编码
                base64_text = chinese_captcha_element.get_attribute("src")
                import base64
                code = base64_text.replace('data:image/jpg;base64,',
                                           '').replace("%0A", "")
                # print code
                fh = open("yzm_cn.jpeg", "wb")
                fh.write(base64.b64decode(code))
                fh.close()

                from zheye import zheye
                z = zheye()
                positions = z.Recognize("yzm_cn.jpeg")

                pos_arr = []
                if len(positions) == 2:
                    if positions[0][1] > positions[1][1]:
                        pos_arr.append([positions[1][1], positions[1][0]])
                        pos_arr.append([positions[0][1], positions[0][0]])
                    else:
                        pos_arr.append([positions[0][1], positions[0][0]])
                        pos_arr.append([positions[1][1], positions[1][0]])
                else:
                    pos_arr.append([positions[0][1], positions[0][0]])

                if len(positions) == 2:
                    first_point = [
                        int(pos_arr[0][0] / 2),
                        int(pos_arr[0][1] / 2)
                    ]
                    second_point = [
                        int(pos_arr[1][0] / 2),
                        int(pos_arr[1][1] / 2)
                    ]

                    move((x_absolute_coord + first_point[0]),
                         y_absolute_coord + first_point[1])
                    click()

                    move((x_absolute_coord + second_point[0]),
                         y_absolute_coord + second_point[1])
                    click()

                else:
                    first_point = [
                        int(pos_arr[0][0] / 2),
                        int(pos_arr[0][1] / 2)
                    ]

                    move((x_absolute_coord + first_point[0]),
                         y_absolute_coord + first_point[1])
                    click()

                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input").send_keys(
                        Keys.CONTROL + "a")
                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input").send_keys(
                        "xxx")

                browser.find_element_by_css_selector(
                    ".SignFlow-password input").send_keys(Keys.CONTROL + "a")
                browser.find_element_by_css_selector(
                    ".SignFlow-password input").send_keys("xxx")
                browser.find_element_by_css_selector(
                    ".Button.SignFlow-submitButton").click()
                browser.find_element_by_css_selector(
                    ".Button.SignFlow-submitButton").click()

            if english_captcha_element:
                # 2. 通过crop方法
                # from pil import Image
                # image = Image.open(path)
                # image = image.crop((locations["x"], locations["y"], locations["x"] + image_size["width"],
                #                     locations["y"] + image_size["height"]))  # defines crop points
                #
                # rgb_im = image.convert('RGB')
                # rgb_im.save("D:/ImoocProjects/python_scrapy/coding-92/ArticleSpider/tools/image/yzm.jpeg",
                #             'jpeg')  # saves new cropped image
                # # 1. 通过保存base64编码
                base64_text = english_captcha_element.get_attribute("src")
                import base64
                code = base64_text.replace('data:image/jpg;base64,',
                                           '').replace("%0A", "")
                # print code
                fh = open("yzm_en.jpeg", "wb")
                fh.write(base64.b64decode(code))
                fh.close()

                from tools.yundama_requests import YDMHttp
                yundama = YDMHttp("da_ge_da1", "dageda", 3129,
                                  "40d5ad41c047179fc797631e3b9c3025")
                code = yundama.decode("yzm_en.jpeg", 5000, 60)
                while True:
                    if code == "":
                        code = yundama.decode("yzm_en.jpeg", 5000, 60)
                    else:
                        break

                browser.find_element_by_xpath(
                    '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
                ).send_keys(code)

                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input").send_keys(
                        Keys.CONTROL + "a")
                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input").send_keys(
                        "xxx")

                browser.find_element_by_css_selector(
                    ".SignFlow-password input").send_keys(Keys.CONTROL + "a")
                browser.find_element_by_css_selector(
                    ".SignFlow-password input").send_keys("xxx")
                submit_ele = browser.find_element_by_css_selector(
                    ".Button.SignFlow-submitButton")
                browser.find_element_by_css_selector(
                    ".Button.SignFlow-submitButton").click()

            time.sleep(10)
            try:
                notify_element = browser.find_element_by_class_name(
                    "Popover PushNotifications AppHeader-notifications")
                login_success = True

                Cookies = browser.get_cookies()
                print(Cookies)
                cookie_dict = {}
                import pickle
                for cookie in Cookies:
                    # 写入文件
                    # 此处大家修改一下自己文件的所在路径
                    f = open(
                        './ArticleSpider/cookies/zhihu/' + cookie['name'] +
                        '.zhihu', 'wb')
                    pickle.dump(cookie, f)
                    f.close()
                    cookie_dict[cookie['name']] = cookie['value']
                browser.close()
                return [
                    scrapy.Request(url=self.start_urls[0],
                                   dont_filter=True,
                                   cookies=cookie_dict)
                ]
            except:
                pass

        print("yes")
    @staticmethod
    def _fill_login_form(browser, account, password, pause=0):
        """Overwrite the account and password inputs of the sign-in form.

        Ctrl+A is sent before typing so the new text replaces whatever the
        field currently holds. Each input is looked up again for every
        ``send_keys`` call, exactly as the inline code did.

        :param browser: Selenium WebDriver showing the sign-in page.
        :param account: text typed into the account input.
        :param password: text typed into the password input.
        :param pause: seconds to sleep between the two fields (0 = none).
        """
        account_css = ".SignFlow-accountInput.Input-wrapper input"
        password_css = ".SignFlow-password input"

        browser.find_element_by_css_selector(account_css).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(account_css).send_keys(account)
        if pause:
            time.sleep(pause)
        browser.find_element_by_css_selector(password_css).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_css_selector(password_css).send_keys(password)

    @staticmethod
    def _dump_cookies(browser, cookie_dir='./Article_Spider/cookies/zhihu/'):
        """Pickle every browser cookie to disk and return a name->value dict.

        One ``<cookie name>.zhihu`` file is written per cookie inside
        ``cookie_dir`` (created if it does not exist yet).

        :param browser: Selenium WebDriver whose cookies are read.
        :param cookie_dir: directory prefix (with trailing slash) for the
            pickle files.
        :return: dict mapping each cookie name to its value.
        """
        import os
        import pickle

        cookies_list = browser.get_cookies()
        print(cookies_list)

        # Without the directory every open() below would fail.
        os.makedirs(cookie_dir, exist_ok=True)

        cookie_dict = {}
        for cookie in cookies_list:
            # Persist the cookie to its own file.
            with open(cookie_dir + cookie['name'] + '.zhihu', 'wb') as f:
                pickle.dump(cookie, f)
            cookie_dict[cookie['name']] = cookie['value']
        return cookie_dict

    @staticmethod
    def _click_chinese_captcha(browser, captcha_element):
        """Solve the inverted-Chinese-character captcha by clicking on it.

        The captcha image is decoded from its base64 ``src`` attribute into
        ``verify_code.jpeg``, passed to ``zheye().Recognize`` and the
        returned positions are clicked through ``move``/``click``.

        :param browser: Selenium WebDriver showing the sign-in page.
        :param captcha_element: the ``Captcha-chineseImg`` element.
        """
        import base64

        location = captcha_element.location
        x_relative = location["x"]
        y_relative = location["y"]
        # Difference between outer and inner window height; added to every y
        # coordinate below. Presumably converts page coordinates into the
        # coordinates move() expects - confirm against move().
        panel_height = browser.execute_script(
            'return window.outerHeight - window.innerHeight;')

        base64_text = captcha_element.get_attribute("src")
        encoded = base64_text.replace("data:image/jpg;base64,",
                                      "").replace("%0A", "")
        with open("verify_code.jpeg", "wb") as fh:
            fh.write(base64.b64decode(encoded))

        positions = zheye().Recognize('verify_code.jpeg')
        if len(positions) == 2:
            # Click the point with the smaller second component first
            # (stable sort keeps the given order on ties).
            targets = sorted(positions, key=lambda point: point[1])
        else:
            # Any other count: only the first point is used. An empty result
            # yields no clicks instead of an IndexError.
            targets = positions[:1]

        for index, point in enumerate(targets):
            if index:
                time.sleep(3)
            # point[1] is used as the horizontal and point[0] as the vertical
            # offset; both are halved - presumably the image is displayed at
            # half the size the recogniser works on (TODO confirm).
            move(x_relative + int(point[1] / 2),
                 y_relative + panel_height + int(point[0] / 2))
            click()

    @staticmethod
    def _enter_english_captcha(browser, captcha_element):
        """Recognise the English-letter captcha and type it into the form.

        The image is decoded from its base64 ``src`` attribute into
        ``yzm_en.jpeg`` and sent to ``YDMHttp.decode``; the call is repeated
        for as long as it returns an empty string.

        :param browser: Selenium WebDriver showing the sign-in page.
        :param captcha_element: the ``Captcha-englishImg`` element.
        """
        import base64
        from tools.yundama_requests import YDMHttp

        base64_text = captcha_element.get_attribute("src")
        encoded = base64_text.replace('data:image/jpg;base64,',
                                      '').replace("%0A", "")
        with open("yzm_en.jpeg", "wb") as fh:
            fh.write(base64.b64decode(encoded))

        yundama = YDMHttp("xxx", "xxx", 3129, "xxx")
        code = yundama.decode("yzm_en.jpeg", 5000, 60)
        while code == "":
            code = yundama.decode("yzm_en.jpeg", 5000, 60)

        captcha_input_xpath = (
            '//*[@id="root"]/div/main/div/div/div/div[2]/div[1]/form/div[3]/div/div/div[1]/input'
        )
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(
            Keys.CONTROL + "a")
        browser.find_element_by_xpath(captcha_input_xpath).send_keys(code)

    def start_requests(self):
        """Log in to Zhihu through an already running Chrome and start crawling.

        Chrome has to be started by hand so that the site's anti-crawler
        checks do not recognise an automated browser. From the Chrome
        installation directory run::

            chrome.exe --remote-debugging-port=9222

        and check that it is listening at http://127.0.0.1:9222/json.

        The sign-in form is submitted once; afterwards the method loops,
        solving whichever captcha (Chinese inverted characters or English
        letters) is shown and resubmitting, until the notification element
        of the logged-in page header is found.

        :return: a one-element list holding a ``scrapy.Request`` for
            ``self.start_urls[0]`` that carries the session cookies.
        """
        from selenium.common.exceptions import (NoSuchElementException,
                                                WebDriverException)

        chrome_option = Options()
        chrome_option.add_argument("--disable-extensions")
        chrome_option.add_experimental_option("debuggerAddress",
                                              "127.0.0.1:9222")

        browser = webdriver.Chrome(chrome_options=chrome_option)
        try:
            browser.maximize_window()
        except WebDriverException:
            # Best effort only: a failure to maximise is not fatal.
            pass

        browser.get("https://www.zhihu.com/signin")
        # NOTE(review): credentials are hard-coded, and the first attempt
        # uses placeholder values while the captcha retries use different
        # ones. They should come from configuration - left untouched here.
        self._fill_login_form(browser, "xxx", "xxx", pause=2)
        browser.find_element_by_css_selector(
            ".Button.SignFlow-submitButton").click()
        time.sleep(3)

        while True:
            # A class-name locator cannot take several space-separated
            # classes, so the header element is matched with a CSS selector
            # that requires all three classes.
            try:
                browser.find_element_by_css_selector(
                    ".Popover.PushNotifications.AppHeader-notifications")
            except NoSuchElementException:
                pass
            else:
                cookie_dict = self._dump_cookies(browser)
                browser.close()
                return [
                    scrapy.Request(url=self.start_urls[0],
                                   dont_filter=True,
                                   cookies=cookie_dict)
                ]

            try:
                english_captcha_element = browser.find_element_by_class_name(
                    "Captcha-englishImg")
            except NoSuchElementException:
                english_captcha_element = None
            try:
                chinese_captcha_element = browser.find_element_by_class_name(
                    "Captcha-chineseImg")
            except NoSuchElementException:
                chinese_captcha_element = None

            # Inverted Chinese character captcha.
            if chinese_captcha_element:
                self._click_chinese_captcha(browser, chinese_captcha_element)
                self._fill_login_form(browser, "13662241324", "root0503",
                                      pause=2)
                # Fixed coordinates passed to move() before the click that
                # resubmits the form.
                move(674, 527)
                click()

            # English letter captcha.
            if english_captcha_element:
                self._enter_english_captcha(browser, english_captcha_element)
                self._fill_login_form(browser, "13662241324", "root0503")
                move(674, 527)
                click()