def __init__(self):
    """Start a Chrome session and store the china.cn login state.

    Sets the login page URL, the browser together with its 20-second
    explicit wait, the account credentials read from module-level
    settings, and the Chaojiying client built from the CJY_* settings.
    """
    self.url = 'https://my.cn.china.cn/manage.php?op=LoginShowNew'

    chrome = webdriver.Chrome()
    self.browser = chrome
    # One explicit wait (20 s timeout) bound to this browser.
    self.wait = WebDriverWait(chrome, 20)

    self.email, self.password = ACCOUNT, PASSWORD
    self.chaojiying = ChaojiyingClient(
        CJY_USERNAME, CJY_PASSWORD, CJY_SOFT_ID)
def __init__(self):
    """Launch a maximized Chrome window and store the 12306 login state.

    Sets the login page URL, the browser with its 20-second explicit wait,
    the account credentials from module-level settings, and the Chaojiying
    client built from the CHAOJIYING_* settings.
    """
    self.url = 'https://kyfw.12306.cn/otn/resources/login.html'

    # Directory of the ChromeDriver binary. Per the original note, this
    # path is unnecessary when the driver is already on the system PATH.
    driver_path = r'F:\PycharmProjects\Python3爬虫\chromedriver.exe'
    options = Options()
    options.add_argument('--start-maximized')
    chrome = webdriver.Chrome(executable_path=driver_path,
                              chrome_options=options)

    self.browser = chrome
    self.wait = WebDriverWait(chrome, 20)
    self.username, self.password = USERNAME, PASSWORD
    self.chaojiying = ChaojiyingClient(
        CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)
def __init__(self):
    """Start a headless Firefox session for the Bilibili login page.

    Sets the login URL, the browser with a 60-second explicit wait, and
    the Chaojiying client built from the CHAOJIYING_*_ settings.
    """
    self.login_url = "https://passport.bilibili.com/login"

    capabilities = DesiredCapabilities().FIREFOX
    options = Options()
    options.headless = True
    firefox = webdriver.Firefox(
        executable_path="/Users/chloeji/geckodriver",
        capabilities=capabilities,
        options=options)

    self.browser = firefox
    self.driver_wait = WebDriverWait(firefox, 60)

    # Chaojiying user name, password and software ID.
    self.chaojiying = ChaojiyingClient(
        CHAOJIYING_USERNAME_, CHAOJIYING_PASSWORD_, CHAOJIYING_SOFT_ID_)
def parse_html(html):
    """Solve the page captcha via Chaojiying and submit the sign-up form.

    Takes a full screenshot (saved as ``full_screen.png``), crops the
    captcha region (saved as ``crop.png``), sends the cropped image to
    Chaojiying, then fills in the registration form and clicks submit.

    Relies on names defined elsewhere in the module: ``get_big_image``,
    ``get_position``, ``wait``, ``username`` and ``password`` (the latter
    two are the Chaojiying account credentials).

    :param html: page source; not used by the current implementation and
        kept only so existing callers keep working.
    :return: None
    """
    page_shot = get_big_image()
    page_shot.save('full_screen.png')

    x1, y1, x2, y2 = get_position()
    file_name = 'crop.png'
    page_shot.crop((x1, y1, x2, y2)).save(file_name)

    # Send the cropped captcha to Chaojiying and read back the recognized text.
    chaojiying = ChaojiyingClient(username, password, '96001')
    # Use a context manager so the file handle is closed deterministically
    # (the previous bare open(...).read() leaked it).
    with open(file_name, 'rb') as image_file:
        image_bytes = image_file.read()
    captha_str = chaojiying.PostPic(image_bytes, 1006)['pic_str']

    # Account data for the target site (http://bm.e21cn.com/log/reg.aspx).
    # NOTE(review): hard-coded credentials; consider moving them to settings.
    username_code = 'carmack55'
    password_code = '123456'
    tel = '18511405897'
    print(captha_str)

    def field(css):
        # Wait until the form field identified by the CSS selector exists.
        return wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css)))

    input_username = field('input#username')
    input_password1 = field('input#pwd')
    input_password2 = field('input#pwd_Q')
    input_tel = field('input#tel')
    input_check = field('input#CheckCode')
    submit_button = wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, 'input#btn_login')))

    input_username.send_keys(username_code)
    input_password1.send_keys(password_code)
    input_password2.send_keys(password_code)
    input_tel.send_keys(tel)
    input_check.send_keys(captha_str)
    time.sleep(2)
    submit_button.click()
def get_captcha(browser, class_str):
    """Recognize the captcha shown in the browser using Chaojiying.

    Takes a full screenshot, crops the captcha node out of it, uploads the
    cropped image to Chaojiying and returns the recognized text. Two files
    are written to the current directory as a side effect:
    ``mobile_login.png`` (full screenshot) and ``mobile_captha.png``
    (cropped captcha).

    :param browser: browser (WebDriver) object
    :param class_str: CSS selector locating the captcha node in the page
    :return: captcha string (the ``pic_str`` field of the Chaojiying reply)
    """
    full_screen_img = get_big_image(browser)
    # Saving the full screenshot is optional; it is kept for debugging.
    full_screen_img.save('mobile_login.png')

    # Top-left and bottom-right corners of the captcha node.
    x1, y1, x2, y2 = get_captha_position(browser, class_str)
    captcha_path = 'mobile_captha.png'
    full_screen_img.crop((x1, y1, x2, y2)).save(captcha_path)

    # Adjust account, password and captcha type code to your own setup.
    # NOTE(review): credentials are hard-coded here; consider moving them
    # to configuration.
    chaojiying = ChaojiyingClient('carmack', 'Vff635241', '96001')
    # Context manager closes the file handle deterministically (the previous
    # bare open(...).read() leaked it).
    with open(captcha_path, 'rb') as image_file:
        image_bytes = image_file.read()
    return chaojiying.PostPic(image_bytes, 1006)['pic_str']
class CrackTouClick():
    """Automate the 12306 account login, solving its click captcha.

    The captcha image is cropped from a page screenshot and sent to the
    Chaojiying service; the coordinates it returns are clicked inside the
    captcha element before the login button is pressed.
    """

    def __init__(self):
        """Launch a maximized Chrome window and store the login state."""
        self.url = 'https://kyfw.12306.cn/otn/resources/login.html'
        # Directory of the ChromeDriver binary. Per the original note, this
        # path is unnecessary when the driver is already on the system PATH.
        path = r'F:\PycharmProjects\Python3爬虫\chromedriver.exe'
        chrome_options = Options()
        chrome_options.add_argument('--start-maximized')
        self.browser = webdriver.Chrome(executable_path=path,
                                        chrome_options=chrome_options)
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.password = PASSWORD
        self.chaojiying = ChaojiyingClient(
            CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def crack(self):
        """Log in, retrying the whole flow until the welcome name appears.

        Each attempt fills in the credentials, solves the captcha, clicks
        the answers and submits. When the welcome name does not show up
        within the wait timeout, the recognition is reported to Chaojiying
        as wrong and a new attempt starts.

        Retries are done in a loop rather than by self-recursion so a long
        run of failures cannot exhaust the call stack.

        :return: None
        """
        while True:
            # Fill in user name and password.
            self.get_input_element()
            # Crop the captcha image out of the page.
            image = self.get_touclick_image()
            bytes_array = BytesIO()
            image.save(bytes_array, format='PNG')
            # PostPic takes the image as a byte string; the reply is used
            # below as a mapping with 'pic_str' and 'pic_id' entries.
            result = self.chaojiying.PostPic(bytes_array.getvalue(),
                                             CHAOJIYING_KIND)
            print(result)
            locations = self.get_points(result)
            self.touch_click_words(locations)
            self.login()
            try:
                # The user's name appearing means the login succeeded.
                success = self.wait.until(
                    EC.text_to_be_present_in_element(
                        (By.CSS_SELECTOR, '.welcome-name'), '谭先生'))
                print(success)
                cc = self.browser.find_element(By.CSS_SELECTOR,
                                               '.welcome-name')
                print('用户' + cc.text + '登录成功')
                return
            except TimeoutException:
                # Login failed: report the bad recognition (per the original
                # note, Chaojiying refunds the points) and try again.
                self.chaojiying.ReportError(result['pic_id'])

    def get_input_element(self):
        """Open the login page, switch to account login, type credentials."""
        self.browser.get(self.url)
        # The page defaults to QR-code login; switch to account login first.
        login = self.wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.login-hd-account')))
        login.click()
        time.sleep(3)
        username = self.wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'input#J-userName')))
        password = self.wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'input#J-password')))
        username.send_keys(self.username)
        password.send_keys(self.password)

    def get_touclick_image(self, name='12306.png'):
        """Crop the captcha element out of a full-page screenshot.

        :param name: file name the cropped captcha is also saved under
        :return: the cropped captcha as a PIL image
        """
        element = self.wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.login-pwd-code')))
        time.sleep(3)
        location = element.location
        size = element.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        # Screenshot the whole page, then cut out the captcha rectangle.
        screenshot = Image.open(BytesIO(self.browser.get_screenshot_as_png()))
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def get_points(self, captcha_result):
        """Parse the Chaojiying reply into a list of ``[x, y]`` pairs.

        ``pic_str`` holds coordinate groups separated by ``|``, each group
        being comma-separated numbers. Float-formatted numbers are
        truncated to int, and a missing or empty ``pic_str`` yields an
        empty list instead of raising.

        :param captcha_result: mapping returned by ``PostPic``
        :return: list of ``[x, y]`` integer lists
        """
        def to_pixel(text):
            # Accept "12" as well as "12.0" / "12.7".
            try:
                return int(text)
            except ValueError:
                return int(float(text))

        pic_str = captcha_result.get('pic_str') or ''
        return [[to_pixel(number) for number in group.split(',')]
                for group in pic_str.split('|') if group.strip()]

    def touch_click_words(self, locations):
        """Click every given offset inside the captcha element.

        :param locations: iterable of ``[x, y]`` offsets
        """
        element = self.wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, '.login-pwd-code')))
        for location in locations:
            print(location)
            ActionChains(self.browser).move_to_element_with_offset(
                element, location[0], location[1]).click().perform()

    def login(self):
        """Click the login button once it becomes clickable."""
        submit = self.wait.until(
            EC.element_to_be_clickable((By.ID, 'J-login')))
        submit.click()
class CrackGeetest():
    """Selenium helper for china.cn: login, captcha helpers and a crawl.

    Bundles login with a text captcha solved through Chaojiying,
    Geetest slider helpers (gap detection, drag track) and a crawl of
    company pages by province and city.
    """

    def __init__(self):
        """Start Chrome and store URL, credentials and Chaojiying client."""
        self.url = 'https://my.cn.china.cn/manage.php?op=LoginShowNew'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.email = ACCOUNT
        self.password = PASSWORD
        self.chaojiying = ChaojiyingClient(CJY_USERNAME, CJY_PASSWORD,
                                           CJY_SOFT_ID)

    def __del__(self):
        """Shut the browser session down when the object is collected."""
        # __init__ may have failed before the browser was created.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole session including the driver process;
            # close() would only close the current window.
            browser.quit()

    def open_image(self, path):
        """Return the raw bytes of the file at ``path``."""
        with open(path, mode='rb') as image_file:
            return image_file.read()

    def get_geetest_button(self):
        """
        Get the initial verification button.
        :return: the clickable button element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))

    def get_position(self):
        """
        Get the captcha position.
        :return: tuple ``(top, bottom, left, right)`` in page coordinates
        """
        img = self.wait.until(
            EC.presence_of_element_located((By.ID, 'showCaptcha')))
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the page.
        :return: screenshot as a PIL image
        """
        return Image.open(BytesIO(self.browser.get_screenshot_as_png()))

    def get_slider(self):
        """
        Get the slider.
        :return: the clickable slider element
        """
        return self.wait.until(
            EC.element_to_be_clickable(
                (By.CLASS_NAME, 'geetest_slider_button')))

    def get_geetest_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it.
        :param name: file name to save the cropped image under
        :return: cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page, fill in credentials and captcha, and submit.
        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(
            EC.presence_of_element_located((By.ID, 'userName')))
        password = self.wait.until(
            EC.presence_of_element_located((By.ID, 'userPassword')))
        email.send_keys(self.email)
        password.send_keys(self.password)
        # Input box the recognized captcha text is typed into.
        captcha_input = self.wait.until(
            EC.presence_of_element_located((By.ID, 'verifystr')))
        # Save the captcha to disk, then read it back as bytes for upload.
        self.get_geetest_image(name="login.png")
        time.sleep(0.1)
        image = self.open_image("login.png")
        img_num = self.chaojiying.PostPic(image, 1902).get("pic_str")
        captcha_input.send_keys(img_num)
        submit = self.wait.until(
            EC.presence_of_element_located((By.ID, 'btn')))
        submit.click()
        time.sleep(10)

    def get_gap(self, image1, image2):
        """
        Get the horizontal offset of the gap.

        Scans column by column starting at x = 60 and returns the first
        column containing a pixel that differs between the two images.
        :param image1: image without the gap
        :param image2: image with the gap
        :return: x offset of the gap, or 60 when no difference is found
        """
        left = 60
        for x in range(left, image1.size[0]):
            for y in range(image1.size[1]):
                if not self.is_pixel_equal(image1, image2, x, y):
                    return x
        return left

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Tell whether two pixels are (nearly) the same.

        The first three channels must each differ by less than 60.
        :param image1: first image
        :param image2: second image
        :param x: x position
        :param y: y position
        :return: True when the pixels match within the threshold
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        threshold = 60
        return all(abs(pixel1[channel] - pixel2[channel]) < threshold
                   for channel in range(3))

    def get_track(self, distance):
        """
        Build a drag track for the given offset.

        Simulates accelerating (a = 2) for the first 4/5 of the distance
        and decelerating (a = -3) afterwards, in steps of t = 0.2.
        :param distance: offset to cover
        :return: list of rounded per-step moves (empty if distance <= 0)
        """
        track = []
        current = 0
        # Switch from acceleration to deceleration past this point.
        mid = distance * 4 / 5
        t = 0.2
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2 * a * t^2
            move = v0 * t + 1 / 2 * a * t * t
            current += move
            track.append(round(move))
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the track to the gap.
        :param slider: slider element
        :param track: list of horizontal moves
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(
                xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button.
        :return: None
        """
        submit = self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(3)
        print('登录成功')

    def _visit_companies(self):
        """Open each listed company page, print its URL and go back."""
        xpath = "//div[@class='corpinfo']/h3/a"
        # NOTE(review): the last link is never visited (len - 1 with
        # forward indexing). Kept as in the original; confirm whether the
        # last entry is meant to be skipped.
        count = len(self.browser.find_elements(By.XPATH, xpath)) - 1
        for index in range(count):
            # Re-query every time: navigating back invalidates old elements.
            links = self.browser.find_elements(By.XPATH, xpath)
            links[index].click()
            print(self.browser.current_url)
            self.browser.back()
            time.sleep(1)

    def crack(self):
        """Search companies and walk the results by province and city.

        Login through ``self.open()`` is currently disabled; the crawl
        starts from the public home page. For every province and city link
        except the first of each list, the company pages on the first
        results page and on the page reached through the ``rollPage``
        element are visited.

        :return: None
        """
        province_xpath = "//div[@class='branlist-view']/ul/li/a"
        city_xpath = "//div[contains(@class,'ctg-mod-brancate')][2]//ul/li/a"

        url = 'https://cn.china.cn/'
        self.browser.get(url)
        # Type the keyword into the search box.
        self.wait.until(EC.element_to_be_clickable(
            (By.ID, 'keyinput'))).send_keys("智能设备")
        # Switch to company search; clicked through JS.
        button = self.browser.find_element(
            By.XPATH, "//ul[@class='serch-items']/li[2]")
        self.browser.execute_script("arguments[0].click();", button)
        time.sleep(1)

        provinces = self.browser.find_elements(By.XPATH, province_xpath)
        last_province = len(provinces) - 1
        print(provinces)
        # Walk from the last link down to index 1 (index 0 is skipped).
        for step in range(last_province):
            provinces = self.browser.find_elements(By.XPATH, province_xpath)
            provinces[last_province - step].click()

            cities = self.browser.find_elements(By.XPATH, city_xpath)
            last_city = len(cities) - 1
            print(cities)
            for city_step in range(last_city):
                cities = self.browser.find_elements(By.XPATH, city_xpath)
                cities[last_city - city_step].click()
                print(self.browser.current_url)
                # Companies on the first results page ...
                self._visit_companies()
                # ... then on the page reached through "rollPage".
                self.browser.find_element(By.CLASS_NAME, "rollPage").click()
                time.sleep(1)
                self._visit_companies()
class Bilibili:
    """Log in to Bilibili, solving its click captcha through Chaojiying."""

    def __init__(self):
        """Start headless Firefox and create the Chaojiying client."""
        self.login_url = "https://passport.bilibili.com/login"
        caps = DesiredCapabilities().FIREFOX
        firefox_options = Options()
        firefox_options.headless = True
        self.browser = webdriver.Firefox(
            executable_path="/Users/chloeji/geckodriver",
            capabilities=caps,
            options=firefox_options)
        self.driver_wait = WebDriverWait(self.browser, 60)
        # Chaojiying user name, password and software ID.
        self.chaojiying = ChaojiyingClient(CHAOJIYING_USERNAME_,
                                           CHAOJIYING_PASSWORD_,
                                           CHAOJIYING_SOFT_ID_)

    def send_infos(self, username, password):
        """Open the login page and type the user name and password."""
        self.browser.get(self.login_url)
        username_sender = self.driver_wait.until(
            EC.presence_of_element_located(
                (By.XPATH, "//*[@id='login-username']")))
        username_sender.send_keys(username)
        password_sender = self.driver_wait.until(
            EC.presence_of_element_located(
                (By.XPATH, "//*[@id='login-passwd']")))
        password_sender.send_keys(password)

    def get_verify_button(self):
        """Return the login button whose click brings up the captcha."""
        button = self.driver_wait.until(
            EC.presence_of_element_located(
                (By.XPATH, "//a[@class='btn btn-login']")))
        pprint(button.text)
        sleep(3)
        return button

    def get_verify_elements(self):
        """Wait for the captcha to render and return its container node."""
        self.driver_wait.until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, "geetest_item_img")))
        sleep(2)
        element = self.driver_wait.until(
            EC.presence_of_element_located(
                (By.CLASS_NAME, "geetest_table_box")))
        logger.info("成功获取验证码节点。")
        return element

    def get_verify_pos(self):
        """Return ``(top, bottom, left, right)`` of the captcha, doubled.

        All coordinates are multiplied by 2.
        NOTE(review): presumably this compensates for a screenshot taken
        at 2x device pixel ratio - confirm on the target machine.
        """
        element = self.get_verify_elements()
        sleep(2)
        location = element.location
        print(location)
        size = element.size
        print(size)
        top = location["y"] * 2
        buttom = (location["y"] + size["height"]) * 2
        left = location["x"] * 2
        right = (location["x"] + size["width"]) * 2
        return top, buttom, left, right

    def get_screenshoot(self):
        """Take a page screenshot, save it as ``screenshoot.png``, return it."""
        # http://allselenium.info/taking-screenshot-using-python-selenium-webdriver/
        screenshoot = Image.open(
            io.BytesIO(self.browser.get_screenshot_as_png()))
        sleep(5)
        screenshoot.save("screenshoot.png")
        return screenshoot

    def get_verify_image(self, name="need_to_verified.png"):
        """Crop the captcha out of a screenshot, save it and return it.

        :param name: file name to save the cropped captcha under
        :return: cropped captcha as a PIL image
        """
        top, buttom, left, right = self.get_verify_pos()
        logger.info(f"验证码位置:{top, buttom, left, right}")
        screenshoot = self.get_screenshoot()
        verification_area = screenshoot.crop((left, top, right, buttom))
        verification_area.save(name)
        return verification_area

    def get_points(self, verify_result):
        """Parse the Chaojiying reply into a list of ``[x, y]`` int pairs.

        ``pic_str`` holds ``|``-separated groups of comma-separated
        numbers; if any number is not a plain integer, all of them are
        parsed as floats and truncated.

        :param verify_result: mapping returned by ``PostPic``
        :return: list of ``[x, y]`` integer lists
        """
        groups = verify_result.get("pic_str").split("|")
        try:
            return [[int(number) for number in group.split(",")]
                    for group in groups]
        except ValueError:
            return [[int(float(number)) for number in group.split(",")]
                    for group in groups]

    def _click_at(self, x_offset, y_offset):
        """Click at the given offset inside the captcha container."""
        ActionChains(self.browser).move_to_element_with_offset(
            self.get_verify_elements(), x_offset, y_offset
        ).click().perform()

    def touch_click_words(self, locations):
        """Click every given offset inside the captcha container.

        geckodriver raises ``MoveTargetOutOfBoundsException`` when the
        target lies outside the viewport. In that case the captcha is
        scrolled into view and the click is retried once. Any other
        failure is printed and the remaining locations are still
        attempted (best effort).

        :param locations: iterable of ``[x, y]`` offsets
        """
        for location in locations:
            print(location)
            x_offset, y_offset = location[0], location[1]
            try:
                try:
                    self._click_at(x_offset, y_offset)
                except MoveTargetOutOfBoundsException:
                    # Pass the element as a script argument. The previous
                    # string had broken quoting and sent the Python
                    # expression to the browser as literal text.
                    self.browser.execute_script(
                        "arguments[0].scrollIntoView(true);",
                        self.get_verify_elements())
                    self._click_at(x_offset, y_offset)
                sleep(3)
            except Exception:
                pprint(traceback.format_exc())

    def touch_click_verify(self):
        """Click the captcha confirm button."""
        button = self.driver_wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, "geetest_commit")))
        button.click()

    def login(self):
        """Click the login button and wait for the page to settle."""
        # By.CLASS_NAME takes a single class name; a compound value such as
        # "btn btn-login" needs a CSS selector.
        submit = self.driver_wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, ".btn.btn-login")))
        submit.click()
        sleep(10)
        logger.info("登录成功。")

    def crack(self, user, pswd):
        """Run the whole login flow once and always quit the browser.

        :param user: account name
        :param pswd: account password
        :return: None
        """
        self.browser.set_window_size(1024, 768)
        self.send_infos(user, pswd)
        button = self.get_verify_button()
        button.click()
        # Start recognizing the captcha.
        image = self.get_verify_image()
        bytes_array = io.BytesIO()
        image.save(bytes_array, "PNG")
        sleep(10)
        result = self.chaojiying.PostPic(bytes_array.getvalue(),
                                         CHAOJIYING_KIND)
        pprint(result)
        locations = self.get_points(result)
        self.touch_click_words(locations)
        self.touch_click_verify()
        sleep(3)
        try:
            # Compound class names need a CSS selector, not By.CLASS_NAME.
            success = self.driver_wait.until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, ".bilifont.bili-icon_dingdao_zhuzhan"),
                    "主站"))
            pprint(success)
        except Exception as e:
            # logger.warn is deprecated, and passing ``e`` without a
            # placeholder made the logging call itself fail to format.
            logger.warning("登录失败。%s", e)
        finally:
            self.browser.quit()