def main():
    """Log in to the target site with Selenium, filling in a recognized captcha.

    Steps: open the login page, screenshot it, crop the captcha element out
    of the screenshot, pass the crop to ``chaojiying.getCodeText`` and submit
    the login form with the returned text.

    The browser is shut down in a ``finally`` clause so that a failure in any
    step (missing element, image error, recognition error) does not leave a
    Chrome/chromedriver process behind.
    """
    browser = webdriver.Chrome('./chromedriver')
    try:
        # Open the login page (swap in whichever site you need to log in to).
        browser.get('http://218.197.101.24')
        time.sleep(1)

        # save_screenshot(file_name) captures the current page to a file.
        browser.save_screenshot('./aa.png')

        # The captcha has to be cropped out of the full screenshot; its
        # top-left and bottom-right corners define the crop region.
        # NOTE(review): find_element_by_xpath is the Selenium 3 API; newer
        # Selenium releases expect find_element(By.XPATH, ...) - confirm the
        # pinned Selenium version before upgrading.
        code_img_ele = browser.find_element_by_xpath('//*[@id="VerifyCode"]')
        location = code_img_ele.location  # top-left corner of the captcha
        size = code_img_ele.size  # width and height of the captcha
        crop_box = (
            int(location['x']),
            int(location['y']),
            int(location['x'] + size['width']),
            int(location['y'] + size['height']),
        )

        code_img_name = './code.png'
        # Open the screenshot in a context manager so the file handle is
        # released once the cropped captcha has been written.
        with Image.open('./aa.png') as screenshot:
            screenshot.crop(crop_box).save(code_img_name)

        username_input = browser.find_element_by_xpath('//*[@id="txtUserID"]')
        pwd_input = browser.find_element_by_xpath('//*[@id="txtUserPwd"]')
        code_input = browser.find_element_by_xpath('//*[@id="yzm"]')

        username_input.send_keys('账号')
        time.sleep(1)
        pwd_input.send_keys('密码')
        time.sleep(1)
        # 1902 is passed through to chaojiying unchanged
        # (presumably the captcha type code - TODO confirm).
        code_input.send_keys(chaojiying.getCodeText(code_img_name, 1902))
        time.sleep(1)

        login_button = browser.find_element_by_xpath(
            '//*[@id="ff"]/table/tbody/tr[5]/td[2]/input[1]')
        login_button.click()
        time.sleep(5)
    finally:
        # Always release the browser, even when a step above raised.
        browser.quit()
def main():
    """Download the gushiwen login captcha, recognize it and print the text.

    Fetches the login page, reads the ``src`` of the ``imgCode`` image,
    downloads that image to ``./code.jpg`` and prints whatever
    ``chaojiying.getCodeText`` returns for it.
    """
    login_page_url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15'
    }
    login_page = requests.get(url=login_page_url, headers=request_headers)
    page_html = login_page.text

    # Pull the src attribute of the captcha <img> out of the page.
    html_parser = etree.HTMLParser(encoding="utf-8")
    document = etree.HTML(page_html, parser=html_parser)
    captcha_path = document.xpath('//*[@id="imgCode"]/@src')[0]
    captcha_url = 'https://so.gushiwen.cn' + captcha_path

    captcha_response = requests.get(url=captcha_url, headers=request_headers)
    captcha_bytes = captcha_response.content

    # Store the captcha image locally.
    with open('./code.jpg', 'wb') as image_file:
        image_file.write(captcha_bytes)

    # Hand the saved image to the captcha-solving platform's sample code.
    recognized = chaojiying.getCodeText('code.jpg', 1902)
    print(recognized)
def main():
    """Log in to renren.com with a shared session and save a profile page.

    Downloads the login captcha, recognizes it through
    ``chaojiying.getCodeText``, posts the login form and then fetches a
    profile page with the same session, writing the HTML to ``renren.html``.

    The session is used as a context manager so its pooled connections are
    released when the function finishes or raises.
    """
    url = 'http://www.renren.com'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15'
    }

    with requests.Session() as session:
        page_text = session.get(url=url, headers=headers).text
        tree = etree.HTML(page_text)
        code_img_src = tree.xpath('//*[@id="verifyPic_login"]/@src')[0]
        code_img_data = session.get(url=code_img_src, headers=headers).content

        # Save the captcha image.
        with open('./code.jpg', 'wb') as fp:
            fp.write(code_img_data)

        # Recognize the captcha image with the chaojiying helper.
        code_str = chaojiying.getCodeText('code.jpg', 1902)
        print('验证码:', code_str)

        # The endpoint is hard-coded; it could alternatively be read from the
        # page via the loginForm element's action attribute.
        login_url = 'http://www.renren.com/PLogin.do'
        data = {
            'email': 'XXX',
            'password': '******',
            'icode': code_str,
            'origURL': 'http://www.renren.com/home',
            'domain': 'renren.com',
            'key_id': '1',
            'captcha_type': 'web_login',
            'f': '',
        }

        # A common way to check whether the login went through is to look at
        # the response status code.
        response = session.post(url=login_url, headers=headers, data=data)
        print(response.status_code)

        # Reuse the same session for the profile request.
        resp = session.get(url='http://www.renren.com/974823124/profile',
                           headers=headers).text

    with open('renren.html', 'w', encoding='utf-8') as fp:
        fp.write(resp)
    print('爬取完毕!')
# Captcha recognition: download one captcha image and print the recognized text.
import requests
import chaojiying

# Request settings.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}
url = "https://so.gushiwen.cn/RandCode.ashx?t=1599137306043"

# Fetch the raw image bytes and store them on disk for the recognizer.
img = requests.get(url, headers=headers).content
with open('a.jpg', 'wb') as f:
    f.write(img)

# Pass the saved file to the chaojiying helper and show its answer.
result = chaojiying.getCodeText('a.jpg', 1902)
print(result)
# Log in to renren.com through its ajax endpoint, reusing one session so the
# captcha request and the login request go through the same Session object.
session = requests.Session()
url = "http://www.renren.com/"
post_url = "http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=202085110108"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}

# Load the landing page and find the captcha image's address.
main_text = session.get(url, headers=headers).text
soup = BeautifulSoup(main_text, "lxml")
img_src = soup.find("img", id="verifyPic_login")["src"]

# Download the captcha with the same session and save it to disk.
img = session.get(img_src, headers=headers).content
with open("a.jpg", "wb") as f:
    f.write(img)

# Recognize the saved captcha, then build the login form.
codeText = chaojiying.getCodeText("a.jpg", 1902)
data = {
    'email': '*****@*****.**',
    'icode': codeText,
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': "******",
    'rkey': "3d1f9abdaae1f018a49d38069fe743c8",
}

# Submit the form, print the HTTP status, and parse the response body as JSON.
response = session.post(post_url, data=data, headers=headers)
print(response.status_code)
text = json.loads(response.text)