예제 #1
0
def main():
    """Log in to the site at 218.197.101.24 through Chrome, solving its captcha.

    Steps: open the login page, screenshot it, crop the captcha element out
    of the screenshot, pass the crop to ``chaojiying.getCodeText`` and type
    the returned text together with the account name and password, then
    click the login button.

    The browser is closed in a ``finally`` block so that a failure in any
    step (missing element, recognition error, ...) does not leave a Chrome /
    chromedriver process behind.
    """
    browser = webdriver.Chrome('./chromedriver')
    try:
        # Open the login page (replace with whichever site you want to
        # log in to automatically).
        browser.get('http://218.197.101.24')
        time.sleep(1)

        # save_screenshot(filename) captures the page currently displayed.
        browser.save_screenshot('./aa.png')

        # The captcha has to be cropped out of the full-page screenshot, so
        # work out the top-left and bottom-right corners of its element.
        code_img_ele = browser.find_element_by_xpath(
            '//*[@id="VerifyCode"]')  # the captcha image element
        top_left = code_img_ele.location  # top-left corner of the image
        dimensions = code_img_ele.size  # width and height of the image
        # NOTE(review): this assumes screenshot pixels map 1:1 onto page
        # coordinates (no display scaling) - confirm on the target machine.
        crop_box = (int(top_left['x']), int(top_left['y']),
                    int(top_left['x'] + dimensions['width']),
                    int(top_left['y'] + dimensions['height']))

        code_img_name = './code.png'
        # Crop and save while the screenshot is open; the context manager
        # releases the file handle afterwards.
        with Image.open('./aa.png') as screenshot:
            screenshot.crop(crop_box).save(code_img_name)

        username_input = browser.find_element_by_xpath(
            '//*[@id="txtUserID"]')
        pwd_input = browser.find_element_by_xpath('//*[@id="txtUserPwd"]')
        code_input = browser.find_element_by_xpath('//*[@id="yzm"]')

        username_input.send_keys('账号')
        time.sleep(1)
        pwd_input.send_keys('密码')
        time.sleep(1)
        # 1902 is passed straight through to chaojiying; presumably it is the
        # captcha-type code of that service - confirm against its docs.
        code_input.send_keys(chaojiying.getCodeText('./code.png', 1902))
        time.sleep(1)

        login_button = browser.find_element_by_xpath(
            '//*[@id="ff"]/table/tbody/tr[5]/td[2]/input[1]')
        login_button.click()

        # Leave the resulting page on screen for a moment before closing.
        time.sleep(5)
    finally:
        browser.quit()
예제 #2
0
def main():
    """Download the captcha shown on the gushiwen.cn login page and print its text.

    The login page is fetched, the ``src`` of the ``#imgCode`` image is
    extracted with XPath, the image is saved to ``./code.jpg`` and handed to
    ``chaojiying.getCodeText``; whatever that returns is printed.

    Raises:
        IndexError: if the login page contains no ``#imgCode`` image.
        requests.RequestException: on network failure, timeout, or an HTTP
            error status when downloading the captcha image.
    """
    # Local import so this function stays self-contained.
    from urllib.parse import urljoin

    url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15'
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    timeout_s = 10
    page_text = requests.get(url=url, headers=headers,
                             timeout=timeout_s).text

    # Extract the src attribute of the captcha <img>.
    parser = etree.HTMLParser(encoding="utf-8")
    tree = etree.HTML(page_text, parser=parser)
    src_values = tree.xpath('//*[@id="imgCode"]/@src')
    if not src_values:
        raise IndexError('no #imgCode captcha image found on the login page')
    # urljoin resolves root-relative, relative and absolute src values alike;
    # plain string concatenation only worked for the root-relative form.
    code_img_src = urljoin(url, src_values[0])

    img_response = requests.get(url=code_img_src, headers=headers,
                                timeout=timeout_s)
    # Do not save (and submit for recognition) an HTTP error page.
    img_response.raise_for_status()
    img_data = img_response.content
    # Store the captcha locally for the recognition call.
    with open('./code.jpg', 'wb') as fp:
        fp.write(img_data)

    # Recognise the captcha via the chaojiying helper module.
    # 1902 is passed through unchanged; presumably the service's captcha-type
    # code - confirm against the chaojiying documentation.
    code = chaojiying.getCodeText('code.jpg', 1902)
    print(code)
예제 #3
0
def main():
    """Log in to renren.com with a recognised captcha and save a profile page.

    One ``requests.Session`` is used for every request. The home page is
    fetched, the ``#verifyPic_login`` captcha image is downloaded to
    ``./code.jpg`` and recognised with ``chaojiying.getCodeText``, the login
    form is posted, and finally a profile page is fetched and written to
    ``renren.html``.

    Raises:
        IndexError: if the home page contains no ``#verifyPic_login`` image.
        requests.RequestException: on network failure or timeout.
    """
    url = 'http://www.renren.com'
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Safari/605.1.15'
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    timeout_s = 10

    # The context manager closes the session's pooled connections on exit,
    # including when one of the steps below raises.
    with requests.Session() as session:
        page_text = session.get(url=url, headers=headers,
                                timeout=timeout_s).text
        tree = etree.HTML(page_text)
        captcha_srcs = tree.xpath('//*[@id="verifyPic_login"]/@src')
        if not captcha_srcs:
            raise IndexError(
                'no #verifyPic_login captcha image found on the home page')
        code_img_src = captcha_srcs[0]
        code_img_data = session.get(url=code_img_src, headers=headers,
                                    timeout=timeout_s).content
        # Save the captcha image for the recognition call.
        with open('./code.jpg', 'wb') as fp:
            fp.write(code_img_data)

        # Recognise the captcha via the chaojiying helper module.
        # 1902 is passed through unchanged; presumably the service's
        # captcha-type code - confirm against the chaojiying documentation.
        code_str = chaojiying.getCodeText('code.jpg', 1902)
        print('验证码:', code_str)

        # The endpoint is hard-coded; reading the action attribute of
        # #loginForm from the page would be the dynamic alternative.
        login_url = 'http://www.renren.com/PLogin.do'

        data = {
            'email': 'XXX',
            'password': '******',
            'icode': code_str,
            'origURL': 'http://www.renren.com/home',
            'domain': 'renren.com',
            'key_id': '1',
            'captcha_type': 'web_login',
            'f': '',
        }
        # The generic way to check whether the login worked is to look at
        # the response status code.
        response = session.post(url=login_url, headers=headers, data=data,
                                timeout=timeout_s)
        print(response.status_code)
        resp = session.get(url='http://www.renren.com/974823124/profile',
                           headers=headers, timeout=timeout_s).text

    with open('renren.html', 'w', encoding='utf-8') as fp:
        fp.write(resp)
    print('爬取完毕!')
예제 #4
0
# 验证码识别
import requests
import chaojiying

# Address the captcha image is requested from, and the browser identity
# sent along with the request.
url = "https://so.gushiwen.cn/RandCode.ashx?t=1599137306043"
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}

# Fetch the raw image bytes and store them on disk for the recogniser.
captcha_response = requests.get(url, headers=headers)
img = captcha_response.content
with open('a.jpg', mode='wb') as image_file:
    image_file.write(img)

# Hand the saved file to chaojiying and show whatever it returns.
result = chaojiying.getCodeText('a.jpg', 1902)
print(result)
예제 #5
0
# Script: fetch the renren.com home page, download and recognise its login
# captcha, then post the login form and decode the JSON reply.
# The same Session object is used for every request below (requests Sessions
# persist cookies across requests).
session=requests.Session()
url="http://www.renren.com/"
headers={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
}
main_text=session.get(url,headers=headers).text
soup=BeautifulSoup(main_text,"lxml")
# Take the src of the <img id="verifyPic_login"> element; this raises
# TypeError if the page has no such element, because find() returns None.
img_src=soup.find("img",id="verifyPic_login")["src"]
# print(img_src)
# Download the captcha image and save it for the recognition call.
img=session.get(img_src,headers=headers).content
with open("a.jpg","wb") as f:
    f.write(img)
# NOTE(review): the uniqueTimestamp query value is hard-coded - confirm the
# server accepts a fixed value.
post_url="http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=202085110108"

# 1902 is passed through unchanged; presumably chaojiying's captcha-type
# code - confirm against that service's documentation.
codeText=chaojiying.getCodeText("a.jpg",1902)

# Login form fields; 'icode' carries the recognised captcha text.
# NOTE(review): 'rkey' is a fixed literal here - verify it does not have to
# be obtained per session.
data={
    'email': '*****@*****.**',
    'icode':codeText,
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': "******",
    'rkey': "3d1f9abdaae1f018a49d38069fe743c8"

}
response=session.post(post_url,data=data,headers=headers)
print(response.status_code)
# Decode the response body as JSON; raises json.JSONDecodeError if the body
# is not valid JSON.
text=json.loads(response.text)