def verify(self):
    """Fetch the Sichuan traffic-police captcha through a pooled proxy
    and have Chaojiying recognize it.

    Returns:
        (session, captcha_text) on success, or the Chaojiying error
        string when the OCR account is out of credit (err_no == -1005).
    """
    ua_headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36"
    }
    # Pull one proxy address from the local proxy-pool service.
    with requests.get(url='http://124.172.189.180:5010/get/') as proxy_resp:
        proxy_map = {"http": proxy_resp.text}
    with requests.session() as session:
        session.headers = ua_headers
        session.proxies = proxy_map
        # Cache-busting millisecond timestamp, as the site's own JS sends.
        stamp = str(int(time.time() * 1000))
        captcha_url = f'http://gat.sc.gov.cn/wsga/query/car/captcha?d={stamp}'
        with session.get(url=captcha_url) as captcha_resp:
            recognizer = Chaojiying_Client()
            ocr = recognizer.PostPic(captcha_resp.content, 1902)
            if ocr.get("err_no") == -1005:
                # OCR account balance exhausted: report and bail out.
                print(ocr.get("err_str"))
                return ocr.get("err_str")
            # NOTE(review): the session is returned from inside its own
            # `with`, so it is closed before the caller receives it —
            # same as the original behavior.
            return session, ocr.get("pic_str")
def get_capture(self, meta):
    """Download the ezdrving.com login captcha, OCR it via Chaojiying,
    store the answer in ``meta["yzm"]`` and continue with parse_data.

    Returns the Chaojiying error string when the OCR account balance is
    exhausted (err_no == -1005).
    """
    captcha_url = 'http://www.ezdrving.com/wxjswy/portalLogin/verify.htm'
    # The captcha endpoint requires the caller's session cookie.
    req_headers = {"Cookie": meta["jssionid"] + ";"}
    response = requests.get(url=captcha_url, headers=req_headers)
    recognizer = Chaojiying_Client()
    ocr = recognizer.PostPic(response.content, 1902)
    if ocr.get("err_no") == -1005:
        # Out of OCR credit: surface the error message.
        print(ocr.get("err_str"))
        return ocr.get("err_str")
    meta["yzm"] = ocr.get("pic_str")
    return self.parse_data(meta)
def chaoji():
    """Recognize the local captcha image 'a.jpg' with Chaojiying.

    Fix: the image file is now opened in a ``with`` block instead of
    being left to the garbage collector to close.

    Returns:
        str: the recognized captcha text.
    """
    # Account / password / software ID from the Chaojiying user center.
    chaojiying = Chaojiying_Client('账号', '密码', '软件ID')
    # Local image path; some Windows setups need // separators.
    with open('a.jpg', 'rb') as img_file:
        im = img_file.read()
    # 1902 = captcha type code, see the official price list.
    datas = chaojiying.PostPic(im, 1902)
    ocr = datas['pic_str']
    print(ocr)
    return ocr
def login(self):
    """Log into cwddd.com's WAP portal and return the session cookie.

    Grabs the anonymous cookie from the index redirect, OCRs the login
    captcha through Chaojiying, and posts the credentials.  The original
    recursed into ``self.login()`` on every failed attempt, which could
    raise RecursionError on a long failure streak; this version retries
    in a loop with the same restart-from-scratch semantics.

    Returns:
        str: the session cookie on success, or the Chaojiying error
        string when the OCR account is out of credit (err_no == -1005).
    """
    index_url = 'https://jxtwap.cwddd.com/v1/index/index.html'
    while True:
        # Rebuild headers each attempt, mirroring the recursive restart
        # (so the retry's cookie fetch carries no stale Cookie header).
        headers = {
            "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
        }
        # The anonymous session cookie arrives on the first redirect hop.
        cookie = requests.get(
            url=index_url,
            headers=headers).history[0].headers.get("Set-Cookie").split(";")[0]
        headers["Cookie"] = cookie
        v_code = requests.get(
            url='http://cwapp.cwddd.com/v9.0/wap/public/verify',
            headers=headers)
        cjy = Chaojiying_Client()
        result = cjy.PostPic(v_code.content, 1902)
        if result.get("err_no") == -1005:
            # OCR account out of credit: stop retrying.
            print(result.get("err_str"))
            return result.get("err_str")
        yzm = result.get("pic_str")
        login_url = 'http://cwapp.cwddd.com/v9.0/wap/public/docache'
        headers["X-Requested-With"] = "XMLHttpRequest"
        data = {
            "username": "******",
            "password": "******",
            "verify": yzm,
        }
        logined = requests.post(url=login_url, headers=headers, data=data)
        if logined.json()["status"]:
            return cookie
        # Captcha/login rejected — loop and try the whole flow again.
def verify_code(self):
    """Obtain a captcha-validated session for the Zhejiang traffic site.

    Loads the query page to scrape the ``tblname`` token, downloads the
    Kaptcha image, OCRs it via Chaojiying, and posts the answer for
    server-side validation.  The original recursed into
    ``self.verify_code()`` on rejection, risking RecursionError; this
    version retries in a loop, starting over with a fresh session each
    time (same semantics as the recursion).

    Returns:
        (session, tblname, yzm) on success, or the Chaojiying error
        string when the OCR account is out of credit (err_no == -1005).
    """
    index_url = 'http://www.zjsgat.gov.cn:8087/was/phone/carIllegalQuery.jsp'
    # Compile once; the token appears in the page as `...tblname=XXX"`.
    pattern = re.compile(r'.*?tblname=(.*?)"')
    while True:
        with requests.Session() as session:
            session.headers.update({
                "User-Agent":
                f"Mozilla/5.0 (Linux; Android {randint(2,10)}.0; LND-AL30 Build/HONORLND-AL30; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/66.0.3359.126 MQQBrowser/6.2 TBS/044611 Mobile Safari/537.36 MMWEBID/472 MicroMessenger/7.0.4.1420(0x27000439) Process/tools NetType/WIFI Language/zh_CN",
            })
            with session.get(index_url) as index:
                tblname = re.findall(pattern, index.text)[0]
            capture_url = 'http://www.zjsgat.gov.cn:8087/was/Kaptcha.jpg?0'
            with session.get(capture_url) as capture:
                cjy = Chaojiying_Client()
                result = cjy.PostPic(capture.content, 1902)
            if result.get("err_no") == -1005:
                # OCR account out of credit: stop retrying.
                print(result.get("err_str"))
                return result.get("err_str")
            yzm = result.get("pic_str")
            verify_url = 'http://www.zjsgat.gov.cn:8087/was/portals/checkManyYzm.jsp'
            with session.post(url=verify_url, data={"randValue": yzm}) as rs:
                if rs.json()["result"] == "Y":
                    # NOTE: returning exits the session's `with`, so the
                    # session is closed on return (as in the original).
                    return session, tblname, yzm
            # Captcha rejected: loop again with a brand-new session.
def start_requests(self):
    '''Entry-point override: log into Lagou with Selenium (solving the
    geetest click-captcha through Chaojiying when it appears), cache the
    cookies on disk, and hand them to scrapy's requests.
    '''
    # Reuse previously saved cookies when available.
    cookies = None
    try:
        if os.path.exists('./cookies/lagou.cookie'):
            cookies = pickle.load(open('./cookies/lagou.cookie', 'rb'))
    except:
        pass
    if not cookies:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        options = Options()
        options.add_argument("--disable-extensions")
        browser = webdriver.Chrome(executable_path='./chromedriver',
                                   chrome_options=options)
        from selenium.webdriver.common.action_chains import ActionChains
        action = ActionChains(browser)
        browser.get('https://passport.lagou.com/login/login.html')
        # Fill the password-login form with the configured credentials.
        browser.find_element_by_css_selector(
            'div[data-view="passwordLogin"] div[data-propertyname="username"] .input.input_white'
        ).send_keys(LAGOU_USERNAME)
        browser.find_element_by_css_selector(
            'div[data-view="passwordLogin"] div[data-propertyname="password"] .input.input_white'
        ).send_keys(LAGOU_PASSWORD)
        browser.find_element_by_css_selector(
            'div[data-view="passwordLogin"] .sense_login_password input[type="submit"]'
        ).click()
        import time
        time.sleep(3)
        is_login = False
        # Poll until the logged-in nickname element (.unick) appears;
        # while waiting, solve any geetest image captcha that shows up.
        while not is_login:
            try:
                if browser.find_element_by_css_selector('.unick'):
                    is_login = True
            except:
                pass
            try:
                captcha_element = browser.find_element_by_css_selector(
                    '.geetest_window>.geetest_item:first-child>.geetest_item_wrap>img'
                )
            except:
                captcha_element = None
            if captcha_element:
                # HACK: clear queued low-level actions via the private
                # `_actions` list so stale moves are not replayed.
                if action._actions:
                    action._actions = []
                img_url = captcha_element.get_attribute('src')
                # Disable TLS verification so urlretrieve can fetch the image.
                import ssl
                ssl._create_default_https_context = ssl._create_unverified_context
                import urllib
                # NOTE(review): uses urllib.request after only `import
                # urllib` — works only if urllib.request was imported
                # elsewhere; normally needs `import urllib.request`.
                urllib.request.urlretrieve(img_url,
                                           filename='lagou_validate.jpeg')
                from tools.chaojiying import Chaojiying_Client
                from settings import (chaojiying_username,
                                      chaojiying_password, chaojiying_app_id)
                chaojiying = Chaojiying_Client(chaojiying_username,
                                               chaojiying_password,
                                               chaojiying_app_id)
                im = open('lagou_validate.jpeg', 'rb').read()
                # 9004 = Chaojiying type code for coordinate-click captchas.
                pos_obj = chaojiying.PostPic(im, 9004)
                if pos_obj['err_no'] == 0 or pos_obj['err_str'] == 'OK':
                    pic_str = pos_obj['pic_str']
                    # Coordinates come back as "x1,y1|x2,y2|...".
                    positions = pic_str.split('|')
                    from ScrapyDemo.utils.common import get_coordinate
                    for item in positions:
                        point = item.split(',')
                        el_index = get_coordinate(point)
                        browser.find_element_by_css_selector(
                            '.geetest_window>.geetest_item:nth-child({})'.
                            format(el_index)).click()
                    browser.find_element_by_css_selector(
                        '.geetest_commit').click()
                    time.sleep(5)
        cookies = browser.get_cookies()
        # Persist cookies to disk for the next run.
        pickle.dump(cookies, open('./cookies/lagou.cookie', 'wb'))
    cookie_dict = {}
    for cookie in cookies:
        cookie_dict[cookie['name']] = cookie['value']
    for url in self.start_urls:
        yield scrapy.Request(url, dont_filter=True, cookies=cookie_dict)
def start_requests(self):
    """Log into zhihu.com through a locally attached Chrome (remote
    debugging on 127.0.0.1:9222), solving either the inverted-Chinese
    captcha (zheye + OS-level mouse clicks) or the English captcha
    (Chaojiying OCR), then hand the harvested cookies to scrapy.
    """
    # Attach to the already-running local Chrome instance.
    chrome_option = Options()
    chrome_option.add_argument("--disable-extensions")
    chrome_option.add_experimental_option("debuggerAddress",
                                          "127.0.0.1:9222")
    browser = webdriver.Chrome(
        executable_path=
        "C:/Users/Administrator/PycharmProjects/Envs/Scripts/chromedriver.exe",
        chrome_options=chrome_option)
    try:
        browser.maximize_window()
    except:
        pass
    try:
        browser.get("https://www.zhihu.com/signin")
        # Switch to password login and type account + password.
        browser.find_element_by_xpath(
            "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[1]/div[2]"
        ).click()
        browser.find_element_by_css_selector(
            ".SignFlow-accountInput.Input-wrapper input").send_keys(
                Keys.CONTROL + "a")
        browser.find_element_by_css_selector(
            ".SignFlow-accountInput.Input-wrapper input").send_keys(
                "16601052213")
        browser.find_element_by_xpath(
            "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[3]/div/label/input"
        ).send_keys(Keys.CONTROL + "a")
        browser.find_element_by_xpath(
            "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[3]/div/label/input"
        ).send_keys("ZHUHAIOO")
        browser.find_element_by_css_selector(
            ".Button.SignFlow-submitButton").click()
        time.sleep(1)
        login_success = False
        # Poll until the header icon proves login succeeded, handling
        # whichever captcha variant zhihu serves in the meantime.
        while not login_success:
            try:
                notify_ele = browser.find_element_by_xpath(
                    "//*[@id='root']/div/div[2]/header/div[1]/a/svg")
                login_success = True
            except:
                pass
            try:
                english_captcha_element = browser.find_element_by_xpath(
                    '//*[@id="root"]/div/main/div/div/div/div[1]/div/form/div[4]/div/span/div/img'
                )
            except:
                english_captcha_element = None
            try:
                chinese_captcha_element = browser.find_element_by_xpath(
                    "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[4]/div/div[2]/img"
                )
            except:
                chinese_captcha_element = None
            if chinese_captcha_element:
                # Page coordinates of the captcha image.
                ele_position = chinese_captcha_element.location
                x_relative = ele_position["x"]
                y_relative = ele_position["y"]
                # Height of the browser chrome above the viewport
                # (hard-coded 70px — depends on the local window setup).
                browser_navigation_panel_height = 70
                # The image ships inline as a base64 data URI; save it.
                base64_text = chinese_captcha_element.get_attribute("src")
                code = base64_text.replace("data:image/jpg;base64,",
                                           "").replace("%0A", "")
                fh = open("yzm_cn.jpeg", "wb")
                fh.write(base64.b64decode(code))
                fh.close()
                z = zheye()
                positions = z.Recognize('yzm_cn.jpeg')
                last_position = []
                if len(positions) == 2:
                    # zheye returns (y, x) pairs; order the click targets
                    # left-to-right and store them as [x, y].
                    if positions[0][1] > positions[1][1]:
                        last_position.append(
                            [positions[1][1], positions[1][0]])
                        last_position.append(
                            [positions[0][1], positions[0][0]])
                    else:
                        last_position.append(
                            [positions[0][1], positions[0][0]])
                        last_position.append(
                            [positions[1][1], positions[1][0]])
                    # The rendered image is half the file's pixel size,
                    # hence the /2 scaling of both coordinates.
                    first_position = [
                        int(last_position[0][0]) / 2,
                        int(last_position[0][1] / 2)
                    ]
                    second_position = [
                        int(last_position[1][0]) / 2,
                        int(last_position[1][1] / 2)
                    ]
                    mouse.move(
                        x_relative + first_position[0],
                        y_relative + browser_navigation_panel_height +
                        first_position[1])
                    mouse.click()
                    time.sleep(3)
                    mouse.move(
                        x_relative + second_position[0],
                        y_relative + browser_navigation_panel_height +
                        second_position[1])
                    mouse.click()
                else:
                    # Single inverted character detected.
                    last_position.append(
                        [positions[0][1], positions[0][0]])
                    first_position = [
                        int(last_position[0][0]) / 2,
                        int(last_position[0][1] / 2)
                    ]
                    mouse.move(
                        x_relative + first_position[0],
                        y_relative + browser_navigation_panel_height +
                        first_position[1])
                    mouse.click()
                # Re-enter credentials and resubmit with the captcha solved.
                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input"
                ).send_keys(Keys.CONTROL + "a")
                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input"
                ).send_keys("16601052213")
                browser.find_element_by_xpath(
                    "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[3]/div/label/input"
                ).send_keys(Keys.CONTROL + "a")
                browser.find_element_by_xpath(
                    "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[3]/div/label/input"
                ).send_keys("1qaz@4321")
                browser.find_element_by_css_selector(
                    ".Button.SignFlow-submitButton").click()
            if english_captcha_element:
                # Save the inline base64 image, then OCR it via Chaojiying.
                base64_text = english_captcha_element.get_attribute("src")
                code = base64_text.replace("data:image/jpg;base64,",
                                           "").replace("%0A", "")
                fh = open("yzm_en.jpeg", "wb")
                fh.write(base64.b64decode(code))
                fh.close()
                chaojiying = Chaojiying_Client('16601052213', 'ZHUHAIOO00',
                                               '905609')
                im = open('yzm_en.jpeg', 'rb').read()
                json_data = chaojiying.PostPic(im, 1902)
                code = json_data["pic_str"]
                # Re-query only while the answer is empty; any non-empty
                # answer is used even when err_no != 0.
                while json_data["err_no"] != 0:
                    if code == "":
                        json_data = chaojiying.PostPic(im, 1902)
                        code = json_data["pic_str"]
                    else:
                        break
                # Type the OCR answer, re-enter credentials, resubmit.
                browser.find_element_by_xpath(
                    '//*[@id="root"]/div/main/div/div/div/div[1]/div/form/div[4]/div/div/label/input'
                ).send_keys(Keys.CONTROL + "a")
                browser.find_element_by_xpath(
                    '//*[@id="root"]/div/main/div/div/div/div[1]/div/form/div[4]/div/div/label/input'
                ).send_keys(code)
                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input"
                ).send_keys(Keys.CONTROL + "a")
                browser.find_element_by_css_selector(
                    ".SignFlow-accountInput.Input-wrapper input"
                ).send_keys("16601052213")
                browser.find_element_by_xpath(
                    "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[3]/div/label/input"
                ).send_keys(Keys.CONTROL + "a")
                browser.find_element_by_xpath(
                    "//*[@id='root']/div/main/div/div/div/div[1]/div/form/div[3]/div/label/input"
                ).send_keys("1qaz@4321")
                browser.find_element_by_css_selector(
                    ".Button.SignFlow-submitButton").click()
    # NOTE(review): the cookie harvest and the return live inside the
    # bare `except`, so the exception-free path returns None — this
    # looks like it was meant to be `finally:` or dedented code; confirm
    # before relying on the success path.
    except:
        cookies = browser.get_cookies()
        pickle.dump(
            cookies,
            open(
                "C:/Users/Administrator/PycharmProjects/Envs/Scripts/ArticleSpider/cookies/zhihu.cookie",
                "wb"))
        cookie_dict = {}
        for cookie in cookies:
            cookie_dict[cookie["name"]] = cookie["value"]
        return [
            scrapy.Request(url=self.start_urls[0],
                           dont_filter=True,
                           cookies=cookie_dict)
        ]
async def get_data(self, info):
    """Query jxtwap.cwddd.com for the traffic violations of the vehicle
    described by *info* (keys used: "hpzl", "hphm", "cjh"), solving the
    image captcha via Chaojiying first.

    Returns a dict {"code", "msg", "result": [...]} parsed from the
    result page, retries itself when the captcha is rejected, or returns
    the Chaojiying error string when the OCR account is out of credit
    (err_no == -1005).
    """
    cookie = self.login()
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent":
        "Mozilla/5.0 (Linux; Android 8.0.0; LND-AL30 Build/HONORLND-AL30; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.91 Mobile Safari/537.36",
        "Cookie": cookie,
        "X-Requested-With": "com.cwddd.jxtmobile"
    }
    verify_code = requests.get(
        url='https://jxtwap.cwddd.com/v1/Public/verify', headers=headers)
    cjy = Chaojiying_Client()
    result = cjy.PostPic(verify_code.content, 1902)
    if result.get("err_no") == -1005:
        # Chaojiying balance exhausted: surface the error message.
        print(result.get("err_str"))
        return result.get("err_str")
    else:
        yzm = result.get("pic_str")
        data = {
            "hpzl": info["hpzl"],
            "hphm": info["hphm"],
            "code": info["cjh"],
            "verify": yzm,
        }
        rs = requests.post(url='https://jxtwap.cwddd.com/v1/index/search.html',
                           headers=headers,
                           data=data)
        # Deliberate exception-driven dispatch: a JSON body means the
        # captcha was rejected (retry); an HTML result page makes
        # rs.json() raise, and the handler parses the page instead.
        try:
            if not rs.json()["status"]:
                return await self.get_data(info)
            # NOTE(review): a JSON body with a truthy "status" falls
            # through here and the coroutine returns None — confirm the
            # server never answers that way.
        except Exception as e:
            html = etree.HTML(rs.text)
            # The last div under #tab1 is not a violation record; drop it.
            divs = html.xpath('//div[@id="tab1"]/div')[:-1]
            # `rs` is rebound from the HTTP response to the result dict.
            rs = {"code": 200, "msg": "成功!", "result": []}
            for x in divs:
                single = {}
                # Summary line, e.g. "罚款200元 | 记3分" (fine | points).
                fkkf = x.xpath('div/text()')[0].strip()
                # Fine amount in yuan (fkje) and penalty points (wfjfs).
                single["fkje"] = int(
                    fkkf.split(" | ")[0].replace("罚款", "").replace("元", ""))
                single["wfjfs"] = int(
                    fkkf.split(" | ")[1].replace("记", "").replace("分", ""))
                detail = x.xpath('div/p/text()')
                # NOTE(review): fixed offsets assume a stable page layout:
                # [1] time (wfsj), [3] address (wfdz), [-1] status (wfxwzt).
                single["wfsj"] = detail[1]
                single["wfdz"] = detail[3]
                single["wfxwzt"] = detail[-1]
                rs["result"].append(single)
            return rs