def verification_code_trigger():
    """Ask gd.ac.10086.cn to send an SMS verification code.

    Uses ``self`` from the enclosing scope: reads
    ``self.phone_attr['phone']`` and overwrites the ``Referer`` and
    ``X-Requested-With`` entries of the shared header dict before posting.

    :return: the value returned by ``Request.basic`` when it is truthy,
        otherwise ``False``.
    """
    form = {
        'dt': random.randint(1, 99),
        'mobile': self.phone_attr['phone'],
    }
    url = 'https://gd.ac.10086.cn/ucs/captcha/dpwd/send.jsps'
    # The query string previously read "reqType=0\&channel=0". The backslash
    # is not a valid Python escape, so it was sent verbatim inside the header
    # value; it is dropped here so the parameters are joined by a plain "&".
    self.__headers['Referer'] = (
        'https://gd.ac.10086.cn/ucs/login/loading.jsps'
        '?reqType=0&channel=0&cid=10003'
        '&area=%2Fcommodity'
        '&resource=%2Fcommodity%2Fservicio%2FservicioForwarding'
        '%2FqueryData.jsps'
        '&loginType=3&optional=false&exp='
        '&backURL=http%3A%2F%2Fgd.10086.cn%2Fmy%2FREALTIME_LIST_SEARCH.shtml')
    self.__headers['X-Requested-With'] = 'XMLHttpRequest'
    options = {
        'method': 'post',
        'url': url,
        'form': form,
        'timeout': 30,
        'headers': self.__headers,
    }
    response = Request.basic(options)
    return response if response else False
def acquireReport(self):
    """Request the credit report page and store it as an HTML file.

    Posts ``self.auth_info['id_code']`` as the trade code and overwrites
    ``self.headers['Referer']`` as a side effect. The page text is handed to
    ``self.saveHtml`` together with ``<user_name>.html``.

    :return: ``dict(result=2000, file_name=...)`` carrying whatever
        ``saveHtml`` returned, or ``dict(result=4000, error=...)`` when the
        request yields a falsy response.
    :raises ValueError: when ``saveHtml`` returns a falsy value.
    """
    report_url = "https://ipcrs.pbccrc.org.cn/simpleReport.do?method=viewReport"
    payload = {
        'tradeCode': self.auth_info['id_code'],
        'reportformat': 21,
    }
    self.headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport')
    response = Request.basic({
        'method': 'post',
        'url': report_url,
        'form': payload,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    })
    if not response:
        return dict(result=4000, error='acquireReport function')

    # Persist the page; a truthy return value is reported back with 2000.
    target_name = self.auth_info['user_name'] + '.html'
    saved_name = self.saveHtml(response.text, target_name)
    if not saved_name:
        raise ValueError(u'保存错误')
    return dict(result=2000, file_name=saved_name)
def encryption_key_request(self):
    """Post the sign-up form to the gd.ac.10086.cn login service.

    Overwrites the ``Referer`` entry of the shared header dict as a side
    effect.

    :return: the value returned by ``Request.basic`` when it is truthy,
        otherwise ``False``.
    """
    signup_url = 'https://gd.ac.10086.cn/ucs/login/signup.jsps'
    payload = dict(
        loginType='2',
        exp='',
        cid='',
        area='',
        resource='',
        channel='0',
        reqType='1',
        optional='on',
        backURL='http://gd.10086.cn/service/index.shtml',
    )
    self.__headers['Referer'] = (
        'https://gd.ac.10086.cn/ucs/login/loading.jsps'
        '?backURL=http://gd.10086.cn/service/index.shtml')
    response = Request.basic({
        'method': 'post',
        'url': signup_url,
        'form': payload,
        'timeout': 30,
        'headers': self.__headers,
    })
    if response:
        return response
    return False
def inputIdcode(self):
    """Submit the identity code and, if accepted, fetch the report.

    Posts ``self.auth_info['id_code']`` with ``method=checkTradeCode`` and
    overwrites ``self.headers['Referer']`` as a side effect.

    :return: the result of ``self.acquireReport()`` when the stripped
        response body equals ``"0"``; ``dict(result=4444, ...)`` for any
        other body; ``dict(result=4000, ...)`` when the response is falsy.
    """
    check_url = 'https://ipcrs.pbccrc.org.cn/reportAction.do'
    payload = dict(
        method='checkTradeCode',
        code=self.auth_info['id_code'],
        reportformat='21',
    )
    self.headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport')
    response = Request.basic({
        'method': 'post',
        'url': check_url,
        'form': payload,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    })
    if not response:
        return dict(result=4000, error='inputIdcode function error')
    # Only a body of exactly "0" lets the flow continue to the report itself.
    if response.text.strip() != str(0):
        return dict(result=4444, error="auth_code error")
    return self.acquireReport()
def get_nexthtml(self, totalnum, num, cname):
    """Fetch one further search-result page and feed it to ``parseHtml``.

    Prints ``num`` and overwrites ``self.headers['Referer']`` before the
    request. Nothing happens when the response is falsy.

    :param totalnum: value sent as the ``total`` query parameter.
    :param num: value sent as the ``npage`` query parameter.
    :param cname: search keyword sent as ``key``.
    :return: None
    """
    print(num)
    self.headers['Referer'] = 'http://www.shuidixy.com/search?'
    query = dict(
        key=cname,
        searchType='all',
        provinceCode='',
        capitalNumber='',
        establishDuration='',
        secondLevelIndustryType='',
        sort='',
        entry=0,
        mark='',
        total=totalnum,
        npage=num,
    )
    response = Request.basic({
        'method': 'get',
        'url': 'http://www.shuidixy.com/search',
        'headers': self.headers,
        'cookies': self.cookies,
        'params': query,
        'timeout': 30,
    })
    if not response:
        return
    self.parseHtml(BeautifulSoup(response.content, 'lxml'))
def get_fourParams(self, chgt):
    """Fetch the slide-captcha parameters from ``api.geetest.com/get.php``.

    Overwrites ``self.headers['Host']`` and ``self.headers['Referer']`` as a
    side effect. The endpoint is called with a ``callback`` parameter, so the
    body is expected to be JSONP (``callback(<json>)``); the wrapper is
    stripped before decoding, and a body without a wrapper is decoded as-is.

    :param chgt: mapping providing the ``gt`` and ``challenge`` values.
    :return: ``(challenge, gt, bg_url, fullbg_url)`` on a truthy response,
        with ``"unknown"`` / ``'Unknown'`` standing in for missing fields;
        ``None`` when the response is falsy.
    """
    url = 'http://api.geetest.com/get.php'
    params = {
        'gt': chgt['gt'],
        'challenge': chgt['challenge'],
        'product': 'float',
        # NOTE(review): 'flase' looks like a typo for 'false'; left untouched
        # because the server-side effect cannot be confirmed from here.
        'offline': 'flase',
        'type': 'slide',
        'callback': 'geetest_' + str(int(time.time() * 1000)),
    }
    self.headers['Host'] = 'api.geetest.com'
    self.headers['Referer'] = 'http://www.qixin.com/login'
    options = {
        'method': 'get',
        'url': url,
        'headers': self.headers,
        'params': params,
        'cookies': self.cookies,
    }
    response = Request.basic(options)
    if not response:
        return None

    body = response.content
    # Cut between the first "(" and the last ")" so that parentheses inside
    # the JSON payload or trailing characters after the wrapper do not
    # corrupt the document handed to the decoder.
    start = body.find(b'(')
    end = body.rfind(b')')
    if start != -1 and end > start:
        body = body[start + 1:end]
    res = json.loads(body)

    static_host = 'http://static.geetest.com/'
    challenge = res.get('challenge', "unknown")
    # Incomplete image (the one with the gap).
    bg_url = static_host + res['bg'] if 'bg' in res else 'Unknown'
    # Complete image.
    fullbg_url = static_host + res['fullbg'] if 'fullbg' in res else 'Unknown'
    return challenge, chgt['gt'], bg_url, fullbg_url
def gtLoginValidate(self, data):
    """Submit the solved geetest values to qixin.com for validation.

    Overwrites ``self.headers['Referer']`` and ``self.headers['Origin']`` as
    a side effect and prints the posted form on success.

    :param data: mapping with ``geetest_challenge``, ``geetest_validate``
        and ``geetest_seccode``.
    :return: ``(geetest_seccode, cookies_dict)`` when the JSON reply has
        ``status == "success"``; ``None`` otherwise, including when the
        request yields a falsy response.
    """
    url = 'http://www.qixin.com/service/gtloginvalidate'
    form = {
        'geetest_challenge': data['geetest_challenge'],
        'geetest_validate': data['geetest_validate'],
        'geetest_seccode': data['geetest_seccode'],
    }
    self.headers['Referer'] = 'http://www.qixin.com/login'
    self.headers['Origin'] = 'http://www.qixin.com'
    options = {
        'method': 'post',
        'url': url,
        'headers': self.headers,
        'cookies': self.cookies,
        'form': form,
    }
    response = Request.basic(options)
    # Other request helpers here treat a falsy result as "request failed";
    # without this guard the attribute access below would raise instead.
    if not response:
        return None

    result = json.loads(response.content)
    if result.get("status") != "success":
        return None

    print("登录参数认证成功.........")
    print(form)
    cookies = dict_from_cookiejar(response.cookies)
    return data['geetest_seccode'], cookies
def get_firsthtml(self, cname):
    """Search shuidixy.com for ``cname`` and record the matching entries.

    Fetches the first result page, walks the remaining pages through
    ``self.get_nexthtml`` and then drains the ``company_lists`` list
    (defined outside this function), writing every entry whose ``cname``
    equals the requested name via ``self.writerfile``. When nothing matches,
    a "not found" record is written instead. ``self.headers['Referer']`` is
    overwritten as a side effect.

    NOTE(review): the nesting below was reconstructed from a flattened
    source line; confirm that the matching loop belongs inside
    ``if response:``.

    :param cname: company name to look up (unicode).
    :return: ``0`` when the page reports a total of ``u'0'``; otherwise
        ``None``.
    """
    # Tracks whether any collected entry matches the requested company name.
    is_exist = False
    page_dict = {}
    page_list = []
    self.headers['Referer'] = 'http://www.shuidixy.com/'
    url = 'http://www.shuidixy.com/search'
    params = {'key': cname, 'searchType': 'all'}
    options = {
        'method': 'get',
        'url': url,
        'headers': self.headers,
        'cookies': self.cookies,
        'params': params,
        'timeout': 30,
    }
    response = Request.basic(options)
    if response:
        bsoup = BeautifulSoup(response.content, 'lxml')
        # Parse the company names and links on the first page.
        self.parseHtml(bsoup)
        # Read the total shown in the result header.
        totalnum = bsoup.find('span', class_='sd_left_ss_number').text
        print(totalnum)
        if totalnum == u'0':
            msg = "没有查到该企业的信息"
            print(msg)
            self.writerfile(cname.encode('utf-8'), msg, totalnum)
            return 0
        page_dict['totalnum'] = totalnum

        # Collect the numeric labels of the pager.
        pagenums = bsoup.find(
            'div', class_='sd_left-padge pageCls').find_all('span')
        for page in pagenums:
            numstr = page.contents[0].strip()
            try:
                page_list.append(int(numstr))
            except ValueError:
                # Span text that is not a number is skipped on purpose.
                continue
        print(page_list)
        for num in range(1, len(page_list)):
            self.get_nexthtml(totalnum, num, cname)

        # Drain the collected entries from the end, keeping the matches.
        while company_lists:
            sname_dict = company_lists.pop()
            if sname_dict['cname'] == cname:
                self.writerfile(sname_dict['cname'], sname_dict['curl'],
                                page_dict['totalnum'])
                print("写入文件完成")
                is_exist = True

        # After the loop, report when no entry matched.
        if not is_exist:
            msg = "没有查到该企业的信息"
            print(msg)
            # cname is unicode and has to be encoded before it is written.
            self.writerfile(cname.encode('utf-8'), msg, u'0')
        print("company_lists: %s" % (company_lists,))
def getUserRecord():
    """Fetch the subscriber's personal information and store it.

    Uses ``self`` from the enclosing scope. On a truthy response the
    ``result`` object of the JSON reply is converted into an item:
    ``user_valid`` is derived from ``usercirclestatus`` and every
    ``MyDetail`` key present in ``Table.KEY_CONVERT_USER`` is copied under
    its mapped column name. The item, merged with ``self.phone_attr`` minus
    its ``password`` entry, is appended to ``self.user_items``.
    ``self.headers['Referer']`` is overwritten as a side effect.

    :return: ``None`` on a truthy response; ``dict(code=4000, func=...)``
        when the response is falsy.
    """
    params = {'_': getTimestamp(), 'menuid': '000100030001'}
    url = 'http://iservice.10010.com/e3/static/query/searchPerInfo/'
    self.headers['Referer'] = (
        'http://iservice.10010.com/e3/query/personal_xx.html')
    options = {
        'method': 'post',
        'url': url,
        'params': params,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    response = Request.basic(options)
    if not response:
        # NOTE(review): the label names clawCallRecords rather than this
        # function; kept as-is in case callers match on it.
        return dict(code=4000, func='clawCallRecords')

    item = dict()
    result = json.loads(response.text)['result']
    try:
        item['user_valid'] = 1 if result['usercirclestatus'] == u'有效期' else 0
    except KeyError:
        # A missing status field is treated as valid.
        item['user_valid'] = 1

    key_map = Table.KEY_CONVERT_USER
    for k, v in result['MyDetail'].items():
        if k in key_map:
            item[key_map[k]] = v

    # pop() rather than del: the password may already have been removed by
    # an earlier call, which used to raise KeyError here.
    self.phone_attr.pop('password', None)
    self.user_items.append(dict(item, **self.phone_attr))
def loginQinbao(cookies, code, user_acct=None, user_password=None):
    """Log in to qixin.com with a validated captcha token.

    Prints the cookies it was given and, on success, the server message.

    :param cookies: cookies sent with the login request.
    :param code: value sent as the ``token`` form field.
    :param user_acct: account name; falls back to the built-in account.
    :param user_password: password; falls back to the built-in password.
    :return: ``True`` when the reply's ``data.message`` equals
        ``u'登陆成功'``; ``False`` otherwise, including when the request
        yields a falsy response.
    """
    # SECURITY: these fallbacks keep existing two-argument callers working,
    # but credentials do not belong in source code -- pass them in (or load
    # them from configuration) and rotate the ones committed here.
    if user_acct is None:
        user_acct = '18188600650'
    if user_password is None:
        user_password = 'liuwg1234'

    print(cookies)
    url = 'http://www.qixin.com/service/login'
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
        'Host': 'www.qixin.com',
        'Referer': 'http://www.qixin.com/login',
    }
    form = {
        'userAcct': user_acct,
        'userPassword': user_password,
        'token': code,
    }
    options = {
        'url': url,
        'method': 'post',
        'headers': headers,
        'form': form,
        'cookies': cookies,
    }
    response = Request.basic(options)
    # A falsy result means the request failed; report a failed login rather
    # than raising on the attribute access below.
    if not response:
        return False

    loginres = json.loads(response.content)
    message = loginres['data']["message"]
    if message == u'登陆成功':
        # The server sends no cookies back after a successful login.
        print(message)
        return True
    return False
def getbasic_msgcode(self):
    """Request the SMS code that guards the basic-information page.

    Overwrites the ``Referer``, ``Origin`` and ``Host`` entries of
    ``self.headers`` as a side effect.

    :return: ``return_result(code=2000, ...)`` when the JSON reply reports
        ``success``; ``return_result(code=4000, ...)`` when it does not, or
        when the request fails or answers with a status other than 200.
    """
    url = "http://service.js.10086.cn/my/sms.do"
    form_data = {"busiNum": "MY_GRZLGL_LOGIN"}
    self.headers["Referer"] = "http://service.js.10086.cn/my/MY_GRZLGL.html"
    self.headers["Origin"] = "http://service.js.10086.cn"
    self.headers["Host"] = "service.js.10086.cn"
    options = {
        'method': 'post',
        'url': url,
        'form': form_data,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
        'timeout': 30,
    }
    response = Request.basic(options)
    # Check the response itself first: sibling methods treat a falsy result
    # as a failed request, and reading status_code from it would raise.
    if not response or response.status_code != 200:
        return return_result(code=4000,
                             data=None,
                             desc=u"手机号为{}获取用户基本短信验证码请求失败!!!".format(
                                 self.phone_num))

    result = json.loads(response.content)
    # Loose comparison kept from the original code.
    if result['success'] == True:
        return return_result(code=2000,
                             data=None,
                             desc=u"获取用户基本信息短信验证码成功!!!")
    return return_result(code=4000,
                         data=None,
                         desc=u"获取用户基本短信验证码请求失败!!!")
def VisitEnerpriseverify(cookies, cname):
    """Run an enterprise search on qixin.com and return the raw page.

    Prints the generated ``Referer`` and the response status, and dumps the
    page to ``testqixinbaoverify.html`` in the working directory.

    :param cookies: cookies sent with the request.
    :param cname: company name (unicode); sent UTF-8 encoded as ``key``.
    :return: the response body on a truthy response, otherwise ``None``.
    """
    url = 'http://www.qixin.com/search'
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
        'Host': 'www.qixin.com',
        'Referer': 'http://www.qixin.com/login',
    }
    params = {
        'key': cname.encode('utf-8'),
        'type': 'enterprise',
        'source': '',
        'isGlobal': 'Y',
    }
    url2 = 'http://www.qixin.com/search/?'
    headers['Referer'] = url2 + urllib.urlencode(params)
    print(headers['Referer'])
    options = {
        'method': 'get',
        'url': url,
        'headers': headers,
        'cookies': cookies,
        'params': params,
        'timeout': 30,
    }
    response = Request.basic(options)
    # Read the status defensively: a failed request may not hand back an
    # object with a status_code attribute.
    print(getattr(response, 'status_code', None))
    if not response:
        print("请求首页没有说返回")
        return None

    # response.content is raw bytes, so write it in binary mode to avoid any
    # newline translation.
    with open('testqixinbaoverify.html', 'wb') as f:
        f.write(response.content)
    return response.content
def VisitEnerprisenextpage(cookies, cname, pagenum):
    """Fetch one page of the qixin.com enterprise search results.

    The resulting request has the shape
    ``/search?key=...&type=enterprise&source=&isGlobal=Y&page=N``.

    :param cookies: cookies sent with the request.
    :param cname: search keyword sent as ``key``.
    :param pagenum: value sent as ``page``.
    :return: the response body on a truthy response, otherwise ``None``.
    """
    user_agent = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36')
    request_headers = dict([
        ('User-Agent', user_agent),
        ('Host', 'www.qixin.com'),
        ('Referer', 'http://www.qixin.com/login'),
    ])
    query = dict(
        key=cname,
        type='enterprise',
        source='',
        isGlobal='Y',
        page=pagenum,
    )
    response = Request.basic({
        'method': 'get',
        'url': 'http://www.qixin.com/search',
        'headers': request_headers,
        'cookies': cookies,
        'params': query,
        'timeout': 30,
    })
    if not response:
        return None
    print("请求下一页成功.......")
    return response.content
def getUniqueTag():
    """Obtain the unique tag for ``month`` and fetch that month's records.

    Uses ``self``, ``month``, ``getMonthRecords`` and ``returnResult`` from
    the enclosing scope and overwrites the ``Referer`` entry of the shared
    header dict as a side effect.

    :return: the result of ``getMonthRecords(unique_tag)``;
        ``returnResult(4100, ...)`` when extracting the tag (or the
        follow-up call) raises; ``returnResult(4000, ...)`` when the
        response is falsy.
    """
    query_url = ('http://gd.10086.cn/commodity/servicio/nostandardserv/'
                 'realtimeListSearch/query.jsps')
    self.__headers['Referer'] = (
        'http://gd.10086.cn/my/REALTIME_LIST_SEARCH.shtml?dt=1469030400000')
    response = Request.basic({
        'method': 'post',
        'url': query_url,
        'form': {'month': month},
        'cookies': self.cookies,
        'headers': self.__headers,
    })
    if not response:
        return returnResult(4000, [], desc=u'getUniqueTag 网络错误')

    try:
        payload = json.loads(response.text)
        unique_tag = payload['attachment'][0]['value']
        return getMonthRecords(unique_tag)
    except Exception as ex:
        # Catch-all kept on purpose: it also covers KeyError and IndexError.
        print('unique_tag not found, error:' + ' ' + str(ex))
        return returnResult(4100, [], desc=u'getUniqueTag 解析错误')
def getCode(self, re_num=2):
    """Download the captcha image and run it through ``recogImage``.

    The image URL carries a ``date`` parameter assembled from
    ``time.ctime()``. ``Accept`` and ``Referer`` in ``self.headers`` are
    overwritten, and cookies from a non-empty response are merged into
    ``self.cookies``. A failed download or recognition triggers a recursive
    retry until the attempt budget is used up.

    :param re_num: number of attempts allowed.
    :return: the recognition result, or ``False`` once attempts run out.
    """
    weekday, month, day, clock, year = time.ctime().split()
    image_url = ('http://shixin.court.gov.cn/image.jsp?'
                 'date={0}%20{1}%20{2}%20{3}%20{4}%20GMT'
                 '+0800%20(%E4%B8%AD%E5%9B%BD%E6%A0%87%E5%87%86%E6%97%B6%E9%97%B4)'
                 ).format(weekday, month, day, year, clock)
    self.headers['Accept'] = 'image/webp,image/*,*/*;q=0.8'
    self.headers['Referer'] = 'http://shixin.court.gov.cn/'
    response = Request.basic({
        'method': 'get',
        'url': image_url,
        'cookies': self.cookies,
        'headers': self.headers,
    })

    pw_code = None
    if response and len(response.text):
        self.cookies.update(dict_from_cookiejar(response.cookies))
        pw_code = recogImage(response.content)
    if pw_code:
        return pw_code

    # Download or recognition failed: retry while attempts remain.
    remaining = re_num - 1
    if remaining > 0:
        return self.getCode(remaining)
    return False
def changePage(self, pw_code, page_i):
    """Request result page ``page_i`` and collect the ids found on it.

    Overwrites ``self.headers['Referer']`` as a side effect. On a truthy
    response the ids returned by ``self.findIDs`` are appended to
    ``self.id_seq``; otherwise nothing happens.

    :param pw_code: recognised captcha value, sent as ``pCode``.
    :param page_i: page number, sent as ``currentPage``.
    :return: None
    """
    endpoint = 'http://shixin.court.gov.cn/findd'
    payload = dict(
        pProvince='0',
        pCode=pw_code,
        currentPage=page_i,
        pName=self.name,
        pCardNum=self.card_num,
    )
    self.headers['Referer'] = endpoint
    response = Request.basic({
        'method': 'post',
        'url': endpoint,
        'form': payload,
        'cookies': self.cookies,
        'headers': self.headers,
    })
    if not response:
        return
    tree = etree.HTML(response.content)
    self.id_seq.extend(self.findIDs(tree))
def LoingVsy(self):
    """Log in to service.js.10086.cn with the stored phone and password.

    Overwrites the ``Origin``, ``Host`` and ``Referer`` entries of
    ``self.headers``; on success ``self.cookies`` is replaced by the
    response cookies.

    :return: ``return_result(code=2000, data=<cookies>, ...)`` on success;
        ``code=4600`` when the reply contains ``login.html`` (treated as
        wrong credentials); ``code=4000`` when the request fails or the
        status is not 200.
    """
    url = "http://service.js.10086.cn/actionDispatcher.do"
    form_data = {
        'userLoginTransferProtocol': 'https',
        'redirectUrl': 'my/MY_QDCX.html?t=1490234778114#home',
        'reqUrl': 'login',
        'busiNum': 'LOGIN',
        'operType': '0',
        'passwordType': '1',
        'isSavePasswordVal': '0',
        'isSavePasswordVal_N': '1',
        'currentD': '1',
        'loginFormTab': '#home',
        'loginType': '1',
        'phone-login': '******',
        'mobile': self.phone_num,
        'city': 'NJDQ',
        'password': self.password,
        'verifyCode': '',
    }
    self.headers['Origin'] = 'http://service.js.10086.cn'
    self.headers['Host'] = 'service.js.10086.cn'
    self.headers["Referer"] = (
        'http://service.js.10086.cn/login.html'
        '?url=http://service.js.10086.cn/index.html')
    options = {
        'method': 'post',
        'url': url,
        'form': form_data,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
        'timeout': 30,
    }
    response = Request.basic(options=options)
    # The no-response branch used to build this result without returning
    # it, so callers received None; both failure paths now return it.
    if not response or response.status_code != 200:
        return return_result(code=4000,
                             data="None",
                             desc=u"{}登录请求网络错误".format(self.phone_num))

    if 'login.html' in response.content:
        # NOTE(review): this message embeds the password; consider dropping
        # it from the description.
        return return_result(
            code=4600,
            data="None",
            desc=u"phone:{}password:{}用户名或密码错误".format(
                self.phone_num, self.password))

    self.cookies = response.cookies
    return return_result(code=2000, data=self.cookies, desc=u"登录成功!!!")
def visitSys():
    """Open the shixin.court.gov.cn home page and start the cookie chain.

    Uses ``self`` and ``getSessionID`` from the enclosing scope. Cookies of
    a truthy response are merged into ``self.cookies``.

    :return: the result of ``getSessionID()``, or ``False`` when the
        response is falsy.
    """
    response = Request.basic({
        'method': 'get',
        'url': 'http://shixin.court.gov.cn/',
        'headers': self.headers,
    })
    if not response:
        return False
    self.cookies.update(dict_from_cookiejar(response.cookies))
    # Continue with the next step of the chain.
    return getSessionID()
def getphonemsgfirst(self, msgnum, dict_param):
    """Send the first call-record query, authorised by an SMS code.

    Overwrites the ``Referer``, ``Origin`` and ``Host`` entries of
    ``self.headers``. On success the decoded reply is handed to
    ``self.Parsecall`` together with the queried month.

    :param msgnum: SMS code, sent as ``smsNum``.
    :param dict_param: mapping with ``mon``, ``fday`` and ``lday``.
    :return: ``return_result(code=2000, ...)`` when the reply reports
        success; ``code=4610`` when it reports failure (treated as a wrong
        SMS code); ``code=4000`` for every other outcome.
    """

    def _request_failed():
        """Build the generic failure result for this request."""
        return return_result(code=4000,
                             data=None,
                             desc=u"{}用户通话记录首次请求失败!!!".format(
                                 self.phone_num))

    url = "http://service.js.10086.cn/my/actionDispatcher.do"
    form_data = {
        'reqUrl': 'MY_QDCXQueryNew',
        'busiNum': 'QDCX',
        'queryMonth': dict_param['mon'],
        'queryItem': '1',
        'qryPages': '',
        'qryNo': '1',
        'operType': '3',
        'queryBeginTime': dict_param['fday'],
        'queryEndTime': dict_param['lday'],
        'smsNum': msgnum,
        'confirmFlg': '1',
    }
    self.headers["Referer"] = (
        "http://service.js.10086.cn/my/MY_QDCX.html?t=1490249727911")
    # The value must be the origin alone; it used to carry a duplicated
    # "Origin: " prefix inside the header value.
    self.headers["Origin"] = "http://service.js.10086.cn"
    self.headers["Host"] = "service.js.10086.cn"
    options = {
        'method': 'post',
        'url': url,
        'form': form_data,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
        'timeout': 30,
    }
    response = Request.basic(options)
    if not response or response.status_code != 200:
        return _request_failed()

    results_dict = json.loads(response.content)
    success = results_dict['success']
    # Loose comparisons kept from the original code.
    if success == True:
        self.Parsecall(results_dict, dict_param['mon'])
        return return_result(code=2000, data=None, desc=u"通话记录首次请求成功!!!")
    if success == False:
        return return_result(code=4610, data=None, desc=u"短信验证码错误!!!")
    # Anything else is reported as a failed request instead of falling
    # through and returning None.
    return _request_failed()
def VisitmainPhage(self):
    """Open the qixin.com home page and continue to the login page.

    On a truthy response ``self.cookies`` is replaced by the cookies of
    that response.

    :return: the result of ``self.VisitLoginPage()``, or ``None`` when the
        response is falsy.
    """
    response = Request.basic({
        'method': 'get',
        'url': 'http://www.qixin.com/',
        'headers': self.headers,
        'timeout': 30,
    })
    if not response:
        return None
    self.cookies = dict_from_cookiejar(response.cookies)
    return self.VisitLoginPage()
def VisitLoginPage(self):
    """Open the qixin.com login page and continue to the captcha step.

    Overwrites ``self.headers['Host']`` as a side effect.

    :return: the result of ``self.getChallengegt()``, or ``None`` when the
        response is falsy.
    """
    self.headers['Host'] = 'www.qixin.com'
    response = Request.basic({
        'method': 'get',
        'url': 'http://www.qixin.com/login',
        'headers': self.headers,
        'cookies': self.cookies,
    })
    if not response:
        return None
    return self.getChallengegt()
def get_image(self, image_url):
    """Download a captcha image into an in-memory buffer.

    Sets ``self.headers['Host']`` to ``None`` before the request and prints
    the status code of a truthy response.

    :param image_url: address of the image to fetch.
    :return: a ``BytesIO`` holding the image bytes, or ``None`` when the
        response is falsy.
    """
    self.headers['Host'] = None
    response = Request.basic({
        'url': image_url,
        'method': 'get',
        'headers': self.headers,
    })
    if not response:
        return None
    print(response.status_code)
    return BytesIO(response.content)
def clawPageCall(date_tuple, page_no=1, resend=2):
    """Perform one call-detail request, resending while the system is busy.

    Uses ``self``, ``getTimestamp`` and ``returnResult`` from the enclosing
    scope and overwrites ``self.headers['Referer']`` as a side effect.

    :param date_tuple: ``(beginDate, endDate)`` pair.
    :param page_no: page to request.
    :param resend: how many busy-retries are still allowed.
    :return: the raw response text; the result of a retry when the reply
        carries the busy code ``'4114030193'``; ``returnResult(4100, ...)``
        for undecodable or malformed replies; ``returnResult(4000, ...)``
        when the response is falsy.
    """
    payload = {
        'pageNo': page_no,
        'pageSize': '20',
        'beginDate': date_tuple[0],
        'endDate': date_tuple[1],
    }
    query = {'_': getTimestamp(), 'menuid': '000100030001'}
    self.headers['Referer'] = (
        'http://iservice.10010.com/e3/query/call_dan.html?menuId=000100030001')
    response = Request.basic({
        'method': 'post',
        'url': 'http://iservice.10010.com/e3/static/query/callDetail',
        'form': payload,
        'params': query,
        'cookies': self.cookies,
        'headers': self.headers,
    })
    if not response:
        return returnResult(4000, [], desc='clawPageCall')

    try:
        page_json = json.loads(response.text)
    except ValueError:
        return returnResult(4100, [], desc='clawPageCall1')

    retry_candidate = 'errorMessage' in page_json.keys() and resend > 0
    if not retry_candidate:
        return response.text

    try:
        if page_json['errorMessage']['respCode'] == '4114030193':
            # System busy: send the same request again.
            return clawPageCall(date_tuple, page_no, resend - 1)
    except KeyError:
        return returnResult(4100, [], desc='clawPageCall2')
    # NOTE(review): an error reply with any other respCode yields None here,
    # as in the original code -- confirm that callers expect this.
    return None
def getONEAPM():
    """Request ``visit.do`` and merge its cookies into ``self.cookies``.

    Uses ``self`` from the enclosing scope and overwrites
    ``self.headers['Referer']`` as a side effect.

    :return: ``self.cookies`` after the merge, or ``False`` when the
        response is falsy.
    """
    self.headers['Referer'] = 'http://shixin.court.gov.cn/'
    response = Request.basic({
        'method': 'get',
        'url': 'http://shixin.court.gov.cn/visit.do',
        'cookies': self.cookies,
        'headers': self.headers,
    })
    if not response:
        return False
    self.cookies.update(dict_from_cookiejar(response.cookies))
    return self.cookies
def upatecode(self, result_dict):
    """Download a fresh captcha image and decode it with ``getCaptcha``.

    :param result_dict: mapping providing the image ``url`` and the
        ``cookies`` to send.
    :return: the value returned by ``getCaptcha``, or ``None`` when the
        response is falsy.
    """
    user_agent = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
    request_headers = dict([
        ('User-Agent', user_agent),
        ('Host', 'omeo.alipay.com'),
    ])
    response = Request.basic({
        'method': 'get',
        'url': result_dict['url'],
        'headers': request_headers,
        'cookies': result_dict['cookies'],
    })
    if not response:
        return None
    print("get code success.....")
    image = Image.open(BytesIO(response.content))
    return getCaptcha(image)
def getCode(self, url, save_path='./code'):
    """Download a captcha image and pass it to ``self.recogImage``.

    :param url: address of the captcha image.
    :param save_path: accepted for compatibility; not used by this body.
    :return: the result of ``self.recogImage`` on a truthy response,
        otherwise ``dict(result=4000, error=...)``.
    """
    response = Request.basic(dict(
        method='get',
        url=url,
        form=None,
        stream=True,
        cookies=self.cookies,
        headers=self.headers,
    ))
    if not response:
        return dict(result=4000, error='getNoteCode function')
    return self.recogImage(response.content)
def getSessionID():
    """Request ``image.jsp`` to pick up cookies, then go on to ``getONEAPM``.

    Uses ``self`` and ``getONEAPM`` from the enclosing scope and overwrites
    ``self.headers['Referer']`` as a side effect.

    :return: the result of ``getONEAPM()``, or ``False`` when the response
        is falsy.
    """
    self.headers['Referer'] = 'http://shixin.court.gov.cn/'
    response = Request.basic({
        'method': 'get',
        'url': 'http://shixin.court.gov.cn/image.jsp',
        'cookies': self.cookies,
        'headers': self.headers,
    })
    if not response:
        return False
    self.cookies.update(dict_from_cookiejar(response.cookies))
    # Continue with the next step of the chain.
    return getONEAPM()
def login_url_request(self, login_url):
    """GET ``login_url`` without following redirects.

    Overwrites the ``host``, ``Accept`` and ``Upgrade-Insecure-Requests``
    entries of the shared header dict as a side effect.

    :param login_url: address to request.
    :return: the value returned by ``Request.basic`` when it is truthy,
        otherwise ``False``.
    """
    self.__headers['host'] = 'gd.10086.cn'
    self.__headers['Accept'] = (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,*/*;q=0.8')
    self.__headers['Upgrade-Insecure-Requests'] = '1'
    response = Request.basic({
        'method': 'get',
        'url': login_url,
        'timeout': 30,
        'cookies': self.cookies,
        'headers': self.__headers,
        'allow_redirects': False,
    })
    if response:
        return response
    return False
def session_request(self):
    """Post an empty form to the ``isOnline.jsp`` endpoint.

    Overwrites the ``Referer``, ``Host`` and ``Origin`` entries of the
    shared header dict as a side effect.

    :return: the value returned by ``Request.basic`` when it is truthy,
        otherwise ``False``.
    """
    self.__headers['Referer'] = (
        'http://gd.10086.cn/my/REALTIME_LIST_SEARCH.shtml')
    self.__headers['Host'] = 'gd.10086.cn'
    self.__headers['Origin'] = 'http://gd.10086.cn'
    response = Request.basic({
        'method': 'post',
        'url': 'http://gd.10086.cn/common/include/public/isOnline.jsp',
        'form': {},
        'timeout': 30,
        'headers': self.__headers,
    })
    if response:
        return response
    return False
def loginSys(self, form_item):
    """Submit the login form and dispatch on the reported error.

    Overwrites ``self.headers['Referer']`` as a side effect. A captcha
    error triggers a retry with a code from ``self.updateCode()`` while
    ``self.threshold`` is positive; each retry decrements the threshold.

    :param form_item: mapping with ``date``, ``code`` (captcha) and
        ``token``; ``code`` is replaced in place on a captcha retry.
    :return: the result of ``self.welcomePage()`` when no error is
        reported; ``dict(result=4200, ...)`` when captcha retries are
        exhausted; ``dict(result=4600, ...)`` for a user-name error;
        ``dict(result=4000, ...)`` when the response is falsy; ``None`` for
        any other error value.
    """
    payload = {
        'org.apache.struts.taglib.html.TOKEN': form_item['token'],
        'method': 'login',
        'date': form_item['date'],
        'loginname': self.auth_info['user_name'],  # user name
        'password': self.auth_info['password'],  # password
        '_@IMGRC@_': form_item['code'],  # captcha
    }
    self.headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp')
    response = Request.basic({
        'method': 'post',
        'url': 'https://ipcrs.pbccrc.org.cn/login.do',
        'form': payload,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    })
    if not response:
        return dict(result=4000, error='loginByJS function')

    failure = self.loginError(response.text)['error']
    if failure is None:
        # Login accepted: go to the welcome page.
        return self.welcomePage()
    if failure == 'code':
        # Wrong captcha: fetch a new one and try again while allowed.
        if self.threshold > 0:
            self.threshold -= 1
            form_item['code'] = self.updateCode()
            return self.loginSys(form_item)
        return dict(result=4200, error='image recognition failed')
    if failure == 'user_name':
        return dict(result=4600, error='user_name or pw error')
    return None