def visitLoginpage(self):
    '''Fetch the login page, extract its hidden form fields and log in.

    Sets the Referer header, requests the login page and uses ``xpathText``
    to pull three values out of the HTML: the ``date`` input, the captcha
    image path (``code_url``) and the Struts token. When all three are
    present, the captcha is resolved through ``getCode`` and the values
    are handed to ``loginSys``.

    Returns:
        The return value of ``loginSys`` when all fields were found;
        ``dict(result=40001, error='xpath not found')`` when any of the
        three fields is empty;
        ``dict(result=4000, func='visitLoginpage')`` when the request
        produced a falsy response.
    '''
    self.__headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/index1.do'
    response = basicRequest({
        'method': 'get',
        'url': 'https://ipcrs.pbccrc.org.cn/login.do?method=initLogin',
        'form': None,
        'params': None,
        'cookies': self.__cookies,
        'headers': self.__headers,
    })
    if not response:
        return dict(result=4000, func='visitLoginpage')

    xpaths = {
        'date': '//input[@name="date"]/@value',
        'code_url': '//img[@id="imgrc"]/@src',
        'token':
        '//input[@name="org.apache.struts.taglib.html.TOKEN"]/@value',
    }
    fields = xpathText(response.text, xpaths)
    if not (fields['date'] and fields['code_url'] and fields['token']):
        return dict(result=40001, error='xpath not found')

    # The image path from the page is appended to the host prefix.
    captcha = self.getCode(self.__host + fields['code_url'])
    return self.loginSys(
        dict(token=fields['token'], date=fields['date'], code=captcha))
def inputIdCode(self):
    '''Submit the identity code and, if accepted, fetch the report.

    Posts ``self.__section['id_code']`` to ``reportAction.do`` with
    ``method=checkTradeCode``. A response body that equals ``'0'`` after
    stripping whitespace is treated as acceptance and the flow continues
    with ``acquireReport``.

    Returns:
        The return value of ``acquireReport`` when the code is accepted;
        ``dict(result=4444, error='id_code_error')`` when the body is
        anything other than ``'0'``;
        ``dict(result=4000, func='inputIdCode')`` when the request
        produced a falsy response.
    '''
    form = {
        'method': 'checkTradeCode',
        'code': self.__section['id_code'],
        'reportformat': '21',
    }
    self.__headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport')
    options = {
        'method': 'post',
        'url': 'https://ipcrs.pbccrc.org.cn/reportAction.do',
        'form': form,
        'params': None,
        'cookies': self.__cookies,
        'headers': self.__headers,
    }
    response = basicRequest(options)
    if not response:
        return dict(result=4000, func='inputIdCode')

    if response.text.strip() == '0':
        return self.acquireReport()

    # The key must be 'result' like every other return value in this flow;
    # it was previously misspelled, so callers could not read the code.
    return dict(result=4444, error='id_code_error')
def getProxies():
    """Scrape a list of proxy entries from http://www.youdaili.net/.

    Loads the site's front page, follows the first link found under the
    first ``m_box2`` block, and reads the text spans inside the
    ``cont_font`` block of that page. The last span is dropped, and from
    each remaining span the text captured by ``(.*)@`` is kept. Spans
    that contain no ``@`` are skipped.

    :return: list of the captured strings, or ``False`` when either page
        could not be fetched or the front page contains no matching link.
    """
    response = basicRequest({'method': 'get', 'url': 'http://www.youdaili.net/'})
    if not response:
        return False

    link_path = '//div[@class="m_box2"][1]/ul/li[1]/a/@href'
    href = etree.HTML(response.content).xpath(link_path)
    if not href:
        return False

    response = basicRequest({'method': 'get', 'url': href[0]})
    if not response:
        # The detail page can fail independently of the front page; without
        # this guard the ``.content`` access below would raise.
        return False

    span_path = '//div[@class="cont_font"]/p/span/text()'
    entries = [
        text.lstrip('\r\n')
        for text in etree.HTML(response.content).xpath(span_path)
    ]

    proxies = []
    for entry in entries[:-1]:
        match = re.search(r'(.*)@', entry)
        if match:
            proxies.append(match.group(1))
    return proxies
def loginSys(self, form_item):
    '''Post the login form and continue to the welcome page on success.

    Args:
        form_item: dict with the keys ``token``, ``date`` and ``code``
            (the captcha value). On a captcha failure its ``code`` entry
            is replaced in place with the value from ``updateCode``
            before retrying.

    The user name and password are read from ``self.__section``. The
    response text is classified by ``self.loginError``, whose ``'error'``
    entry drives the outcome. Each captcha retry decrements
    ``self.__threshold``.

    Returns:
        The return value of ``welcomePage`` when no error is reported;
        ``dict(result=4440, error='image recognition failed')`` when the
        captcha is rejected and no retries are left;
        ``dict(result=4400, error='user_name or pw error')`` for a
        credential error;
        ``dict(result=4000, func='loginSys', error=...)`` for any other
        error value;
        ``dict(result=4000, func='loginByJS')`` when the request produced
        a falsy response.
    '''
    form = {
        'org.apache.struts.taglib.html.TOKEN': form_item['token'],
        'method': 'login',
        'date': form_item['date'],
        'loginname': self.__section['user_name'],
        'password': self.__section['password'],
        '_@IMGRC@_': form_item['code'],
    }
    self.__headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp')
    options = {
        'method': 'post',
        'url': 'https://ipcrs.pbccrc.org.cn/login.do',
        'form': form,
        'params': None,
        'cookies': self.__cookies,
        'headers': self.__headers,
    }
    response = basicRequest(options)
    if not response:
        # NOTE(review): the label says 'loginByJS' although this method is
        # loginSys; kept as-is in case callers match on it -- confirm.
        return dict(result=4000, func='loginByJS')

    error = self.loginError(response.text)['error']
    if error is None:
        return self.welcomePage()

    if error == 'code':
        # Captcha rejected: fetch a fresh one while retries remain.
        if self.__threshold > 0:
            self.__threshold -= 1
            form_item['code'] = self.updateCode()
            return self.loginSys(form_item)
        return dict(result=4440, error='image recognition failed')

    if error == 'user_name':
        return dict(result=4400, error='user_name or pw error')

    # Any other error value used to fall through and return None silently;
    # report it explicitly so the caller always receives a result dict.
    return dict(result=4000, func='loginSys', error=error)
def getCode(self, url, save_path='./code'):
    '''Download the captcha image at ``url`` and run recognition on it.

    The image is requested as a stream with the current cookies and
    headers, stored via ``saveImage`` and passed to ``self.recogImage``.

    Args:
        url: address of the captcha image.
        save_path: accepted but not used by this method.

    Returns:
        The return value of ``recogImage``, or
        ``dict(result=4000, func='getNoteCode')`` when the request
        produced a falsy response.
    '''
    request = dict(
        method='get',
        url=url,
        form=None,
        stream=True,
        cookies=self.__cookies,
        headers=self.__headers,
    )
    response = basicRequest(request)
    if not response:
        # NOTE(review): label reads 'getNoteCode' rather than 'getCode';
        # left unchanged -- confirm whether that is intentional.
        return dict(result=4000, func='getNoteCode')
    return self.recogImage(saveImage(response))
def visitSys(self):
    '''Open the site root, remember its cookies and move to the login page.

    The request is sent without cookies. The cookies of the response are
    converted with ``dict_from_cookiejar`` and stored on the instance
    before ``visitLoginpage`` is called.

    Returns:
        The return value of ``visitLoginpage``, or
        ``dict(result=4000, func='visitSys')`` when the request produced
        a falsy response.
    '''
    response = basicRequest(dict(
        method='get',
        url='https://ipcrs.pbccrc.org.cn/',
        form=None,
        params=None,
        cookies=None,
        headers=self.__headers,
    ))
    if not response:
        return dict(result=4000, func='visitSys')

    self.__cookies = dict_from_cookiejar(response.cookies)
    return self.visitLoginpage()
def welcomePage(self):
    '''Request the welcome page after a successful login.

    Sets the Referer header to the login URL, fetches ``welcome.do`` and,
    when a response comes back, continues with ``inputIdCode``.

    Returns:
        The return value of ``inputIdCode``, or
        ``dict(result=4000, func='welcomePage')`` when the request
        produced a falsy response.
    '''
    self.__headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/login.do'
    request = dict(
        method='get',
        url='https://ipcrs.pbccrc.org.cn/welcome.do',
        form=None,
        params=None,
        cookies=self.__cookies,
        headers=self.__headers,
    )
    if basicRequest(request):
        return self.inputIdCode()
    return dict(result=4000, func='welcomePage')
def logoutSys(self):
    '''Post the logout request.

    The target URL is ``login.do?`` followed by a random float, and the
    form carries ``method=loginOut``.

    Returns:
        ``dict(result=2000)`` when a response comes back, otherwise
        ``dict(result=4000, func='logoutSys')``.
    '''
    target = 'https://ipcrs.pbccrc.org.cn/login.do?' + str(random.random())
    self.__headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/top2.do'
    request = dict(
        method='post',
        url=target,
        form={'method': 'loginOut'},
        params=None,
        cookies=self.__cookies,
        headers=self.__headers,
    )
    if not basicRequest(request):
        return dict(result=4000, func='logoutSys')
    return dict(result=2000)
def updateCode(self, save_path='./code'):
    '''Download a fresh captcha image and run recognition on it.

    Requests ``imgrc.do`` with a random integer in the ``a`` query
    parameter, stores the streamed image via ``saveImage`` and passes it
    to ``self.recogImage``.

    Args:
        save_path: accepted but not used by this method.

    Returns:
        The return value of ``recogImage``, or
        ``dict(result=4000, func='updateCode')`` when the request
        produced a falsy response.
    '''
    nonce = random.randint(1467967606991, 1767967607647)
    request = dict(
        method='get',
        url='https://ipcrs.pbccrc.org.cn/imgrc.do?a=' + str(nonce),
        form=None,
        stream=True,
        cookies=self.__cookies,
        headers=self.__headers,
    )
    response = basicRequest(request)
    if not response:
        return dict(result=4000, func='updateCode')
    return self.recogImage(saveImage(response))
def acquireReport(self):
    '''Request the credit report page, parse it, print it and log out.

    Posts ``self.__section['id_code']`` as ``tradeCode`` to
    ``simpleReport.do?method=viewReport``. The response text is parsed
    with ``etree.HTML`` and handed to ``clawCreditReport``; the outcome is
    printed and ``logoutSys`` is called.

    Returns:
        ``None`` after a response was processed (the parsed report and
        the result of ``logoutSys`` are not returned), or
        ``dict(result=4000, func='acquireReport')`` when the request
        produced a falsy response.
    '''
    payload = {
        'tradeCode': self.__section['id_code'],
        'reportformat': '21',
    }
    self.__headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport')
    request = dict(
        method='post',
        url='https://ipcrs.pbccrc.org.cn/simpleReport.do?method=viewReport',
        form=payload,
        params=None,
        cookies=self.__cookies,
        headers=self.__headers,
    )
    response = basicRequest(request)
    if not response:
        return dict(result=4000, func='acquireReport')

    report = clawCreditReport(etree.HTML(response.text))
    # NOTE(review): the report is only printed and this path returns None;
    # callers cannot tell success from a missing value -- confirm intent.
    print(report)
    self.logoutSys()