def acquireReport(self):
    """Request the personal credit report page and save it as HTML.

    Posts ``self.section['id_code']`` as the trade code and hands the
    response body to ``self.saveHtml`` under ``<user_name>.html``.

    :return: ``dict(result=2000, file_name=...)`` once the page is saved,
        or ``dict(result=4000, error=...)`` when the request produced no
        usable response.
    :raises ValueError: when ``self.saveHtml`` does not return ``True``.
    """
    payload = {
        'tradeCode': self.section['id_code'],
        'reportformat': '21',
    }
    self.headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport')
    request_options = {
        'method': 'post',
        'url': 'https://ipcrs.pbccrc.org.cn/simpleReport.do?method=viewReport',
        'form': payload,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    reply = basicRequest(request_options)
    if not reply:
        return dict(result=4000, error='acquireReport function')

    file_name = self.section['user_name'] + '.html'
    saved = self.saveHtml(reply.text, file_name)
    if saved == True:  # noqa: E712 - equality check kept as in the original
        return dict(result=2000, file_name=file_name)
    raise ValueError(u'保存错误')
def getUserRecord():
    """Fetch the subscriber profile and append it to ``self.user_items``.

    Uses ``self`` from the enclosing scope (this function takes no
    parameters). The ``MyDetail`` fields of the response are renamed
    through ``KEY_CONVERT_USER``; fields without a mapping are dropped.
    The password is removed from ``self.phone_attr`` before the merged
    record is stored.

    :return: ``None`` after a record was appended, or
        ``dict(code=4000, func=...)`` when the request produced no usable
        response.
    """
    params = {'_': getTimestamp(), 'menuid': '000100030001'}
    url = 'http://iservice.10010.com/e3/static/query/searchPerInfo/'
    self.headers['Referer'] = (
        'http://iservice.10010.com/e3/query/personal_xx.html')
    options = {
        'method': 'post',
        'url': url,
        'params': params,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    response = basicRequest(options)
    if not response:
        # The label below is the value callers already receive; kept as is.
        return dict(code=4000, func='clawCallRecords')

    result = json.loads(response.text)['result']
    item = dict()
    try:
        item['user_valid'] = (
            1 if result['usercirclestatus'] == u'有效期' else 0)
    except KeyError:
        # A response without the status field is treated as valid.
        item['user_valid'] = 1

    for key, value in result['MyDetail'].items():
        # Direct dict membership avoids building a key list per item.
        if key in KEY_CONVERT_USER:
            item[KEY_CONVERT_USER[key]] = value

    # pop() instead of del: a second invocation (the password is already
    # gone) must not fail with KeyError.
    self.phone_attr.pop('password', None)
    self.user_items.append(dict(item, **self.phone_attr))
def getPhoneAttr(phone_num):
    """Look up where a phone number is registered through the Baidu API.

    :param phone_num: phone number to look up.
    :return: the value of ``returnResult(...)``. On success (code 2000)
        ``data`` looks like
        ``{'phone': '13267175437', 'province': ..., 'city': ...,
        'company': 1}``, where ``company`` is 1 for China Unicom, 2 for
        China Mobile, 3 for China Telecom and 4 for anything else.
        Code 4000 means no response, 4500 means the expected fields were
        missing, 4100 covers any other failure while parsing.
    """
    # startswith() instead of [0]: an empty number must not raise
    # IndexError before the request is even built.
    phone_status = 6855 if str(phone_num).startswith('0') else 6004
    url = 'https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php'
    params = {'query': phone_num, 'resource_id': phone_status}
    options = {'method': 'get', 'url': url, 'params': params}
    response = basicRequest(options)
    if not response:
        return returnResult(code=4000, data={})

    try:
        item = json.loads(response.text)['data'][0]
        data = {
            'phone': phone_num,
            'province': item['prov'],
            'city': item['city'],
            # Unknown carrier names fall back to 4 ("other").
            'company': _company_convert.get(item['type'], 4),
        }
    except (KeyError, IndexError):
        return returnResult(code=4500, data={})
    except Exception:
        # Includes ValueError from malformed JSON.
        return returnResult(code=4100, data={})
    else:
        return returnResult(code=2000, data=data, desc=u'查询成功')
def getUniqueTag(self, month):
    """Fetch the unique tag for one month of call data (request 1).

    :param month: value posted as the ``month`` form field (the original
        placeholder suggests a ``'YYYYMM'`` string - TODO confirm).
    :return: ``attachment[0]['value']`` from the JSON response, or
        ``False`` when the request fails or the value cannot be extracted.
    """
    form = {'month': month}
    url = ('http://gd.10086.cn/commodity/servicio/'
           'nostandardserv/realtimeListSearch/query.jsps')
    self.__headers['Referer'] = (
        'http://gd.10086.cn/my/'
        'REALTIME_LIST_SEARCH.shtml?dt=1469030400000')
    options = {
        'method': 'post',
        'url': url,
        'form': form,
        'cookies': self.cookies,
        'headers': self.__headers,
    }
    response = basicRequest(options)
    if not response:
        return False

    try:
        return json.loads(response.text)['attachment'][0]['value']
    except Exception as ex:
        # The original tuple (KeyError, IndexError, ValueError, Exception)
        # is equivalent to Exception alone. A single pre-formatted argument
        # prints identically under Python 2 and 3.
        print('unique_tag not found, error: %s' % (ex,))
        return False
def inputIdCode(self):
    """Submit the identity code and continue to the report download.

    Posts ``self.section['id_code']`` with ``method=checkTradeCode``.

    :return: the result of ``self.acquireReport()`` when the stripped
        response body is ``'0'``; ``dict(result=4444, ...)`` for any other
        body; ``dict(result=4000, ...)`` when there is no usable response.
    """
    payload = {
        'method': 'checkTradeCode',
        'code': self.section['id_code'],
        'reportformat': '21',
    }
    self.headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/reportAction.do?method=queryReport')
    request_options = {
        'method': 'post',
        'url': 'https://ipcrs.pbccrc.org.cn/reportAction.do',
        'form': payload,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    reply = basicRequest(request_options)
    if not reply:
        return dict(result=4000, error='inputIdCode function')
    if reply.text.strip() != '0':
        return dict(result=4444, error='auth_code error')
    return self.acquireReport()
def visitLoginpage(self):
    """Open the login page, read its hidden fields and start the login.

    Extracts the ``date`` field, the struts token and the captcha image
    URL from the page, resolves the captcha through ``self.getCode`` and
    passes all three values to ``self.loginSys``.

    :return: the result of ``self.loginSys(...)``; the error dict from
        ``self.getCode`` when the captcha could not be obtained;
        ``dict(result=4100, ...)`` when a field is missing from the page;
        ``dict(result=4000, ...)`` when there is no usable response.
    """
    url = 'https://ipcrs.pbccrc.org.cn/login.do?method=initLogin'
    self.headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/index1.do'
    options = {
        'method': 'get',
        'url': url,
        'form': None,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    response = basicRequest(options)
    if not response:
        return dict(result=4000, error='visitLoginpage function')

    path_dict = dict(
        date='//input[@name="date"]/@value',
        code_url='//img[@id="imgrc"]/@src',
        token='//input[@name="org.apache.struts.taglib.html.TOKEN"]/@value')
    result = xpathText(response.text, path_dict)
    if not (result['date'] and result['code_url'] and result['token']):
        return dict(result=4100, error='xpath not found')

    code = self.getCode(self.host + result['code_url'])
    if isinstance(code, dict):
        # getCode returns an error dict when the image download fails.
        # Pass it up instead of submitting it as the captcha text.
        return code
    form_item = dict(token=result['token'], date=result['date'], code=code)
    return self.loginSys(form_item)
def requestURL(self, href):
    """Issue a GET request for *href*.

    ``_host`` is prepended when *href* does not already contain it.

    :param href: absolute URL or a path on ``_host``.
    :return: whatever ``basicRequest`` returns for the request.
    """
    if _host not in href:
        href = _host + href
    request_options = {
        'method': 'get',
        'url': href,
        'headers': self.headers,
        'timeout': _time_out,
    }
    return basicRequest(request_options, resend_times=4)
def getCode(self, url, save_path='./code'):
    """Download the captcha image and run it through ``self.recogImage``.

    :param url: address of the captcha image.
    :param save_path: accepted for compatibility; this method does not
        read it.
    :return: the result of ``self.recogImage`` on the image bytes, or
        ``dict(result=4000, ...)`` when the download fails.
    """
    request_options = {
        'method': 'get',
        'url': url,
        'form': None,
        'stream': True,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    reply = basicRequest(request_options)
    if not reply:
        return dict(result=4000, error='getNoteCode function')
    return self.recogImage(reply.content)
def getMonthText(self, unique_tag):
    """Fetch one month of call data (request 2).

    :param unique_tag: value posted as ``uniqueTag``.
    :return: the response body as text, or ``False`` when the request
        produced no usable response.
    """
    endpoint = ('http://gd.10086.cn/commodity/servicio/'
                'nostandardserv/realtimeListSearch/ajaxRealQuery.jsps')
    request_options = {
        'method': 'post',
        'url': endpoint,
        'form': {'uniqueTag': unique_tag, 'monthListType': '0'},
        'cookies': self.cookies,
        # This request sets an explicit 20 second timeout.
        'timeout': 20,
        'headers': self.__headers,
    }
    reply = basicRequest(request_options)
    if reply:
        return reply.text
    return False
def queryUserInfo(self):
    """Send the first request of the user-info query (request 1).

    :return: ``True`` when ``basicRequest`` returned a truthy response,
        ``False`` otherwise.
    """
    endpoint = ('http://gd.10086.cn/commodity/servicio/'
                'track/servicioDcstrack/query.jsps')
    self.__headers['Referer'] = ('http://gd.10086.cn/'
                                 'my/myService/myBasicInfo.shtml')
    request_options = {
        'method': 'post',
        'url': endpoint,
        'form': {'servCode': 'MY_BASICINFO'},
        'cookies': self.cookies,
        'headers': self.__headers,
    }
    return bool(basicRequest(request_options))
def clawPageCall(date_tuple, page_no=1, resend=2):
    """Fetch one page of call records, retrying while the server is busy.

    Uses ``self`` from the enclosing scope.

    :param date_tuple: ``(begin_date, end_date)`` posted as ``beginDate``
        and ``endDate``.
    :param page_no: page number to request.
    :param resend: how many more busy retries are allowed.
    :return: the raw response text, or ``False`` on any failure. When the
        retries are used up the last response text is returned even if it
        still contains ``errorMessage``, as before.
    """
    params = {'_': getTimestamp(), 'menuid': '000100030001'}
    form = {
        'pageNo': page_no,
        'pageSize': '20',
        'beginDate': date_tuple[0],
        'endDate': date_tuple[1],
    }
    url = 'http://iservice.10010.com/e3/static/query/callDetail'
    self.headers['Referer'] = ('http://iservice.10010.com/'
                               'e3/query/call_dan.html?menuId=000100030001')
    options = {
        'method': 'post',
        'url': url,
        'form': form,
        'params': params,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    response = basicRequest(options)
    if not response:
        return False

    try:
        page_json = json.loads(response.text)
    except ValueError:
        return False

    if 'errorMessage' in page_json and resend > 0:
        try:
            # Retry only on this respCode (the system-busy response).
            busy = page_json['errorMessage']['respCode'] == '4114030193'
        except (KeyError, TypeError):
            return False
        if busy:
            return clawPageCall(date_tuple, page_no, resend - 1)
        # Any other error used to fall through and return None; report
        # it as False like every other failure path.
        return False
    return response.text
def getUserInfo(self):
    """Fetch the user's basic information (request 2).

    :return: the response body as text, or ``False`` when the request
        produced no usable response.
    """
    endpoint = ('http://gd.10086.cn/commodity/servicio/'
                'servicioForwarding/queryData.jsps')
    self.__headers['Referer'] = ('http://gd.10086.cn/'
                                 'my/myService/myBasicInfo.shtml')
    request_options = {
        'method': 'post',
        'url': endpoint,
        'form': {'servCode': 'MY_BASICINFO', 'operaType': 'QUERY'},
        'cookies': self.cookies,
        'timeout': 30,
        'headers': self.__headers,
    }
    reply = basicRequest(request_options)
    if reply:
        return reply.text
    return False
def coroutineClawPageCall(date_tuple, page_no=1, resend=2):
    """Fetch one page of call records and append it to ``text_seq``.

    Uses ``self`` and ``text_seq`` from the enclosing scope. Failures are
    ignored: nothing is appended and nothing is raised for an empty
    response, unparsable JSON or a non-retryable error.

    :param date_tuple: ``(begin_date, end_date)`` posted as ``beginDate``
        and ``endDate``.
    :param page_no: page number to request.
    :param resend: how many more busy retries are allowed.
    :return: ``None``.
    """
    params = {'_': getTimestamp(), 'menuid': '000100030001'}
    form = {
        'pageNo': page_no,
        'pageSize': '20',
        'beginDate': date_tuple[0],
        'endDate': date_tuple[1],
    }
    url = 'http://iservice.10010.com/e3/static/query/callDetail'
    self.headers['Referer'] = (
        'http://iservice.10010.com/e3/query/call_dan.html'
        '?menuId=000100030001')
    options = {
        'method': 'post',
        'url': url,
        'form': form,
        'params': params,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    response = basicRequest(options)
    if not response:
        return None

    try:
        page_json = json.loads(response.text)
    except ValueError:
        return None

    if 'errorMessage' in page_json and resend > 0:
        try:
            # Retry only on this respCode (the system-busy response).
            busy = page_json['errorMessage']['respCode'] == '4114030193'
        except (KeyError, TypeError):
            return None
        if busy:
            # Retry through this function, not clawPageCall: the latter
            # only returns the text, so the retried page never reached
            # text_seq.
            return coroutineClawPageCall(date_tuple, page_no, resend - 1)
        return None

    text_seq.append(response.text)
    # Single-argument print() behaves the same under Python 2 and 3.
    print('注意:协助完成处理{0}日到{1}日记录中的第{2}页'.format(
        date_tuple[0], date_tuple[1], page_no))
def loginSys(self, form_item):
    """Submit the login form and continue to the welcome page.

    :param form_item: dict with the keys ``token``, ``date`` and ``code``
        (captcha text). ``code`` is overwritten in place when the captcha
        has to be retried.
    :return: the result of ``self.welcomePage()`` on success, otherwise a
        dict with ``result`` and ``error``: 4200 when the captcha retries
        (``self.threshold``) are used up, 4600 for wrong credentials,
        4000 when there is no response or the error category is not
        recognised, or the error dict from ``self.updateCode``.
    """
    # Built from live values only; the original literal carried stale
    # placeholder credentials that were overwritten before use.
    form = {
        'org.apache.struts.taglib.html.TOKEN': form_item['token'],
        'method': 'login',
        'date': form_item['date'],
        'loginname': self.section['user_name'],
        'password': self.section['password'],
        '_@IMGRC@_': form_item['code'],
    }
    url = 'https://ipcrs.pbccrc.org.cn/login.do'
    self.headers['Referer'] = (
        'https://ipcrs.pbccrc.org.cn/page/login/loginreg.jsp')
    options = {
        'method': 'post',
        'url': url,
        'form': form,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    response = basicRequest(options)
    if not response:
        # Label kept exactly as callers receive it today.
        return dict(result=4000, error='loginByJS function')

    error = self.loginError(response.text)
    if error['error'] is None:
        return self.welcomePage()

    if error['error'] == 'code':
        # Wrong captcha: fetch a new one while retries remain.
        if self.threshold <= 0:
            return dict(result=4200, error='image recognition failed')
        self.threshold -= 1
        new_code = self.updateCode()
        if isinstance(new_code, dict):
            # updateCode returns an error dict when the download fails.
            # Do not post it as the captcha text.
            return new_code
        form_item['code'] = new_code
        return self.loginSys(form_item)

    if error['error'] == 'user_name':
        return dict(result=4600, error='user_name or pw error')

    # Unrecognised category: this used to return None implicitly.
    return dict(result=4000, error='loginSys function')
def queryInfo():
    """Send the first basic-info request, then delegate to ``getInfo``.

    Uses ``self`` from the enclosing scope.

    :return: the result of ``getInfo()`` when the request succeeds,
        otherwise ``False``.
    """
    self.__headers['Referer'] = (
        'http://gd.10086.cn/my/myService/myBasicInfo.shtml')
    request_options = {
        'method': 'post',
        'url': ('http://gd.10086.cn/commodity/servicio/track/'
                'servicioDcstrack/query.jsps'),
        'form': {'servCode': 'MY_BASICINFO'},
        'cookies': self.cookies,
        'headers': self.__headers,
    }
    if basicRequest(request_options):
        return getInfo()
    return False
def updateCode(self, save_path='./code'):
    """Download a fresh captcha image and recognise it.

    A random integer is appended to the image URL as the ``a`` query
    value.

    :param save_path: accepted for compatibility; this method does not
        read it.
    :return: the result of ``self.recogImage`` on the image bytes, or
        ``dict(result=4000, ...)`` when the download fails.
    """
    nonce = random.randint(1467967606991, 1767967607647)
    request_options = {
        'method': 'get',
        'url': 'https://ipcrs.pbccrc.org.cn/imgrc.do?a=' + str(nonce),
        'form': None,
        'stream': True,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    reply = basicRequest(request_options)
    if not reply:
        return dict(result=4000, error='updateCode function')
    return self.recogImage(reply.content)
def visitSys(self):
    """Open the home page, store its cookies and go to the login page.

    :return: the result of ``self.visitLoginpage()``, or
        ``dict(result=4000, ...)`` when the home page request fails.
    """
    request_options = {
        'method': 'get',
        'url': 'https://ipcrs.pbccrc.org.cn/',
        'form': None,
        'params': None,
        'cookies': None,
        'headers': self.headers,
    }
    reply = basicRequest(request_options)
    if not reply:
        return dict(result=4000, error='visitSys funciton')
    # Later requests send the cookies stored here.
    self.cookies = dict_from_cookiejar(reply.cookies)
    return self.visitLoginpage()
def welcomePage(self):
    """Load the welcome page after login, then submit the identity code.

    :return: the result of ``self.inputIdCode()``, or
        ``dict(result=4000, ...)`` when the request fails.
    """
    self.headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/login.do'
    request_options = {
        'method': 'get',
        'url': 'https://ipcrs.pbccrc.org.cn/welcome.do',
        'form': None,
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    if not basicRequest(request_options):
        return dict(result=4000, error='welcomePage function')
    return self.inputIdCode()
def logoutSys(self):
    """Post the logout request.

    A random float is appended to the URL as its query string.

    :return: ``dict(result=2000)`` on a truthy response, otherwise
        ``dict(result=4000, error='logoutSys')``.
    """
    logout_url = 'https://ipcrs.pbccrc.org.cn/login.do?' + str(random.random())
    self.headers['Referer'] = 'https://ipcrs.pbccrc.org.cn/top2.do'
    request_options = {
        'method': 'post',
        'url': logout_url,
        'form': {'method': 'loginOut'},
        'params': None,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    if basicRequest(request_options):
        return dict(result=2000)
    return dict(result=4000, error='logoutSys')
def getProvince(self):
    """Collect the URL and full name of every province on the index page.

    Links are taken from ``//body/div[6]``. Only links whose text is in
    ``config.ROOT_SHORT_NAMES`` are kept, and each short name is replaced
    by the entry at the same index in ``config.ROOT_DETAIL_NAMES``. The
    collected hrefs are also appended to ``self.hrefs``.

    :return: ``[[href, province_detail_name], ...]``; an empty list when
        the page cannot be fetched or the expected block is missing.
    """
    url = 'http://www.cc10000.cn/0/'
    options = {
        'method': 'get',
        'url': url,
        'headers': self.headers,
        'timeout': _time_out,
    }
    response = basicRequest(options, resend_times=4)
    if not response:
        # A failed request used to crash on ``response.text``.
        return []

    selector = etree.HTML(response.text)
    nodes = selector.xpath('//body/div[6]') if selector is not None else []
    if not nodes:
        # A changed page layout used to raise IndexError here.
        return []

    content = etree.tounicode(nodes[0])
    # Raw string so ``\d`` is not treated as a string escape.
    href_and_name = re.findall(r'href="(/\d.*?)">(.*?)<', content)

    short_names = config.ROOT_SHORT_NAMES
    seq = [
        [href, config.ROOT_DETAIL_NAMES[short_names.index(name)]]
        for href, name in href_and_name
        if name in short_names
    ]
    self.hrefs.extend([entry[0] for entry in seq])
    return seq
def sysCheckLoginAgain():
    """Repeat the login check using the cookies already stored.

    Uses ``self`` from the enclosing scope.

    :return: the result of ``getHeaderView()``, or
        ``dict(code=4000, func='sysCheckLoginAgain')`` when the request
        fails.
    """
    check_url = ('http://iservice.10010.com/e3/static/check/checklogin/?_='
                 + getTimestamp())
    self.headers['Referer'] = (
        'http://iservice.10010.com/e3/query/call_dan.html'
        '?menuId=000100030001')
    request_options = {
        'method': 'post',
        'url': check_url,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    if not basicRequest(request_options):
        return dict(code=4000, func='sysCheckLoginAgain')
    return getHeaderView()
def sysCheckLogin():
    """Run the login check without cookies and merge the ones returned.

    Uses ``self`` from the enclosing scope.

    :return: the result of ``loginByJS()``, or
        ``dict(code=4000, func='sysCheckLogin')`` when the request fails.
    """
    check_url = ('http://iservice.10010.com/e3/static/check/checklogin/?_='
                 + getTimestamp())
    self.headers['Referer'] = (
        'http://iservice.10010.com/e3/query/call_dan.html'
        '?menuId=000100030001')
    request_options = {
        'method': 'post',
        'url': check_url,
        'cookies': None,
        'headers': self.headers,
    }
    reply = basicRequest(request_options)
    if not reply:
        return dict(code=4000, func='sysCheckLogin')
    self.cookies.update(dict_from_cookiejar(reply.cookies))
    return loginByJS()
def loginByJS():
    """Log in with a GET request to the MallLogin endpoint.

    Uses ``self`` from the enclosing scope. The user name and password
    come from ``self.phone_attr``; ``_`` is sent as ``req_time + 1``.

    :return: the result of ``judgeLogin(response)``, or
        ``dict(code=4000, func='loginByJS')`` when the request fails.
    """
    req_time = getTimestamp()
    cache_buster = str(int(req_time) + 1)
    user_name = self.phone_attr['phone']
    password = self.phone_attr['password']
    query = {
        '_': cache_buster,
        'callback': 'jQuery172000024585669494775475_1468770450339',
        'password': password,
        'productType': '01',
        'pwdType': '01',
        'redirectType': '03',
        'redirectURL': 'http://www.10010.com',
        'rememberMe': '1',
        'req_time': req_time,
        'userName': user_name,
    }
    self.headers['Referer'] = 'http://uac.10010.com/portal/hallLogin'
    request_options = {
        'method': 'get',
        'url': 'https://uac.10010.com/portal/Service/MallLogin',
        'params': query,
        'cookies': None,
        'headers': self.headers,
    }
    reply = basicRequest(request_options)
    if not reply:
        return dict(code=4000, func='loginByJS')
    return judgeLogin(reply)
def getHeaderView():
    """Read the account balance, then fetch the user record.

    Uses ``self`` from the enclosing scope. The balance is stored in
    ``self.phone_attr['balance']``; it is set to ``''`` when it cannot be
    read from the response.

    :return: the result of ``getUserRecord()``, or
        ``dict(code=4000, func='getHeaderView')`` when the request fails.
    """
    url = 'http://iservice.10010.com/e3/static/query/headerView'
    self.headers['Referer'] = (
        'http://iservice.10010.com/e3/index_server.html')
    options = {
        'method': 'post',
        'url': url,
        'cookies': self.cookies,
        'headers': self.headers,
    }
    response = basicRequest(options)
    if not response:
        return dict(code=4000, func='getHeaderView')

    try:
        balance = json.loads(response.text)['result']['account']
    except (KeyError, TypeError, ValueError):
        # Only KeyError was handled before. A non-JSON body (ValueError)
        # or a non-dict payload (TypeError) now gets the same fallback.
        balance = ''
    self.phone_attr['balance'] = balance
    return getUserRecord()