def crawl_phone_bill(self, **kwargs):
    """Collect monthly phone bills from service.jx.10086.cn.

    Every month produced by ``self.__monthly_period(5, '%Y%m')`` is
    requested up to ``self.max_retry`` times.  The first attempt whose
    request code is 0, whose parsed level is 0 and whose parsed result is
    truthy is stored.  When no attempt qualifies, the month is recorded as
    missing and the last message is logged under the ``request_error`` tag.

    :param kwargs: accepted for interface compatibility; not read here.
    :return: ``(0, 'success', phone_bill, miss_list)``.  The status is
        always success; callers must inspect ``miss_list`` to find the
        months for which nothing was collected.
    """
    bill_url = 'http://service.jx.10086.cn/service/queryWebPageInfo.action'
    miss_list = []
    phone_bill = []
    for month in self.__monthly_period(5, '%Y%m'):
        query = {
            'queryMonth': month,
            's': random.random()
        }
        ajax_headers = {"Accept": "*/*", "X-Requested-With": "XMLHttpRequest"}
        for _attempt in range(self.max_retry):
            code, key, resp = self.get(bill_url, params=query, headers=ajax_headers)
            # Default message; replaced by the parser's message once the
            # request itself has succeeded.
            message = "网络错误"
            if code == 0:
                key, level, message, result = response_data.phone_bill_data(
                    resp.text, month)
                if level == 0 and result:
                    phone_bill.append(result)
                    break
        else:
            # Reached only when no attempt ended in ``break``.
            miss_list.append(month)
            self.log("request_error", message, resp)
    return 0, 'success', phone_bill, miss_list
def crawl_phone_bill(self, **kwargs):
    """Collect monthly phone bills from service.cq.10086.cn.

    Every month produced by ``self.__monthly_period(6, '%Y%m')`` is posted
    up to ``self.max_retry`` times.  The first attempt whose request code
    is 0, whose parsed level is 0 and whose parsed result is truthy is
    stored.  When no attempt qualifies, the month is recorded as missing
    together with the last status key seen for it.

    :param kwargs: accepted for interface compatibility; not read here.
    :return: a 4-tuple ``(code, key, phone_bill, miss_list)``:

        * ``(0, 'success', ...)`` in the normal case;
        * ``(9, 'website_busy_error', ...)`` when exactly five months are
          missing (after dropping the current month) and every recorded
          key contains one of the substrings in ``busy_markers``;
        * ``(9, 'crawl_error', ...)`` when exactly five months are missing
          and at least one recorded key matches none of them.
    """
    miss_list = []
    phone_bill = []
    message_list = []
    crawl_phone_bill_good_ename = 'WDZD'
    URL_PHONE_BILL = 'http://service.cq.10086.cn/ics'
    for searchMonth in self.__monthly_period(6, '%Y%m'):
        crawl_phone_bill_data = {
            'service': 'ajaxDirect/1/myMobile/myMobile/javascript/',
            'pagename': 'myMobile',
            'eventname': 'getUserBill2',
            'cond_QUERY_DATE': searchMonth,
            'cond_GOODS_ID': get_good_id_param(crawl_phone_bill_good_ename)
        }
        headers = {
            "X-Requested-With": "XMLHttpRequest",
            "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
            "Referer": "http://service.cq.10086.cn/myMobile/queryBill.html"
        }
        for _attempt in range(self.max_retry):
            code, key, resp = self.post(URL_PHONE_BILL, data=crawl_phone_bill_data, headers=headers)
            if code != 0:
                message = "network_request_error"
                continue
            level, key, message, result = response_data.phone_bill_data(resp.text, searchMonth)
            if level != 0:
                continue
            if result:
                phone_bill.append(result)
                break
        else:
            # Reached only when no attempt ended in ``break``.  Pure network
            # failures are not logged here.
            if message != "network_request_error":
                self.log("crawler", "{}: {}".format(key, message), resp)
            message_list.append(key)
            miss_list.append(searchMonth)

    # A missing bill for the current month is not counted as a miss.
    now_month = datetime.datetime.now().strftime("%Y%m")
    if now_month in miss_list:
        miss_list.remove(now_month)

    if len(miss_list) == 5:
        # The previous ``map(...).count(0)`` only worked where ``map``
        # returns a list; this check gives the same answer on any version.
        busy_markers = ('request_error', 'website_busy_error', 'outdated_sid', 'success')
        every_key_matches = all(
            any(marker in status_key for marker in busy_markers)
            for status_key in message_list
        )
        if every_key_matches:
            return 9, 'website_busy_error', phone_bill, miss_list
        return 9, "crawl_error", phone_bill, miss_list
    return 0, 'success', phone_bill, miss_list
def crawl_phone_bill(self, **kwargs):
    """Collect monthly phone bills from ha.189.cn.

    Every month produced by ``self.__monthly_period(6, '%Y%m')`` is posted
    up to ``self.max_retry`` times until a response arrives that has
    request code 0 and contains neither of the two known failure notices.
    That response is handed to ``response_data.phone_bill_data``; a parsed
    level of 0 stores the result, anything else marks the month as missing.

    :param kwargs: must contain ``tel``, sent as the ``ACC_NBR`` form field.
    :return: a 4-tuple ``(code, key, phone_bill, missing_list)``:

        * ``(9, 'crawl_error', ...)`` when any parse failure had a key
          other than ``"no_data"``;
        * ``(9, 'website_busy_error', ...)`` when six months are missing;
        * ``(0, 'success', ...)`` otherwise, with the current month removed
          from ``missing_list``.
    """
    bill_url = 'http://ha.189.cn/service/iframe/bill/iframe_zd.jsp'
    db_failure_notice = u'数据库查询失败,请稍候再试。给您带来不便,敬请谅解'
    login_notice = u'请登录后再访问该功能'

    phone_bill = []
    missing_list = []
    parse_failures = 0
    for month in self.__monthly_period(6, '%Y%m'):
        form = {
            "ACC_NBR": kwargs['tel'],
            "DATE": month,
            "AreaCode": self.areaCode,
            "usertype": "1",
        }
        for _attempt in xrange(self.max_retry):
            code, key, resp = self.post(bill_url, data=form)
            if code != 0:
                continue
            if db_failure_notice in resp.text or login_notice in resp.text:
                self.log('website', 'website_busy_error', resp)
                continue
            break
        else:
            # No usable response within the retry budget.
            missing_list.append(month)
            continue

        key, level, message, result = response_data.phone_bill_data(
            self, resp, month)
        if level == 0:
            phone_bill.append(result)
            continue

        missing_list.append(month)
        # "no_data" is not counted as a crawler failure.
        if key != "no_data":
            parse_failures += 1
            self.log("crawler", "{}{}".format(key, message), resp)

    if parse_failures > 0:
        return 9, 'crawl_error', phone_bill, missing_list
    if len(missing_list) == 6:
        return 9, 'website_busy_error', phone_bill, missing_list

    # A missing bill for the current month is not reported as a miss.
    today = date.today()
    current_month = "%d%02d" % (today.year, today.month)
    if current_month in missing_list:
        missing_list.remove(current_month)
    return 0, 'success', phone_bill, missing_list
def crawl_phone_bill(self, **kwargs):
    """Collect monthly phone bills from ``URL_PHONE_BILL``.

    Every month produced by ``self.__monthly_period(6, '%Y%m')`` is
    requested up to ``self.max_retry`` times, reusing one params dict whose
    ``MONTH`` entry is overwritten per month.  The first attempt whose
    request code is 0, whose parsed code is 0 and whose parsed result is
    truthy is stored.  When no attempt qualifies, the month is recorded as
    missing.

    :param kwargs: accepted for interface compatibility; not read here.
    :return: a 4-tuple ``(code, key, phone_bill, missing_month_list)``:

        * ``(0, 'success', phone_bill, ...)`` in the normal case;
        * ``(9, 'crawl_error', [], ...)`` when exactly five months are
          missing (after dropping the current month) and at least one miss
          followed a successful request;
        * ``(9, 'website_busy_error', [], ...)`` when exactly five months
          are missing and none did.
    """
    # Months for which no bill was collected.
    absent_months = []
    params = request_params.phone_bill_params()
    headers = request_headers.phone_bill_headers()
    failed_after_response = 0
    phone_bill = []
    for month in self.__monthly_period(6, '%Y%m'):
        params['MONTH'] = month
        for _attempt in xrange(self.max_retry):
            code_a, key, resp = self.get(URL_PHONE_BILL, params=params, headers=headers)
            if code_a != 0:
                continue
            code, key, message, result = response_data.phone_bill_data(
                resp.text, month)
            if code != 0:
                continue
            if not result:
                message = u'没有账单记录'
                continue
            phone_bill.append(result)
            break
        else:
            # Reached only when no attempt ended in ``break``.  Only misses
            # whose last request succeeded are logged and counted.
            if code_a == 0:
                self.log('crawler', message, resp)
                failed_after_response += 1
            absent_months.append(month)

    # A missing bill for the current month is not counted as a miss.
    current_month = datetime.datetime.now().strftime("%Y%m")
    if current_month in absent_months:
        absent_months.remove(current_month)

    if len(absent_months) != 5:
        return 0, 'success', phone_bill, absent_months
    status = 'crawl_error' if failed_after_response > 0 else 'website_busy_error'
    return 9, status, [], absent_months
def crawl_phone_bill(self, **kwargs):
    """Collect monthly phone bills from www.ln.10086.cn.

    Every month produced by ``self.__monthly_period(5, '%Y%m')`` is
    requested up to ``self.max_retry`` times.  The first attempt whose
    request code is 0, whose parsed level is 0 and whose parsed result is
    truthy is stored.  When no attempt qualifies, the month is recorded as
    missing together with the last status key seen for it.

    :param kwargs: accepted for interface compatibility; not read here.
    :return: a 4-tuple ``(code, key, phone_bill, miss_list)``:

        * ``(9, 'website_busy_error', [], miss_list)`` when exactly five
          months are missing and every recorded key contains one of the
          substrings in ``busy_markers``;
        * ``(0, 'success', phone_bill, miss_list)`` otherwise.
    """
    phone_bill = []
    miss_list = []
    message_list = []
    URL_PHONE_BILL = 'http://www.ln.10086.cn/busicenter/fee/monthbill/MonthBillMenuAction/initBusi.menu'
    # Original note: the official site has no bill for the current month and
    # answers that the bill for that month has not been issued yet.
    for searchMonth in self.__monthly_period(5, '%Y%m'):
        crawl_phone_bill_data = {
            'flag': '999',
            'billMonth': searchMonth,
            '_menuId': '1050344'
        }
        for attempt in range(self.max_retry):
            code, key, resp = self.get(URL_PHONE_BILL, params=crawl_phone_bill_data)
            if code != 0:
                message = 'network_request_error'
                continue
            level, key, message, result = response_data.phone_bill_data(resp.text, searchMonth)
            if level != 0:
                continue
            if result:
                phone_bill.append(result)
                break
            elif attempt == self.max_retry - 1:
                # Last attempt parsed cleanly but was empty.
                message = "no_data"
                continue
        else:
            # Reached only when no attempt ended in ``break``.  Pure network
            # failures are not logged here.
            if message != "network_request_error":
                self.log("crawler", key + message, resp)
            message_list.append(key)
            miss_list.append(searchMonth)

    if len(miss_list) == 5:
        # The previous ``map(...).count(0)`` only worked where ``map``
        # returns a list; this check gives the same answer on any version.
        busy_markers = ('request_error', 'website_busy_error', 'success')
        every_key_matches = all(
            any(marker in status_key for marker in busy_markers)
            for status_key in message_list
        )
        if every_key_matches:
            return 9, 'website_busy_error', [], miss_list
    return 0, 'success', phone_bill, miss_list