예제 #1
0
 def crawl_phone_bill(self, **kwargs):
     """Fetch the monthly phone bills from service.jx.10086.cn.

     One query is issued for every month yielded by
     ``self.__monthly_period(5, '%Y%m')``.  Each month is attempted up to
     ``self.max_retry`` times; the first attempt whose response parses to a
     non-empty result is kept and ends the retries for that month.

     :param kwargs: accepted for interface compatibility; not read here.
     :return: ``(0, 'success', phone_bill, miss_list)`` where ``phone_bill``
         is the list of parsed results and ``miss_list`` the months for
         which no attempt produced a result.
     """
     # Request constants do not depend on the month, so build them once.
     url = 'http://service.jx.10086.cn/service/queryWebPageInfo.action'
     headers = {"Accept": "*/*", "X-Requested-With": "XMLHttpRequest"}

     phone_bill = []
     miss_list = []
     for searchMonth in self.__monthly_period(5, '%Y%m'):
         query = {
             'queryMonth': searchMonth,
             's': random.random()
         }
         # Pre-bind the names the for/else branch reports, so that a
         # ``max_retry`` of 0 records the month as missed instead of
         # raising NameError.
         message = "网络错误"
         resp = None
         for _ in range(self.max_retry):
             code, key, resp = self.get(url, params=query, headers=headers)
             # Default to the network-error text; it is overwritten below
             # only when the response actually reaches the parser.
             message = "网络错误"
             if code != 0:
                 continue
             key, level, message, result = response_data.phone_bill_data(
                 resp.text, searchMonth)
             if level != 0:
                 continue
             if result:
                 phone_bill.append(result)
                 break
             # An empty result with level 0 falls through to the next retry.
         else:
             # Reached only when no attempt hit ``break``.
             miss_list.append(searchMonth)
             self.log("request_error", message, resp)
     return 0, 'success', phone_bill, miss_list
예제 #2
0
 def crawl_phone_bill(self, **kwargs):
     """Fetch the monthly phone bills from service.cq.10086.cn.

     One POST is issued for every month yielded by
     ``self.__monthly_period(6, '%Y%m')``, retried up to ``self.max_retry``
     times.  The first attempt that parses to a non-empty result is kept.

     :param kwargs: accepted for interface compatibility; not read here.
     :return: a 4-tuple ``(status, key, phone_bill, miss_list)``:

         * ``(9, 'website_busy_error', ...)`` when exactly five months are
           still missing after the current month has been dropped from
           ``miss_list`` and every recorded failure key contains one of
           the markers listed in ``transient_markers``;
         * ``(9, 'crawl_error', ...)`` when exactly five months are missing
           but at least one failure key contains none of those markers;
         * ``(0, 'success', ...)`` otherwise.
     """
     url = 'http://service.cq.10086.cn/ics'
     headers = {
         "X-Requested-With": "XMLHttpRequest",
         "Content-Type": "application/x-www-form-urlencoded; charset=utf-8",
         "Referer": "http://service.cq.10086.cn/myMobile/queryBill.html"
     }
     goods_ename = 'WDZD'
     # A failure key containing any of these substrings is counted towards
     # the 'website_busy_error' verdict below.
     transient_markers = ('request_error', 'website_busy_error',
                          'outdated_sid', 'success')

     miss_list = []
     phone_bill = []
     message_list = []
     for searchMonth in self.__monthly_period(6, '%Y%m'):
         form = {
             'service': 'ajaxDirect/1/myMobile/myMobile/javascript/',
             'pagename': 'myMobile',
             'eventname': 'getUserBill2',
             'cond_QUERY_DATE': searchMonth,
             'cond_GOODS_ID': get_good_id_param(goods_ename)
         }
         # Pre-bind everything the for/else branch reads so a ``max_retry``
         # of 0 records the month as missed instead of raising NameError.
         key = message = "network_request_error"
         resp = None
         for _ in range(self.max_retry):
             code, key, resp = self.post(url, data=form, headers=headers)
             if code != 0:
                 message = "network_request_error"
                 continue
             level, key, message, result = response_data.phone_bill_data(
                 resp.text, searchMonth)
             if level != 0:
                 continue
             if result:
                 phone_bill.append(result)
                 break
             # An empty result with level 0 falls through to the next retry.
         else:
             # Reached only when no attempt hit ``break``.  Pure network
             # failures are not logged here.
             if message != "network_request_error":
                 self.log("crawler", "{}: {}".format(key, message), resp)
             message_list.append(key)
             miss_list.append(searchMonth)

     # The current month is never reported as missing.
     now_month = datetime.datetime.now().strftime("%Y%m")
     if now_month in miss_list:
         miss_list.remove(now_month)

     # NOTE(review): 5 presumably means "every month except the current
     # one" -- confirm that __monthly_period(6, ...) includes the current
     # month.
     if len(miss_list) == 5:
         # all()/any() instead of map(...).count(0): map() only returns a
         # list on Python 2, so the old form broke on Python 3.
         only_transient = all(
             any(marker in failure_key for marker in transient_markers)
             for failure_key in message_list)
         if only_transient:
             return 9, 'website_busy_error', phone_bill, miss_list
         return 9, "crawl_error", phone_bill, miss_list
     return 0, 'success', phone_bill, miss_list
예제 #3
0
 def crawl_phone_bill(self, **kwargs):
     """Fetch the monthly phone bills from ha.189.cn.

     One POST is issued for every month yielded by
     ``self.__monthly_period(6, '%Y%m')``, retried up to ``self.max_retry``
     times until a response arrives that is neither a request failure nor
     one of the two known "busy / not logged in" pages.

     :param kwargs: must contain ``'tel'``, sent as the ``ACC_NBR`` field.
     :return: a 4-tuple ``(status, key, phone_bill, missing_list)``:

         * ``(9, 'crawl_error', ...)`` when at least one month failed to
           parse with a key other than ``"no_data"``;
         * ``(9, 'website_busy_error', ...)`` when exactly six months are
           missing;
         * ``(0, 'success', ...)`` otherwise, with the current month
           removed from the missing list.
     """
     bill_url = 'http://ha.189.cn/service/iframe/bill/iframe_zd.jsp'
     bills = []
     skipped_months = []
     parse_failures = 0

     for month in self.__monthly_period(6, '%Y%m'):
         form = {
             "ACC_NBR": kwargs['tel'],
             "DATE": month,
             "AreaCode": self.areaCode,
             "usertype": "1",
         }
         for _attempt in xrange(self.max_retry):
             code, key, resp = self.post(bill_url, data=form)
             if code != 0:
                 # Request failed: just try again.
                 continue
             if (u'数据库查询失败,请稍候再试。给您带来不便,敬请谅解' in resp.text
                     or u'请登录后再访问该功能' in resp.text):
                 # The site answered with a busy / login-required page.
                 self.log('website', 'website_busy_error', resp)
                 continue
             break
         else:
             # No attempt produced a usable page for this month.
             skipped_months.append(month)
             continue

         key, level, message, result = response_data.phone_bill_data(
             self, resp, month)
         if level == 0:
             bills.append(result)
             continue

         skipped_months.append(month)
         # "no_data" is the only parse failure not counted as a crawl error.
         if key != "no_data":
             parse_failures += 1
         self.log("crawler", "{}{}".format(key, message), resp)

     if parse_failures > 0:
         return 9, 'crawl_error', bills, skipped_months
     if len(skipped_months) == 6:
         return 9, 'website_busy_error', bills, skipped_months

     # The current month is dropped from the missing list on success.
     today = date.today()
     current_month = "{0:d}{1:02d}".format(today.year, today.month)
     try:
         skipped_months.remove(current_month)
     except ValueError:
         pass
     return 0, 'success', bills, skipped_months
예제 #4
0
    def crawl_phone_bill(self, **kwargs):
        """Fetch the monthly phone bills from ``URL_PHONE_BILL``.

        One GET is issued for every month yielded by
        ``self.__monthly_period(6, '%Y%m')``, retried up to
        ``self.max_retry`` times.  The first attempt that parses to a
        non-empty result is kept.

        :param kwargs: accepted for interface compatibility; not read here.
        :return: a 4-tuple ``(status, key, phone_bill, missing_months)``.
            When exactly five months are still missing after the current
            month has been dropped from the list, the status is 9, the
            bill list is empty, and the key is ``'crawl_error'`` if any
            month ended on a parse-level failure, else
            ``'website_busy_error'``.  Otherwise ``(0, 'success', ...)``.
        """
        # Months for which no bill could be obtained.
        missing_month_list = []
        params = request_params.phone_bill_params()
        headers = request_headers.phone_bill_headers()

        crawl_error_num = 0
        phone_bill = []
        for searchMonth in self.__monthly_period(6, '%Y%m'):
            params['MONTH'] = searchMonth
            # Pre-bind everything the for/else branch reads so a
            # ``max_retry`` of 0 records the month as missing instead of
            # raising NameError.
            code_a = message = resp = None
            # range() rather than xrange() so the block runs on both
            # Python 2 and Python 3.
            for _ in range(self.max_retry):
                code_a, key, resp = self.get(URL_PHONE_BILL,
                                             params=params,
                                             headers=headers)
                if code_a != 0:
                    continue
                code, key, message, result = response_data.phone_bill_data(
                    resp.text, searchMonth)
                if code != 0:
                    continue
                if result:
                    phone_bill.append(result)
                    break
                # Parsed fine but empty: record the "no bill record" text
                # and retry.
                message = u'没有账单记录'
            else:
                # Reached only when no attempt hit ``break``.  A failure
                # whose last request succeeded counts as a crawl error.
                if code_a == 0:
                    self.log('crawler', message, resp)
                    crawl_error_num += 1
                missing_month_list.append(searchMonth)

        # The current month is never reported as missing.
        now_month = datetime.datetime.now().strftime("%Y%m")
        if now_month in missing_month_list:
            missing_month_list.remove(now_month)

        # NOTE(review): 5 presumably means "every month except the current
        # one" -- confirm that __monthly_period(6, ...) includes the
        # current month.
        if len(missing_month_list) == 5:
            if crawl_error_num > 0:
                return 9, 'crawl_error', [], missing_month_list
            return 9, 'website_busy_error', [], missing_month_list
        return 0, 'success', phone_bill, missing_month_list
예제 #5
0
 def crawl_phone_bill(self, **kwargs):
     """Fetch the monthly phone bills from www.ln.10086.cn.

     One GET is issued for every month yielded by
     ``self.__monthly_period(5, '%Y%m')``, retried up to ``self.max_retry``
     times.  The first attempt that parses to a non-empty result is kept.

     :param kwargs: accepted for interface compatibility; not read here.
     :return: ``(9, 'website_busy_error', [], miss_list)`` when exactly
         five months are missing and every recorded failure key contains
         one of the markers in ``transient_markers``; otherwise
         ``(0, 'success', phone_bill, miss_list)``.
     """
     # Original note: the official site has no bill for the current month
     # and answers that the bill for that month has not been issued yet.
     url = ('http://www.ln.10086.cn/busicenter/fee/monthbill/'
            'MonthBillMenuAction/initBusi.menu')
     # A failure key containing any of these substrings is counted towards
     # the 'website_busy_error' verdict below.
     transient_markers = ('request_error', 'website_busy_error', 'success')

     phone_bill = []
     miss_list = []
     message_list = []
     for searchMonth in self.__monthly_period(5, '%Y%m'):
         query = {
             'flag': '999',
             'billMonth': searchMonth,
             '_menuId': '1050344'
         }
         # Pre-bind everything the for/else branch reads so a ``max_retry``
         # of 0 records the month as missed instead of raising NameError.
         key = message = 'network_request_error'
         resp = None
         for attempt in range(self.max_retry):
             code, key, resp = self.get(url, params=query)
             if code != 0:
                 message = 'network_request_error'
                 continue
             level, key, message, result = response_data.phone_bill_data(
                 resp.text, searchMonth)
             if level != 0:
                 continue
             if result:
                 phone_bill.append(result)
                 break
             if attempt == self.max_retry - 1:
                 # Final attempt parsed cleanly but carried no bill.
                 message = "no_data"
         else:
             # Reached only when no attempt hit ``break``.  Pure network
             # failures are not logged here.
             if message != "network_request_error":
                 self.log("crawler", key + message, resp)
             message_list.append(key)
             miss_list.append(searchMonth)

     if len(miss_list) == 5:
         # all()/any() instead of map(...).count(0): map() only returns a
         # list on Python 2, so the old form broke on Python 3.
         only_transient = all(
             any(marker in failure_key for marker in transient_markers)
             for failure_key in message_list)
         if only_transient:
             return 9, 'website_busy_error', [], miss_list
     return 0, 'success', phone_bill, miss_list