Exemple #1
0
    def parse_queryHistoryBill(self, response):
        """
        Parse one month's bill response and schedule the next request.

        Reads ``item`` and ``last_bill_month`` from ``response.meta``. On a
        successful payload the month's total fee is stored under
        ``item["history_bill"][YYYYMM]``. An unrecognised payload is retried
        once (tracked by a per-month flag in the request meta) and afterwards
        only logged. Then either the next month's bill or the payment records
        are requested, depending on ``meta["bill_count"]``.
        """
        body = response.text
        meta = response.meta
        item = meta["item"]
        try:
            bill_month = meta["last_bill_month"]
            month_key = bill_month.strftime("%Y%m")
            retry_flag = "bill_" + month_key + "_retry"

            if '"result":' in body:
                raw_fee = json_loads(body)["result"].get("allfee")
                all_fee = None if raw_fee is None else float(raw_fee)
                item["history_bill"][month_key] = {"all_fee": all_fee}
            elif '"historyResultList"' in body and "企业套餐" in body:
                # The payload mentions an enterprise plan: emit what we have
                # and stop with an error instead of continuing the crawl.
                unsupported_msg = "联通---暂不支持企业套餐用户验证。"
                yield item
                yield from self.error_handle(item["username"],
                                             unsupported_msg,
                                             unsupported_msg)
                return
            elif retry_flag not in meta:
                # First failure for this month: re-issue the same request once.
                self.logger.error("联通---重试账单:(username:%s, password:%s) %s" %
                                  (item["username"], item["password"], body))
                sleep(0.6)
                retry_request = response.request.copy()
                retry_request.meta[retry_flag] = 1
                yield retry_request
                return
            else:
                # Already retried: log and move on without this month's bill.
                self.logger.error("联通---获取账单失败:(username:%s, password:%s) %s" %
                                  (item["username"], item["password"], body))

            more_months_needed = meta["bill_count"] < self.BILL_COUNT_LIMIT
            if more_months_needed:
                # Keep walking back through the months.
                yield self._get_historyBill_request(response, bill_month)
            else:
                # Bills are done; start collecting payment records.
                meta["payment_count"] = 0
                item["history_payment"] = defaultdict(dict)
                yield self._get_payment_request(response, date.today())
        except Exception:
            yield item
            yield from self.except_handle(item["username"],
                                          "联通---解析账单失败: %s" % body)
def send_sms_code(request):
    """
    Send (or re-send) the login SMS verification code.

    When the POST field ``is_first`` equals ``"true"`` the send URL is loaded
    from SSDB (keyed by username + suffix + account type), cached in the
    session, and the send is reported as successful without issuing a request
    (per the original note, the first code is sent automatically).

    Otherwise the cached URL is requested again, after sleeping long enough
    to keep ``SMS_SLEEP_TIME + 2`` seconds between sends.

    :param request: Django request; reads ``request.POST`` and
        ``request.session``.
    :return: ``JsonResponse`` carrying the ok/error payload.
    """
    ret_data = {}
    try:
        args = request.POST
        session = request.session
        if args.get("is_first", False) == "true":
            username = args["username"].strip()
            account_type = args["account_type"]
            key = username + ACCOUNT_CRAWLING_SMS_HEADERS_SSDB_SUFFIX + account_type
            ssdb_conn = get_ssdb_conn()
            headers_data = ssdb_conn.get(key)
            if not headers_data:
                add_ajax_error_json(ret_data, "获取短信验证码失败")
                return JsonResponse(ret_data)

            headers_dict = json_loads(headers_data)
            send_url = headers_dict.get("url", "")
            session["send_url"] = send_url
            session["last_send_time"] = time()

            # The first code is sent automatically, so treat it as delivered.
            res_json = {"stat": "ok", "info": {"sent": True}}
        else:
            last_send_time = session.get("last_send_time", 0)
            need_sleep_time = max(last_send_time + SMS_SLEEP_TIME + 2 -
                                  time(), 0) if last_send_time else 0
            sleep(need_sleep_time)

            send_url = session.get("send_url")
            res_json = get_response_by_requests(
                send_url, headers=DEFAULT_HEADERS).json()
            # Record this send so the NEXT resend is throttled relative to it;
            # previously the timestamp stayed at the first send forever.
            session["last_send_time"] = time()
        if res_json.get("stat") == "ok" and res_json.get("info",
                                                         {}).get("sent"):
            add_ajax_ok_json(ret_data)
        else:
            error_msg = res_json.get("info", {}).get("errorMessage")
            add_ajax_error_json(ret_data, error_msg or "发送短信验证码失败")
    except Exception:
        # Any failure (missing POST field, bad JSON, network error, ...) is
        # reported to the client as a generic send error.
        add_ajax_error_json(ret_data, "发送短信验证码出错")

    return JsonResponse(ret_data)
Exemple #3
0
    def parse_checklogin(self, response):
        """
        Parse the identity (ID card) information response.

        On a payload containing ``"userInfo"`` the registration date, ID
        number/address, real name, sex, package and account status are copied
        into the item. Otherwise the failure is logged. In both cases the
        crawl continues with the call-detail request.
        """
        body = response.text
        meta = response.meta
        item = meta["item"]
        try:
            if '"userInfo":' not in body:
                self.logger.error(
                    "联通---获取身份证信息失败:(username:%s, password:%s) %s" %
                    (item["username"], item["password"], body))
            else:
                user_info = json_loads(body)["userInfo"]

                # opendate is sliced as YYYYMMDD and re-joined as YYYY-MM-DD.
                opendate = user_info["opendate"]
                registration_time = "-".join(
                    (opendate[:4], opendate[4:6], opendate[6:8]))
                item["registration_time"] = registration_time
                item["in_nets_duration"] = get_in_nets_duration_by_start_date(
                    registration_time)
                item["identification_number"] = user_info["certnum"]
                item["identification_addr"] = user_info["certaddr"]
                item["real_name"] = user_info["custName"]

                if user_info["custsex"] == "1":
                    item["sex"] = Sex.Male
                else:
                    item["sex"] = Sex.Female

                item["package"] = (user_info["brand_name"] + "-" +
                                   user_info["packageName"])

                if user_info["status"] == "开通":
                    item["status"] = UserStatus.Opened
                else:
                    item["status"] = UserStatus.Shutdown

            # Disabled step: real-name registration check.
            # yield Request("http://iservice.10010.com/e3/static/transact/supRegistCheckController"
            #               "?_=" + get_js_time(), self.parse_supRegistCheck, dont_filter=True,
            #               meta=meta, method="POST", errback=self.err_callback)

            # Next step: fetch call records, starting from today.
            sleep(0.6)
            meta["call_count"] = 0
            item["history_call"] = defaultdict(dict)
            yield self._get_callDetail_request(response, date.today())
        except Exception:
            yield item
            yield from self.except_handle(item["username"],
                                          "联通---解析身份证信息失败: %s" % body)
Exemple #4
0
    def parse_item(self, response):
        """
        Parse a detail response and either emit the item or retry.

        A 302 status, an empty ``{}`` body, or a "please enable JavaScript"
        page triggers a retry of the detail request (refreshing the captcha
        code if the one used is still the current one). A body starting with
        ``<!DOCTYPE`` emits the item unchanged. Anything else is parsed as
        JSON and its fields copied into the item.
        """
        body = response.text
        item = response.meta["item"]
        try:
            need_retry = True
            if response.status == 302:
                self.logger.error("被执行人---详情:访问频繁")
                sleep(self.sleep_time)
            elif body == "{}":
                self.logger.warning("被执行人---详情验证码错误。")
            elif "请开启J" in body:
                self.logger.error("被执行人---详情:请开启JavaScript并刷新该页")
                sleep(self.sleep_time)
            elif body.startswith("<!DOCTYPE"):
                yield item
                return
            else:
                need_retry = False

            if not need_retry:
                detail = json_loads(body)
                item["id"] = detail.get("partyCardNum", "")
                item["execution_court"] = detail.get("execCourtName")
                item["execution_money"] = detail.get("execMoney")
                yield item
                return

            # Rebuild the detail request from the query string of the failed one.
            query = parse_qs(urlsplit(response.url).query)
            used_captcha = query["j_captcha"][0]
            if used_captcha == self.captcha_code:
                # The failed request used the current code: fetch a new one.
                self.captcha_code = self.get_captcha_code(response)

            retry_params = {
                "id": query["id"][0],
                "j_captcha": self.captcha_code,
                "captchaId": self.captcha_id,
            }
            retry_url = ("http://zhixing.court.gov.cn/search/newdetail?" +
                         urlencode(retry_params))
            yield Request(retry_url,
                          self.parse_item,
                          dont_filter=True,
                          meta=response.meta,
                          errback=self.err_callback)
        except Exception:
            self.logger.exception("text(%s) url(%s)" % (body, response.url))
 def parse_vcode_url(self, response):
     """
     Read the captcha path from the JSON body and request the captcha.

     If anything fails, log it and fall back to re-requesting the URL found
     in the ``Referer`` header (when one is set), handled by ``self.parse``.
     """
     try:
         # The JSON body carries the captcha path under the 'url' key.
         captcha_path = json_loads(response.body)['url']
         yield Request(url=self.start_url + captcha_path,
                       headers=self.headers,
                       callback=self.parse_vcode,
                       dont_filter=True,
                       errback=self.err_callback)
     except Exception:
         self.logger.error("贷款盟---得到验证码请求访问失败!")
         referer = self.headers.get('Referer', '')
         if not referer:
             return
         yield Request(url=referer,
                       headers=self.headers,
                       callback=self.parse,
                       dont_filter=True,
                       errback=self.err_callback)
def _get_phone_info_from_aliyun_api1(phone_num):
    """
    Look up carrier brand, province and city for a phone number.

    Queries the Aliyun market "showphone" API (api1; per the original note it
    can be re-purchased without limit, 1000 calls per purchase at zero cost).

    :param phone_num: phone number interpolated into the query string.
    :return: tuple ``(brand, province, city)``; the brand is collapsed to one
        of "移动" / "联通" / "电信" when it starts with one of them, and a
        trailing "市" is stripped from the city.
    :raises Exception: when the API reports a non-zero ``ret_code``. Network
        and JSON errors from the underlying calls propagate unchanged.
    """
    url = "http://showphone.market.alicloudapi.com/6-1?num=%s" % phone_num
    req = Request(url)
    # NOTE(review): the APPCODE credential is hard-coded here; consider
    # loading it from configuration instead.
    req.add_header('Authorization', 'APPCODE 4dda59bc51eb4fd78bcdb3c54e5c3405')

    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(req, timeout=ASK_TIMEOUT) as resp:
        data = json_loads(resp.read())

    result = data["showapi_res_body"]
    if result["ret_code"] != 0:
        raise Exception("aliyun api1 lookup failed for %s: ret_code=%s" %
                        (phone_num, result["ret_code"]))

    # Drop the "virtual operator" marker and whitespace, then normalise the
    # brand to the bare carrier name when it starts with one.
    brand = space_pattern.sub("", result["name"].replace("虚拟运营商", ""))
    for carrier in ["移动", "联通", "电信"]:
        if brand.startswith(carrier):
            brand = carrier
            break
    return brand, result["prov"], result["city"].rstrip("市")
Exemple #7
0
    def parse_extraction(self, response):
        """
        Store the ``datalist`` of the response and finish the crawl.

        The JSON body's ``datalist`` (empty list when absent) is saved as
        ``item["fetch_detail"]``, after which ``crawling_done`` is invoked.

        :param response: response whose ``meta`` carries the item.
        :return: generator of whatever ``crawling_done`` / ``except_handle``
            yield.
        """
        item = response.meta["item"]
        try:
            payload = json_loads(response.text)
            item["fetch_detail"] = payload.get("datalist", [])

            # This is the last step of the crawl.
            yield from self.crawling_done(item)
        except Exception:
            yield from self.except_handle(item["username"],
                                          "广州公积金中心--提取进度解析异常")
Exemple #8
0
def qq_get_qrcode_status(request):
    """
    Check the status of a QQ login QR code.

    Reads ``cookies`` (a JSON string), ``appid`` and ``daid`` from the POST
    data, polls the ptqrlogin endpoint with the ``qrsig`` cookie, and returns
    the code, redirect URL, status text and nickname extracted from the
    comma-separated result (fields 0, 2, 4 and 5).

    :param request: Django request; only ``request.POST`` is read.
    :return: ``JsonResponse`` with ``data`` on success, or an error payload
        ("QR code expired") when any step raises.
    """
    ret_data = {}
    try:
        args = request.POST
        # NOTE(review): the ``{}`` default is not a JSON string; when
        # 'cookies' is missing this presumably raises inside json_loads and
        # falls through to the error response below - confirm that is intended.
        cookies = json_loads(args.get('cookies', {}))
        appid = args.get('appid', '522005705')
        daid = args.get('daid', '4')
        ptqrtoken = __hash_33(cookies.get('qrsig'))
        scan_url = "https://ssl.ptlogin2.qq.com/ptqrlogin?u1=https%3A%2F%2Fmail.qq.com%2Fcgi-bin%2Freadtemplate%3" \
                   "Fcheck%3Dfalse%26t%3Dloginpage_new_jump%26vt%3Dpassport%26vm%3Dwpt%26ft%3Dlogi" \
                   "npage%26target%3D&ptqrtoken={0}&ptredirect=0&h=1&t=1&g=1&from_ui=1&pt" \
                   "lang=2052&action=1-1-1513651703600&js_ver=10232&js_type=1&login_s" \
                   "ig=&pt_uistyle=25&aid={1}&daid={2}&".format(ptqrtoken, appid, daid)
        headers = QQ_GET_QRCODE_STATUS_HEADERS.copy()
        headers['Cookie'] = "qrsig=" + cookies.get('qrsig')
        scan_text = http_get(scan_url, headers=headers, cookies=cookies).text
        qr_result_list = qq_qr_result_list_pattern.search(scan_text).group(
            1).split(',')
        qr_result_code = qq_qr_result_info_pattern.search(
            qr_result_list[0]).group(1)
        qr_result_url = qq_qr_result_info_pattern.search(
            qr_result_list[2]).group(1)
        qr_result_status = qq_qr_result_info_pattern.search(
            qr_result_list[4]).group(1)
        qr_result_nick_name = qq_qr_result_info_pattern.search(
            qr_result_list[5]).group(1)
        data = {
            'qr_code': qr_result_code,
            'qr_url': qr_result_url,
            'qr_status': qr_result_status,
            'qr_nick_name': qr_result_nick_name
        }

        ret_data["data"] = data
        add_ajax_ok_json(ret_data)
    except Exception:
        add_ajax_error_json(ret_data, "二维码失效")

    # Return after the try block rather than from ``finally``: a ``return``
    # inside ``finally`` would also swallow KeyboardInterrupt / SystemExit.
    return JsonResponse(ret_data)
Exemple #9
0
def parse_spdb_credit_email_html(html_string, subject=""):
    """
    Extract SPDB credit-card bill information from a statement e-mail.

    The text of the first ``<td>`` is searched for the holder name, repayment
    amount and due date. Then, best-effort, the link next to the '点击' span
    is followed and the online bill JSON is fetched; its fields overwrite or
    extend the values parsed from the e-mail. Any failure in that online step
    is ignored.

    :param html_string: HTML body of the e-mail.
    :param subject: unused by this function.
    :return: ``{'bill_info': {...}, 'bill_detail': []}``.
    """
    soup = BeautifulSoup(html_string, "lxml")
    bill_info = {}

    summary_text = soup.find("td").getText(strip=True)

    name_match = spdb_name_pattern.search(summary_text)
    if name_match:
        bill_info['real_name'] = name_match.group(1)

    repayment_match = spdb_repayment_pattern.search(summary_text)
    if repayment_match:
        # Strip thousands separators.
        bill_info['repayment'] = repayment_match.group(1).replace(",", "")

    due_date_match = spdb_due_date_pattern.search(summary_text)
    if due_date_match:
        bill_info['due_date'] = due_date_match.group(1).replace("/", DATE_SEP)

    try:
        link_span = soup.find('span', text='点击')
        detail_url = link_span.findParent("table").find('a').get('href')

        headers = SPDB_HEADERS.copy()
        first_resp = http_get(detail_url, headers=headers)
        # Forward the cookie set by the first response to the data endpoint.
        headers['Cookie'] = first_resp.headers.get('Set-Cookie')
        home_data_url = 'https://ebill.spdbccc.com.cn/cloudbank-portal/myBillController/loadHomeData.action'

        json_info = json_loads(http_post(home_data_url, headers=headers).text)
        field_map = (
            ('card_num', 'cardNo'),
            ('due_date', 'dueDate'),
            ('repayment', 'stmtAmt'),
            ('min_repayment', 'minPay'),
            ('credit_limit', 'creditLimit'),
            ('cash_limit', 'cashLimit'),
            ('bill_date', 'closeDate'),
        )
        for bill_key, json_key in field_map:
            bill_info[bill_key] = json_info.get(json_key)
    except Exception:
        # Deliberately best-effort: keep whatever was parsed from the e-mail.
        pass

    return {'bill_info': bill_info, 'bill_detail': []}
Exemple #10
0
    def parse_login(self, response):
        """
        Parse the login response and request the deposit information.

        The first entry of ``datalist`` is inspected: unless its ``jyjg``
        field equals ``"1"`` the ``sbyy`` text is reported through
        ``error_handle`` and processing stops. Otherwise the basic account
        fields are copied into the item and the deposit-info endpoint is
        requested.

        :param response: login response; ``meta`` carries the item.
        :return: generator of requests / handler output.
        """
        meta = response.meta
        item = meta["item"]
        try:
            login_data = json_loads(response.text)["datalist"][0]

            if login_data["jyjg"] != "1":
                reason = login_data["sbyy"]
                yield from self.error_handle(item["username"],
                                             reason,
                                             tell_msg=reason)
                return

            item["mobile"] = login_data.get("sjhm", "")
            item["private_no"] = login_data.get("gjjzh", "")
            item["real_name"] = login_data.get("xm", "")
            item["identification_number"] = login_data.get("zjh", "")
            item["identification_type"] = "身份证"

            # Next step: request the deposit information endpoint.
            self.logger.info("请求缴存信息接口->%s" % self.userdeposit_url)
            form = {
                "gjjzh": item["private_no"],
                "zjh": item["identification_number"],
                "qqly": "1001",
                "key": self.key,
                "version": self.version,
            }
            yield FormRequest(url=self.userdeposit_url,
                              callback=self.parse_userdeposit,
                              headers=self.headers,
                              formdata=form,
                              meta=meta,
                              dont_filter=True,
                              errback=self.err_callback)
        except Exception:
            yield from self.except_handle(item["username"],
                                          "广州公积金中心---登录数据解析异常")
 def get_count_by_condition(self, court, date):
     """
     Return the number of results matching a court / judgment-date query.

     Posts the query to ``self.count_url`` through the current proxy and
     reads ``Count`` from the first element of the JSON reply. The call
     retries forever: after any failure it switches to a new proxy (sleeping
     5 seconds first unless the failure was a ``ProxyError``).

     :param court: court name inserted into the query parameter.
     :param date: judgment date inserted into the query parameter.
     :return: the result count as ``int``.
     """
     # NOTE: this mutates a module-level setting of ``adapters``.
     adapters.DEFAULT_RETRIES = 5
     while True:
         sleep(0.5)  # keep at least 0.5 seconds between requests
         try:
             self.logger.info("current proxy->%s" % self.proxy)
             self.logger.info("get_count_by_condition->%s,%s" %
                              (court, date))
             data = {
                 "Param": "法院名称:%s,裁判日期:%s" % (court, date),
                 "Index": "1",
                 "Page": "5",
                 "Order": "法院层级",
                 "Direction": "asc",
             }
             proxies = {
                 "http": "http://%s" % self.proxy,
             }
             # A fresh session is used per attempt; the context manager
             # closes it so retries do not accumulate open connections.
             with Session() as s:
                 s.keep_alive = False
                 r = s.post(self.count_url,
                            data=data,
                            headers=self.headers,
                            proxies=proxies,
                            timeout=60)
             # The body is a quoted, backslash-escaped JSON string: unwrap it.
             text = r.text.replace("\\", "").strip("\"")
             self.logger.info("response text->%s" % r.text)
             result = json_loads(text)
             count = result[0]["Count"]
             self.logger.info("get_count_by_condition,count->%s" % count)
             return int(count)
         except Exception as e:
             self.logger.info("get_count_by_condition,error->%s" % str(e))
             if not isinstance(e, ProxyError):
                 self.logger.info("sleep start!")
                 sleep(5)  # not a proxy failure: wait 5 seconds before retrying
                 self.logger.info("sleep end!")
             self.proxy = self.proxy_api.get_proxy_one()  # switch proxy
Exemple #12
0
 def parse_detail(self, response):
     """
     Parse one bill detail response and finish once all bills are in.

     The record built from ``data.body`` of the JSON reply is appended to
     ``item['bill_records']``; when the list length reaches ``meta['count']``
     the crawl is completed with a logout request. On any error the item is
     emitted and ``except_handle`` is invoked.
     """
     meta = response.meta
     item = meta['item']
     try:
         detail_body = json_loads(response.text)['data']['body']
         record = self.get_bill_record(meta['bankname'],
                                       meta['subject'], detail_body)
         collected = item['bill_records']
         collected.append(record)
         all_collected = meta['count'] == len(collected)
         if all_collected:
             yield from self.crawling_done(
                 item, logout_request=self.get_logout_request(meta))
     except Exception:
         yield item
         yield from self.except_handle(
             item['username'],
             msg="账单解析异常",
             tell_msg="账单解析异常",
             logout_request=self.get_logout_request(meta))
Exemple #13
0
    def parse_fee(self, response):
        """
        Parse the balance response and continue with payment history.

        Return code ``000000`` computes ``item["balance"]`` from
        ``curFeeTotal``, ``curFee`` and ``oweFee``; ``570007`` retries the
        request; ``500003`` emits the item and reports an authentication
        error. Any other payload is logged. Unless the method returned
        early, the payment-history request is issued next.
        """
        body = response.text
        meta = response.meta
        item = meta["item"]
        try:
            if '"retCode":"000000"' in body:  # success
                fee_data = json_loads(body)["data"]
                cur_fee_total = float(fee_data.get("curFeeTotal", 0))
                owe_fee = float(fee_data.get("oweFee", 0))
                cur_fee = float(fee_data.get("curFee", cur_fee_total))

                # With a positive total the owed amount is ignored; otherwise
                # the negated owed amount also takes part in the minimum.
                if cur_fee_total > 0:
                    balance = min(cur_fee_total, cur_fee)
                else:
                    balance = min(cur_fee_total, cur_fee, -owe_fee)
                item["balance"] = balance

                self.logger.critical("curFeeTotal(%s) oweFee(%s) curFee(%s)" %
                                     (cur_fee_total, owe_fee, cur_fee))
            elif '"retCode":"570007"' in body:  # system busy
                yield from self._retry_request(response)
                return
            elif '"retCode":"500003"' in body:  # session empty, login required
                yield item
                log_msg = ("移动---获取余额信息失败: (username:%s, password:%s) %s" %
                           (item["username"], item["password"], body))
                yield from self.error_handle(
                    item["username"],
                    log_msg,
                    "认证失败,请刷新页面重试。",
                    logout_request=self.get_logout_request(meta))
                return
            else:
                self.logger.error(
                    "移动---获取余额信息失败:(username:%s, password:%s) %s" %
                    (item["username"], item["password"], body))

            # Next step: payment history.
            yield self._get_historyPayment_request(response)
        except Exception:
            yield item
            yield from self.except_handle(
                item["username"],
                "移动---解析余额信息失败: %s" % body,
                logout_request=self.get_logout_request(meta))
Exemple #14
0
 def _parse_qrcode_login_result(self, response):
     """
     Handle the result of a QR-code login.

     If the returned ``url`` contains ``dangerousVerify`` the SMS
     verification flow is started; otherwise the order request is issued.

     :return: generator of follow-up requests / handler output.
     """
     username = response.meta["item"]["username"]
     try:
         redirect_url = json_loads(response.text).get("url", "")
         if "dangerousVerify" not in redirect_url:
             self.logger.info("扫描二维码登录成功")
             yield self._yield_order_request(response)
         else:
             self.logger.info("账户存在安全风险,需要短信验证")
             yield from self._dangerous_verify_scrapy(
                 username, redirect_url, response)
     except Exception:
         yield from self.except_handle(username, "解析二维码登录结果失败,登录失败")
Exemple #15
0
 def str_to_json(self, content, pattern=None, charset="utf-8"):
     """
     Extract a JSON fragment from text with a regex and decode it.

     :param content: ``str`` or ``bytes``; bytes are decoded with *charset*.
     :param pattern: compiled regex whose group 1 captures the JSON text;
         defaults to ``self.reg_tar_str``.
     :param charset: encoding used when *content* is bytes.
     :return: the decoded JSON value, or ``None`` when the pattern does not
         match or any step raises.
     """
     try:
         regex = self.reg_tar_str if pattern is None else pattern
         text = content.decode(charset) if isinstance(content, bytes) else content
         found = regex.search(text)
         return json_loads(found.group(1)) if found else None
     except Exception:
         return None
Exemple #16
0
 def parse_detail(self, response):
     """
     Parse one bill detail response and finish once all bills are in.

     The record is built from the ``subject`` and ``display`` fields of the
     reply's ``data`` and appended to ``item['bill_records']``. When the
     list length equals ``meta['count']`` the crawl is completed with a
     logout request. Errors are reported through ``except_handle``.
     """
     meta = response.meta
     item = meta['item']
     try:
         payload = json_loads(response.text)['data']
         record = self.get_bill_record(meta['bankname'],
                                       payload['subject'],
                                       payload['display'])
         collected = item['bill_records']
         collected.append(record)
         if len(collected) == meta['count']:
             # Every expected bill has been collected: finish and log out.
             logout = self.get_logout_request(meta)
             yield from self.crawling_done(item, logout_request=logout)
     except Exception:
         yield from self.except_handle(
             item['username'],
             "账单解析异常",
             tell_msg="账单解析异常",
             logout_request=self.get_logout_request(meta))
Exemple #17
0
 def parse(self, response):
     """
     Entry point: unpack the QR result URL and cookies, then check the sig.

     ``item['password']`` is expected to hold ``"<url>|<cookies-json>"``. It
     is split on the first ``|``, cleared on the item, and the URL is
     requested with the decoded cookies; ``check_sig`` handles the reply.
     """
     meta = response.meta
     item = meta['item']
     try:
         parts = item['password'].split("|", 1)
         # The field only transported the login data; blank it on the item.
         item['password'] = ''
         qr_result_url, cookies_json = parts
         cookies = json_loads(cookies_json)
         meta['retry_time'] = 0
         yield Request(qr_result_url,
                       callback=self.check_sig,
                       meta=meta,
                       cookies=cookies,
                       dont_filter=True,
                       errback=self.err_callback)
         # yield from self.qr_login(response)
     except Exception:
         yield from self.except_handle(item['username'],
                                       '登录入口解析失败',
                                       tell_msg="邮箱登录失败,请刷新页面重试")
Exemple #18
0
def do_img_upload():
    """
    Log in to the YINDING site, run ``_do_img_upload``, and always log out.

    The login is two POSTs (validate user, then login). A non-200 status, a
    non-``SUCCESS`` validation result, or a login page without the logout
    marker raises ``LoginFailedException``. The logout POST runs in
    ``finally`` regardless of the outcome.
    """
    logout_url = YINDING_HOST + "login/initLogin.loginOut.do"
    with Session() as _session:
        try:
            _session.headers.update({
                "Referer": YINDING_HOST + "login/initLogin.goInit.do",
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0",
                "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
            })
            # Start from a logged-out state.
            _session.post(logout_url)

            # Step 1: validate the user credentials.
            validate_form = {
                "userInfo.userName": "******",
                "userInfo.password": "******",
            }
            validate_resp = _session.post(
                YINDING_HOST + "login/initLogin.validationUser.do",
                validate_form, **KWARGS)
            if validate_resp.status_code != 200:
                raise LoginFailedException("验证用户返回非200")

            if json_loads(validate_resp.text)["resultType"] != "SUCCESS":
                raise LoginFailedException(validate_resp.text)

            # Step 2: the actual login.
            login_form = {
                "userName": "******",
                "password": "******",
            }
            login_resp = _session.post(
                YINDING_HOST + "login/initLogin.loginSystem.do",
                login_form, **KWARGS)
            if login_resp.status_code != 200:
                raise LoginFailedException("登录返回非200")
            if "退出登录" not in login_resp.text:
                raise LoginFailedException("登录失败")

            _do_img_upload(_session)
        finally:
            # Always log out, whether or not the upload succeeded.
            _session.post(logout_url)
Exemple #19
0
    def parse_queryHistoryBill(self, response):
        """
        Parse the bill history response and continue the crawl.

        Return code ``000000`` fills ``item["history_bill"]`` with one
        ``{"all_fee": float}`` entry per ``billMonth``; ``570007`` retries
        the request; ``500003`` emits the item and reports an authentication
        error. Any other payload is logged. Unless the method returned
        early, ``casual_request`` is issued next.
        """
        body = response.text
        meta = response.meta
        item = meta["item"]
        try:
            if '"retCode":"000000"' in body:
                # One response already carries all the months.
                bills_by_month = {}
                for bill in json_loads(body)["data"]:
                    bills_by_month[bill["billMonth"]] = {
                        "all_fee": float(bill["billFee"])
                    }
                item["history_bill"] = bills_by_month
            elif '"retCode":"570007"' in body:  # system busy
                yield from self._retry_request(response)
                return
            elif '"retCode":"500003"' in body:  # session empty, login required
                yield item
                log_msg = ("移动---获取历史账单失败: (username:%s, password:%s) %s" %
                           (item["username"], item["password"], body))
                yield from self.error_handle(
                    item["username"],
                    log_msg,
                    "认证失败,请刷新页面重试。",
                    logout_request=self.get_logout_request(meta))
                return
            else:
                self.logger.error(
                    "移动---获取历史账单失败:(username:%s, password:%s) %s" %
                    (item["username"], item["password"], body))

            meta["sendSMSpwd_count"] = 1
            yield self.casual_request(response)
        except Exception:
            yield item
            yield from self.except_handle(
                item["username"],
                "移动---解析历史账单失败: %s" % body,
                logout_request=self.get_logout_request(meta))
    def parse_payment_list(self, response):
        """
        Parse the payment records and finish the crawl.

        On result code ``0000`` the payments of every billing cycle are
        grouped by ``billCycle`` into ``item["history_payment"]`` (pre-seeded
        with the months returned by ``get_months_str_by_number(6)``) and
        ``crawling_done`` is invoked. Otherwise the item is emitted and the
        failure is reported through ``error_handle``.
        """
        body = response.text
        meta = response.meta
        item = meta["item"]
        username = item["username"]

        try:
            if '"resultCode":"0000"' not in body:
                yield item
                tell_msg = self.get_err_msg(body)
                yield from self.error_handle(username,
                                             "电信---获取缴费记录失败:(username:%s, password:%s) %s"
                                             % (username, item["password"], body),
                                             tell_msg=tell_msg)
                return

            self.logger.info("[电信-" + username + "]: 获取缴费记录成功!")

            # One response already carries all the billing cycles.
            cycles = json_loads(body)["responseData"]["data"]["billingCycles"]
            payments_by_month = {}
            for month in get_months_str_by_number(6).split(","):
                payments_by_month[month] = []

            for cycle in cycles:
                details = cycle["paymentDetails"]
                if not details:
                    continue

                for detail in details:
                    # stateDate is sliced as YYYYMMDD...; time is fixed to midnight.
                    paid_on = detail["stateDate"]
                    record = {
                        "time": paid_on[0:4] + "-" + paid_on[4:6] + "-" + paid_on[6:] + " 00:00:00",
                        "channel": detail["payChannelId"],
                        "fee": detail["paymentAmount"]
                    }
                    payments_by_month[cycle["billCycle"]].append(record)
            item["history_payment"] = payments_by_month

            # Everything is collected: hand back the crawl result.
            yield from self.crawling_done(item)
        except Exception:
            yield item
            yield from self.except_handle(username, "电信---解析缴费记录失败: %s" % body)
Exemple #21
0
    def parse_userview(self, response):
        """
        Parse the deposit detail list and request the loan information.

        Each entry of ``datalist`` is reduced to ``depmny`` / ``acctime`` /
        ``bustype`` (taken from ``fse`` / ``ywrq`` / ``ywlxms``) and the list
        is stored as ``item["payment_detail"]``. The loan-info endpoint is
        requested next.

        :param response: response whose ``meta`` carries the item.
        :return: generator of requests / handler output.
        """
        meta = response.meta
        item = meta["item"]
        try:
            payload = json_loads(response.text)
            item["payment_detail"] = [
                {
                    "depmny": entry["fse"],
                    "acctime": entry["ywrq"],
                    "bustype": entry["ywlxms"],
                }
                for entry in payload.get("datalist", [])
            ]

            # Next step: request the loan information endpoint.
            self.logger.info("请求贷款信息接口->%s" % self.userloan_url)
            form = {
                "zjh": item["identification_number"],
                "qqly": "1001",
                "key": self.key,
                "version": self.version,
            }
            yield FormRequest(url=self.userloan_url,
                              callback=self.parse_userloan,
                              headers=self.headers,
                              formdata=form,
                              meta=meta,
                              dont_filter=True,
                              errback=self.err_callback)
        except Exception:
            yield from self.except_handle(item["username"],
                                          "广州公积金中心---缴存明细数据解析异常")
Exemple #22
0
def xuexin_get_vcode(request):
    """
    Request an SMS verification code from the CHSI account service.

    POSTs to ``getmphonpincode.action`` with the form fields ``captch``,
    ``mobilePhone``, ``optType=REGISTER`` and ``ignoremphone=true``, using
    the cookies stored in the session under ``req_cookie``. Single quotes
    in the reply are replaced by double quotes and ``vcode_pattern`` matches
    are removed before the text is decoded as JSON.

    :param request: Django request; reads POST fields ``mphone`` and
        ``captch`` and the session.
    :return: ``JsonResponse`` with the decoded reply under ``result``.
    """
    post_args = request.POST
    form = {
        "captch": post_args.get("captch", ""),
        "mobilePhone": post_args.get("mphone", ""),
        "optType": "REGISTER",
        "ignoremphone": "true",
    }

    reply = http_post(
        "https://account.chsi.com.cn/account/getmphonpincode.action",
        data=form,
        headers=HEADERS,
        verify=False,
        cookies=request.session.get("req_cookie"))

    # Normalise the reply into something the JSON decoder accepts.
    cleaned = vcode_pattern.sub('', reply.text.replace("'", "\""))

    result = {"result": json_loads(cleaned)}
    add_ajax_ok_json(result)

    return JsonResponse(result)
Exemple #23
0
 def parse(self, response):
     """
     Handle one geocoding reply and schedule the next request.

     Status ``0`` emits a ``GpsspgItem`` with the coordinates. Status ``4``
     or ``>= 300`` marks the ``ak`` as unusable and re-queues the address.
     Any other status, or an exception, just re-queues the address.
     Afterwards the next request from ``get_request`` is yielded, if any.
     """
     meta = response.meta
     record_id = meta["id"]
     address = meta["address"]
     queue_entry = "%s|$|%s" % (record_id, address)
     ak = meta["ak"]
     try:
         reply = json_loads(response.text)
         status = reply["status"]
         if status == 0:
             location = reply["result"]["location"]
             item = GpsspgItem()
             item["id"] = record_id
             item["address"] = address
             item["lng"] = location["lng"]  # longitude
             item["lat"] = location["lat"]  # latitude
             # Disabled fields (precise lookup flag, confidence, address type):
             # item["precise"] = data["result"]["precise"]
             # item["confidence"] = data["result"]["confidence"]
             # item["level"] = data["result"]["level"]
             # print("id: %s, address: %s, lng: %s, lat: %s, precise: %s, confidence: %s, level: %s" %
             #       (item["id"], item["address"], item["lng"], item["lat"], item["precise"], item["confidence"], item["level"]))
             # The original note says the result is stored in MongoDB
             # downstream - not visible from this method.
             yield item
         elif status == 4 or status >= 300:
             self.logger.error("ak:%s, 当日请求超出配额,%s,%s" % (ak, address, record_id))
             # Quota exhausted for this ak: disable it and retry the address.
             self.set_ak(ak_key=ak, status=False)
             self.push_address_queue(queue_entry)
         else:
             self.logger.error("ak:%s, 请求出错,状态码:%s,%s,%s" %
                               (ak, status, address, record_id))
             self.push_address_queue(queue_entry)
     except Exception as e:
         self.logger.error("ak:%s, 处理出错,出错信息:%s,%s,%s" %
                           (ak, str(e), address, record_id))
         self.push_address_queue(queue_entry)
     next_request = self.get_request()
     if next_request:
         yield next_request
Exemple #24
0
def load_and_store_train_data(captcha_store_directory,
                              train_data_path,
                              loop_count=1):
    """
    Download JD login captchas, ask the operator to label the unseen ones
    and write the labels back to the training-data file.

    The training data is loaded from ``train_data_path`` and is keyed by the
    captcha's KNN distance, rounded to two decimals and converted to a
    string. For every downloaded captcha whose key is not present yet, the
    image is saved as ``captcha_<n>.jpg`` in ``captcha_store_directory``,
    shown on screen, and the label typed on stdin is recorded as
    ``(distance, label, file name)``.

    :param captcha_store_directory: directory receiving the captcha images.
    :param train_data_path: JSON file read at the start and rewritten at the end.
    :param loop_count: number of captchas to download.
    """
    with open(train_data_path, "r") as train_data_file:
        result = json_loads(train_data_file.read())

    # Number used for the next saved captcha file.
    size = len(result) + 1
    for _ in range(loop_count):
        # The login page carries the captcha URL (protocol-relative) in the
        # "src2" attribute of the #JD_Verification1 element.
        resp = http_get("https://passport.jd.com/new/login.aspx")
        soup = BeautifulSoup(resp.text, "html.parser")
        auth_code_input = soup.select_one("#JD_Verification1")
        image_url = auth_code_input["src2"]
        resp = http_get("https:%s" % image_url)
        with Image.open(BytesIO(resp.content)) as image:
            knn_distance = get_knn_distance(image)
            knn_distance = str(float('%.2f' % knn_distance))

            if knn_distance not in result:
                image.save("%s/captcha_%d.jpg" %
                           (captcha_store_directory, size))
                image.show()
                # Ask the operator to type the captcha text.
                label = input("请输入验证码:")
                label = label.strip()
                result[knn_distance] = (knn_distance, label,
                                        "captcha_%d.jpg" % size)
                size += 1

    with open(train_data_path, "w") as train_data_file:
        train_data_file.write(json_dumps(result))


# The sample invocation belongs under the __main__ guard: placed unguarded
# inside the function body it made every call recurse without end.
# Raw strings keep the Windows paths free of invalid escape sequences while
# producing exactly the same path values.
if __name__ == "__main__":
    load_and_store_train_data(
        r"F:\work\公司文档\爬虫\京东验证码",
        r"F:\software\pycharm\workspace\crawler\crawler_bqjr\crawler_bqjr\spiders\b2c_ecommerce_spiders\train_data.json",
        20)
    def parse(self, response):
        """
        Build the list of province/city/area query conditions from the area
        data embedded in the page, then send the first detail query.

        Conditions are appended to ``self.query_condition`` in reverse order
        and consumed with ``pop()``, so they are queried in page order.
        """
        raw_areas = self.area_regex.search(response.text).group(1)
        raw_areas = raw_areas.replace("//", "#").replace(";", "")
        raw_areas = self.replace_pattern.sub("", raw_areas)
        # Quote the bare object keys before the text is handed to json_loads.
        for bare_key in ('name', 'cityList', 'areaList'):
            raw_areas = raw_areas.replace(bare_key, '"%s"' % bare_key)

        provinces = json_loads(raw_areas)
        pending = self.query_condition
        for province_entry in reversed(provinces):
            province = province_entry["name"]
            for city_entry in reversed(province_entry["cityList"]):
                city = city_entry["name"]
                areas = city_entry["areaList"]
                if len(areas) == 0:
                    # A city without areas is queried with an empty area.
                    pending.append(
                        {"province": province, "city": city, "area": ""})
                    continue
                pending.extend(
                    {"province": province, "city": city, "area": area}
                    for area in reversed(areas))

        # Start querying with the condition on top of the stack.
        query = pending.pop()
        for field in ("province", "city", "area"):
            self.post_data[field] = query[field]
        self.current_condition = query
        # self.set_proxy(request)
        yield FormRequest(self.post_url,
                          self.parse_detail,
                          formdata=self.post_data)
    def parse_detail(self, response):
        """
        Emit the not-yet-seen people from one query result, then send the
        query for the next pending condition.

        When the site reports that the daily query limit is reached, the
        current condition is pushed back and the spider blocks in
        ``sleep_to_tomorrow()`` before carrying on.
        """
        payload = json_loads(response.text)
        error_msg = payload["ErrorMsg"]
        if error_msg == "成功":
            for entry in payload["data"]["areaList"]:
                person_name = entry["name"]
                card_num = entry["cardNum"]

                # Name plus card number is the de-duplication key.
                dedup_key = person_name + card_num
                if not self.is_search_name_exists(dedup_key):
                    self.record_search_name(dedup_key)

                    item = ShixinListItem()
                    item["from_web"] = "kuaicha"
                    item["name"] = person_name
                    item["id"] = card_num
                    yield item
        elif error_msg == "已达到每日查询次数上限":
            # self.proxy = choice(self.proxy_list)
            # Put the current condition back so that it is retried below.
            self.query_condition.append(self.current_condition)
            self.logger.info("今日查询次数达到上限,明日再查")
            sleep_to_tomorrow()

        pending = self.query_condition
        if not pending:
            self.logger.info("所有条件查询结束")
            return

        query = pending.pop()
        for field in ("province", "city", "area"):
            self.post_data[field] = query[field]
        self.current_condition = query
        # self.set_proxy(request)
        yield FormRequest(self.post_url,
                          self.parse_detail,
                          formdata=self.post_data)
Exemple #27
0
    def _parse_order_info(self, response):
        """
        Parse one page of the order list and schedule the following page.

        Every main order of the page is flattened into a dict (order number,
        creation time, fee, shop, status, sub-orders, logistics) and appended
        to ``item["orders"]``. While the current page is before the last one
        the next page is requested; on the last page the crawl is reported
        as done.

        :param response: order-list response; ``response.meta`` must carry
            ``item`` and ``cookies``.
        :return: generator of requests and/or items.
        """
        meta = response.meta
        item = meta["item"]
        orders = item.get("orders") or []
        try:
            json_page = json_loads(response.text)
            if not json_page.get("error"):
                page = json_page.get("page")
                curr_page = page.get("currentPage")
                total_page = page.get("totalPage")
                # total_num = page.get("totalNumber")
                main_orders = json_page.get("mainOrders", [])
                for one in main_orders:
                    temp_dic = {}
                    order_info = one.get("orderInfo", {})
                    pay_info = one.get("payInfo", {})
                    temp_dic["b2c"] = order_info.get("b2C")
                    temp_dic["order_no"] = order_info.get("id")
                    temp_dic["create_time"] = order_info.get("createTime")
                    temp_dic["order_fee"] = pay_info.get("actualFee")
                    temp_dic["post_type"] = pay_info.get("postType", "")
                    temp_dic["shop_name"] = one.get("seller",
                                                    {}).get("shopName")
                    temp_dic["status"] = one.get("statusInfo", {}).get("text")
                    # Sub-orders: keep only the goods title and the quantity.
                    temp_dic["sub_orders"] = [
                        {
                            "goods_name": sub_order.get("itemInfo",
                                                        {}).get("title"),
                            "quantity": sub_order.get("quantity"),
                        }
                        for sub_order in one.get("subOrders", [])
                    ]

                    # Logistics information comes from the order detail page.
                    detail_url = "https://tradearchive.taobao.com/trade/detail/" \
                                 "trade_item_detail.htm?bizOrderId={id}".format(id=temp_dic.get("order_no"))
                    logistics = self._get_logistics_info(
                        detail_url, cookies=meta["cookies"])
                    temp_dic["logistics"] = logistics

                    orders.append(temp_dic)
                item["orders"] = orders
                # Request the next page only while one exists. The previous
                # check (curr != total + 1) also asked for the page after the
                # last one and relied on the server echoing that page number
                # back in order to terminate.
                if int(curr_page) < int(total_page):
                    query_req = self.__generate_order_request(
                        meta, page_num=int(curr_page) + 1)
                    if query_req:
                        yield query_req
                    else:
                        yield item
                        yield from self.crawling_failed(
                            item["username"], "生成请求订单信息失败")
                else:
                    self.logger.info("---->所有信息抓取完成")
                    yield from self.crawling_done(item)
            else:
                yield item
                yield from self.crawling_failed(item["username"], "请求订单信息失败")
        except Exception:
            # Any parsing problem ends the crawl for this account; what was
            # collected so far is still emitted.
            yield item
            yield from self.except_handle(item["username"], "解析订单信息异常")
def _get_baidu_api_ret(url):
    """
    Call a Baidu API endpoint and return its decoded JSON body.

    :param url: full request URL.
    :return: the value ``json_loads`` produces for the response body.
    """
    req = Request(url)
    # NOTE(review): the API key is hard-coded in the source.
    req.add_header('apikey', '72a0d84c0c95a75cd5aa8c1c3e698946')
    # Close the HTTP response deterministically, even when read() raises.
    with urlopen(req, timeout=ASK_TIMEOUT) as resp:
        body = resp.read()
    return json_loads(body)
Exemple #29
0
    def parse_payment(self, response):
        """
        Parse one month of payment records and schedule the next step.

        The month's payments are stored under ``item["history_payment"]``
        keyed by ``YYYYMM``. An unrecognised reply is re-requested once;
        a second failure is only logged. Afterwards either the previous
        month is requested or the finished item is handed to
        ``crawling_done``.
        """
        text = response.text
        meta = response.meta
        item = meta["item"]
        try:
            this_month = meta["last_payment_month"]
            month_key = this_month.strftime("%Y%m")
            # Meta key recording that this month was already retried once.
            retry_flag = "payment_" + month_key + "_retry"
            if '"pageMap":' in text:
                page_map = json_loads(text)["pageMap"]

                total_count = page_map.get("totalCount", 0)
                # NOTE(review): the log calls in this method write the
                # account password in clear text.
                if total_count > self.CALL_PAGE_SIZE_LIMIT:
                    self.logger.error("联通---单月交费次数过大:username:%s, "
                                      "password:%s, month:%s, totalCount:%d" %
                                      (item["username"], item["password"],
                                       this_month, total_count))

                payments = []
                for record in page_map["result"]:
                    payments.append({
                        "time": record["paydate"],
                        "fee": float(record["payfee"]),
                        "channel": record["payment"],
                    })

                # Report a mismatch between the announced and parsed counts.
                parsed_count = len(payments)
                if parsed_count != total_count:
                    self.logger.error(
                        "联通---交费次数异常:username:%s, password:%s, "
                        "month:%s, totalCount:%d, payment_list_len:%d" %
                        (item["username"], item["password"], this_month,
                         total_count, parsed_count))

                item["history_payment"][month_key] = payments
            elif 'respCode":"2114000283"' in text:
                # Per the original comment this code means "no payment
                # records" for the month.
                item["history_payment"][month_key] = []
            elif retry_flag in meta:
                # Already retried once: log the failure and move on.
                self.logger.error(
                    "联通---获取交费记录失败:(username:%s, password:%s, month:%s) %s" %
                    (item["username"], item["password"], this_month, text))
            else:
                # First failure: re-send the same request once.
                self.logger.error("联通---重试交费记录:(username:%s, password:%s) %s" %
                                  (item["username"], item["password"], text))
                sleep(0.6)
                retry_request = response.request.copy()
                retry_request.meta[retry_flag] = 1
                yield retry_request
                return

            # Continue with the previous month (the original comment says
            # only the most recent six months are wanted).
            if meta["payment_count"] < self.CALL_COUNT_LIMIT:
                previous_month = get_last_month_from_date(this_month)
                yield self._get_payment_request(response, previous_month)
            else:
                # Everything is collected: return the crawl result.
                yield from self.crawling_done(item)
        except Exception:
            yield item
            yield from self.except_handle(item["username"],
                                          "联通---解析交费记录失败: %s" % text)
Exemple #30
0
    def parse_msgDetail(self, response):
        """
        Parse one month of SMS records and schedule the next step.

        The month's messages are stored under ``item["history_msg"]`` keyed
        by ``YYYYMM``. An unrecognised reply is re-requested once; a second
        failure is only logged. Afterwards either the previous month is
        requested or the bill history crawl is started.
        """
        text = response.text
        meta = response.meta
        item = meta["item"]
        try:
            this_month = meta["last_msg_month"]
            month_key = this_month.strftime("%Y%m")
            # Meta key recording that this month was already retried once.
            retry_flag = "msg_" + month_key + "_retry"
            if '"pageMap":' in text:
                page_map = json_loads(text)["pageMap"]

                total_count = page_map.get("totalCount", 0)
                # NOTE(review): the log calls in this method write the
                # account password in clear text.
                if total_count > self.CALL_PAGE_SIZE_LIMIT:
                    self.logger.error("联通---单月短信次数过大:username:%s, "
                                      "password:%s, month:%s, totalCount:%d" %
                                      (item["username"], item["password"],
                                       this_month, total_count))

                messages = []
                for record in page_map["result"]:
                    sent_at = record["smsdate"] + " " + record["smstime"]
                    # smstype "2" marks a sent message, anything else a
                    # received one.
                    if "2" == record["smstype"]:
                        direction = MsgType.Send
                    else:
                        direction = MsgType.Receive
                    messages.append({
                        "time": sent_at,
                        "type": direction,
                        "other_num": record["othernum"],
                    })

                # Report a mismatch between the announced and parsed counts.
                parsed_count = len(messages)
                if parsed_count != total_count:
                    self.logger.error(
                        "联通---短信次数异常:username:%s, password:%s, "
                        "month:%s, totalCount:%d, msg_list_len:%d" %
                        (item["username"], item["password"], this_month,
                         total_count, parsed_count))

                item["history_msg"][month_key] = messages
            elif 'respCode":"2114030170"' in text:
                # Per the original comment this code means "no records"
                # for the month.
                item["history_msg"][month_key] = []
            elif retry_flag in meta:
                # Already retried once: log the failure and move on.
                self.logger.error(
                    "联通---获取短信记录失败:(username:%s, password:%s, month:%s) %s" %
                    (item["username"], item["password"], this_month, text))
            else:
                # First failure: re-send the same request once.
                self.logger.error("联通---重试短信记录:(username:%s, password:%s) %s" %
                                  (item["username"], item["password"], text))
                sleep(0.6)
                retry_request = response.request.copy()
                retry_request.meta[retry_flag] = 1
                yield retry_request
                return

            # Continue with the previous month's SMS records (the original
            # comment says only the most recent six months are wanted).
            if meta["msg_count"] < self.CALL_COUNT_LIMIT:
                previous_month = get_last_month_from_date(this_month)
                yield self._get_msgDetail_request(response, previous_month)
            else:
                # SMS history is complete: start fetching the bill history.
                meta["bill_count"] = 0
                item["history_bill"] = defaultdict(dict)
                yield self._get_historyBill_request(response, date.today())
        except Exception:
            yield item
            yield from self.except_handle(item["username"],
                                          "联通---解析短信记录失败: %s" % text)