예제 #1
0
 def _get_callDetail_image_captcha(self, response, username):
     """Download the image captcha served by ``/i/authImg``.

     The cookie jar and headers are derived from ``response``. Before the
     image is requested, the headers are handed to
     ``set_image_captcha_headers_to_ssdb`` together with ``username``.

     :param response: response whose cookies/headers seed the request.
     :param username: key passed along when storing the headers.
     :return: the value returned by ``get_content_by_requests``.
     """
     jar = get_cookiejar_from_response(response)
     request_headers = get_headers_from_response(response)
     # A random query value is appended to the image URL on every call.
     nonce = rand_0_1()
     captcha_url = "http://shop.10086.cn/i/authImg?t=" + str(nonce)
     self.set_image_captcha_headers_to_ssdb(request_headers, username)
     image = get_content_by_requests(
         captcha_url, headers=request_headers, cookie_jar=jar)
     return image
예제 #2
0
    def parse(self, response):
        """Entry callback: submit the login form, solving a captcha if asked.

        Side effects on ``response.meta``: stores a copy of ``self.headers``
        under ``'headers'``, sets ``'captcha_retry_time'`` to 5 and resets
        ``item['xueli']`` to an empty list.

        A 302 response is delegated straight to ``parse_login``. Otherwise
        the ``lt`` hidden field is read from the page, an image captcha is
        fetched and answered when the page contains a captcha input or the
        ``ct_input errors`` block, and a ``FormRequest`` to
        ``self._start_url_`` is yielded. Any exception raised while doing so
        is routed to ``self.except_handle``.
        """
        request_headers = self.headers.copy()
        meta = response.meta
        meta['headers'] = request_headers
        meta['captcha_retry_time'] = 5
        item = meta["item"]
        item['xueli'] = []

        if response.status == 302:
            # Redirect responses are handled by the login-response callback.
            yield from self.parse_login(response)
            return

        try:
            self.logger.info("请求登录接口->%s" % self.user_login)
            lt_value = response.xpath(
                '//input[@name="lt"]/@value').extract_first("")
            captcha_answer = None

            captcha_input = response.xpath(
                '//input[@id="captcha"]').extract_first()
            if captcha_input or response.xpath(
                    '//div[@class="ct_input errors"]').extract_first():
                # The page asks for a captcha: this counts as one attempt.
                meta['captcha_retry_time'] -= 1
                jar = get_cookiejar_from_response(response)
                image_url = (
                    "https://account.chsi.com.cn/passport/captcha.image?id="
                    + str(random()))
                # The captcha image is requested through the same proxy
                # recorded in the response meta.
                proxy_map = {
                    "https": response.meta['proxy'],
                    "http": response.meta['proxy'],
                }
                image_bytes = get_content_by_requests(image_url,
                                                      request_headers,
                                                      cookie_jar=jar,
                                                      proxies=proxy_map)
                captcha_answer = self.ask_image_captcha(image_bytes,
                                                        item['username'],
                                                        file_type=".jpeg")

            form_fields = self.get_req_data(self.user_login,
                                            user_name=item["username"],
                                            password=item["password"],
                                            lt=lt_value,
                                            captcha=captcha_answer)
            self.logger.debug(form_fields)

            # A missing Set-Cookie header raises here and ends up in the
            # except branch below.
            request_headers['Cookie'] = response.headers.get(
                'Set-Cookie').decode()
            request_headers['Referer'] = self._start_url_
            yield FormRequest(headers=request_headers,
                              url=self._start_url_,
                              callback=self.parse_login,
                              formdata=form_fields,
                              meta=meta,
                              errback=self.err_callback,
                              dont_filter=True)
        except Exception:
            yield from self.except_handle(meta["item"]["username"],
                                          "学信网---爬虫解析入口异常")
예제 #3
0
 def _verify_callDetail_captcha(self, response, username, captcha_code):
     """Check a call-detail captcha answer against the precheck endpoint.

     :param response: response whose cookies/headers seed the request.
     :param username: appended to the precheck URL path.
     :param captcha_code: sent as the ``captchaVal`` query value.
     :return: ``True`` when the reply contains ``"retCode":"000000"``.
     """
     jar = get_cookiejar_from_response(response)
     request_headers = get_headers_from_response(response)
     precheck_url = "".join([
         "http://shop.10086.cn/i/v1/res/precheck/",
         username,
         "?captchaVal=",
         captcha_code,
         "&_=",
         get_js_time(),
     ])
     reply = get_content_by_requests(precheck_url,
                                     headers=request_headers,
                                     cookie_jar=jar)
     accepted = b'"retCode":"000000"' in reply
     return accepted
예제 #4
0
 def verify_captcha(self, response, captcha_code):
     """Check a captcha answer without submitting the login form.

     China Mobile exposes a URL that validates the captcha on its own.

     :param response: response whose cookies/headers seed the request.
     :param captcha_code: the answer to verify (a ``str``).
     :return: ``True`` when the reply contains ``"resultCode":"0"``.
     """
     jar = get_cookiejar_from_response(response)
     request_headers = get_headers_from_response(response)
     # Non-ASCII characters become ``\uXXXX`` escapes; the ``\u`` prefix is
     # then dropped so only the hex digits remain in the query value.
     escaped = captcha_code.encode('unicode-escape', "ignore")
     input_code = escaped.replace(b"\\u", b"")
     verify_url = b"https://login.10086.cn/verifyCaptcha?inputCode=" + input_code
     reply = get_content_by_requests(verify_url,
                                     headers=request_headers,
                                     cookie_jar=jar)
     return b'"resultCode":"0"' in reply
예제 #5
0
 def check_need_sms_captcha(self, response, username):
     """Ask ``needVerifyCode.htm`` whether this account needs a verify code.

     :param response: response whose cookies/headers seed the request.
     :param username: sent as the ``account`` query value.
     :return: ``True`` when the reply contains ``"needVerifyCode":"1"``.
     """
     jar = get_cookiejar_from_response(response)
     request_headers = get_headers_from_response(response)
     # ``pwdType`` is deliberately not sent (it was disabled in the
     # original query).
     query = urlencode({
         "accountType": "01",
         "account": username,
         "timestamp": get_js_time(),
     })
     check_url = "https://login.10086.cn/needVerifyCode.htm?" + query
     reply = get_content_by_requests(check_url,
                                     headers=request_headers,
                                     cookie_jar=jar)
     return b'"needVerifyCode":"1"' in reply
예제 #6
0
    def get_unisecid_request(self, response):
        """Request ``/oauth2/genqr`` and return its ``Set-Cookie`` as a dict.

        The first ``name=value`` pair of the header is stored under the
        keys ``'name'`` and ``'value'``; every following ``;``-separated
        attribute is stored under its own attribute name.

        Attribute names are kept exactly as they appear in the header
        (whitespace after ``;`` is not stripped) so the keys stay the same
        as before for callers. Valueless attributes such as ``HttpOnly``
        map to an empty string, and empty segments (e.g. from a trailing
        ``;``) are skipped.

        :param response: response whose cookies/headers seed the request.
        :return: dict describing the cookie.
        :raises AttributeError: if the reply carries no ``Set-Cookie``.
        :raises ValueError: if the first segment has no ``=`` at all.
        """
        cookiejar = get_cookiejar_from_response(response)
        headers = get_headers_from_response(response)
        the_time = get_js_time()

        url = "https://uac.10010.com/oauth2/genqr?" + the_time
        r = get_response_by_requests(url,
                                     headers=headers,
                                     cookie_jar=cookiejar)
        cookie = r.headers.get('Set-Cookie')
        k_v_list = cookie.split(';')

        # Split on the first '=' only: cookie values may contain '='
        # themselves (e.g. base64 padding).
        name, value = k_v_list[0].split('=', 1)
        result = {'name': name, 'value': value}

        for k_v in k_v_list[1:]:
            if not k_v.strip():
                # Empty segment produced by a trailing or doubled ';'.
                continue
            # partition() tolerates flag attributes that have no '='.
            k, _, v = k_v.partition('=')
            result[k] = v

        return result
예제 #7
0
    def parse(self, response):
        """Entry callback: solve the image captcha and submit the login form.

        The captcha image is requested (via POST) from ``imgrc.do`` with the
        cookies and headers of ``response`` and recognised by
        ``self.parse_capatcha``. The Struts token and ``date`` hidden fields
        are read from the page, and a ``FormRequest`` to ``login.do`` is
        yielded with ``parse_login`` as its callback. Any exception raised
        in the process is routed to ``self.except_handle``.
        """
        meta = response.meta
        item = meta["item"]
        username = item["username"]
        password = item["password"]

        try:
            jar = get_cookiejar_from_response(response)
            captcha_headers = get_headers_from_response(response)
            captcha_headers['Referer'] = response.url
            captcha_url = "https://ipcrs.pbccrc.org.cn/imgrc.do?" + get_js_time()
            image_bytes = get_content_by_requests_post(
                captcha_url, headers=captcha_headers, cookie_jar=jar)
            captcha_code = self.parse_capatcha(image_bytes)
            self.logger.info("验证码识别结果:%s" % captcha_code)

            # Hidden fields that must be echoed back with the login form.
            token_xpath = (
                "//input[@name='org.apache.struts.taglib.html.TOKEN']/@value")
            struts_token = response.xpath(token_xpath).extract_first("")
            page_date = response.xpath(
                "//input[@name='date']/@value").extract_first("")

            login_form = {
                "org.apache.struts.taglib.html.TOKEN": struts_token,
                "method": "login",
                "date": page_date,
                "loginname": username,
                "password": password,
                "_@IMGRC@_": captcha_code,
            }

            # The login request uses self.headers, not the captcha headers.
            yield FormRequest("https://ipcrs.pbccrc.org.cn/login.do",
                              headers=self.headers,
                              formdata=login_form,
                              callback=self.parse_login,
                              meta=meta,
                              dont_filter=True)
        except Exception:
            yield from self.except_handle(username,
                                          msg="人行征信---登录入口解析失败",
                                          tell_msg="个人信息报告数据爬取失败,请刷新页面重试!")
예제 #8
0
    def request_sms_code(self, response, username):
        """Ask China Mobile to send the SMS verification code for login.

        :param response: response whose cookies/headers seed the request.
        :param username: sent as the ``userName`` form field.
        :return: ``True`` when the reply body is exactly ``b'0'``.
        """
        jar = get_cookiejar_from_response(response)
        request_headers = get_headers_from_response(response)

        # A preliminary POST to chkNumberAction.action used to sit here but
        # was already disabled; its purpose is unknown.
        payload = {
            "userName": username,
            "type": "01",
            "channelID": "12003",
        }
        send_url = "https://login.10086.cn/sendRandomCodeAction.action"
        reply = get_content_by_requests_post(send_url,
                                             data=payload,
                                             headers=request_headers,
                                             cookie_jar=jar)
        return reply == b'0'
예제 #9
0
 def get_need_captcha_response(self, response, username, pwd_type="02"):
     """Ask the ``CheckNeedVerify`` endpoint whether a captcha is needed.

     :param response: response whose cookies/headers seed the request.
     :param username: sent as the ``userName`` query value.
     :param pwd_type: sent as the ``pwdType`` query value.
     :return: the value returned by ``get_response_by_requests``.
     """
     cookiejar = get_cookiejar_from_response(response)
     headers = get_headers_from_response(response)
     the_time = get_js_time()
     form_data = {
         'userName': username,
         'pwdType': pwd_type,
         '_': int(the_time) + 1,
         # Integer bounds on purpose: float arguments to randint are
         # rejected on Python 3.12+, and the float ``1E17 - 1`` rounds up
         # to ``1e17``, which allowed an 18-digit value. This always
         # yields a 17-digit suffix.
         'callback':
         "jQuery1720" + str(randint(10**16, 10**17 - 1)) + "_" + the_time,
     }
     url = "http://uac.10010.com/portal/Service/CheckNeedVerify?" + urlencode(
         form_data)
     return get_response_by_requests(url,
                                     headers=headers,
                                     cookie_jar=cookiejar)
예제 #10
0
 def parse_login(self, response):
     """Handle the response to a login form submission.

     * Status 302: the ``Location`` header, when present, is followed with
       a request whose callback is ``parse_getJsession``; a missing
       ``Location`` is reported through ``error_handle``.
     * Any other status:
         - a non-empty ``div#status`` text is reported as the error
           message;
         - a captcha prompt (captcha input or ``ct_input errors`` block)
           decrements ``meta['captcha_retry_time']``; once it drops below
           zero the login is abandoned, otherwise a new captcha is fetched
           and answered and the login form is submitted again with this
           method as callback;
         - anything else is reported as a wrong account or password.
     """
     meta = response.meta
     item = meta["item"]
     if response.status != 302:
         # Evaluate the status message once and reuse it as the user-facing
         # text.
         status_text = response.xpath(
             '//div[@id="status"]/text()').extract_first()
         if status_text:
             yield from self.error_handle(item["username"],
                                          "%s 账号或密码错误" % item["username"],
                                          tell_msg=status_text)
             return
         if response.xpath('//input[@id="captcha"]').extract_first() \
                 or response.xpath('//div[@class="ct_input errors"]').extract_first():
             meta['captcha_retry_time'] -= 1
             if meta['captcha_retry_time'] < 0:
                 yield from self.error_handle(item["username"],
                                              "%s 图片验证码请求五次,退出" %
                                              item["username"],
                                              tell_msg='验证码已刷新五次,请重试')
                 return
             lt = response.xpath(
                 '//input[@name="lt"]/@value').extract_first("")
             cookiejar = get_cookiejar_from_response(response)
             url = "https://account.chsi.com.cn/passport/captcha.image?id=" + str(
                 random())
             # The headers dict lives in meta, so changes made here carry
             # over to later callbacks.
             headers = meta['headers']
             # NOTE(review): no proxies are passed for this captcha fetch;
             # confirm whether it should use the proxy recorded in meta.
             captcha_body = get_content_by_requests(url,
                                                    headers,
                                                    cookie_jar=cookiejar)
             captcha_code = self.ask_image_captcha(captcha_body,
                                                   item['username'],
                                                   file_type=".jpeg")
             req_data = self.get_req_data(self.user_login,
                                          user_name=item["username"],
                                          password=item["password"],
                                          lt=lt,
                                          captcha=captcha_code)
             try:
                 headers['Cookie'] = response.headers.get(
                     'Set-Cookie').decode()
             except (AttributeError, UnicodeDecodeError):
                 # Best effort: with no (decodable) Set-Cookie in this
                 # response, whatever Cookie header is already present is
                 # kept. Only the errors this expression can raise are
                 # tolerated.
                 self.logger.debug(
                     "no usable Set-Cookie in response; "
                     "keeping existing Cookie header")
             self.logger.debug(req_data)
             self.logger.debug(headers)
             r = FormRequest(headers=headers,
                             url=self._start_url_,
                             callback=self.parse_login,
                             formdata=req_data,
                             meta=meta,
                             errback=self.err_callback,
                             dont_filter=True)
             yield r
         else:
             yield from self.error_handle(item["username"],
                                          "%s 账号或密码错误" % item["username"],
                                          tell_msg='账号或密码错误')
             return
     else:
         try:
             get_jsession_url = response.headers.get('Location')
             if get_jsession_url:
                 get_jsession_url = get_jsession_url.decode()
                 self.logger.info("请求获取sessionid接口->%s" % get_jsession_url)
                 headers = meta['headers']
                 headers['Referer'] = response.url
                 yield Request(headers=meta['headers'],
                               url=get_jsession_url,
                               callback=self.parse_getJsession,
                               meta=meta,
                               errback=self.err_callback,
                               dont_filter=True)
             else:
                 yield from self.error_handle(item["username"],
                                              "%s 账号或密码错误" %
                                              item["username"],
                                              tell_msg='账号或密码错误')
         except Exception:
             yield from self.except_handle(item["username"],
                                           "学信网---登录数据解析异常")