async def get(self):
    """Proxy the housing-fund captcha image to the client.

    Fetches the captcha from the upstream site, forwards the upstream
    ``Set-Cookie`` header unchanged (the PHPSESSID session must accompany
    the later login request) and writes the raw image bytes as the
    response body.  On any failure a JSON error payload is sent instead.
    """
    captcha_url = ("http://www.cdzfgjj.gov.cn/api.php?op=checkcode"
                   "&code_len=4&font_size=20&width=130&height=50")
    headers = {"User-Agent": fake_useragent()}
    try:
        request = HTTPRequest(captcha_url, method="GET", headers=headers)
        response = await self.browser.fetch(request)
        if response.code == 200:
            # Session cookie is issued together with the captcha image.
            cookie = response.headers.get_list("Set-Cookie")[0]
            # Debug print replaced with the handler's logger.
            self.logger.debug("in captcha handler, cookie: %s", cookie)
            # Raw string: plain '\s' is an invalid escape sequence.
            m = re.match(r'PHPSESSID=(.*?);\s', cookie)
            if not m:
                self.logger.debug("get cookie error")
                raise HTTPError
            # Forward the upstream cookie untouched so the client can
            # replay it on the login request.
            self.set_header("Set-Cookie", cookie)
            self.set_header("Content-Type", 'image')
            self.write(response.body)
        else:
            self.logger.debug("none 200 response code")
            raise HTTPError
    except HTTPError:
        self.logger.error("[ InvoiceCaptchaHandler - get() ] caught HTTPError")
        self.send_json_response({"msg": "invoice captcha error"}, 0)
async def get_detail(url):
    """Fetch *url* and extract per-post detail records from its inline script.

    The page renders detail text through jQuery ``.html("...")`` calls
    guarded by ``start_num == <id>`` branches; post NO. 6 lives in the
    trailing ``else`` branch instead of a numbered guard.

    :param url: detail page URL (shared by all posts).
    :returns: ``{post_id: {"description", "result", "report_date"}}``.
    """
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await AsyncHTTPClient().fetch(request)
    html = response.body.decode("utf-8")
    # Raw strings throughout: '\d', '\(' and '\)' are invalid escape
    # sequences in plain string literals.
    script = re.compile(r'<script>(.+?)</script>', re.DOTALL).findall(html)[0]
    matches = re.compile(r'start_num==(\d)\){(.+?)}', re.DOTALL).findall(script)
    # Hoisted: this pattern is reused for every match and for the else branch.
    content_re = re.compile(r'html\("(.+?)"\);')
    details = {}
    for match in matches:
        pid = int(match[0])
        contents = content_re.findall(match[1])
        details[pid] = {
            "description": contents[0],
            "result": contents[1],
            "report_date": contents[2],
        }
    # Process NO. 6: its content sits in the final "else" branch.
    last_else_script = re.compile(r'else.?{(.+?)}', re.DOTALL).findall(script)[0]
    last_content = content_re.findall(last_else_script)
    details[6] = {
        "description": last_content[0],
        "result": last_content[1],
        "report_date": last_content[2],
    }
    return details
async def prepare(self):
    """Log in to the Chengdu housing-fund site before the request handler runs.

    The client supplies the PHPSESSID (obtained earlier from the captcha
    endpoint) plus card number, password and the solved captcha as query
    arguments.  On success the session stored in ``self.COOKIE`` is
    authenticated and usable by the handler method.
    """
    login_url = "http://www.cdzfgjj.gov.cn/index.php?m=content&c=gjj&a=login"
    tmp_cookie = self.get_query_argument("PHPSESSID")
    # The cookie value arrives URL-encoded; rebuild the raw Cookie header.
    self.COOKIE = "PHPSESSID=" + unquote(tmp_cookie)
    print("in cdfund handler", self.COOKIE)
    card = self.get_query_argument("card")
    password = self.get_query_argument("password")
    captcha = self.get_query_argument("captcha")
    # Upstream login form field names.
    data = {
        "cardNo": card,
        "password": password,
        "verifyCode": captcha,
    }
    headers = {
        "User-Agent": fake_useragent(),
        "Content-Type": "application/x-www-form-urlencoded",
        "Cookie": self.COOKIE,
    }
    request = HTTPRequest(login_url, method="POST", headers=headers,
                          body=urlencode(data))
    response = await self.browser.fetch(request)
    if response.code == 200:
        print("login success")
    else:
        self.logger.debug("none 200 response code")
        raise HTTPError
async def do_process_logic(self, *args):
    """Search the SFDA (food & drug) database for *query* within *category*.

    ``args[0]`` selects the category, which must be a key of
    ``self.POST_DATA``; the ``query`` and optional ``page`` query
    arguments drive the upstream POST.  Sends a list of
    ``{"title", "link"}`` dicts as the JSON response.
    """
    category = args[0].lower()
    url_template = "http://app1.sfda.gov.cn/datasearch/face3/search.jsp"
    base_url_template = "http://app1.sfda.gov.cn/datasearch/face3/{}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "app1.sfda.gov.cn",
        "Origin": "http://app1.sfda.gov.cn",
        "User-Agent": fake_useragent(),
        "Referer": "http://app1.sfda.gov.cn/datasearch/face3/base.jsp?tableId=120&tableName=TABLE120&title=%CA%B3%C6%B7%C9%FA%B2%FA%D0%ED%BF%C9%BB%F1%D6%A4%C6%F3%D2%B5(SC)&bcId=145275419693611287728573704379",
        "Cookie": fetch_cookie()
    }
    query = self.get_query_argument("query")
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else "1"
    # Guard clause: unknown category means there is nothing to search.
    if category not in self.POST_DATA:
        self.send_json_response(self.config["error"]["REQUEST_ERR"], 0)
        return
    # Copy the template: POST_DATA is shared class-level state and must not
    # be mutated with per-request keyword/page values (previously every
    # request polluted the shared dict).
    data = dict(self.POST_DATA[category])
    data["keyword"] = quote(query, encoding="utf-8")
    data["curstart"] = page
    request = HTTPRequest(url_template, method="POST", headers=headers,
                          body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8").strip()
    root = lxml.html.fromstring(html)
    links = root.xpath('//table/tr/td/p/a')
    result_list = []
    for link in links:
        # href is a JS call; the second comma-separated argument is the
        # quoted relative content URL.
        href = link.attrib["href"].split(',')[1].strip("'")
        match = re.match(r'(content.+tableView=)(.+)(&Id.+)', href)
        # Re-quote the table-title segment in gb2312, as the site expects.
        href = "".join([
            match.group(1),
            quote(match.group(2), encoding="gb2312"),
            match.group(3)
        ])
        result_list.append({
            "title": link.text_content().strip(),
            "link": base_url_template.format(href)
        })
    self.send_json_response(result_list)
async def get_detail(url):
    """Fetch *url* and parse per-post details out of its inline <script>.

    Detail text is emitted via jQuery ``.html("...")`` calls inside
    ``start_num == <id>`` branches; the sixth post is handled by the
    final ``else`` branch.

    :param url: shared detail-page URL.
    :returns: ``{post_id: {"description", "result", "report_date"}}``.
    """
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await AsyncHTTPClient().fetch(request)
    html = response.body.decode("utf-8")
    # Raw strings: '\d', '\(' and '\)' are invalid escapes in plain literals.
    script = re.compile(r'<script>(.+?)</script>', re.DOTALL).findall(html)[0]
    matches = re.compile(r'start_num==(\d)\){(.+?)}', re.DOTALL).findall(script)
    # Compile once; the same pattern serves every branch below.
    content_re = re.compile(r'html\("(.+?)"\);')
    details = {}
    for match in matches:
        pid = int(match[0])
        contents = content_re.findall(match[1])
        details[pid] = {
            "description": contents[0],
            "result": contents[1],
            "report_date": contents[2],
        }
    # Process NO. 6 — found in the trailing "else" branch.
    last_else_script = re.compile(r'else.?{(.+?)}', re.DOTALL).findall(script)[0]
    last_content = content_re.findall(last_else_script)
    details[6] = {
        "description": last_content[0],
        "result": last_content[1],
        "report_date": last_content[2],
    }
    return details
async def do_process_logic(self):
    """Scrape the Chengdu EPB page and return the current city AQI data.

    Sends a one-element JSON list containing the headline index, level,
    main pollutant, formatted update time, and per-pollutant readings.
    """
    # URL where the source data (page) is located.
    url = "http://www.cdepb.gov.cn/cdepbws/Web/gov/airquality.aspx"
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    # ---- BEGIN page-specific analysis ----
    city_aqi_div = root.xpath('//div[@class="CityAQI"]')[0]
    main_index = city_aqi_div.xpath('.//span[@id="ContentBody_AqiData"]')[0].text_content()
    aqi_level = city_aqi_div.xpath('.//span[@id="ContentBody_StdName"]')[0].text_content()
    main_pollution = city_aqi_div.xpath('.//span[@id="ContentBody_FirstPoll"]')[0].text_content()
    update_time = city_aqi_div.xpath('.//span[@id="ContentBody_AQITime"]')[0].text_content()
    # Pull the numeric fragments out of the free-form timestamp text.
    # Raw string: plain '\d' is an invalid escape sequence.
    matches = re.findall(r'\d+', str(update_time))
    update_time = "{}-{}-{} {}".format(*matches)
    aqi = {
        "main_index": main_index,
        "main_pollution": main_pollution,
        "aqi_level": aqi_level,
        "time": update_time
    }
    # Per-pollutant table: one row per pollutant (name, index columns).
    pollution_table = city_aqi_div.xpath('//tbody')[0]
    pollutions = [
        {
            "pollution": tr.xpath('.//td')[0].text_content(),
            "index": tr.xpath('.//td')[1].text_content(),
        }
        for tr in pollution_table.xpath('.//tr')
    ]
    aqi["pollutions"] = pollutions
    # ---- END page-specific analysis ----
    self.send_json_response([aqi])
def make_request(self, method, url, start=1, end=20):
    """Build an HTTPRequest carrying this object's session cookie.

    GET requests take no body; any other method posts the paging window
    (``startRow``/``endRow``) as a form-encoded body.
    """
    request_headers = {
        "User-Agent": fake_useragent(),
        "Cookie": self.session,
    }
    if method == "get":
        return HTTPRequest(url, method=method.upper(), headers=request_headers)
    payload = urlencode({"startRow": start, "endRow": end})
    return HTTPRequest(url, method=method.upper(), headers=request_headers,
                       body=payload)
async def do_process_logic(self, *args):
    """Query the SFDA database for *query* in the category named by args[0].

    Responds with a JSON list of ``{"title", "link"}`` entries, or a
    REQUEST_ERR payload for an unknown category.
    """
    category = args[0].lower()
    url_template = "http://app1.sfda.gov.cn/datasearch/face3/search.jsp"
    base_url_template = "http://app1.sfda.gov.cn/datasearch/face3/{}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "app1.sfda.gov.cn",
        "Origin": "http://app1.sfda.gov.cn",
        "User-Agent": fake_useragent(),
        "Referer": "http://app1.sfda.gov.cn/datasearch/face3/base.jsp?tableId=120&tableName=TABLE120&title=%CA%B3%C6%B7%C9%FA%B2%FA%D0%ED%BF%C9%BB%F1%D6%A4%C6%F3%D2%B5(SC)&bcId=145275419693611287728573704379",
        "Cookie": fetch_cookie()
    }
    query = self.get_query_argument("query")
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else "1"
    # Guard clause instead of a redundant else-after-return.
    if category not in self.POST_DATA:
        self.send_json_response(self.config["error"]["REQUEST_ERR"], 0)
        return
    # Work on a copy: POST_DATA is shared class-level state; writing the
    # per-request keyword/page into it leaked across requests.
    data = dict(self.POST_DATA[category])
    data["keyword"] = quote(query, encoding="utf-8")
    data["curstart"] = page
    request = HTTPRequest(url_template, method="POST", headers=headers,
                          body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8").strip()
    root = lxml.html.fromstring(html)
    links = root.xpath('//table/tr/td/p/a')
    result_list = []
    for link in links:
        # The second comma-separated JS argument is the quoted content URL.
        href = link.attrib["href"].split(',')[1].strip("'")
        match = re.match(r'(content.+tableView=)(.+)(&Id.+)', href)
        # The title segment must be percent-encoded as gb2312 for the site.
        href = "".join([match.group(1),
                        quote(match.group(2), encoding="gb2312"),
                        match.group(3)])
        result_list.append({
            "title": link.text_content().strip(),
            "link": base_url_template.format(href)
        })
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Query Chengdu real-estate agency records (BusSearch, Class=13).

    The client must supply the session obtained earlier (delivered under
    the fixed cookie name JSESSIONID_INVOICE, rebuilt here as the
    upstream NETSCAPE_ID cookie) together with the agency type, name and
    the solved captcha.  Responds with the parsed result-table rows.
    """
    url = "http://www.cdfgj.gov.cn/BusinessQuery/BusSearch.aspx?action=ucEnterpriseQuery&Class=13"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": fake_useragent()
    }
    # Rebuild the upstream session cookie from the URL-encoded value the
    # client passes in the JSESSIONID_INVOICE query argument.
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    cookie = "NETSCAPE_ID=" + unquote(tmp_cookie)
    headers["Cookie"] = cookie
    org_type = self.get_query_argument("type")
    org_name = self.get_query_argument("name")
    captcha = self.get_query_argument("captcha")
    # ASP.NET WebForms post: __VIEWSTATE / __VIEWSTATEGENERATOR are fixed
    # values captured from the live form; ImageButton4.x/.y emulate the
    # click coordinates on the image submit button.
    data = {
        "__VIEWSTATEGENERATOR": "74F44EA4",
        "ID_ucEnterpriseQuery$ddlEnterpriseType": org_type,
        "ID_ucEnterpriseQuery$txtName": org_name,
        "ID_ucEnterpriseQuery$txtRandomCode": captcha,
        "__VIEWSTATE": "/7q+lMJNWwtjfXt4wCcX/buarTl1CXpolPC5i21QOAmuD5888znPXyJ4SlAAfk0HmfeeVMx5H571JnvNXROoem0otSLiX73pzikEjaPaj46p9qqgfqNbyGfkbB/Z/JJYYy94w3IdjeW6j2lGVed6Xv8+uVMhNJtk7szqdCJoipUY6U/NttwCtUc88yYnLqU8eJ5YGFcEfDkdIyXfdZt4DerSIYjT+MqsOtRim09uZ1qyggVYvzatFmK/xNh3IjTHbHfO//R4fs99WseIBBDFL96lbWmXxbcSQ5rteHbUWwNv+OoNZ7hnOxXPuAQZC9gvjiUGpx00LavO9KfSfxLKpwm8LaKgENO9kP0l6zorHh2PvSgr8abhuJKcQuh46DoRihtKp1gYv/cbG+fUVwiLhGhOxsJoXlKJmk2Fyz+cXMy6SuDSFPMz/UWPSJIaSQodeP5L7+LNLIVH76uvww7oDAElcOjmjjmNB4riG52ZbS/yJkTWvFx5flwLZB/OJ7k17GkcFL93T8xuAfGaCUukdj3H+zkyNxI9z3E0BT2JZtBzIJlVpQZ9pnGndAa2hyo16aozmBUAnXN0La3DkArjB5SUcN21vuGR+tcts07tK+43j49imna17dWnC5NjfTU77DmtgNwchz111tFtyKLXrBX6RLNS6o4Sr7Adza9ysq8ANjSb5KBQem61ZA+i1lbOeORmfcoeTXqjIZ93r++lPEWdPaEv84lctfxLwwkHFstcgcqucxtG1Zi3zvglzoxZ7A+XgcmgDzBbEKkeGZg5kgsG5PePtCSk5I9FxEK/AirfO9WldVMSo2nm/IOUbMshpW5+e1p+3u3ApeQzGlUd3vaYXHUyll7/tKS1tlZnNN3ysQguUAJGCyZqqWEgXRpYmC2Wwf4bR7Shc997xP+QaOT31AfHOyiA0Bd5PYA70yvXsMISJdQZOpommTCszJeZJzqtOpspf5OhUyWObs/rCuKAUVcbZOZLCoGMoGJaUcMtH9p8W/K8qgu42K06ww6qrKfXvu2O9F6Zl8HNqsSjNupcmXRjx6My+gnj3YU1A/PrLyb/DPU+hTKQldreh0IH+bjU2a/E62ihTMdUSBE4mmGwLCa9/L0AKpD3LSaswdgc+c5ZbY+A7x342NI=",
        "ID_ucEnterpriseQuery$ImageButton4.x": 57,
        "ID_ucEnterpriseQuery$ImageButton4.y": 6,
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//table[@id="ID_ucEnterpriseQuery_GridView1"]/tr')
    result_list = []
    # Data rows start at index 1 (row 0 presumably the header row — confirm).
    for n in range(1, len(trs)):
        tds = trs[n].xpath('.//td')
        tmp = {
            "name": tds[0].text_content().strip(),
            "area": tds[1].text_content().strip(),
            "level": tds[2].text_content().strip(),
            "addr": tds[3].text_content().strip(),
        }
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """List found-property notices from the Chengdu taxi site.

    Fetches the (optionally paged) notice index, follows every notice
    link and extracts its body text.  Detail pages that do not return
    200 are skipped.  Responds with a JSON list of ``{"content": ...}``.
    """
    url = "http://www.cdtaxi.cn/zhaolingxx/index_{}.html"
    base_url = "http://www.cdtaxi.cn{}"
    headers = {
        "Host": "www.cdtaxi.cn",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Origin": "http://www.cdtaxi.cn",
        "User-Agent": fake_useragent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Upgrade-Insecure-Request": "1",
        "Accept-Language": "zh-CN,zh;q=0.8",
    }
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url.format(page), method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    divs = root.xpath('//div[@class="special_p"]')
    result_list = []
    for div in divs:
        link = div.xpath('./a[1]')
        tmp_url = base_url.format(link[0].attrib["href"])
        request = HTTPRequest(tmp_url, method="GET", headers=headers)
        response = await self.browser.fetch(request)
        # Guard clause replaces the old dead "else: continue" tail and
        # flattens the nesting.
        if response.code != 200:
            continue
        html = response.body.decode("utf-8")
        root = lxml.html.fromstring(html)
        found = root.xpath('//div[@class="newsshow"]/p')
        content = found[0].text_content().strip()
        # Some notices keep the body in a <div> rather than a <p>.
        if not content:
            found = root.xpath('//div[@class="newsshow"]/div')
            content = found[1].text_content().strip()
        result_list.append({"content": content})
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Query the rc114 personnel-archive service by name and ID number.

    Sends the query as a JSON POST and unwraps the ASP.NET AJAX ``d``
    envelope.  Responds with either the archive record, or the upstream
    error/state message with status 0.
    """
    url = "http://i.rc114.com/InfoQuery_ArcInfo_Pub.aspx/queryData"
    headers = {
        "Content-Type": "application/json",
        "User-Agent": fake_useragent(),
    }
    data = {
        "name": self.get_query_argument("name"),
        "id": self.get_query_argument("id"),
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=json.dumps(data))
    response = await self.browser.fetch(request)
    result = response.body.decode("utf-8")
    data = json.loads(result)
    # The real payload is the ``d`` field, itself JSON-like but written
    # with single quotes, so it cannot be parsed directly.
    tmp = data["d"]
    # NOTE(review): blanket quote replacement breaks if any value contains
    # an apostrophe or embedded quote — confirm acceptable for this source.
    tmp = re.sub(r'\'', r'"', tmp)
    data = json.loads(tmp)
    # Example error payloads from upstream (messages say the ID number has
    # the wrong length / is invalid):
    # {"state": 3 ,"error_message":'您的身份证位数不正确,请重新填写'}
    # {"state": 3 ,"error_message":'您的身份证无效,请重新填写'}
    if data["state"] == 3:
        self.send_json_response(data["error_message"], 0)
    # Example success payload:
    # {"error_message":'' ,"doc_state_title":'档案状态:' , "doc_id_title":'档案编号:',"doc_id": ...}
    else:
        # Found file info.
        if "doc_id" in data:
            file_info = {
                "name": data["person_name"],
                "doc_id": data["doc_id"],
                "school": data["graduate_school"],
                "doc_state": data["doc_state"],
                "doc_unit": data["doc_now_unit_name"],
                "doc_old_unit": data["doc_old_unit"],
                "doc_in_time": data["doc_in_time"]
            }
            self.send_json_response([file_info])
        # File info not found; forward the state text as the error.
        else:
            self.send_json_response(data["doc_state"], 0)
def make_request(self, method, url, start=1, end=20):
    """Create an upstream request bound to the stored session cookie.

    For ``"get"`` no body is attached; every other method posts the
    paging window as ``startRow``/``endRow`` form fields.
    """
    verb = method.upper()
    headers = {
        "User-Agent": fake_useragent(),
        "Cookie": self.session,
    }
    if method != "get":
        body = urlencode({"startRow": start, "endRow": end})
        return HTTPRequest(url, method=verb, headers=headers, body=body)
    return HTTPRequest(url, method=verb, headers=headers)
async def do_process_logic(self):
    """Verify a Sichuan invoice against the provincial tax service.

    Requires the upstream JSESSIONID (returned by the client as the
    JSESSIONID_INVOICE query argument) plus invoice code / number /
    password and the solved captcha.  The upstream answer text is sent
    back as a single message string.
    """
    url = "http://182.151.197.163:7002/FPCY_SCDS_WW/wwfpcy"
    headers = {
        "User-Agent": fake_useragent(),
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # Rebuild the upstream session cookie from the URL-encoded value.
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    cookie = "JSESSIONID=" + unquote(tmp_cookie)
    headers["Cookie"] = cookie
    invoice_code = self.get_query_argument("invoice_code")
    invoice_num = self.get_query_argument("invoice_num")
    invoice_psd = self.get_query_argument("invoice_psd")
    captcha = self.get_query_argument("captcha")
    data = {
        "fpdm0": invoice_code,
        "fphm0": invoice_num,
        "yzm0": invoice_psd,
        "imgcode": captcha,
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    # The answer lives either in span#cxjj (query result) or span#message
    # (error/notice).  The two branches previously duplicated identical
    # bodies; folded into one, keeping span#cxjj's priority.
    spans = root.xpath('//span[@id="cxjj"]') or root.xpath('//span[@id="message"]')
    if spans:
        result = str(spans[0].text_content())
        result = re.sub(r'\r|\n|\t', r'', result)
        self.send_json_response([{"msg": result}])
    else:
        self.send_json_response(self.config["error"]["SIMPLE_ERR"], 0)
async def do_process_logic(self):
    """Look up a disabled-person certificate record at rkk.cdpf.org.cn.

    Requires the session delivered earlier under the fixed name
    JSESSIONID_INVOICE, plus the person's name, certificate number and
    the solved captcha.  Responds with the upstream message text.
    """
    url = "http://rkk.cdpf.org.cn/queryDistrictrecordCDPF.action"
    headers = {
        "User-Agent": fake_useragent(),
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # Rebuild the upstream JSESSIONID cookie from the URL-encoded value.
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    cookie = "JSESSIONID=" + unquote(tmp_cookie)
    headers["Cookie"] = cookie
    name = self.get_query_argument("name")
    no = self.get_query_argument("no")
    captcha = self.get_query_argument("captcha")
    data = {
        "getAjax": "true",
        # Fixed query-type token captured from the live site.
        "type": "211948D59141A611",
        "name": name,
        "cid": no,
        "cardType": 1,
        "checkCode": captcha
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    # The upstream page is GBK-encoded.
    html = response.body.decode("gbk")
    root = lxml.html.fromstring(html)
    p1 = root.xpath('//p[@id="checkad1"]')
    p2 = root.xpath('//p[@id="checkad2"]')
    if len(p1):
        result = str(p1[0].text_content())
        # checkad1 is sent with status 0 while checkad2 is not — it
        # presumably carries a failure message; confirm against the page.
        self.send_json_response([{"msg": result}], 0)
    elif len(p2):
        result = str(p2[0].text_content())
        self.send_json_response([{"msg": result}])
    else:
        self.send_json_response(self.config["error"]["SIMPLE_ERR"], 0)
async def do_process_logic(self):
    """Aggregate Guiyang bulletin posts from the home page with their details.

    Builds a ``{post_id: {...}}`` map from the listing table, fetches the
    shared detail page once via ``get_detail`` and merges the per-post
    fields in before responding with the combined list.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    purl = urlparse(url)
    base_url = purl.scheme + "://" + purl.netloc
    headers = {
        "User-Agent": fake_useragent()
    }
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//div[@class="part_2_table float_right"]/table/tr')
    posts = {}
    detail_url = ""
    for tr in trs:
        a = tr.xpath('.//td[1]/a[1]')[0]
        title = a.text_content()
        finish_date = tr.xpath('.//td[2]')[0].text_content()
        detail_url = base_url + a.attrib["href"]
        # Post id is the value of the link's single query parameter.
        post_id = int(detail_url.split("=")[1])
        post = {
            "title": title,
            "finish_date": finish_date
        }
        posts[post_id] = post
    # All detail texts live on one page (same URL without the query
    # string); the last link seen supplies it.
    real_url = detail_url.split('?')[0]
    details = await get_detail(real_url)
    # Assumes post ids form the contiguous range 1..N — TODO confirm.
    for i in range(1, len(list(posts.keys()))+1):
        posts[i].update(details[i])
    results = []
    for k, v in posts.items():
        v["post_id"] = k
        results.append(v)
    # Invoke this method to send the JSON response.
    self.send_json_response(results)
async def do_process_logic(self):
    """Search Chengdu pre-sale records (ShowNew) and return the table rows.

    All filters (name, district, from/to dates, page) are optional query
    arguments; empty strings are substituted when absent.
    """
    # NOTE: the separator before "region" had been corrupted into the
    # "®" character (an HTML-entity-decoded "&reg"); restored to the
    # literal "&region=" parameter the ASP.NET page expects.
    url_template = ("http://www.cdfgj.gov.cn/SCXX/ShowNew.aspx"
                    "?iname={name}&region={district}&page={page}"
                    "&st={from_time}&et={to_time}")
    headers = {"User-Agent": fake_useragent()}
    name_args = self.get_query_arguments("name")
    district_args = self.get_query_arguments("district")
    from_args = self.get_query_arguments("from")
    to_args = self.get_query_arguments("to")
    page_args = self.get_query_arguments("page")
    params = dict()
    params["name"] = name_args[0] if len(name_args) else ""
    params["district"] = district_args[0] if len(district_args) else ""
    params["from_time"] = from_args[0] if len(from_args) else ""
    params["to_time"] = to_args[0] if len(to_args) else ""
    params["page"] = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url_template.format(**params), method="GET",
                          headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//table[@id="ID_ucShowNew_gridView"]/tr')
    tr_num = len(trs)
    result_list = []
    # Data rows start at index 1 (row 0 presumably the header — confirm).
    for n in range(1, tr_num):
        tds = trs[n].xpath('.//td')
        tmp = {
            "no": tds[0].text_content().strip(),
            "name": tds[1].text_content().strip(),
            "district": tds[2].text_content().strip(),
            "addr": tds[3].text_content().strip(),
            "use": tds[4].text_content().strip(),
            "developer": tds[5].text_content().strip(),
            "area": tds[6].text_content().strip(),
            "time": tds[7].text_content().strip(),
        }
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Search Chengdu pre-sale records (ShowNew); all filters optional.

    Responds with a JSON list of the parsed grid-view rows.
    """
    # Fix: "®ion" in the template was an HTML-entity-decoded "&reg" —
    # restored to the intended "&region=" query parameter.
    url_template = ("http://www.cdfgj.gov.cn/SCXX/ShowNew.aspx"
                    "?iname={name}&region={district}&page={page}"
                    "&st={from_time}&et={to_time}")
    headers = {
        "User-Agent": fake_useragent()
    }
    name_args = self.get_query_arguments("name")
    district_args = self.get_query_arguments("district")
    from_args = self.get_query_arguments("from")
    to_args = self.get_query_arguments("to")
    page_args = self.get_query_arguments("page")
    params = dict()
    params["name"] = name_args[0] if len(name_args) else ""
    params["district"] = district_args[0] if len(district_args) else ""
    params["from_time"] = from_args[0] if len(from_args) else ""
    params["to_time"] = to_args[0] if len(to_args) else ""
    params["page"] = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url_template.format(**params), method="GET",
                          headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//table[@id="ID_ucShowNew_gridView"]/tr')
    tr_num = len(trs)
    result_list = []
    # Skip row 0 (presumably the header row — confirm against the page).
    for n in range(1, tr_num):
        tds = trs[n].xpath('.//td')
        tmp = {
            "no": tds[0].text_content().strip(),
            "name": tds[1].text_content().strip(),
            "district": tds[2].text_content().strip(),
            "addr": tds[3].text_content().strip(),
            "use": tds[4].text_content().strip(),
            "developer": tds[5].text_content().strip(),
            "area": tds[6].text_content().strip(),
            "time": tds[7].text_content().strip(),
        }
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """List lost-property registrations from the Chengdu taxi site.

    Supports an optional ``page`` query argument; responds with a JSON
    list of parsed registration records.
    """
    url = "http://www.cdtaxi.cn/shiwudj/index_{}.html"
    headers = {
        "Host": "www.cdtaxi.cn",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Origin": "http://www.cdtaxi.cn",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Upgrade-Insecure-Request": "1",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "User-Agent": fake_useragent()
    }
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url.format(page), method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    items = root.xpath('//div[@class="xlost_item"]')
    total = len(items)
    result_list = []
    # Starts at 1: the first xlost_item is presumably a header block —
    # TODO confirm against the live page.
    for n in range(1, total):
        tmp = {}
        # Row 1 of each item's table holds the summary cells.
        tds = items[n].xpath('.//table/tbody/tr[1]/td')
        tmp["date"] = tds[0].text_content().strip()
        # Full texts are carried in each link's title attribute.
        tmp["lost_item"] = tds[1].xpath('.//a')[0].attrib["title"]
        tmp["pickup"] = tds[2].xpath('.//a')[0].attrib["title"]
        tmp["getoff"] = tds[3].xpath('.//a')[0].attrib["title"]
        tmp["contact"] = tds[4].text_content().strip()
        tmp["tel"] = tds[5].text_content().strip()
        tmp["status"] = tds[6].text_content().strip()
        # Row 2 holds the reply text after a "label: text" colon.
        tr2 = items[n].xpath('.//table/tbody/tr[2]')[0]
        tmp["resp"] = tr2.text_content().strip().split(':')[1]
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Proxy the CDPF captcha image and re-issue its session cookie.

    The upstream JSESSIONID is surfaced to the client under the fixed
    cookie name JSESSIONID_INVOICE so follow-up query handlers can send
    it back.  The response body is the raw captcha image.
    """
    captcha_url = "http://rkk.cdpf.org.cn/rand.jsp"
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(captcha_url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    # Get the session cookie from the response header.
    cookie = response.headers.get_list("Set-Cookie")[0]
    # Raw string: plain '\s' is an invalid escape sequence.
    m = re.match(r'JSESSIONID=(.*?);\s', cookie)
    if m:
        session = m.group(1)
    else:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    # Change the cookie key name to JSESSIONID_INVOICE.
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_header("Content-Type", 'image')
    self.write(response.body)
async def do_process_logic(self):
    """Look up product information for a barcode on the ANCC public search.

    Responds with a single-item JSON list on success, or the NO_RESULT
    error payload when the page shows no product/supplier data.
    """
    url_template = "http://search.anccnet.com/searchResult2.aspx?keyword={0}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "search.anccnet.com",
        "Referer": "http://www.ancc.org.cn/Service/queryTools/Internal.aspx",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": fake_useragent()
    }
    # The site expects the keyword percent-encoded as gb2312.
    barcode = quote(self.get_query_argument("code"), encoding="gb2312")
    response = await self.browser.fetch(
        HTTPRequest(url_template.format(barcode), method="GET", headers=headers))
    root = lxml.html.fromstring(response.body.decode("gb2312"))
    info_dds = root.xpath('//dl[@class="p-info"]/dd')
    supplier_dds = root.xpath('//dl[@class="p-supplier"]/dd')
    # Guard clause: no product or no supplier block means no result.
    if not (len(info_dds) and len(supplier_dds)):
        self.send_json_response(self.config["error"]["NO_RESULT_ERR"], 0)
        return

    def text_of(node):
        # Normalized cell text.
        return node.text_content().strip()

    info = {
        "barcode": text_of(info_dds[0]),
        "name": text_of(info_dds[1]),
        "specs": text_of(info_dds[2]),
        "desc": text_of(info_dds[3]),
        "brand": text_of(supplier_dds[0]),
        "manufacturer": text_of(supplier_dds[1]),
    }
    self.send_json_response([info])
async def do_process_logic(self):
    """Relay the cdfgj captcha image, re-keying its session cookie.

    The upstream NETSCAPE_ID session value is handed back to the client
    under the fixed cookie name JSESSIONID_INVOICE — follow-up query
    handlers depend on that exact name, so it must not change.
    """
    captcha_url = "http://www.cdfgj.gov.cn/BusinessQuery/UserControls/RandomCode.axd"
    response = await self.browser.fetch(
        HTTPRequest(captcha_url, method="GET",
                    headers={"User-Agent": fake_useragent()}))
    # Upstream issues the session in the first Set-Cookie header.
    set_cookie = response.headers.get_list("Set-Cookie")[0]
    match = re.match('NETSCAPE_ID=(.*?);', set_cookie)
    if match is None:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    self.set_cookie("JSESSIONID_INVOICE", match.group(1))
    self.set_header("Content-Type", 'image')
    self.write(response.body)
async def do_process_logic(self):
    """Serve the CDPF captcha and expose its session as JSESSIONID_INVOICE.

    Raises HTTPError(500) when the upstream Set-Cookie header does not
    contain the expected JSESSIONID.
    """
    captcha_url = "http://rkk.cdpf.org.cn/rand.jsp"
    headers = {
        "User-Agent": fake_useragent()
    }
    request = HTTPRequest(captcha_url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    # Get the session cookie from the response header.
    cookie = response.headers.get_list("Set-Cookie")[0]
    # Raw string: plain '\s' is an invalid escape sequence.
    m = re.match(r'JSESSIONID=(.*?);\s', cookie)
    if m:
        session = m.group(1)
    else:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    # Change the cookie key name to JSESSIONID_INVOICE.
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_header("Content-Type", 'image')
    self.write(response.body)
async def do_process_logic(self):
    """Return Guiyang bulletin posts (title, finish date, detail fields).

    Reads the listing table from the home page, then fetches the shared
    detail page once through ``get_detail`` and merges its fields into
    each post before responding.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    parts = urlparse(url)
    base_url = parts.scheme + "://" + parts.netloc
    fetched = await self.browser.fetch(
        HTTPRequest(url, method="GET",
                    headers={"User-Agent": fake_useragent()}))
    page = lxml.html.fromstring(fetched.body.decode("utf-8"))
    rows = page.xpath('//div[@class="part_2_table float_right"]/table/tr')
    posts = {}
    detail_url = ""
    for row in rows:
        anchor = row.xpath('.//td[1]/a[1]')[0]
        detail_url = base_url + anchor.attrib["href"]
        # The link's single query parameter carries the post id.
        pid = int(detail_url.split("=")[1])
        posts[pid] = {
            "title": anchor.text_content(),
            "finish_date": row.xpath('.//td[2]')[0].text_content(),
        }
    # One shared detail page serves every post; drop the query string.
    details = await get_detail(detail_url.split('?')[0])
    for pid in range(1, len(list(posts.keys())) + 1):
        posts[pid].update(details[pid])
    results = []
    for pid, post in posts.items():
        post["post_id"] = pid
        results.append(post)
    # Invoke this method to send the JSON response.
    self.send_json_response(results)
async def do_process_logic(self):
    """Prepare the taxi lost-property form: captcha, session and form hash.

    Fetches the captcha image (which also sets PHPSESSID), then loads the
    add-form page with that session to read the hidden ``__hash__``
    anti-forgery value.  Both are handed to the client as cookies
    (JSESSIONID_INVOICE / HIDDEN_FORM_HASH) alongside the captcha bytes.
    """
    url = "http://www.cdtaxi.cn/shiwudj/add.html"
    captcha_url = "http://www.cdtaxi.cn/admin.php?m=flogin&a=verify"
    headers = {
        "User-Agent": fake_useragent()
    }
    # Request captcha and cookie.
    request = HTTPRequest(captcha_url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    captcha_data = response.body
    # Get the session cookie from the response header.
    cookie = response.headers.get_list("Set-Cookie")[0]
    # Raw string: plain '\s' is an invalid escape sequence.
    m = re.match(r'PHPSESSID=(.*?);\s', cookie)
    if m:
        session = m.group(1)
        cookie = "PHPSESSID={}".format(session)
    else:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    # Get the form's hidden anti-forgery value with the same session.
    headers["Cookie"] = cookie
    request0 = HTTPRequest(url, method="GET", headers=headers)
    response0 = await self.browser.fetch(request0)
    html = response0.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    hidden = root.xpath('//input[@name="__hash__"]')
    form_hash = str(hidden[0].attrib["value"])
    # Final response: session exposed under the fixed JSESSIONID_INVOICE name.
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_cookie("HIDDEN_FORM_HASH", form_hash)
    self.set_header("Content-Type", 'image')
    self.write(captcha_data)
async def get(self):
    """Return housing-fund account summaries for the logged-in session.

    ``prepare`` has already authenticated and stored the session cookie
    in ``self.COOKIE``; this scrapes the account-page tables and responds
    with one record per table.
    """
    account_url = "http://www.cdzfgjj.gov.cn/index.php?m=content&c=gjj&a=account"
    headers = {
        "User-Agent": fake_useragent()
    }
    try:
        print(self.COOKIE)
        headers["Cookie"] = self.COOKIE
        request = HTTPRequest(account_url, method="GET", headers=headers)
        response = await self.browser.fetch(request)
        if response.code == 200:
            html = response.body.decode("utf-8")
            root = lxml.html.fromstring(html)
            tables = root.xpath('//div[@class="w-main"]/table')
            result_list = []
            for table in tables:
                tds = table.xpath('.//tr/td[@class="c"]')
                tmp = {
                    "client_no": tds[0].text_content().strip(),
                    "client_name": tds[1].text_content().strip(),
                    "deposit_to_date": tds[2].text_content().strip(),
                    "deposit_base": tds[3].text_content().strip(),
                    "deposit_unit": tds[4].text_content().strip(),
                    "deposit_personal": tds[5].text_content().strip(),
                    "balance": tds[6].text_content().strip(),
                    # NOTE(review): reuses tds[6] — the same cell as
                    # "balance"; looks like it should be tds[7].  Left
                    # unchanged; confirm against the live page.
                    "account_status": tds[6].text_content().strip(),
                }
                result_list.append(tmp)
            self.send_json_response({"results": result_list})
        else:
            self.logger.debug("none 200 response code")
            raise HTTPError
    except HTTPError:
        self.logger.error("[ InvoiceCheckHandler - get() ] caught HTTPError")
        self.send_json_response({"msg": "invoice check error"}, 0)
async def do_process_logic(self):
    """Search the ANCC barcode registry for the ``code`` query argument.

    Sends back a one-element list with product and supplier fields, or
    the NO_RESULT error payload when the result page is empty.
    """
    url_template = "http://search.anccnet.com/searchResult2.aspx?keyword={0}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "search.anccnet.com",
        "Referer": "http://www.ancc.org.cn/Service/queryTools/Internal.aspx",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": fake_useragent()
    }
    # Keyword must be percent-encoded as gb2312 for this site.
    barcode = quote(self.get_query_argument("code"), encoding="gb2312")
    search_url = url_template.format(barcode)
    response = await self.browser.fetch(
        HTTPRequest(search_url, method="GET", headers=headers))
    document = lxml.html.fromstring(response.body.decode("gb2312"))
    product_cells = document.xpath('//dl[@class="p-info"]/dd')
    supplier_cells = document.xpath('//dl[@class="p-supplier"]/dd')
    if len(product_cells) and len(supplier_cells):
        info = {
            "barcode": product_cells[0].text_content().strip(),
            "name": product_cells[1].text_content().strip(),
            "specs": product_cells[2].text_content().strip(),
            "desc": product_cells[3].text_content().strip(),
            "brand": supplier_cells[0].text_content().strip(),
            "manufacturer": supplier_cells[1].text_content().strip(),
        }
        self.send_json_response([info])
    else:
        self.send_json_response(self.config["error"]["NO_RESULT_ERR"], 0)
async def do_process_logic(self):
    """Serve the cdfgj captcha image and republish its session cookie.

    The upstream NETSCAPE_ID value is set on the client under the fixed
    name JSESSIONID_INVOICE; downstream query handlers rely on that
    exact cookie name.
    """
    captcha_url = "http://www.cdfgj.gov.cn/BusinessQuery/UserControls/RandomCode.axd"
    headers = {
        "User-Agent": fake_useragent()
    }
    captcha_response = await self.browser.fetch(
        HTTPRequest(captcha_url, method="GET", headers=headers))
    # The session rides in the first Set-Cookie header of the reply.
    first_cookie = captcha_response.headers.get_list("Set-Cookie")[0]
    matched = re.match('NETSCAPE_ID=(.*?);', first_cookie)
    if not matched:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    session = matched.group(1)
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_header("Content-Type", 'image')
    self.write(captcha_response.body)
async def do_process_logic(self, *args):
    """Dispatch Chongqing social-security queries.

    args[0] is the action string. "login..." authenticates with id/password
    query arguments and returns the insured person's basic info, setting the
    session cookie under the fixed name "JSESSIONID_INVOICE". Any other
    action has the form "<insurance>/<opt>" where <insurance> is one of
    pension | medical | injury | unemployment | maternity and <opt> is one
    of info | account | detail | balance; the session must be supplied back
    via the JSESSIONID_INVOICE query argument.
    """
    action_str = args[0]
    # ============================
    # Login and return basic info
    # ============================
    if action_str.startswith("login"):
        uid = self.get_query_argument("id")
        password = self.get_query_argument("password")
        login_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/directQuery.action"
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "User-Agent": fake_useragent(),
        }
        data = {"userVo.idNo": uid, "userVo.password": password}
        request = HTTPRequest(login_url, method="POST", headers=headers, body=urlencode(data))
        response = await self.browser.fetch(request)
        html = lxml.html.fromstring(response.body.decode("utf-8"))
        # The login page reports failures via a hidden errorMessage input.
        error = html.xpath('//input[@id="errorMessage"]')
        # Login error
        if len(error):
            error_msg = error[0].attrib["value"]
            self.send_json_response(error_msg, 0)
        else:
            # Get cookie from response header
            cookie = response.headers.get_list("Set-Cookie")[0]
            m = re.match('JSESSIONID=(.*?);\s', cookie)
            if m:
                # Get cookie "JSESSIONID"
                session = m.group(1)
                # Request user info
                info_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/persionBasic.action"
                headers = {
                    "User-Agent": fake_useragent(),
                    "Cookie": cookie
                }
                request = HTTPRequest(info_url, method="GET", headers=headers)
                response = await self.browser.fetch(request)
                if response.code == 200:
                    html = lxml.html.fromstring(
                        response.body.decode("utf-8"))
                    # Basic info is rendered as a fixed-order two-column table;
                    # each row's second <td> holds the value.
                    trs = html.xpath(
                        '//div[@class="person_base_info"]/table/tr')
                    info = {
                        "name": trs[0].xpath(".//td")[1].text_content().strip(),
                        "personal_no": trs[1].xpath(".//td")[1].text_content().strip(),
                        "id": trs[2].xpath(".//td")[1].text_content().strip(),
                        "gender": trs[3].xpath(".//td")[1].text_content().strip(),
                        "nationality": trs[4].xpath(".//td")[1].text_content().strip(),
                        "birth_date": trs[5].xpath(".//td")[1].text_content().strip(),
                        "resident_type": trs[6].xpath(".//td")[1].text_content().strip(),
                        "work_type": trs[7].xpath(".//td")[1].text_content().strip(),
                        "first_insured_date": trs[8].xpath(".//td")[1].text_content().strip(),
                        "insured_status": trs[9].xpath(".//td")[1].text_content().strip(),
                    }
                    # Change cookie key name to JSESSIONID_INVOICE
                    self.set_cookie("JSESSIONID_INVOICE", session)
                    # self.set_header("Set-Cookie", cookie)
                    self.send_json_response([info])
                else:
                    self.send_json_response([])
            else:
                raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    else:
        action = action_str.split(
            '/')  # pension | medical | injury | unemployment | maternity
        opt = action[1] if len(
            action) > 1 else ""  # info | account | detail | balance
        query_params = dict(parse_qsl(self.request.query, True))
        if "JSESSIONID_INVOICE" not in query_params:
            raise MissingArgumentError("JSESSIONID_INVOICE")
        else:
            # Rebuild the upstream cookie from the client-held session value.
            tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
            session = "JSESSIONID=" + unquote_plus(tmp_cookie)
        p = query_params["page"] if "page" in query_params else ""
        ps = query_params[
            "page_size"] if "page_size" in query_params else ""
        page, page_size = self.init_paging_param(p, ps)
        # Start row and end row
        start = page + 1
        end = page + page_size
        # ==============================
        # Endowment insurance (pension)
        # ==============================
        if action_str.startswith("pension"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pensionSecurity.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pCList.action"
            }
            # "info" pages are plain GETs; paged lists are POSTed.
            method = "get" if opt == "info" else "post"
            pension = Pension(session)
            response = await self.browser.fetch(
                pension.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = pension.parse_html(opt, raw)
            self.send_json_response(result)
        # ==================
        # Medical insurance
        # ==================
        elif action_str.startswith("medical"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cBList.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cCList.action",
                "balance": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cFList.action"
            }
            method = "get" if opt == "info" else "post"
            medical = Medical(session)
            response = await self.browser.fetch(
                medical.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = medical.parse_html(opt, raw)
            self.send_json_response(result)
        # ============================
        # Employment injury insurance
        # ============================
        elif action_str.startswith("injury"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iCList.action"
            }
            method = "get" if opt == "info" else "post"
            injury = Injury(session)
            response = await self.browser.fetch(
                injury.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = injury.parse_html(opt, raw)
            self.send_json_response(result)
        # =======================
        # Unemployment insurance
        # =======================
        elif action_str.startswith("unemployment"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uCList.action"
            }
            method = "get" if opt == "info" else "post"
            unemployment = Unemployment(session)
            response = await self.browser.fetch(
                unemployment.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = unemployment.parse_html(opt, raw)
            self.send_json_response(result)
        # ====================
        # Maternity insurance
        # ====================
        else:
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthInsuredInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthJfDetail.action?startRow=1&endRow=1200"
            }
            # Maternity endpoints are GET-only; the detail URL carries its
            # own fixed paging in the query string.
            method = "get"
            maternity = Maternity(session)
            response = await self.browser.fetch(
                maternity.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = maternity.parse_html(opt, raw)
            self.send_json_response(result)
async def do_process_logic(self):
    """Scrape the Guiyang city-management home page and return the four
    statistics charts (div ids box1..box4) as a JSON list.

    box1 encodes its points as ``value:N, name:'...'`` pairs; box2..box4
    share a common layout of two ``data:[...]`` arrays (labels, values),
    so their parsing is factored into helpers.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    headers = {
        "User-Agent": fake_useragent()
    }
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)

    def _box_script(box_id):
        # Inline <script> of the chart container div with the given id.
        div = root.xpath('//div[@id="box%d"]' % box_id)[0]
        return div.xpath('.//script')[0].text_content()

    def _labels_and_values(script):
        # Extract the two data:[...] arrays: label list, then value list.
        list_data = re.compile('data:.?\[(.+?)\]', re.DOTALL).findall(script)
        area = re.sub('[\t|\n|\r|\s|"]', '', list_data[0])
        area_list = area.split(',')
        data = re.sub('\s', '', list_data[1])
        data_list = [int(i) for i in data.split(',')]
        return list(zip(area_list, data_list))

    statistics = []
    # id="box1"
    script = _box_script(1)
    list_data = re.compile('value:(\d+).+?name:\'(.+?)\'').findall(script)
    statistics.append({"id": 1, "name": "共受理各类案件处理情况", "data": list_data})
    # id="box2" .. id="box4" all share the same script layout.
    titles = {
        2: "各区(市、县)案件处理情况",
        3: "市级联动部门案卷处理情况",
        4: "平台案件类别",
    }
    for box_id in (2, 3, 4):
        statistics.append({
            "id": box_id,
            "name": titles[box_id],
            "data": _labels_and_values(_box_script(box_id))
        })
    self.send_json_response(statistics)
async def do_process_logic(self, *args):
    """Dispatch Chongqing social-security queries.

    args[0] is the action string. "login..." authenticates with id/password
    query arguments and returns the insured person's basic info, setting the
    session cookie under the fixed name "JSESSIONID_INVOICE". Any other
    action has the form "<insurance>/<opt>" where <insurance> is one of
    pension | medical | injury | unemployment | maternity and <opt> is one
    of info | account | detail | balance; the session must be supplied back
    via the JSESSIONID_INVOICE query argument.
    """
    action_str = args[0]
    # ============================
    # Login and return basic info
    # ============================
    if action_str.startswith("login"):
        uid = self.get_query_argument("id")
        password = self.get_query_argument("password")
        login_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/directQuery.action"
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "User-Agent": fake_useragent(),
        }
        data = {
            "userVo.idNo": uid,
            "userVo.password": password
        }
        request = HTTPRequest(login_url, method="POST", headers=headers, body=urlencode(data))
        response = await self.browser.fetch(request)
        html = lxml.html.fromstring(response.body.decode("utf-8"))
        # The login page reports failures via a hidden errorMessage input.
        error = html.xpath('//input[@id="errorMessage"]')
        # Login error
        if len(error):
            error_msg = error[0].attrib["value"]
            self.send_json_response(error_msg, 0)
        else:
            # Get cookie from response header
            cookie = response.headers.get_list("Set-Cookie")[0]
            m = re.match('JSESSIONID=(.*?);\s', cookie)
            if m:
                # Get cookie "JSESSIONID"
                session = m.group(1)
                # Request user info
                info_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/persionBasic.action"
                headers = {
                    "User-Agent": fake_useragent(),
                    "Cookie": cookie
                }
                request = HTTPRequest(info_url, method="GET", headers=headers)
                response = await self.browser.fetch(request)
                if response.code == 200:
                    html = lxml.html.fromstring(response.body.decode("utf-8"))
                    # Basic info is rendered as a fixed-order two-column table;
                    # each row's second <td> holds the value.
                    trs = html.xpath('//div[@class="person_base_info"]/table/tr')
                    info = {
                        "name": trs[0].xpath(".//td")[1].text_content().strip(),
                        "personal_no": trs[1].xpath(".//td")[1].text_content().strip(),
                        "id": trs[2].xpath(".//td")[1].text_content().strip(),
                        "gender": trs[3].xpath(".//td")[1].text_content().strip(),
                        "nationality": trs[4].xpath(".//td")[1].text_content().strip(),
                        "birth_date": trs[5].xpath(".//td")[1].text_content().strip(),
                        "resident_type": trs[6].xpath(".//td")[1].text_content().strip(),
                        "work_type": trs[7].xpath(".//td")[1].text_content().strip(),
                        "first_insured_date": trs[8].xpath(".//td")[1].text_content().strip(),
                        "insured_status": trs[9].xpath(".//td")[1].text_content().strip(),
                    }
                    # Change cookie key name to JSESSIONID_INVOICE
                    self.set_cookie("JSESSIONID_INVOICE", session)
                    # self.set_header("Set-Cookie", cookie)
                    self.send_json_response([info])
                else:
                    self.send_json_response([])
            else:
                raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    else:
        action = action_str.split('/')  # pension | medical | injury | unemployment | maternity
        opt = action[1] if len(action) > 1 else ""  # info | account | detail | balance
        query_params = dict(parse_qsl(self.request.query, True))
        if "JSESSIONID_INVOICE" not in query_params:
            raise MissingArgumentError("JSESSIONID_INVOICE")
        else:
            # Rebuild the upstream cookie from the client-held session value.
            tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
            session = "JSESSIONID=" + unquote_plus(tmp_cookie)
        p = query_params["page"] if "page" in query_params else ""
        ps = query_params["page_size"] if "page_size" in query_params else ""
        page, page_size = self.init_paging_param(p, ps)
        # Start row and end row
        start = page + 1
        end = page + page_size
        # ==============================
        # Endowment insurance (pension)
        # ==============================
        if action_str.startswith("pension"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pensionSecurity.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pCList.action"
            }
            # "info" pages are plain GETs; paged lists are POSTed.
            method = "get" if opt == "info" else "post"
            pension = Pension(session)
            response = await self.browser.fetch(pension.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = pension.parse_html(opt, raw)
            self.send_json_response(result)
        # ==================
        # Medical insurance
        # ==================
        elif action_str.startswith("medical"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cBList.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cCList.action",
                "balance": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cFList.action"
            }
            method = "get" if opt == "info" else "post"
            medical = Medical(session)
            response = await self.browser.fetch(medical.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = medical.parse_html(opt, raw)
            self.send_json_response(result)
        # ============================
        # Employment injury insurance
        # ============================
        elif action_str.startswith("injury"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iCList.action"
            }
            method = "get" if opt == "info" else "post"
            injury = Injury(session)
            response = await self.browser.fetch(injury.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = injury.parse_html(opt, raw)
            self.send_json_response(result)
        # =======================
        # Unemployment insurance
        # =======================
        elif action_str.startswith("unemployment"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uCList.action"
            }
            method = "get" if opt == "info" else "post"
            unemployment = Unemployment(session)
            response = await self.browser.fetch(unemployment.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = unemployment.parse_html(opt, raw)
            self.send_json_response(result)
        # ====================
        # Maternity insurance
        # ====================
        else:
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthInsuredInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthJfDetail.action?startRow=1&endRow=1200"
            }
            # Maternity endpoints are GET-only; the detail URL carries its
            # own fixed paging in the query string.
            method = "get"
            maternity = Maternity(session)
            response = await self.browser.fetch(maternity.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = maternity.parse_html(opt, raw)
            self.send_json_response(result)
async def do_process_logic(self):
    """Query Sichuan registered-constructor records from jzsgl.coc.gov.cn.

    Optional query arguments: name, ent (enterprise), reg_no (注册号),
    cer_no (注册证书编号), qua_no (执业资格证书编号), page. Responds with a
    JSON list of constructor records.
    """
    url = "http://jzsgl.coc.gov.cn/archisearch/AjaxAction/DataServices.aspx"
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'jzsgl.coc.gov.cn',
        'Origin': 'http://jzsgl.coc.gov.cn',
        'Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Requested-With': 'XMLHttpRequest',
        "User-Agent": fake_useragent()
    }
    name_args = self.get_query_arguments("name")
    ent_args = self.get_query_arguments("ent")
    # 注册号 (e.g. 川251141528602)
    reg_no_args = self.get_query_arguments("reg_no")
    # 注册证书编号
    cer_no_args = self.get_query_arguments("cer_no")
    # 执业资格证书编号
    qua_no_args = self.get_query_arguments("qua_no")
    page_args = self.get_query_arguments("page")
    params = {
        "Xm": name_args[0] if len(name_args) else "",
        "Qymc": ent_args[0] if len(ent_args) else "",
        "Zcbh": reg_no_args[0] if len(reg_no_args) else "",
        "Zsbh": cer_no_args[0] if len(cer_no_args) else "",
        "Zgzsbh": qua_no_args[0] if len(qua_no_args) else "",
        "Sjbm": "510000",  # 省级编号 (510000 = 四川省)
        "Zclb": "00",
        "PageNo": int(page_args[0]) if len(page_args) else 1,
    }
    # NOTE(review): urlencode() serializes the nested `params` dict via its
    # str() repr — presumably what this endpoint expects; confirm if changed.
    payload = {'action': '020103', 'param': params}
    # Registration-type codes returned in the "zclb" field.
    labels = {
        "00": "最新状态",
        "01": "初始注册",
        "02": "变更注册",
        "03": "延续注册",
        "04": "增项注册",
        "05": "重新注册",
        "06": "遗失补办",
        "07": "注销注册",
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(payload))
    response = await self.browser.fetch(request)
    reply = json.loads(response.body.decode("utf-8"))
    result_list = []
    for constructor in reply["AppendData"]["Data"]:
        # "zyList" packs profession|validity pairs joined by '^'.
        pairs = [item.split('|') for item in constructor["zyList"].split('^')]
        profession = ','.join(pair[0] for pair in pairs)
        validity = ','.join(pair[1] for pair in pairs)
        result_list.append({
            "province": constructor["sjmc"],
            "enterprise": constructor["qymc"],
            "name": constructor["xm"],
            "register_no": constructor["zcbh"].strip(),
            "register_certificate_no": constructor["zsbh"].strip(),
            "qualification_certificate_no": constructor["zgzsbh"].strip(),
            "profession": profession,
            "validity": validity,
            "type": labels[constructor["zclb"]]
        })
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Submit a taxi lost-item report to www.cdtaxi.cn.

    Required query arguments: name, HIDDEN_FORM_HASH, captcha,
    JSESSIONID_INVOICE (the session value previously handed to the client;
    restored here as the site's PHPSESSID cookie). All other form fields
    are optional and default to "". Responds with the site's result
    message, status 0 unless the submission succeeded.
    """
    url = "http://www.cdtaxi.cn/shiwudj/add.html"
    headers = {
        "Host": "www.cdtaxi.cn",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Origin": "http://www.cdtaxi.cn",
        "Referer": "http://www.cdtaxi.cn/shiwudj/add.html",
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        # NOTE(review): standard header is "Upgrade-Insecure-Requests" (plural);
        # kept as-is to preserve the exact request sent upstream.
        "Upgrade-Insecure-Request": "1",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "User-Agent": fake_useragent()
    }
    # Required arguments — Tornado raises MissingArgumentError if absent.
    username = self.get_query_argument("name")
    form_hash = self.get_query_argument("HIDDEN_FORM_HASH")
    captcha = self.get_query_argument("captcha")
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    # Restore the session cookie under its real upstream name.
    headers["Cookie"] = "PHPSESSID=" + unquote(tmp_cookie)

    def _optional(name):
        # First value of an optional query argument, or "" when absent.
        values = self.get_query_arguments(name)
        return values[0] if values else ""

    data = {
        "username": username,
        "telephone": _optional("tel"),
        "first_ads": _optional("pickup"),
        "uptime": _optional("pickup_time"),
        "last_ads": _optional("getoff"),
        "downtime": _optional("getoff_time"),
        "gongsi": _optional("company"),
        "chexing": _optional("car_type"),
        "fapiao": _optional("invoice_sum"),
        "chepai": _optional("plate_no"),
        "daima": _optional("invoice_code"),
        "haoma": _optional("invoice_no"),
        "title": _optional("lost_item"),
        "content": _optional("msg"),
        "time": _optional("time"),
        "dengji": "1",
        "verify": captcha,
        "__hash__": form_hash,
        "Submit": "提 交"
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    root = lxml.html.fromstring(response.body.decode("utf-8"))
    # The result page shows the outcome in the first green <span>.
    span = root.xpath('//span[@class="green"]')[0]
    msg = span.text_content().strip()
    if msg == "留言提交成功":
        self.send_json_response({"msg": msg})
    else:
        self.send_json_response({"msg": msg}, 0)
async def do_process_logic(self):
    """Scrape the Guiyang city-management home page and return the four
    statistics charts (div ids box1..box4) as a JSON list.

    box1 encodes its points as ``value:N, name:'...'`` pairs; box2..box4
    share a common layout of two ``data:[...]`` arrays (labels, values),
    so their parsing is factored into helpers.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)

    def _box_script(box_id):
        # Inline <script> of the chart container div with the given id.
        div = root.xpath('//div[@id="box%d"]' % box_id)[0]
        return div.xpath('.//script')[0].text_content()

    def _labels_and_values(script):
        # Extract the two data:[...] arrays: label list, then value list.
        list_data = re.compile('data:.?\[(.+?)\]', re.DOTALL).findall(script)
        area = re.sub('[\t|\n|\r|\s|"]', '', list_data[0])
        area_list = area.split(',')
        data = re.sub('\s', '', list_data[1])
        data_list = [int(i) for i in data.split(',')]
        return list(zip(area_list, data_list))

    statistics = []
    # id="box1"
    script = _box_script(1)
    list_data = re.compile('value:(\d+).+?name:\'(.+?)\'').findall(script)
    statistics.append({"id": 1, "name": "共受理各类案件处理情况", "data": list_data})
    # id="box2" .. id="box4" all share the same script layout.
    titles = {
        2: "各区(市、县)案件处理情况",
        3: "市级联动部门案卷处理情况",
        4: "平台案件类别",
    }
    for box_id in (2, 3, 4):
        statistics.append({
            "id": box_id,
            "name": titles[box_id],
            "data": _labels_and_values(_box_script(box_id))
        })
    self.send_json_response(statistics)
async def do_process_logic(self):
    """Query Sichuan registered-constructor records from jzsgl.coc.gov.cn.

    Optional query arguments: name, ent (enterprise), reg_no (注册号),
    cer_no (注册证书编号), qua_no (执业资格证书编号), page. Responds with a
    JSON list of constructor records.
    """
    url = "http://jzsgl.coc.gov.cn/archisearch/AjaxAction/DataServices.aspx"
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'jzsgl.coc.gov.cn',
        'Origin': 'http://jzsgl.coc.gov.cn',
        'Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Requested-With': 'XMLHttpRequest',
        "User-Agent": fake_useragent()
    }
    name_args = self.get_query_arguments("name")
    ent_args = self.get_query_arguments("ent")
    # 注册号 (e.g. 川251141528602)
    reg_no_args = self.get_query_arguments("reg_no")
    # 注册证书编号
    cer_no_args = self.get_query_arguments("cer_no")
    # 执业资格证书编号
    qua_no_args = self.get_query_arguments("qua_no")
    page_args = self.get_query_arguments("page")
    params = {
        "Xm": name_args[0] if len(name_args) else "",
        "Qymc": ent_args[0] if len(ent_args) else "",
        "Zcbh": reg_no_args[0] if len(reg_no_args) else "",
        "Zsbh": cer_no_args[0] if len(cer_no_args) else "",
        "Zgzsbh": qua_no_args[0] if len(qua_no_args) else "",
        "Sjbm": "510000",  # 省级编号 (510000 = 四川省)
        "Zclb": "00",
        "PageNo": int(page_args[0]) if len(page_args) else 1,
    }
    # NOTE(review): urlencode() serializes the nested `params` dict via its
    # str() repr — presumably what this endpoint expects; confirm if changed.
    payload = {
        'action': '020103',
        'param': params
    }
    # Registration-type codes returned in the "zclb" field.
    labels = {
        "00": "最新状态",
        "01": "初始注册",
        "02": "变更注册",
        "03": "延续注册",
        "04": "增项注册",
        "05": "重新注册",
        "06": "遗失补办",
        "07": "注销注册",
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(payload))
    response = await self.browser.fetch(request)
    reply = json.loads(response.body.decode("utf-8"))
    result_list = []
    for constructor in reply["AppendData"]["Data"]:
        # "zyList" packs profession|validity pairs joined by '^'.
        pairs = [item.split('|') for item in constructor["zyList"].split('^')]
        profession = ','.join(pair[0] for pair in pairs)
        validity = ','.join(pair[1] for pair in pairs)
        result_list.append({
            "province": constructor["sjmc"],
            "enterprise": constructor["qymc"],
            "name": constructor["xm"],
            "register_no": constructor["zcbh"].strip(),
            "register_certificate_no": constructor["zsbh"].strip(),
            "qualification_certificate_no": constructor["zgzsbh"].strip(),
            "profession": profession,
            "validity": validity,
            "type": labels[constructor["zclb"]]
        })
    self.send_json_response(result_list)