async def get(self):
    """Proxy the housing-fund captcha image to the client.

    Fetches the captcha from the upstream site, forwards the upstream
    ``Set-Cookie`` header unchanged (the PHPSESSID session must accompany
    the later login request) and writes the raw image bytes as the
    response body.  On any failure a JSON error payload is sent instead.
    """
    captcha_url = ("http://www.cdzfgjj.gov.cn/api.php?op=checkcode"
                   "&code_len=4&font_size=20&width=130&height=50")
    headers = {"User-Agent": fake_useragent()}
    try:
        request = HTTPRequest(captcha_url, method="GET", headers=headers)
        response = await self.browser.fetch(request)
        if response.code == 200:
            # Session cookie is issued together with the captcha image.
            cookie = response.headers.get_list("Set-Cookie")[0]
            # Debug print replaced with the handler's logger.
            self.logger.debug("in captcha handler, cookie: %s", cookie)
            # Raw string: plain '\s' is an invalid escape sequence.
            m = re.match(r'PHPSESSID=(.*?);\s', cookie)
            if not m:
                self.logger.debug("get cookie error")
                raise HTTPError
            # Forward the upstream cookie untouched so the client can
            # replay it on the login request.
            self.set_header("Set-Cookie", cookie)
            self.set_header("Content-Type", 'image')
            self.write(response.body)
        else:
            self.logger.debug("none 200 response code")
            raise HTTPError
    except HTTPError:
        self.logger.error("[ InvoiceCaptchaHandler - get() ] caught HTTPError")
        self.send_json_response({"msg": "invoice captcha error"}, 0)
async def get_detail(url):
    """Fetch *url* and extract per-post detail records from its inline script.

    The page renders detail text through jQuery ``.html("...")`` calls
    guarded by ``start_num == <id>`` branches; post NO. 6 lives in the
    trailing ``else`` branch instead of a numbered guard.

    :param url: detail page URL (shared by all posts).
    :returns: ``{post_id: {"description", "result", "report_date"}}``.
    """
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await AsyncHTTPClient().fetch(request)
    html = response.body.decode("utf-8")
    # Raw strings throughout: '\d', '\(' and '\)' are invalid escape
    # sequences in plain string literals.
    script = re.compile(r'<script>(.+?)</script>', re.DOTALL).findall(html)[0]
    matches = re.compile(r'start_num==(\d)\){(.+?)}', re.DOTALL).findall(script)
    # Hoisted: this pattern is reused for every match and for the else branch.
    content_re = re.compile(r'html\("(.+?)"\);')
    details = {}
    for match in matches:
        pid = int(match[0])
        contents = content_re.findall(match[1])
        details[pid] = {
            "description": contents[0],
            "result": contents[1],
            "report_date": contents[2],
        }
    # Process NO. 6: its content sits in the final "else" branch.
    last_else_script = re.compile(r'else.?{(.+?)}', re.DOTALL).findall(script)[0]
    last_content = content_re.findall(last_else_script)
    details[6] = {
        "description": last_content[0],
        "result": last_content[1],
        "report_date": last_content[2],
    }
    return details
async def prepare(self):
    """Log in to the Chengdu housing-fund site before the request handler runs.

    The client supplies the PHPSESSID (obtained earlier from the captcha
    endpoint) plus card number, password and the solved captcha as query
    arguments.  On success the session stored in ``self.COOKIE`` is
    authenticated and usable by the handler method.
    """
    login_url = "http://www.cdzfgjj.gov.cn/index.php?m=content&c=gjj&a=login"
    tmp_cookie = self.get_query_argument("PHPSESSID")
    # The cookie value arrives URL-encoded; rebuild the raw Cookie header.
    self.COOKIE = "PHPSESSID=" + unquote(tmp_cookie)
    print("in cdfund handler", self.COOKIE)
    card = self.get_query_argument("card")
    password = self.get_query_argument("password")
    captcha = self.get_query_argument("captcha")
    # Upstream login form field names.
    data = {
        "cardNo": card,
        "password": password,
        "verifyCode": captcha,
    }
    headers = {
        "User-Agent": fake_useragent(),
        "Content-Type": "application/x-www-form-urlencoded",
        "Cookie": self.COOKIE,
    }
    request = HTTPRequest(login_url, method="POST", headers=headers,
                          body=urlencode(data))
    response = await self.browser.fetch(request)
    if response.code == 200:
        print("login success")
    else:
        self.logger.debug("none 200 response code")
        raise HTTPError
async def do_process_logic(self, *args):
    """Search the SFDA (food & drug) database for *query* within *category*.

    ``args[0]`` selects the category, which must be a key of
    ``self.POST_DATA``; the ``query`` and optional ``page`` query
    arguments drive the upstream POST.  Sends a list of
    ``{"title", "link"}`` dicts as the JSON response.
    """
    category = args[0].lower()
    url_template = "http://app1.sfda.gov.cn/datasearch/face3/search.jsp"
    base_url_template = "http://app1.sfda.gov.cn/datasearch/face3/{}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "app1.sfda.gov.cn",
        "Origin": "http://app1.sfda.gov.cn",
        "User-Agent": fake_useragent(),
        "Referer": "http://app1.sfda.gov.cn/datasearch/face3/base.jsp?tableId=120&tableName=TABLE120&title=%CA%B3%C6%B7%C9%FA%B2%FA%D0%ED%BF%C9%BB%F1%D6%A4%C6%F3%D2%B5(SC)&bcId=145275419693611287728573704379",
        "Cookie": fetch_cookie()
    }
    query = self.get_query_argument("query")
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else "1"
    # Guard clause: unknown category means there is nothing to search.
    if category not in self.POST_DATA:
        self.send_json_response(self.config["error"]["REQUEST_ERR"], 0)
        return
    # Copy the template: POST_DATA is shared class-level state and must not
    # be mutated with per-request keyword/page values (previously every
    # request polluted the shared dict).
    data = dict(self.POST_DATA[category])
    data["keyword"] = quote(query, encoding="utf-8")
    data["curstart"] = page
    request = HTTPRequest(url_template, method="POST", headers=headers,
                          body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8").strip()
    root = lxml.html.fromstring(html)
    links = root.xpath('//table/tr/td/p/a')
    result_list = []
    for link in links:
        # href is a JS call; the second comma-separated argument is the
        # quoted relative content URL.
        href = link.attrib["href"].split(',')[1].strip("'")
        match = re.match(r'(content.+tableView=)(.+)(&Id.+)', href)
        # Re-quote the table-title segment in gb2312, as the site expects.
        href = "".join([
            match.group(1),
            quote(match.group(2), encoding="gb2312"),
            match.group(3)
        ])
        result_list.append({
            "title": link.text_content().strip(),
            "link": base_url_template.format(href)
        })
    self.send_json_response(result_list)
async def get_detail(url):
    """Fetch *url* and parse per-post details out of its inline <script>.

    Detail text is emitted via jQuery ``.html("...")`` calls inside
    ``start_num == <id>`` branches; the sixth post is handled by the
    final ``else`` branch.

    :param url: shared detail-page URL.
    :returns: ``{post_id: {"description", "result", "report_date"}}``.
    """
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await AsyncHTTPClient().fetch(request)
    html = response.body.decode("utf-8")
    # Raw strings: '\d', '\(' and '\)' are invalid escapes in plain literals.
    script = re.compile(r'<script>(.+?)</script>', re.DOTALL).findall(html)[0]
    matches = re.compile(r'start_num==(\d)\){(.+?)}', re.DOTALL).findall(script)
    # Compile once; the same pattern serves every branch below.
    content_re = re.compile(r'html\("(.+?)"\);')
    details = {}
    for match in matches:
        pid = int(match[0])
        contents = content_re.findall(match[1])
        details[pid] = {
            "description": contents[0],
            "result": contents[1],
            "report_date": contents[2],
        }
    # Process NO. 6 — found in the trailing "else" branch.
    last_else_script = re.compile(r'else.?{(.+?)}', re.DOTALL).findall(script)[0]
    last_content = content_re.findall(last_else_script)
    details[6] = {
        "description": last_content[0],
        "result": last_content[1],
        "report_date": last_content[2],
    }
    return details
async def do_process_logic(self):
    """Scrape the Chengdu EPB page and return the current city AQI data.

    Sends a one-element JSON list containing the headline index, level,
    main pollutant, formatted update time, and per-pollutant readings.
    """
    # URL where the source data (page) is located.
    url = "http://www.cdepb.gov.cn/cdepbws/Web/gov/airquality.aspx"
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    # ---- BEGIN page-specific analysis ----
    city_aqi_div = root.xpath('//div[@class="CityAQI"]')[0]
    main_index = city_aqi_div.xpath('.//span[@id="ContentBody_AqiData"]')[0].text_content()
    aqi_level = city_aqi_div.xpath('.//span[@id="ContentBody_StdName"]')[0].text_content()
    main_pollution = city_aqi_div.xpath('.//span[@id="ContentBody_FirstPoll"]')[0].text_content()
    update_time = city_aqi_div.xpath('.//span[@id="ContentBody_AQITime"]')[0].text_content()
    # Pull the numeric fragments out of the free-form timestamp text.
    # Raw string: plain '\d' is an invalid escape sequence.
    matches = re.findall(r'\d+', str(update_time))
    update_time = "{}-{}-{} {}".format(*matches)
    aqi = {
        "main_index": main_index,
        "main_pollution": main_pollution,
        "aqi_level": aqi_level,
        "time": update_time
    }
    # Per-pollutant table: one row per pollutant (name, index columns).
    pollution_table = city_aqi_div.xpath('//tbody')[0]
    pollutions = [
        {
            "pollution": tr.xpath('.//td')[0].text_content(),
            "index": tr.xpath('.//td')[1].text_content(),
        }
        for tr in pollution_table.xpath('.//tr')
    ]
    aqi["pollutions"] = pollutions
    # ---- END page-specific analysis ----
    self.send_json_response([aqi])
def make_request(self, method, url, start=1, end=20):
    """Build an HTTPRequest carrying this object's session cookie.

    GET requests take no body; any other method posts the paging window
    (``startRow``/``endRow``) as a form-encoded body.
    """
    request_headers = {
        "User-Agent": fake_useragent(),
        "Cookie": self.session,
    }
    if method == "get":
        return HTTPRequest(url, method=method.upper(), headers=request_headers)
    payload = urlencode({"startRow": start, "endRow": end})
    return HTTPRequest(url, method=method.upper(), headers=request_headers,
                       body=payload)
async def do_process_logic(self, *args):
    """Query the SFDA database for *query* in the category named by args[0].

    Responds with a JSON list of ``{"title", "link"}`` entries, or a
    REQUEST_ERR payload for an unknown category.
    """
    category = args[0].lower()
    url_template = "http://app1.sfda.gov.cn/datasearch/face3/search.jsp"
    base_url_template = "http://app1.sfda.gov.cn/datasearch/face3/{}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "app1.sfda.gov.cn",
        "Origin": "http://app1.sfda.gov.cn",
        "User-Agent": fake_useragent(),
        "Referer": "http://app1.sfda.gov.cn/datasearch/face3/base.jsp?tableId=120&tableName=TABLE120&title=%CA%B3%C6%B7%C9%FA%B2%FA%D0%ED%BF%C9%BB%F1%D6%A4%C6%F3%D2%B5(SC)&bcId=145275419693611287728573704379",
        "Cookie": fetch_cookie()
    }
    query = self.get_query_argument("query")
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else "1"
    # Guard clause instead of a redundant else-after-return.
    if category not in self.POST_DATA:
        self.send_json_response(self.config["error"]["REQUEST_ERR"], 0)
        return
    # Work on a copy: POST_DATA is shared class-level state; writing the
    # per-request keyword/page into it leaked across requests.
    data = dict(self.POST_DATA[category])
    data["keyword"] = quote(query, encoding="utf-8")
    data["curstart"] = page
    request = HTTPRequest(url_template, method="POST", headers=headers,
                          body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8").strip()
    root = lxml.html.fromstring(html)
    links = root.xpath('//table/tr/td/p/a')
    result_list = []
    for link in links:
        # The second comma-separated JS argument is the quoted content URL.
        href = link.attrib["href"].split(',')[1].strip("'")
        match = re.match(r'(content.+tableView=)(.+)(&Id.+)', href)
        # The title segment must be percent-encoded as gb2312 for the site.
        href = "".join([match.group(1),
                        quote(match.group(2), encoding="gb2312"),
                        match.group(3)])
        result_list.append({
            "title": link.text_content().strip(),
            "link": base_url_template.format(href)
        })
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Query Chengdu real-estate agency records (BusSearch, Class=13).

    The client must supply the session obtained earlier (delivered under
    the fixed cookie name JSESSIONID_INVOICE, rebuilt here as the
    upstream NETSCAPE_ID cookie) together with the agency type, name and
    the solved captcha.  Responds with the parsed result-table rows.
    """
    url = "http://www.cdfgj.gov.cn/BusinessQuery/BusSearch.aspx?action=ucEnterpriseQuery&Class=13"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": fake_useragent()
    }
    # Rebuild the upstream session cookie from the URL-encoded value the
    # client passes in the JSESSIONID_INVOICE query argument.
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    cookie = "NETSCAPE_ID=" + unquote(tmp_cookie)
    headers["Cookie"] = cookie
    org_type = self.get_query_argument("type")
    org_name = self.get_query_argument("name")
    captcha = self.get_query_argument("captcha")
    # ASP.NET WebForms post: __VIEWSTATE / __VIEWSTATEGENERATOR are fixed
    # values captured from the live form; ImageButton4.x/.y emulate the
    # click coordinates on the image submit button.
    data = {
        "__VIEWSTATEGENERATOR": "74F44EA4",
        "ID_ucEnterpriseQuery$ddlEnterpriseType": org_type,
        "ID_ucEnterpriseQuery$txtName": org_name,
        "ID_ucEnterpriseQuery$txtRandomCode": captcha,
        "__VIEWSTATE": "/7q+lMJNWwtjfXt4wCcX/buarTl1CXpolPC5i21QOAmuD5888znPXyJ4SlAAfk0HmfeeVMx5H571JnvNXROoem0otSLiX73pzikEjaPaj46p9qqgfqNbyGfkbB/Z/JJYYy94w3IdjeW6j2lGVed6Xv8+uVMhNJtk7szqdCJoipUY6U/NttwCtUc88yYnLqU8eJ5YGFcEfDkdIyXfdZt4DerSIYjT+MqsOtRim09uZ1qyggVYvzatFmK/xNh3IjTHbHfO//R4fs99WseIBBDFL96lbWmXxbcSQ5rteHbUWwNv+OoNZ7hnOxXPuAQZC9gvjiUGpx00LavO9KfSfxLKpwm8LaKgENO9kP0l6zorHh2PvSgr8abhuJKcQuh46DoRihtKp1gYv/cbG+fUVwiLhGhOxsJoXlKJmk2Fyz+cXMy6SuDSFPMz/UWPSJIaSQodeP5L7+LNLIVH76uvww7oDAElcOjmjjmNB4riG52ZbS/yJkTWvFx5flwLZB/OJ7k17GkcFL93T8xuAfGaCUukdj3H+zkyNxI9z3E0BT2JZtBzIJlVpQZ9pnGndAa2hyo16aozmBUAnXN0La3DkArjB5SUcN21vuGR+tcts07tK+43j49imna17dWnC5NjfTU77DmtgNwchz111tFtyKLXrBX6RLNS6o4Sr7Adza9ysq8ANjSb5KBQem61ZA+i1lbOeORmfcoeTXqjIZ93r++lPEWdPaEv84lctfxLwwkHFstcgcqucxtG1Zi3zvglzoxZ7A+XgcmgDzBbEKkeGZg5kgsG5PePtCSk5I9FxEK/AirfO9WldVMSo2nm/IOUbMshpW5+e1p+3u3ApeQzGlUd3vaYXHUyll7/tKS1tlZnNN3ysQguUAJGCyZqqWEgXRpYmC2Wwf4bR7Shc997xP+QaOT31AfHOyiA0Bd5PYA70yvXsMISJdQZOpommTCszJeZJzqtOpspf5OhUyWObs/rCuKAUVcbZOZLCoGMoGJaUcMtH9p8W/K8qgu42K06ww6qrKfXvu2O9F6Zl8HNqsSjNupcmXRjx6My+gnj3YU1A/PrLyb/DPU+hTKQldreh0IH+bjU2a/E62ihTMdUSBE4mmGwLCa9/L0AKpD3LSaswdgc+c5ZbY+A7x342NI=",
        "ID_ucEnterpriseQuery$ImageButton4.x": 57,
        "ID_ucEnterpriseQuery$ImageButton4.y": 6,
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//table[@id="ID_ucEnterpriseQuery_GridView1"]/tr')
    result_list = []
    # Data rows start at index 1 (row 0 presumably the header row — confirm).
    for n in range(1, len(trs)):
        tds = trs[n].xpath('.//td')
        tmp = {
            "name": tds[0].text_content().strip(),
            "area": tds[1].text_content().strip(),
            "level": tds[2].text_content().strip(),
            "addr": tds[3].text_content().strip(),
        }
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """List found-property notices from the Chengdu taxi site.

    Fetches the (optionally paged) notice index, follows every notice
    link and extracts its body text.  Detail pages that do not return
    200 are skipped.  Responds with a JSON list of ``{"content": ...}``.
    """
    url = "http://www.cdtaxi.cn/zhaolingxx/index_{}.html"
    base_url = "http://www.cdtaxi.cn{}"
    headers = {
        "Host": "www.cdtaxi.cn",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Origin": "http://www.cdtaxi.cn",
        "User-Agent": fake_useragent(),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Upgrade-Insecure-Request": "1",
        "Accept-Language": "zh-CN,zh;q=0.8",
    }
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url.format(page), method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    divs = root.xpath('//div[@class="special_p"]')
    result_list = []
    for div in divs:
        link = div.xpath('./a[1]')
        tmp_url = base_url.format(link[0].attrib["href"])
        request = HTTPRequest(tmp_url, method="GET", headers=headers)
        response = await self.browser.fetch(request)
        # Guard clause replaces the old dead "else: continue" tail and
        # flattens the nesting.
        if response.code != 200:
            continue
        html = response.body.decode("utf-8")
        root = lxml.html.fromstring(html)
        found = root.xpath('//div[@class="newsshow"]/p')
        content = found[0].text_content().strip()
        # Some notices keep the body in a <div> rather than a <p>.
        if not content:
            found = root.xpath('//div[@class="newsshow"]/div')
            content = found[1].text_content().strip()
        result_list.append({"content": content})
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Query the rc114 personnel-archive service by name and ID number.

    Sends the query as a JSON POST and unwraps the ASP.NET AJAX ``d``
    envelope.  Responds with either the archive record, or the upstream
    error/state message with status 0.
    """
    url = "http://i.rc114.com/InfoQuery_ArcInfo_Pub.aspx/queryData"
    headers = {
        "Content-Type": "application/json",
        "User-Agent": fake_useragent(),
    }
    data = {
        "name": self.get_query_argument("name"),
        "id": self.get_query_argument("id"),
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=json.dumps(data))
    response = await self.browser.fetch(request)
    result = response.body.decode("utf-8")
    data = json.loads(result)
    # The real payload is the ``d`` field, itself JSON-like but written
    # with single quotes, so it cannot be parsed directly.
    tmp = data["d"]
    # NOTE(review): blanket quote replacement breaks if any value contains
    # an apostrophe or embedded quote — confirm acceptable for this source.
    tmp = re.sub(r'\'', r'"', tmp)
    data = json.loads(tmp)
    # Example error payloads from upstream (messages say the ID number has
    # the wrong length / is invalid):
    # {"state": 3 ,"error_message":'您的身份证位数不正确,请重新填写'}
    # {"state": 3 ,"error_message":'您的身份证无效,请重新填写'}
    if data["state"] == 3:
        self.send_json_response(data["error_message"], 0)
    # Example success payload:
    # {"error_message":'' ,"doc_state_title":'档案状态:' , "doc_id_title":'档案编号:',"doc_id": ...}
    else:
        # Found file info.
        if "doc_id" in data:
            file_info = {
                "name": data["person_name"],
                "doc_id": data["doc_id"],
                "school": data["graduate_school"],
                "doc_state": data["doc_state"],
                "doc_unit": data["doc_now_unit_name"],
                "doc_old_unit": data["doc_old_unit"],
                "doc_in_time": data["doc_in_time"]
            }
            self.send_json_response([file_info])
        # File info not found; forward the state text as the error.
        else:
            self.send_json_response(data["doc_state"], 0)
def make_request(self, method, url, start=1, end=20):
    """Create an upstream request bound to the stored session cookie.

    For ``"get"`` no body is attached; every other method posts the
    paging window as ``startRow``/``endRow`` form fields.
    """
    verb = method.upper()
    headers = {
        "User-Agent": fake_useragent(),
        "Cookie": self.session,
    }
    if method != "get":
        body = urlencode({"startRow": start, "endRow": end})
        return HTTPRequest(url, method=verb, headers=headers, body=body)
    return HTTPRequest(url, method=verb, headers=headers)
async def do_process_logic(self):
    """Verify a Sichuan invoice against the provincial tax service.

    Requires the upstream JSESSIONID (returned by the client as the
    JSESSIONID_INVOICE query argument) plus invoice code / number /
    password and the solved captcha.  The upstream answer text is sent
    back as a single message string.
    """
    url = "http://182.151.197.163:7002/FPCY_SCDS_WW/wwfpcy"
    headers = {
        "User-Agent": fake_useragent(),
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # Rebuild the upstream session cookie from the URL-encoded value.
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    cookie = "JSESSIONID=" + unquote(tmp_cookie)
    headers["Cookie"] = cookie
    invoice_code = self.get_query_argument("invoice_code")
    invoice_num = self.get_query_argument("invoice_num")
    invoice_psd = self.get_query_argument("invoice_psd")
    captcha = self.get_query_argument("captcha")
    data = {
        "fpdm0": invoice_code,
        "fphm0": invoice_num,
        "yzm0": invoice_psd,
        "imgcode": captcha,
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    # The answer lives either in span#cxjj (query result) or span#message
    # (error/notice).  The two branches previously duplicated identical
    # bodies; folded into one, keeping span#cxjj's priority.
    spans = root.xpath('//span[@id="cxjj"]') or root.xpath('//span[@id="message"]')
    if spans:
        result = str(spans[0].text_content())
        result = re.sub(r'\r|\n|\t', r'', result)
        self.send_json_response([{"msg": result}])
    else:
        self.send_json_response(self.config["error"]["SIMPLE_ERR"], 0)
async def do_process_logic(self):
    """Look up a disabled-person certificate record at rkk.cdpf.org.cn.

    Requires the session delivered earlier under the fixed name
    JSESSIONID_INVOICE, plus the person's name, certificate number and
    the solved captcha.  Responds with the upstream message text.
    """
    url = "http://rkk.cdpf.org.cn/queryDistrictrecordCDPF.action"
    headers = {
        "User-Agent": fake_useragent(),
        "Content-Type": "application/x-www-form-urlencoded",
    }
    # Rebuild the upstream JSESSIONID cookie from the URL-encoded value.
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    cookie = "JSESSIONID=" + unquote(tmp_cookie)
    headers["Cookie"] = cookie
    name = self.get_query_argument("name")
    no = self.get_query_argument("no")
    captcha = self.get_query_argument("captcha")
    data = {
        "getAjax": "true",
        # Fixed query-type token captured from the live site.
        "type": "211948D59141A611",
        "name": name,
        "cid": no,
        "cardType": 1,
        "checkCode": captcha
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    # The upstream page is GBK-encoded.
    html = response.body.decode("gbk")
    root = lxml.html.fromstring(html)
    p1 = root.xpath('//p[@id="checkad1"]')
    p2 = root.xpath('//p[@id="checkad2"]')
    if len(p1):
        result = str(p1[0].text_content())
        # checkad1 is sent with status 0 while checkad2 is not — it
        # presumably carries a failure message; confirm against the page.
        self.send_json_response([{"msg": result}], 0)
    elif len(p2):
        result = str(p2[0].text_content())
        self.send_json_response([{"msg": result}])
    else:
        self.send_json_response(self.config["error"]["SIMPLE_ERR"], 0)
async def do_process_logic(self):
    """Aggregate Guiyang bulletin posts from the home page with their details.

    Builds a ``{post_id: {...}}`` map from the listing table, fetches the
    shared detail page once via ``get_detail`` and merges the per-post
    fields in before responding with the combined list.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    purl = urlparse(url)
    base_url = purl.scheme + "://" + purl.netloc
    headers = {
        "User-Agent": fake_useragent()
    }
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//div[@class="part_2_table float_right"]/table/tr')
    posts = {}
    detail_url = ""
    for tr in trs:
        a = tr.xpath('.//td[1]/a[1]')[0]
        title = a.text_content()
        finish_date = tr.xpath('.//td[2]')[0].text_content()
        detail_url = base_url + a.attrib["href"]
        # Post id is the value of the link's single query parameter.
        post_id = int(detail_url.split("=")[1])
        post = {
            "title": title,
            "finish_date": finish_date
        }
        posts[post_id] = post
    # All detail texts live on one page (same URL without the query
    # string); the last link seen supplies it.
    real_url = detail_url.split('?')[0]
    details = await get_detail(real_url)
    # Assumes post ids form the contiguous range 1..N — TODO confirm.
    for i in range(1, len(list(posts.keys()))+1):
        posts[i].update(details[i])
    results = []
    for k, v in posts.items():
        v["post_id"] = k
        results.append(v)
    # Invoke this method to send the JSON response.
    self.send_json_response(results)
async def do_process_logic(self):
    """Search Chengdu pre-sale records (ShowNew) and return the table rows.

    All filters (name, district, from/to dates, page) are optional query
    arguments; empty strings are substituted when absent.
    """
    # NOTE: the separator before "region" had been corrupted into the
    # "®" character (an HTML-entity-decoded "&reg"); restored to the
    # literal "&region=" parameter the ASP.NET page expects.
    url_template = ("http://www.cdfgj.gov.cn/SCXX/ShowNew.aspx"
                    "?iname={name}&region={district}&page={page}"
                    "&st={from_time}&et={to_time}")
    headers = {"User-Agent": fake_useragent()}
    name_args = self.get_query_arguments("name")
    district_args = self.get_query_arguments("district")
    from_args = self.get_query_arguments("from")
    to_args = self.get_query_arguments("to")
    page_args = self.get_query_arguments("page")
    params = dict()
    params["name"] = name_args[0] if len(name_args) else ""
    params["district"] = district_args[0] if len(district_args) else ""
    params["from_time"] = from_args[0] if len(from_args) else ""
    params["to_time"] = to_args[0] if len(to_args) else ""
    params["page"] = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url_template.format(**params), method="GET",
                          headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//table[@id="ID_ucShowNew_gridView"]/tr')
    tr_num = len(trs)
    result_list = []
    # Data rows start at index 1 (row 0 presumably the header — confirm).
    for n in range(1, tr_num):
        tds = trs[n].xpath('.//td')
        tmp = {
            "no": tds[0].text_content().strip(),
            "name": tds[1].text_content().strip(),
            "district": tds[2].text_content().strip(),
            "addr": tds[3].text_content().strip(),
            "use": tds[4].text_content().strip(),
            "developer": tds[5].text_content().strip(),
            "area": tds[6].text_content().strip(),
            "time": tds[7].text_content().strip(),
        }
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Search Chengdu pre-sale records (ShowNew); all filters optional.

    Responds with a JSON list of the parsed grid-view rows.
    """
    # Fix: "®ion" in the template was an HTML-entity-decoded "&reg" —
    # restored to the intended "&region=" query parameter.
    url_template = ("http://www.cdfgj.gov.cn/SCXX/ShowNew.aspx"
                    "?iname={name}&region={district}&page={page}"
                    "&st={from_time}&et={to_time}")
    headers = {
        "User-Agent": fake_useragent()
    }
    name_args = self.get_query_arguments("name")
    district_args = self.get_query_arguments("district")
    from_args = self.get_query_arguments("from")
    to_args = self.get_query_arguments("to")
    page_args = self.get_query_arguments("page")
    params = dict()
    params["name"] = name_args[0] if len(name_args) else ""
    params["district"] = district_args[0] if len(district_args) else ""
    params["from_time"] = from_args[0] if len(from_args) else ""
    params["to_time"] = to_args[0] if len(to_args) else ""
    params["page"] = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url_template.format(**params), method="GET",
                          headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    trs = root.xpath('//table[@id="ID_ucShowNew_gridView"]/tr')
    tr_num = len(trs)
    result_list = []
    # Skip row 0 (presumably the header row — confirm against the page).
    for n in range(1, tr_num):
        tds = trs[n].xpath('.//td')
        tmp = {
            "no": tds[0].text_content().strip(),
            "name": tds[1].text_content().strip(),
            "district": tds[2].text_content().strip(),
            "addr": tds[3].text_content().strip(),
            "use": tds[4].text_content().strip(),
            "developer": tds[5].text_content().strip(),
            "area": tds[6].text_content().strip(),
            "time": tds[7].text_content().strip(),
        }
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """List lost-property registrations from the Chengdu taxi site.

    Supports an optional ``page`` query argument; responds with a JSON
    list of parsed registration records.
    """
    url = "http://www.cdtaxi.cn/shiwudj/index_{}.html"
    headers = {
        "Host": "www.cdtaxi.cn",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Origin": "http://www.cdtaxi.cn",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Upgrade-Insecure-Request": "1",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "User-Agent": fake_useragent()
    }
    page_args = self.get_query_arguments("page")
    page = page_args[0] if len(page_args) else 1
    request = HTTPRequest(url.format(page), method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    items = root.xpath('//div[@class="xlost_item"]')
    total = len(items)
    result_list = []
    # Starts at 1: the first xlost_item is presumably a header block —
    # TODO confirm against the live page.
    for n in range(1, total):
        tmp = {}
        # Row 1 of each item's table holds the summary cells.
        tds = items[n].xpath('.//table/tbody/tr[1]/td')
        tmp["date"] = tds[0].text_content().strip()
        # Full texts are carried in each link's title attribute.
        tmp["lost_item"] = tds[1].xpath('.//a')[0].attrib["title"]
        tmp["pickup"] = tds[2].xpath('.//a')[0].attrib["title"]
        tmp["getoff"] = tds[3].xpath('.//a')[0].attrib["title"]
        tmp["contact"] = tds[4].text_content().strip()
        tmp["tel"] = tds[5].text_content().strip()
        tmp["status"] = tds[6].text_content().strip()
        # Row 2 holds the reply text after a "label: text" colon.
        tr2 = items[n].xpath('.//table/tbody/tr[2]')[0]
        tmp["resp"] = tr2.text_content().strip().split(':')[1]
        result_list.append(tmp)
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Proxy the CDPF captcha image and re-issue its session cookie.

    The upstream JSESSIONID is surfaced to the client under the fixed
    cookie name JSESSIONID_INVOICE so follow-up query handlers can send
    it back.  The response body is the raw captcha image.
    """
    captcha_url = "http://rkk.cdpf.org.cn/rand.jsp"
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(captcha_url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    # Get the session cookie from the response header.
    cookie = response.headers.get_list("Set-Cookie")[0]
    # Raw string: plain '\s' is an invalid escape sequence.
    m = re.match(r'JSESSIONID=(.*?);\s', cookie)
    if m:
        session = m.group(1)
    else:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    # Change the cookie key name to JSESSIONID_INVOICE.
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_header("Content-Type", 'image')
    self.write(response.body)
async def do_process_logic(self):
    """Look up product information for a barcode on the ANCC public search.

    Responds with a single-item JSON list on success, or the NO_RESULT
    error payload when the page shows no product/supplier data.
    """
    url_template = "http://search.anccnet.com/searchResult2.aspx?keyword={0}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "search.anccnet.com",
        "Referer": "http://www.ancc.org.cn/Service/queryTools/Internal.aspx",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": fake_useragent()
    }
    # The site expects the keyword percent-encoded as gb2312.
    barcode = quote(self.get_query_argument("code"), encoding="gb2312")
    response = await self.browser.fetch(
        HTTPRequest(url_template.format(barcode), method="GET", headers=headers))
    root = lxml.html.fromstring(response.body.decode("gb2312"))
    info_dds = root.xpath('//dl[@class="p-info"]/dd')
    supplier_dds = root.xpath('//dl[@class="p-supplier"]/dd')
    # Guard clause: no product or no supplier block means no result.
    if not (len(info_dds) and len(supplier_dds)):
        self.send_json_response(self.config["error"]["NO_RESULT_ERR"], 0)
        return

    def text_of(node):
        # Normalized cell text.
        return node.text_content().strip()

    info = {
        "barcode": text_of(info_dds[0]),
        "name": text_of(info_dds[1]),
        "specs": text_of(info_dds[2]),
        "desc": text_of(info_dds[3]),
        "brand": text_of(supplier_dds[0]),
        "manufacturer": text_of(supplier_dds[1]),
    }
    self.send_json_response([info])
async def do_process_logic(self):
    """Relay the cdfgj captcha image, re-keying its session cookie.

    The upstream NETSCAPE_ID session value is handed back to the client
    under the fixed cookie name JSESSIONID_INVOICE — follow-up query
    handlers depend on that exact name, so it must not change.
    """
    captcha_url = "http://www.cdfgj.gov.cn/BusinessQuery/UserControls/RandomCode.axd"
    response = await self.browser.fetch(
        HTTPRequest(captcha_url, method="GET",
                    headers={"User-Agent": fake_useragent()}))
    # Upstream issues the session in the first Set-Cookie header.
    set_cookie = response.headers.get_list("Set-Cookie")[0]
    match = re.match('NETSCAPE_ID=(.*?);', set_cookie)
    if match is None:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    self.set_cookie("JSESSIONID_INVOICE", match.group(1))
    self.set_header("Content-Type", 'image')
    self.write(response.body)
async def do_process_logic(self):
    """Serve the CDPF captcha and expose its session as JSESSIONID_INVOICE.

    Raises HTTPError(500) when the upstream Set-Cookie header does not
    contain the expected JSESSIONID.
    """
    captcha_url = "http://rkk.cdpf.org.cn/rand.jsp"
    headers = {
        "User-Agent": fake_useragent()
    }
    request = HTTPRequest(captcha_url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    # Get the session cookie from the response header.
    cookie = response.headers.get_list("Set-Cookie")[0]
    # Raw string: plain '\s' is an invalid escape sequence.
    m = re.match(r'JSESSIONID=(.*?);\s', cookie)
    if m:
        session = m.group(1)
    else:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    # Change the cookie key name to JSESSIONID_INVOICE.
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_header("Content-Type", 'image')
    self.write(response.body)
async def do_process_logic(self):
    """Return Guiyang bulletin posts (title, finish date, detail fields).

    Reads the listing table from the home page, then fetches the shared
    detail page once through ``get_detail`` and merges its fields into
    each post before responding.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    parts = urlparse(url)
    base_url = parts.scheme + "://" + parts.netloc
    fetched = await self.browser.fetch(
        HTTPRequest(url, method="GET",
                    headers={"User-Agent": fake_useragent()}))
    page = lxml.html.fromstring(fetched.body.decode("utf-8"))
    rows = page.xpath('//div[@class="part_2_table float_right"]/table/tr')
    posts = {}
    detail_url = ""
    for row in rows:
        anchor = row.xpath('.//td[1]/a[1]')[0]
        detail_url = base_url + anchor.attrib["href"]
        # The link's single query parameter carries the post id.
        pid = int(detail_url.split("=")[1])
        posts[pid] = {
            "title": anchor.text_content(),
            "finish_date": row.xpath('.//td[2]')[0].text_content(),
        }
    # One shared detail page serves every post; drop the query string.
    details = await get_detail(detail_url.split('?')[0])
    for pid in range(1, len(list(posts.keys())) + 1):
        posts[pid].update(details[pid])
    results = []
    for pid, post in posts.items():
        post["post_id"] = pid
        results.append(post)
    # Invoke this method to send the JSON response.
    self.send_json_response(results)
async def do_process_logic(self):
    """Prepare the taxi lost-property form: captcha, session and form hash.

    Fetches the captcha image (which also sets PHPSESSID), then loads the
    add-form page with that session to read the hidden ``__hash__``
    anti-forgery value.  Both are handed to the client as cookies
    (JSESSIONID_INVOICE / HIDDEN_FORM_HASH) alongside the captcha bytes.
    """
    url = "http://www.cdtaxi.cn/shiwudj/add.html"
    captcha_url = "http://www.cdtaxi.cn/admin.php?m=flogin&a=verify"
    headers = {
        "User-Agent": fake_useragent()
    }
    # Request captcha and cookie.
    request = HTTPRequest(captcha_url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    captcha_data = response.body
    # Get the session cookie from the response header.
    cookie = response.headers.get_list("Set-Cookie")[0]
    # Raw string: plain '\s' is an invalid escape sequence.
    m = re.match(r'PHPSESSID=(.*?);\s', cookie)
    if m:
        session = m.group(1)
        cookie = "PHPSESSID={}".format(session)
    else:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    # Get the form's hidden anti-forgery value with the same session.
    headers["Cookie"] = cookie
    request0 = HTTPRequest(url, method="GET", headers=headers)
    response0 = await self.browser.fetch(request0)
    html = response0.body.decode("utf-8")
    root = lxml.html.fromstring(html)
    hidden = root.xpath('//input[@name="__hash__"]')
    form_hash = str(hidden[0].attrib["value"])
    # Final response: session exposed under the fixed JSESSIONID_INVOICE name.
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_cookie("HIDDEN_FORM_HASH", form_hash)
    self.set_header("Content-Type", 'image')
    self.write(captcha_data)
async def get(self):
    """Return housing-fund account summaries for the logged-in session.

    ``prepare`` has already authenticated and stored the session cookie
    in ``self.COOKIE``; this scrapes the account-page tables and responds
    with one record per table.
    """
    account_url = "http://www.cdzfgjj.gov.cn/index.php?m=content&c=gjj&a=account"
    headers = {
        "User-Agent": fake_useragent()
    }
    try:
        print(self.COOKIE)
        headers["Cookie"] = self.COOKIE
        request = HTTPRequest(account_url, method="GET", headers=headers)
        response = await self.browser.fetch(request)
        if response.code == 200:
            html = response.body.decode("utf-8")
            root = lxml.html.fromstring(html)
            tables = root.xpath('//div[@class="w-main"]/table')
            result_list = []
            for table in tables:
                tds = table.xpath('.//tr/td[@class="c"]')
                tmp = {
                    "client_no": tds[0].text_content().strip(),
                    "client_name": tds[1].text_content().strip(),
                    "deposit_to_date": tds[2].text_content().strip(),
                    "deposit_base": tds[3].text_content().strip(),
                    "deposit_unit": tds[4].text_content().strip(),
                    "deposit_personal": tds[5].text_content().strip(),
                    "balance": tds[6].text_content().strip(),
                    # NOTE(review): reuses tds[6] — the same cell as
                    # "balance"; looks like it should be tds[7].  Left
                    # unchanged; confirm against the live page.
                    "account_status": tds[6].text_content().strip(),
                }
                result_list.append(tmp)
            self.send_json_response({"results": result_list})
        else:
            self.logger.debug("none 200 response code")
            raise HTTPError
    except HTTPError:
        self.logger.error("[ InvoiceCheckHandler - get() ] caught HTTPError")
        self.send_json_response({"msg": "invoice check error"}, 0)
async def do_process_logic(self):
    """Search the ANCC barcode registry for the ``code`` query argument.

    Sends back a one-element list with product and supplier fields, or
    the NO_RESULT error payload when the result page is empty.
    """
    url_template = "http://search.anccnet.com/searchResult2.aspx?keyword={0}"
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "search.anccnet.com",
        "Referer": "http://www.ancc.org.cn/Service/queryTools/Internal.aspx",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": fake_useragent()
    }
    # Keyword must be percent-encoded as gb2312 for this site.
    barcode = quote(self.get_query_argument("code"), encoding="gb2312")
    search_url = url_template.format(barcode)
    response = await self.browser.fetch(
        HTTPRequest(search_url, method="GET", headers=headers))
    document = lxml.html.fromstring(response.body.decode("gb2312"))
    product_cells = document.xpath('//dl[@class="p-info"]/dd')
    supplier_cells = document.xpath('//dl[@class="p-supplier"]/dd')
    if len(product_cells) and len(supplier_cells):
        info = {
            "barcode": product_cells[0].text_content().strip(),
            "name": product_cells[1].text_content().strip(),
            "specs": product_cells[2].text_content().strip(),
            "desc": product_cells[3].text_content().strip(),
            "brand": supplier_cells[0].text_content().strip(),
            "manufacturer": supplier_cells[1].text_content().strip(),
        }
        self.send_json_response([info])
    else:
        self.send_json_response(self.config["error"]["NO_RESULT_ERR"], 0)
async def do_process_logic(self):
    """Serve the cdfgj captcha image and republish its session cookie.

    The upstream NETSCAPE_ID value is set on the client under the fixed
    name JSESSIONID_INVOICE; downstream query handlers rely on that
    exact cookie name.
    """
    captcha_url = "http://www.cdfgj.gov.cn/BusinessQuery/UserControls/RandomCode.axd"
    headers = {
        "User-Agent": fake_useragent()
    }
    captcha_response = await self.browser.fetch(
        HTTPRequest(captcha_url, method="GET", headers=headers))
    # The session rides in the first Set-Cookie header of the reply.
    first_cookie = captcha_response.headers.get_list("Set-Cookie")[0]
    matched = re.match('NETSCAPE_ID=(.*?);', first_cookie)
    if not matched:
        raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    session = matched.group(1)
    self.set_cookie("JSESSIONID_INVOICE", session)
    self.set_header("Content-Type", 'image')
    self.write(captcha_response.body)
async def do_process_logic(self, *args):
    """Dispatch Chongqing social-security queries.

    args[0] is the action string. "login..." authenticates with id/password
    query arguments and returns the insured person's basic info, setting the
    session cookie under the fixed name "JSESSIONID_INVOICE". Any other
    action has the form "<insurance>/<opt>" where <insurance> is one of
    pension | medical | injury | unemployment | maternity and <opt> is one
    of info | account | detail | balance; the session must be supplied back
    via the JSESSIONID_INVOICE query argument.
    """
    action_str = args[0]
    # ============================
    # Login and return basic info
    # ============================
    if action_str.startswith("login"):
        uid = self.get_query_argument("id")
        password = self.get_query_argument("password")
        login_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/directQuery.action"
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "User-Agent": fake_useragent(),
        }
        data = {"userVo.idNo": uid, "userVo.password": password}
        request = HTTPRequest(login_url, method="POST", headers=headers, body=urlencode(data))
        response = await self.browser.fetch(request)
        html = lxml.html.fromstring(response.body.decode("utf-8"))
        # The login page reports failures via a hidden errorMessage input.
        error = html.xpath('//input[@id="errorMessage"]')
        # Login error
        if len(error):
            error_msg = error[0].attrib["value"]
            self.send_json_response(error_msg, 0)
        else:
            # Get cookie from response header
            cookie = response.headers.get_list("Set-Cookie")[0]
            m = re.match('JSESSIONID=(.*?);\s', cookie)
            if m:
                # Get cookie "JSESSIONID"
                session = m.group(1)
                # Request user info
                info_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/persionBasic.action"
                headers = {
                    "User-Agent": fake_useragent(),
                    "Cookie": cookie
                }
                request = HTTPRequest(info_url, method="GET", headers=headers)
                response = await self.browser.fetch(request)
                if response.code == 200:
                    html = lxml.html.fromstring(
                        response.body.decode("utf-8"))
                    # Basic info is rendered as a fixed-order two-column table;
                    # each row's second <td> holds the value.
                    trs = html.xpath(
                        '//div[@class="person_base_info"]/table/tr')
                    info = {
                        "name": trs[0].xpath(".//td")[1].text_content().strip(),
                        "personal_no": trs[1].xpath(".//td")[1].text_content().strip(),
                        "id": trs[2].xpath(".//td")[1].text_content().strip(),
                        "gender": trs[3].xpath(".//td")[1].text_content().strip(),
                        "nationality": trs[4].xpath(".//td")[1].text_content().strip(),
                        "birth_date": trs[5].xpath(".//td")[1].text_content().strip(),
                        "resident_type": trs[6].xpath(".//td")[1].text_content().strip(),
                        "work_type": trs[7].xpath(".//td")[1].text_content().strip(),
                        "first_insured_date": trs[8].xpath(".//td")[1].text_content().strip(),
                        "insured_status": trs[9].xpath(".//td")[1].text_content().strip(),
                    }
                    # Change cookie key name to JSESSIONID_INVOICE
                    self.set_cookie("JSESSIONID_INVOICE", session)
                    # self.set_header("Set-Cookie", cookie)
                    self.send_json_response([info])
                else:
                    self.send_json_response([])
            else:
                raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    else:
        action = action_str.split(
            '/')  # pension | medical | injury | unemployment | maternity
        opt = action[1] if len(
            action) > 1 else ""  # info | account | detail | balance
        query_params = dict(parse_qsl(self.request.query, True))
        if "JSESSIONID_INVOICE" not in query_params:
            raise MissingArgumentError("JSESSIONID_INVOICE")
        else:
            # Rebuild the upstream cookie from the client-held session value.
            tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
            session = "JSESSIONID=" + unquote_plus(tmp_cookie)
        p = query_params["page"] if "page" in query_params else ""
        ps = query_params[
            "page_size"] if "page_size" in query_params else ""
        page, page_size = self.init_paging_param(p, ps)
        # Start row and end row
        start = page + 1
        end = page + page_size
        # ==============================
        # Endowment insurance (pension)
        # ==============================
        if action_str.startswith("pension"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pensionSecurity.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pCList.action"
            }
            # "info" pages are plain GETs; paged lists are POSTed.
            method = "get" if opt == "info" else "post"
            pension = Pension(session)
            response = await self.browser.fetch(
                pension.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = pension.parse_html(opt, raw)
            self.send_json_response(result)
        # ==================
        # Medical insurance
        # ==================
        elif action_str.startswith("medical"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cBList.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cCList.action",
                "balance": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cFList.action"
            }
            method = "get" if opt == "info" else "post"
            medical = Medical(session)
            response = await self.browser.fetch(
                medical.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = medical.parse_html(opt, raw)
            self.send_json_response(result)
        # ============================
        # Employment injury insurance
        # ============================
        elif action_str.startswith("injury"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iCList.action"
            }
            method = "get" if opt == "info" else "post"
            injury = Injury(session)
            response = await self.browser.fetch(
                injury.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = injury.parse_html(opt, raw)
            self.send_json_response(result)
        # =======================
        # Unemployment insurance
        # =======================
        elif action_str.startswith("unemployment"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uCList.action"
            }
            method = "get" if opt == "info" else "post"
            unemployment = Unemployment(session)
            response = await self.browser.fetch(
                unemployment.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = unemployment.parse_html(opt, raw)
            self.send_json_response(result)
        # ====================
        # Maternity insurance
        # ====================
        else:
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthInsuredInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthJfDetail.action?startRow=1&endRow=1200"
            }
            # Maternity endpoints are GET-only; the detail URL carries its
            # own fixed paging in the query string.
            method = "get"
            maternity = Maternity(session)
            response = await self.browser.fetch(
                maternity.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = maternity.parse_html(opt, raw)
            self.send_json_response(result)
async def do_process_logic(self):
    """Scrape the Guiyang city-management home page and return the four
    statistics charts (div ids box1..box4) as a JSON list.

    box1 encodes its points as ``value:N, name:'...'`` pairs; box2..box4
    share a common layout of two ``data:[...]`` arrays (labels, values),
    so their parsing is factored into helpers.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    headers = {
        "User-Agent": fake_useragent()
    }
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)

    def _box_script(box_id):
        # Inline <script> of the chart container div with the given id.
        div = root.xpath('//div[@id="box%d"]' % box_id)[0]
        return div.xpath('.//script')[0].text_content()

    def _labels_and_values(script):
        # Extract the two data:[...] arrays: label list, then value list.
        list_data = re.compile('data:.?\[(.+?)\]', re.DOTALL).findall(script)
        area = re.sub('[\t|\n|\r|\s|"]', '', list_data[0])
        area_list = area.split(',')
        data = re.sub('\s', '', list_data[1])
        data_list = [int(i) for i in data.split(',')]
        return list(zip(area_list, data_list))

    statistics = []
    # id="box1"
    script = _box_script(1)
    list_data = re.compile('value:(\d+).+?name:\'(.+?)\'').findall(script)
    statistics.append({"id": 1, "name": "共受理各类案件处理情况", "data": list_data})
    # id="box2" .. id="box4" all share the same script layout.
    titles = {
        2: "各区(市、县)案件处理情况",
        3: "市级联动部门案卷处理情况",
        4: "平台案件类别",
    }
    for box_id in (2, 3, 4):
        statistics.append({
            "id": box_id,
            "name": titles[box_id],
            "data": _labels_and_values(_box_script(box_id))
        })
    self.send_json_response(statistics)
async def do_process_logic(self, *args):
    """Dispatch Chongqing social-security queries.

    args[0] is the action string. "login..." authenticates with id/password
    query arguments and returns the insured person's basic info, setting the
    session cookie under the fixed name "JSESSIONID_INVOICE". Any other
    action has the form "<insurance>/<opt>" where <insurance> is one of
    pension | medical | injury | unemployment | maternity and <opt> is one
    of info | account | detail | balance; the session must be supplied back
    via the JSESSIONID_INVOICE query argument.
    """
    action_str = args[0]
    # ============================
    # Login and return basic info
    # ============================
    if action_str.startswith("login"):
        uid = self.get_query_argument("id")
        password = self.get_query_argument("password")
        login_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/directQuery.action"
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "User-Agent": fake_useragent(),
        }
        data = {
            "userVo.idNo": uid,
            "userVo.password": password
        }
        request = HTTPRequest(login_url, method="POST", headers=headers, body=urlencode(data))
        response = await self.browser.fetch(request)
        html = lxml.html.fromstring(response.body.decode("utf-8"))
        # The login page reports failures via a hidden errorMessage input.
        error = html.xpath('//input[@id="errorMessage"]')
        # Login error
        if len(error):
            error_msg = error[0].attrib["value"]
            self.send_json_response(error_msg, 0)
        else:
            # Get cookie from response header
            cookie = response.headers.get_list("Set-Cookie")[0]
            m = re.match('JSESSIONID=(.*?);\s', cookie)
            if m:
                # Get cookie "JSESSIONID"
                session = m.group(1)
                # Request user info
                info_url = "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/persionBasic.action"
                headers = {
                    "User-Agent": fake_useragent(),
                    "Cookie": cookie
                }
                request = HTTPRequest(info_url, method="GET", headers=headers)
                response = await self.browser.fetch(request)
                if response.code == 200:
                    html = lxml.html.fromstring(response.body.decode("utf-8"))
                    # Basic info is rendered as a fixed-order two-column table;
                    # each row's second <td> holds the value.
                    trs = html.xpath('//div[@class="person_base_info"]/table/tr')
                    info = {
                        "name": trs[0].xpath(".//td")[1].text_content().strip(),
                        "personal_no": trs[1].xpath(".//td")[1].text_content().strip(),
                        "id": trs[2].xpath(".//td")[1].text_content().strip(),
                        "gender": trs[3].xpath(".//td")[1].text_content().strip(),
                        "nationality": trs[4].xpath(".//td")[1].text_content().strip(),
                        "birth_date": trs[5].xpath(".//td")[1].text_content().strip(),
                        "resident_type": trs[6].xpath(".//td")[1].text_content().strip(),
                        "work_type": trs[7].xpath(".//td")[1].text_content().strip(),
                        "first_insured_date": trs[8].xpath(".//td")[1].text_content().strip(),
                        "insured_status": trs[9].xpath(".//td")[1].text_content().strip(),
                    }
                    # Change cookie key name to JSESSIONID_INVOICE
                    self.set_cookie("JSESSIONID_INVOICE", session)
                    # self.set_header("Set-Cookie", cookie)
                    self.send_json_response([info])
                else:
                    self.send_json_response([])
            else:
                raise HTTPError(500, self.config["error"]["GET_COOKIE_ERR"])
    else:
        action = action_str.split('/')  # pension | medical | injury | unemployment | maternity
        opt = action[1] if len(action) > 1 else ""  # info | account | detail | balance
        query_params = dict(parse_qsl(self.request.query, True))
        if "JSESSIONID_INVOICE" not in query_params:
            raise MissingArgumentError("JSESSIONID_INVOICE")
        else:
            # Rebuild the upstream cookie from the client-held session value.
            tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
            session = "JSESSIONID=" + unquote_plus(tmp_cookie)
        p = query_params["page"] if "page" in query_params else ""
        ps = query_params["page_size"] if "page_size" in query_params else ""
        page, page_size = self.init_paging_param(p, ps)
        # Start row and end row
        start = page + 1
        end = page + page_size
        # ==============================
        # Endowment insurance (pension)
        # ==============================
        if action_str.startswith("pension"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pensionSecurity.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/pCList.action"
            }
            # "info" pages are plain GETs; paged lists are POSTed.
            method = "get" if opt == "info" else "post"
            pension = Pension(session)
            response = await self.browser.fetch(pension.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = pension.parse_html(opt, raw)
            self.send_json_response(result)
        # ==================
        # Medical insurance
        # ==================
        elif action_str.startswith("medical"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cBList.action",
                "account": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cAList.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cCList.action",
                "balance": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/cFList.action"
            }
            method = "get" if opt == "info" else "post"
            medical = Medical(session)
            response = await self.browser.fetch(medical.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = medical.parse_html(opt, raw)
            self.send_json_response(result)
        # ============================
        # Employment injury insurance
        # ============================
        elif action_str.startswith("injury"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/iCList.action"
            }
            method = "get" if opt == "info" else "post"
            injury = Injury(session)
            response = await self.browser.fetch(injury.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = injury.parse_html(opt, raw)
            self.send_json_response(result)
        # =======================
        # Unemployment insurance
        # =======================
        elif action_str.startswith("unemployment"):
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uBInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/uCList.action"
            }
            method = "get" if opt == "info" else "post"
            unemployment = Unemployment(session)
            response = await self.browser.fetch(unemployment.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = unemployment.parse_html(opt, raw)
            self.send_json_response(result)
        # ====================
        # Maternity insurance
        # ====================
        else:
            url = {
                "info": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthInsuredInfo.action",
                "detail": "http://cqjy.cqhrss.gov.cn/cqwx/wx/socialSecurity/queryBirthJfDetail.action?startRow=1&endRow=1200"
            }
            # Maternity endpoints are GET-only; the detail URL carries its
            # own fixed paging in the query string.
            method = "get"
            maternity = Maternity(session)
            response = await self.browser.fetch(maternity.make_request(method, url[opt], start, end))
            raw = response.body.decode("utf-8")
            result = maternity.parse_html(opt, raw)
            self.send_json_response(result)
async def do_process_logic(self):
    """Query Sichuan registered-constructor records from jzsgl.coc.gov.cn.

    Optional query arguments: name, ent (enterprise), reg_no (注册号),
    cer_no (注册证书编号), qua_no (执业资格证书编号), page. Responds with a
    JSON list of constructor records.
    """
    url = "http://jzsgl.coc.gov.cn/archisearch/AjaxAction/DataServices.aspx"
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'jzsgl.coc.gov.cn',
        'Origin': 'http://jzsgl.coc.gov.cn',
        'Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Requested-With': 'XMLHttpRequest',
        "User-Agent": fake_useragent()
    }
    name_args = self.get_query_arguments("name")
    ent_args = self.get_query_arguments("ent")
    # 注册号 (e.g. 川251141528602)
    reg_no_args = self.get_query_arguments("reg_no")
    # 注册证书编号
    cer_no_args = self.get_query_arguments("cer_no")
    # 执业资格证书编号
    qua_no_args = self.get_query_arguments("qua_no")
    page_args = self.get_query_arguments("page")
    params = {
        "Xm": name_args[0] if len(name_args) else "",
        "Qymc": ent_args[0] if len(ent_args) else "",
        "Zcbh": reg_no_args[0] if len(reg_no_args) else "",
        "Zsbh": cer_no_args[0] if len(cer_no_args) else "",
        "Zgzsbh": qua_no_args[0] if len(qua_no_args) else "",
        "Sjbm": "510000",  # 省级编号 (510000 = 四川省)
        "Zclb": "00",
        "PageNo": int(page_args[0]) if len(page_args) else 1,
    }
    # NOTE(review): urlencode() serializes the nested `params` dict via its
    # str() repr — presumably what this endpoint expects; confirm if changed.
    payload = {'action': '020103', 'param': params}
    # Registration-type codes returned in the "zclb" field.
    labels = {
        "00": "最新状态",
        "01": "初始注册",
        "02": "变更注册",
        "03": "延续注册",
        "04": "增项注册",
        "05": "重新注册",
        "06": "遗失补办",
        "07": "注销注册",
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(payload))
    response = await self.browser.fetch(request)
    reply = json.loads(response.body.decode("utf-8"))
    result_list = []
    for constructor in reply["AppendData"]["Data"]:
        # "zyList" packs profession|validity pairs joined by '^'.
        pairs = [item.split('|') for item in constructor["zyList"].split('^')]
        profession = ','.join(pair[0] for pair in pairs)
        validity = ','.join(pair[1] for pair in pairs)
        result_list.append({
            "province": constructor["sjmc"],
            "enterprise": constructor["qymc"],
            "name": constructor["xm"],
            "register_no": constructor["zcbh"].strip(),
            "register_certificate_no": constructor["zsbh"].strip(),
            "qualification_certificate_no": constructor["zgzsbh"].strip(),
            "profession": profession,
            "validity": validity,
            "type": labels[constructor["zclb"]]
        })
    self.send_json_response(result_list)
async def do_process_logic(self):
    """Submit a taxi lost-item report to www.cdtaxi.cn.

    Required query arguments: name, HIDDEN_FORM_HASH, captcha,
    JSESSIONID_INVOICE (the session value previously handed to the client;
    restored here as the site's PHPSESSID cookie). All other form fields
    are optional and default to "". Responds with the site's result
    message, status 0 unless the submission succeeded.
    """
    url = "http://www.cdtaxi.cn/shiwudj/add.html"
    headers = {
        "Host": "www.cdtaxi.cn",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Origin": "http://www.cdtaxi.cn",
        "Referer": "http://www.cdtaxi.cn/shiwudj/add.html",
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        # NOTE(review): standard header is "Upgrade-Insecure-Requests" (plural);
        # kept as-is to preserve the exact request sent upstream.
        "Upgrade-Insecure-Request": "1",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "User-Agent": fake_useragent()
    }
    # Required arguments — Tornado raises MissingArgumentError if absent.
    username = self.get_query_argument("name")
    form_hash = self.get_query_argument("HIDDEN_FORM_HASH")
    captcha = self.get_query_argument("captcha")
    tmp_cookie = self.get_query_argument("JSESSIONID_INVOICE")
    # Restore the session cookie under its real upstream name.
    headers["Cookie"] = "PHPSESSID=" + unquote(tmp_cookie)

    def _optional(name):
        # First value of an optional query argument, or "" when absent.
        values = self.get_query_arguments(name)
        return values[0] if values else ""

    data = {
        "username": username,
        "telephone": _optional("tel"),
        "first_ads": _optional("pickup"),
        "uptime": _optional("pickup_time"),
        "last_ads": _optional("getoff"),
        "downtime": _optional("getoff_time"),
        "gongsi": _optional("company"),
        "chexing": _optional("car_type"),
        "fapiao": _optional("invoice_sum"),
        "chepai": _optional("plate_no"),
        "daima": _optional("invoice_code"),
        "haoma": _optional("invoice_no"),
        "title": _optional("lost_item"),
        "content": _optional("msg"),
        "time": _optional("time"),
        "dengji": "1",
        "verify": captcha,
        "__hash__": form_hash,
        "Submit": "提 交"
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(data))
    response = await self.browser.fetch(request)
    root = lxml.html.fromstring(response.body.decode("utf-8"))
    # The result page shows the outcome in the first green <span>.
    span = root.xpath('//span[@class="green"]')[0]
    msg = span.text_content().strip()
    if msg == "留言提交成功":
        self.send_json_response({"msg": msg})
    else:
        self.send_json_response({"msg": msg}, 0)
async def do_process_logic(self):
    """Scrape the Guiyang city-management home page and return the four
    statistics charts (div ids box1..box4) as a JSON list.

    box1 encodes its points as ``value:N, name:'...'`` pairs; box2..box4
    share a common layout of two ``data:[...]`` arrays (labels, values),
    so their parsing is factored into helpers.
    """
    url = "http://222.85.152.12:8803/JiaManage/guest/ShowJiaGuiYangHome.xhtml"
    headers = {"User-Agent": fake_useragent()}
    request = HTTPRequest(url, method="GET", headers=headers)
    response = await self.browser.fetch(request)
    html = response.body.decode("utf-8")
    root = lxml.html.fromstring(html)

    def _box_script(box_id):
        # Inline <script> of the chart container div with the given id.
        div = root.xpath('//div[@id="box%d"]' % box_id)[0]
        return div.xpath('.//script')[0].text_content()

    def _labels_and_values(script):
        # Extract the two data:[...] arrays: label list, then value list.
        list_data = re.compile('data:.?\[(.+?)\]', re.DOTALL).findall(script)
        area = re.sub('[\t|\n|\r|\s|"]', '', list_data[0])
        area_list = area.split(',')
        data = re.sub('\s', '', list_data[1])
        data_list = [int(i) for i in data.split(',')]
        return list(zip(area_list, data_list))

    statistics = []
    # id="box1"
    script = _box_script(1)
    list_data = re.compile('value:(\d+).+?name:\'(.+?)\'').findall(script)
    statistics.append({"id": 1, "name": "共受理各类案件处理情况", "data": list_data})
    # id="box2" .. id="box4" all share the same script layout.
    titles = {
        2: "各区(市、县)案件处理情况",
        3: "市级联动部门案卷处理情况",
        4: "平台案件类别",
    }
    for box_id in (2, 3, 4):
        statistics.append({
            "id": box_id,
            "name": titles[box_id],
            "data": _labels_and_values(_box_script(box_id))
        })
    self.send_json_response(statistics)
async def do_process_logic(self):
    """Query Sichuan registered-constructor records from jzsgl.coc.gov.cn.

    Optional query arguments: name, ent (enterprise), reg_no (注册号),
    cer_no (注册证书编号), qua_no (执业资格证书编号), page. Responds with a
    JSON list of constructor records.
    """
    url = "http://jzsgl.coc.gov.cn/archisearch/AjaxAction/DataServices.aspx"
    headers = {
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4,zh-TW;q=0.2',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Host': 'jzsgl.coc.gov.cn',
        'Origin': 'http://jzsgl.coc.gov.cn',
        'Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Referer': 'http://jzsgl.coc.gov.cn/archisearch/cxejjzs/rylist.aspx?sjbm=510000&qymc=&xm=&zclb=00&zczy=%E5%85%A8%E9%83%A8&zczsbh=&zch=&zyzgzsbh=',
        'X-Requested-With': 'XMLHttpRequest',
        "User-Agent": fake_useragent()
    }
    name_args = self.get_query_arguments("name")
    ent_args = self.get_query_arguments("ent")
    # 注册号 (e.g. 川251141528602)
    reg_no_args = self.get_query_arguments("reg_no")
    # 注册证书编号
    cer_no_args = self.get_query_arguments("cer_no")
    # 执业资格证书编号
    qua_no_args = self.get_query_arguments("qua_no")
    page_args = self.get_query_arguments("page")
    params = {
        "Xm": name_args[0] if len(name_args) else "",
        "Qymc": ent_args[0] if len(ent_args) else "",
        "Zcbh": reg_no_args[0] if len(reg_no_args) else "",
        "Zsbh": cer_no_args[0] if len(cer_no_args) else "",
        "Zgzsbh": qua_no_args[0] if len(qua_no_args) else "",
        "Sjbm": "510000",  # 省级编号 (510000 = 四川省)
        "Zclb": "00",
        "PageNo": int(page_args[0]) if len(page_args) else 1,
    }
    # NOTE(review): urlencode() serializes the nested `params` dict via its
    # str() repr — presumably what this endpoint expects; confirm if changed.
    payload = {
        'action': '020103',
        'param': params
    }
    # Registration-type codes returned in the "zclb" field.
    labels = {
        "00": "最新状态",
        "01": "初始注册",
        "02": "变更注册",
        "03": "延续注册",
        "04": "增项注册",
        "05": "重新注册",
        "06": "遗失补办",
        "07": "注销注册",
    }
    request = HTTPRequest(url, method="POST", headers=headers, body=urlencode(payload))
    response = await self.browser.fetch(request)
    reply = json.loads(response.body.decode("utf-8"))
    result_list = []
    for constructor in reply["AppendData"]["Data"]:
        # "zyList" packs profession|validity pairs joined by '^'.
        pairs = [item.split('|') for item in constructor["zyList"].split('^')]
        profession = ','.join(pair[0] for pair in pairs)
        validity = ','.join(pair[1] for pair in pairs)
        result_list.append({
            "province": constructor["sjmc"],
            "enterprise": constructor["qymc"],
            "name": constructor["xm"],
            "register_no": constructor["zcbh"].strip(),
            "register_certificate_no": constructor["zsbh"].strip(),
            "qualification_certificate_no": constructor["zgzsbh"].strip(),
            "profession": profession,
            "validity": validity,
            "type": labels[constructor["zclb"]]
        })
    self.send_json_response(result_list)