def get_data(s):
    """Fetch the detail page for document id *s* and return its parsed fields.

    The request is sent through a proxy picked at random from ``proxy_list``.
    If the reply contains the "please enable JavaScript and refresh" marker,
    the redirect URL computed by ``decrypt_wzws`` is requested up to three
    times, always through the same proxy as the first request.

    Returns whatever ``parse_detail`` produces for the final response. If all
    retries still return the marker page, a message is printed and the last
    response is parsed anyway.
    """
    error_msg = "请开启JavaScript并刷新该页"
    url = "http://wenshu.court.gov.cn/CreateContentJS/CreateContentJS.aspx"
    params = {
        "DocID": s,
    }

    # Choose the proxy once: the follow-up request must leave from the same IP
    # as the first one, otherwise the site keeps answering with the marker page.
    proxies = random.choice(proxy_list)

    response = session.get(url, params=params, proxies=proxies)
    text = response.content.decode()

    if error_msg in text:
        retry = 3
        for _ in range(retry):
            redirect_url = decrypt_wzws(text)
            response = session.get(redirect_url, proxies=proxies)
            text = response.content.decode()
            if error_msg not in text:
                break
        else:
            # Every attempt returned the marker page; report it and fall
            # through so the caller still gets parse_detail's result.
            print("连续{}次获取数据失败".format(retry))

    group_dict = parse_detail(response.text)
    return group_dict
def detail_page(self):
    """Judgment document detail page.

    Requests a fixed DocID through ``self.session``. If the reply contains the
    "please enable JavaScript and refresh" marker, one follow-up request is
    made to the URL returned by ``decrypt_wzws``. The final response is parsed
    with ``parse_detail`` and pretty-printed; nothing is returned.
    """
    endpoint = "http://wenshu.court.gov.cn/CreateContentJS/CreateContentJS.aspx"
    query = {"DocID": "029bb843-b458-4d1c-8928-fe80da403cfe"}

    # Request 1
    response = self.session.get(endpoint, params=query)
    body = response.content.decode()

    if "请开启JavaScript并刷新该页" in body:
        # When a proxy is used, request 1 and request 2 must come from the
        # same IP, otherwise the marker page is returned again.
        follow_up_url = decrypt_wzws(body)
        # Request 2
        response = self.session.get(follow_up_url)

    pprint(parse_detail(response.text))
def test_list_page(self):
    # Posts a list query, retries up to three times while the reply still
    # contains self.error_msg, then decrypts every document id in the result.
    # Fails the test if all retries return the error page.
    list_url = "http://wenshu.court.gov.cn/List/ListContent"

    # The vl5x field is derived from the "vjkl5" cookie; setdefault stores a
    # freshly generated value when the session does not have one yet.
    vjkl5 = self.session.cookies.setdefault("vjkl5", Vjkl5())
    form = {
        "Param": "案件类型:执行案件",
        "Index": 1,
        "Page": 10,
        "Order": "法院层级",
        "Direction": "asc",
        "vl5x": Vl5x(vjkl5),
        "number": Number(),
        "guid": Guid(),
    }

    response = self.session.post(list_url, data=form)
    page = response.content.decode()

    max_retries = 3
    blocked = self.error_msg in page
    attempts_left = max_retries
    while blocked and attempts_left > 0:
        attempts_left -= 1
        response = self.session.post(decrypt_wzws(page), data=form)
        page = response.content.decode()
        blocked = self.error_msg in page
    if blocked:
        self.fail("连续{}次获取数据失败".format(max_retries))

    # The body is decoded twice (response.json() then json.loads) --
    # presumably the endpoint returns a JSON-encoded string; confirm.
    records = json.loads(response.json())
    print("列表数据:", records)

    # The first record carries the RunEval payload the key is derived from;
    # the remaining records are the actual list entries.
    run_eval = records.pop(0)["RunEval"]
    try:
        key_text = decrypt_runeval(run_eval)
    except ValueError as exc:
        raise ValueError("返回脏数据") from exc
    print("RunEval解析完成:", key_text, "\n")

    key_bytes = key_text.encode()
    for record in records:
        encrypted_id = record["文书ID"]
        print("解密:", encrypted_id)
        decrypted_id = decrypt_doc_id(doc_id=encrypted_id, key=key_bytes)
        print("成功, 文书ID:", decrypted_id, "\n")
def test_detail_page(self):
    # Requests a fixed DocID, retries up to three times while the reply still
    # contains self.error_msg, then parses and pretty-prints the detail page.
    # Fails the test if all retries return the error page.
    endpoint = "http://wenshu.court.gov.cn/CreateContentJS/CreateContentJS.aspx"
    query = {"DocID": "a8b745f3-43ac-402c-99bf-68b9a9cae635"}

    response = self.session.get(endpoint, params=query)
    page = response.content.decode()

    max_retries = 3
    blocked = self.error_msg in page
    attempts_left = max_retries
    while blocked and attempts_left > 0:
        attempts_left -= 1
        # Each retry targets the URL computed from the latest page.
        response = self.session.get(decrypt_wzws(page))
        page = response.content.decode()
        blocked = self.error_msg in page
    if blocked:
        self.fail("连续{}次获取数据失败".format(max_retries))

    pprint(parse_detail(response.text))
def list_page(self):
    """Judgment document list page.

    Posts a fixed list query through ``self.session``. If the reply contains
    the "please enable JavaScript and refresh" marker, the same form is posted
    once more to the URL returned by ``decrypt_wzws``. The first record's
    ``RunEval`` value is turned into a key with ``decrypt_runeval``, and that
    key decrypts the document id of every remaining record. Progress is
    printed; nothing is returned.

    Raises ValueError when ``decrypt_runeval`` rejects the RunEval payload.
    """
    list_url = "http://wenshu.court.gov.cn/List/ListContent"

    # The vl5x field is derived from the "vjkl5" cookie; setdefault stores a
    # freshly generated value when the session does not have one yet.
    vjkl5 = self.session.cookies.setdefault("vjkl5", Vjkl5())
    form = {
        "Param": "案件类型:刑事案件",
        "Index": 1,
        "Page": 10,
        "Order": "法院层级",
        "Direction": "asc",
        "vl5x": Vl5x(vjkl5),
        "number": Number(),
        "guid": Guid(),
    }

    # Request 1
    response = self.session.post(list_url, data=form)
    page = response.content.decode()

    if "请开启JavaScript并刷新该页" in page:
        # When a proxy is used, request 1 and request 2 must come from the
        # same IP, otherwise the marker page is returned again.
        follow_up_url = decrypt_wzws(page)
        # Request 2
        response = self.session.post(follow_up_url, data=form)

    # The body is decoded twice (response.json() then json.loads) --
    # presumably the endpoint returns a JSON-encoded string; confirm.
    records = json.loads(response.json())
    print("列表数据:", records)

    # The first record carries the RunEval payload the key is derived from;
    # the remaining records are the actual list entries.
    run_eval = records.pop(0)["RunEval"]
    try:
        key_text = decrypt_runeval(run_eval)
    except ValueError as exc:
        raise ValueError("返回脏数据") from exc
    print("RunEval解析完成:", key_text, "\n")

    key_bytes = key_text.encode()
    for record in records:
        encrypted_id = record["文书ID"]
        print("解密:", encrypted_id)
        decrypted_id = decrypt_doc_id(doc_id=encrypted_id, key=key_bytes)
        print("成功, 文书ID:", decrypted_id, "\n")