def generate_reason_text(self):
    """Create (or truncate) one ``<name>.txt`` file per top-level reason.

    The rows come from ``OperateMysql.query_top_level_reason()``; the first
    element of each row is used as the base name of the file, which is
    created relative to the current working directory.

    Returns:
        The rows returned by ``query_top_level_reason()``, unchanged.
    """
    top_level_rows = OperateMysql().query_top_level_reason()
    for row in top_level_rows:
        # Opening in "w" mode and writing an empty string leaves an empty file.
        with open(row[0] + ".txt", "w") as handle:
            handle.write("")
    return top_level_rows
def parse_childern_reason(self, name, childern_name_id_tup):
    """Append every leaf reason found below the given rows to ``<name>.txt``.

    Args:
        name: Base name of the output file; ``".txt"`` is appended to it.
        childern_name_id_tup: Sequence of rows whose element 0 is written to
            the file and whose element 1 is passed to
            ``OperateMysql.search_childern()`` to look up the next level.

    A row whose lookup returns nothing is treated as a leaf and written on
    its own line; otherwise the method recurses into the returned rows.
    """
    for entry in childern_name_id_tup:
        # A fresh helper is created for each lookup, as in every other method.
        descendants = OperateMysql().search_childern(entry[1])
        if len(descendants) != 0:
            self.parse_childern_reason(name, descendants)
            continue
        with open(name + ".txt", "a", encoding="utf8") as out:
            out.write(entry[0] + "\n")
def save_reason_to_mysql(self):
    """Store every reason from ``self.get_anyou_total_list()`` in MySQL.

    Each record is a mapping read through its ``"id"``, ``"parent"`` and
    ``"text"`` keys. The number of records and each record's three values
    are printed as progress output before the row is saved.
    """
    records = self.get_anyou_total_list()
    print(len(records))
    for record in records:
        writer = OperateMysql()
        reason_id, parent_id, name = (
            record["id"],
            record["parent"],
            record["text"],
        )
        print(reason_id, parent_id, name)
        writer.save_case_reason(reason_id, parent_id, name)
def __init__(self):
    """Set up the reason-list endpoint, request headers and helpers.

    Attributes set:
        url: Location of the ``reason.json`` document.
        headers: Browser-like headers for an XMLHttpRequest to that URL.
        outer_reason: Value read from ``config["outer_reason"]``.
        operate_mysql: An ``OperateMysql`` helper instance.
    """
    self.url = "http://www.lawsdata.com/js/data/reason.json"
    self.headers = {
        "Host": "www.lawsdata.com",
        # Fixed: the value used to be "http: // www.lawsdata.com", which is
        # not a valid origin (scheme://host with no embedded spaces).
        "Origin": "http://www.lawsdata.com",
        "Referer": "http://www.lawsdata.com/?q=eyJtIjoiYWR2YW5jZSIsImEiOnsidGV4dHMiOlt7InR5cGUiOiJhbGwiLCJzdWJUeXBlIjoiIiwidmFsdWUiOiLmoYgifV0sImNhc2VUeXBlIjpbIjIiXSwiZnV6enlNZWFzdXJlIjowfSwic20iOnsidGV4dFNlYXJjaCI6WyJzaW5nbGUiXSwibGl0aWdhbnRTZWFyY2giOlsicGFyYWdyYXBoIl19fQ==&s=",
        # Fixed: stray spaces ("Mozilla / 5.0(", "Chrome /70") removed so the
        # product tokens are well formed.
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }
    self.outer_reason = config["outer_reason"]
    self.operate_mysql = OperateMysql()
def write_mysql_to_text(self):
    """Write the leaf reasons under every top-level reason to text files.

    For each row returned by ``OperateMysql.query_top_level_reason()``,
    element 0 is used as the output file's base name and element 1 is passed
    to ``search_childern()``; the rows found are handed to
    ``parse_childern_reason``, which does the actual writing.
    """
    top_level_rows = OperateMysql().query_top_level_reason()
    for row in top_level_rows:
        lookup = OperateMysql()
        reason_name = row[0]
        reason_id = row[1]
        direct_children = lookup.search_childern(reason_id)
        self.parse_childern_reason(reason_name, direct_children)
class Page_Url():
    """Builds lawsdata.com search-page URLs for one case reason ("anyou")."""

    def __init__(self, anyou):
        """Keep the reason text and create the database helpers.

        Args:
            anyou: The case-reason text used for the id lookup and embedded
                in the generated query.
        """
        self.db = DB()
        self.anyou = anyou
        self.operate = OperateMysql()

    def run(self):
        """Placeholder; currently does nothing."""
        pass

    def generate_url(self):
        """Return the search URL for ``self.anyou``.

        The reason id is looked up with
        ``OperateMysql.query_i_from_case_reason``. The query dict is rendered
        with ``str()``, stripped of every space character, UTF-8 encoded and
        base64 encoded, then placed between ``"http://www.lawsdata.com/?"``
        and ``"&s="``.

        Returns:
            The URL string, or ``None`` when the lookup returns ``None``.
        """
        i = self.operate.query_i_from_case_reason(self.anyou)
        if i is None:
            return None

        data = {
            "m": "advance",
            "a": {
                "caseType": ["3"],
                "reasonId": [i],
                "fuzzyMeasure": "0",
                "reason": self.anyou
            },
            "sm": {
                "textSearch": ["single"],
                "litigantSearch": ["paragraph"]
            }
        }
        # NOTE(review): str(data) yields a Python repr (single quotes), not
        # JSON, and dropping every space also removes spaces that occur inside
        # the reason text - confirm this is the format the site expects.
        json_str = str(data)
        str_list = json_str.split(" ")
        json_str = "".join(str_list).encode("utf-8")
        return "http://www.lawsdata.com/?" + base64.b64encode(
            json_str).decode("utf-8") + "&s="

    def get_first_page(self):
        """Fetch a cookie via ``get_cookie()`` and assemble request headers.

        NOTE(review): as written here the headers are built but neither
        returned nor used to send a request - the method looks unfinished.
        """
        cookie = get_cookie()
        headers = {
            "cookie": cookie,
            "Host": "www.lawsdata.com",
            "Referer": "http://www.lawsdata.com/?q = eydtJzonYWR2YW5jZScsJ2EnOnsnY2FzZVR5cGUnOlsnMyddLCdyZWFzb25JZCc6WycwMDMwMDEwMjMnXSwnZnV6enlNZWFzdXJlJzonMCcsJ3JlYXNvbic6J+mjn+WTgeiNr+WTgeWuieWFqOihjOaUv+euoeeQhijpo5/lk4HjgIHoja/lk4EpJ30sJ3NtJzp7J3RleHRTZWFyY2gnOlsnc2luZ2xlJ10sJ2xpdGlnYW50U2VhcmNoJzpbJ3BhcmFncmFwaCddfX0=&s=",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0(Windows NT 6.1;Win64;x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        }
def save_reason_to_mysql(self, anyou_total_list):
    """Recursively store a tree of reason nodes in MySQL.

    Args:
        anyou_total_list: Iterable of dicts. Each dict's ``"id"`` value is
            matched against ``<name>::<parent>::<reason id>``; an optional
            ``"children"`` entry holds nested nodes of the same form.

    The parent part is saved as-is when it equals ``"TopLevel"``; any other
    value is first translated with
    ``OperateMysql.search_reason_id_by_name``.
    """
    for node in anyou_total_list:
        parent_lookup = OperateMysql()
        writer = OperateMysql()
        matched = re.match(r"(.*?)::(.*?)::(.*)", node["id"])
        name, parent_id, reason_id = matched.groups()
        if parent_id != "TopLevel":
            parent_id = parent_lookup.search_reason_id_by_name(parent_id)
        writer.save_case_reason(reason_id, parent_id, name)
        if "children" not in node:
            continue
        self.save_reason_to_mysql(node["children"])
class wenshuID():
    """Builds and sends lawsdata.com search requests for one case reason."""

    def __init__(self, reason):
        """Keep the search endpoint, a database helper and the reason text.

        Args:
            reason: The case-reason text used for the id lookup.
        """
        self.url = "http://www.lawsdata.com/s/"
        self.operate_mysql = OperateMysql()
        self.reason = reason

    def form_data(self):
        """Return the POST payload for the first result page.

        Returns:
            A dict carrying the reason id looked up for ``self.reason``, or
            ``None`` when the lookup returns ``None``.
        """
        i = self.operate_mysql.query_i_from_case_reason(self.reason)
        if i is not None:
            data = {
                "q": {
                    "caseType": ["2"],
                    "reasonId": [i],
                    "fuzzyMeasure": "0",
                    "pageNo": 1,
                    "sortField": "referencedType",
                    "sortOrder": "desc"
                },
                "subLibraryId": None
            }
        else:
            data = None
        return data

    # Build the referer URL.
    def generate_refer_url(self):
        """Return the referer URL for ``self.reason``.

        The query dict is rendered with ``str()``, stripped of every space
        character, UTF-8 encoded and base64 encoded, then placed between
        ``"http://www.lawsdata.com/?"`` and ``"&s="``.

        Returns:
            The URL string, or ``None`` when the reason id lookup returns
            ``None``.
        """
        i = self.operate_mysql.query_i_from_case_reason(self.reason)
        if i is not None:
            data = {
                "m": "advance",
                "a": {
                    "caseType": ["3"],
                    "reasonId": [i],
                    "fuzzyMeasure": "0",
                    "reason": self.reason
                },
                "sm": {
                    "textSearch": ["single"],
                    "litigantSearch": ["paragraph"]
                }
            }
            json_str = str(data)
            str_list = json_str.split(" ")
            json_str = "".join(str_list).encode("utf-8")
            return "http://www.lawsdata.com/?" + base64.b64encode(
                json_str).decode("utf-8") + "&s="
        return None

    # Build the required request headers.
    def form_headers(self, cookie):
        """Return the request headers, including the generated referer.

        Args:
            cookie: Value placed in the ``Cookie`` header.

        When no referer can be generated a message is printed and the
        ``Referer`` entry is ``None``.
        """
        # Fixed: generate_refer_url() takes no argument; passing self.reason
        # raised TypeError on every call.
        refer_url = self.generate_refer_url()
        if refer_url is None:
            print("获取案由id失败,数据库不存在该案由")
        headers = {
            "Host": "www.lawsdata.com",
            "Origin": "http://www.lawsdata.com",
            "Referer": refer_url,
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
            "X-Requested-With": "XMLHttpRequest",
            "Cookie": cookie
        }
        return headers

    def get_index_contnet(self, proxy, data, headers):
        """POST the search and return the request description when it is small.

        Args:
            proxy: Passed to ``requests.post`` as ``proxies``.
            data: Form payload for the POST.
            headers: Request headers for the POST.

        Returns:
            ``{"data": data, "url": self.url}`` when the response's
            ``numFound`` is at most 500; otherwise ``None`` (the branch for
            larger result sets is unfinished).
        """
        resp = requests.post(self.url, data=data, headers=headers, proxies=proxy)
        # Fixed: decode the body once and index the decoded JSON; the Response
        # object itself is not subscriptable.
        payload = resp.json()
        count = payload["numFound"]
        if count > 500:
            for province in payload["facetFields"]["provinceId"]:
                # NOTE(review): indexing a province entry with the total hit
                # count looks wrong - confirm the facet entry layout and the
                # intended key or position.
                province_count = province[count]
                if province_count > 500:
                    # NOTE(review): placeholder request with an empty URL;
                    # requests will reject it - the split logic is unfinished.
                    resp = requests.post("")
        else:
            return {"data": data, "url": self.url}
def __init__(self, reason):
    """Keep the search endpoint, a database helper and the reason text.

    Args:
        reason: The case-reason text this instance works with.
    """
    # Endpoint that search requests are posted to.
    self.url = "http://www.lawsdata.com/s/"
    # Helper used for database lookups.
    self.operate_mysql = OperateMysql()
    self.reason = reason
def __init__(self, anyou):
    """Keep the reason text and create the database helpers.

    Args:
        anyou: The case-reason text this instance works with.
    """
    # Two separate helpers are created: a DB instance and an OperateMysql one.
    self.db = DB()
    self.anyou = anyou
    self.operate = OperateMysql()