Beispiel #1
0
 def generate_reason_text(self):
     """Create (or truncate) one ``<name>.txt`` file per top-level reason.

     The rows come from ``OperateMysql.query_top_level_reason()``; only the
     first element of each row is used, as the stem of the file name.

     Returns:
         The rows exactly as returned by the query.
     """
     top_level_rows = OperateMysql().query_top_level_reason()
     for row in top_level_rows:
         target = row[0] + ".txt"
         # Opening in "w" mode and writing nothing leaves an empty file.
         with open(target, "w") as handle:
             handle.write("")
     return top_level_rows
Beispiel #2
0
 def parse_childern_reason(self, name, childern_name_id_tup):
     """Append the names of all leaf rows below the given rows to ``<name>.txt``.

     Args:
         name: Stem of the output file; ``".txt"`` is appended to it.
         childern_name_id_tup: Sequence of rows whose first element is
             written as text and whose second element is passed to
             ``OperateMysql.search_childern``.

     A row whose lookup returns nothing is written to the file (one per
     line); any other row is descended into recursively.
     """
     for row in childern_name_id_tup:
         # A fresh OperateMysql instance is used for every lookup.
         descendants = OperateMysql().search_childern(row[1])
         if len(descendants) != 0:
             self.parse_childern_reason(name, descendants)
             continue
         with open(name + ".txt", "a", encoding="utf8") as out:
             out.write(row[0] + "\n")
Beispiel #3
0
 def save_reason_to_mysql(self):
     """Pass every entry of ``self.get_anyou_total_list()`` to ``save_case_reason``.

     Each entry is a mapping read through its ``"id"``, ``"parent"`` and
     ``"text"`` keys. The list length and every triple are printed as
     progress output.
     """
     entries = self.get_anyou_total_list()
     print(len(entries))
     for entry in entries:
         # A fresh OperateMysql instance is used for every save.
         writer = OperateMysql()
         reason_id, parent_id, name = entry["id"], entry["parent"], entry["text"]
         print(reason_id, parent_id, name)
         writer.save_case_reason(reason_id, parent_id, name)
 def __init__(self):
     """Set up the reason-list URL, request headers, config value and DB helper.

     Attributes set:
         url: Location of the ``reason.json`` resource.
         headers: HTTP headers sent with requests to www.lawsdata.com.
         outer_reason: Value of ``config["outer_reason"]``.
         operate_mysql: An ``OperateMysql`` instance.
     """
     self.url = "http://www.lawsdata.com/js/data/reason.json"
     self.headers = {
         "Host": "www.lawsdata.com",
         # An origin is scheme://host with no embedded whitespace; the
         # previous value contained stray spaces and was not a valid origin.
         "Origin": "http://www.lawsdata.com",
         "Referer": "http://www.lawsdata.com/?q=eyJtIjoiYWR2YW5jZSIsImEiOnsidGV4dHMiOlt7InR5cGUiOiJhbGwiLCJzdWJUeXBlIjoiIiwidmFsdWUiOiLmoYgifV0sImNhc2VUeXBlIjpbIjIiXSwiZnV6enlNZWFzdXJlIjowfSwic20iOnsidGV4dFNlYXJjaCI6WyJzaW5nbGUiXSwibGl0aWdhbnRTZWFyY2giOlsicGFyYWdyYXBoIl19fQ==&s=",
         # Product tokens are written "name/version" without spaces around
         # the slash.
         "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
         "X-Requested-With": "XMLHttpRequest",
     }
     self.outer_reason = config["outer_reason"]
     self.operate_mysql = OperateMysql()
Beispiel #5
0
    def write_mysql_to_text(self):
        """Write the leaf reasons under every top-level reason to text files.

        For each row returned by ``query_top_level_reason()`` the first
        element is used as the output name and the second as the key for
        ``search_childern``; the result is handed to
        ``parse_childern_reason``.
        """
        top_level_rows = OperateMysql().query_top_level_reason()

        for row in top_level_rows:
            # A fresh OperateMysql instance is used for every lookup.
            lookup = OperateMysql()
            reason_name, reason_key = row[0], row[1]
            children = lookup.search_childern(reason_key)
            self.parse_childern_reason(reason_name, children)
class Page_Url():
    """Build the lawsdata.com search URL for a single case reason (anyou)."""

    def __init__(self, anyou):
        """Store the case reason and create the ``DB`` / ``OperateMysql`` helpers.

        Args:
            anyou: Case-reason text; it is looked up via
                ``query_i_from_case_reason`` and embedded in the URL payload.
        """
        self.db = DB()
        self.anyou = anyou
        self.operate = OperateMysql()

    def run(self):
        """Placeholder; currently a no-op."""
        pass

    def generate_url(self):
        """Return the search-page URL for ``self.anyou``.

        Returns:
            ``"http://www.lawsdata.com/?" + <base64 payload> + "&s="``, or
            ``None`` when ``query_i_from_case_reason`` returns ``None``.
        """
        reason_id = self.operate.query_i_from_case_reason(self.anyou)
        if reason_id is None:
            return None

        data = {
            "m": "advance",
            "a": {
                "caseType": ["3"],
                "reasonId": [reason_id],
                "fuzzyMeasure": "0",
                "reason": self.anyou,
            },
            "sm": {
                "textSearch": ["single"],
                "litigantSearch": ["paragraph"],
            },
        }
        # The payload is the Python repr of the dict (single-quoted), not
        # JSON, with every space removed -- including any space inside the
        # reason text itself.
        compact = str(data).replace(" ", "").encode("utf-8")
        encoded = base64.b64encode(compact).decode("utf-8")
        return "http://www.lawsdata.com/?" + encoded + "&s="

    def get_first_page(self):
        """Fetch a cookie via ``get_cookie()`` and build the request headers.

        NOTE(review): in the visible code the headers are built but neither
        used nor returned -- confirm whether the method is unfinished.
        """
        cookie = get_cookie()
        headers = {
            "cookie":
            cookie,
            "Host":
            "www.lawsdata.com",
            "Referer":
            "http://www.lawsdata.com/?q = eydtJzonYWR2YW5jZScsJ2EnOnsnY2FzZVR5cGUnOlsnMyddLCdyZWFzb25JZCc6WycwMDMwMDEwMjMnXSwnZnV6enlNZWFzdXJlJzonMCcsJ3JlYXNvbic6J+mjn+WTgeiNr+WTgeWuieWFqOihjOaUv+euoeeQhijpo5/lk4HjgIHoja/lk4EpJ30sJ3NtJzp7J3RleHRTZWFyY2gnOlsnc2luZ2xlJ10sJ2xpdGlnYW50U2VhcmNoJzpbJ3BhcmFncmFwaCddfX0=&s=",
            "Upgrade-Insecure-Requests":
            "1",
            "User-Agent":
            "Mozilla/5.0(Windows NT 6.1;Win64;x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
        }
Beispiel #7
0
 def save_reason_to_mysql(self, anyou_total_list):
     """Save a tree of reason entries, recursing into ``"children"``.

     Args:
         anyou_total_list: Iterable of mappings. Each ``"id"`` value is
             split by the pattern ``name::parent::reason_id``; an optional
             ``"children"`` key holds a nested list of the same shape.

     A parent of ``"TopLevel"`` is stored as-is; any other parent is
     replaced by the result of ``search_reason_id_by_name`` first.
     """
     for node in anyou_total_list:
         # Separate OperateMysql instances serve the lookup and the save.
         parent_lookup = OperateMysql()
         writer = OperateMysql()
         parts = re.match(r"(.*?)::(.*?)::(.*)", node["id"])
         name, parent_id, reason_id = parts.group(1), parts.group(2), parts.group(3)
         if parent_id != "TopLevel":
             parent_id = parent_lookup.search_reason_id_by_name(parent_id)
         writer.save_case_reason(reason_id, parent_id, name)
         if "children" in node:
             self.save_reason_to_mysql(node["children"])
Beispiel #8
0
class wenshuID():
    """Build the POST payload and headers for a lawsdata.com search by case reason."""

    def __init__(self, reason):
        """Store the case reason and create the ``OperateMysql`` helper.

        Args:
            reason: Case-reason text; it is looked up via
                ``query_i_from_case_reason``.
        """
        self.url = "http://www.lawsdata.com/s/"
        self.operate_mysql = OperateMysql()
        self.reason = reason

    def form_data(self):
        """Return the search payload for ``self.reason``.

        Returns:
            The payload dict, or ``None`` when ``query_i_from_case_reason``
            returns ``None``.
        """
        reason_id = self.operate_mysql.query_i_from_case_reason(self.reason)
        if reason_id is None:
            return None
        return {
            "q": {
                "caseType": ["2"],
                "reasonId": [reason_id],
                "fuzzyMeasure": "0",
                "pageNo": 1,
                "sortField": "referencedType",
                "sortOrder": "desc",
            },
            "subLibraryId": None,
        }

    def generate_refer_url(self):
        """Return the Referer URL for ``self.reason``.

        Returns:
            ``"http://www.lawsdata.com/?" + <base64 payload> + "&s="``, or
            ``None`` when ``query_i_from_case_reason`` returns ``None``.
        """
        reason_id = self.operate_mysql.query_i_from_case_reason(self.reason)
        if reason_id is None:
            return None

        data = {
            "m": "advance",
            "a": {
                "caseType": ["3"],
                "reasonId": [reason_id],
                "fuzzyMeasure": "0",
                "reason": self.reason,
            },
            "sm": {
                "textSearch": ["single"],
                "litigantSearch": ["paragraph"],
            },
        }
        # The payload is the Python repr of the dict (single-quoted), not
        # JSON, with every space removed -- including any space inside the
        # reason text itself.
        compact = str(data).replace(" ", "").encode("utf-8")
        encoded = base64.b64encode(compact).decode("utf-8")
        return "http://www.lawsdata.com/?" + encoded + "&s="

    def form_headers(self, cookie):
        """Return the request headers, using ``cookie`` as the Cookie header.

        When no Referer URL can be built, a message is printed and the
        ``Referer`` entry is ``None``.
        """
        # generate_refer_url() reads self.reason itself and takes no
        # argument; passing one raised TypeError on every call.
        refer_url = self.generate_refer_url()
        if refer_url is None:
            # Message: failed to get the reason id; it is not in the database.
            print("获取案由id失败,数据库不存在该案由")
        return {
            "Host": "www.lawsdata.com",
            "Origin": "http://www.lawsdata.com",
            "Referer": refer_url,
            "User-Agent":
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36",
            "X-Requested-With": "XMLHttpRequest",
            "Cookie": cookie,
        }

    def get_index_contnet(self, proxy, data, headers):
        """POST the search and return the request description for small result sets.

        Args:
            proxy: Passed to ``requests.post`` as ``proxies``.
            data: Form data for the POST.
            headers: Request headers for the POST.

        Returns:
            ``{"data": data, "url": self.url}`` when ``numFound`` is at most
            500; otherwise ``None`` (the large-result branch returns nothing).
        """
        resp = requests.post(self.url,
                             data=data,
                             headers=headers,
                             proxies=proxy)
        # Decode once; a Response object itself cannot be subscripted, so
        # the facet data must be read from the decoded body.
        payload = resp.json()
        count = payload["numFound"]
        if count > 500:
            for province in payload["facetFields"]["provinceId"]:
                # NOTE(review): indexing by the total ``count`` looks
                # unintended (perhaps a "count" key was meant) -- confirm
                # against the real response shape.
                province_count = province[count]
                if province_count > 500:
                    # NOTE(review): placeholder request with an empty URL;
                    # this branch appears unfinished.
                    resp = requests.post("")
        else:
            return {"data": data, "url": self.url}
Beispiel #9
0
 def __init__(self, reason):
     """Store ``reason``, the search endpoint URL and an ``OperateMysql`` instance."""
     self.reason = reason
     self.url = "http://www.lawsdata.com/s/"
     self.operate_mysql = OperateMysql()
Beispiel #10
0
 def __init__(self, anyou):
     """Store ``anyou`` and create the ``DB`` and ``OperateMysql`` instances."""
     self.anyou = anyou
     # DB() is constructed before OperateMysql().
     self.db = DB()
     self.operate = OperateMysql()