Ejemplo n.º 1
0
def get_vjx(**kwargs):
    """Fetch the first response cookie of the list page and derive its key.

    Reads and compiles the JavaScript file at ``wenshujs``, requests the
    list page, takes the value of the first cookie in the response's
    ``Set-Cookie`` header (``vjkl5``) and passes it to the script's
    ``getKey`` function to obtain ``vl5x``.  The request/parse/compute
    sequence is retried without limit until it succeeds.

    Args:
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        tuple: ``(vl5x, vjkl5)`` -- the ``getKey`` result and the cookie
        value it was computed from.

    Raises:
        OSError: If the JavaScript file cannot be opened or read.  Errors
            from compiling the script also propagate, because both steps
            run outside the retry loop.
    """
    import logging

    with open(wenshujs, 'r', encoding='utf-8') as fp:
        js = fp.read()
    ect = execjs.compile(js)
    # Query string decodes to "case type: administrative cases".
    url = 'http://wenshu.court.gov.cn/List/List?sorttype=1&conditions=searchWord+3+AJLX++%E6%A1%88%E4%BB%B6%E7%B1%BB%E5%9E%8B:%E8%A1%8C%E6%94%BF%E6%A1%88%E4%BB%B6'
    while True:
        # Built per attempt, as before, in case re_request mutates the dict.
        headers = {
            # NOTE(review): the header *value* itself begins with
            # "User-Agent:", which looks like a copy-paste slip; left
            # unchanged because the server's reaction to it is unknown.
            'User-Agent':
            'User-Agent:Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',
            # presumably makes re_request send no Cookie header -- confirm
            'Cookie': None,
        }
        try:
            res1 = re_request(url=url, headers=headers)
            first_cookie = res1.headers['Set-Cookie'].split(';')[0]
            # maxsplit=1 keeps cookie values that themselves contain '='.
            vjkl5 = first_cookie.split('=', 1)[1]
            vl5x = ect.call('getKey', vjkl5)
            return vl5x, vjkl5
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit now propagate so the loop can be stopped.
            logging.getLogger(__name__).debug(
                "get_vjx attempt failed; retrying", exc_info=True)
Ejemplo n.º 2
0
def get_vjx(**kwargs):
    """Fetch the first response cookie of the list page and derive its key.

    Reads and compiles the JavaScript file at ``wenshujs``, requests the
    list page, takes the value of the first cookie in the response's
    ``Set-Cookie`` header (``vjkl5``) and passes it to the script's
    ``getKey`` function to obtain ``vl5x``.  The request/parse/compute
    sequence is retried without limit until it succeeds.

    Args:
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        tuple: ``(vl5x, vjkl5)`` -- the ``getKey`` result and the cookie
        value it was computed from.

    Raises:
        OSError: If the JavaScript file cannot be opened or read.  Errors
            from compiling the script also propagate, because both steps
            run outside the retry loop.
    """
    import logging

    with open(wenshujs, 'r', encoding='utf-8') as fp:
        js = fp.read()
    ect = execjs.compile(js)
    # Query string decodes to "case type: criminal cases".
    url = 'http://wenshu.court.gov.cn/List/List?sorttype=1&conditions=searchWord+1+AJLX++%E6%A1%88%E4%BB%B6%E7%B1%BB%E5%9E%8B:%E5%88%91%E4%BA%8B%E6%A1%88%E4%BB%B6'
    while True:
        # Built per attempt, as before, in case re_request mutates the dict.
        headers = {
            # NOTE(review): the header *value* itself begins with
            # "User-Agent:", which looks like a copy-paste slip; left
            # unchanged because the server's reaction to it is unknown.
            'User-Agent': 'User-Agent:Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',
            # presumably makes re_request send no Cookie header -- confirm
            'Cookie': None,
        }
        try:
            res1 = re_request(url=url, headers=headers)
            first_cookie = res1.headers['Set-Cookie'].split(';')[0]
            # maxsplit=1 keeps cookie values that themselves contain '='.
            vjkl5 = first_cookie.split('=', 1)[1]
            vl5x = ect.call('getKey', vjkl5)
            return vl5x, vjkl5
        except Exception:
            # Only ordinary errors are retried; KeyboardInterrupt and
            # SystemExit now propagate so the loop can be stopped.
            logging.getLogger(__name__).debug(
                "get_vjx attempt failed; retrying", exc_info=True)
Ejemplo n.º 3
0
def get_detail_info(**kwargs):
    """Download one document, save it as an HTML file and cache its record.

    The document id is obtained from ``doc_id_decyrpt(runeval, docid)`` and
    appended to the ``CreateContentJS.aspx`` URL.  The response text is
    searched for the escaped ``Html`` payload; when found, the payload is
    wrapped in a minimal HTML page, written to ``<filedir>/<hash>.html``
    and the metadata record is stored through ``hr.cache_dict_redis`` under
    the table name ``TB_WENSHU_ZHIXING``.  The request is repeated up to
    1000 times until a payload is found.

    Keyword Args:
        runeval: First argument passed to ``doc_id_decyrpt``.
        docid: Second argument passed to ``doc_id_decyrpt``.
        CASE_NAME, CASE_TIME, CASE_TYPE, CASE_NUM, COURT_NAME: Copied into
            the record; each defaults to ``""``.  ``CASE_NAME`` and
            ``CASE_TIME`` are concatenated and hashed with ``getmd5`` to
            build the output file name, so they are assumed to be strings.

    Returns:
        None.  When every attempt fails to yield a payload, nothing is
        written or cached and the failure is logged.
    """
    max_attempts = 1000

    run_eval = kwargs.get("runeval")
    doc_id_src = kwargs.get("docid")
    result = doc_id_decyrpt(run_eval, doc_id_src)
    url_doc = 'http://wenshu.court.gov.cn/CreateContentJS/CreateContentJS.aspx?DocID=' + result

    # Metadata record that is cached once the document has been saved.
    item = {"data_source": "http://wenshu.court.gov.cn/"}
    for field in ("CASE_NAME", "CASE_TIME", "CASE_TYPE", "CASE_NUM", "COURT_NAME"):
        item[field] = kwargs.get(field, "")
    hashvalue = getmd5(item["CASE_NAME"] + item["CASE_TIME"])
    item["hash"] = hashvalue

    # ``pl`` is defined elsewhere; presumably the platform name -- confirm.
    if pl == "Windows":
        filedir = "D:\\workplace\\wenshu_zhixing"
    else:
        filedir = "/data/wenshu_zhixing"

    # Record the address this host name resolves to.
    myname = socket.getfqdn(socket.gethostname())
    item["IP"] = socket.gethostbyname(myname)

    file_name = os.path.join(filedir, hashvalue + ".html")
    item["DOC_DIR"] = file_name

    # Compiled once; the pattern text is unchanged.
    html_pattern = re.compile(r'\\"Html\\":\\"(.*?)\\"}";')
    # The whitespace inside this literal is written to the output file
    # verbatim, so it is kept exactly as it was.
    page_template = """
                <!DOCTYPE html>
                <html lang="en">
                <head>
                    <meta charset="UTF-8">
                    <title>Title</title>
                </head>
                <body>
                %s
                </body>
                </html>
                """

    for _ in range(max_attempts):
        # Built per attempt, as before, in case re_request mutates the dict.
        headers = {
            # NOTE(review): the header *value* itself begins with
            # "User-Agent:", which looks like a copy-paste slip; left
            # unchanged because the server's reaction to it is unknown.
            'User-Agent':
            'User-Agent:Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5',
            # presumably makes re_request send no Cookie header -- confirm
            'Cookie': None,
        }
        resp = re_request(url=url_doc, headers=headers)
        resp.encoding = resp.apparent_encoding
        matches = html_pattern.findall(resp.text)
        if not matches:
            # No payload in this response; request the page again.
            continue

        # Create the target directory on first use so the write cannot fail
        # merely because the directory is missing.
        os.makedirs(filedir, exist_ok=True)
        with open(file_name, 'w', encoding='utf-8') as f:
            f.write(page_template % matches[0])
        log.crawler.info('数据保存成功......')
        table = "TB_WENSHU_ZHIXING"
        hr.cache_dict_redis(table, item)
        return

    # Previously the function fell through here without any trace.
    log.crawler.info(
        "no document HTML found after %d attempts: %s" % (max_attempts, url_doc))