Beispiel #1
0
 def get_json(self):
     """Query the Baidu dishonest-debtor list for every company and store the result.

     Company names are read with ``read_company2(self.path)``. Blank names
     are skipped, as are names for which ``Item_dump(company).item_dump()``
     returns a truthy value (presumably "already stored" - verify against
     Item_dump). For each remaining company the Baidu ``api.php`` endpoint
     is requested, the response body is handed to ``Handel_json`` and the
     resulting item is printed and passed to ``self.save_mongodb``.

     A failed request is appended to ``log/ss_log.log`` and the company is
     skipped. After each stored company the loop sleeps for 3 seconds.
     """
     # Local import keeps this block self-contained.
     from urllib.parse import quote

     for raw_name in read_company2(self.path):
         company = raw_name.strip()
         if not company:
             continue
         if Item_dump(company).item_dump():
             continue

         item = {"company": company, "type": self.type}
         nd = int(time.time()) * 1000
         nd1 = nd + 2500000
         # The name is percent-encoded so characters such as '&', '#' or
         # spaces cannot break the query string.
         url = "https://sp0.baidu.com/8aQDcjqpAAV3otqbppnN2DJv/api.php?resource_id=6899&query=%E5%A4%B1%E4%BF%A1%E8%A2%AB%E6%89%A7%E8%A1%8C%E4%BA%BA%E5%90%8D%E5%8D%95&cardNum=&iname={}&areaName=&ie=utf-8&oe=utf-8&format=json&t={}&cb=jQuery1102025075011778413225_{}&_={}".format(
             quote(company, safe=""), nd1, self.code, nd)
         headers = {
             "User-Agent": Rand_ua().rand_chose(),
         }
         try:
             response = requests.get(url, headers=headers, timeout=60)
         except Exception as e:
             # Best effort: record the failure and move on to the next company.
             with open('log/ss_log.log', 'a') as f:
                 now = str(datetime.datetime.now())
                 f.write(now + ',' + str(e) + ',' + company + ',' +
                         '失信信息' + '\n')
             continue

         body = response.content.decode()
         ss_list = Handel_json(body, company).handel_json()
         item["失信信息"] = ss_list
         print(item)
         self.save_mongodb(item)
         # Pause between successful lookups.
         time.sleep(3)
Beispiel #2
0
 def __init__(self,company_name):
     """Start a Chrome WebDriver that uses a randomly chosen User-Agent.

     :param company_name: stored unchanged as ``self.company_name``.

     Sets ``self.cp_url``, ``self.company_name`` and ``self.driver``.
     """
     ua = Rand_ua().rand_chose()
     self.cp_url = "http://wenshu.court.gov.cn/"
     self.company_name = company_name
     options = webdriver.ChromeOptions()
     # Raw string: "\p" and "\c" are not valid escape sequences, so the
     # non-raw literal triggers an invalid-escape warning. The runtime
     # value is unchanged.
     path = r"E:\python开发环境\chromedriver.exe"
     options.add_argument('--user-agent={}'.format(ua))
     # NOTE(review): the chrome_options / executable_path keywords are
     # deprecated in newer Selenium releases - confirm the installed version.
     self.driver = webdriver.Chrome(chrome_options=options, executable_path=path)
Beispiel #3
0
 def __init__(self, path, text_list=None, type=None,proxies=None):
     """Store the crawl configuration and open the MongoDB collection.

     :param path: path of the company name list.
     :param text_list: test list.
     :param type: company type.
     :param proxies: stored unchanged as ``self.proxies``.
     """
     # Plain configuration values.
     self.path = path
     self.text_list = text_list
     self.type = type
     self.proxies = proxies
     # URL template; the ``random`` query value is filled in via str.format.
     self.captcha_url = 'http://zhixing.court.gov.cn/search/captcha.do?captchaId=fda97538121240b38b0c73eeac144dbe&random={}'

     # Helper objects.
     self.u = Rand_ua()
     mongo_client = pymongo.MongoClient(host='127.0.0.1', port=27017)
     self.client = mongo_client
     database = mongo_client["qg_ss"]
     self.conn = database['shesu_info']
def text_dama():
    """Fetch one captcha image and print what the Damatu service decodes.

    Downloads the captcha from zhixing.court.gov.cn with a random
    User-Agent, prints the type of the response body, sends the bytes to
    ``DamatuApi.decode`` and prints the returned value. Returns ``None``.
    """
    headers = {'User-Agent': Rand_ua().rand_chose()}
    captcha_url = 'http://zhixing.court.gov.cn/search/captcha.do?captchaId=fda97538121240b38b0c73eeac144dbe&random={}'.format(
        random.randint(999999999999999, 9999999999999999) / 10000000000000000)
    # Same 60s timeout as the other requests in this file, so a stalled
    # server cannot hang the call forever.
    response = requests.get(captcha_url, headers=headers, timeout=60)
    print(type(response.content))
    # NOTE(review): credentials are hard-coded here - consider moving them
    # to configuration.
    dmt = DamatuApi("469819183", "54188")
    ret = dmt.decode(response.content, 42)
    print(ret)
Beispiel #5
0
    def __init__(self, company):
        """Start a Chrome WebDriver with a random User-Agent for *company*.

        :param company: stored unchanged as ``self.company``.

        If the driver cannot be created, the exception is passed to ``Log``
        and ``self.driver`` stays ``None``. The other attributes are still
        set, so the instance is never left half-initialised.
        """
        # Assign plain attributes first so they exist even when the browser
        # fails to start.
        self.cp_url = "http://wenshu.court.gov.cn/"
        self.company = company
        self.driver = None

        ua = Rand_ua().rand_chose()
        try:
            options = webdriver.ChromeOptions()
            options.add_argument('--user-agent={}'.format(ua))
            self.driver = webdriver.Chrome(chrome_options=options)
        except Exception as e:
            # Best effort: record the failure; callers see driver is None.
            Log('log/cp_log.log', e=e)
def _search_company(company, captcha_url):
    """Solve a captcha, search zhixing.court.gov.cn for *company*, return details.

    :param company: name sent as the ``pname`` form field.
    :param captcha_url: URL the captcha image is downloaded from.
    :return: ``"验证码获取失败!"`` when downloading or decoding the captcha
        fails (the error is also appended to ``log/shesu_log.log``);
        otherwise whatever ``get_detail`` returns for the result page.
    :raises AssertionError: when the result page reports a wrong captcha
        or the HTTP status is not 200.
    """
    headers = {'User-Agent': Rand_ua().rand_chose()}
    try:
        captcha_response = requests.get(captcha_url,
                                        headers=headers,
                                        timeout=60)
        # NOTE(review): credentials are hard-coded here - consider moving
        # them to configuration.
        dmt = DamatuApi("469819183", "54188")
        captcha = dmt.decode(captcha_response.content, 42)
    except Exception as e:
        print(e)
        with open("log/shesu_log.log", 'a') as f:
            now = str(datetime.datetime.now())
            f.write(now + ',' + company + ',' + str(e) + '\n')
        return "验证码获取失败!"

    # NOTE(review): captchaId is fixed here while captcha_url is a
    # parameter - presumably both must carry the same id; confirm.
    post_data = {
        "searchCourtName": "全国法院(包含地方各级法院)",
        "selectCourtId": 1,
        "selectCourtArrange": 1,
        "pname": company,
        "cardNum": "",
        "j_captcha": captcha,
        "captchaId": "fda97538121240b38b0c73eeac144dbe"
    }
    resp = requests.post('http://zhixing.court.gov.cn/search/newsearch',
                         data=post_data,
                         headers=headers,
                         timeout=60)
    print("查询++++++++++", resp.status_code)
    content = resp.content.decode()
    html = etree.HTML(content)
    # The page title tells us whether the captcha was rejected.
    text = html.xpath("//title/text()")[0]
    print("*" * 20, text)
    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    # The exception type is unchanged for callers.
    # NOTE(review): the original comment mentions giving up after more than
    # 5 attempts and logging; that retry logic is not in this function -
    # presumably a caller or decorator reacts to the AssertionError.
    if text == "验证码出现错误,请重新输入!" or resp.status_code != 200:
        raise AssertionError(
            "captcha rejected or unexpected HTTP status {}".format(
                resp.status_code))
    # Request the detail pages.
    ss_list = get_detail(html, captcha, headers, company)

    return ss_list
Beispiel #7
0
    def __init__(self, company):
        """Start a Chrome WebDriver with a random User-Agent for *company*.

        :param company: stored unchanged as ``self.company``.

        If the driver cannot be created, the exception is passed to ``Log``
        and ``self.driver`` stays ``None``. The other attributes are still
        set, so the instance is never left half-initialised.
        """
        # Assign plain attributes first so they exist even when the browser
        # fails to start.
        self.cp_url = "http://wenshu.court.gov.cn/"
        self.company = company
        self.driver = None

        ua = Rand_ua().rand_chose()
        try:
            options = webdriver.ChromeOptions()
            # Raw string: "\p" and "\c" are not valid escape sequences, so
            # the non-raw literal triggers an invalid-escape warning. The
            # runtime value is unchanged.
            path = r"E:\python开发环境\chromedriver.exe"
            options.add_argument('--user-agent={}'.format(ua))
            # Left disabled by the original author:
            # options.add_argument("headless")
            # prefs = {'profile.default_content_setting_values': {'images': 2}}
            # options.add_experimental_option('prefs', prefs)  # image-free mode gave poor results
            self.driver = webdriver.Chrome(chrome_options=options,executable_path=path)
        except Exception as e:
            # Best effort: record the failure; callers see driver is None.
            Log('log/cp_log.log', e=e)
Beispiel #8
0
 def get_detail(self, href):
     """Download the text nodes of the court document referenced by *href*.

     :param href: link carrying a ``DocID=...`` query parameter, e.g.
         ``/content/content?DocID=<id>&KeyWord=<company>``, or the sentinel
         ``"未获取到文书id"``.
     :return: the list produced by ``xpath("//body//text()")`` on success;
         ``"获取文书内容失败"`` when the download or parsing fails;
         ``"未获取到文书id"`` when *href* is the sentinel or contains no
         DocID value.
     """
     if href == "未获取到文书id":
         return "未获取到文书id"

     # ``[^&]+`` also matches when DocID is the last query parameter; the
     # previous pattern required a trailing '&'.
     match = re.search(r"DocID=([^&]+)", href)
     if not match:
         # Previously this case fell through and returned None implicitly.
         return "未获取到文书id"
     doc_id = match.group(1)

     headers = {"User-Agent": Rand_ua().rand_chose()}
     d_url = "http://wenshu.court.gov.cn/CreateContentJS/CreateContentJS.aspx?DocID={}".format(
         doc_id)
     print(d_url)
     try:
         ret = requests.get(d_url, headers=headers, timeout=60)
     except Exception as e:
         print("*" * 10, str(e))
         Log('log/cp_log.log', e=e)
         return "获取文书内容失败"

     # Decoding, parsing and the xpath query share one handler: each
     # failure is reported and answered in exactly the same way.
     try:
         xml = etree.HTML(ret.content.decode())
         ws_detail = xml.xpath("//body//text()")
     except Exception as e:
         print("*" * 10, str(e))
         Log('log/cp_log.log', e=e)
         return "获取文书内容失败"

     # Pause after a successful fetch.
     time.sleep(2)
     return ws_detail