Esempio n. 1
0
def get_list(string):
    """Search the site for ``string`` and collect the result links.

    The index page is fetched first to read the query id embedded in the
    search button's ``onclick`` handler.  That id is formatted into
    ``config.list_url`` and the search is POSTed with a User-Agent taken
    from a random line of ``user-agent.txt``.

    Args:
        string: Search keyword; formatted into ``config.list_parmas``.

    Returns:
        A ``(info, flag)`` tuple.  ``info`` maps a 0-based position to
        ``config.host + href`` for every ``<li>`` that contains a link.
        ``flag`` is:

        * ``1`` - the result page was parsed;
        * ``100000003`` - the page reports no result, too frequent access
          or abnormal access;
        * ``100000004`` - the search response was not a 200 / UTF-8 page,
          or any exception was raised (it is logged, not propagated);
        * ``10000004`` - the index page did not answer with 200.
    """
    info = {}
    try:
        content, status_code = Send_Request().send_request(
            config.index_url, config.headers_index)
        if status_code != 200:
            # NOTE(review): this code has one zero fewer than the other
            # error codes (100000004); kept as-is because callers may
            # compare against it - confirm whether it is a typo.
            return info, 10000004

        index_page = etree.HTML(
            content, parser=etree.HTMLParser(encoding='utf-8'))
        onclick = index_page.xpath(
            '//span[@class = "shouButton"]/@onclick')[0]
        query_id = re.findall(r".*QueryIndex\('','(.*?)'\).*", onclick)[0]
        search_url = config.list_url.format(query_id)

        # Work on a copy so the random User-Agent does not leak into the
        # shared config.headers mapping used by other requests.
        headers = dict(config.headers)
        # Pick a random User-Agent: line 1..1000 of user-agent.txt.
        line_no = random.randrange(1, 1001)
        # strip() also drops a trailing "\r" from CRLF files; getline()
        # returns '' when the file or line is missing, in which case the
        # configured User-Agent is left untouched.
        user_agent = linecache.getline(r'user-agent.txt', line_no).strip()
        if user_agent:
            headers["User-Agent"] = user_agent

        params = config.list_parmas.format(string)
        response = requests.post(search_url, params, headers=headers)
        body = response.content
        encoding = chardet.detect(body)["encoding"]
        if response.status_code != 200 or encoding != 'utf-8':
            return info, 100000004

        # Decode before matching: a unicode pattern never matches the raw
        # UTF-8 bytes (and is a TypeError on Python 3).
        blocked = re.search(u".*无查询结果.*|.*访问频繁.*|.*访问异常.*",
                            body.decode('utf-8'))
        if blocked:
            return info, 100000003

        result_page = etree.HTML(
            body, parser=etree.HTMLParser(encoding='utf-8'))
        for entry in result_page.xpath("//li"):
            hrefs = entry.xpath(".//a/@href")
            if not hrefs:
                # A list item without a link is not a search result;
                # skip it instead of failing the whole search.
                continue
            info[len(info)] = config.host + hrefs[0]
        return info, 1
    except Exception as e:
        logging.error("search error:%s", e)
        return info, 100000004
Esempio n. 2
0
 def get_deatail_info(self, detail_url, info):
     """Fetch a detail page and copy its labelled fields into ``info``.

     The page at ``detail_url`` is requested with ``config.headers``.  On a
     200 response it is parsed as UTF-8 HTML and, for every label in the
     table below, ``deal_html_code.get_match_info(label, page)`` is stored
     in ``info`` under the matching field name.  On any other status a
     message is logged and ``info`` is left unchanged.

     Args:
         detail_url: URL of the detail page to download.
         info: Mutable mapping that receives the extracted fields; it is
             updated in place.

     Returns:
         None.
     """
     # Page label -> key written into ``info``.
     field_names = {
         u"执行事项": "items",
         u"裁定书文号": "rule_no",
         u"证照种类": "cert_cate",
         u"证照号码": "cert_code",
         u"冻结期限自": "start_date",
         u"冻结期限至": "end_date",
         u"冻结期限": "period",
         u"公示日期": "pub_date",
     }
     result, status_code = Send_Request().send_requests(
         detail_url, config.headers)
     if status_code != 200:
         logging.info("获取司法协助详情信息失败!")
         return

     # Parse the downloaded markup; presumably ``result`` is the raw page
     # content returned by the request helper - verify against
     # Send_Request.
     data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
     # items() is required: iterating the dict itself yields only the
     # labels, which cannot be unpacked into (label, field).
     for label, field in field_names.items():
         info[field] = deal_html_code.get_match_info(label, data)