Esempio n. 1
0
def get_index(code):
    """Fetch the detail payload for ``code`` through a two-step lookup.

    The province derived from ``code`` selects the URL templates in
    ``config.url_list`` and ``config.detail_list``.  The first response is
    parsed as JSON and the ``uuid`` of its first ``info`` entry is used to
    build the detail URL.

    Returns the body of the detail response, or ``None`` when the first
    request does not come back with status 200.
    """
    province = deal_html_code.judge_province(code)
    index_url = config.url_list[province].format(code)
    body, status_code = Send_Request().send_requests(index_url)
    if status_code != 200:
        return None

    first_entry = json.loads(body)["info"][0]
    detail_url = config.detail_list[province].format(first_entry["uuid"])
    # Only the first element of the response pair is handed back; the status
    # of the detail request is not inspected.
    detail_response = Send_Request().send_requests(detail_url)
    return detail_response[0]
Esempio n. 2
0
def name(url):
    """Collect the entries of a paginated detail page into a dict.

    The first page is fetched from ``url``; further pages are fetched from
    ``out_invest_url`` formatted with the entity id, the cid and the page
    number.  Every page is split into per-entry HTML fragments which are
    handed to ``deal_single_info`` together with ``info`` and a start index.

    Returns ``(info, flag)``.  ``flag`` is 1 on success and 100000004 when
    the first request fails or the page does not contain the company-name
    marker; in both failure cases ``info`` is an empty dict.
    """
    separator = (
        '<dd style="border-bottom:1px solid #AE0000;padding-bottom:10px;">'
    )
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound before any branch so that the failure paths can return it.
    info = {}
    if status_code != 200 or "企业名称" not in content:
        return info, 100000004

    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class='viewBox']//dl")[0]
    datalist = etree.tostring(dl).split(separator)
    # Drop the trailing fragment by position; list.remove() would delete the
    # first element that merely compares equal to it.
    del datalist[-1]
    deal_single_info(datalist, info, 0)

    pattern = re.compile(u".*共(.*?)页.*")
    number = re.findall(pattern, content)
    totalpage = int(number[0]) if len(number) == 1 else 0

    if totalpage > 1:
        entid = deal_html_code.match_entid(url)
        cid = deal_html_code.match_cid(url)
        for k in xrange(2, totalpage + 1):
            # Build the URL per page.  str.format ignores surplus positional
            # arguments, so a template without a page placeholder still works.
            href = out_invest_url.format(entid, cid, k)
            content, status_code = Send_Request().send_request(href, headers)
            if status_code != 200:
                # A failed page is skipped; the remaining pages are still tried.
                continue
            start = k * 5 + 1
            result = etree.HTML(
                content, parser=etree.HTMLParser(encoding='utf-8'))
            dl = result.xpath("//div[@class='viewBox']/dl")[0]
            datalist = etree.tostring(dl).split(separator)
            del datalist[-1]
            deal_single_info(datalist, info, start)

    info = deal_html_code.remove_repeat(info)
    return info, 1
Esempio n. 3
0
def name(url):
    """Collect the entries of a paginated shareholder page into a dict.

    The page at ``url`` is split into per-entry fragments which are passed to
    ``deal_single_info``; when the page count read from the content is not 1,
    pages 2..N are fetched through ``share_url`` and processed the same way.

    Returns ``(info, flag)`` where ``flag`` is 1 when the first request
    returned status 200 and 100000004 otherwise.  ``info`` always goes
    through ``deal_html_code.remove_repeat`` before being returned.
    """
    marker = '<dt style="color:#333;margin-bottom:10px;"/>'

    def split_entries(html):
        # Serialise the first <dl> under the viewBox div and cut it at every
        # marker; the piece in front of the first marker is discarded.
        tree = etree.HTML(html, parser=etree.HTMLParser(encoding='utf-8'))
        first_dl = tree.xpath("//div[@class='viewBox']//dl")[0]
        return etree.tostring(first_dl).split(marker)[1:]

    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        flag = 100000004
    else:
        flag = 1
        entries = split_entries(content)
        if len(entries) == 0:
            logging.info("无股东及出资信息")
        else:
            matches = re.findall(re.compile(".*共(.*?)页.*"), content)
            totalpage = int(matches[0]) if len(matches) == 1 else 0
            # The first page is processed whatever the page count is.
            deal_single_info(entries, info, 0)
            if totalpage != 1:
                entid = deal_html_code.match_entid(url)
                cid = deal_html_code.match_cid(url)
                for k in xrange(2, totalpage + 1):
                    href = share_url.format(entid, cid, k)
                    content, status_code = Send_Request().send_request(
                        href, headers)
                    if status_code != 200:
                        continue
                    start = k * 5 + 1
                    page_entries = split_entries(content)
                    if len(page_entries) > 0:
                        deal_single_info(page_entries, info, start)

    info = deal_html_code.remove_repeat(info)
    return info, flag
Esempio n. 4
0
 def get_info(self):
     """Scrape the rows of the ``table_jyyc`` table for ``self._pripid``.

     Requests ``self._url`` formatted with ``self._pripid`` and reads every
     ``<tr name="jyyc">`` of the table with id ``table_jyyc``.

     Returns a dict mapping the row index to a dict with the keys ``types``,
     ``in_reason``, ``in_date``, ``out_reason``, ``out_date``, ``gov_dept``
     and ``out_gov``.  The dict is empty when the request does not return
     status 200.
     """

     def cell_text(cells, index):
         # Whole text content of one cell, cleaned by remove_symbol.
         return deal_html_code.remove_symbol(cells[index].xpath("string(.)"))

     headers = config.headers
     url = self._url.format(self._pripid)
     result, status_code = Send_Request().send_requests(url, headers)
     info = {}
     if status_code != 200:
         return info

     data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
     # Both predicates test attributes, hence the '@' prefix.
     tr_list = data.xpath(
         "//table[@id = 'table_jyyc']//tr[@name = 'jyyc']")
     for i, singledata in enumerate(tr_list):
         td_list = singledata.xpath("./td")
         # Cell 7 is read below; skip rows that are too short instead of
         # failing with IndexError.
         if len(td_list) < 8:
             continue
         temp = {}
         temp["types"] = '经营异常'
         temp["in_reason"] = cell_text(td_list, 1)
         temp["in_date"] = deal_html_code.change_chinese_date(
             cell_text(td_list, 2))
         temp["out_reason"] = cell_text(td_list, 4)
         temp["out_date"] = deal_html_code.change_chinese_date(
             cell_text(td_list, 5))
         temp["gov_dept"] = cell_text(td_list, 6)
         temp["out_gov"] = cell_text(td_list, 7)
         info[i] = temp
     return info
Esempio n. 5
0
 def get_year_href(self):
     """Read the annual-report index behind ``self.url``.

     The response is treated as failed when the status is not 200 or when it
     contains one of the failure markers.  Otherwise the first bracketed
     section of the body is cut into ``{...}`` pieces, each decoded as JSON.

     Returns ``(information, flag)``.  ``information`` maps the position of
     each record to ``[anCheId, anCheYear, province, entType, annRepFrom]``,
     where the province is looked up in ``config.province`` with characters
     15-16 of ``anCheId``.  ``flag`` is 1 on success, 100000004 on failure.
     """
     information = {}
     body, status_code = Send_Request().send_requests(self.url)
     failure_hits = re.findall(
         re.compile('.*/index/invalidLink.*|.*页面不存在.*'), body)
     if status_code != 200 or len(failure_hits) != 0:
         logging.info("report url fail")
         return information, 100000004

     bracketed = re.findall(re.compile(r'\[(.*?)\]'), body)[0]
     records = re.findall(re.compile(u'{.*?}'), bracketed)
     for i, raw_record in enumerate(records):
         singledata = json.loads(raw_record)
         anCheId = singledata["anCheId"]
         anCheYear = singledata["anCheYear"]
         entType = singledata["entType"]
         annRepFrom = singledata["annRepFrom"]
         province = config.province[anCheId[15:17]]
         information[i] = [
             anCheId, anCheYear, province, entType, annRepFrom
         ]
     return information, 1
Esempio n. 6
0
	def name(self, url):
		"""Extract the inspection records listed on the page at ``url``.

		The first ``<dl>`` under the ``viewBox`` div is serialised and split
		into one fragment per record; the date, authority and result of each
		record are read with ``self.deal_dd_content``.

		Returns ``(info, flag)``.  ``info`` maps the record index to
		``[check_date, gov_dept, result, types]``; ``types`` is derived from
		the URL and is ``None`` when the URL contains neither keyword.
		``flag`` is 1 on success and 100000004 when the request fails.
		"""
		info = {}
		content, status_code = Send_Request().send_request(url)
		if status_code != 200:
			return info, 100000004

		tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
		dl = tree.xpath("//div[@class='viewBox']//dl")[0]
		dllist = etree.tostring(dl).split('<dd style="border-top:1px dashed #ccc;">')
		# Drop the trailing fragment by position; list.remove() would delete
		# the first element that merely compares equal to it.
		del dllist[-1]
		for i, fragment in enumerate(dllist):
			single = etree.HTML(fragment, parser=etree.HTMLParser(encoding='utf-8'))
			check_date = self.deal_dd_content(u"抽查检查日期", single)
			gov_dept = self.deal_dd_content(u"检查实施机关", single)
			check_result = self.deal_dd_content(u"抽查检查结果", single)
			if u"抽查信息" in url:
				types = "抽查"
			elif u"检查信息" in url:
				types = "检查"
			else:
				# Previously left unbound, which raised NameError below.
				types = None
			info[i] = [check_date, gov_dept, check_result, types]
		return info, 1
Esempio n. 7
0
 def name(self, url):
     """Extract name and licence data from the page at ``url``.

     The first ``<dl>`` under the ``viewBox`` div is serialised and split at
     every ``<br/>``; each fragment yields the text of its first ``<dt>`` and
     of its first three ``<dd>`` elements.

     Returns ``(info, flag)``.  ``info`` maps the fragment index to
     ``[name, types, license_type, license_code]``; ``flag`` is 1 on success
     and 100000004 when the request does not return status 200.
     """
     info = {}
     content, status_code = Send_Request().send_request(url)
     if status_code != 200:
         return info, 100000004

     def text_of(node):
         # Whole text content of a node, cleaned by remove_symbol.
         return deal_html_code.remove_symbol(node.xpath("string(.)"))

     tree = etree.HTML(content, parser=etree.HTMLParser(encoding="utf-8"))
     dlinfo = tree.xpath("//div[@class ='viewBox']//dl")[0]
     fragments = etree.tostring(dlinfo).split("<br/>")
     # Remove the useless data of the last item.
     fragments.remove(fragments[-1])
     for i, fragment in enumerate(fragments):
         node = etree.HTML(fragment,
                           parser=etree.HTMLParser(encoding="utf-8"))
         title = text_of(node.xpath(".//dt")[0])
         dd_nodes = node.xpath('.//dd')
         info[i] = [
             title,
             text_of(dd_nodes[0]),
             text_of(dd_nodes[1]),
             text_of(dd_nodes[2]),
         ]
     return info, 1
Esempio n. 8
0
def update_all_info(url, gs_basic_id):
    """Run every section updater for the company page at ``url``.

    The page is requested first; a non-200 status or a body matching the
    failure pattern is logged as an error and nothing else happens.
    Otherwise ``QGGS_basic.main`` supplies the per-section URL mapping and a
    flag; when the flag is below 100000001 each section module's ``main`` is
    called with ``gs_basic_id`` and the URL stored under that section's key.

    Returns ``None``.
    """
    result, status_code = Send_Request().send_requests(url)
    fail = re.findall(re.compile(".*返回首页.*"), result)
    if status_code != 200 or len(fail) != 0:
        logging.error('网页打开过程出错!')
        return

    urllist, flag = QGGS_basic.main(url, gs_basic_id)
    if flag < 100000001:
        # (module, key in urllist) in call order; QGGS_change handles two keys.
        sections = (
            (QGGS_black, "black"),
            (QGGS_branch, "branch"),
            (QGGS_brand, "brand"),
            (QGGS_change, "change"),
            (QGGS_change, "change2"),
            (QGGS_check, "check"),
            (QGGS_clear, "clear"),
            (QGGS_except, "except"),
            (QGGS_freeze, "freeze"),
            (QGGS_mort, "mort"),
            (QGGS_permit, "permit"),
            (QGGS_permit2, "permit2"),
            (QGGS_person, "person"),
            (QGGS_punish, "punish"),
            (QGGS_punish2, "punish2"),
            (QGGS_shareholder, "shareholder"),
            (QGGS_stock, "stock"),
            (QGGS_report, "report"),
        )
        for module, key in sections:
            module.main(gs_basic_id, urllist[key])
    else:
        logging.error('基本信息页访问失败!')
Esempio n. 9
0
def name(url):
    """Read the first record of the JSON ``data`` list served at ``url``.

    Returns ``{0: row}`` where ``row`` holds 23 values: ``soseId``, three
    fields converted with ``int`` and nineteen fields taken as they are, in
    the order given by the key tables below.  Returns an empty dict when the
    status is not 200 or the ``data`` list is empty.
    """
    # Fields converted to int, in output order.
    int_keys = ("unpaidSocialInsDis", "totalWagesDis", "totalPaymentDis")
    # Fields copied unchanged, in output order.
    plain_keys = (
        "unpaidSocialInsSo510", "so510", "totalPaymentSo510", "totalWagesSo510",
        "so110", "unpaidSocialInsSo110", "totalPaymentSo110", "totalWagesSo110",
        "totalPaymentSo210", "totalWagesSo210", "unpaidSocialInsSo210", "so210",
        "totalPaymentSo310", "totalWagesSo310", "unpaidSocialInsSo310", "so310",
        "totalPaymentSo410", "unpaidSocialInsSo410", "so410",
    )

    result, status_code = Send_Request().send_requests(url)
    info = {}
    if status_code != 200:
        return info

    data = json.loads(result)["data"]
    if len(data) == 0:
        return info

    record = data[0]
    row = [record["soseId"]]
    for key in int_keys:
        row.append(int(record[key]))
    for key in plain_keys:
        row.append(record[key])
    info[0] = row
    return info
Esempio n. 10
0
def name(url):
    """Extract the share-change records listed on the page at ``url``.

    The first ``<dl>`` directly under the ``viewBox`` div is serialised and
    split into one fragment per record; each fragment is parsed on its own
    and read with ``deal_dd_content``.

    Returns ``(info, flag)``.  On success ``flag`` is 1 and ``info`` is the
    result of ``deal_html_code.remove_repeat`` applied to a dict mapping the
    record index to ``[name, percent_pre, percent_after, dates]``.  When the
    request fails ``flag`` is 100000004 and ``info`` is empty.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        return info, 100000004

    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class= 'viewBox']/dl")[0]
    datalist = etree.tostring(dl).split(
        '<dd style="border-bottom:1px solid #AE0000;padding-bottom:10px;">'
    )
    # Drop the trailing fragment by position.
    del datalist[-1]
    for i, fragment in enumerate(datalist):
        # Parse the fragment itself; parsing the whole page again would make
        # every record identical.
        single = etree.HTML(fragment,
                            parser=etree.HTMLParser(encoding='utf-8'))
        holder = deal_dd_content(u"股东", single)
        percent_pre = deal_dd_content(u"变更前", single)
        percent_after = deal_dd_content(u"变更后", single)
        dates = deal_dd_content(u"变更日期", single)
        info[i] = [holder, percent_pre, percent_after, dates]

    # Keep the value handed back by remove_repeat instead of discarding it.
    info = deal_html_code.remove_repeat(info)
    return info, 1
Esempio n. 11
0
 def name(self, url):
     """Extract the trademark records listed on the page at ``url``.

     The first ``<dl>`` under the ``viewBox`` div is serialised, stripped of
     its ``<dl>``/``</dl>`` tags and split into one fragment per record.

     Returns ``(info, flag)``.  ``info`` maps the record index to
     ``[ia_name, ia_zch, ia_type]`` as read by ``self.deal_dd_content``;
     ``flag`` is 1 on success and 100000004 when the request fails.
     """
     info = {}
     content, status_code = Send_Request().send_request(url)
     if status_code != 200:
         return info, 100000004

     # Labels looked up in every fragment, in output order.
     labels = (u'商标名称', u'商标注册号', u'认定类别')
     tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
     dl = tree.xpath('//div[@class= "viewBox"]//dl')[0]
     markup = etree.tostring(dl).replace("<dl>", '').replace("</dl>", '')
     datalist = markup.split('<dd style="border-top:1px dashed #ccc;">')
     datalist.remove(datalist[-1])
     for i, fragment in enumerate(datalist):
         single = etree.HTML(fragment,
                             parser=etree.HTMLParser(encoding='utf-8'))
         row = []
         for label in labels:
             row.append(self.deal_dd_content(label, single))
         info[i] = row
     return info, 1
Esempio n. 12
0
	def get_info(self):
		"""Walk every result page for ``self._name`` and collect its entries.

		Page 1 is requested from ``self._url`` formatted with the URL-quoted
		name and the page number; the total page count is read from the
		``totalPage_sbxx`` input of that page.  Each page that comes back
		with status 200 is handed to ``self.deal_single_page`` together with
		``info`` and the start position of that page.

		NOTE(review): the visible body ends without returning ``info``;
		confirm whether a trailing return statement is missing.
		"""
		info = {}
		name = urllib.quote(self._name)
		url = self._url.format(name, 1)
		result, status_code = Send_Request().send_requests(url, config.headers)
		if status_code == 200:
			start = 0
			data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
			self.deal_single_page(info, data, start)
			totalpage = data.xpath("//input[@id = 'totalPage_sbxx']/@value")[0]
			for i in xrange(2, int(totalpage) + 1):
				start = (i - 1) * 6  # start position of this page's entries
				url = self._url.format(name, i)
				result, page_status = Send_Request().send_requests(url, config.headers)
				# The status of follow-up pages used to be stored and
				# ignored; skip pages that did not load.
				if page_status != 200:
					continue
				data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
				self.deal_single_page(info, data, start)
Esempio n. 13
0
	def name(self, url):
		"""Extract the penalty records listed on the page at ``url``.

		The first ``<dl>`` under the ``viewBox`` div is serialised and split
		into one fragment per record; seven labelled values are read from
		each fragment with ``self.deal_dd_content``.

		Returns ``(info, flag)``.  ``info`` maps the record index to
		``[name, number, types, basis, result, date, gov_dept]``; ``flag`` is
		1 on success and 100000004 when the request fails.
		"""
		info = {}
		content, status_code = Send_Request().send_request(url)
		if status_code != 200:
			return info, 100000004

		# Labels looked up in every fragment, in output order.
		labels = (
			u"主体名称",
			u"行政处罚决定书文号",
			u"处罚事由",
			u"处罚依据",
			u"处罚结果",
			u"处罚决定日期",
			u"处罚机构",
		)
		tree = etree.HTML(content, parser=etree.HTMLParser(encoding="utf-8"))
		dl = tree.xpath("//div[@class = 'viewBox']//dl")[0]
		dllist = etree.tostring(dl).split('<dd style="border-top:1px dashed #ccc;">')
		dllist.remove(dllist[-1])
		for i, fragment in enumerate(dllist):
			single = etree.HTML(fragment, parser=etree.HTMLParser(encoding="utf-8"))
			row = []
			for label in labels:
				row.append(self.deal_dd_content(label, single))
			info[i] = row
		return info, 1
Esempio n. 14
0
 def name(self, url):
     """Extract rows of the ``tableIdStyle`` table from the page at ``url``.

     Returns ``(info, flag)``.  ``info`` maps the row index to
     ``[name, code, gov_dept]``, taken from cells 1, 2 and 5 of the row; rows
     with fewer than six cells are skipped.  ``flag`` is 1 on success and
     100000004 when the request does not return status 200.
     """

     def cell_text(cells, index):
         # Whole text content of one cell, cleaned by remove_symbol.
         return deal_html_code.remove_symbol(cells[index].xpath("string(.)"))

     info = {}
     headers = config.headers_detail
     content, status_code = Send_Request().send_request(url, headers)
     if status_code != 200:
         return info, 100000004

     result = etree.HTML(content,
                         parser=etree.HTMLParser(encoding='utf-8'))
     trlist = result.xpath("//table[@id = 'tableIdStyle']//tr")
     for i, single in enumerate(trlist):
         tdlist = single.xpath("./td")
         # Cell 5 is read below, so a usable row needs at least six cells;
         # the former "< 4" guard let 4- and 5-cell rows raise IndexError.
         if len(tdlist) < 6:
             continue
         info[i] = [
             cell_text(tdlist, 1),
             cell_text(tdlist, 2),
             cell_text(tdlist, 5),
         ]
     return info, 1
Esempio n. 15
0
 def get_report_branch_href(self, url, cookies):
     """Map the section links of an annual-report page to short keys.

     The page is requested with ``cookies`` and with ``headers``, a name that
     is not defined in this method and is resolved from the enclosing scope.
     For every ``div.categ_info_title_wz`` the link text is compared with the
     keyword table below; the first keyword contained in the text decides the
     key under which ``config.host`` + href is stored.

     Returns ``(branch_list, flag)`` where ``flag`` is 1 on success and
     100000004 when the request does not return status 200.
     """
     branch_list = {}
     content, status_code = Send_Request().send_request3(
         url, cookies, headers)
     if status_code != 200:
         return branch_list, 100000004

     # (keyword in link text, key in branch_list); checked in this order.
     sections = (
         ('企业基本信息', "basic"),
         ("股东及出资信息", "share"),
         ("对外投资信息", "invest"),
         ("企业资产状况信息", "run"),
         ("生产经营情况", "run"),
         ("担保信息", "assure"),
         ("股权变更信息", "schange"),
         ("网站或网店信息", "web"),
     )
     tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
     for single in tree.xpath("//div[@class='categ_info_title_wz']"):
         href = config.host + single.xpath('./a/@href')[0]
         text = single.xpath('./a/text()')[0]
         for keyword, key in sections:
             if keyword in text:
                 branch_list[key] = str(href)
                 break
     return branch_list, 1
Esempio n. 16
0
	def name(self, url):
		"""Extract the investor records listed on the page at ``url``.

		The first ``<dl>`` under the ``viewBox`` div is serialised, passed
		through ``deal_html_code.remove_space`` and split at every ``<br/>``.

		Returns ``(info, flag)``.  ``info`` maps the fragment index to
		``[name, types, reg_amount, ra_ways, ra_date, true_amount, ta_ways,
		ta_date]``; ``name`` is the list returned by the ``<dt>`` text XPath,
		the other values come from ``self.deal_dd_content``.  ``flag`` is 1
		on success and 100000004 when the request fails.
		"""
		headers = config.headers_detail
		content, status_code = Send_Request().send_request(url, headers)
		info = {}
		if status_code != 200:
			return info, 100000004

		# Labels looked up in every fragment, in output order after the name.
		labels = (
			u"投资人类型",
			u"认缴出资金额",
			u"认缴出资方式",
			u"认缴出资时间",
			u"实缴出资金额",
			u"实缴出资方式",
			u"实缴出资时间",
		)
		tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
		dl = tree.xpath("//div[@class='viewBox']//dl")[0]
		fragments = deal_html_code.remove_space(etree.tostring(dl)).split('<br/>')
		fragments.remove(fragments[-1])
		for i, fragment in enumerate(fragments):
			single = etree.HTML(fragment, parser=etree.HTMLParser(encoding='utf-8'))
			row = [single.xpath("//dt[@style='color:#333;margin-bottom:10px;']/text()")]
			for label in labels:
				row.append(self.deal_dd_content(label, single))
			info[i] = row
		return info, 1
Esempio n. 17
0
	def name(self, url):
		"""Extract the single freeze record shown on the page at ``url``.

		Eleven labelled values are read from the first ``<dl>`` under the
		``viewBox`` div with ``self.deal_dd_content``.

		Returns ``(info, flag)``.  On success ``info`` is ``{0: [court,
		executor, rule_no, items, start_date, end_date, pub_date, stock,
		cert_cate, cert_code, end_freeze]}`` and ``flag`` is 1; when the
		request fails ``info`` is empty and ``flag`` is 100000004.
		"""
		info = {}
		content, status_code = Send_Request().send_request(url)
		if status_code != 200:
			return info, 100000004

		# Labels in output order.
		labels = (
			u'执行法院',
			u'被执行人',
			u'执行文书文号',
			u'执行事项',
			u'冻结开始日期',
			u'冻结结束日期',
			u'公示日期',
			u"被执行人持有股权",
			u'被执行人证件种类',
			u'被执行人证件号码',
			u"解冻日期",
		)
		tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
		dl = tree.xpath("//div[@class='viewBox']//dl")[0]
		row = []
		for label in labels:
			row.append(self.deal_dd_content(label, dl))
		info[0] = row
		return info, 1
Esempio n. 18
0
def deal_detail_content(detail_url):
    """Read contribution details from the JSON served at ``detail_url``.

    The ``data`` member of the response is indexed as two lists; the first
    record of ``data[1]`` is used when that list is non-empty, otherwise the
    first record of ``data[0]``.

    Returns the tuple ``(ra_date, ra_ways, true_amount, reg_amount, ta_ways,
    ta_date)``.  ``ta_date`` and ``ta_ways`` always equal ``ra_date`` and
    ``ra_ways``.  Every value is ``None`` when the request does not return
    status 200, when both lists are empty, when the chosen record is empty,
    or when the record lacks the corresponding key.
    """
    # Defaults make every exit path return a complete tuple; these names
    # used to be unbound whenever one of the branches below was not taken.
    ra_date = ra_ways = true_amount = reg_amount = ta_ways = ta_date = None

    detail_code, status_code = Send_Request().send_requests(detail_url)
    if status_code == 200:
        groups = json.loads(detail_code)["data"]
        content1 = None
        if len(groups[1]) != 0:
            content1 = groups[1][0]
        elif len(groups[0]) != 0:
            content1 = groups[0][0]

        if content1:
            if "conDate" in content1:
                ra_date = change_date_style(content1["conDate"])
                ta_date = ra_date
            ra_ways = ta_ways = content1.get("conForm_CN")
            reg_amount = content1.get("subConAm")
            true_amount = content1.get("acConAm")

    return ra_date, ra_ways, true_amount, reg_amount, ta_ways, ta_date
Esempio n. 19
0
 def get_detail_info(self, detail_url, info):
     """Fill ``info`` with the guarantee details found at ``detail_url``.

     On status 200 the keys ``cates``, ``ranges``, ``period`` and ``remark``
     of ``info`` are set from ``deal_html_code.get_match_info`` applied to
     the first element whose text contains the claim-overview heading.  On
     any other status those four keys are set to ``''``.

     Returns ``(person_info, goods_info)``: the first elements whose text
     contains the mortgagee and collateral headings on success, two empty
     dicts on failure.
     """
     # Page label -> key written into ``info``.
     field_map = {
         u"种类": "cates",
         u"范围": "ranges",
         u"期限": "period",
         u"备注": "remark",
     }
     headers = config.headers
     result, status_code = Send_Request().send_requests(detail_url, headers)
     if status_code != 200:
         for field in field_map.values():
             info[field] = ''
         return {}, {}

     # lxml has no module-level etree.xpath(); the document is parsed with
     # etree.HTML as everywhere else in this code base.
     data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
     table = data.xpath("//*[contains(.,'%s')]" % u"被担保债权概况信息")[0]
     for label, field in field_map.items():
         info[field] = deal_html_code.get_match_info(label, table)
     person_info = data.xpath("//*[contains(.,'%s')]" % u"抵押权人概况信息")[0]
     goods_info = data.xpath("//*[contains(.,'%s')]" % u"抵押权物概况信息")[0]
     return person_info, goods_info
Esempio n. 20
0
 def get_info(self):
     """Scrape the rows of the ``table_xzxk`` table for ``self._pripid``.

     Requests ``self._url`` formatted with ``self._pripid`` and reads every
     ``<tr name="xzxk">`` of the table with id ``table_xzxk``.

     Returns a dict mapping the row index to a dict with the keys ``name``
     (always ``''``), ``code``, ``filename``, ``start_date``, ``end_date``,
     ``gov_dept`` and ``content``.  Rows with fewer than seven cells are
     skipped; the dict is empty when the request does not return status 200.
     """

     def cell_text(cells, index):
         # Whole text content of one cell, cleaned by remove_symbol.
         return deal_html_code.remove_symbol(cells[index].xpath("string(.)"))

     url = self._url.format(self._pripid)
     headers = config.headers
     result, status_code = Send_Request().send_requests(url,
                                                        headers=headers)
     info = {}
     # Do not try to parse the body of a failed request.
     if status_code != 200:
         return info

     data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
     # "name" is an attribute of <tr>, hence the '@' prefix.
     tr_list = data.xpath("//table[@id ='table_xzxk']//tr[@name = 'xzxk']")
     for i, singledata in enumerate(tr_list):
         td_list = singledata.xpath("./td")
         # Cell 6 is read below, so a usable row needs at least seven cells.
         if len(td_list) < 7:
             continue
         temp = {}
         temp["name"] = ''
         temp["code"] = cell_text(td_list, 1)
         temp["filename"] = cell_text(td_list, 2)
         temp["start_date"] = deal_html_code.change_chinese_date(
             cell_text(td_list, 3))
         temp["end_date"] = deal_html_code.change_chinese_date(
             cell_text(td_list, 4))
         temp["gov_dept"] = cell_text(td_list, 6)
         temp["content"] = cell_text(td_list, 5)
         info[i] = temp
     return info
Esempio n. 21
0
def get_list(string):
    """Search for ``string`` and collect the result links.

    The index page is requested first to read the id embedded in the
    ``onclick`` attribute of the search button; that id is formatted into
    ``config.list_url``.  The search itself is a POST with a User-Agent
    taken from a random line of ``user-agent.txt``.

    ``info`` maps the position of each ``<li>`` of the result page to
    ``config.host`` + the first link of that item.  ``flag`` becomes 1 on
    success, 100000003 when the result page matches one of the "no result /
    too frequent / abnormal access" markers, and 100000004 for any other
    failure, including every exception raised along the way.

    NOTE(review): the visible body ends without returning ``info`` and
    ``flag``; confirm whether a trailing return statement is missing.
    """
    info = {}
    flag = 0
    try:
        headers = config.headers_index
        content, status_code = Send_Request().send_request(
            config.index_url, headers)
        if status_code == 200:
            result = etree.HTML(content,
                                parser=etree.HTMLParser(encoding='utf-8'))
            onclick = result.xpath('//span[@class = "shouButton"]/@onclick')[0]
            pattern = re.compile(r".*QueryIndex\('','(.*?)'\).*")
            match_id = re.findall(pattern, onclick)[0]
            url = config.list_url.format(match_id)
            # Pick a random User-Agent: line 1..1000 of user-agent.txt.
            line_no = random.randrange(1, 1001)
            # NOTE(review): this is the shared config.headers object, so the
            # User-Agent written below is visible to every other user of it.
            headers = config.headers
            params = config.list_parmas.format(string)
            theline = linecache.getline(r'user-agent.txt', line_no)
            headers["User-Agent"] = theline.replace("\n", '')
            result = requests.post(url, params, headers=headers)
            status_code = result.status_code
            encoding = chardet.detect(result.content)["encoding"]
            if status_code == 200 and encoding == 'utf-8':
                pattern = re.compile(u".*无查询结果.*|.*访问频繁.*|.*访问异常.*")
                match = re.findall(pattern, result.content)
                if len(match) == 0:
                    content = etree.HTML(
                        result.content,
                        parser=etree.HTMLParser(encoding='utf-8'))
                    items = content.xpath("//li")
                    for i, single in enumerate(items):
                        item = single.xpath(".//a/@href")[0]
                        info[i] = config.host + item
                    flag = 1
                else:
                    flag = 100000003
            else:
                flag = 100000004
        else:
            # Was 10000004 (one zero short of the failure code used in
            # every other branch).
            flag = 100000004

    except Exception as e:
        logging.error("search error:%s" % e)
        flag = 100000004
Esempio n. 22
0
 def get_info(self, url, url_pattern):
     """Return the ``data`` member of the JSON served at ``url``.

     ``url_pattern`` is only used in the log message written when the list
     is empty.

     Returns the decoded ``data`` value, or ``None`` when the request does
     not return status 200 or when ``data`` is empty.
     """
     result, status_code = Send_Request().send_requests(url)
     # Check the status before decoding: the body of a failed request is not
     # guaranteed to be JSON and must not be returned as data.
     if status_code != 200:
         return None
     data = json.loads(result)["data"]
     if len(data) == 0:
         logging.info('暂无 %s' % url_pattern)
         return None
     return data
Esempio n. 23
0
def get_detail(info):
	"""Fetch every URL in ``info`` and parse the pages that load.

	``info`` maps keys to URLs.  Each URL is requested in turn; a response
	with status 200 is passed to ``deal_single_info`` and the result stored
	under the same key.  The loop sleeps 0.5 s after every request.

	Returns the dict of parsed results; keys whose request failed are absent.
	"""
	detaillist = {}
	for key, url in info.items():
		content, status_code = Send_Request().send_request(url)
		if status_code == 200:
			detaillist[key] = deal_single_info(content)
		time.sleep(0.5)
	return detaillist
Esempio n. 24
0
def name(url):
    """Collect the entries of a paginated detail page into a dict.

    The first ``<dl>`` directly under the ``viewBox`` div of each page is
    handed to ``deal_single_info`` together with ``info`` and a start index.
    Pages 2..N are fetched through ``out_invest_url`` formatted with the
    entity id, the cid and the page number.

    Returns ``(info, flag)``.  ``flag`` is 1 on success and 100000004 when
    the first request fails or the page does not contain the company-name
    marker; in both failure cases ``info`` is an empty dict.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound before any branch: the non-200 path used to reach the return
    # statement with ``info`` undefined.
    info = {}
    if status_code != 200 or "企业名称" not in content:
        return info, 100000004

    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class='viewBox']/dl")[0]
    pattern = re.compile(".*共(.*?)页.*")
    number = re.findall(pattern, content)
    totalpage = int(number[0]) if len(number) == 1 else 0

    # The first page is processed whatever the page count is.
    deal_single_info(dl, info, 0)
    if totalpage > 1:
        entid = deal_html_code.match_entid(url)
        cid = deal_html_code.match_cid(url)
        for k in xrange(2, totalpage + 1):
            href = out_invest_url.format(entid, cid, k)
            content, status_code = Send_Request().send_request(
                href, headers)
            if status_code != 200:
                # A failed page is skipped; the remaining pages are still tried.
                continue
            start = (k - 1) * 5 + 1
            result = etree.HTML(
                content, parser=etree.HTMLParser(encoding='utf-8'))
            dl = result.xpath("//div[@class='viewBox']/dl")[0]
            deal_single_info(dl, info, start)

    info = deal_html_code.remove_repeat(info)
    return info, 1
Esempio n. 25
0
def get_detail(info):
	"""Fetch every URL in ``info`` and parse the pages that load.

	``info`` maps keys to URLs.  Each URL is requested with
	``config.headers_detail``; a response with status 200 is passed to
	``deal_single_info`` and the result stored under the same key.  The loop
	sleeps 0.5 s after every request.

	Returns the dict of parsed results; keys whose request failed are absent.
	"""
	detaillist = {}
	for key, url in info.items():
		# Looked up per request, as before, so nothing is read from config
		# when ``info`` is empty.
		headers = config.headers_detail
		content, status_code = Send_Request().send_request(url, headers)
		if status_code == 200:
			detaillist[key] = deal_single_info(content)
		time.sleep(0.5)
	return detaillist
Esempio n. 26
0
 def get_deatail_info(self, detail_url, info):
     """Fill ``info`` with the labelled fields found at ``detail_url``.

     On status 200 the page is parsed and, for every label in the mapping
     below, ``deal_html_code.get_match_info(label, page)`` is stored in
     ``info`` under the mapped key.  On any other status only a log entry is
     written and ``info`` is left untouched.

     ``info`` is modified in place; nothing is returned in the visible body.
     """
     # Page label -> key written into ``info``.
     field_map = {
         u"执行事项": "items",
         u"裁定书文号": "rule_no",
         u"证照种类": "cert_cate",
         u"证照号码": "cert_code",
         u"冻结期限自": "start_date",
         u"冻结期限至": "end_date",
         u"冻结期限": "period",
         u"公示日期": "pub_date"
     }
     headers = config.headers
     result, status_code = Send_Request().send_requests(detail_url, headers)
     if status_code == 200:
         # The response body is parsed with etree.HTML; it was previously
         # called as ``result.xpath(result, parser=...)`` on the body itself.
         data = etree.HTML(result,
                           parser=etree.HTMLParser(encoding='utf-8'))
         # .items() yields (label, key) pairs; iterating the dict itself
         # yields only the labels and cannot be unpacked into two names.
         for label, field in field_map.items():
             info[field] = deal_html_code.get_match_info(label, data)
     else:
         logging.info("获取司法协助详情信息失败!")
Esempio n. 27
0
def get_url_list(url):
    """Fetch the company page at ``url`` and derive its data and sub-URLs.

    The page counts as failed when the status is not 200 or when the body
    matches one of the failure patterns.

    Returns ``(information, flag, url)``: on success the two values from
    ``get_basic_info`` followed by the result of ``get_singleinfo_url``; on
    failure ``(None, 100000004, {})``.
    """
    result, status_code = Send_Request().send_requests(url)
    failure_hits = re.findall(
        re.compile(".*返回首页.*|.*'/index/invalidLink'.*"), result)
    if status_code != 200 or len(failure_hits) != 0:
        return None, 100000004, {}

    information, flag = get_basic_info(result, status_code)
    section_urls = get_singleinfo_url(result)
    return information, flag, section_urls
Esempio n. 28
0
def get_html_data(url, print_url):
    """Gather basic company data from the detail page and its print view.

    The page at ``url`` supplies one value per entry of ``dict`` (a mapping
    that is not defined in this function and is resolved from the enclosing
    scope).  The first non-empty value among ``shareholder1..3`` is copied to
    ``shareholder`` and the three numbered keys are then deleted.  The page
    at ``print_url`` supplies ``person``, ``branch`` and the parsed print
    document itself under ``report1``.

    Returns ``(info, flag)`` where ``flag`` is 1 when the first request
    returned status 200 and 100000004 otherwise.
    """
    headers = config.headers
    info = {}
    result, status_code = Send_Request().send_requests(url, headers)
    if status_code != 200:
        flag = 100000004
        print('获取基本信息失败!')
    else:
        flag = 1
        data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
        for key, value in dict.iteritems():
            info[value] = deal_html_code.match_info(key, data)
        candidates = ("shareholder1", "shareholder2", "shareholder3")
        for candidate in candidates:
            if info[candidate] != '':
                info["shareholder"] = info[candidate]
                break
        # Finally delete the unwanted keys so that later loops over the
        # fields do not fail on them.
        for candidate in candidates:
            del info[candidate]

    print_info, status_code = Send_Request().send_requests(print_url, headers)
    if status_code == 200:
        print_data = etree.HTML(print_info,
                                parser=etree.HTMLParser(encoding='utf-8'))
        person = deal_html_code.match_info(u'人员信息', print_data)
        info["person"] = person
        if person == '':
            # Fall back to the alternative heading.
            info["person"] = deal_html_code.match_info(u'成员信息', print_data)
        info["branch"] = deal_html_code.match_info(u'分支机构', print_data)
        # The whole print page is stored under info["report1"] and passed on
        # to the Report class, which looks up the data of the year it needs.
        info["report1"] = print_data

    return info, flag
Esempio n. 29
0
 def get_preport_url(self, anCheId):
     """Build the sub-report URLs of the annual report ``anCheId``.

     The base-info document for ``anCheId`` is requested and decoded as
     JSON; three of its URL fields are prefixed with ``host``.

     Returns a dict with the keys ``permit``, ``web`` and ``base``; the first
     two carry the query string ``?entType=17``.  The dict is empty when the
     request does not return status 200.
     """
     info = {}
     url = host + '/corp-query-entprise-info-vAnnualPbReportBaseInfoForJs-%s.html' % anCheId
     result, status_code = Send_Request().send_requests(url)
     if status_code != 200:
         return info

     data = json.loads(result)
     base_url = host + data["vAnnPbAssetUrl"]
     web_url = host + data["webSiteInfoUrl"] + "?entType=17"
     # The annSfcSocsecinfoUrl field ("society") is currently not collected.
     permit_url = host + data["annulLicenceUrl"] + "?entType=17"
     info.update(permit=permit_url, web=web_url, base=base_url)
     return info
Esempio n. 30
0
 def name(self, url):
     """Extract the mortgage records listed on the page at ``url``.

     The first ``<dl>`` under the ``viewBox`` div is serialised and split
     into one fragment per record.  The registration number, date and
     authority are only looked up when their label occurs somewhere in the
     page content; otherwise they default to ``None``, ``'0000-00-00'`` and
     ``None``.

     Returns a dict mapping the record index to ``[code, dates, dept,
     person_name, number, cates, amount, ranges, period]`` where ``period``
     joins the start and end date.  The dict is empty when the request does
     not return status 200.
     """
     info = {}
     content, status_code = Send_Request().send_request(url)
     if status_code != 200:
         return info

     tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
     dl = tree.xpath("//div[@class ='viewBox']//dl")[0]
     datalist = etree.tostring(dl).split(
         '<dd style="border-top:1px dashed #ccc;">')
     datalist.remove(datalist[-1])
     # Labels that are always looked up, in the order they are read.
     fixed_labels = (
         u"抵押权人名称",
         u"抵押权人注册号",
         u"被担保债权种类",
         u"被担保债权数额",
         u"担保范围",
         u"履行债务开始日期",
         u"履行债务结束日期",
     )
     for i, fragment in enumerate(datalist):
         single = etree.HTML(fragment,
                             parser=etree.HTMLParser(encoding="utf-8"))
         code = None
         if u"登记编号" in content:
             code = self.deal_dd_content(u'登记编号', single)
         dates = '0000-00-00'
         if u"登记日期" in content:
             dates = self.deal_dd_content(u"登记日期", single)
         dept = None
         if u"登记机关" in content:
             dept = self.deal_dd_content(u"登记机关", single)
         values = []
         for label in fixed_labels:
             values.append(self.deal_dd_content(label, single))
         person_name, number, cates, amount, ranges, start_date, end_date = values
         period = start_date + '至' + end_date
         info[i] = [
             code, dates, dept, person_name, number, cates, amount,
             ranges, period
         ]
     return info