def name(url):
    """Fetch a detail page and extract one four-field record per entry.

    The page is requested with ``config.headers_detail``.  On HTTP 200 the
    first ``<dl>`` directly under ``div.viewBox`` is serialized and split on
    the styled ``<dd>`` separator; each fragment is re-parsed and four
    labelled values are read from it with ``deal_dd_content``.

    :param url: address of the detail page to fetch.
    :returns: ``(info, flag)``.  ``info`` maps the fragment index to
        ``[name, percent_pre, percent_after, dates]``; ``flag`` is ``1``
        when the response status was 200 and ``100000004`` otherwise (in
        which case ``info`` is empty).
    :raises IndexError: if the page contains no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        return info, 100000004

    flag = 1
    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    # lxml elements expose ``xpath``; there is no ``path`` method.
    dl = result.xpath("//div[@class= 'viewBox']/dl")[0]
    fragments = etree.tostring(dl).split(
        '<dd style="border-bottom:1px solid #AE0000;padding-bottom:10px;">'
    )
    # Drop the trailing fragment by position (``remove(x[-1])`` would delete
    # the first *equal* element instead).
    del fragments[-1]

    # Labels are looked up in this order; it is also the output field order.
    labels = (u"股东", u"变更前", u"变更后", u"变更日期")
    for i, fragment in enumerate(fragments):
        # Parse the individual fragment, not the whole page, so every record
        # gets its own values.
        node = etree.HTML(fragment, parser=etree.HTMLParser(encoding='utf-8'))
        info[i] = [deal_dd_content(label, node) for label in labels]

    # Keep the de-duplicated mapping; the other extractors in this file use
    # the return value of remove_repeat as well.
    info = deal_html_code.remove_repeat(info)
    return info, flag
def name(url):
    """Collect entries from a paginated detail listing.

    The first page is fetched from ``url``; when the page text advertises
    more than one page, the remaining pages are requested through
    ``share_url`` formatted with the entity id, the cid and the page number.
    Every page's ``<dl>`` under ``div.viewBox`` is serialized, split on the
    styled ``<dt/>`` separator and handed to ``deal_single_info``.

    :param url: address of the first page.
    :returns: ``(info, flag)`` where ``info`` has been passed through
        ``deal_html_code.remove_repeat`` and ``flag`` is ``1`` for an HTTP
        200 on the first page, ``100000004`` otherwise.
    """
    request_headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, request_headers)
    # print content
    info = {}
    separator = '<dt style="color:#333;margin-bottom:10px;"/>'

    if status_code == 200:
        flag = 1
        tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
        dl_node = tree.xpath("//div[@class='viewBox']//dl")[0]
        # Everything before the first separator is discarded.
        pieces = etree.tostring(dl_node).split(separator)[1:]

        if pieces:
            matches = re.findall(".*共(.*?)页.*", content)
            totalpage = int(matches[0]) if len(matches) == 1 else 0

            # The first page is always processed starting at offset 0.
            deal_single_info(pieces, info, 0)

            if totalpage != 1:
                entid = deal_html_code.match_entid(url)
                cid = deal_html_code.match_cid(url)
                for page in xrange(2, totalpage + 1):
                    page_url = share_url.format(entid, cid, page)
                    page_content, page_status = Send_Request().send_request(
                        page_url, request_headers)
                    if page_status != 200:
                        # Pages that fail to load are skipped silently.
                        continue
                    # NOTE(review): offset convention kept as-is; confirm
                    # against deal_single_info, which is not visible here.
                    start = page * 5 + 1
                    page_tree = etree.HTML(
                        page_content,
                        parser=etree.HTMLParser(encoding='utf-8'))
                    page_dl = page_tree.xpath(
                        "//div[@class='viewBox']//dl")[0]
                    page_pieces = etree.tostring(page_dl).split(separator)[1:]
                    if page_pieces:
                        deal_single_info(page_pieces, info, start)
        else:
            logging.info("无股东及出资信息")
    else:
        flag = 100000004

    info = deal_html_code.remove_repeat(info)
    return info, flag
def name(url):
    """Collect entries from a paginated listing split on styled ``<dd>`` tags.

    The first page is fetched from ``url``.  If it contains the expected
    marker text, its ``<dl>`` under ``div.viewBox`` is serialized, split on
    the styled ``<dd>`` separator and passed to ``deal_single_info``; any
    further pages are requested through ``out_invest_url`` and processed the
    same way.

    :param url: address of the first page.
    :returns: ``(info, flag)``.  ``flag`` is ``1`` when the first page
        returned HTTP 200 and contained the marker text, otherwise
        ``100000004`` with an empty ``info``.
    :raises IndexError: if a fetched page contains no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound up front so the failure paths can return it (it used to be
    # assigned only inside the 200 branch, making the final return raise).
    info = {}
    if status_code != 200:
        return info, 100000004

    separator = (
        '<dd style="border-bottom:1px solid #AE0000;padding-bottom:10px;">'
    )
    # etree.HTML builds the tree; lxml has no module-level ``etree.xpath``.
    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    # The attribute value needs both quotes to be a valid XPath expression.
    dl = result.xpath("//div[@class='viewBox']//dl")[0]
    if "企业名称" not in content:
        return info, 100000004

    datalist = etree.tostring(dl).split(separator)
    del datalist[-1]  # drop the trailing fragment by position

    number = re.findall(re.compile(u".*共(.*?)页.*"), content)
    totalpage = int(number[0]) if len(number) == 1 else 0

    # The first page is always processed starting at offset 0.
    deal_single_info(datalist, info, 0)

    if totalpage != 1:
        entid = deal_html_code.match_entid(url)
        cid = deal_html_code.match_cid(url)
        for k in xrange(2, totalpage + 1):
            # Build the URL per page and send the same headers as the first
            # request; previously every iteration re-fetched one fixed URL
            # without headers.  Extra positional arguments are ignored by
            # str.format if the template does not reference them.
            href = out_invest_url.format(entid, cid, k)
            page_content, page_status = Send_Request().send_request(
                href, headers)
            if page_status != 200:
                continue  # pages that fail to load are skipped
            # NOTE(review): offset convention kept as-is; confirm against
            # deal_single_info, which is not visible here.
            start = k * 5 + 1
            page = etree.HTML(
                page_content, parser=etree.HTMLParser(encoding='utf-8'))
            page_dl = page.xpath("//div[@class='viewBox']/dl")[0]
            page_list = etree.tostring(page_dl).split(separator)
            if page_list:
                del page_list[-1]
                deal_single_info(page_list, info, start)

    info = deal_html_code.remove_repeat(info)
    return info, 1
def name(url):
    """Fetch a detail page and extract one three-field record per entry.

    On HTTP 200 the first ``<dl>`` under ``div.viewBox`` is serialized and
    split on ``<br/>``; each fragment is re-parsed and two labelled values
    are read from it with ``deal_dd_content``.

    :param url: address of the detail page to fetch.
    :returns: ``(info, flag)``.  ``info`` maps the fragment index to
        ``[types, valto, uuid]`` where ``uuid`` is always the empty string;
        ``flag`` is ``1`` for HTTP 200 and ``100000004`` otherwise (with an
        empty ``info``).
    :raises IndexError: if the page contains no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        return info, 100000004

    flag = 1
    result = etree.HTML(content, parser=etree.HTMLParser(encoding="utf-8"))
    dl = result.xpath("//div[@class= 'viewBox']//dl")[0]
    fragments = etree.tostring(dl).split('<br/>')
    # Drop the trailing fragment by position (``remove(x[-1])`` would delete
    # the first *equal* element instead).
    del fragments[-1]

    for i, fragment in enumerate(fragments):
        node = etree.HTML(fragment, parser=etree.HTMLParser(encoding="utf-8"))
        types = deal_dd_content(u"许可文件名称", node)
        valto = deal_dd_content(u"有效期至", node)
        uuid = ''
        info[i] = [types, valto, uuid]

    # Keep the de-duplicated mapping; the other extractors in this file use
    # the return value of remove_repeat rather than discarding it.
    info = deal_html_code.remove_repeat(info)
    return info, flag
def name(url):
    """Fetch a detail page and group its ``<dd>`` cells into rows of four.

    On HTTP 200 the direct ``<dd>`` children of the first ``<dl>`` under
    ``div.viewBox`` are read in document order.  Every four consecutive
    cells form one record: the first text node of each cell is cleaned with
    ``deal_html_code.remove_space`` and appended, and an empty ``uuid``
    string is appended after the fourth cell.

    :param url: address of the detail page to fetch.
    :returns: ``(info, flag)``.  ``info`` maps the row index (starting at 0)
        to ``[name, percent_pre, percent_after, dates, uuid]``; ``flag`` is
        ``1`` for HTTP 200 and ``100000004`` otherwise (with an empty
        ``info``).
    :raises IndexError: if no ``<dl>`` matches or a ``<dd>`` has no text.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    info = {}
    if status_code != 200:
        return info, 100000004

    flag = 1
    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class= 'viewBox']//dl")[0]

    for i, cell in enumerate(dl.xpath('./dd')):
        # Row number and position of this cell inside its row of four.
        row_index, column = divmod(i, 4)
        text = deal_html_code.remove_space(cell.xpath("./text()")[0])
        row = info.setdefault(row_index, [])
        row.append(text)
        if column == 3:
            # A row is complete after its fourth cell; pad with empty uuid.
            row.append('')

    # Keep the de-duplicated mapping; the other extractors in this file use
    # the return value of remove_repeat rather than discarding it.
    info = deal_html_code.remove_repeat(info)
    return info, flag
def name(self, url):
    """Fetch a detail page and extract one eight-field record per entry.

    On HTTP 200 the first ``<dl>`` under ``div.viewBox`` is serialized and
    split on ``<br/>``.  For every fragment the styled ``<dt>`` text is taken
    as the name, followed by seven labelled values read through
    ``self.deal_dd_content``; the two date fields fall back to
    ``'0000-00-00'`` when they come back as an empty string.

    :param url: address of the detail page to fetch.
    :returns: ``(info, flag)``.  ``info`` maps the fragment index to
        ``[name, types, reg_amount, ra_ways, ra_date, true_amount, ta_ways,
        ta_date]`` and is passed through ``deal_html_code.remove_repeat``
        when non-empty; ``flag`` is ``1`` for HTTP 200 and ``100000004``
        otherwise.
    """
    request_headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, request_headers)
    info = {}
    if status_code != 200:
        # Nothing was collected, so there is nothing to de-duplicate.
        return info, 100000004

    # print content
    flag = 1
    # (label, fallback) pairs in output order; a fallback of None means the
    # value is stored exactly as returned.
    field_specs = (
        (u"投资人类型", None),
        (u"认缴出资金额", None),
        (u"认缴出资方式", None),
        (u"认缴出资时间", '0000-00-00'),
        (u"实缴出资金额", None),
        (u"实缴出资方式", None),
        (u"实缴出资时间", '0000-00-00'),
    )

    tree = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl_node = tree.xpath("//div[@class='viewBox']//dl")[0]
    fragments = etree.tostring(dl_node).split('<br/>')
    fragments.remove(fragments[-1])

    for index, fragment in enumerate(fragments):
        node = etree.HTML(fragment, parser=etree.HTMLParser(encoding='utf-8'))
        record = [
            node.xpath(
                "//dt[@style='color:#333;margin-bottom:10px;']/text()")[0]
        ]
        for label, fallback in field_specs:
            value = self.deal_dd_content(label, node)
            if fallback is not None and value == '':
                value = fallback
            record.append(value)
        info[index] = record

    if info:
        info = deal_html_code.remove_repeat(info)
    return info, flag
def name(url):
    """Collect entries from a paginated listing, one ``<dl>`` per page.

    The first page is fetched from ``url``.  If it contains the expected
    marker text, its ``<dl>`` directly under ``div.viewBox`` is passed to
    ``deal_single_info``; any further pages are requested through
    ``out_invest_url`` formatted with the entity id, the cid and the page
    number, and handled the same way.

    :param url: address of the first page.
    :returns: ``(info, flag)``.  ``flag`` is ``1`` when the first page
        returned HTTP 200 and contained the marker text, otherwise
        ``100000004`` with an empty ``info``.
    :raises IndexError: if a fetched page contains no matching ``<dl>``.
    """
    headers = config.headers_detail
    content, status_code = Send_Request().send_request(url, headers)
    # Bound up front: it used to be assigned only inside the 200 branch, so
    # a non-200 response made the final ``return info, flag`` raise
    # UnboundLocalError instead of reporting the failure flag.
    info = {}
    if status_code != 200:
        return info, 100000004

    result = etree.HTML(content, parser=etree.HTMLParser(encoding='utf-8'))
    dl = result.xpath("//div[@class='viewBox']/dl")[0]
    if "企业名称" not in content:
        return info, 100000004

    number = re.findall(re.compile(".*共(.*?)页.*"), content)
    totalpage = int(number[0]) if len(number) == 1 else 0

    # The first page is always processed starting at offset 0.
    deal_single_info(dl, info, 0)

    if totalpage != 1:
        entid = deal_html_code.match_entid(url)
        cid = deal_html_code.match_cid(url)
        for k in xrange(2, totalpage + 1):
            href = out_invest_url.format(entid, cid, k)
            page_content, page_status = Send_Request().send_request(
                href, headers)
            if page_status != 200:
                continue  # pages that fail to load are skipped
            start = (k - 1) * 5 + 1
            page = etree.HTML(
                page_content, parser=etree.HTMLParser(encoding='utf-8'))
            page_dl = page.xpath("//div[@class='viewBox']/dl")[0]
            deal_single_info(page_dl, info, start)

    info = deal_html_code.remove_repeat(info)
    return info, 1