def get_basic_href(result, hreflist):
    """Collect detail-page links from the basic-information panel.

    Every ``div.categ_info_02`` found under ``#categ_info_table_wz_0`` is
    probed for a direct child ``<a>`` whose text contains one of the known
    labels.  Labels are tried in a fixed order; the first label that matches
    exactly one anchor wins for that block.  The anchor is handed to
    ``deal_html_code.match_href`` and whatever it returns is stored in
    ``hreflist`` under the key associated with the label.

    Args:
        result: Object exposing ``xpath(query)`` (presumably an lxml element
            for the company page -- confirm against the caller).
        hreflist: Dict updated in place.  Keys written here are
            ``"shareholder"``, ``"sharehistory"``, ``"change"`` and
            ``"clear"``.

    Returns:
        None.  Blocks that match no label are skipped silently.
    """
    # (label, key) pairs in probe order.  The first and the last label
    # (promoter / investor) both feed the "shareholder" key.  Append new
    # pairs here when the page gains further link types.
    link_labels = (
        (u"发起人", "shareholder"),
        (u"出资历史信息", "sharehistory"),
        (u"变更登记信息", "change"),
        (u"清算信息", "clear"),
        (u"投资人", "shareholder"),
    )

    blocks = result.xpath(
        './/div[@id="categ_info_table_wz_0"]//div[@class="categ_info_02"]')
    for block in blocks:
        for label, key in link_labels:
            anchors = block.xpath("./a[contains(text(),'%s')]" % label)
            # Only an unambiguous hit (exactly one anchor) counts.
            if len(anchors) == 1:
                hreflist[key] = deal_html_code.match_href(anchors[0])
                # A block contributes at most one link.
                break
def get_self_pubilc_href(result, hreflist):
    """Record the annual-report link from the self-published panel.

    Looks under ``#categ_info_table_wz_8`` for anchors inside
    ``div.categ_info_02`` whose text contains the annual-report label.  When
    exactly one such anchor exists it is passed to
    ``deal_html_code.match_href`` and the result is stored as
    ``hreflist["report"]``; in every other case an info message is logged
    and ``hreflist`` is left untouched.

    Args:
        result: Object exposing ``xpath(query)``.
        hreflist: Dict updated in place.

    Returns:
        None.
    """
    # Label to look for; an earlier "enterprise self-report" label was
    # commented out in favour of this one.
    label = u'年报'
    query = (
        './/div[@id="categ_info_table_wz_8"]'
        '//div[@class="categ_info_02"]/a[contains(text(),"%s")]'
    ) % label
    anchors = result.xpath(query)

    # Zero hits and multiple hits are both treated as "no report".
    if len(anchors) != 1:
        logging.info("该企业无年报信息")
        return

    hreflist["report"] = deal_html_code.match_href(anchors[0])
def get_warn_info_href(result, hreflist):
    """Collect links from the warning-information panel.

    Each ``div.categ_info_02`` under ``#categ_info_table_wz_4`` is probed,
    in order, for an administrative-penalty link and then an abnormal-list
    link.  The first label that matches exactly one anchor wins for that
    block; ``str(deal_html_code.match_href(anchor))`` is stored under
    ``"punish"`` or ``"except"`` respectively.  If the panel has no blocks
    at all, an info message is logged instead.

    Args:
        result: Object exposing ``xpath(query)``.
        hreflist: Dict updated in place.

    Returns:
        None.
    """
    blocks = result.xpath(
        './/div[@id="categ_info_table_wz_4"]//div[@class="categ_info_02"]')
    if len(blocks) == 0:
        logging.info("该企业无警示信息")
        return

    # (label, key) pairs in probe order; extend here if the page changes.
    link_labels = (
        (u"行政处罚", "punish"),
        (u"异常名录", "except"),
    )
    for block in blocks:
        for label, key in link_labels:
            anchors = block.xpath("./a[contains(text(),'%s')]" % label)
            if len(anchors) == 1:
                hreflist[key] = str(deal_html_code.match_href(anchors[0]))
                break
def get_permit_href(result, hreflist):
    """Collect every link from the permit / qualification panel.

    All anchors inside ``div.categ_info_02`` under
    ``#categ_info_table_wz_1`` are passed through
    ``deal_html_code.match_href``; the stringified results are stored, in
    document order, as the list ``hreflist["permit"]``.  When the panel has
    no anchors an info message is logged and no key is written.

    Args:
        result: Object exposing ``xpath(query)``.  Note the query starts
            with ``//`` rather than ``.//`` as in the sibling functions.
        hreflist: Dict updated in place.

    Returns:
        None.
    """
    anchors = result.xpath(
        "//div[@id='categ_info_table_wz_1']//div[@class='categ_info_02']/a")
    if len(anchors) == 0:
        logging.info("许可资质信息中无行政许可信息")
        return

    # NOTE(review): the source's indentation was lost; this assumes the key
    # is only written when at least one anchor exists -- confirm.
    hreflist["permit"] = [
        str(deal_html_code.match_href(anchor)) for anchor in anchors
    ]
def get_good_info_href(result, hreflist):
    """Collect trademark links from the good-standing panel.

    Each ``div.categ_info_02`` under ``#categ_info_table_wz_2`` is probed
    for a direct child anchor whose text contains the trademark label.
    Blocks with exactly one such anchor contribute
    ``str(deal_html_code.match_href(anchor))`` to a list that is stored as
    ``hreflist["brand"]`` when non-empty.  Info messages are logged when the
    panel has no blocks or when no block yields a trademark link.

    Args:
        result: Object exposing ``xpath(query)``.  Note the query starts
            with ``//`` rather than ``.//``.
        hreflist: Dict updated in place.

    Returns:
        None.
    """
    blocks = result.xpath(
        "//div[@id='categ_info_table_wz_2']//div[@class='categ_info_02']")
    if len(blocks) == 0:
        logging.info("该企业中无良好信息")
        return

    anchor_query = "./a[contains(text(),'%s')]" % u"商标信息"
    brand_links = []
    for block in blocks:
        anchors = block.xpath(anchor_query)
        if len(anchors) == 1:
            brand_links.append(str(deal_html_code.match_href(anchors[0])))

    # NOTE(review): the source's indentation was lost; this assumes the
    # emptiness check runs only when the panel had blocks -- confirm.
    if brand_links:
        hreflist["brand"] = brand_links
    else:
        logging.info("该企业中无商标信息")
def get_prompt_info_href(result, hreflist):
    """Collect links from the notice ("prompt information") panel.

    Each ``div.categ_info_02`` under ``#categ_info_table_wz_3`` is probed
    for a direct child anchor whose text contains one of the known labels.
    Labels are tried in a fixed order and the first one that matches
    exactly one anchor wins for that block.  The anchor is passed to
    ``deal_html_code.match_href`` and the result is stringified.

    * Equity-freeze and equity-unfreeze links accumulate in the list
      ``hreflist["freeze"]``.
    * Spot-check and inspection links accumulate in the list
      ``hreflist["check"]``.
    * Equity-pledge and chattel-mortgage links are stored directly as
      ``hreflist["stock"]`` and ``hreflist["mort"]``.

    The list keys are only written when non-empty; otherwise an info
    message is logged.  If the panel has no blocks at all, a single info
    message is logged and nothing else happens.

    Args:
        result: Object exposing ``xpath(query)``.
        hreflist: Dict updated in place.

    Returns:
        None.
    """
    # (label, key) pairs in probe order.
    link_labels = (
        (u"股权冻结信息", "freeze"),
        (u"股权质押", "stock"),
        (u"抽查信息", "check"),
        (u"检查信息", "check"),
        (u"股权解冻", "freeze"),
        (u"动产抵押", "mort"),
    )

    blocks = result.xpath(
        './/div[@id="categ_info_table_wz_3"]//div[@class="categ_info_02"]')
    if not blocks:
        logging.info("无提示信息")
        return

    # Keys listed here collect several links; all other keys hold one.
    collected = {"check": [], "freeze": []}
    for block in blocks:
        for label, key in link_labels:
            anchors = block.xpath("./a[contains(text(),'%s')]" % label)
            if len(anchors) != 1:
                continue
            # Every href is normalised with str().  The unfreeze branch
            # used to append the raw value, which let "freeze" hold a mix
            # of types.
            href = str(deal_html_code.match_href(anchors[0]))
            if key in collected:
                collected[key].append(href)
            else:
                hreflist[key] = href
            break

    if collected["check"]:
        hreflist["check"] = collected["check"]
    else:
        logging.info("无抽查检查信息")

    if collected["freeze"]:
        hreflist["freeze"] = collected["freeze"]
    else:
        logging.info("无冻结信息")