def get_detail_info(self, detail_url, info):
    """Fetch a mortgage detail page and fill the guaranteed-claim fields.

    The page at ``detail_url`` is requested with ``config.headers``.  On an
    HTTP 200 response the body is parsed as HTML, the guaranteed-claim
    summary section is located, and the four fields ``cates``, ``ranges``,
    ``period`` and ``remark`` are written into ``info`` (mutated in place).

    :param detail_url: URL of the detail page to request.
    :param info: dict that receives the four extracted fields.
    :returns: ``(person_info, goods_info)`` -- the first elements matching
        the mortgagee and collateral section headings on success, or two
        empty dicts when the request did not return status 200 (in which
        case the four fields in ``info`` are set to ``''``).
    :raises IndexError: if the response is 200 but one of the three section
        headings cannot be found in the page.
    """
    field_map = {
        u"种类": "cates",
        u"范围": "ranges",
        u"期限": "period",
        u"备注": "remark",
    }
    result, status_code = Send_Request().send_requests(detail_url, config.headers)

    if status_code != 200:
        # Request failed: blank out every field and hand back empty
        # placeholders for the two sub-sections.
        for column in field_map.values():
            info[column] = ''
        return {}, {}

    # Parse the response body into an element tree.  lxml.etree exposes no
    # module-level ``xpath`` function; ``etree.HTML`` is the parsing entry
    # point that accepts ``parser=``.
    data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))

    # Every section is located by the text of its heading.
    # NOTE(review): ``contains(., ...)`` also matches ancestors of the
    # heading, so ``[0]`` is the outermost matching element -- confirm this
    # is the intended scope for get_match_info.
    section_xpath = "//*[contains(.,'%s')]"

    table = data.xpath(section_xpath % u"被担保债权概况信息")[0]
    for label, column in field_map.iteritems():
        info[column] = deal_html_code.get_match_info(label, table)

    person_info = data.xpath(section_xpath % u"抵押权人概况信息")[0]
    goods_info = data.xpath(section_xpath % u"抵押权物概况信息")[0]
    return person_info, goods_info
def get_info(self, data): info = {} for key, value in config.report_lab_dict.iteritems(): info[value] = deal_html_code.get_match_info(key, data) # 这两种情况的采集没有太大意义就不再入库 if info["birth_num"] == '' or info["birth_num"] == '人': info = {} else: # 判定欠费金额,实际缴费金额,缴费基数 是否公示 # 判定标准选取生育的各个对应信息进行标准, # 即认为如果生育、医疗、养老、失业中有一个欠费,实缴,基数是不公示的 # 则其他的也是不公示的 if info["birth_owe"] > 0: if_owe = 0 else: if_owe = 1 info["if_owe"] = if_owe if info["birth_base"] == 0: if_basenum = 0 else: if_basenum = 1 info["if_basenum"] = if_basenum if info["birth"] == 0: if_periodamount = 0 else: if_periodamount = 1 info["if_periodamount"] = if_periodamount for key, value in info.iteritems(): print key, value if "if" in key: continue info[key] = deal_html_code.match_float(value) return info
def get_info(self, data):
    """Extract the financial fields of a report from ``data``.

    Each label of the field mapping is looked up in ``data`` via
    ``deal_html_code.get_match_info``.  For every financial field the raw
    text is then used twice: ``self.judge_if_public`` produces the
    corresponding ``if_*`` publication flag, and
    ``deal_html_code.match_float`` replaces the raw text with its number.

    Per the original author's note, ``judge_if_public`` decides by whether
    the extracted text contains the currency unit; this may not cover every
    case and the helper should be revisited if such values show up.

    :param data: parsed page fragment handed to ``get_match_info``.
    :returns: dict holding, for each field, the numeric value plus its
        ``if_*`` flag.
    """
    info = {}
    # NOTE(review): ``dict`` is not defined inside this method; it is
    # presumably a module-level label->column mapping that shadows the
    # builtin -- verify against the rest of the file.
    for key, value in dict.iteritems():
        info[value] = deal_html_code.get_match_info(key, data)

    # (value column, publication-flag column).  All ten fields are handled
    # identically: flag first (it needs the raw text), then the number.
    # Previously income / profit / debt had the two helper calls swapped, so
    # the flag column received the number and the value column the flag.
    field_flags = (
        ("asset", "if_asset"),
        ("benifit", "if_benifit"),
        ("main_income", "if_main"),
        ("net_income", "if_net"),
        ("tax", "if_tax"),
        ("loan", "if_loan"),
        ("subsidy", "if_subsidy"),
        ("income", "if_income"),
        ("profit", "if_profit"),
        ("debt", "if_debt"),
    )
    for field, flag in field_flags:
        raw = info[field]
        info[flag] = self.judge_if_public(raw)
        info[field] = deal_html_code.match_float(raw)
    return info
def get_info(self, data):
    """Extract the basic report fields from ``data`` and derive flags.

    Every label in ``config.report_basic_dict`` is looked up in ``data``
    via ``deal_html_code.get_match_info`` and stored under its mapped
    column.  Three publication flags are then derived and four existing
    ``if_*`` columns are normalised through ``self.transform_ifornot``.

    :param data: parsed page fragment handed to ``get_match_info``.
    :returns: dict of extracted columns plus the derived flags.
    """
    info = {}
    for label, column in config.report_basic_dict.iteritems():
        info[column] = deal_html_code.get_match_info(label, data)

    # A flag is 0 when the text carries the "not published" marker or is
    # empty, and 1 otherwise.
    publication_flags = (
        ("employee", "if_empnum"),
        ("womennum", "if_womennum"),
        ("holding", "if_holding"),
    )
    for column, flag in publication_flags:
        text = info[column]
        withheld = u"不公示" in text or text == ''
        info[flag] = 0 if withheld else 1

    # These columns come straight from the page and are converted in place.
    for flag in ("if_invest", "if_fwarnnt", "if_website", "if_sharetrans"):
        info[flag] = self.transform_ifornot(info[flag])
    return info
def get_info(self, data):
    """Collect the liquidation-group leader(s) and members from ``data``.

    For each of the two position labels the matching text is fetched with
    ``deal_html_code.get_match_info`` and split on the enumeration comma.
    Every resulting name becomes one entry; entries are numbered
    consecutively from 0, leaders first and members continuing the count.

    :param data: parsed page fragment handed to ``get_match_info``.
    :returns: dict mapping a running integer index to
        ``{"person_name": ..., "position": ...}``.
    """
    info = {}
    next_index = 0
    for position in (u'清算组负责人', u"清算组成员"):
        names = deal_html_code.get_match_info(position, data).split('、')
        # Continue numbering where the previous position stopped.
        for index, person_name in enumerate(names, next_index):
            info[index] = {"person_name": person_name, "position": position}
        next_index += len(names)
    return info
def get_deatail_info(self, detail_url, info):
    """Fetch a judicial-assistance detail page and fill ``info`` from it.

    The page at ``detail_url`` is requested with ``config.headers``.  On an
    HTTP 200 response the body is parsed as HTML and each label of the field
    mapping is looked up with ``deal_html_code.get_match_info``; the result
    is written into ``info`` under the mapped column name.  On any other
    status a message is logged and ``info`` is left untouched.

    :param detail_url: URL of the detail page to request.
    :param info: dict that receives the extracted fields (mutated in place).
    :returns: ``None``.
    """
    field_map = {
        u"执行事项": "items",
        u"裁定书文号": "rule_no",
        u"证照种类": "cert_cate",
        u"证照号码": "cert_code",
        u"冻结期限自": "start_date",
        u"冻结期限至": "end_date",
        u"冻结期限": "period",
        u"公示日期": "pub_date"
    }
    result, status_code = Send_Request().send_requests(detail_url, config.headers)

    if status_code != 200:
        logging.info("获取司法协助详情信息失败!")
        return

    # ``result`` is the raw response body, so it has to be parsed with
    # etree.HTML before it can be queried; it has no ``xpath`` method itself.
    data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))

    # Iterate label/column pairs; iterating the dict directly would yield
    # only the labels and try to unpack their characters.
    for label, column in field_map.iteritems():
        info[column] = deal_html_code.get_match_info(label, data)
def get_single_info(self, detail_url):
    """Fetch one detail page and return its trademark fields.

    The page at ``detail_url`` is requested with ``config.headers``.  On an
    HTTP 200 response the body is parsed as HTML and each label of the field
    mapping is looked up with ``deal_html_code.get_match_info``.

    :param detail_url: URL of the detail page to request.
    :returns: dict keyed by the six ``ia_*`` column names; every value is
        ``''`` when the request did not return status 200.
    """
    field_map = {
        u"注册号": "ia_zch",
        u"公告日期": "ia_zcdate",
        u"类别": "ia_flh",
        u"公告期号": "ia_zcgg",
        u"起止日期": "ia_zyqqx",
        u'服务项目': "ia_servicelist"
    }
    result, status_code = Send_Request().send_requests(detail_url, config.headers)

    if status_code != 200:
        # Request failed: every column is present but blank.
        return {column: '' for column in field_map.values()}

    data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
    record = {}
    for label, column in field_map.iteritems():
        record[column] = deal_html_code.get_match_info(label, data)
    return record