def get_info(self):
    """Fetch the abnormal-operation table and parse its rows.

    Requests ``self._url`` formatted with ``self._pripid`` and, on HTTP 200,
    reads every ``tr[@name='jyyc']`` inside the table with id ``table_jyyc``.

    Returns:
        dict: row index -> dict with the keys ``types``, ``in_reason``,
        ``in_date``, ``out_reason``, ``out_date``, ``gov_dept`` and
        ``out_gov``. Empty when the status code is not 200.
    """
    headers = config.headers
    url = self._url.format(self._pripid)
    result, status_code = Send_Request().send_requests(url, headers)
    info = {}
    if status_code != 200:
        return info

    # etree.HTML builds the document tree; etree has no module-level xpath().
    data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
    # "@id" tests the attribute; a bare "id" would test a child element.
    tr_list = data.xpath(
        "//table[@id = 'table_jyyc']//tr[@name = 'jyyc']")

    def cell_text(cells, index):
        return deal_html_code.remove_symbol(cells[index].xpath("string(.)"))

    for i, singledata in enumerate(tr_list):
        td_list = singledata.xpath("./td")
        if len(td_list) < 8:
            # Cells 1..7 are read below; shorter rows cannot be parsed.
            continue
        temp = {}
        temp["types"] = '经营异常'
        temp["in_reason"] = cell_text(td_list, 1)
        temp["in_date"] = deal_html_code.change_chinese_date(
            cell_text(td_list, 2))
        temp["out_reason"] = cell_text(td_list, 4)
        temp["out_date"] = deal_html_code.change_chinese_date(
            cell_text(td_list, 5))
        temp["gov_dept"] = cell_text(td_list, 6)
        temp["out_gov"] = cell_text(td_list, 7)
        info[i] = temp
    return info
def get_info(self):
    """Fetch the administrative-licence table and parse its rows.

    Requests ``self._url`` formatted with ``self._pripid`` and, on HTTP 200,
    reads every ``tr[@name='xzxk']`` inside the table with id ``table_xzxk``.

    Returns:
        dict: row index -> dict with the keys ``name``, ``code``,
        ``filename``, ``start_date``, ``end_date``, ``gov_dept`` and
        ``content``. Empty when the status code is not 200.
    """
    url = self._url.format(self._pripid)
    headers = config.headers
    result, status_code = Send_Request().send_requests(url, headers=headers)
    info = {}
    if status_code != 200:
        # Nothing to parse without a successful response.
        return info

    data = etree.HTML(result, parser=etree.HTMLParser(encoding='utf-8'))
    # "@name" tests the attribute; a bare "name" would test a child element
    # and never match these rows.
    tr_list = data.xpath("//table[@id ='table_xzxk']//tr[@name = 'xzxk']")

    def cell_text(cells, index):
        return deal_html_code.remove_symbol(cells[index].xpath("string(.)"))

    for i, singledata in enumerate(tr_list):
        td_list = singledata.xpath("./td")
        if len(td_list) < 7:
            # Cells 1..6 are read below; header/short rows are skipped.
            continue
        temp = {}
        temp["name"] = ''
        temp["code"] = cell_text(td_list, 1)
        temp["filename"] = cell_text(td_list, 2)
        temp["start_date"] = deal_html_code.change_chinese_date(
            cell_text(td_list, 3))
        temp["end_date"] = deal_html_code.change_chinese_date(
            cell_text(td_list, 4))
        temp["gov_dept"] = cell_text(td_list, 6)
        temp["content"] = cell_text(td_list, 5)
        info[i] = temp
    return info
def get_detail(self, string, data, json_data, flag):
    """Copy one section's contribution way and date into ``json_data``.

    Finds the first node whose text contains ``string``, takes the element
    that immediately follows it and reads that element's ``td`` cells.
    Cell 0 is stored as the way and cell 2 (date-normalised) as the date.
    When fewer than three cells are available the defaults ``''`` and
    ``'0000-00-00'`` are stored and a message is logged.

    Args:
        string: Text identifying the section.
        data: Parsed node exposing ``xpath``.
        json_data: Dict updated in place.
        flag: ``'rj'`` writes ``ra_ways``/``ra_date``; ``'sj'`` writes
            ``ta_ways``/``ta_date``; any other value writes nothing.
    """
    # flag -> (key prefix, message logged when the section has no data)
    sections = {
        'rj': ("ra", "该条数据无认缴信息!"),
        'sj': ("ta", "该条数据无实缴信息!"),
    }

    td = []
    anchors = data.xpath("//*[contains(.,'%s')]" % string)
    if anchors:
        # The XPath axis is spelled "following-sibling".
        table = anchors[0].xpath(".//following-sibling::*[1]")
        if table:
            td = table[0].xpath(".//td")

    if flag not in sections:
        return
    prefix, empty_message = sections[flag]

    if len(td) < 3:
        logging.info(empty_message)
        json_data[prefix + "_ways"] = ''
        json_data[prefix + "_date"] = '0000-00-00'
    else:
        ways = deal_html_code.remove_symbol(td[0].xpath("string(.)"))
        date = deal_html_code.remove_symbol(td[2].xpath("string(.)"))
        json_data[prefix + "_ways"] = ways
        json_data[prefix + "_date"] = deal_html_code.change_chinese_date(date)
def get_info(self, data):
    """Parse every ``tr[@name='xzxk']`` row under ``data`` into a dict.

    For rows whose ninth cell carries an ``onclick`` link, the detail URL is
    built from the values extracted by ``match_key_content`` and passed to
    ``self.get_detail_info``.

    Returns:
        dict: row index -> dict with the keys ``name``, ``code``,
        ``filename``, ``start_date``, ``end_date``, ``gov_dept``,
        ``content`` and ``status``.
    """
    def text_of(cell):
        return deal_html_code.remove_symbol(cell.xpath("string(.)"))

    info = {}
    for index, row in enumerate(data.xpath(".//tr[@name = 'xzxk']")):
        record = {}
        cells = row.xpath("./td")
        record["name"] = ''
        record["code"] = text_of(cells[1])
        record["filename"] = text_of(cells[2])
        record["start_date"] = deal_html_code.change_chinese_date(
            text_of(cells[3]))
        record["end_date"] = deal_html_code.change_chinese_date(
            text_of(cells[4]))
        record["gov_dept"] = text_of(cells[5])
        record["content"] = text_of(cells[6])
        record["status"] = text_of(cells[7])

        handlers = cells[8].xpath("./a/@onclick")
        if len(handlers) == 0:
            logging.info("该条信息无详情信息!")
        else:
            matched = deal_html_code.match_key_content(str(handlers[0]))
            pripid, xh, lx = matched[0], matched[1], matched[2]
            self.get_detail_info(self._url.format(pripid, xh, lx))
        info[index] = record
    return info
def get_info(self, data):
    """Parse the ``tr`` rows under ``data`` into contribution records.

    Rows with zero or one ``td`` cell are skipped.

    Returns:
        dict: row index -> dict with the keys ``name``, ``reg_amount``,
        ``reg_date``, ``reg_way``, ``ac_amount``, ``ac_date`` and ``ac_way``.
    """
    def text_of(cell):
        return deal_html_code.remove_symbol(cell.xpath("string(.)"))

    info = {}
    for index, row in enumerate(data.xpath(".//tr")):
        record = {}
        cells = row.xpath("./td")
        # Rows with no cells or a single cell carry no record to parse.
        if len(cells) in (0, 1):
            continue
        record["name"] = text_of(cells[1])
        record["reg_amount"] = deal_html_code.match_float(text_of(cells[2]))
        record["reg_date"] = deal_html_code.change_chinese_date(
            text_of(cells[3]))
        record["reg_way"] = text_of(cells[4])
        record["ac_amount"] = deal_html_code.match_float(text_of(cells[5]))
        record["ac_date"] = deal_html_code.change_chinese_date(
            text_of(cells[6]))
        record["ac_way"] = text_of(cells[7])
        info[index] = record
    return info
def deal_single_info(self, data, info):
    """Flatten punishment records into ``info``, keyed by each record's ``RN``.

    Each value is the list ``[number, types, content, date, pub_date,
    gov_dept, name, pdfurl]``. ``types`` falls back to ``PUNISH_CAUSE`` when
    ``ILLEG_ACT_TYPE`` is ``None``; ``pdfurl`` is built from the module-level
    ``pdf_url`` template only when ``TYPE`` equals ``'1'``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    for singledata in data:
        number = singledata["PEN_DEC_NO"]
        types = singledata["ILLEG_ACT_TYPE"]
        content = singledata["PEN_TYPE"]
        date = deal_html_code.change_chinese_date(singledata["PUNISH_DATE"])
        pub_date = deal_html_code.change_chinese_date(
            singledata["CREATE_DATE"])
        gov_dept = singledata["PUNISH_ORG_NAME"]
        punish_type = singledata["TYPE"]
        # Identity comparison is the correct test for None.
        if types is None:
            types = singledata["PUNISH_CAUSE"]
        ID = singledata["ID"]
        ORG = singledata["ORG"]
        SEQ_ID = singledata["SEQ_ID"]
        if punish_type == '1':
            pdfurl = pdf_url.format(ORG, ID, SEQ_ID)
        else:
            pdfurl = ''
        name = ''
        RN = singledata["RN"]
        info[RN] = [
            number, types, content, date, pub_date, gov_dept, name, pdfurl
        ]
def name(self, data):
    """Convert punishment records into positional lists.

    Args:
        data: Sequence of dict records; each must contain ``penDecNo``.

    Returns:
        dict: record index -> ``[number, types, content, date, name,
        gov_dept, pub_date]``. ``types`` comes from ``illegActType`` or,
        failing that, ``illegAct`` (``''`` when neither is present);
        ``name`` is always ``None``.
    """
    information = {}
    if len(data) > 0:
        for i, singledata in enumerate(data):
            number = singledata["penDecNo"]
            # The keys live on the record, not on the enclosing sequence.
            if "illegActType" in singledata:
                types = singledata["illegActType"]
            elif "illegAct" in singledata:
                types = singledata["illegAct"]
            else:
                types = ''
            content = singledata.get("penPunishCon", '')
            if 'penDecissDate' in singledata:
                date = deal_html_code.change_chinese_date(
                    singledata["penDecissDate"])
            else:
                date = ''
            if "noticeDate" in singledata:
                pub_date = deal_html_code.change_chinese_date(
                    singledata["noticeDate"])
            else:
                pub_date = '0000-00-00'
            gov_dept = singledata.get("penOrgan", '')
            name = None
            information[i] = [
                number, types, content, date, name, gov_dept, pub_date
            ]
    return information
def name(self, data):
    """Convert blacklist records into positional lists.

    Returns:
        dict: record index -> ``[types, in_reason, in_date, out_reason,
        out_date, gov_dept]``. Missing text fields default to ``''`` and
        missing dates to ``'0000-00-00'``.
    """
    information = {}
    for index, record in enumerate(data):
        in_reason = ''
        if "bulletinListed" in record:
            in_reason = deal_html_code.remove_symbol(record["bulletinListed"])

        in_date = '0000-00-00'
        if "abnTime" in record:
            in_date = change_chinese_date(record["abnTime"])

        out_reason = ''
        if "bulletinRemoved" in record:
            out_reason = deal_html_code.remove_symbol(
                record["bulletinRemoved"])

        out_date = '0000-00-00'
        if "remTime" in record:
            out_date = change_chinese_date(record["remTime"])

        gov_dept = record.get("remOrganInterpreted", '')
        information[index] = [
            '黑名单', in_reason, in_date, out_reason, out_date, gov_dept
        ]
    return information
def name(self, data):
    """Convert guarantee records into positional lists.

    Returns:
        dict: record index -> ``[uuid, creditor, debtor, cates, amount,
        deadline, period, ways, if_fwarnnt]`` where ``deadline`` joins the
        two optional period dates with ``'至'`` and ``if_fwarnnt`` is
        ``fwarnntSign`` converted to ``int``.
    """
    information = {}
    for index, record in enumerate(data):
        uuid = record["annlFwarnntId"]
        creditor = record["more"]
        debtor = record["mortgagor"]
        cates = record["priClaSecKindInterpreted"]
        amount = record["priClaSecAm"]

        period_from = ''
        if 'pefPerForm' in record:
            period_from = change_chinese_date(record["pefPerForm"])
        period_to = ''
        if "pefPerTo" in record:
            period_to = change_chinese_date(record["pefPerTo"])
        deadline = str(period_from) + '至' + str(period_to)

        period = record["guaranPeriodInterpreted"]
        ways = record["gaTypeInterpreted"]
        if_fwarnnt = int(record["fwarnntSign"])
        information[index] = [
            uuid, creditor, debtor, cates, amount, deadline, period, ways,
            if_fwarnnt
        ]
    return information
def get_info(self, data):
    """Parse equity-pledge table rows into dicts.

    Args:
        data: Iterable of row nodes exposing ``xpath``; rows without ``td``
            cells are skipped.

    Returns:
        dict: row index -> dict with the keys ``equityNo``, ``pledgor``,
        ``pledBLicNo``, ``impAm``, ``impOrg``, ``impOrgBLicNo``,
        ``equPleDate``, ``type`` and ``publicDate``.
    """
    def text_of(cell):
        return deal_html_code.remove_symbol(cell.xpath("string(.)"))

    # Plain-text columns in cell order 1..6.
    text_columns = ("equityNo", "pledgor", "pledBLicNo", "impAm", "impOrg",
                    "impOrgBLicNo")

    info = {}
    for index, row in enumerate(data):
        record = {}
        cells = row.xpath("./td")
        if len(cells) == 0:
            continue
        for offset, key in enumerate(text_columns):
            record[key] = text_of(cells[offset + 1])
        record["equPleDate"] = deal_html_code.change_chinese_date(
            text_of(cells[7]))
        public_date = text_of(cells[9])
        record["type"] = text_of(cells[8])
        record["publicDate"] = deal_html_code.change_chinese_date(public_date)
        info[index] = record
    return info
def name(self, data):
    """Convert abnormal-operation records into positional lists.

    Returns:
        dict: record index -> ``[types, in_reason, in_date, out_reason,
        out_date, gov_dept]``. Missing text fields default to ``''`` and
        missing dates to ``'0000-00-00'``.
    """
    information = {}
    for index, record in enumerate(data):
        in_reason = record.get("speCauseInterpreted", '')

        in_date = '0000-00-00'
        if 'abnTime' in record:
            in_date = deal_html_code.change_chinese_date(record["abnTime"])

        out_reason = ''
        if "remExcpResInterpreted" in record:
            out_reason = deal_html_code.remove_symbol(
                record["remExcpResInterpreted"])

        out_date = '0000-00-00'
        if 'remDate' in record:
            out_date = deal_html_code.change_chinese_date(record["remDate"])

        gov_dept = record.get("decOrgInterpreted", '')
        information[index] = [
            '经营异常', in_reason, in_date, out_reason, out_date, gov_dept
        ]
    return information
def get_info(self, basic_data):
    """Build the basic-information dict and resolve its alternative fields.

    Every ``(key, value)`` pair of ``config.info_dict`` is turned into
    ``info[value] = self.deal_td_content(key, basic_data)``. Several fields
    arrive under numbered alternatives (``reg_date1``/``reg_date2`` and so
    on); for each group the first non-empty alternative wins.

    Returns:
        dict: the extracted values plus the derived keys ``ccode``,
        ``reg_date``, ``legal_person``, ``reg_amount`` and ``reg_address``.
    """
    # Pull the basic-information mapping out of the configuration.
    info_dict = config.info_dict
    info = {}
    # The values of info_dict become the keys of info.
    for key, value in info_dict.items():
        info[value] = self.deal_td_content(key, basic_data)

    if info["code"].startswith("9"):
        info["ccode"] = info["code"]
    else:
        info["ccode"] = ''
    info["start_date"] = deal_html_code.change_chinese_date(
        info["start_date"])
    info["end_date"] = deal_html_code.change_chinese_date(info["end_date"])

    # Registration date: use whichever alternative is non-empty; when both
    # are empty the value was not found, so store the placeholder date.
    if info["reg_date1"] != '':
        info["reg_date"] = deal_html_code.change_chinese_date(
            info["reg_date1"])
    elif info["reg_date2"] != '':
        info["reg_date"] = deal_html_code.change_chinese_date(
            info["reg_date2"])
    else:
        info["reg_date"] = '0000-00-00'

    # Legal representative: same selection as the registration date. The
    # second alternative keeps only the first name before '、' plus '等'.
    if info["legal_person1"] != '':
        info["legal_person"] = info["legal_person1"]
    elif info["legal_person2"] != '':
        info["legal_person"] = re.split(u'、',
                                        info["legal_person2"])[0] + '等'
    else:
        info["legal_person"] = ''

    # Registered capital.
    if info["reg_amount1"] != '':
        info["reg_amount"] = info["reg_amount1"]
    elif info["reg_amount2"] != '':
        info["reg_amount"] = info["reg_amount2"]
    else:
        info["reg_amount"] = ''

    # Address: first non-empty of the three alternatives.
    if info["reg_address1"] != '':
        info["reg_address"] = info["reg_address1"]
    elif info["reg_address2"] != '':
        info["reg_address"] = info["reg_address2"]
    elif info["reg_address3"] != '':
        info["reg_address"] = info["reg_address3"]
    else:
        info["reg_address"] = ''

    info["appr_date"] = deal_html_code.change_chinese_date(
        info["appr_date"])
    return info
def name(self, data):
    """Convert investor records into positional lists.

    Only the first entry of ``entAnnlInvtSet`` (subscribed contribution) and
    of ``entAnnlInvtactlSet`` (paid-in contribution) is read.

    Returns:
        dict: record index -> ``[name, uuid, reg_amount, reg_date, reg_way,
        ac_amount, ac_date, ac_way, unit]``. ``unit`` is ``''`` when neither
        set is populated; when the paid-in set is populated its ``currency``
        (or ``''``) replaces the subscribed one.
    """
    information = {}
    for i, singledata in enumerate(data):
        uuid = singledata["annlInvestorId"]
        name = singledata["inv"]
        # Default so the result list can always be built, even when both
        # contribution sets are empty.
        unit = ''

        if len(singledata["entAnnlInvtSet"]) != 0:
            entAnnlInvtSet = singledata["entAnnlInvtSet"][0]
            reg_amount = entAnnlInvtSet.get("subConAm", '')
            if 'conDate' in entAnnlInvtSet:
                reg_date = change_chinese_date(entAnnlInvtSet["conDate"])
            else:
                reg_date = ''
            reg_way = entAnnlInvtSet.get("conFormInterpreted", '')
            unit = entAnnlInvtSet.get("currency", '')
        else:
            reg_amount = ''
            reg_date = '0000-00-00'
            reg_way = ''

        if len(singledata["entAnnlInvtactlSet"]) != 0:
            entAnnlInvtactlSet = singledata["entAnnlInvtactlSet"][0]
            ac_amount = entAnnlInvtactlSet.get("acConAm", '')
            if "conDate" in entAnnlInvtactlSet:
                ac_date = change_chinese_date(entAnnlInvtactlSet["conDate"])
            else:
                ac_date = '0000-00-00'
            ac_way = entAnnlInvtactlSet.get("conFormInterpreted", '')
            unit = entAnnlInvtactlSet.get("currency", '')
        else:
            ac_amount = ''
            ac_date = '0000-00-00'
            ac_way = ''

        information[i] = [name, uuid, reg_amount, reg_date, reg_way,
                          ac_amount, ac_date, ac_way, unit]
    return information
def deal_single_info(self, data, info):
    """Flatten punishment records into ``info``, keyed by each record's ``RN``.

    Each value is ``[number, types, content, date, pub_date, gov_dept]``
    where ``types`` is ``MAIN_FACTS`` and ``PUN_KIND`` joined by ``":"``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    to_date = deal_html_code.change_chinese_date
    for _, record in enumerate(data):
        number = record["PUN_WRIT_SN"]
        types = ":".join([record["MAIN_FACTS"], record["PUN_KIND"]])
        content = record["PUN_REMARK"]
        date = to_date(record["PUN_DATE"])
        pub_date = to_date(record["CREATE_DATE"])
        gov_dept = record["PUN_ORG"]
        info[record["RN"]] = [number, types, content, date, pub_date,
                              gov_dept]
def deal_single_info(self, data, info):
    """Flatten abnormal-operation records into ``info``, keyed by ``RN``.

    Each value is ``[types, in_reason, in_date, out_reason, out_date,
    gov_dept]``; ``out_date`` is taken from the record's ``CREATE_DATE``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    for record in data:
        in_reason = record["FACT_REASON"]
        in_date = deal_html_code.change_chinese_date(record["MARK_DATE"])
        out_reason = record["REMOVE_REASON"]
        out_date = deal_html_code.change_chinese_date(record["CREATE_DATE"])
        gov_dept = record["CREATE_ORG"]
        row_number = record["RN"]
        info[row_number] = ['经营异常', in_reason, in_date, out_reason,
                            out_date, gov_dept]
def name(self, data):
    """Convert trademark records into positional lists.

    Returns:
        dict: record index -> ``[ia_zch, ia_flh, ia_zcgg, ia_servicelist,
        ia_zyqqx, ia_zcdate, province, tmImage]``. ``ia_zyqqx`` joins the
        two optional property dates with ``'至'`` (``''`` when both are
        missing); ``province`` is ``judge_province`` of ``uniScid`` or,
        when that key is absent, ``regNo``.
    """
    to_date = deal_html_code.change_chinese_date
    information = {}
    for index, record in enumerate(data):
        ia_zch = record["regNum"]
        ia_flh = record.get("intCls", '')
        ia_zcgg = record.get("regAnncIssue", '')

        ia_servicelist = ''
        if "goodsCnName" in record:
            ia_servicelist = deal_html_code.remove_symbol(
                record["goodsCnName"])

        begin = to_date(record["propertyBgnDate"]) \
            if "propertyBgnDate" in record else ''
        end = to_date(record["propertyEndDate"]) \
            if "propertyEndDate" in record else ''

        reg_no = record["uniScid"] if "uniScid" in record else record["regNo"]
        province = judge_province(reg_no)

        if begin == '' and end == '':
            ia_zyqqx = ''
        else:
            ia_zyqqx = begin + '至' + end

        ia_zcdate = to_date(record["regAnncDate"]) \
            if "regAnncDate" in record else ''
        tm_image = record.get("tmImage", '')

        information[index] = [
            ia_zch, ia_flh, ia_zcgg, ia_servicelist, ia_zyqqx, ia_zcdate,
            province, tm_image
        ]
    return information
def name(self, data):
    """Convert equity-pledge records into positional lists.

    Returns:
        dict: record index -> ``[equityNo, pledgor, pledBLicNo, impAm,
        impOrg, impOrgBLicNo, equPleDate, publicDate, type]``. Missing text
        fields default to ``''``; a missing ``equPleDate`` becomes
        ``'0000-00-00'``; ``publicDate`` (or its ``'0000-00-00'`` default)
        is passed through ``change_chinese_date``.
    """
    information = {}
    for index, record in enumerate(data):
        equity_no = record.get("pledgeNo", '')
        imp_am = record.get("impAm", '')
        imp_org = record.get("impOrgName", '')
        imp_org_lic_no = record.get("impBlicNo", '')
        pled_lic_no = record.get("intBlicNo", '')
        pledgor = record.get("intName", '')
        pledge_type = record.get("impTypeInterpreted", '')

        if "equPleDate" in record:
            equ_ple_date = deal_html_code.change_chinese_date(
                record["equPleDate"])
        else:
            equ_ple_date = '0000-00-00'

        public_date = deal_html_code.change_chinese_date(
            record.get("publicDate", '0000-00-00'))

        information[index] = [equity_no, pledgor, pled_lic_no, imp_am,
                              imp_org, imp_org_lic_no, equ_ple_date,
                              public_date, pledge_type]
    return information
def deal_detail_content(self, detail_url):
    """Download one freeze-detail JSON document and extract its fields.

    Reads the first entry of the response's ``entBlackList`` array.

    Args:
        detail_url: URL of the detail document.

    Returns:
        tuple: ``(items, rule_no, enforce_no, cert_cate, cert_code,
        start_date, end_date, period, pub_date)``. Text fields default to
        ``''`` and the date/period fields to ``None``; the same defaults are
        returned when the request does not return 200, the body is empty or
        ``entBlackList`` has no entries.
    """
    items = rule_no = enforce_no = cert_cate = cert_code = ''
    start_date = end_date = period = pub_date = None

    response = requests.get(detail_url)
    status_code = response.status_code
    result = response.content

    if status_code == 200 and len(result) != 0:
        black_list = json.loads(result)["entBlackList"]
        if black_list:
            data = black_list[0]
            items = data.get("executeItemInterpreted", '')

            # Prefer the ruling document number, fall back to the
            # enforcement number, otherwise leave it empty.
            if "froDocNo" in data:
                rule_no = data["froDocNo"]
            elif "executeNo" in data:
                rule_no = data["executeNo"]

            enforce_no = data.get("executeNo", '')

            if "cerType" in data:
                cert_cate = data.get("cetfTypeInterpreted", '')
                cert_code = data.get("cerNo", '')
            elif "blicType" in data:
                cert_cate = data.get("blicTypeInterpreted", '')
                cert_code = data.get("blicNo", '')

            if "froFrom" in data:
                start_date = deal_html_code.change_chinese_date(
                    data["froFrom"])
            if "froTo" in data:
                end_date = deal_html_code.change_chinese_date(data["froTo"])
            if "frozDeadline" in data:
                period = data["frozDeadline"]
            if "publicDate" in data:
                pub_date = deal_html_code.change_chinese_date(
                    data["publicDate"])

    return (items, rule_no, enforce_no, cert_cate, cert_code, start_date,
            end_date, period, pub_date)
def name(self, data): information = {} for i in xrange(len(data)): singledata = data[i] name = '' if 'licNo' in singledata.keys(): code = singledata["licNo"] else: code = '' if "licName" in singledata.keys(): filename = singledata["licName"] else: filename = '' if "valFrom" in singledata.keys(): start_date = singledata["valFrom"] start_date = deal_html_code.change_chinese_date(start_date) else: start_date = '0000-00-00' if "valTo" in singledata.keys(): end_date = singledata["valTo"] end_date = deal_html_code.change_chinese_date(end_date) else: end_date = '0000-00-00' if "licItem" in singledata.keys(): content = singledata["licItem"] else: content = '' if 'licAuth' in singledata.keys(): gov_dept = singledata["licAuth"] else: gov_dept = '' if "typeInterpreted" in singledata.keys(): status = singledata["typeInterpreted"] else: status = '' entShrpmtAltItemSet = singledata["entShrpmtAltItemSet"] if len(entShrpmtAltItemSet) == 0: logging.info('暂无行政许可变更信息!!!') alter_info = {} else: alter_info = singledata["entShrpmtAltItemSet"] information[i] = [ name, code, filename, start_date, end_date, content, gov_dept, status, alter_info ] return information
def name(self, data):
    """Convert alteration records into positional lists.

    Returns:
        dict: record index -> ``[content_before, content_after,
        change_date, item]``. Missing text fields default to an empty
        string and a missing ``altDate`` to ``'0000-00-00'``.
    """
    info = {}
    if len(data) <= 0:
        return info
    for index, entry in enumerate(data):
        content_before = entry.get("altBe", '')

        content_after = ''
        if "altAf" in entry:
            content_after = deal_html_code.remove_symbol(entry["altAf"])

        change_date = '0000-00-00'
        if "altDate" in entry:
            change_date = deal_html_code.change_chinese_date(
                entry["altDate"])

        item = entry.get("altItem", "")
        info[index] = [content_before, content_after, change_date, item]
    return info
def get_index(code, province):
    """Look up a company on the province index and build its detail URL.

    Args:
        code: Value substituted into ``config.url_list[province]``.
        province: Key into ``config.url_list`` and ``config.detail_list``.

    Returns:
        tuple: ``(second_url, flag, infomation)``. On success ``flag`` is
        ``1``, ``second_url`` is the detail URL for the first result and
        ``infomation[0]`` is ``[entName, code, ccode, legal_person, dates,
        status]``. ``flag`` is ``100000003`` when the result list is empty
        and ``100000001`` when the status code is not 200; in both cases
        ``second_url`` is ``None`` and ``infomation`` is empty.
    """
    first_url = config.url_list[province].format(code)
    response = requests.get(first_url)
    status_code = response.status_code
    result = response.content
    second_url = None
    infomation = {}

    if status_code != 200:
        return second_url, 100000001, infomation

    # Decode the body once and reuse it for both the emptiness check and
    # the first entry.
    matches = json.loads(result)["info"]
    if len(matches) == 0:
        return second_url, 100000003, infomation

    info = matches[0]
    uuid = info["uuid"]
    entName = info["entName"]
    ccode = info["uniScid"]
    code = info["regNo"]
    legal_person = info["lerep"]
    dates = deal_html_code.change_chinese_date(info["estDate"])
    status = info["opState"]
    infomation[0] = [entName, code, ccode, legal_person, dates, status]
    second_url = config.detail_list[province].format(uuid)
    return second_url, 1, infomation
def name(self, data):
    """Convert punishment records (with PDF link) into positional lists.

    The PDF URL is ``pdf_path[province] % penFilePath`` where the province
    is ``judge_province`` of ``uniScid`` or, when that key is absent,
    ``regNo``.

    Returns:
        dict: record index -> ``[number, types, content, date, name,
        gov_dept, pdfurl, pub_date]``. ``types`` comes from
        ``illegActType`` or ``illegAct`` (``''`` when neither is present);
        ``pub_date`` is always ``'0000-00-00'``.
    """
    information = {}
    if len(data) > 0:
        for i, singledata in enumerate(data):
            number = singledata["penDecNo"]
            if "illegActType" in singledata:
                types = singledata["illegActType"]
            elif "illegAct" in singledata:
                types = singledata["illegAct"]
            else:
                # Neither key present: keep the slot instead of failing on
                # an unassigned name when the list is built.
                types = ''
            content = singledata["penPunishCon"]
            date = deal_html_code.change_chinese_date(
                singledata["penDecissDate"])
            pub_date = '0000-00-00'
            pdfurl = singledata["penFilePath"]
            name = singledata["illegPt"]
            gov_dept = singledata["penOrgan"]
            regNo = singledata["regNo"]
            if "uniScid" in singledata:
                provin = deal_html_code.judge_province(singledata["uniScid"])
            else:
                provin = deal_html_code.judge_province(regNo)
            pdfurl = pdf_path[provin] % pdfurl
            information[i] = [
                number, types, content, date, name, gov_dept, pdfurl,
                pub_date
            ]
    return information
def deal_single_info(self, data, info):
    """Flatten licence records into ``info``, keyed by each record's ``RN``.

    Each value is ``[name, code, filename, start_date, end_date, content,
    gov_dept, status]``; ``name`` is always ``''`` and a ``None``
    ``AUDIT_NAME`` is stored as ``''``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    for singledata in data:
        name = ''
        code = singledata["AUDIT_NO"]
        filename = singledata["AUDIT_NAME"]
        # Identity comparison is the correct test for None.
        if filename is None:
            filename = ''
        start_date = deal_html_code.change_chinese_date(
            singledata["VALID_START_DATE"])
        end_date = deal_html_code.change_chinese_date(
            singledata["VALID_END_DATE"])
        content = singledata["VALID_CONTENT"]
        gov_dept = singledata["VALID_ORG"]
        status = singledata["STATUS"]
        RN = singledata["RN"]
        info[RN] = [name, code, filename, start_date, end_date, content,
                    gov_dept, status]
def get_all_info(gs_basic_id, gs_search_id, info_list):
    """Process every section of a company page and print a status per step.

    The ``basic`` section is parsed and stored first. Every other section
    except ``report``/``report1`` is handed to ``Judge(...).update_info``
    with the handler class registered under the same key in ``class_dict``.
    Finally the annual-report table is scanned and, when it yields any
    dates, passed to ``SHX_report.main`` together with ``report1``.

    Args:
        gs_basic_id: Identifier forwarded to the storage/update calls.
        gs_search_id: Identifier forwarded to ``Log().found_log``.
        info_list: Mapping of section key -> parsed section content.
    """
    Log().found_log(gs_basic_id, gs_search_id)
    info = class_dict["basic"]().get_info(info_list["basic"])
    flag = class_dict["basic"]().update_to_db(info, gs_basic_id)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("basic:%s" % flag)

    # NOTE(review): pripid is a hard-coded constant here - confirm whether
    # it should come from the caller.
    pripid = '28890'
    name = info["name"]
    for key, value in info_list.items():
        # These sections are special and handled separately. The loop only
        # sees keys that exist in info_list, so no membership re-check is
        # needed.
        if key in ("basic", "report", "report1"):
            continue
        Judge(pripid, name, config.dict_url[key]).update_info(
            key, class_dict[key], value, gs_basic_id)

    # Annual reports: collect the normalised text of the third cell of each
    # data row.
    tr_list = info_list["report"].xpath(".//tr")
    fill_data = {}
    for i, singledata in enumerate(tr_list):
        td_list = singledata.xpath(".//td")
        if len(td_list) == 0:
            continue
        fill_data[i] = deal_html_code.change_chinese_date(
            deal_html_code.remove_symbol(td_list[2].xpath("string(.)")))

    if len(fill_data) == 0:
        print("report:-1")
    else:
        print("report:%s" % len(fill_data))
        SHX_report.main(info_list["report1"], fill_data, gs_basic_id)
def get_single_info(self, items):
    """Extract one search-result entry into a dict.

    Args:
        items: Node of a single result entry, exposing ``xpath``; its
            ``onclick`` attribute must hold an ``openView('..','..','..','..')``
            call.

    Returns:
        dict: with the keys ``prirpid``, ``types``, ``company``, ``status``,
        ``code`` and ``reg_date``, plus whatever
        ``self.judge_position`` adds for the legal person.
    """
    result = {}
    open_view = items.xpath("./@onclick")
    # Raw string so the regex escapes reach the regex engine untouched.
    pattern = re.compile(r"openView\('(.*?)','(.*?)','(.*?)','(.*?)'\)")
    matched = re.findall(pattern, str(open_view))[0]
    # The key spelling "prirpid" is part of the returned schema; kept as is.
    result["prirpid"] = matched[0]
    result["types"] = matched[3]

    result["company"] = items.xpath(".//span[@id = 'mySpan']/@title")[0]

    status = items.xpath(".//span[@id = 'mySpan']/following-sibling::*[1]")
    status = status[0].xpath("string(.)")
    result["status"] = deal_html_code.remove_symbol(status)

    # ".//" keeps the lookup inside this entry; "//" would search the whole
    # document and always return the first entry's code.
    code = items.xpath(".//span[@class = 'shxydm']")[0]
    result["code"] = code.xpath("string(.)").split(":")[1]

    legal_person = items.xpath(".//span[@class = 'fddbr']")[0].xpath(
        "string(.)")
    legal_person = deal_html_code.remove_symbol(legal_person)
    # Work out the position/title of the company's responsible person.
    self.judge_position(legal_person, result)

    reg_date = items.xpath(".//span[@class= 'clrq']")[0].xpath("string(.)")
    reg_date = reg_date.split(":")[1]
    result["reg_date"] = deal_html_code.change_chinese_date(reg_date)
    return result
def deal_single_info(self, data, info):
    """Flatten shareholder records into ``info``, keyed by each ``RN``.

    Each value is ``[name, uuid, reg_amount, reg_date, reg_way, ac_amount,
    ac_date, ac_way]``; amounts go through ``match_float`` and dates through
    ``change_chinese_date``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    for record in data:
        holder = record["STOCK_NAME"]
        reg_amount = deal_html_code.match_float(record["SHOULD_CAPI"])
        reg_date = deal_html_code.change_chinese_date(
            record["SHOULD_CAPI_DATE"])
        reg_way = record["SHOULD_CAPI_TYPE"]
        ac_amount = deal_html_code.match_float(record["REAL_CAPI"])
        ac_date = deal_html_code.change_chinese_date(
            record["REAL_CAPI_DATE"])
        ac_way = record["REAL_CAPI_TYPE"]
        uuid = record["ID"]
        info[record["RN"]] = [holder, uuid, reg_amount, reg_date, reg_way,
                              ac_amount, ac_date, ac_way]
def deal_single_info(self, data, info):
    """Flatten mortgage records into ``info``, keyed by each record's ``RN``.

    For every record two detail lookups are made through
    ``self.get_detail_info``: one with type ``'mort_person'`` against
    ``person_url`` and one with type ``'mort_goods'`` against ``goods_url``,
    both using ``params`` formatted with the record's ORG, ID and SEQ_ID.

    Each value is ``[code, dates, dept, amount, status, cates, period,
    ranges, remark, cancel_cause, person_info, goods_info]``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    for record in data:
        org = record["ORG"]
        record_id = record["ID"]
        seqid = record["SEQ_ID"]
        code = record["GUARANTY_REG_NO"]
        dates = deal_html_code.change_chinese_date(record["START_DATE"])
        dept = record["CREATE_ORG"]
        amount = deal_html_code.match_float(record["ASSURE_CAPI"])
        status = record["STATUS"]
        cates = record["ASSURE_KIND"]
        start_date = deal_html_code.change_date_style(
            record["ASSURE_START_DATE"])
        end_date = deal_html_code.change_date_style(
            record["ASSURE_END_DATE"])
        period = "自" + start_date + "至" + end_date
        ranges = record["ASSURE_SCOPE"]
        remark = record["REMARK"]
        cancel_cause = record["WRITEOFF_REASON"]
        row_number = record["RN"]

        person_href = person_url + params.format(org, record_id, seqid)
        person_info = self.get_detail_info(person_href, 'mort_person')
        goods_href = goods_url + params.format(org, record_id, seqid)
        goods_info = self.get_detail_info(goods_href, 'mort_goods')

        info[row_number] = [
            code, dates, dept, amount, status, cates, period, ranges, remark,
            cancel_cause, person_info, goods_info
        ]
def name(self, data):
    """Convert shareholder records into positional lists.

    The licence type/code pair is taken from the business-licence fields
    when ``blicTypeInterpreted`` is non-empty, otherwise from the
    certificate fields when ``cetfTypeInterpreted`` is non-empty, otherwise
    both are ``''``. A ``conForm`` of ``'1'`` is reported as ``'货币'``.

    Returns:
        dict: record index -> ``[name, types, license_code, license_type,
        reg_amount, true_amount, ta_date, ta_ways, country, address,
        encrypted, cetfType]``.
    """
    info = {}
    if len(data) == 0:
        return info
    for index, holder in enumerate(data):
        holder_name = holder.get("invName", '')
        types = holder.get("invTypeInterpreted", '')

        if holder["blicTypeInterpreted"] != '':
            license_type = holder["blicTypeInterpreted"]
            license_code = holder["bLicNo"]
        elif holder["cetfTypeInterpreted"] != '':
            license_type = holder["cetfTypeInterpreted"]
            license_code = holder["cetfId"]
        else:
            license_code = ''
            license_type = ''
        license_code = deal_html_code.remove_symbol(license_code)

        reg_amount = holder.get("subconAm", '')
        true_amount = holder.get("acconAm", '')

        ta_date = '0000-00-00'
        if "conDate" in holder:
            ta_date = deal_html_code.change_chinese_date(holder["conDate"])

        ta_ways = holder.get("conForm", '')
        if ta_ways == '1':
            ta_ways = '货币'

        country = holder.get("countryInterpreted", '')
        address = holder.get("dom", '')
        encrypted = holder["encrypted"]

        if "cetfType" in holder:
            cert_type = holder["cetfType"]
        elif "bLicType" in holder:
            cert_type = holder["bLicType"]
        else:
            cert_type = ''

        info[index] = [holder_name, types, license_code, license_type,
                       reg_amount, true_amount, ta_date, ta_ways, country,
                       address, encrypted, cert_type]
    return info
def deal_single_info(self, data, info):
    """Flatten inspection records into ``info``, keyed by position in ``data``.

    Each value is ``[types, result, check_date, gov_dept]``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    for position, entry in enumerate(data):
        check_type = entry["CHECK_TYPE"]
        outcome = entry["CHECK_RESULT"]
        checked_on = deal_html_code.change_chinese_date(entry["CHECK_DATE"])
        info[position] = [check_type, outcome, checked_on,
                          entry["CHECK_ORG"]]
def deal_single_info(self, data, info):
    """Flatten share-change records into ``info``, keyed by each ``RN``.

    Each value is ``[name, percent_pre, percent_after, dates, uuid]``.

    Args:
        data: Iterable of dict records.
        info: Dict updated in place.
    """
    for record in data:
        uuid = record["ID"]
        holder = record["STOCK_NAME"]
        before = record["CHANGE_BEFORE"]
        after = record["CHANGE_AFTER"]
        changed_on = deal_html_code.change_chinese_date(
            record["CHANGE_DATE"])
        info[record["RN"]] = [holder, before, after, changed_on, uuid]