def qyzz(self):
    """Fetch the company's qualification list and pass each record to ``qyxx.qyzz``.

    Queries the ``caDetailList`` endpoint for ``self.qyid1`` through the
    current proxy (``self.ip``) and decodes the body with ``self.jd_nx``.

    Returns:
        ``'0'`` once every record has been handed to ``qyxx.qyzz`` and the
        status has been written with ``Mysql.updatet_qyzzzt``; ``'404'``
        when the list is empty; ``None`` otherwise (busy response, non-200
        code, missing data, or any caught exception).
    """
    busy_marker = '服务器繁忙,请稍后重试'

    def _rotate_credentials():
        # Retire the current token, then load a replacement pair.
        Mysql.dele_token(token=self.jichu)
        print('token删除成功')
        Mysql.token(token=self.jichu)
        # Call jichutoken() once so the token and the proxy address come
        # from the same lookup. Presumably it returns (token, proxy_address);
        # indices 0 and 1 are used that way throughout this class.
        fresh = self.jichutoken()
        self.jichu = fresh[0]
        self.ip = {
            "http": "http://" + fresh[1],
            "https": "https://" + fresh[1]
        }

    try:
        qy_zz = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/caDetailList?qyId={self.qyid1}&pg=0&pgsz=500'
        # NOTE(review): this endpoint is called with POST; confirm that is
        # what the server expects for a query URL.
        resp2 = requests.post(url=qy_zz,
                              headers=self.headers,
                              proxies=self.ip,
                              timeout=10)
        if busy_marker in resp2.text:
            print(busy_marker)
            return None

        payload = self.jd_nx(data=resp2.text)
        if payload['code'] != 200:
            # A non-200 code is treated as an unusable token.
            _rotate_credentials()
            return None

        if payload['data'] is None:
            # No data for this id: refresh the stored company id.
            self.gx_qyid()
            return None

        records = payload['data']['pageList']['list']
        if len(records) == 0:
            print('没有企业资质')
            Mysql.updatet_qyzzzt(qyzzzt='404', eid=self.qyid)
            return '404'

        total = len(records)
        for position, record in enumerate(records, start=1):
            print(f'一共{total}个企业资质现在是第{position}个企业资质')
            qyxx.qyzz(resp=record, qyid=self.qyid)

        # Only reached when every record was processed without raising.
        Mysql.updatet_qyzzzt(qyzzzt='0', eid=self.qyid)
        return '0'
    except Exception as e:
        # Connection-pool errors are taken to mean the proxy is dead.
        if "HTTPConnectionPool" in str(e):
            print('ip失效')
            _rotate_credentials()
        else:
            print('不存在')
            print(e, '资质信息错误')
def jichu12(self):
    """Fetch the company's basic information and pass it to ``qyxx.qyjichu``.

    Queries the ``compDetail`` endpoint for ``self.qyid1`` through the
    current proxy (``self.ip``) and decodes the body with ``self.jd_nx``.

    Returns:
        ``'0'`` after ``qyxx.qyjichu`` received the ``compMap`` payload and
        ``Mysql.update_qyjcxx`` recorded the status; ``None`` otherwise
        (busy response, non-200 code, missing data, or any caught
        exception).
    """
    busy_marker = '服务器繁忙,请稍后重试'

    def _rotate_credentials():
        # Retire the current token, then load a replacement pair.
        Mysql.dele_token(token=self.jichu)
        print('token删除成功')
        Mysql.token(token=self.jichu)
        # Call jichutoken() once so the token and the proxy address come
        # from the same lookup. Presumably it returns (token, proxy_address).
        fresh = self.jichutoken()
        self.jichu = fresh[0]
        self.ip = {
            "http": "http://" + fresh[1],
            "https": "https://" + fresh[1]
        }

    try:
        qy_jichu = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/compDetail?compId={self.qyid1}'
        resp1 = requests.get(url=qy_jichu,
                             headers=self.headers,
                             proxies=self.ip,
                             timeout=100)
        if busy_marker in resp1.text:
            print(busy_marker)
            return None

        payload = self.jd_nx(data=resp1.text)
        if payload['code'] != 200:
            # A non-200 code is treated as an unusable token.
            _rotate_credentials()
            return None

        if payload['data'] is None:
            # No data for this id: refresh the stored company id.
            self.gx_qyid()
            return None

        qyxx.qyjichu(payload['data']['compMap'], qyid=self.qyid)
        Mysql.update_qyjcxx(qy_jcxx_zt='0', eid=self.qyid)
        return '0'
    except Exception as e:
        # Connection-pool errors are taken to mean the proxy is dead.
        if "HTTPConnectionPool" in str(e):
            print('ip失效')
            _rotate_credentials()
        else:
            print('不存在')
            print(e, '基础信息错误')
def gx_qyid(self):
    """Look the company up by name and store the first match's ``QY_ID``.

    Queries the company ``list`` endpoint with ``self.z`` as
    ``complexname``, using the proxy returned by ``self.ipz()``.

    Returns:
        The ``QY_ID`` of the first search hit after it has been written
        with ``Mysql.update_qyid``; ``None`` when the server is busy, the
        search has no hits (``Mysql.gxqy_fupa`` is called with
        ``cx_state='3'``), or an exception was caught.
    """
    busy_marker = '服务器繁忙,请稍后重试'
    try:
        print('开始更新企业id')
        qyurl = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/list?complexname={self.z}&pg=0&pgsz=15&total=0'
        resp1 = requests.get(url=qyurl,
                             headers=self.headers,
                             proxies=self.ipz(),
                             timeout=10)
        if busy_marker in resp1.text:
            print(busy_marker)
            return None

        payload = self.jd_nx(data=resp1.text)
        matches = payload['data']['list']
        if len(matches) == 0:
            print('没有这个公司异常')
            Mysql.gxqy_fupa(cx_state='3', eid=self.qyid)
            return None

        qyid = matches[0]['QY_ID']
        Mysql.update_qyid(qyurl=qyid, eid=self.qyid)  # persist the new company id
        print('企业更新完毕')
        # NOTE(review): self.qyid1 is not updated here; callers that keep
        # using it will still send the old id - confirm that is intended.
        return qyid
    except Exception as e:
        # Connection-pool errors are taken to mean the proxy is dead.
        if "HTTPConnectionPool" in str(e):
            print('ip失效')
            Mysql.dele_token(token=self.jichu)
            print('token删除成功')
            Mysql.token(token=self.jichu)
            # Call jichutoken() once so the token and the proxy address
            # come from the same lookup. Presumably it returns
            # (token, proxy_address).
            fresh = self.jichutoken()
            self.jichu = fresh[0]
            self.ip = {
                "http": "http://" + fresh[1],
                "https": "https://" + fresh[1]
            }
        else:
            print('不存在')
            # Label names this method so the log line is distinguishable
            # from the basic-information fetch.
            print(e, '企业id更新错误')
def qy_user(self):
    """Crawl the company's registered staff and their registration records.

    Fetches the ``regStaffList`` for ``self.qyid1``, then requests
    ``staffDetail`` for each person. Staff data goes to ``qyxx.ryxx`` and
    every entry of ``regCertList`` goes to ``qyxx.ryxx_xinxi``. Progress is
    written with ``Mysql.updatery_page_zd`` after each person, and the
    crawl resumes from the value returned by ``Mysql.selectryurl_ys1``.

    Returns:
        ``'0'`` once the progress counter shows that the whole list was
        processed and ``Mysql.updateryzt`` recorded it; ``'404'`` when the
        staff list is empty; ``None`` otherwise (busy response, missing
        data, token failure mid-crawl, or any caught exception).
    """
    busy_marker = '服务器繁忙,请稍后重试'

    def _rotate_credentials():
        # Retire the current token, then load a replacement pair.
        Mysql.dele_token(token=self.jichu)
        print('token删除成功')
        Mysql.token(token=self.jichu)
        # Call jichutoken() once so the token and the proxy address come
        # from the same lookup. Presumably it returns (token, proxy_address).
        fresh = self.jichutoken()
        self.jichu = fresh[0]
        self.ip = {
            "http": "http://" + fresh[1],
            "https": "https://" + fresh[1]
        }

    try:
        ry_url = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/regStaffList?qyId={self.qyid1}&pg=0&pgsz=5000'
        resp2 = requests.get(url=ry_url,
                             headers=self.headers,
                             proxies=self.ip,
                             timeout=10)
        if busy_marker in resp2.text:
            print(busy_marker)
            return None

        listing = self.jd_nx(data=resp2.text)
        if listing['data'] is None:
            # No data for this id: refresh the stored company id.
            self.gx_qyid()
            return None

        staff = listing['data']['pageList']['list']
        total = len(staff)
        if total == 0:
            print('该公司没有人员信息')
            Mysql.updateryzt(ryzt='404', eid=self.qyid)
            return '404'

        saved = Mysql.selectryurl_ys1(eid=self.qyid)
        if saved is None:
            # No stored progress: start from the first person and clear
            # previously stored staff rows for this company.
            start = 0
            po = 1
            Mysql.delete_tbl_user(qyid=self.qyid)
            Mysql.deletetbl_user_zcxx1(qyid=self.qyid)
            print('删除成功')
        else:
            # Stored progress is a 1-based person number.
            po = int(saved)
            start = po - 1

        for person in staff[start:]:
            print(
                f'--------------------------一共{total}个人++++第{po}个人--------------------------------'
            )
            userid = uuid4()
            user = person['RY_ID']
            username = person['RY_NAME']
            ry_xinxi = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/staff/staffDetail?staffId={user}'
            resp3 = requests.get(url=ry_xinxi,
                                 headers=self.headers,
                                 proxies=self.ip,
                                 timeout=10)
            if busy_marker in resp3.text:
                # NOTE(review): a busy response skips this person without
                # advancing `po`, so the stored progress lags behind the
                # list position and the final status is never set to '0'
                # in this run - confirm this is intended.
                print(busy_marker)
                continue

            detail = self.jd_nx(data=resp3.text)
            print(detail)
            if detail['code'] != 200:
                # Token is unusable: rotate and stop; stored progress lets
                # a later call resume.
                print(detail)
                _rotate_credentials()
                break

            qyxx.ryxx(resp=detail['data']['staffMap'],
                      qyid=self.qyid,
                      user=userid)
            certs = detail['data']['regCertList']
            for index, cert in enumerate(certs, start=1):
                print(f'{username}一共有{len(certs)}个注册信息正在爬取第{index}个注册信息')
                qyxx.ryxx_xinxi(resp=cert, user=userid, zc_dwid=self.qyid)

            # Record progress once all of this person's records (possibly
            # none) have been handled.
            Mysql.updatery_page_zd(ry_page_zd=po, eid=self.qyid)
            po += 1

        # The counter only reaches total + 1 when every person up to the
        # end of the list was processed.
        if po - 1 == total:
            Mysql.updateryzt(ryzt='0', eid=self.qyid)
            return '0'
        return None
    except Exception as e:
        # Connection-pool errors are taken to mean the proxy is dead.
        if "HTTPConnectionPool" in str(e):
            print('ip失效')
            _rotate_credentials()
        else:
            print('不存在')
            print(e, '人员信息错误')