def qyjcxx(self):
    """Crawl the outstanding sections for the next enterprise to re-crawl.

    Reads the first row returned by ``Mysql.qiyexx_url(bh='1')``, stores its
    id (column 0) and company name (column 2) on ``self``, refreshes the
    enterprise id through ``self.gx_qyid()``, and then runs one crawler per
    section whose flag column is ``'1'``:

    * column 7 -> ``self.jichu12()`` (basic information)
    * column 8 -> ``self.qyzz()``    (qualification information)
    * column 9 -> ``self.qy_user()`` (personnel information)

    Afterwards the row is read again; when column 7 is ``'0'`` and columns
    8 and 9 are each ``'0'`` or ``'404'``, the re-crawl state is set to
    ``'0'`` via ``Mysql.gxqy_fupa``.

    Returns:
        None. Any exception is caught and printed, never propagated.
    """
    try:
        row = Mysql.qiyexx_url(bh='1')[0]  # next row from the re-crawl table
        if row is None:
            print('没有数据可以爬取')
            time.sleep(10)  # back off before the caller polls again
            return

        self.qyid = row[0]  # eid
        self.z = row[2]  # company name
        # The original author notes this could be optimised: refresh the id
        # only after a failure or an empty load instead of on every run.
        # NOTE(review): gx_qyid() may return None (lookup failed or company
        # not found); the section crawlers below still run in that case.
        self.qyid1 = self.gx_qyid()  # qyid

        if row[7] == '1':
            self.jichu12()  # crawl basic information
        else:
            print('基础信息爬取完毕')

        if row[8] == '1':
            self.qyzz()  # crawl qualification information
        else:
            print('资质信息爬取完毕')

        if row[9] == '1':
            self.qy_user()  # crawl personnel information
        else:
            print('人员信息爬取完毕')

        # self.gcxmxx()  -- left disabled, as in the original

        # Re-read the row so the flags reflect what the crawlers just wrote.
        row = Mysql.qiyexx_url(bh='1')[0]
        if row is None:
            # Previously this surfaced only as a swallowed TypeError.
            print('没有数据可以爬取')
            return

        finished = ('0', '404')
        if row[7] == '0' and row[8] in finished and row[9] in finished:
            Mysql.gxqy_fupa(cx_state='0', eid=self.qyid)
            print('状态更新完毕')
    except Exception as e:
        # Best-effort worker step: report the failure and let the caller retry.
        print(e, 'jgfufh')
def gx_qyid(z, eid):
    """Look up a company by name and store its enterprise id.

    Queries the jzsc.mohurd.gov.cn company-list API with ``z`` as the
    ``complexname`` filter, decodes the response body with ``jd_nx`` and
    takes ``QY_ID`` from the first entry of ``data.list``.

    Args:
        z: Company name interpolated into the query string.
        eid: Row id passed to the ``Mysql`` update calls.

    Returns:
        The ``QY_ID`` of the first match after saving it with
        ``Mysql.update_qyid``; ``None`` when the result list is empty (the
        row is then flagged with ``cx_state='3'``).

    Raises:
        Whatever ``requests.get`` or ``jd_nx`` raise; nothing is caught here.
    """
    print('开始更新企业id')
    # NOTE(review): `z` is inserted without URL-encoding; a name containing
    # '&' or '#' would corrupt the query string -- confirm inputs are safe.
    qyurl = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/list?complexname={z}&pg=0&pgsz=15&total=0'
    # Without a timeout requests can block indefinitely on a stalled server;
    # 10 s matches the method variant of this lookup.
    resp1 = requests.get(url=qyurl, headers=headers, timeout=10)
    payload = jd_nx(data=f'{resp1.text}')
    companies = payload['data']['list']

    if not companies:
        print('没有这个公司异常')
        Mysql.gxqy_fupa(cx_state='3', eid=eid)
        return None

    qyid = companies[0]['QY_ID']
    Mysql.update_qyid(qyurl=qyid, eid=eid)  # update the enterprise id
    return qyid
def gx_qyid(self):
    """Refresh the enterprise id for the company named in ``self.z``.

    Sends the company-list query through the proxy returned by
    ``self.ipz()`` with a 10 second timeout. If the body contains the
    server-busy message nothing else happens. Otherwise the body is decoded
    with ``self.jd_nx`` and the first ``QY_ID`` in ``data.list`` is saved
    with ``Mysql.update_qyid``; an empty list flags the row with
    ``cx_state='3'`` instead.

    On an exception whose text contains ``"HTTPConnectionPool"`` the current
    token is deleted via ``Mysql.dele_token``, passed to ``Mysql.token``,
    and ``self.jichu`` / ``self.ip`` are replaced from ``self.jichutoken()``.
    Any other exception is only printed.

    Returns:
        The ``QY_ID`` on success, otherwise ``None``.
    """
    try:
        print('开始更新企业id')
        qyurl = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/list?complexname={self.z}&pg=0&pgsz=15&total=0'
        resp1 = requests.get(url=qyurl, headers=self.headers, proxies=self.ipz(), timeout=10)
        body = resp1.text

        if '服务器繁忙,请稍后重试' in str(body):
            print('服务器繁忙,请稍后重试')
            return None

        payload = self.jd_nx(data=f'{body}')
        companies = payload['data']['list']
        if not companies:
            print('没有这个公司异常')
            Mysql.gxqy_fupa(cx_state='3', eid=self.qyid)
            return None

        qyid = companies[0]['QY_ID']
        Mysql.update_qyid(qyurl=qyid, eid=self.qyid)  # update the enterprise id
        print('企业更新完毕')
        return qyid
    except Exception as e:
        if "HTTPConnectionPool" in str(e):
            # Connection-pool errors are treated as a dead proxy: drop the
            # current token and switch to a fresh token/proxy pair.
            print('ip失效')
            Mysql.dele_token(token=self.jichu)
            print('token删除成功')
            Mysql.token(token=self.jichu)
            # Fetch once so the token and both proxy URLs come from the same
            # result; the original called jichutoken() three separate times.
            fresh = self.jichutoken()
            self.jichu = fresh[0]
            self.ip = {
                "http": "http://" + fresh[1],
                "https": "https://" + fresh[1],
            }
        else:
            print('不存在')
            print(e, '基础信息错误')
        return None