コード例 #1
0
ファイル: temp10.py プロジェクト: Yuan-zewei/jzsc
 def qyzz(self):
     """Fetch the enterprise qualification list and store every entry.

     Posts to the ``caDetailList`` endpoint for ``self.qyid1`` and hands each
     returned entry to ``qyxx.qyzz``. The crawl status is written through
     ``Mysql.updatet_qyzzzt`` ('0' when entries were stored, '404' when the
     list is empty).

     Returns:
         ``'0'`` when the entries were stored, ``'404'`` when the enterprise
         has no qualifications, and ``None`` in every other case (server
         busy, non-200 response code, missing ``data``, or any exception).
     """

     def rotate_credentials():
         # Hand the rejected token to Mysql, then read the replacement token
         # (index 0) and proxy address (index 1) from ONE jichutoken() call
         # so that the token and both proxy entries belong to the same result.
         Mysql.dele_token(token=self.jichu)
         print('token删除成功')
         Mysql.token(token=self.jichu)
         fresh = self.jichutoken()
         self.jichu = fresh[0]
         self.ip = {
             "http": "http://" + fresh[1],
             "https": "https://" + fresh[1]
         }

     try:
         qy_zz = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/caDetailList?qyId={self.qyid1}&pg=0&pgsz=500'
         resp2 = requests.post(url=qy_zz,
                               headers=self.headers,
                               proxies=self.ip,
                               timeout=10)
         if '服务器繁忙,请稍后重试' in resp2.text:
             print('服务器繁忙,请稍后重试')
             return None

         payload = self.jd_nx(data=resp2.text)
         if payload['code'] != 200:
             # The API rejected the request: switch to a new token/proxy.
             rotate_credentials()
             return None
         if payload['data'] is None:
             # No data for this id: refresh the stored enterprise id.
             self.gx_qyid()
             return None

         entries = payload['data']['pageList']['list']
         total = len(entries)
         if total == 0:
             print('没有企业资质')
             Mysql.updatet_qyzzzt(qyzzzt='404', eid=self.qyid)
             return '404'

         for position, entry in enumerate(entries, start=1):
             print(f'一共{total}个企业资质现在是第{position}个企业资质')
             qyxx.qyzz(resp=entry, qyid=self.qyid)
         # Reaching this point means every entry was handed to qyxx.qyzz.
         Mysql.updatet_qyzzzt(qyzzzt='0', eid=self.qyid)
         return '0'
     except Exception as e:
         if "HTTPConnectionPool" in str(e):
             # Connection-level failure: treat the proxy as dead and rotate.
             print('ip失效')
             rotate_credentials()
         else:
             print('不存在')
             print(e, '资质信息错误')
     return None
コード例 #2
0
ファイル: temp10.py プロジェクト: Yuan-zewei/jzsc
 def jichu12(self):
     """Fetch the enterprise's basic information and store it.

     Requests the ``compDetail`` endpoint for ``self.qyid1``, passes the
     returned ``compMap`` to ``qyxx.qyjichu`` and records the crawl status
     through ``Mysql.update_qyjcxx``.

     Returns:
         ``'0'`` when the basic information was stored; ``None`` otherwise
         (server busy, non-200 response code, missing ``data``, or any
         exception).
     """

     def rotate_credentials():
         # Hand the rejected token to Mysql, then read the replacement token
         # (index 0) and proxy address (index 1) from ONE jichutoken() call
         # so that the token and both proxy entries belong to the same result.
         Mysql.dele_token(token=self.jichu)
         print('token删除成功')
         Mysql.token(token=self.jichu)
         fresh = self.jichutoken()
         self.jichu = fresh[0]
         self.ip = {
             "http": "http://" + fresh[1],
             "https": "https://" + fresh[1]
         }

     try:
         qy_jichu = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/compDetail?compId={self.qyid1}'
         resp1 = requests.get(url=qy_jichu,
                              headers=self.headers,
                              proxies=self.ip,
                              timeout=100)
         if '服务器繁忙,请稍后重试' in resp1.text:
             print('服务器繁忙,请稍后重试')
             return None

         payload = self.jd_nx(data=resp1.text)
         if payload['code'] != 200:
             # The API rejected the request: switch to a new token/proxy.
             rotate_credentials()
             return None
         if payload['data'] is None:
             # No data for this id: refresh the stored enterprise id.
             self.gx_qyid()
             return None

         qyxx.qyjichu(payload['data']['compMap'], qyid=self.qyid)
         Mysql.update_qyjcxx(qy_jcxx_zt='0', eid=self.qyid)
         return '0'
     except Exception as e:
         if "HTTPConnectionPool" in str(e):
             # Connection-level failure: treat the proxy as dead and rotate.
             print('ip失效')
             rotate_credentials()
         else:
             print('不存在')
             print(e, '基础信息错误')
     return None
コード例 #3
0
ファイル: temp10.py プロジェクト: Yuan-zewei/jzsc
 def gx_qyid(self):
     """Look the enterprise up by name and refresh its stored site id.

     Queries the ``comp/list`` endpoint with ``self.z`` as ``complexname``.
     When the result list is empty, ``Mysql.gxqy_fupa`` is called with
     ``cx_state='3'``; otherwise the ``QY_ID`` of the first hit is written
     through ``Mysql.update_qyid``.

     Returns:
         The ``QY_ID`` of the first search hit, or ``None`` when the server
         is busy, nothing matched, or an exception occurred.
     """
     # NOTE(review): this only writes the new id through Mysql; self.qyid1 is
     # not updated here -- confirm callers re-read it before retrying.

     def rotate_credentials():
         # Hand the rejected token to Mysql, then read the replacement token
         # (index 0) and proxy address (index 1) from ONE jichutoken() call
         # so that the token and both proxy entries belong to the same result.
         Mysql.dele_token(token=self.jichu)
         print('token删除成功')
         Mysql.token(token=self.jichu)
         fresh = self.jichutoken()
         self.jichu = fresh[0]
         self.ip = {
             "http": "http://" + fresh[1],
             "https": "https://" + fresh[1]
         }

     try:
         print('开始更新企业id')
         qyurl = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/list?complexname={self.z}&pg=0&pgsz=15&total=0'
         # This request takes its proxies from self.ipz(), not self.ip.
         resp1 = requests.get(url=qyurl,
                              headers=self.headers,
                              proxies=self.ipz(),
                              timeout=10)
         if '服务器繁忙,请稍后重试' in resp1.text:
             print('服务器繁忙,请稍后重试')
             return None

         matches = self.jd_nx(data=resp1.text)['data']['list']
         if len(matches) == 0:
             print('没有这个公司异常')
             Mysql.gxqy_fupa(cx_state='3', eid=self.qyid)
             return None

         qyid = matches[0]['QY_ID']
         Mysql.update_qyid(qyurl=qyid, eid=self.qyid)  # store the new enterprise id
         print('企业更新完毕')
         return qyid
     except Exception as e:
         if "HTTPConnectionPool" in str(e):
             # Connection-level failure: treat the proxy as dead and rotate.
             print('ip失效')
             rotate_credentials()
         else:
             print('不存在')
             # Label fixed: the previous text ('基础信息错误') was copied from
             # the basic-information method and mislabelled this failure.
             print(e, '企业id更新错误')
     return None
コード例 #4
0
ファイル: temp10.py プロジェクト: Yuan-zewei/jzsc
 def qy_user(self):
     """Crawl the enterprise's registered staff and their registrations.

     Requests the ``regStaffList`` endpoint for ``self.qyid1``, then fetches
     ``staffDetail`` for each person. Each person's ``staffMap`` goes to
     ``qyxx.ryxx`` and every ``regCertList`` entry to ``qyxx.ryxx_xinxi``.
     After each person the 1-based position is saved with
     ``Mysql.updatery_page_zd``; a later run starts from the position
     returned by ``Mysql.selectryurl_ys1``.

     Returns:
         ``'0'`` when the last person in the list has been stored, ``'404'``
         when the enterprise has no staff, and ``None`` otherwise (server
         busy on the list request, missing ``data``, a non-200 detail
         response, or any exception).
     """

     def rotate_credentials():
         # Hand the rejected token to Mysql, then read the replacement token
         # (index 0) and proxy address (index 1) from ONE jichutoken() call
         # so that the token and both proxy entries belong to the same result.
         Mysql.dele_token(token=self.jichu)
         print('token删除成功')
         Mysql.token(token=self.jichu)
         fresh = self.jichutoken()
         self.jichu = fresh[0]
         self.ip = {
             "http": "http://" + fresh[1],
             "https": "https://" + fresh[1]
         }

     try:
         ry_url = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/comp/regStaffList?qyId={self.qyid1}&pg=0&pgsz=5000'
         resp2 = requests.get(url=ry_url,
                              headers=self.headers,
                              proxies=self.ip,
                              timeout=10)
         if '服务器繁忙,请稍后重试' in resp2.text:
             print('服务器繁忙,请稍后重试')
             return None

         listing = self.jd_nx(data=resp2.text)
         if listing['data'] is None:
             # No data for this id: refresh the stored enterprise id.
             self.gx_qyid()
             return None

         staff = listing['data']['pageList']['list']
         total = len(staff)
         if total == 0:
             print('该公司没有人员信息')
             Mysql.updateryzt(ryzt='404', eid=self.qyid)
             return '404'

         checkpoint = Mysql.selectryurl_ys1(eid=self.qyid)
         if checkpoint is None:
             # No saved position: clear previously stored rows, start at 1.
             start = 0
             po = 1
             Mysql.delete_tbl_user(qyid=self.qyid)
             Mysql.deletetbl_user_zcxx1(qyid=self.qyid)
             print('删除成功')
         else:
             # Resume at the saved 1-based position (list index = position - 1).
             po = int(checkpoint)
             start = po - 1

         for person in staff[start:]:
             print(
                 f'--------------------------一共{total}个人++++第{po}个人--------------------------------'
             )
             userid = uuid4()
             user = person['RY_ID']
             username = person['RY_NAME']
             ry_xinxi = f'http://jzsc.mohurd.gov.cn/api/webApi/dataservice/query/staff/staffDetail?staffId={user}'
             resp3 = requests.get(url=ry_xinxi,
                                  headers=self.headers,
                                  proxies=self.ip,
                                  timeout=10)
             if '服务器繁忙,请稍后重试' in resp3.text:
                 # NOTE(review): this person is skipped without advancing
                 # `po`, so the saved position stops matching the list index
                 # and the final '0' status cannot be reached in this run --
                 # confirm whether stopping here would be preferable.
                 print('服务器繁忙,请稍后重试')
                 continue

             detail = self.jd_nx(data=resp3.text)
             print(detail)
             if detail['code'] != 200:
                 # The API rejected the request: rotate and stop this run.
                 rotate_credentials()
                 break

             qyxx.ryxx(resp=detail['data']['staffMap'],
                       qyid=self.qyid,
                       user=userid)
             certs = detail['data']['regCertList']
             for index, cert in enumerate(certs, start=1):
                 print(
                     f'{username}一共有{len(certs)}个注册信息正在爬取第{index}个注册信息'
                 )
                 qyxx.ryxx_xinxi(resp=cert,
                                 user=userid,
                                 zc_dwid=self.qyid)

             # Save the position of the person just completed.
             Mysql.updatery_page_zd(ry_page_zd=po, eid=self.qyid)
             po += 1
             if po - 1 == total:
                 Mysql.updateryzt(ryzt='0', eid=self.qyid)  # mark the crawl as done
                 return '0'
     except Exception as e:
         if "HTTPConnectionPool" in str(e):
             # Connection-level failure: treat the proxy as dead and rotate.
             print('ip失效')
             rotate_credentials()
         else:
             print('不存在')
             print(e, '人员信息错误')
     return None