Esempio n. 1
0
 def qyjcxx(self):
     """Crawl the pending sections for the first company in the re-crawl queue.

     Takes the first row returned by ``Mysql.qiyexx_url(bh='1')``, stores its
     id and name on ``self``, refreshes ``self.qyid1`` through
     ``self.gx_qyid()`` and then runs every crawl stage whose status column
     equals ``'1'``.  The queue is read again afterwards; when column 7 is
     ``'0'`` and columns 8 and 9 are each ``'0'`` or ``'404'`` the company is
     flagged through ``Mysql.gxqy_fupa(cx_state='0', eid=...)``.

     When there is nothing to crawl a message is printed and the method
     sleeps for 10 seconds before returning.  Any exception raised along the
     way is printed (tagged ``'jgfufh'``) instead of being propagated.
     """
     try:
         rows = Mysql.qiyexx_url(bh='1')  # rows from the re-crawl table
         # Pick the first row without indexing an empty/None result, so that
         # "no data" reaches the branch below instead of raising.
         row = rows[0] if rows else None
         if row is None:
             print('没有数据可以爬取')
             time.sleep(10)
             return

         self.qyid = row[0]  # eid
         self.z = row[2]  # company name
         # TODO: this could be optimized -- only refresh the id after a
         # failure or when nothing loads, instead of on every call.
         self.qyid1 = self.gx_qyid()  # qyid

         # (status column, crawler to run when the column is '1',
         #  message printed when the stage is skipped)
         stages = (
             (7, self.jichu12, '基础信息爬取完毕'),  # basic information
             (8, self.qyzz, '资质信息爬取完毕'),  # qualification information
             (9, self.qy_user, '人员信息爬取完毕'),  # personnel information
         )
         for column, crawl, skipped_message in stages:
             if row[column] == '1':
                 crawl()
             else:
                 print(skipped_message)
         # self.gcxmxx()  (stage left disabled)

         # Re-read the queue to see the status columns after the stages ran.
         # NOTE(review): this assumes the first row is still the same company
         # as ``self.qyid`` -- confirm against Mysql.qiyexx_url.
         rows = Mysql.qiyexx_url(bh='1')
         row = rows[0] if rows else None
         if row is None:
             return

         settled = ('0', '404')
         if row[7] == '0' and row[8] in settled and row[9] in settled:
             Mysql.gxqy_fupa(cx_state='0', eid=self.qyid)
             print('状态更新完毕')
     except Exception as e:
         print(e, 'jgfufh')
Esempio n. 2
0
                driver.refresh()
                break
        except Exception as e:
            print(e)
            driver.quit()
            break


def _proxy_from_response(text):
    """Return ``"ip:port"`` from a proxy-API response body, or ``None``.

    ``None`` is returned when *text* is not valid JSON, when the decoded
    value is not an object with a non-empty ``data`` list, or when the first
    entry of that list lacks an ``ip`` or ``port`` key.
    """
    try:
        payload = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    if not isinstance(payload, dict):
        return None
    entries = payload.get('data')
    if not isinstance(entries, list) or not entries:
        return None
    first = entries[0]
    try:
        return str(first["ip"]) + ":" + str(first["port"])
    except (KeyError, TypeError):
        return None


def ipz(url='代理连接', timeout=10, retry_delay=5):
    """Poll the proxy endpoint until it yields a proxy and return it.

    Args:
        url: Address of the proxy API.  The default is the placeholder
            string used by the original code.
        timeout: Seconds to wait for the HTTP response, so that a stalled
            connection cannot block the crawler forever.
        retry_delay: Seconds to sleep before asking again when the response
            does not contain a usable proxy.

    Returns:
        The first proxy of the response formatted as ``"ip:port"``.

    Raises:
        requests.RequestException: Network errors (including timeouts) are
            not handled here and propagate to the caller.
    """
    while True:
        body = requests.get(url, timeout=timeout).text
        proxy = _proxy_from_response(body)
        if proxy is not None:
            return proxy
        time.sleep(retry_delay)


# Main polling loop: repeatedly read the queue from Mysql.qiyexx_url and run
# the selenium crawl (``selenu``) for every company row through a fresh proxy.
# Module-level names are kept as-is because other code may read them as globals.
while True:
    a = Mysql.qiyexx_url(bh='1')
    if not a:
        # Nothing queued (or no result at all): wait before asking again
        # instead of re-querying the database in a tight loop.
        time.sleep(10)
        continue
    for x in a:
        try:
            qyid = x[0]  # company eid
            z = x[2]  # company name
            qyid1 = x[3]  # qyid (not used further in this loop)
            # TODO: this could be optimized -- only refresh the id after a
            # failure or when nothing loads, instead of on every row.
            qw = gx_qyid(z=z, eid=qyid)
            selenu(qw, z, ipz())
        except Exception as E:
            # Best effort: report the failure and move on to the next row.
            print(E)