def test(self, i):
    """Validate the proxy at index *i* of ``self.proxys_list``.

    Probes http://www.sse.com.cn/ through the proxy; on success the
    proxy is inserted into the ``Ip_Pool`` table with ``datastatus=1``.
    Failed probes are reported to stdout.  ``self.lock`` serializes
    console output and DB-session use across worker threads.
    """
    socket.setdefaulttimeout(10)  # global socket timeout as a safety net
    proxy = self.proxys_list[i]
    # Build the requests proxies mapping from the URL scheme.
    if "https://" in proxy:
        proxies = {"https": proxy}
    elif "http://" in proxy:
        proxies = {"http": proxy}
    else:
        # BUG FIX: the original left `proxies` unbound for scheme-less
        # entries, so requests.get raised an uncaught NameError.
        # Treat such entries as plain HTTP proxies.
        proxies = {"http": proxy}
    try:
        requests.get("http://www.sse.com.cn/", headers=self.headers,
                     proxies=proxies, timeout=5)
        self.lock.acquire()  # acquire the lock
        try:
            print(proxy, 'is OK')
            ipitem = dict(id=get_id(), ip=proxy,
                          updatetime=datetime.now(), datastatus=1)
            session.add(Ip_Pool(**ipitem))
            session.commit()
        finally:
            # BUG FIX: release the lock even if the DB insert/commit
            # raises; otherwise every other worker thread deadlocks.
            self.lock.release()  # release the lock
    except requests.exceptions.RequestException as e:
        self.lock.acquire()
        try:
            print(proxy, e)
        finally:
            self.lock.release()
def check_out(proxies, check_header):
    """Return a randomly chosen proxy from *proxies* that passes validation.

    Each candidate is tested with ``check_out_base``; candidates that
    fail are demoted to ``datastatus=2`` in the ``Ip_Pool`` table and a
    new one is drawn.  Loops until some candidate succeeds.
    """
    while True:
        candidate = proxies[random.randint(0, len(proxies) - 1)]
        if check_out_base(candidate, check_header):
            return candidate
        # Dead proxy: mark it so it is not handed out again.
        session.query(Ip_Pool).filter(Ip_Pool.ip == candidate).update(
            {Ip_Pool.datastatus: 2})
        session.commit()
# logging.info(msg) # session.query(Sh_Share).filter(Sh_Share.stockcode == stock["stockcode"]).update({Sh_Share.datastatus: 2}) # session.commit() # time.sleep(5) stock = i print stock k = MyReptile(stock) start = time.time() for dd in k.page_urls: thread = myThread(urls=dd, proxies=k.proxies, check_header=k.check_header, referer_header=k.referer_header, stock=k.stock) thread.start() threads.append(thread) for t in threads: t.join() print 'down_success' end = time.time() msg = '股票代码:{},股票名称:{},耗时:{}s,日期:{}'.format(stock["stockcode"], stock["stockname"], end - start, datetime.now()) logging.info(msg) session.query(Sh_Share).filter( Sh_Share.stockcode == stock["stockcode"]).update( {Sh_Share.datastatus: 2}) session.commit() print '{}:end'.format(datetime.now())