예제 #1
0
def push_proxy_queue(q,size=10):
	"""Replenish the proxy IP queue when it is running low.

	If ``q`` currently holds fewer than ``size`` items, every proxy returned
	by ``datamodel.get_proxy()`` is put onto it; otherwise nothing happens.

	Args:
		q: Queue-like object exposing ``qsize()`` and ``put()``.
		size: Refill threshold; the queue is topped up only below this size.
	"""
	# qsize is a method and must be called; comparing the bound method itself
	# to an int raises TypeError on Python 3.
	if q.qsize() < size:
		for p in datamodel.get_proxy():
			q.put(p)
예제 #2
0
	def run(self):
		"""Worker loop: pop companies from the row queue and fetch each through a proxy.

		Runs until ``self.exit_flag`` or ``datamodel.g_exit`` is set. For each
		company popped from the queue that does not already exist, one proxy is
		picked at random (a proxy that just succeeded is reused) and
		``self.mode.get_info`` is called with it. The returned status decides
		whether the result is saved, the company is re-queued, and whether the
		proxy is kept for the next iteration.
		"""
		queue = datamodel.get_row_queue(self.mode.biz_flag)
		# True while the proxy used last time should be reused.
		proxyip_isAlive = False
		# Proxies that produced status 1; excluded from selection until reset.
		proxyslowip = set()
		while not self.exit_flag:
			if datamodel.g_exit:
				break
			# NOTE(review): the argument 1 is presumably a timeout - confirm.
			corp = queue.pop(1)
			if not corp:
				continue
			# Skip companies that already exist.
			if exist_corp(corp, self.mode.biz_flag):
				continue

			# Pick a fresh proxy only when the previous one is not marked alive.
			if not proxyip_isAlive:
				proxyinfo = datamodel.get_proxy()
				# No proxy at all: fall back to an empty entry (use the local IP).
				if not proxyinfo:
					proxyinfo = ['']
				proxyinfo = set(proxyinfo)
				plen = len(proxyinfo)
				proxyinfo.difference_update(self.mode.ille_proxy_ip, proxyslowip)
				# Reset the slow list once filtering shrinks a pool of more than
				# five proxies to fewer than five candidates.
				if plen > 5 and len(proxyinfo) < 5:
					proxyslowip = set()
				# random.sample() requires a sequence (sets are rejected on
				# Python 3.11+) and raises ValueError on an empty population.
				proxyinfo = random.sample(list(proxyinfo), 1) if proxyinfo else []

			# No usable proxy: put the company back so it is not lost, then wait.
			if not proxyinfo:
				print("not proxy ip....")
				queue.push(corp)
				time.sleep(5)
				continue
			corp, info, status, proxy = self.mode.get_info(corp, proxyinfo[0])

			if info and status == 0:
				# Success: store the data and keep using this proxy.
				print(corp, info, status, proxy)
				savedata(info, self.mode.biz_flag)
				proxyip_isAlive = True
			elif status == 1:
				# Status 1: remember the proxy as slow and retry the company later.
				proxyip_isAlive = False
				proxyslowip.add(proxy)
				queue.push(corp)
			elif status == 2:
				# Status 2: retry the company with another proxy.
				proxyip_isAlive = False
				queue.push(corp)
			else:
				proxyip_isAlive = True
		else:
			# Reached only when the loop ends because exit_flag became true
			# (not when it is left through the break above).
			time.sleep(5)
예제 #3
0
파일: core.py 프로젝트: alofiyin/myoproject
def push_proxy_queue(q,ille_proxy_ip,size=10):
	"""Top up the proxy IP queue when it holds fewer than ``size`` entries.

	The proxies returned by ``datamodel.get_proxy()`` are de-duplicated, the
	ones listed in ``ille_proxy_ip`` are removed, and the remainder is put
	onto ``q``. Nothing happens when the queue already holds ``size`` items
	or more.

	Args:
		q: Queue-like object exposing ``qsize()`` and ``put()``.
		ille_proxy_ip: Iterable of proxies that must not be queued.
		size: Refill threshold.
	"""
	if q.qsize() >= size:
		return
	candidates = set(datamodel.get_proxy())
	candidates.difference_update(ille_proxy_ip)
	for address in candidates:
		q.put(address)
예제 #4
0
def exec_main_proxy(mode):
	"""Business-thread entry point: fetch queued companies in proxy-backed batches.

	Loops until ``datamodel.g_exit`` is set. On each pass up to ``mode.g_step``
	proxies are selected, one not-yet-existing company is popped from the row
	queue per proxy, and ``mode.get_info`` is run for every (company, proxy)
	pair on an eventlet green pool. Successful results are saved, and a result
	with status 1 marks its proxy as illegal and re-queues the company.

	Args:
		mode: Business module object. This function reads ``biz_flag``,
			``g_step`` and ``ille_proxy_ip`` from it and calls ``get_info``.
	"""
	queue = datamodel.get_row_queue(mode.biz_flag)
	while True:
		if datamodel.g_exit:
			break
		s_time = time.time()
		pending = queue.__len__()
		print(pending)
		if not pending > 0:
			# Nothing queued: wait before polling again.
			time.sleep(5)
			continue

		proxyinfo = datamodel.get_proxy()
		# No proxy at all: fall back to an empty entry (use the local IP).
		if not proxyinfo:
			proxyinfo = ['']
		proxyinfo = set(proxyinfo)
		proxyinfo.difference_update(mode.ille_proxy_ip)
		if not proxyinfo:
			# Every proxy is excluded: wait and retry rather than falling
			# through into the fetch logic with nothing to use.
			print("not proxy ip....")
			time.sleep(5)
			continue
		# random.sample() requires a sequence; sets are rejected on Python 3.11+.
		proxyinfo = list(proxyinfo)
		if len(proxyinfo) > mode.g_step:
			proxyinfo = random.sample(proxyinfo, mode.g_step)

		print("start...")
		# Collect one company per selected proxy, skipping existing ones.
		corps = []
		while len(corps) < len(proxyinfo):
			corp = queue.pop(1)
			if not corp:
				break
			if exist_corp(corp, mode.biz_flag):
				print(corp)
				continue
			corps.append(corp)
		if not corps:
			# Nothing to fetch this round; also avoids building a zero-sized
			# green pool.
			continue

		proxyinfo = proxyinfo[:len(corps)]
		pool = eventlet.GreenPool(len(proxyinfo))
		# Number of results seen per status code.
		result = {}
		for corp, info, status, proxy in pool.imap(mode.get_info, corps, proxyinfo):
			# Count from 1 on first occurrence (previously started at 0).
			result[status] = result.get(status, 0) + 1
			if info and status == 0:
				res, _ = savedata(info, mode.biz_flag)
				if res == -1:
					# Save reported -1: hand the record to the temporary queue,
					# tagged with its target table name.
					info['table'] = 'data_%s' % mode.biz_flag
					datamodel.get_tmp_queue().push(info)
			elif status == 1:
				# Status 1: mark the proxy as illegal and retry the company later.
				mode.ille_proxy_ip.add(proxy)
				queue.push(corp)
			print(corp, info, status)
		print(len(proxyinfo))
		print(result)
		e_time = time.time() - s_time
		print("kill time:%s" % (e_time))