Example #1
0
	def run(self):
		"""Worker loop: pop companies off the row queue and fetch each through a proxy.

		Runs until ``self.exit_flag`` or ``datamodel.g_exit`` becomes true.
		For every company popped from the queue that is not already stored
		(``exist_corp``), ``self.mode.get_info(corp, proxy)`` is called; it
		returns a ``(corp, info, status, proxy)`` tuple that is handled as:

		* ``info`` present and ``status == 0``: the record is saved and the
		  current proxy is kept for the next request.
		* ``status == 1``: the proxy is added to the local slow-proxy set,
		  the company is re-queued and a new proxy is picked next time.
		* ``status == 2``: the company is re-queued and a new proxy is
		  picked next time.
		* anything else: the company is dropped and the proxy is kept.

		NOTE(review): the exact meaning of each status code is defined by
		``mode.get_info`` and is not visible here -- confirm against it.
		"""
		queue = datamodel.get_row_queue(self.mode.biz_flag)
		# True while the proxy used for the previous request is still usable.
		proxyip_isAlive = False
		# Proxies that produced status 1; excluded from later selections.
		proxyslowip = set()
		# Current proxy choice: a list holding at most one address.
		proxyinfo = []
		while not self.exit_flag:
			if datamodel.g_exit:
				break
			corp = queue.pop(1)
			if not corp:
				continue
			# Skip companies that already exist in storage.
			if exist_corp(corp, self.mode.biz_flag):
				continue

			# Pick a new proxy on the first pass or after the last one failed.
			if not proxyip_isAlive:
				# With no proxies available fall back to '' (presumably
				# meaning "use the local IP" -- confirm in get_info).
				candidates = set(datamodel.get_proxy() or [''])
				total = len(candidates)
				candidates.difference_update(self.mode.ille_proxy_ip, proxyslowip)
				# Most of a reasonably sized pool got filtered out: forget
				# the slow-proxy marks so they can be retried later.
				if total > 5 and len(candidates) < 5:
					proxyslowip = set()
				# random.sample() needs a sequence (sets raise TypeError on
				# Python 3.11+) and a non-empty population.
				proxyinfo = random.sample(list(candidates), 1) if candidates else []

			# No usable proxy: put the company back and wait before retrying.
			if not proxyinfo:
				print("not proxy ip....")
				queue.push(corp)
				time.sleep(5)
				continue
			corp, info, status, proxy = self.mode.get_info(corp, proxyinfo[0])

			if info and status == 0:
				print(corp, info, status, proxy)
				savedata(info, self.mode.biz_flag)
				proxyip_isAlive = True
			elif status == 1:
				proxyip_isAlive = False
				proxyslowip.add(proxy)
				queue.push(corp)
			elif status == 2:
				proxyip_isAlive = False
				queue.push(corp)
			else:
				proxyip_isAlive = True
		else:
			# Reached only when the loop ends because self.exit_flag became
			# true (not on the g_exit break).
			time.sleep(5)
Example #2
0
def exec_main(mode):
	"""Business thread entry point: process queued companies for one module.

	``mode`` is the business module object. The function reads
	``mode.biz_flag`` to select the row queue, calls ``mode.get_info(corp)``
	for every queued company that is not already stored, and updates the
	counters ``mode.Ok_num``, ``mode.False_num`` and ``mode.Null_num``.

	``get_info`` returns ``(corp, info, status, proxy)``:

	* ``info`` present and ``status == 0``: save the record and flag the
	  company with ``1``.
	* ``status`` 1 or 2: count a failure and re-queue the company.
	* anything else: count an empty result and flag the company with ``-1``.

	The loop ends when ``datamodel.g_exit`` becomes true. Ordinary errors
	raised while handling one company are printed and the loop carries on.
	"""
	queue = datamodel.get_row_queue(mode.biz_flag)
	while not datamodel.g_exit:
		corp = queue.pop(3)
		if not corp:
			continue
		try:
			if exist_corp(corp, mode.biz_flag):
				continue

			corp, info, status, proxy = mode.get_info(corp)
			if info and status == 0:
				mode.Ok_num += 1
				savedata(info, mode.biz_flag)
				chang_flag(corp, 1, mode.biz_flag)
			elif status in (1, 2):
				mode.False_num += 1
				queue.push(corp)
			else:
				mode.Null_num += 1
				chang_flag(corp, -1, mode.biz_flag)
			print(corp, info, status, proxy)
		except Exception:
			# Catch Exception rather than everything so KeyboardInterrupt,
			# SystemExit and greenlet kills can still stop the worker.
			traceback.print_exc()
Example #3
0
def exec_main(mode):
	"""Business thread entry point: process queued companies for one module.

	``mode`` is the business module object; ``mode.biz_flag`` selects the row
	queue and ``mode.get_info(corp)`` performs the lookup, returning
	``(corp, info, status, proxy)``.

	* ``info`` present and ``status == 0``: the record is saved; when
	  ``savedata`` reports ``-1`` the record is tagged with its target table
	  name and pushed onto the temporary queue instead.
	* ``status == 1``: the company is re-queued.
	* anything else: the company is dropped.

	After each company the running totals are printed (``i`` = records
	handed to ``savedata``, ``x`` = lookups performed). The loop ends when
	``datamodel.g_exit`` becomes true; it sleeps 5 seconds whenever the
	queue yields nothing.
	"""
	fetched = 0
	saved = 0
	queue = datamodel.get_row_queue(mode.biz_flag)
	while not datamodel.g_exit:
		corp = queue.pop(3)
		if not corp:
			time.sleep(5)
			continue
		try:
			if exist_corp(corp, mode.biz_flag):
				print(corp)
				continue

			corp, info, status, _proxy = mode.get_info(corp)
			fetched += 1
			if info and status == 0:
				saved += 1
				res, _desc = savedata(info, mode.biz_flag)
				if res == -1:
					# Saving failed: park the record on the temp queue,
					# tagged with the table it belongs to.
					info['table'] = 'data_%s' % mode.biz_flag
					datamodel.get_tmp_queue().push(info)
			elif status == 1:
				queue.push(corp)
			print(corp, info, status)
			print("i=%s,x=%s" % (saved, fetched))
		except Exception:
			# Catch Exception rather than everything so KeyboardInterrupt,
			# SystemExit and greenlet kills can still stop the worker.
			traceback.print_exc()
Example #4
0
	def run(self):
		"""Worker loop: pop companies off the row queue and fetch each through a proxy.

		Proxies are taken from ``self.q``. A proxy is reused for as long as
		requests through it succeed; proxies that produced ``status == 1``
		are remembered in a local slow-proxy set that is cleared roughly
		every 60 seconds.

		``self.mode.get_info(corp, proxy)`` returns
		``(corp, info, status, proxy)``:

		* ``info`` present and ``status == 0``: save the record, flag the
		  company with ``1`` and keep the proxy.
		* ``status == 1``: count a failure, mark the proxy slow, re-queue
		  the company and pick another proxy.
		* ``status == 2``: count a failure, re-queue the company and pick
		  another proxy.
		* anything else: count an empty result, flag the company with
		  ``-1`` and keep the proxy.

		The loop ends when ``self.exit_flag`` or ``datamodel.g_exit`` becomes
		true.
		"""
		# Timestamp of the last slow-proxy reset.
		t_count = int(time.time())
		queue = datamodel.get_row_queue(self.mode.biz_flag)
		# True while the proxy used for the previous request is still usable.
		proxyip_isAlive = False
		# Proxies that produced status 1.
		proxyslowip = set()
		# Current proxy; initialised so it is always bound before use.
		proxyinfo = ""
		while not self.exit_flag:
			if datamodel.g_exit:
				break
			try:
				corp = queue.pop(1)
			except Exception:
				corp = ""
				time.sleep(5)
			if not corp:
				continue

			# Periodically forget which proxies were slow.
			if (int(time.time()) - t_count) > 60:
				proxyslowip = set()
				t_count = int(time.time())

			# Fetch a new proxy on the first pass or after the last one failed.
			if not proxyip_isAlive:
				while not datamodel.g_exit:
					if self.q.qsize() == 0:
						time.sleep(1)
						continue
					try:
						proxyinfo = self.q.get()
					except Exception:
						# Presumably '' means "use the local IP" -- confirm
						# in get_info.
						proxyinfo = ""
					if proxyinfo not in proxyslowip:
						break
				if datamodel.g_exit:
					# Shutdown requested while waiting for a proxy: return
					# the company to the queue instead of fetching it.
					queue.push(corp)
					break

			corp, info, status, proxy = self.mode.get_info(corp, proxyinfo)

			if info and status == 0:
				self.mode.Ok_num += 1
				savedata(info, self.mode.biz_flag)
				proxyip_isAlive = True
				chang_flag(corp, 1, self.mode.biz_flag)
			elif status == 1:
				self.mode.False_num += 1
				proxyip_isAlive = False
				proxyslowip.add(proxy)
				queue.push(corp)
			elif status == 2:
				self.mode.False_num += 1
				proxyip_isAlive = False
				queue.push(corp)
			else:
				self.mode.Null_num += 1
				chang_flag(corp, -1, self.mode.biz_flag)
				proxyip_isAlive = True
			print(corp, info, status, proxy)
		else:
			# Reached only when the loop ends because self.exit_flag became
			# true (not on a g_exit break).
			time.sleep(5)
Example #5
0
def exec_main_proxy(mode):
	"""Business thread entry point: fetch queued companies in proxy-backed batches.

	``mode`` is the business module object. Each round:

	1. Loads the proxy list, removes proxies listed in
	   ``mode.ille_proxy_ip`` and keeps at most ``mode.g_step`` of them,
	   chosen at random.
	2. Pops one not-yet-stored company per proxy from the row queue.
	3. Runs ``mode.get_info(corp, proxy)`` for every pair concurrently on
	   an eventlet ``GreenPool``.
	4. Saves successful results (``info`` present and ``status == 0``);
	   when ``savedata`` reports ``-1`` the record is tagged with its table
	   name and pushed onto the temporary queue. On ``status == 1`` the
	   proxy is added to ``mode.ille_proxy_ip`` and the company re-queued.

	A per-status tally and the elapsed time are printed after each round.
	The loop ends when ``datamodel.g_exit`` becomes true and sleeps 5
	seconds whenever the queue is empty or no proxy is usable.
	"""
	queue = datamodel.get_row_queue(mode.biz_flag)
	while not datamodel.g_exit:
		s_time = time.time()
		pending = len(queue)
		print(pending)
		if pending <= 0:
			time.sleep(5)
			continue

		# With no proxies available fall back to '' (presumably meaning
		# "use the local IP" -- confirm in get_info).
		candidates = set(datamodel.get_proxy() or [''])
		candidates.difference_update(mode.ille_proxy_ip)
		if len(candidates) > mode.g_step:
			# random.sample() needs a sequence; sets raise TypeError on
			# Python 3.11+.
			proxyinfo = random.sample(list(candidates), mode.g_step)
		else:
			proxyinfo = list(candidates)

		if not proxyinfo:
			# Nothing usable this round: wait and retry instead of running
			# an empty batch.
			print("not proxy ip....")
			time.sleep(5)
			continue

		print("start...")
		# Collect one new company per proxy.
		corps = []
		while len(corps) < len(proxyinfo):
			corp = queue.pop(1)
			if not corp:
				break
			if exist_corp(corp, mode.biz_flag):
				print(corp)
				continue
			corps.append(corp)

		# Use only as many proxies as there are companies to fetch.
		proxyinfo = proxyinfo[:len(corps)]
		pool = eventlet.GreenPool(len(proxyinfo))
		# status code -> number of results with that status this round.
		result = {}
		for corp, info, status, proxy in pool.imap(mode.get_info, corps, proxyinfo):
			# Count from 1 on first occurrence (previously started at 0).
			result[status] = result.get(status, 0) + 1
			if info and status == 0:
				res, _desc = savedata(info, mode.biz_flag)
				if res == -1:
					info['table'] = 'data_%s' % mode.biz_flag
					datamodel.get_tmp_queue().push(info)
			elif status == 1:
				mode.ille_proxy_ip.add(proxy)
				queue.push(corp)
			print(corp, info, status)
		print(len(proxyinfo))
		print(result)
		e_time = time.time() - s_time
		print("kill time:%s" % (e_time))