Beispiel #1
0
	def __process_items(self, to_process):
		"""Process every item of ``to_process`` through a processing pipeline.

		Creates a new ``_BrowserPool`` holding ``self.get_paralel_num()``
		browsers, queues one ``__process_item_call_sequence`` job per item on
		an ``ItemProcesorPipeline``, waits for the pipeline to finish and logs
		the elapsed time.

		``to_process`` may be any iterable: it is materialised into a list
		before its length is needed, so it does not have to support ``len()``.
		"""
		paralel_num = self.get_paralel_num()
		self.log("Comenzando procesado. Usando %d browsers" % paralel_num)

		# Create the browser pool, one browser per parallel slot.
		self.__browser_pool = _BrowserPool()
		for _ in range(paralel_num):
			self.__browser_pool.add_browser(
				self.__get_browser(self.get_process_item_browser_type())
			)

		# Turn the items into a list *before* asking for their length, so
		# that all of them are available before being added to the pipeline
		# and the total is known even for iterables without __len__.
		items = list(to_process)
		total = len(items)

		progress = eta.Progress(total, unit="pag")
		pipeline = ItemProcesorPipeline(paralel_num)

		start = time()

		# Queue every item for processing.
		for item_manager_item in items:
			pipeline.push(
				self.__process_item_call_sequence,
				item_manager_item,
				total,
				progress,
				pipeline
			)

		pipeline.wait_end()  # wait until processing has finished

		processing_time_str = eta.time_string(time() - start)
		self.log("Procesamiento terminado en %s" % processing_time_str)
	def __process_pages(self, page_manager):
		"""Process the pages handed out by ``page_manager``.

		An ``_AddPageListener`` (built from the pipeline, the page call
		sequence and the progress tracker) is subscribed through
		``page_manager.on_add_page_suscribe``. After that, pages are pulled
		with ``page_manager.get_pending_page()`` until it returns ``None``,
		and each one is queued on an ``ItemProcesorPipeline`` as a
		``__process_page_call_sequence`` job. The method then waits for the
		pipeline to finish and logs the elapsed time.
		"""
		paralel_num = self.get_paralel_num()
		if paralel_num == 1:
			self.log("Comenzando procesado. Usando un browser")
		else:
			self.log("Comenzando procesado. Usando %d browsers" % paralel_num)

		pipeline = ItemProcesorPipeline(paralel_num)
		start = time()

		# NOTE(review): ``Progress`` is used unqualified here while
		# ``eta.time_string`` below is qualified -- confirm both names are
		# in scope in this module.
		progress = Progress(0, "pagina")

		# Presumably lets pages added later be queued as well; the listener's
		# behaviour is defined elsewhere -- verify against _AddPageListener.
		page_manager.on_add_page_suscribe(
			_AddPageListener(
				pipeline,
				self.__process_page_call_sequence,
				progress
			)
		)

		# Queue the pending pages; ``None`` signals that none are left.
		while True:
			pending_page = page_manager.get_pending_page()
			# Identity test: a page object with a custom __eq__ must not be
			# able to end the loop by comparing equal to None.
			if pending_page is None:
				break

			pipeline.push(
				self.__process_page_call_sequence,
				page_manager,
				pending_page,
				pipeline,
				progress
			)

		pipeline.wait_end()  # wait until processing has finished

		processing_time_str = eta.time_string(time() - start)
		self.log("Procesamiento terminado en %s" % processing_time_str)
@author: iavas
'''

from sdf import ItemProcesorPipeline
from time import time, sleep

if __name__ == '__main__':
	# Small timing comparison: the same 200 calls are run once through an
	# ItemProcesorPipeline and once directly, printing the elapsed seconds
	# of each run.

	def func(k):
		"""Print ``k``; for odd ``k`` also sleep for 0.1 seconds."""
		print("k = %d" % k)
		if k % 2:
			sleep(0.1)

	# Run 1: calls pushed onto ItemProcesorPipeline(1).
	began = time()
	pipeline = ItemProcesorPipeline(1)
	for idx in range(200):
		pipeline.push(func, idx)
	pipeline.wait_end()
	elapsed = time() - began
	print(elapsed)

	# Run 2: the same calls made directly, one after another.
	began = time()
	for idx in range(200):
		func(idx)
	elapsed = time() - began
	print(elapsed)

	print("COCOA!")