Esempio n. 1
0
	def run(self):
		"""Stream every input value through ``map_func`` in parallel and emit each result.

		Creates the worker pool lazily; if a stop request is detected via
		``_check_stop`` the pool is shut down and the run aborts early.
		"""
		self.pool = ParMap(self.map_func, num_workers = self.num_workers, context_func = self.context)
		for response in self.pool.map(self._generate_input()):
			if self._check_stop():
				self.pool.stop()
				return

			self.processed += 1
			self._output(response)
Esempio n. 2
0
	def run(self, chunksize = 10):
		"""Fold the whole input stream down to a single value and emit it.

		Input values are grouped into batches of ``chunksize`` and folded in
		parallel by the worker pool; the partial results are folded again
		locally.  NOTE(review): this re-applies ``zero_value`` for every
		partial fold, so it assumes ``zero_value`` is an identity element
		for ``fold_func`` — confirm with callers.
		"""
		self.pool = ParMap(self._fold_batch, num_workers = self.num_workers, context_func = self.context)
		result = []

		for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
			if self._check_stop():
				self.pool.stop()
				return

			result.append(response)

			# compact accumulated partial results so `result` stays bounded
			if len(result) == chunksize:
				result = [self._fold_batch(result)]

			self.processed += 1
		
		# fold the remaining partial results into the final value
		self._output(self._fold_batch(result))
Esempio n. 3
0
class MapOperation(Operation):
	"""Applies ``map_func`` to every value of the source dataset in parallel."""

	def __init__(self, source, map_func, num_workers = multiprocessing.cpu_count(), context = None, name = "Map", **kwargs):
		super(MapOperation, self).__init__(source, num_workers, context, name = name, **kwargs)
		self.map_func = map_func
		# worker pool; created lazily in run()
		self.pool = None

	def run(self):
		"""Stream every input value through ``map_func`` and emit each result.

		Aborts early (shutting the pool down) when ``_check_stop`` signals.
		"""
		self.pool = ParMap(self.map_func, num_workers = self.num_workers, context_func = self.context)
		for response in self.pool.map(self._generate_input()):
			if self._check_stop():
				self.pool.stop()
				return

			self.processed += 1
			self._output(response)
Esempio n. 4
0
	def test_vary_time(self):
		"""Parallel map must match the sequential result even with variable task durations."""
		def f(a):
			# burn a random amount of CPU so task durations vary
			for i in range(0, random.randint(1, 10000)):
				random.random()

			return ((a + 1) % 100000, a + 1)

		p = ParMap(f)
		t_par = Timer("parmap")
		calculated_values = [v for v in p.map(range(100000))]
		t_par.stop()
		t_map = Timer("map")
		# list() is required: under Python 3 map() returns an iterator,
		# which has no len() and would not survive both zip() and len() below
		correct_values = list(map(f, range(100000)))
		t_map.stop()
		for calculated_value, correct_value in zip(calculated_values, correct_values):
			self.assertEqual(correct_value, calculated_value)
		self.assertEqual(len(correct_values), len(calculated_values))
Esempio n. 5
0
	def test_stop(self):
		"""stop() must terminate an in-flight map so the mapping thread can exit."""
		def f(a):
			# small CPU-bound busy-loop per item
			for i in range(0,1000):
				random.random()

			return ((a + 1) % 100000, a+1)

		def m():
			# runs in a background thread; `p` is bound below before t.start()
			p.map(range(1000000))

		p = ParMap(f)
		t = threading.Thread(target=m)
		t.start()
		# let the map get well underway before requesting a stop
		time.sleep(10)
		p.stop()
		# give the pool time to wind down before checking the thread exited
		time.sleep(10)
		self.assertTrue(not t.is_alive())
Esempio n. 6
0
class FlatMapOperation(MapOperation):
	"""Calls the map function for every value in the dataset and then flattens the result"""

	def __init__(self, source, map_func, num_workers=multiprocessing.cpu_count(), context = None, name = "FlatMap", **kwargs):
		super(FlatMapOperation, self).__init__(source, map_func, num_workers, context, name = name, **kwargs)

	def run(self):
		"""Map every input value and emit each element of every (iterable) response.

		Aborts early (shutting the pool down) when ``_check_stop`` signals.
		"""
		self.pool = ParMap(self.map_func, num_workers = self.num_workers, context_func = self.context)
		for response in self.pool.map(self._generate_input()):
			if self._check_stop():
				self.pool.stop()
				return

			self.processed += 1
			# flatten: each response is expected to be iterable
			for r in response:
				self._output(r)
Esempio n. 7
0
class FoldOperation(Operation):
	"""Folds the dataset to a single value using a combine function.

	Batches of input values are folded in parallel by the worker pool and
	the partial results are then folded together.  ``zero_value`` is
	re-applied for every partial fold, so it is assumed to be an identity
	element for ``fold_func``.
	"""
	def __init__(self, source, zero_value, fold_func, num_workers=multiprocessing.cpu_count(), context_func = None):
		super(FoldOperation, self).__init__(source, num_workers, context_func)
		# worker pool; created lazily in run()
		self.pool = None
		self.zero_value = zero_value
		self.fold_func = fold_func

	def __str__(self):
		return "Fold"

	def _generate_input_batches(self, chunksize):
		"""Yield input values grouped into lists of at most ``chunksize``."""
		batch = []
		for value in self._generate_input():
			batch.append(value)

			if len(batch) == chunksize:
				yield batch
				batch = []
		# only yield the trailing partial batch when non-empty; the original
		# yielded an empty batch whenever the input length was a multiple of
		# chunksize, costing a useless task and an extra zero_value fold
		if batch:
			yield batch

	def _fold_batch(self, batch):
		"""Sequentially fold one batch, starting from ``zero_value``."""
		result = self.zero_value
		for value in batch:
			result = self.fold_func(result, value)
		return result

	def run(self, chunksize = 10):
		"""Fold the whole input stream down to one value and emit it."""
		self.pool = ParMap(self._fold_batch, num_workers = self.num_workers, context_func = self.context_func)
		result = []

		for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
			if self._check_stop():
				self.pool.stop()
				return

			result.append(response)

			# compact accumulated partial results so `result` stays bounded
			if len(result) == chunksize:
				result = [self._fold_batch(result)]

			self.processed += 1

		# fold the remaining partial results into the final value
		self._output(self._fold_batch(result))
Esempio n. 8
0
	def run(self):
		"""Stream every input value through ``map_func`` in parallel and emit each result.

		Aborts early (shutting the pool down) when ``_check_stop`` signals.
		"""
		self.pool = ParMap(self.map_func, num_workers = self.num_workers, context_func = self.context_func)
		for response in self.pool.map(self._generate_input()):
			if self._check_stop():
				self.pool.stop()
				return

			self.processed += 1
			self._output(response)
Esempio n. 9
0
class MapOperation(Operation):
	"""Applies ``map_func`` to every value of the source dataset in parallel."""

	def __init__(self, source, map_func, num_workers=multiprocessing.cpu_count(), context_func = None):
		super(MapOperation, self).__init__(source, num_workers, context_func)
		self.map_func = map_func
		# worker pool; created lazily in run()
		self.pool = None

	def __str__(self):
		return "Map"

	def run(self):
		"""Stream every input value through ``map_func`` and emit each result.

		Aborts early (shutting the pool down) when ``_check_stop`` signals.
		"""
		self.pool = ParMap(self.map_func, num_workers = self.num_workers, context_func = self.context_func)
		for response in self.pool.map(self._generate_input()):
			if self._check_stop():
				self.pool.stop()
				return

			self.processed += 1
			self._output(response)
Esempio n. 10
0
	def test_plus_one(self):
		"""Parallel map must match sequential map and beat it on wall-clock time."""
		def f(a):
			# small CPU-bound busy-loop per item
			for i in range(0, 1000):
				random.random()

			return ((a + 1) % 100000, a + 1)

		p = ParMap(f)
		t_par = Timer("parmap")
		calculated_values = [v for v in p.map(range(1000000))]
		t_par.stop()
		t_map = Timer("map")
		# list() is required: under Python 3 map() returns an iterator,
		# which has no len() and would not survive both zip() and len() below
		correct_values = list(map(f, range(1000000)))
		t_map.stop()
		self.assertLessEqual(t_par.seconds, 0.8*t_map.seconds)
		for calculated_value, correct_value in zip(calculated_values, correct_values):
			self.assertEqual(correct_value, calculated_value)
		self.assertEqual(len(correct_values), len(calculated_values))
Esempio n. 11
0
class FoldOperation(Operation):
	"""Folds the dataset using a combine function.

	Batches of input values are folded in parallel by the worker pool and
	the partial results are then folded together.  ``zero_value`` is
	re-applied for every partial fold, so it is assumed to be an identity
	element for ``fold_func``.
	"""
	def __init__(self, source, zero_value, fold_func, num_workers=multiprocessing.cpu_count(), context = None, name = "Fold", **kwargs):
		super(FoldOperation, self).__init__(source, num_workers, context, name = name, **kwargs)
		# worker pool; created lazily in run()
		self.pool = None
		self.zero_value = zero_value
		self.fold_func = fold_func

	def _generate_input_batches(self, chunksize):
		"""Yield input values grouped into lists of at most ``chunksize``."""
		batch = []
		for value in self._generate_input():
			batch.append(value)

			if len(batch) == chunksize:
				yield batch
				batch = []
		# only yield the trailing partial batch when non-empty; the original
		# yielded an empty batch whenever the input length was a multiple of
		# chunksize, costing a useless task and an extra zero_value fold
		if batch:
			yield batch

	def _fold_batch(self, batch):
		"""Sequentially fold one batch, starting from ``zero_value``."""
		result = self.zero_value
		for value in batch:
			result = self.fold_func(result, value)
		return result

	def run(self, chunksize = 10):
		"""Fold the whole input stream down to one value and emit it."""
		self.pool = ParMap(self._fold_batch, num_workers = self.num_workers, context_func = self.context)
		result = []

		for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
			if self._check_stop():
				self.pool.stop()
				return

			result.append(response)

			# compact accumulated partial results so `result` stays bounded
			if len(result) == chunksize:
				result = [self._fold_batch(result)]

			self.processed += 1

		# fold the remaining partial results into the final value
		self._output(self._fold_batch(result))
Esempio n. 12
0
class FlatMapOperation(MapOperation):
	"""Calls the map function for every value in the dataset and then flattens the result"""

	def __init__(self, source, map_func, num_workers=multiprocessing.cpu_count(), context_func = None):
		super(FlatMapOperation, self).__init__(source, map_func, num_workers, context_func)

	def __str__(self):
		return "FlatMap"

	def run(self):
		"""Map every input value and emit each element of every (iterable) response.

		Aborts early (shutting the pool down) when ``_check_stop`` signals.
		"""
		self.pool = ParMap(self.map_func, num_workers = self.num_workers, context_func = self.context_func)
		for response in self.pool.map(self._generate_input()):
			if self._check_stop():
				self.pool.stop()
				return

			self.processed += 1
			# flatten: each response is expected to be iterable
			for r in response:
				self._output(r)
Esempio n. 13
0
	def run(self, chunksize = 10):
		"""Fold the whole input stream down to a single value and emit it.

		Input values are grouped into batches of ``chunksize`` and folded in
		parallel by the worker pool; the partial results are folded again
		locally.  NOTE(review): this re-applies ``zero_value`` for every
		partial fold, so it assumes ``zero_value`` is an identity element
		for ``fold_func`` — confirm with callers.
		"""
		self.pool = ParMap(self._fold_batch, num_workers = self.num_workers, context_func = self.context_func)
		result = []

		for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
			if self._check_stop():
				self.pool.stop()
				return

			result.append(response)

			# compact accumulated partial results so `result` stays bounded
			if len(result) == chunksize:
				result = [self._fold_batch(result)]

			self.processed += 1
		
		# fold the remaining partial results into the final value
		self._output(self._fold_batch(result))