def run(self):
    """Stream every input value through ``map_func`` in parallel and emit each result.

    The worker pool is created lazily here (not in ``__init__``) so the
    operation object stays cheap to construct.  When a stop is requested the
    pool is shut down and remaining work is dropped.
    """
    self.pool = ParMap(self.map_func, num_workers=self.num_workers,
                       context_func=self.context)
    for response in self.pool.map(self._generate_input()):
        if self._check_stop():
            # Abort requested: tear the pool down and emit nothing further.
            self.pool.stop()
            return
        self.processed += 1
        self._output(response)
def run(self, chunksize = 10):
    """Fold the source dataset in parallel batches of ``chunksize`` values.

    Batches are pre-folded by the worker pool; the partial results are
    collected and re-folded whenever ``chunksize`` of them have accumulated,
    and the final combined value is emitted downstream.
    """
    self.pool = ParMap(self._fold_batch, num_workers = self.num_workers, context_func = self.context)
    result = []
    for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
        if self._check_stop():
            # Abort requested: stop the workers and emit nothing.
            self.pool.stop()
            return
        result.append(response)
        if len(result) == chunksize:
            # Collapse accumulated partial results into one value.
            # NOTE(review): every re-fold starts from zero_value again, so
            # this assumes zero_value is an identity element for fold_func —
            # confirm against callers.
            result = [self._fold_batch(result)]
        self.processed += 1
    # Final fold of whatever partials remain (possibly none).
    self._output(self._fold_batch(result))
class MapOperation(Operation):
    """Applies ``map_func`` to every value of the source dataset in parallel."""

    def __init__(self, source, map_func, num_workers = multiprocessing.cpu_count(), context = None, name = "Map", **kwargs):
        super(MapOperation, self).__init__(source, num_workers, context, name = name, **kwargs)
        self.map_func = map_func
        # Pool is created lazily in run() so construction stays cheap.
        self.pool = None

    def run(self):
        """Map all inputs through the worker pool and emit each result."""
        self.pool = ParMap(self.map_func, num_workers = self.num_workers,
                           context_func = self.context)
        for response in self.pool.map(self._generate_input()):
            if self._check_stop():
                # Abort requested: shut the pool down and stop emitting.
                self.pool.stop()
                return
            self.processed += 1
            self._output(response)
def test_vary_time(self):
    """Parallel results must match the serial map even with varying task durations."""
    def f(a):
        # Busy-work of random length so per-task runtime varies; the return
        # value itself is a deterministic function of `a`.
        for i in range(0, random.randint(1, 10000)):
            random.random()
        return ((a + 1) % 100000, a + 1)

    p = ParMap(f)
    t_par = Timer("parmap")
    calculated_values = [v for v in p.map(range(100000))]
    t_par.stop()

    t_map = Timer("map")
    # list(...) so len() below works on Python 3, where map() returns an
    # iterator (the original bare map() raised TypeError on len()).
    correct_values = list(map(f, range(100000)))
    t_map.stop()

    for calculated_value, correct_value in zip(calculated_values, correct_values):
        self.assertEqual(correct_value, calculated_value)
    self.assertEqual(len(correct_values), len(calculated_values))
def test_stop(self):
    """stop() must terminate an in-flight map so the background thread exits."""
    def slow_increment(a):
        # Fixed amount of busy-work per item.
        for _ in range(0, 1000):
            random.random()
        return ((a + 1) % 100000, a + 1)

    def drain():
        pool.map(range(1000000))

    pool = ParMap(slow_increment)
    worker = threading.Thread(target=drain)
    worker.start()
    # Let the map get well underway, then request a stop and give the
    # workers time to wind down.
    time.sleep(10)
    pool.stop()
    time.sleep(10)
    self.assertTrue(not worker.is_alive())
class FlatMapOperation(MapOperation):
    """Calls the map function for every value in the dataset and then flattens the result"""

    def __init__(self, source, map_func, num_workers=multiprocessing.cpu_count(), context = None, name = "FlatMap", **kwargs):
        super(FlatMapOperation, self).__init__(source, map_func, num_workers, context, name = name, **kwargs)

    def run(self):
        """Map every input, then emit each element of each mapped response."""
        self.pool = ParMap(self.map_func, num_workers = self.num_workers,
                           context_func = self.context)
        for response in self.pool.map(self._generate_input()):
            if self._check_stop():
                # Abort requested: shut the pool down and stop emitting.
                self.pool.stop()
                return
            self.processed += 1
            # Flatten: each response is expected to be iterable.
            for r in response:
                self._output(r)
class FoldOperation(Operation):
    """Folds the dataset using a combine function"""

    def __init__(self, source, zero_value, fold_func, num_workers=multiprocessing.cpu_count(), context_func = None):
        super(FoldOperation, self).__init__(source, num_workers, context_func)
        # Pool is created lazily in run() so construction stays cheap.
        self.pool = None
        self.zero_value = zero_value
        self.fold_func = fold_func

    def __str__(self):
        return "Fold"

    def _generate_input_batches(self, chunksize):
        """Yield the input stream grouped into lists of at most ``chunksize``."""
        batch = []
        for value in self._generate_input():
            batch.append(value)
            if len(batch) == chunksize:
                yield batch
                batch = []
        # Only emit the trailing partial batch when non-empty; the previous
        # unconditional yield dispatched a useless empty batch whenever the
        # input length was a multiple of chunksize, injecting an extra
        # zero_value into the fold.
        if batch:
            yield batch

    def _fold_batch(self, batch):
        """Reduce one batch to a single value, starting from ``zero_value``."""
        result = self.zero_value
        for value in batch:
            result = self.fold_func(result, value)
        return result

    def run(self, chunksize = 10):
        """Fold the dataset in parallel and emit the single combined value."""
        self.pool = ParMap(self._fold_batch, num_workers = self.num_workers,
                           context_func = self.context_func)
        result = []
        for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
            if self._check_stop():
                self.pool.stop()
                return
            result.append(response)
            if len(result) == chunksize:
                # Collapse accumulated partials; assumes zero_value is an
                # identity element for fold_func.
                result = [self._fold_batch(result)]
            self.processed += 1
        self._output(self._fold_batch(result))
def run(self):
    """Stream every input value through ``map_func`` in parallel and emit each result.

    Shuts the pool down and returns early when a stop was requested.
    """
    self.pool = ParMap(self.map_func, num_workers=self.num_workers,
                       context_func=self.context_func)
    for response in self.pool.map(self._generate_input()):
        if self._check_stop():
            # Abort requested: tear the pool down and emit nothing further.
            self.pool.stop()
            return
        self.processed += 1
        self._output(response)
class MapOperation(Operation):
    """Applies ``map_func`` to every value of the source dataset in parallel."""

    def __init__(self, source, map_func, num_workers=multiprocessing.cpu_count(), context_func = None):
        super(MapOperation, self).__init__(source, num_workers, context_func)
        self.map_func = map_func
        # Pool is created lazily in run() so construction stays cheap.
        self.pool = None

    def __str__(self):
        return "Map"

    def run(self):
        """Map all inputs through the worker pool and emit each result."""
        self.pool = ParMap(self.map_func, num_workers = self.num_workers,
                           context_func = self.context_func)
        for response in self.pool.map(self._generate_input()):
            if self._check_stop():
                # Abort requested: shut the pool down and stop emitting.
                self.pool.stop()
                return
            self.processed += 1
            self._output(response)
def test_plus_one(self):
    """Parallel map must match the serial map and be meaningfully faster."""
    def f(a):
        # Fixed busy-work per item; the return value is deterministic in `a`.
        for i in range(0, 1000):
            random.random()
        return ((a + 1) % 100000, a + 1)

    p = ParMap(f)
    t_par = Timer("parmap")
    calculated_values = [v for v in p.map(range(1000000))]
    t_par.stop()

    t_map = Timer("map")
    # list(...) so len() below works on Python 3, where map() returns an
    # iterator (the original bare map() raised TypeError on len()).
    correct_values = list(map(f, range(1000000)))
    t_map.stop()

    # Require at least a 20% speedup over the serial baseline.
    self.assertLessEqual(t_par.seconds, 0.8 * t_map.seconds)
    for calculated_value, correct_value in zip(calculated_values, correct_values):
        self.assertEqual(correct_value, calculated_value)
    self.assertEqual(len(correct_values), len(calculated_values))
class FoldOperation(Operation):
    """Folds the dataset using a combine function"""

    def __init__(self, source, zero_value, fold_func, num_workers=multiprocessing.cpu_count(), context = None, name = "Fold", **kwargs):
        super(FoldOperation, self).__init__(source, num_workers, context, name = name, **kwargs)
        # Pool is created lazily in run() so construction stays cheap.
        self.pool = None
        self.zero_value = zero_value
        self.fold_func = fold_func

    def _generate_input_batches(self, chunksize):
        """Yield the input stream grouped into lists of at most ``chunksize``."""
        batch = []
        for value in self._generate_input():
            batch.append(value)
            if len(batch) == chunksize:
                yield batch
                batch = []
        # Only emit the trailing partial batch when non-empty; the previous
        # unconditional yield dispatched a useless empty batch whenever the
        # input length was a multiple of chunksize, injecting an extra
        # zero_value into the fold.
        if batch:
            yield batch

    def _fold_batch(self, batch):
        """Reduce one batch to a single value, starting from ``zero_value``."""
        result = self.zero_value
        for value in batch:
            result = self.fold_func(result, value)
        return result

    def run(self, chunksize = 10):
        """Fold the dataset in parallel and emit the single combined value."""
        self.pool = ParMap(self._fold_batch, num_workers = self.num_workers,
                           context_func = self.context)
        result = []
        for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
            if self._check_stop():
                self.pool.stop()
                return
            result.append(response)
            if len(result) == chunksize:
                # Collapse accumulated partials; assumes zero_value is an
                # identity element for fold_func.
                result = [self._fold_batch(result)]
            self.processed += 1
        self._output(self._fold_batch(result))
class FlatMapOperation(MapOperation):
    """Calls the map function for every value in the dataset and then flattens the result"""

    def __init__(self, source, map_func, num_workers=multiprocessing.cpu_count(), context_func = None):
        super(FlatMapOperation, self).__init__(source, map_func, num_workers, context_func)

    def __str__(self):
        return "FlatMap"

    def run(self):
        """Map every input, then emit each element of each mapped response."""
        self.pool = ParMap(self.map_func, num_workers = self.num_workers,
                           context_func = self.context_func)
        for response in self.pool.map(self._generate_input()):
            if self._check_stop():
                # Abort requested: shut the pool down and stop emitting.
                self.pool.stop()
                return
            self.processed += 1
            # Flatten: each response is expected to be iterable.
            for r in response:
                self._output(r)
def run(self, chunksize = 10):
    """Fold the source dataset in parallel batches of ``chunksize`` values.

    Batches are pre-folded by the worker pool; the partial results are
    collected and re-folded whenever ``chunksize`` of them have accumulated,
    and the final combined value is emitted downstream.
    """
    self.pool = ParMap(self._fold_batch, num_workers = self.num_workers, context_func = self.context_func)
    result = []
    for response in self.pool.map(self._generate_input_batches(chunksize = chunksize)):
        if self._check_stop():
            # Abort requested: stop the workers and emit nothing.
            self.pool.stop()
            return
        result.append(response)
        if len(result) == chunksize:
            # Collapse accumulated partial results into one value.
            # NOTE(review): every re-fold starts from zero_value again, so
            # this assumes zero_value is an identity element for fold_func —
            # confirm against callers.
            result = [self._fold_batch(result)]
        self.processed += 1
    # Final fold of whatever partials remain (possibly none).
    self._output(self._fold_batch(result))