def run_parallel(self):
    """ Execute on slices processed by workers in parallel.
    """
    # MAP step
    # Create several tasks (one per worker) and add to job queue
    N = self.X.shape[0]
    for start, stop in sliceGenerator(N, self.nWorkers):
        # SHARED MEM means we only put start/stop ids on the queue.
        # This is much cheaper (hopefully) for inter-proc communication.
        self.JobQ.put((start, stop))

    # WAIT
    # It is crucial to force the main thread to sleep now,
    # so other processes can take over the CPU.
    self.JobQ.join()

    # REDUCE step
    # Aggregate results across all workers
    nDone = 0
    SS = 0
    while nDone < self.nWorkers:
        if not self.ResultQ.empty():
            SSchunk = self.ResultQ.get()
            if nDone == 0:
                SS = SSchunk
            else:
                SS += SSchunk
            nDone += 1
        else:
            time.sleep(0.02)  # wait 20 ms before checking again
    return SS
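# The worker process that consumes these (start, stop) jobs is not shown here.
# Below is a minimal, hypothetical sketch, assuming each worker holds references
# to the same X and Mu (ideally views onto shared memory, e.g. a
# multiprocessing.RawArray) and to the same JobQ (a JoinableQueue) and ResultQ.
# The class name SharedMemWorker is illustrative, not this project's real API,
# and a real worker would also watch for a shutdown sentinel (e.g. None).
import multiprocessing


class SharedMemWorker(multiprocessing.Process):
    """ Worker that pulls (start, stop) jobs and summarizes its slice. """

    def __init__(self, X, Mu, JobQ, ResultQ):
        super(SharedMemWorker, self).__init__()
        self.X = X          # ideally a numpy view onto a shared-memory buffer
        self.Mu = Mu
        self.JobQ = JobQ    # multiprocessing.JoinableQueue
        self.ResultQ = ResultQ

    def run(self):
        while True:
            start, stop = self.JobQ.get()  # blocks until a job arrives
            SSchunk = calcLocalParamsAndSummarize(
                self.X, self.Mu, start, stop)
            self.ResultQ.put(SSchunk)
            self.JobQ.task_done()          # lets the master's JobQ.join() return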
def test_correctness_serial(self):
    ''' Verify that the local step works as expected.

    No parallelization here.
    Just verifying that we can split computation up into >1 slice,
    add up results from all slices and still get the same answer.
    '''
    print('')
    # Version A: summarize entire dataset
    SSall = calcLocalParamsAndSummarize(self.X, self.Mu)

    # Version B: summarize each slice separately, then aggregate
    N = self.X.shape[0]
    SSagg = None
    for start, stop in sliceGenerator(N, self.nWorkers):
        SSslice = calcLocalParamsAndSummarize(self.X, self.Mu, start, stop)
        if start == 0:
            SSagg = SSslice
        else:
            SSagg += SSslice

    # Both A and B better give the same answer
    assert np.allclose(SSall.CountVec, SSagg.CountVec)
    assert np.allclose(SSall.DataStatVec, SSagg.DataStatVec)
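# calcLocalParamsAndSummarize and the statistics object it returns are defined
# elsewhere in the codebase. The stand-in below is only a plausible sketch,
# assuming a k-means-style hard assignment of each row of X to its nearest row
# of Mu; the real DataStatVec may track different statistics. Its point is to
# make concrete why the test above must pass: every field is a plain sum over
# rows, so summarizing slices and adding them up equals summarizing all of X.
import numpy as np


class SuffStats(object):
    """ Per-cluster sufficient statistics container that supports +=. """

    def __init__(self, CountVec, DataStatVec):
        self.CountVec = CountVec
        self.DataStatVec = DataStatVec

    def __iadd__(self, other):
        self.CountVec = self.CountVec + other.CountVec
        self.DataStatVec = self.DataStatVec + other.DataStatVec
        return self


def calcLocalParamsAndSummarize(X, Mu, start=None, stop=None):
    """ Assign rows X[start:stop] to the nearest row of Mu, then summarize. """
    if start is not None:
        X = X[start:stop]
    K = Mu.shape[0]
    # Squared Euclidean distance from every data row to every cluster center
    Dist = np.sum((X[:, np.newaxis, :] - Mu[np.newaxis, :, :]) ** 2, axis=2)
    Z = Dist.argmin(axis=1)                       # hard assignments
    CountVec = np.bincount(Z, minlength=K).astype(np.float64)
    DataStatVec = np.zeros((K, X.shape[1]))
    for k in range(K):
        DataStatVec[k] = X[Z == k].sum(axis=0)    # per-cluster sum of rows
    return SuffStats(CountVec, DataStatVec)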
def run_serial(self):
    """ Execute on slices processed in serial by master process.
    """
    N = self.X.shape[0]
    SSagg = None
    for start, stop in sliceGenerator(N, self.nWorkers):
        SSslice = calcLocalParamsAndSummarize(self.X, self.Mu, start, stop)
        if start == 0:
            SSagg = SSslice
        else:
            SSagg += SSslice
    return SSagg
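# sliceGenerator is also defined elsewhere. A minimal sketch of what it
# presumably does, assuming it yields one contiguous (start, stop) pair per
# worker and that the slices together cover all N rows exactly once:
def sliceGenerator(N=0, nWorkers=0):
    """ Yield (start, stop) row-index pairs, one slice per worker. """
    sliceSize = N // nWorkers
    for workerID in range(nWorkers):
        start = workerID * sliceSize
        stop = (workerID + 1) * sliceSize
        if workerID == nWorkers - 1:
            stop = N  # the last slice absorbs any leftover rows
        yield start, stop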
def run_parallel(self):
    """ Execute on slices processed by workers in parallel.
    """
    # MAP step
    # Create several tasks (one per worker) and add to job queue
    N = self.X.shape[0]
    for start, stop in sliceGenerator(N, self.nWorkers):
        # No shared memory here: each job carries its own copy of the slice
        self.JobQ.put((self.X[start:stop], self.Mu, None, None))
        # self.JobQ.put((self.X, self.Mu, start, stop))

    # WAIT
    # Pause at this line until all jobs are marked complete.
    self.JobQ.join()

    # REDUCE step
    # Aggregate results across all workers
    SS = self.ResultQ.get()
    while not self.ResultQ.empty():
        SSchunk = self.ResultQ.get()
        SS += SSchunk
    return SS
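# A matching worker for this variant, again only a hypothetical sketch: with no
# shared memory, each job tuple already carries its own copy of the data slice,
# which is simpler but makes inter-process communication more expensive than
# the index-only jobs used by the shared-memory version above. The 4-tuple
# layout mirrors the JobQ.put call; the class name Worker is illustrative.
import multiprocessing


class Worker(multiprocessing.Process):
    """ Worker that pulls (Xchunk, Mu, start, stop) jobs off the queue. """

    def __init__(self, JobQ, ResultQ):
        super(Worker, self).__init__()
        self.JobQ = JobQ    # multiprocessing.JoinableQueue
        self.ResultQ = ResultQ

    def run(self):
        while True:
            Xchunk, Mu, start, stop = self.JobQ.get()  # blocks until work arrives
            SSchunk = calcLocalParamsAndSummarize(Xchunk, Mu, start, stop)
            self.ResultQ.put(SSchunk)
            self.JobQ.task_done()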