Example #1
    def run_parallel(self):
        """ Execute on slices processed by workers in parallel.
        """
        # MAP step
        # Create several tasks (one per worker) and add to job queue
        N = self.X.shape[0]
        for start, stop in sliceGenerator(N, self.nWorkers):
            # SHARED MEM means we only put start/stop ids on queue
            # This is much cheaper (hopefully) for inter-proc communication
            self.JobQ.put((start, stop))

        # WAIT
        # It is crucial to make the main thread block here,
        # so the worker processes can take over the CPU
        self.JobQ.join()

        # REDUCE step
        # Aggregate results across all workers
        nDone = 0
        SS = 0
        while nDone < self.nWorkers:
            if not self.ResultQ.empty():
                SSchunk = self.ResultQ.get()
                if nDone == 0:
                    SS = SSchunk
                else:
                    SS += SSchunk
                nDone += 1
            else:
                time.sleep(0.02)  # wait 20 ms before checking again
        return SS
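
Every example here calls a sliceGenerator helper that is not shown. A minimal sketch, assuming it simply partitions the N rows into one contiguous half-open [start, stop) interval per worker (the implementation below is a guess; only the call signature comes from the examples):

def sliceGenerator(N, nWorkers):
    """ Yield (start, stop) pairs that partition range(N) across workers.

    Hypothetical implementation: each worker gets roughly N / nWorkers
    contiguous rows; the last worker absorbs any remainder.
    """
    batchSize = N // nWorkers
    for workerID in range(nWorkers):
        start = workerID * batchSize
        stop = (workerID + 1) * batchSize
        if workerID == nWorkers - 1:
            stop = N  # last slice picks up the leftover rows
        yield start, stop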
Example #2
    def test_correctness_serial(self):
        ''' Verify that the local step works as expected.

        No parallelization here.
        Just verifying that we can split computation up into >1 slice,
        add up results from all slices and still get the same answer.
        '''
        print('')

        # Version A: summarize entire dataset
        SSall = calcLocalParamsAndSummarize(self.X, self.Mu)

        # Version B: summarize each slice separately, then aggregate
        N = self.X.shape[0]
        SSagg = None
        for start, stop in sliceGenerator(N, self.nWorkers):
            SSslice = calcLocalParamsAndSummarize(self.X, self.Mu, start, stop)
            if start == 0:
                SSagg = SSslice
            else:
                SSagg += SSslice

        # Versions A and B must give the same answer
        assert np.allclose(SSall.CountVec, SSagg.CountVec)
        assert np.allclose(SSall.DataStatVec, SSagg.DataStatVec)
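
The asserts above also pin down what calcLocalParamsAndSummarize must return: an object exposing CountVec and DataStatVec arrays that supports +. A minimal sketch of such a sufficient-statistics container (the class name and constructor are assumptions; only the two attribute names come from the test):

import numpy as np

class SuffStatBag(object):
    """ Hypothetical container for additive sufficient statistics. """

    def __init__(self, CountVec, DataStatVec):
        self.CountVec = np.asarray(CountVec, dtype=np.float64)
        self.DataStatVec = np.asarray(DataStatVec, dtype=np.float64)

    def __add__(self, other):
        # Statistics are sums over data items, so merging two slices
        # is just elementwise addition. Python reuses this for += too.
        return SuffStatBag(self.CountVec + other.CountVec,
                           self.DataStatVec + other.DataStatVec)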
Example #3
    def run_serial(self):
        """ Execute on slices processed serially by the master process.
        """
        N = self.X.shape[0]
        SSagg = None
        for start, stop in sliceGenerator(N, self.nWorkers):
            SSslice = calcLocalParamsAndSummarize(self.X, self.Mu, start, stop)
            if start == 0:
                SSagg = SSslice
            else:
                SSagg += SSslice
        return SSagg
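
run_serial gives a ground-truth answer to check the parallel version against, mirroring what test_correctness_serial does for slices. A hypothetical usage sketch (the runner class name and constructor arguments are assumptions, not from the source):

runner = ParallelRunner(X, Mu, nWorkers=4)  # hypothetical class / args
SSserial = runner.run_serial()
SSparallel = runner.run_parallel()
assert np.allclose(SSserial.CountVec, SSparallel.CountVec)
assert np.allclose(SSserial.DataStatVec, SSparallel.DataStatVec)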
Example #4
    def run_parallel(self):
        """ Execute on slices processed by workers in parallel.
        """
        # MAP step
        # Create several tasks (one per worker) and add to job queue
        N = self.X.shape[0]
        for start, stop in sliceGenerator(N, self.nWorkers):
            # Send the data slice itself; pickling it for the queue
            # is the expensive part of this variant.
            self.JobQ.put((self.X[start:stop], self.Mu, None, None))
            # Alternative: send X with slice indices instead of a copy.
            # self.JobQ.put((self.X, self.Mu, start, stop))

        # Pause at this line until all jobs are marked complete.
        self.JobQ.join()

        # REDUCE step
        # Aggregate results across all workers.
        # Block on get() once per job instead of polling ResultQ.empty(),
        # which can report empty before every worker's result has arrived.
        SS = None
        for _ in range(self.nWorkers):
            SSchunk = self.ResultQ.get()
            if SS is None:
                SS = SSchunk
            else:
                SS += SSchunk
        return SS
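
Both run_parallel variants only terminate if each worker pulls a task tuple off JobQ, puts its summary on ResultQ, and calls task_done() so that JobQ.join() can return. The worker class is not shown in the source; a minimal sketch, assuming JobQ is a multiprocessing.JoinableQueue, workers run as daemons, and calcLocalParamsAndSummarize is importable in the worker process:

import multiprocessing

class Worker(multiprocessing.Process):
    """ Hypothetical worker: consume task tuples, produce summaries. """

    def __init__(self, JobQ, ResultQ):
        super(Worker, self).__init__()
        self.JobQ = JobQ
        self.ResultQ = ResultQ
        self.daemon = True  # exit when the master process exits

    def run(self):
        while True:
            Xchunk, Mu, start, stop = self.JobQ.get()
            SSchunk = calcLocalParamsAndSummarize(Xchunk, Mu, start, stop)
            # Publish the result before task_done(), so JobQ.join()
            # cannot return while this result is still unsent.
            self.ResultQ.put(SSchunk)
            self.JobQ.task_done()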