Esempio n. 1
0
    def test_correctness_serial(self):
        ''' Verify that slice-wise summaries add up to the full summary.

        No parallelization here.
        The dataset self.X is summarized once in a single call, and once
        slice-by-slice using the (start, stop) pairs produced by
        sliceGenerator(N, self.nWorkers). The per-slice results are
        combined with += and must match the single-call result on both
        the CountVec and DataStatVec fields.
        '''
        print('')

        # Version A: summarize entire dataset in one call
        SSall = calcLocalParamsAndSummarize(self.X, self.Mu)

        # Version B: summarize each slice separately, then aggregate
        N = self.X.shape[0]
        SSagg = None
        for start, stop in sliceGenerator(N, self.nWorkers):
            SSslice = calcLocalParamsAndSummarize(self.X, self.Mu, start, stop)
            # Seed the accumulator with whichever slice arrives first.
            # Testing the accumulator itself (rather than start == 0) keeps
            # the aggregation correct regardless of the order or the
            # starting offset of the slices the generator yields.
            if SSagg is None:
                SSagg = SSslice
            else:
                SSagg += SSslice

        # Fail with a readable message instead of an AttributeError on None
        assert SSagg is not None, 'sliceGenerator produced no slices'

        # Both A and B better give the same answer
        assert np.allclose(SSall.CountVec, SSagg.CountVec)
        assert np.allclose(SSall.DataStatVec, SSagg.DataStatVec)
Esempio n. 2
0
 def run_baseline(self):
     """ Summarize the whole matrix with a single call (no slicing).

     Runs in the calling (master) process, forwarding this object's
     sleepPerUnit and returnVal settings to the summary routine.

     Returns
     -------
     The value produced by calcLocalParamsAndSummarize for self.Xsh
     and self.Msh.
     """
     return calcLocalParamsAndSummarize(
         self.Xsh, self.Msh,
         sleepPerUnit=self.sleepPerUnit,
         returnVal=self.returnVal)
Esempio n. 3
0
 def run_serial(self):
     """ Execute on slices processed in serial by master process.

     Each (start, stop) pair from sliceGenerator(N, self.nWorkers) is
     summarized on its own and the results are combined with +=.

     Returns
     -------
     The aggregated summary, or None if the generator yields no slices.
     """
     N = self.X.shape[0]
     SSagg = None
     for start, stop in sliceGenerator(N, self.nWorkers):
         SSslice = calcLocalParamsAndSummarize(self.X, self.Mu, start, stop)
         # Seed the accumulator with whichever slice arrives first.
         # Checking the accumulator (not start == 0) means the result
         # does not depend on slice order or on a slice starting at 0.
         if SSagg is None:
             SSagg = SSslice
         else:
             SSagg += SSslice
     return SSagg
Esempio n. 4
0
    def run(self):
        """ Process jobs from self.JobQueue until a None sentinel arrives.

        Every job is a 4-item sequence (X, Mu, start, stop). Its summary
        is put on self.ResultQueue, after which the job is marked done
        on self.JobQueue.
        """
        # iter(callable, sentinel) keeps calling JobQueue.get() and stops
        # as soon as the value it returns equals None.
        for X, Mu, start, stop in iter(self.JobQueue.get, None):
            summary = calcLocalParamsAndSummarize(
                X, Mu, start=start, stop=stop)
            self.ResultQueue.put(summary)
            self.JobQueue.task_done()
Esempio n. 5
0
    def run(self):
        """ Process jobs from self.JobQueue until a None sentinel arrives.

        Every job is a (start, stop) pair that selects the range of
        self.Xsh to summarize against self.Msh. The summary is put on
        self.ResultQueue, after which the job is marked done on
        self.JobQueue.
        """
        # iter(callable, sentinel) keeps calling JobQueue.get() and stops
        # as soon as the value it returns equals None.
        for start, stop in iter(self.JobQueue.get, None):
            summary = calcLocalParamsAndSummarize(
                self.Xsh, self.Msh,
                start=start,
                stop=stop,
                returnVal=self.returnVal,
                sleepPerUnit=self.sleepPerUnit)
            self.ResultQueue.put(summary)
            self.JobQueue.task_done()
Esempio n. 6
0
    def test_correctness_parallel(self):
        """ Check the parallel result against a single-call baseline.

        self.run_parallel() supplies the parallel answer; the baseline is
        one call to calcLocalParamsAndSummarize on self.X and self.Mu.
        The first three CountVec entries of each are printed, then both
        CountVec and DataStatVec must agree within np.allclose tolerance.
        """
        print('')
        parallelSS = self.run_parallel()

        # Reference answer computed directly in this process
        baselineSS = calcLocalParamsAndSummarize(self.X, self.Mu)

        print("Parallel Answer: CountVec = ", parallelSS.CountVec[:3])
        print("   Naive Answer: CountVec = ", baselineSS.CountVec[:3])

        # Compare every summary field that must match
        for fieldName in ('CountVec', 'DataStatVec'):
            expected = getattr(baselineSS, fieldName)
            observed = getattr(parallelSS, fieldName)
            assert np.allclose(expected, observed)
Esempio n. 7
0
 def run_baseline(self):
     """ Summarize the whole matrix with a single call (no slicing).

     Runs in the calling (master) process.

     Returns
     -------
     The value produced by calcLocalParamsAndSummarize for self.X
     and self.Mu.
     """
     return calcLocalParamsAndSummarize(self.X, self.Mu)