def makeDataSliceFromSharedMem(dataShMemDict, cslice=(0, None), batchID=None):
    """ Create data slice from provided raw arrays and slice indicators.

    Parameters
    ----------
    dataShMemDict : dict
        Must provide entries 'doc_range', 'word_id' and 'word_count'
        (each readable by sharedMemToNumpyArray) plus 'vocab_size'.
        If batchID is given and is a key of this dict, the nested dict
        stored under that key is used instead.
    cslice : tuple (start, stop) of document indices, or None
        Documents start, start+1, ... stop-1 are selected.
        None (or a None stop) means "through the last document".
        A None start means "from the first document".
    batchID : optional key into dataShMemDict

    Returns
    -------
    Dslice : BagOfWordsData object
        Its doc_range is shifted so that it begins at zero.
    """
    if batchID is not None and batchID in dataShMemDict:
        dataShMemDict = dataShMemDict[batchID]

    # Make local views (NOT copies) to shared mem arrays
    doc_range = sharedMemToNumpyArray(dataShMemDict['doc_range'])
    word_id = sharedMemToNumpyArray(dataShMemDict['word_id'])
    word_count = sharedMemToNumpyArray(dataShMemDict['word_count'])
    vocab_size = int(dataShMemDict['vocab_size'])

    # doc_range holds one boundary more than there are documents.
    last_doc = doc_range.size - 1
    if cslice is None:
        start, stop = 0, last_doc
    else:
        start, stop = cslice[0], cslice[1]
        # Fill in only the missing endpoint; previously a None stop also
        # reset the requested start back to zero.
        if start is None:
            start = 0
        if stop is None:
            stop = last_doc

    # Token-level offsets of the first and one-past-last selected document.
    tstart = doc_range[start]
    tstop = doc_range[stop]

    Dslice = BagOfWordsData(
        vocab_size=vocab_size,
        doc_range=doc_range[start:stop + 1] - doc_range[start],
        word_id=word_id[tstart:tstop],
        word_count=word_count[tstart:tstop],
        nDoc=stop - start,
    )
    return Dslice
def makeDataSliceFromSharedMem(dataShMemDict, cslice=(0, None), batchID=None):
    """ Create data slice from provided raw arrays and slice indicators.

    Parameters
    ----------
    dataShMemDict : dict
        Must provide entries 'X' (readable by sharedMemToNumpyArray,
        two-dimensional) and 'nObsTotal'. Optional entries 'Xprev'
        and 'Y' are sliced the same way as 'X' when present.
        If batchID is given and is a key of this dict, the nested dict
        stored under that key is used instead.
    cslice : tuple (start, stop) of row indices, or None
        Rows start, start+1, ... stop-1 are selected.
        None (or a None stop) means "through the last row".
        A None start means "from the first row".
    batchID : optional key into dataShMemDict

    Returns
    -------
    Dslice : XData object
        Built with do_copy=False from the selected rows of X
        (and of Xprev / Y when available).

    Example
    -------
    >>> dataset = XData(np.random.rand(25,2))
    >>> shMemDict = dataset.getRawDataAsSharedMemDict()
    >>> cur_slice = makeDataSliceFromSharedMem(shMemDict)
    >>> np.allclose(dataset.X, cur_slice.X)
    True
    >>> np.allclose(dataset.nObs, cur_slice.nObs)
    True
    >>> dataset.dim == cur_slice.dim
    True
    >>> a_slice = makeDataSliceFromSharedMem(shMemDict, (0, 2))
    >>> a_slice.nObs
    2
    """
    # NOTE(review): the example above reads attributes nObs / dim while this
    # function passes n_examples to XData -- confirm which names XData exposes.
    if batchID is not None and batchID in dataShMemDict:
        dataShMemDict = dataShMemDict[batchID]

    # Make local views (NOT copies) to shared mem arrays
    X = sharedMemToNumpyArray(dataShMemDict['X'])
    nObsTotal = int(dataShMemDict['nObsTotal'])
    # Unpacking two values keeps the requirement that X is 2-D.
    N, _ = X.shape

    if cslice is None:
        start, stop = 0, N
    else:
        start, stop = cslice[0], cslice[1]
        # Fill in only the missing endpoint; previously a None stop also
        # reset the requested start back to zero.
        if start is None:
            start = 0
        if stop is None:
            stop = N

    if 'Xprev' in dataShMemDict:
        Xprev = sharedMemToNumpyArray(dataShMemDict['Xprev'])[start:stop]
    else:
        Xprev = None

    if 'Y' in dataShMemDict:
        Y = sharedMemToNumpyArray(dataShMemDict['Y'])[start:stop]
    else:
        Y = None

    return XData(
        X=X[start:stop],
        Xprev=Xprev,
        Y=Y,
        n_examples=stop - start,
        nObsTotal=nObsTotal,
        do_copy=False,
    )
def run(self):
    """ Serve jobs from self.JobQueue until a None sentinel is received.

    Each job is a triple (sliceArgs, aArgs, oArgs). For every job this
    method refreshes aArgs / oArgs from the shared-memory dicts
    self.aShMem / self.oShMem, builds the data slice named by
    sliceArgs = (start, stop), runs the local step followed by the
    summary step, puts the resulting summary on self.ResultQueue and
    marks the job done on self.JobQueue.

    Returns
    -------
    None
    """
    self.printMsg("process SetUp! pid=%d" % os.getpid())

    # iter(callable, sentinel) keeps calling JobQueue.get and stops as soon
    # as it hands back None, which is the termination signal.
    for sliceArgs, aArgs, oArgs in iter(self.JobQueue.get, None):
        aArgs.update(sharedMemDictToNumpy(self.aShMem))
        oArgs.update(sharedMemDictToNumpy(self.oShMem))

        start, stop = sliceArgs
        Dslice = self.makeDataSliceFromSharedMem(
            self.dataShMem, (start, stop))

        # Diagnostic output: word counts over all data vs. this slice.
        all_word_counts = sharedMemToNumpyArray(self.dataShMem['word_count'])
        self.printMsg("WCtotal=%d" % all_word_counts.sum())
        self.printMsg("WCslice=%d" % Dslice.word_count.sum())

        # Local step
        # NOTE(review): this is the same dict object as self.LPkwargs, so
        # the update below persists on the worker across jobs.
        local_kwargs = self.LPkwargs
        local_kwargs.update(**aArgs)
        local_params = self.o_calcLocalParams(Dslice, **oArgs)
        local_params = self.a_calcLocalParams(
            Dslice, local_params, **local_kwargs)

        # Summary step
        summary = self.a_calcSummaryStats(Dslice, local_params, **aArgs)
        summary = self.o_calcSummaryStats(
            Dslice, summary, local_params, **oArgs)

        self.ResultQueue.put(summary)
        self.JobQueue.task_done()

    # Clean up
    self.printMsg("process CleanUp! pid=%d" % os.getpid())
def fillSharedMemDictForLocalStep(self, ShMem=None):
    """ Get dict of shared mem arrays needed for parallel local step.

    Reads the cached pair (ElogphiT, Elog1mphiT) via
    self.GetCached('E_logphiT_log1mphiT', 'all') and publishes both
    arrays under the keys 'ElogphiT' and 'Elog1mphiT'.

    Parameters
    ----------
    ShMem : dict or None
        Existing dict to refresh. When None, a new dict is created.
        When it lacks 'ElogphiT', fresh shared arrays are allocated for
        both keys. Otherwise the first K columns of the existing
        shared arrays are overwritten in place.

    Returns
    -------
    ShMem : dict of RawArray objects

    Raises
    ------
    AssertionError
        If an existing shared array has fewer than K columns.
    """
    ElogphiT, Elog1mphiT = self.GetCached('E_logphiT_log1mphiT', 'all')
    K = self.K
    if ShMem is None:
        ShMem = dict()
    if 'ElogphiT' not in ShMem:
        ShMem['ElogphiT'] = numpyToSharedMemArray(ElogphiT)
        ShMem['Elog1mphiT'] = numpyToSharedMemArray(Elog1mphiT)
    else:
        # Compare the column count (the axis written by [:, :K] below)
        # against K. Comparing the whole shape tuple to an int raises
        # TypeError on Python 3 and is vacuously true on Python 2.
        ElogphiT_shView = sharedMemToNumpyArray(ShMem['ElogphiT'])
        assert ElogphiT_shView.shape[1] >= K
        ElogphiT_shView[:, :K] = ElogphiT

        Elog1mphiT_shView = sharedMemToNumpyArray(ShMem['Elog1mphiT'])
        assert Elog1mphiT_shView.shape[1] >= K
        Elog1mphiT_shView[:, :K] = Elog1mphiT
    return ShMem
def fillSharedMemDictForLocalStep(self, ShMem=None):
    """ Get dict of shared mem arrays needed for parallel local step.

    Publishes the current value of self.alpha_E_beta() under the key
    'alphaEbeta'. If that key already exists, the existing shared array
    is overwritten in place; otherwise a new shared array is created
    from a copy of the value.

    Parameters
    ----------
    ShMem : dict, optional
        Dict to refresh. Anything that is not a dict is replaced by a
        new empty dict.

    Returns
    -------
    ShMem : dict of RawArray objects
    """
    shared = ShMem if isinstance(ShMem, dict) else dict()
    current = self.alpha_E_beta()

    if 'alphaEbeta' not in shared:
        # First call: allocate the shared array from a private copy.
        shared['alphaEbeta'] = numpyToSharedMemArray(current.copy())
        return shared

    # Later calls: write through a numpy view onto the existing buffer.
    view = sharedMemToNumpyArray(shared['alphaEbeta'])
    assert view.size >= self.K
    view[:current.size] = current
    return shared
def makeDataSliceFromSharedMem(dataShMemDict, cslice=(0, None), batchID=None):
    """ Create data slice from provided raw arrays and slice indicators.

    Parameters
    ----------
    dataShMemDict : dict
        Must provide entries 'doc_range' and 'X' (each readable by
        sharedMemToNumpyArray) plus 'nDocTotal'. An optional entry
        'Xprev' is sliced the same way as 'X' when present.
        If batchID is given and is a key of this dict, the nested dict
        stored under that key is used instead.
    cslice : tuple (start, stop) of document indices, or None
        Documents start, start+1, ... stop-1 are selected.
        None (or a None stop) means "through the last document".
        A None start means "from the first document".
    batchID : optional key into dataShMemDict

    Returns
    -------
    Dslice : namedtuple "GroupXDataTuple" with fields
        * X
        * Xprev
        * doc_range
        * nDoc
        * nObs
        * dim
        * nDocTotal
        Represents subset of documents identified by cslice tuple.
        Its doc_range is shifted so that it begins at zero.

    Example
    -------
    >>> Data = GroupXData(np.random.rand(25,2), doc_range=[0,4,12,25])
    >>> shMemDict = Data.getRawDataAsSharedMemDict()
    >>> Dslice = makeDataSliceFromSharedMem(shMemDict)
    >>> np.allclose(Data.X, Dslice.X)
    True
    >>> np.allclose(Data.nObs, Dslice.nObs)
    True
    >>> Data.dim == Dslice.dim
    True
    >>> Aslice = makeDataSliceFromSharedMem(shMemDict, (0, 2))
    >>> Aslice.nDoc
    2
    >>> np.allclose(Aslice.doc_range, Dslice.doc_range[0:(2+1)])
    True
    """
    if batchID is not None and batchID in dataShMemDict:
        dataShMemDict = dataShMemDict[batchID]

    # Make local views (NOT copies) to shared mem arrays
    doc_range = sharedMemToNumpyArray(dataShMemDict['doc_range'])
    X = sharedMemToNumpyArray(dataShMemDict['X'])
    nDocTotal = int(dataShMemDict['nDocTotal'])
    dim = X.shape[1]

    # doc_range holds one boundary more than there are documents.
    last_doc = doc_range.size - 1
    if cslice is None:
        start, stop = 0, last_doc
    else:
        start, stop = cslice[0], cslice[1]
        # Fill in only the missing endpoint; previously a None stop also
        # reset the requested start back to zero.
        if start is None:
            start = 0
        if stop is None:
            stop = last_doc

    # Row offsets of the first and one-past-last selected document.
    tstart = doc_range[start]
    tstop = doc_range[stop]

    keys = ['X', 'Xprev', 'doc_range', 'nDoc', 'nObs', 'dim', 'nDocTotal']

    if 'Xprev' in dataShMemDict:
        Xprev = sharedMemToNumpyArray(dataShMemDict['Xprev'])[tstart:tstop]
    else:
        Xprev = None

    Dslice = namedtuple("GroupXDataTuple", keys)(
        X=X[tstart:tstop],
        Xprev=Xprev,
        doc_range=doc_range[start:stop + 1] - doc_range[start],
        nDoc=stop - start,
        nObs=tstop - tstart,
        dim=dim,
        nDocTotal=nDocTotal,
    )
    return Dslice